# Commit c72ea72bae
#
# - Added arxiv.py plugin for searching academic papers on arXiv.org
# - Added news.py plugin for fetching news from GNews API
# - Added hackernews.py plugin for fetching stories from Hacker News
# - All plugins now output results in collapsible <details> tags for better UX
# - Enhanced funguy.py with improved error handling, logging, and plugin management
# - Updated help.py and README.md with documentation for new plugins
# - Added !plugins command to list loaded plugins
# - Improved configuration loading and plugin disable/enable functionality
#
# 323 lines, 12 KiB, Python
"""
|
|
arXiv Paper Search Plugin for Funguy Bot
|
|
|
|
Searches academic papers in physics, mathematics, computer science, and more.
|
|
Uses arXiv API - completely free, no API key required.
|
|
|
|
Commands:
|
|
!arxiv <query> - Search for papers (shows abstract)
|
|
!arxiv list <query> - List papers without abstracts
|
|
!arxiv category <category> - Browse recent papers by category
|
|
!arxiv recent [category] - Recent papers (last 7 days)
|
|
!arxiv random - Random paper
|
|
!arxiv <id> - Get paper by arXiv ID
|
|
"""
|
|
|
|
import logging
|
|
import aiohttp
|
|
import xml.etree.ElementTree as ET
|
|
import random
|
|
from typing import Optional, Dict, List
|
|
from datetime import datetime, timedelta
|
|
|
|
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
|
|
|
|
# Result count used when a request does not specify one.
DEFAULT_RESULTS = 3
# Upper bound for a requested result count.
MAX_RESULTS = 10

# Short, chat-friendly aliases mapped to arXiv category codes.
# "security" and "crypto" both resolve to cs.CR.
CATEGORIES = dict(
    ai="cs.AI",
    ml="cs.LG",
    security="cs.CR",
    crypto="cs.CR",
    cv="cs.CV",
    nlp="cs.CL",
    math="math",
    physics="physics",
    quantum="quant-ph",
    bio="q-bio",
    economics="econ",
    software="cs.SE",
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Helper Functions
# ---------------------------------------------------------------------------
|
|
|
|
def _format_collapsible(title: str, content: str, expanded: bool = False) -> str:
|
|
"""Format content in a collapsible details/summary block."""
|
|
open_attr = ' open' if expanded else ''
|
|
return f"<details{open_attr}>\n<summary>📚 {title}</summary>\n\n{content}\n\n</details>"
|
|
|
|
|
|
def _oxford_comma(items):
|
|
if not items:
|
|
return ""
|
|
if len(items) == 1:
|
|
return items[0]
|
|
if len(items) == 2:
|
|
return f"{items[0]} and {items[1]}"
|
|
return f"{', '.join(items[:-1])}, and {items[-1]}"
|
|
|
|
|
|
def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str:
    """Format a paper as an HTML list item.

    Args:
        paper: Mapping with the keys ``title``, ``authors``, ``published``,
            ``categories``, ``id``, ``pdf_url`` and ``summary``.
        index: Number shown in front of the title.
        include_abstract: When true, append the abstract (cut to 500
            characters) unless it is the ``"No abstract"`` placeholder.

    Returns:
        An ``<li>...</li>`` HTML fragment. All text taken from *paper* is
        HTML-escaped, because titles and abstracts routinely contain ``<``,
        ``>`` and ``&`` that would otherwise corrupt the markup.
    """
    from html import escape  # local import keeps the block self-contained

    authors = [escape(name) for name in paper['authors']]
    categories = [escape(term) for term in paper['categories']]
    pdf_url = escape(paper['pdf_url'])  # also escapes quotes for the href attribute

    parts = [f"<li>\n<strong>{index}. {escape(paper['title'])}</strong><br/>\n"]

    # Authors: show at most three names, then summarise the rest.
    parts.append(f"👥 <strong>Authors:</strong> {_oxford_comma(authors[:3])}")
    extra_authors = len(authors) - 3
    if extra_authors > 0:
        noun = "other" if extra_authors == 1 else "others"
        parts.append(f" and {extra_authors} {noun}")
    parts.append("<br/>\n")

    # Metadata
    parts.append(f"📅 <strong>Published:</strong> {escape(paper['published'])}<br/>\n")
    parts.append(f"🏷️ <strong>Categories:</strong> {', '.join(categories[:3])}")
    extra_categories = len(categories) - 3
    if extra_categories > 0:
        parts.append(f" +{extra_categories}")
    parts.append("<br/>\n")

    # Links
    parts.append(f"🔗 <strong>arXiv ID:</strong> {escape(paper['id'])}<br/>\n")
    parts.append(f"📄 <strong>PDF:</strong> <a href='{pdf_url}'>{pdf_url}</a><br/>\n")

    # Abstract
    if include_abstract and paper['summary'] != "No abstract":
        abstract = paper['summary']
        if len(abstract) > 500:
            abstract = abstract[:497] + "..."
        # Escape after truncating so an entity is never cut in half.
        parts.append(f"📝 <strong>Abstract:</strong><br/>{escape(abstract)}\n")

    parts.append("</li>")
    return "".join(parts)
|
|
|
|
|
|
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list: Optional[List[str]] = None) -> Optional[List[Dict]]:
    """Query the arXiv API and return the parsed papers.

    Args:
        query: arXiv ``search_query`` expression. Ignored when *id_list* is
            given.
        max_results: Maximum number of entries to request.
        id_list: Optional arXiv identifiers to fetch directly. They are sent
            through the API's ``id_list`` parameter as a comma-separated list.
            Building a ``search_query`` with literal ``+OR+`` does not work
            here because the HTTP client percent-encodes the ``+`` signs.

    Returns:
        The list produced by ``_parse_arxiv_response``, or ``None`` when the
        request fails, times out, or returns a non-200 status.
    """
    base_url = "http://export.arxiv.org/api/query"

    if id_list:
        params = {"id_list": ",".join(id_list), "max_results": max_results}
    else:
        params = {
            "search_query": query,
            "max_results": max_results,
            "sortBy": "relevance",
            "sortOrder": "descending",
        }

    # Bound the request so a stalled connection cannot hang the command.
    timeout = aiohttp.ClientTimeout(total=30)

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(base_url, params=params) as response:
                if response.status != 200:
                    logging.warning("arXiv API returned HTTP %s", response.status)
                    return None
                text = await response.text()
        return _parse_arxiv_response(text)
    except Exception as e:
        # Best-effort boundary: network, timeout and XML errors all mean "no result".
        logging.error("Error searching arXiv: %s", e)
        return None
|
|
|
|
|
|
async def _get_category_papers(category: str, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]:
    """Search arXiv for up to *limit* papers in the given category code."""
    category_query = f"cat:{category}"
    found = await _search_arxiv(category_query, limit)
    return found
|
|
|
|
|
|
async def _get_recent_papers(category: Optional[str] = None, days: int = 7, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]:
    """Search arXiv for papers submitted within the last *days* days.

    Args:
        category: Optional arXiv category code used to restrict the search.
        days: Size of the look-back window, in days.
        limit: Maximum number of papers to request.

    Returns:
        Whatever ``_search_arxiv`` returns for the date-range query.
    """
    from datetime import timezone  # local import: the module only imports datetime/timedelta

    # Take one timestamp so both ends of the range come from the same instant.
    # UTC is used because the arXiv API interprets submittedDate in GMT.
    now = datetime.now(timezone.utc)
    start = (now - timedelta(days=days)).strftime("%Y%m%d")
    end = now.strftime("%Y%m%d")

    date_range = f"submittedDate:[{start}000000 TO {end}235959]"
    query = f"cat:{category} AND {date_range}" if category else date_range
    return await _search_arxiv(query, limit)
|
|
|
|
|
|
async def _get_random_paper() -> Optional[Dict]:
    """Return one randomly chosen paper from a search on a random seed term.

    Returns ``None`` when the search yields nothing.
    """
    seed_terms = ["machine learning", "quantum", "neural network", "optimization", "algorithm", "security"]
    chosen_term = random.choice(seed_terms)
    candidates = await _search_arxiv(chosen_term, max_results=MAX_RESULTS)
    if not candidates:
        return None
    return random.choice(candidates)
|
|
|
|
|
|
def _parse_arxiv_response(xml_text: str) -> List[Dict]:
|
|
namespaces = {'atom': 'http://www.w3.org/2005/Atom', 'arxiv': 'http://arxiv.org/schemas/atom'}
|
|
root = ET.fromstring(xml_text)
|
|
entries = root.findall('atom:entry', namespaces)
|
|
|
|
papers = []
|
|
for entry in entries:
|
|
title = entry.find('atom:title', namespaces)
|
|
title_text = ' '.join(title.text.strip().split()) if title is not None else "No title"
|
|
|
|
summary = entry.find('atom:summary', namespaces)
|
|
summary_text = ' '.join(summary.text.strip().split()) if summary is not None else "No abstract"
|
|
|
|
authors = []
|
|
for author in entry.findall('atom:author', namespaces):
|
|
name = author.find('atom:name', namespaces)
|
|
if name is not None and name.text:
|
|
authors.append(name.text)
|
|
|
|
id_elem = entry.find('atom:id', namespaces)
|
|
paper_id = id_elem.text.split('/')[-1] if id_elem is not None else "Unknown"
|
|
|
|
pdf_link = None
|
|
for link in entry.findall('atom:link', namespaces):
|
|
if link.get('title') == 'pdf':
|
|
pdf_link = link.get('href')
|
|
break
|
|
|
|
categories = []
|
|
for category in entry.findall('atom:category', namespaces):
|
|
term = category.get('term')
|
|
if term:
|
|
categories.append(term)
|
|
|
|
published = entry.find('atom:published', namespaces)
|
|
pub_date = published.text.split('T')[0] if published is not None else "Unknown"
|
|
|
|
papers.append({
|
|
'id': paper_id,
|
|
'title': title_text,
|
|
'summary': summary_text,
|
|
'authors': authors,
|
|
'pdf_url': pdf_link or f"http://arxiv.org/pdf/{paper_id}.pdf",
|
|
'arxiv_url': f"http://arxiv.org/abs/{paper_id}",
|
|
'categories': categories,
|
|
'published': pub_date
|
|
})
|
|
|
|
return papers
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Command Handler
# ---------------------------------------------------------------------------
|
|
|
|
async def handle_command(room, message, bot, prefix, config):
    """Handle the ``!arxiv`` chat command.

    Supported forms (an optional result count may be given as the first or
    last argument and is clamped to ``1..MAX_RESULTS``):

    * ``!arxiv`` - show help
    * ``!arxiv <query>`` - search, with abstracts
    * ``!arxiv list <query>`` - search, without abstracts
    * ``!arxiv category <cat>`` - browse one of the ``CATEGORIES`` aliases
    * ``!arxiv recent [cat]`` - papers from the last 7 days
    * ``!arxiv random`` - one random paper
    * ``!arxiv <id> [<id> ...]`` - look up papers by arXiv identifier

    Args:
        room: Room the message was posted in; ``room.room_id`` is the reply target.
        message: The incoming message event.
        bot: Bot instance; replies are sent through ``bot.api``.
        prefix: Command prefix passed to ``MessageMatch``.
        config: Accepted for the handler signature; not used here.
    """
    import html  # local import: only needed to escape user input in the title

    import simplematrixbotlib as botlib

    match = botlib.MessageMatch(room, message, bot, prefix)

    if not (match.is_not_from_this_bot() and match.prefix() and match.command("arxiv")):
        return

    args = match.args()
    room_id = room.room_id

    if not args:
        # Angle brackets are written as entities so the placeholders stay
        # visible instead of being swallowed as unknown HTML tags.
        help_content = (
            "<strong>Commands:</strong><br/><br/>"
            "• <code>!arxiv &lt;query&gt;</code> - Search papers<br/>"
            "• <code>!arxiv list &lt;query&gt;</code> - List without abstracts<br/>"
            "• <code>!arxiv category &lt;cat&gt;</code> - Browse category<br/>"
            "• <code>!arxiv recent [cat]</code> - Recent papers<br/>"
            "• <code>!arxiv random</code> - Random paper<br/>"
            "• <code>!arxiv &lt;id&gt;</code> - Get by ID<br/><br/>"
            f"<strong>Categories:</strong> {', '.join(CATEGORIES)}"
        )
        response = _format_collapsible("arXiv Help", help_content, expanded=True)
        await bot.api.send_markdown_message(room_id, response)
        return

    limit = DEFAULT_RESULTS
    include_abstract = True

    # Optional result count as first or last argument. isdecimal() is used
    # because isdigit() also accepts characters that int() cannot parse.
    if args[0].isdecimal():
        limit = max(1, min(int(args[0]), MAX_RESULTS))
        args = args[1:]
    elif args[-1].isdecimal():
        limit = max(1, min(int(args[-1]), MAX_RESULTS))
        args = args[:-1]

    if not args:
        # Only a number was supplied; there is nothing to search for.
        await bot.api.send_text_message(room_id, "Usage: !arxiv [limit] <query>")
        return

    cmd = args[0].lower()
    papers = None
    title = ""
    query = None  # set by the branches that run a free-text search

    if cmd == "list":
        if len(args) < 2:
            await bot.api.send_text_message(room_id, "Usage: !arxiv list <query>")
            return
        include_abstract = False
        query = " ".join(args[1:])

    elif cmd == "category" and len(args) >= 2:
        cat_key = args[1].lower()
        if cat_key not in CATEGORIES:
            await bot.api.send_text_message(room_id, f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
            return
        await bot.api.send_text_message(room_id, f"📚 Fetching {cat_key.upper()} papers...")
        papers = await _get_category_papers(CATEGORIES[cat_key], limit)
        title = f"Recent Papers in {cat_key.upper()}"

    elif cmd == "recent":
        category = None
        if len(args) >= 2 and args[1].lower() in CATEGORIES:
            category = CATEGORIES[args[1].lower()]
            await bot.api.send_text_message(room_id, f"📚 Fetching recent {args[1].upper()} papers...")
            title = f"Recent Papers in {args[1].upper()} (7 Days)"
        else:
            await bot.api.send_text_message(room_id, "📚 Fetching recent papers...")
            title = "Recent Papers (Last 7 Days)"
        # The second positional parameter of _get_recent_papers is the
        # look-back window in days, so the result limit must not be passed there.
        papers = await _get_recent_papers(category)
        if papers:
            papers = papers[:limit]

    elif cmd == "random":
        await bot.api.send_text_message(room_id, "🎲 Fetching random paper...")
        paper = await _get_random_paper()
        if paper:
            content = f"<ul>\n{_format_paper(paper, 1, True)}\n</ul>"
            response = _format_collapsible("Random Paper", content, True)
            await bot.api.send_markdown_message(room_id, response)
        else:
            await bot.api.send_text_message(room_id, "❌ Failed to fetch random paper.")
        return

    elif _is_arxiv_id(args[0]):
        # Use the original tokens: old-style ids such as "math.GT/0309136" are case-sensitive.
        paper_ids = [arg for arg in args if _is_arxiv_id(arg)]
        await bot.api.send_text_message(room_id, "📚 Fetching paper(s)...")
        papers = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
        title = "Paper Details"

    else:
        query = " ".join(args)

    if query is not None:
        await bot.api.send_text_message(room_id, f"🔍 Searching: *{query[:50]}*...")
        papers = await _search_arxiv(query, limit)
        # The query is user input that ends up inside the HTML <summary>.
        title = f"Search: '{html.escape(query[:50])}'"

    if not papers:
        await bot.api.send_text_message(room_id, "❌ No papers found.")
        return

    items = "\n".join(_format_paper(paper, i, include_abstract) for i, paper in enumerate(papers, 1))
    content = f"<ul>\n{items}\n</ul>\n\n<em>Found {len(papers)} papers</em>"

    response = _format_collapsible(title, content, False)
    await bot.api.send_markdown_message(room_id, response)
    logging.info("Sent arXiv search results")


def _is_arxiv_id(token: str) -> bool:
    """Return True if *token* has the shape of a new- or old-style arXiv identifier.

    Accepted shapes: ``2101.00001`` / ``0704.0001`` and ``hep-th/9901001`` /
    ``math.GT/0309136``, each with an optional ``vN`` version suffix.
    """
    import re

    pattern = r"(\d{4}\.\d{4,5}|[A-Za-z-]+(\.[A-Za-z]{2})?/\d{7})(v\d+)?"
    return re.fullmatch(pattern, token) is not None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Plugin Setup
# ---------------------------------------------------------------------------
|
|
|
|
def setup(bot):
|
|
logging.info("arXiv plugin loaded")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Plugin Metadata
# ---------------------------------------------------------------------------
|
|
|
|
__version__ = "1.0.0"
|
|
__author__ = "Funguy Bot"
|
|
__description__ = "arXiv academic paper search"
|