"""
arXiv Paper Search Plugin for Funguy Bot

Searches academic papers in physics, mathematics, computer science, and more.
Uses arXiv API - completely free, no API key required.

Commands:
    !arxiv <query>            - Search for papers (shows abstract)
    !arxiv list <query>       - List papers without abstracts
    !arxiv category <cat>     - Browse recent papers by category
    !arxiv recent [category]  - Recent papers (last 7 days)
    !arxiv random             - Random paper
    !arxiv <id>               - Get paper by arXiv ID

Any command may carry a leading or trailing integer to set the number of
results (clamped to 1..MAX_RESULTS), e.g. ``!arxiv 5 graph neural networks``.
"""

import html
import logging
import random
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Optional, Tuple

import aiohttp

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

DEFAULT_RESULTS = 3
MAX_RESULTS = 10

ARXIV_API_URL = "http://export.arxiv.org/api/query"

# Friendly aliases accepted by the "category" and "recent" sub-commands.
CATEGORIES = {
    "ai": "cs.AI",
    "ml": "cs.LG",
    "security": "cs.CR",
    "crypto": "cs.CR",
    "cv": "cs.CV",
    "nlp": "cs.CL",
    "math": "math",
    "physics": "physics",
    "quantum": "quant-ph",
    "bio": "q-bio",
    "economics": "econ",
    "software": "cs.SE",
}

# XML namespaces used by the Atom feed the API returns.
_ATOM_NAMESPACES = {
    'atom': 'http://www.w3.org/2005/Atom',
    'arxiv': 'http://arxiv.org/schemas/atom',
}

# New-style ("2101.00101", optional "v2") and old-style ("hep-th/9901001",
# "math.GT/0309136") arXiv identifiers.
_ARXIV_ID_RE = re.compile(
    r"^(?:\d{4}\.\d{4,5}|[a-z\-]+(?:\.[a-z]{2})?/\d{7})(?:v\d+)?$",
    re.IGNORECASE,
)

# ---------------------------------------------------------------------------
# Helper Functions
# ---------------------------------------------------------------------------


def _format_collapsible(title: str, content: str, expanded: bool = False) -> str:
    """Format content in a collapsible details/summary block.

    Args:
        title: Text shown in the summary line (inserted as-is; callers must
            escape anything that comes from user input).
        content: Body of the block, already formatted.
        expanded: When true the block is rendered open.

    Returns:
        The markup for the collapsible block.
    """
    # NOTE(review): the exact tags were reconstructed from the docstring and
    # the otherwise-unused `open` attribute - confirm against rendered output.
    open_attr = ' open' if expanded else ''
    return (
        f"<details{open_attr}>\n<summary>📚 {title}</summary>\n\n"
        f"{content}\n\n</details>"
    )


def _oxford_comma(items: List[str]) -> str:
    """Join *items* as an English list using the Oxford comma.

    Returns an empty string for an empty list and the bare item for a
    single-element list.
    """
    if not items:
        return ""
    if len(items) == 1:
        return items[0]
    if len(items) == 2:
        return f"{items[0]} and {items[1]}"
    return f"{', '.join(items[:-1])}, and {items[-1]}"


def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str:
    """Format a paper as an HTML list item.

    Args:
        paper: A dict as produced by ``_parse_arxiv_response``.
        index: 1-based position shown in front of the title.
        include_abstract: When true, append the abstract (truncated to 500
            characters) unless the paper has none.

    Returns:
        One ``<li>`` element describing the paper.
    """
    # Feed text is external input that ends up inside markup, so escape it.
    title = html.escape(paper['title'], quote=False)
    authors = [html.escape(name, quote=False) for name in paper['authors']]
    categories = paper['categories']

    # Show at most three authors, then summarise the rest.
    shown_authors = authors[:3]
    hidden_authors = len(authors) - len(shown_authors)
    if hidden_authors:
        plural = "" if hidden_authors == 1 else "s"
        author_text = f"{', '.join(shown_authors)}, and {hidden_authors} other{plural}"
    else:
        author_text = _oxford_comma(shown_authors)

    category_text = ', '.join(categories[:3])
    if len(categories) > 3:
        category_text += f" +{len(categories) - 3}"

    # NOTE(review): <li>/<br> reconstructed from "HTML list item" in the
    # original docstring - confirm against rendered output.
    parts = [
        f"<li>\n{index}. {title}<br>\n",
        f"👥 Authors: {author_text}<br>\n",
        f"📅 Published: {paper['published']}<br>\n",
        f"🏷️ Categories: {category_text}<br>\n",
        f"🔗 arXiv ID: {paper['id']}<br>\n",
        f"📄 PDF: {paper['pdf_url']}<br>\n",
    ]

    if include_abstract and paper['summary'] != "No abstract":
        abstract = paper['summary']
        # Truncate before escaping so an entity is never cut in half.
        if len(abstract) > 500:
            abstract = abstract[:497] + "..."
        parts.append(f"📝 Abstract:<br>{html.escape(abstract, quote=False)}\n")

    parts.append("</li>")
    return "".join(parts)


def _help_content() -> str:
    """Return the body of the in-chat help block."""
    return (
        "Commands:<br><br>"
        "• !arxiv &lt;query&gt; - Search papers<br>"
        "• !arxiv list &lt;query&gt; - List without abstracts<br>"
        "• !arxiv category &lt;cat&gt; - Browse category<br>"
        "• !arxiv recent [cat] - Recent papers<br>"
        "• !arxiv random - Random paper<br>"
        "• !arxiv &lt;id&gt; - Get by ID<br><br>"
        f"Categories: {', '.join(CATEGORIES)}"
    )


def _split_limit(args: List[str]) -> Tuple[List[str], int]:
    """Strip an optional leading or trailing result count from *args*.

    Returns:
        ``(remaining_args, limit)`` where ``limit`` is clamped to
        ``1..MAX_RESULTS`` and defaults to ``DEFAULT_RESULTS``.
    """
    remaining = list(args)
    if remaining and remaining[0].isdecimal():
        token = remaining.pop(0)
    elif remaining and remaining[-1].isdecimal():
        token = remaining.pop()
    else:
        return remaining, DEFAULT_RESULTS
    return remaining, max(1, min(int(token), MAX_RESULTS))


def _element_text(element, default: str) -> str:
    """Return the whitespace-normalised text of *element*, or *default*.

    *default* is used when the element is missing or has no text.
    """
    if element is None or not element.text:
        return default
    return ' '.join(element.text.split())


async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS,
                        id_list: Optional[List[str]] = None,
                        sort_by: str = "relevance") -> Optional[List[Dict]]:
    """Query the arXiv API and return the parsed papers.

    Args:
        query: Value for the API's ``search_query`` parameter. Ignored when
            *id_list* is given.
        max_results: Maximum number of entries to request.
        id_list: arXiv identifiers to fetch directly.
        sort_by: Value for the API's ``sortBy`` parameter (search only).

    Returns:
        A list of paper dicts (possibly empty), or ``None`` when the request
        fails, returns a non-200 status, or the response cannot be parsed.
    """
    if id_list:
        # The API takes identifiers through its own comma-delimited
        # parameter; an "id:" search joined with "+OR+" gets its plus signs
        # percent-encoded and does not form a valid boolean query.
        params = {"id_list": ",".join(id_list), "max_results": max_results}
    else:
        params = {
            "search_query": query,
            "max_results": max_results,
            "sortBy": sort_by,
            "sortOrder": "descending",
        }

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(ARXIV_API_URL, params=params) as response:
                if response.status != 200:
                    logging.error("arXiv API returned HTTP %s", response.status)
                    return None
                text = await response.text()
        return _parse_arxiv_response(text)
    except Exception as e:
        # Deliberately broad: a failed lookup must never crash the bot.
        logging.error(f"Error searching arXiv: {e}")
        return None


async def _get_category_papers(category: str, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]:
    """Return the newest papers in an arXiv *category* (e.g. ``cs.AI``)."""
    return await _search_arxiv(f"cat:{category}", limit, sort_by="submittedDate")


async def _get_recent_papers(category: str = None, days: int = 7,
                             limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]:
    """Return papers submitted within the last *days* days.

    Args:
        category: Optional arXiv category to restrict the search to.
        days: Size of the look-back window in days.
        limit: Maximum number of papers to return.
    """
    # The API documents submittedDate bounds as YYYYMMDDHHMM in GMT.
    now = datetime.now(timezone.utc)
    start = (now - timedelta(days=days)).strftime("%Y%m%d")
    end = now.strftime("%Y%m%d")
    query = f"submittedDate:[{start}0000 TO {end}2359]"
    if category:
        query = f"cat:{category} AND {query}"
    return await _search_arxiv(query, limit, sort_by="submittedDate")


async def _get_random_paper() -> Optional[Dict]:
    """Return a random paper from a search on a randomly chosen topic."""
    terms = ["machine learning", "quantum", "neural network", "optimization", "algorithm", "security"]
    results = await _search_arxiv(random.choice(terms), max_results=MAX_RESULTS)
    return random.choice(results) if results else None


def _parse_arxiv_response(xml_text: str) -> List[Dict]:
    """Parse an arXiv Atom feed into a list of paper dicts.

    Each dict has the keys ``id``, ``title``, ``summary``, ``authors``,
    ``pdf_url``, ``arxiv_url``, ``categories`` and ``published``.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_text* is not well-formed.
    """
    root = ET.fromstring(xml_text)
    papers = []

    for entry in root.findall('atom:entry', _ATOM_NAMESPACES):
        authors = []
        for author in entry.findall('atom:author', _ATOM_NAMESPACES):
            name = author.find('atom:name', _ATOM_NAMESPACES)
            if name is not None and name.text:
                authors.append(name.text)

        id_elem = entry.find('atom:id', _ATOM_NAMESPACES)
        if id_elem is not None and id_elem.text:
            id_text = id_elem.text.strip()
            # Old-style identifiers contain a slash ("hep-th/9901001"), so
            # keep everything after "/abs/" rather than the last path segment.
            _, found, tail = id_text.partition('/abs/')
            paper_id = tail if found else id_text.rsplit('/', 1)[-1]
        else:
            paper_id = "Unknown"

        pdf_link = None
        for link in entry.findall('atom:link', _ATOM_NAMESPACES):
            if link.get('title') == 'pdf':
                pdf_link = link.get('href')
                break

        categories = [
            term
            for term in (
                category.get('term')
                for category in entry.findall('atom:category', _ATOM_NAMESPACES)
            )
            if term
        ]

        published = entry.find('atom:published', _ATOM_NAMESPACES)
        if published is not None and published.text:
            pub_date = published.text.split('T')[0]
        else:
            pub_date = "Unknown"

        papers.append({
            'id': paper_id,
            'title': _element_text(entry.find('atom:title', _ATOM_NAMESPACES), "No title"),
            'summary': _element_text(entry.find('atom:summary', _ATOM_NAMESPACES), "No abstract"),
            'authors': authors,
            'pdf_url': pdf_link or f"http://arxiv.org/pdf/{paper_id}.pdf",
            'arxiv_url': f"http://arxiv.org/abs/{paper_id}",
            'categories': categories,
            'published': pub_date,
        })

    return papers


# ---------------------------------------------------------------------------
# Command Handler
# ---------------------------------------------------------------------------


async def handle_command(room, message, bot, prefix, config):
    """Handle the ``!arxiv`` command and reply in *room*.

    Messages that are from this bot, lack the prefix, or are not the
    ``arxiv`` command are ignored. With no arguments (or only a result
    count) the help block is sent. *config* is accepted for interface
    compatibility and is not used here.
    """
    import simplematrixbotlib as botlib

    match = botlib.MessageMatch(room, message, bot, prefix)
    if not (match.is_not_from_this_bot() and match.prefix() and match.command("arxiv")):
        return

    room_id = room.room_id
    args, limit = _split_limit(match.args())

    if not args:
        response = _format_collapsible("arXiv Help", _help_content(), expanded=True)
        await bot.api.send_markdown_message(room_id, response)
        return

    cmd = args[0].lower()
    include_abstract = True

    if cmd == "random":
        await bot.api.send_text_message(room_id, "🎲 Fetching random paper...")
        paper = await _get_random_paper()
        if paper:
            content = f"<ul>\n{_format_paper(paper, 1, True)}\n</ul>"
            response = _format_collapsible("Random Paper", content, True)
            await bot.api.send_markdown_message(room_id, response)
        else:
            await bot.api.send_text_message(room_id, "❌ Failed to fetch random paper.")
        return

    if cmd == "list" and len(args) < 2:
        await bot.api.send_text_message(room_id, "Usage: !arxiv list <query>")
        return

    if cmd == "category" and len(args) >= 2:
        cat_key = args[1].lower()
        if cat_key not in CATEGORIES:
            await bot.api.send_text_message(
                room_id, f"Unknown category. Available: {', '.join(CATEGORIES)}")
            return
        await bot.api.send_text_message(room_id, f"📚 Fetching {cat_key.upper()} papers...")
        papers = await _get_category_papers(CATEGORIES[cat_key], limit)
        title = f"Recent Papers in {cat_key.upper()}"

    elif cmd == "recent":
        category = None
        if len(args) >= 2 and args[1].lower() in CATEGORIES:
            category = CATEGORIES[args[1].lower()]
            await bot.api.send_text_message(
                room_id, f"📚 Fetching recent {args[1].upper()} papers...")
            title = f"Recent Papers in {args[1].upper()} (7 Days)"
        else:
            await bot.api.send_text_message(room_id, "📚 Fetching recent papers...")
            title = "Recent Papers (Last 7 Days)"
        # Pass the count by keyword: the second positional parameter is the
        # look-back window in days, not the result limit.
        papers = await _get_recent_papers(category, limit=limit)

    elif _ARXIV_ID_RE.match(args[0]):
        # Identifiers keep their original case (old-style IDs such as
        # "math.GT/0309136" are mixed-case).
        paper_ids = [arg for arg in args if _ARXIV_ID_RE.match(arg)]
        await bot.api.send_text_message(room_id, "📚 Fetching paper(s)...")
        papers = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
        title = "Paper Details"

    else:
        if cmd == "list":
            include_abstract = False
            args = args[1:]
        query = " ".join(args)
        await bot.api.send_text_message(room_id, f"🔍 Searching: *{query[:50]}*...")
        papers = await _search_arxiv(query, limit)
        # The query is user input that ends up inside the summary markup.
        title = f"Search: '{html.escape(query[:50], quote=False)}'"

    if not papers:
        await bot.api.send_text_message(room_id, "❌ No papers found.")
        return

    items = "".join(
        _format_paper(paper, i, include_abstract) + "\n"
        for i, paper in enumerate(papers, 1)
    )
    plural = "" if len(papers) == 1 else "s"
    content = f"<ul>\n{items}</ul>\n\nFound {len(papers)} paper{plural}"

    response = _format_collapsible(title, content, False)
    await bot.api.send_markdown_message(room_id, response)
    logging.info("Sent arXiv search results")


# ---------------------------------------------------------------------------
# Plugin Setup
# ---------------------------------------------------------------------------


def setup(bot):
    """Plugin entry point; only logs that the plugin was loaded."""
    logging.info("arXiv plugin loaded")


# ---------------------------------------------------------------------------
# Plugin Metadata
# ---------------------------------------------------------------------------

__version__ = "1.0.0"
__author__ = "Funguy Bot"
__description__ = "arXiv academic paper search"
# NOTE(review): line-break markup and escaped placeholders reconstructed -
# confirm against how the bot's help command renders this text.
__help__ = """
!arxiv – Search academic papers on arXiv<br>
• !arxiv &lt;query&gt; – Search papers (shows abstracts)<br>
• !arxiv list &lt;query&gt; – List without abstracts<br>
• !arxiv category &lt;category&gt; – Browse recent papers by category<br>
• !arxiv recent [category] – Most recent papers (7 days)<br>
• !arxiv random – Random paper<br>
• !arxiv &lt;id&gt; – Get paper by arXiv ID (e.g., 2101.00101)<br>
<br>
Categories: ai, ml, security, crypto, cv, nlp, math, physics, quantum, bio, economics, software
"""