Fixed the arxiv plugin to respect API rate limits. Fixed lastfm "np" to search genre tags and include song length.
This commit is contained in:
+107
-43
@@ -13,11 +13,13 @@ Commands:
|
||||
!arxiv <id> - Get paper by arXiv ID
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import aiohttp
|
||||
import xml.etree.ElementTree as ET
|
||||
import random
|
||||
from typing import Optional, Dict, List
|
||||
from typing import Optional, Dict, List, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -27,6 +29,15 @@ from datetime import datetime, timedelta
|
||||
DEFAULT_RESULTS = 3
|
||||
MAX_RESULTS = 10
|
||||
|
||||
# REQUIRED by arXiv API terms – identify your bot.
|
||||
# Use a descriptive string with contact info. A Firefox User-Agent is
|
||||
# also accepted, but the bot-specific one is recommended.
|
||||
# Example Firefox UA: "Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0"
|
||||
USER_AGENT = "FunguyBot/1.0 (mailto:your-email@example.com)"
|
||||
|
||||
# Minimum delay between successive API calls (arXiv asks for ≥3 seconds)
|
||||
MIN_REQUEST_INTERVAL = 5
|
||||
|
||||
CATEGORIES = {
|
||||
"ai": "cs.AI",
|
||||
"ml": "cs.LG",
|
||||
@@ -42,13 +53,11 @@ CATEGORIES = {
|
||||
"software": "cs.SE"
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper Functions
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _format_collapsible(title: str, content: str, expanded: bool = False) -> str:
|
||||
"""Format content in a collapsible details/summary block."""
|
||||
open_attr = ' open' if expanded else ''
|
||||
return f"<details{open_attr}>\n<summary>📚 {title}</summary>\n\n{content}\n\n</details>"
|
||||
|
||||
@@ -64,27 +73,19 @@ def _oxford_comma(items):
|
||||
|
||||
|
||||
def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str:
|
||||
"""Format a paper as an HTML list item."""
|
||||
result = f"<li>\n<strong>{index}. {paper['title']}</strong><br/>\n"
|
||||
|
||||
# Authors
|
||||
result += f"👥 <strong>Authors:</strong> {_oxford_comma(paper['authors'][:3])}"
|
||||
if len(paper['authors']) > 3:
|
||||
result += f" and {len(paper['authors']) - 3} others"
|
||||
result += "<br/>\n"
|
||||
|
||||
# Metadata
|
||||
result += f"📅 <strong>Published:</strong> {paper['published']}<br/>\n"
|
||||
result += f"🏷️ <strong>Categories:</strong> {', '.join(paper['categories'][:3])}"
|
||||
if len(paper['categories']) > 3:
|
||||
result += f" +{len(paper['categories']) - 3}"
|
||||
result += "<br/>\n"
|
||||
|
||||
# Links
|
||||
result += f"🔗 <strong>arXiv ID:</strong> {paper['id']}<br/>\n"
|
||||
result += f"📄 <strong>PDF:</strong> <a href='{paper['pdf_url']}'>{paper['pdf_url']}</a><br/>\n"
|
||||
|
||||
# Abstract
|
||||
if include_abstract and paper['summary'] != "No abstract":
|
||||
abstract = paper['summary']
|
||||
if len(abstract) > 500:
|
||||
@@ -95,11 +96,34 @@ def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str
|
||||
return result
|
||||
|
||||
|
||||
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list: List[str] = None) -> Optional[List[Dict]]:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Persist last request timestamp for rate limiting
|
||||
# ---------------------------------------------------------------------------
|
||||
_last_request_time = 0.0
|
||||
|
||||
|
||||
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS,
|
||||
id_list: List[str] = None) -> Tuple[Optional[List[Dict]], Optional[str]]:
|
||||
"""
|
||||
Search arXiv API. Returns (papers, error_message).
|
||||
- papers: list of paper dicts, or None on failure.
|
||||
- error_message: None on success, otherwise a user-friendly error string.
|
||||
"""
|
||||
global _last_request_time
|
||||
|
||||
# ----- Throttle -----
|
||||
now = time.monotonic()
|
||||
wait = _last_request_time + MIN_REQUEST_INTERVAL - now
|
||||
if wait > 0:
|
||||
logging.debug(f"arXiv throttling: waiting {wait:.1f}s")
|
||||
await asyncio.sleep(wait)
|
||||
_last_request_time = time.monotonic()
|
||||
|
||||
base_url = "http://export.arxiv.org/api/query"
|
||||
headers = {"User-Agent": USER_AGENT}
|
||||
|
||||
if id_list:
|
||||
id_query = "+OR+".join([f"id:{pid}" for pid in id_list])
|
||||
id_query = "+OR+".join(f"id:{pid}" for pid in id_list)
|
||||
params = {"search_query": id_query, "max_results": max_results}
|
||||
else:
|
||||
params = {
|
||||
@@ -110,19 +134,29 @@ async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list:
|
||||
}
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with aiohttp.ClientSession(headers=headers) as session:
|
||||
async with session.get(base_url, params=params) as response:
|
||||
if response.status == 200:
|
||||
text = await response.text()
|
||||
return _parse_arxiv_response(text)
|
||||
return None
|
||||
papers = _parse_arxiv_response(text)
|
||||
logging.info(f"arXiv returned {len(papers)} papers for query: {query[:60]}")
|
||||
return papers, None
|
||||
elif response.status == 429:
|
||||
retry_after = response.headers.get("Retry-After", "unknown")
|
||||
logging.error(f"arXiv rate limited (429). Retry-After: {retry_after}")
|
||||
return None, "⚠️ arXiv rate limit exceeded. Please wait a moment and try again."
|
||||
else:
|
||||
text = await response.text()
|
||||
logging.error(f"arXiv API error {response.status}: {text[:300]}")
|
||||
return None, f"❌ arXiv API error (HTTP {response.status})."
|
||||
except Exception as e:
|
||||
logging.error(f"Error searching arXiv: {e}")
|
||||
return None
|
||||
return None, "❌ Network or internal error while contacting arXiv."
|
||||
|
||||
|
||||
async def _get_category_papers(category: str, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]:
    """Fetch up to *limit* recent papers from one arXiv category.

    Thin convenience wrapper around _search_arxiv: the error message
    from the (papers, error) pair is discarded and only the paper list
    (or None on failure) is returned.
    """
    result, _error = await _search_arxiv(f"cat:{category}", limit)
    return result
|
||||
|
||||
|
||||
async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[List[Dict]]:
|
||||
@@ -131,14 +165,16 @@ async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[Li
|
||||
query = f"cat:{category} AND submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]"
|
||||
else:
|
||||
query = f"submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]"
|
||||
return await _search_arxiv(query, DEFAULT_RESULTS)
|
||||
papers, _ = await _search_arxiv(query, DEFAULT_RESULTS)
|
||||
return papers
|
||||
|
||||
|
||||
async def _get_random_paper() -> Optional[Dict]:
    """Pick a random search term, query arXiv, and return one random hit.

    Returns None when the search fails or yields no papers.
    """
    pool = [
        "machine learning",
        "quantum",
        "neural network",
        "optimization",
        "algorithm",
        "security",
    ]
    chosen = random.choice(pool)
    hits, _error = await _search_arxiv(chosen, max_results=MAX_RESULTS)
    if not hits:
        return None
    return random.choice(hits)
|
||||
|
||||
|
||||
def _parse_arxiv_response(xml_text: str) -> List[Dict]:
|
||||
@@ -188,7 +224,6 @@ def _parse_arxiv_response(xml_text: str) -> List[Dict]:
|
||||
'categories': categories,
|
||||
'published': pub_date
|
||||
})
|
||||
|
||||
return papers
|
||||
|
||||
|
||||
@@ -200,12 +235,12 @@ async def handle_command(room, message, bot, prefix, config):
|
||||
import simplematrixbotlib as botlib
|
||||
|
||||
match = botlib.MessageMatch(room, message, bot, prefix)
|
||||
|
||||
if not (match.is_not_from_this_bot() and match.prefix() and match.command("arxiv")):
|
||||
return
|
||||
|
||||
args = match.args()
|
||||
|
||||
# No arguments → show help
|
||||
if not args:
|
||||
help_content = (
|
||||
"<strong>Commands:</strong><br/><br/>"
|
||||
@@ -225,6 +260,7 @@ async def handle_command(room, message, bot, prefix, config):
|
||||
limit = DEFAULT_RESULTS
|
||||
include_abstract = True
|
||||
|
||||
# Extract optional numeric limit (first or last argument)
|
||||
if args and args[0].isdigit():
|
||||
limit = min(int(args[0]), MAX_RESULTS)
|
||||
args = args[1:]
|
||||
@@ -234,6 +270,7 @@ async def handle_command(room, message, bot, prefix, config):
|
||||
args = args[:-1]
|
||||
cmd = args[0].lower() if args else None
|
||||
|
||||
# ---- LIST ----
|
||||
if cmd == "list":
|
||||
include_abstract = False
|
||||
if len(args) >= 2:
|
||||
@@ -241,18 +278,29 @@ async def handle_command(room, message, bot, prefix, config):
|
||||
else:
|
||||
await bot.api.send_text_message(room.room_id, "Usage: !arxiv list <query>")
|
||||
return
|
||||
await bot.api.send_text_message(room.room_id, f"🔍 Listing: *{query[:50]}*...")
|
||||
papers, error_msg = await _search_arxiv(query, limit)
|
||||
if error_msg:
|
||||
await bot.api.send_text_message(room.room_id, error_msg)
|
||||
return
|
||||
title = f"Search: '{query[:50]}'"
|
||||
|
||||
# ---- CATEGORY ----
|
||||
elif cmd == "category" and len(args) >= 2:
|
||||
cat_key = args[1].lower()
|
||||
if cat_key in CATEGORIES:
|
||||
category = CATEGORIES[cat_key]
|
||||
await bot.api.send_text_message(room.room_id, f"📚 Fetching {cat_key.upper()} papers...")
|
||||
papers = await _get_category_papers(category, limit)
|
||||
title = f"Recent Papers in {cat_key.upper()}"
|
||||
else:
|
||||
await bot.api.send_text_message(room.room_id, f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
|
||||
if cat_key not in CATEGORIES:
|
||||
await bot.api.send_text_message(room.room_id,
|
||||
f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
|
||||
return
|
||||
category = CATEGORIES[cat_key]
|
||||
await bot.api.send_text_message(room.room_id, f"📚 Fetching {cat_key.upper()} papers...")
|
||||
papers, error_msg = await _search_arxiv(f"cat:{category}", limit)
|
||||
if error_msg:
|
||||
await bot.api.send_text_message(room.room_id, error_msg)
|
||||
return
|
||||
title = f"Recent Papers in {cat_key.upper()}"
|
||||
|
||||
# ---- RECENT ----
|
||||
elif cmd == "recent":
|
||||
category = None
|
||||
if len(args) >= 2 and args[1].lower() in CATEGORIES:
|
||||
@@ -262,8 +310,16 @@ async def handle_command(room, message, bot, prefix, config):
|
||||
else:
|
||||
await bot.api.send_text_message(room.room_id, "📚 Fetching recent papers...")
|
||||
title = "Recent Papers (Last 7 Days)"
|
||||
papers = await _get_recent_papers(category, limit)
|
||||
papers, error_msg = await _search_arxiv(
|
||||
f"cat:{category} AND submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" if category
|
||||
else f"submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]",
|
||||
limit
|
||||
)
|
||||
if error_msg:
|
||||
await bot.api.send_text_message(room.room_id, error_msg)
|
||||
return
|
||||
|
||||
# ---- RANDOM ----
|
||||
elif cmd == "random":
|
||||
await bot.api.send_text_message(room.room_id, "🎲 Fetching random paper...")
|
||||
paper = await _get_random_paper()
|
||||
@@ -272,25 +328,33 @@ async def handle_command(room, message, bot, prefix, config):
|
||||
response = _format_collapsible("Random Paper", content, True)
|
||||
await bot.api.send_markdown_message(room.room_id, response)
|
||||
else:
|
||||
await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper.")
|
||||
return
|
||||
await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper (rate limit or API error).")
|
||||
return # early return – we already sent the result
|
||||
|
||||
# ---- ID LOOKUP ----
|
||||
elif cmd and (cmd[0].isdigit() or ('.' in cmd and len(cmd.split('.')) == 2)):
|
||||
paper_ids = [cmd] + [arg for arg in args[1:] if arg[0].isdigit() or ('.' in arg and len(arg.split('.')) == 2)]
|
||||
if paper_ids:
|
||||
await bot.api.send_text_message(room.room_id, f"📚 Fetching paper(s)...")
|
||||
papers = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
|
||||
title = "Paper Details"
|
||||
else:
|
||||
if not paper_ids:
|
||||
await bot.api.send_text_message(room.room_id, "❌ Invalid arXiv ID.")
|
||||
return
|
||||
await bot.api.send_text_message(room.room_id, "📚 Fetching paper(s)...")
|
||||
papers, error_msg = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
|
||||
if error_msg:
|
||||
await bot.api.send_text_message(room.room_id, error_msg)
|
||||
return
|
||||
title = "Paper Details"
|
||||
|
||||
# ---- DEFAULT SEARCH ----
|
||||
else:
|
||||
query = " ".join(args)
|
||||
await bot.api.send_text_message(room.room_id, f"🔍 Searching: *{query[:50]}*...")
|
||||
papers = await _search_arxiv(query, limit)
|
||||
papers, error_msg = await _search_arxiv(query, limit)
|
||||
if error_msg:
|
||||
await bot.api.send_text_message(room.room_id, error_msg)
|
||||
return
|
||||
title = f"Search: '{query[:50]}'"
|
||||
|
||||
# If we get here, papers is a list (possibly empty)
|
||||
if not papers:
|
||||
await bot.api.send_text_message(room.room_id, "❌ No papers found.")
|
||||
return
|
||||
@@ -302,7 +366,7 @@ async def handle_command(room, message, bot, prefix, config):
|
||||
|
||||
response = _format_collapsible(title, content, False)
|
||||
await bot.api.send_markdown_message(room.room_id, response)
|
||||
logging.info(f"Sent arXiv search results")
|
||||
logging.info("Sent arXiv search results")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -317,9 +381,9 @@ def setup(bot):
|
||||
# Plugin Metadata
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# ---------------------------------------------------------------------------
# Plugin metadata — the diff artifact left both the old (1.0.0) and new
# (1.0.2) assignments interleaved; only the final values belong in the file.
# ---------------------------------------------------------------------------
__version__ = "1.0.2"
__author__ = "Funguy Bot"
__description__ = "arXiv academic paper search (with rate limiting and error reporting)"
|
||||
__help__ = """
|
||||
<details>
|
||||
<summary><strong>!arxiv</strong> – Search academic papers on arXiv</summary>
|
||||
|
||||
Reference in New Issue
Block a user