Fixed arxiv plugin to respect rate limits. Fixed lastfm np to show genre tags and song length

This commit is contained in:
2026-05-07 03:49:33 -05:00
parent dba205685b
commit 972f34a25a
2 changed files with 140 additions and 43 deletions
+105 -41
View File
@@ -13,11 +13,13 @@ Commands:
!arxiv <id> - Get paper by arXiv ID
"""
import asyncio
import logging
import time
import aiohttp
import xml.etree.ElementTree as ET
import random
from typing import Optional, Dict, List
from typing import Optional, Dict, List, Tuple
from datetime import datetime, timedelta
# ---------------------------------------------------------------------------
@@ -27,6 +29,15 @@ from datetime import datetime, timedelta
DEFAULT_RESULTS = 3
MAX_RESULTS = 10
# REQUIRED by arXiv API terms: identify your bot.
# Use a descriptive string with contact info. A Firefox User-Agent is
# also accepted, but the bot-specific one is recommended.
# Example Firefox UA: "Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0"
USER_AGENT = "FunguyBot/1.0 (mailto:your-email@example.com)"
# Minimum delay between successive API calls (arXiv asks for ≥3 seconds)
MIN_REQUEST_INTERVAL = 5
CATEGORIES = {
"ai": "cs.AI",
"ml": "cs.LG",
@@ -42,13 +53,11 @@ CATEGORIES = {
"software": "cs.SE"
}
# ---------------------------------------------------------------------------
# Helper Functions
# Helpers
# ---------------------------------------------------------------------------
def _format_collapsible(title: str, content: str, expanded: bool = False) -> str:
"""Format content in a collapsible details/summary block."""
open_attr = ' open' if expanded else ''
return f"<details{open_attr}>\n<summary>📚 {title}</summary>\n\n{content}\n\n</details>"
@@ -64,27 +73,19 @@ def _oxford_comma(items):
def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str:
"""Format a paper as an HTML list item."""
result = f"<li>\n<strong>{index}. {paper['title']}</strong><br/>\n"
# Authors
result += f"👥 <strong>Authors:</strong> {_oxford_comma(paper['authors'][:3])}"
if len(paper['authors']) > 3:
result += f" and {len(paper['authors']) - 3} others"
result += "<br/>\n"
# Metadata
result += f"📅 <strong>Published:</strong> {paper['published']}<br/>\n"
result += f"🏷️ <strong>Categories:</strong> {', '.join(paper['categories'][:3])}"
if len(paper['categories']) > 3:
result += f" +{len(paper['categories']) - 3}"
result += "<br/>\n"
# Links
result += f"🔗 <strong>arXiv ID:</strong> {paper['id']}<br/>\n"
result += f"📄 <strong>PDF:</strong> <a href='{paper['pdf_url']}'>{paper['pdf_url']}</a><br/>\n"
# Abstract
if include_abstract and paper['summary'] != "No abstract":
abstract = paper['summary']
if len(abstract) > 500:
@@ -95,11 +96,34 @@ def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str
return result
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list: List[str] = None) -> Optional[List[Dict]]:
# ---------------------------------------------------------------------------
# Module-level (in-memory) timestamp of the last API request, used for rate limiting
# ---------------------------------------------------------------------------
_last_request_time = 0.0
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS,
id_list: List[str] = None) -> Tuple[Optional[List[Dict]], Optional[str]]:
"""
Search arXiv API. Returns (papers, error_message).
- papers: list of paper dicts, or None on failure.
- error_message: None on success, otherwise a user-friendly error string.
"""
global _last_request_time
# ----- Throttle -----
now = time.monotonic()
wait = _last_request_time + MIN_REQUEST_INTERVAL - now
if wait > 0:
logging.debug(f"arXiv throttling: waiting {wait:.1f}s")
await asyncio.sleep(wait)
_last_request_time = time.monotonic()
base_url = "http://export.arxiv.org/api/query"
headers = {"User-Agent": USER_AGENT}
if id_list:
id_query = "+OR+".join([f"id:{pid}" for pid in id_list])
id_query = "+OR+".join(f"id:{pid}" for pid in id_list)
params = {"search_query": id_query, "max_results": max_results}
else:
params = {
@@ -110,19 +134,29 @@ async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list:
}
try:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(headers=headers) as session:
async with session.get(base_url, params=params) as response:
if response.status == 200:
text = await response.text()
return _parse_arxiv_response(text)
return None
papers = _parse_arxiv_response(text)
logging.info(f"arXiv returned {len(papers)} papers for query: {query[:60]}")
return papers, None
elif response.status == 429:
retry_after = response.headers.get("Retry-After", "unknown")
logging.error(f"arXiv rate limited (429). Retry-After: {retry_after}")
return None, "⚠️ arXiv rate limit exceeded. Please wait a moment and try again."
else:
text = await response.text()
logging.error(f"arXiv API error {response.status}: {text[:300]}")
return None, f"❌ arXiv API error (HTTP {response.status})."
except Exception as e:
logging.error(f"Error searching arXiv: {e}")
return None
return None, "❌ Network or internal error while contacting arXiv."
async def _get_category_papers(category: str, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]:
return await _search_arxiv(f"cat:{category}", limit)
papers, _ = await _search_arxiv(f"cat:{category}", limit)
return papers
async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[List[Dict]]:
@@ -131,14 +165,16 @@ async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[Li
query = f"cat:{category} AND submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]"
else:
query = f"submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]"
return await _search_arxiv(query, DEFAULT_RESULTS)
papers, _ = await _search_arxiv(query, DEFAULT_RESULTS)
return papers
async def _get_random_paper() -> Optional[Dict]:
"""Returns a single random paper or None."""
terms = ["machine learning", "quantum", "neural network", "optimization", "algorithm", "security"]
query = random.choice(terms)
results = await _search_arxiv(query, max_results=MAX_RESULTS)
return random.choice(results) if results else None
papers, _ = await _search_arxiv(query, max_results=MAX_RESULTS)
return random.choice(papers) if papers else None
def _parse_arxiv_response(xml_text: str) -> List[Dict]:
@@ -188,7 +224,6 @@ def _parse_arxiv_response(xml_text: str) -> List[Dict]:
'categories': categories,
'published': pub_date
})
return papers
@@ -200,12 +235,12 @@ async def handle_command(room, message, bot, prefix, config):
import simplematrixbotlib as botlib
match = botlib.MessageMatch(room, message, bot, prefix)
if not (match.is_not_from_this_bot() and match.prefix() and match.command("arxiv")):
return
args = match.args()
# No arguments → show help
if not args:
help_content = (
"<strong>Commands:</strong><br/><br/>"
@@ -225,6 +260,7 @@ async def handle_command(room, message, bot, prefix, config):
limit = DEFAULT_RESULTS
include_abstract = True
# Extract optional numeric limit (first or last argument)
if args and args[0].isdigit():
limit = min(int(args[0]), MAX_RESULTS)
args = args[1:]
@@ -234,6 +270,7 @@ async def handle_command(room, message, bot, prefix, config):
args = args[:-1]
cmd = args[0].lower() if args else None
# ---- LIST ----
if cmd == "list":
include_abstract = False
if len(args) >= 2:
@@ -241,18 +278,29 @@ async def handle_command(room, message, bot, prefix, config):
else:
await bot.api.send_text_message(room.room_id, "Usage: !arxiv list <query>")
return
await bot.api.send_text_message(room.room_id, f"🔍 Listing: *{query[:50]}*...")
papers, error_msg = await _search_arxiv(query, limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Search: '{query[:50]}'"
# ---- CATEGORY ----
elif cmd == "category" and len(args) >= 2:
cat_key = args[1].lower()
if cat_key in CATEGORIES:
if cat_key not in CATEGORIES:
await bot.api.send_text_message(room.room_id,
f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
return
category = CATEGORIES[cat_key]
await bot.api.send_text_message(room.room_id, f"📚 Fetching {cat_key.upper()} papers...")
papers = await _get_category_papers(category, limit)
title = f"Recent Papers in {cat_key.upper()}"
else:
await bot.api.send_text_message(room.room_id, f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
papers, error_msg = await _search_arxiv(f"cat:{category}", limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Recent Papers in {cat_key.upper()}"
# ---- RECENT ----
elif cmd == "recent":
category = None
if len(args) >= 2 and args[1].lower() in CATEGORIES:
@@ -262,8 +310,16 @@ async def handle_command(room, message, bot, prefix, config):
else:
await bot.api.send_text_message(room.room_id, "📚 Fetching recent papers...")
title = "Recent Papers (Last 7 Days)"
papers = await _get_recent_papers(category, limit)
papers, error_msg = await _search_arxiv(
f"cat:{category} AND submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" if category
else f"submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]",
limit
)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
# ---- RANDOM ----
elif cmd == "random":
await bot.api.send_text_message(room.room_id, "🎲 Fetching random paper...")
paper = await _get_random_paper()
@@ -272,25 +328,33 @@ async def handle_command(room, message, bot, prefix, config):
response = _format_collapsible("Random Paper", content, True)
await bot.api.send_markdown_message(room.room_id, response)
else:
await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper.")
return
await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper (rate limit or API error).")
return # early return we already sent the result
# ---- ID LOOKUP ----
elif cmd and (cmd[0].isdigit() or ('.' in cmd and len(cmd.split('.')) == 2)):
paper_ids = [cmd] + [arg for arg in args[1:] if arg[0].isdigit() or ('.' in arg and len(arg.split('.')) == 2)]
if paper_ids:
await bot.api.send_text_message(room.room_id, f"📚 Fetching paper(s)...")
papers = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
title = "Paper Details"
else:
if not paper_ids:
await bot.api.send_text_message(room.room_id, "❌ Invalid arXiv ID.")
return
await bot.api.send_text_message(room.room_id, "📚 Fetching paper(s)...")
papers, error_msg = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = "Paper Details"
# ---- DEFAULT SEARCH ----
else:
query = " ".join(args)
await bot.api.send_text_message(room.room_id, f"🔍 Searching: *{query[:50]}*...")
papers = await _search_arxiv(query, limit)
papers, error_msg = await _search_arxiv(query, limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Search: '{query[:50]}'"
# If we get here, papers is a list (possibly empty)
if not papers:
await bot.api.send_text_message(room.room_id, "❌ No papers found.")
return
@@ -302,7 +366,7 @@ async def handle_command(room, message, bot, prefix, config):
response = _format_collapsible(title, content, False)
await bot.api.send_markdown_message(room.room_id, response)
logging.info(f"Sent arXiv search results")
logging.info("Sent arXiv search results")
# ---------------------------------------------------------------------------
@@ -317,9 +381,9 @@ def setup(bot):
# Plugin Metadata
# ---------------------------------------------------------------------------
__version__ = "1.0.0"
__version__ = "1.0.2"
__author__ = "Funguy Bot"
__description__ = "arXiv academic paper search"
__description__ = "arXiv academic paper search (with rate limiting and error reporting)"
__help__ = """
<details>
<summary><strong>!arxiv</strong> Search academic papers on arXiv</summary>
+33
View File
@@ -450,6 +450,39 @@ async def cmd_np(room, message, bot, args):
if youtube_link:
message_text += f" | [YouTube]({youtube_link})"
# ---- New: fetch track genres ----
# --- Fetch genres: try track-level first, fall back to artist ---
genre_tags = []
# 1) Try track top tags
track_tag_data = await call_lastfm_api("track.getTopTags", {"artist": artist, "track": name, "autocorrect": "1"})
if track_tag_data:
tags = track_tag_data.get("toptags", {}).get("tag", [])
genre_tags = [safe_text(t, "name") for t in tags if safe_text(t, "name")]
# 2) If empty, fall back to artist top tags
if not genre_tags:
artist_tag_data = await call_lastfm_api("artist.getTopTags", {"artist": artist, "autocorrect": "1"})
if artist_tag_data:
tags = artist_tag_data.get("toptags", {}).get("tag", [])
genre_tags = [safe_text(t, "name") for t in tags if safe_text(t, "name")]
# 3) Append to message if we got anything
if genre_tags:
genre_str = " | 🏷️ " + ", ".join(genre_tags[:3])
message_text += genre_str
# ---- Fetch track duration (new) ----
track_info = await call_lastfm_api("track.getInfo", {
"artist": artist,
"track": name,
"autocorrect": "1"
})
if track_info:
track_obj = track_info.get("track", {})
duration_ms = safe_int(track_obj, "duration")
if duration_ms > 0:
mins = duration_ms // 60000
secs = (duration_ms % 60000) // 1000
message_text += f" | ⏱️ {mins}:{secs:02d}"
await bot.api.send_markdown_message(room.room_id, message_text)
logging.info(f"Sent now playing for {lastfm_user}")