From 972f34a25a31924cbf050705bcd0058abe460525 Mon Sep 17 00:00:00 2001 From: Hash Borgir Date: Thu, 7 May 2026 03:49:33 -0500 Subject: [PATCH] Fixed arxiv plugin to respect rate limits. Fixed lastfm np to search genre tags and song length --- plugins/arxiv.py | 150 +++++++++++++++++++++++++++++++++------------- plugins/lastfm.py | 33 ++++++++++ 2 files changed, 140 insertions(+), 43 deletions(-) diff --git a/plugins/arxiv.py b/plugins/arxiv.py index 120e2ce..0bb9788 100644 --- a/plugins/arxiv.py +++ b/plugins/arxiv.py @@ -13,11 +13,13 @@ Commands: !arxiv - Get paper by arXiv ID """ +import asyncio import logging +import time import aiohttp import xml.etree.ElementTree as ET import random -from typing import Optional, Dict, List +from typing import Optional, Dict, List, Tuple from datetime import datetime, timedelta # --------------------------------------------------------------------------- @@ -27,6 +29,15 @@ from datetime import datetime, timedelta DEFAULT_RESULTS = 3 MAX_RESULTS = 10 +# REQUIRED by arXiv API terms – identify your bot. +# Use a descriptive string with contact info. A Firefox User-Agent is +# also accepted, but the bot-specific one is recommended. +# Example Firefox UA: "Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0" +USER_AGENT = "FunguyBot/1.0 (mailto:your-email@example.com)" + +# Minimum delay between successive API calls (arXiv asks for ≥3 seconds) +MIN_REQUEST_INTERVAL = 5 + CATEGORIES = { "ai": "cs.AI", "ml": "cs.LG", @@ -42,13 +53,11 @@ CATEGORIES = { "software": "cs.SE" } - # --------------------------------------------------------------------------- -# Helper Functions +# Helpers # --------------------------------------------------------------------------- def _format_collapsible(title: str, content: str, expanded: bool = False) -> str: - """Format content in a collapsible details/summary block.""" open_attr = ' open' if expanded else '' return f"\n📚 {title}\n\n{content}\n\n" @@ -64,27 +73,19 @@ def _oxford_comma(items): def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str: - """Format a paper as an HTML list item.""" result = f"
  • \n{index}. {paper['title']}
    \n" - - # Authors result += f"👥 Authors: {_oxford_comma(paper['authors'][:3])}" if len(paper['authors']) > 3: result += f" and {len(paper['authors']) - 3} others" result += "
    \n" - - # Metadata result += f"📅 Published: {paper['published']}
    \n" result += f"🏷️ Categories: {', '.join(paper['categories'][:3])}" if len(paper['categories']) > 3: result += f" +{len(paper['categories']) - 3}" result += "
    \n" - - # Links result += f"🔗 arXiv ID: {paper['id']}
    \n" result += f"📄 PDF: {paper['pdf_url']}
    \n" - # Abstract if include_abstract and paper['summary'] != "No abstract": abstract = paper['summary'] if len(abstract) > 500: @@ -95,11 +96,34 @@ def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str return result -async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list: List[str] = None) -> Optional[List[Dict]]: +# --------------------------------------------------------------------------- +# Persist last request timestamp for rate limiting +# --------------------------------------------------------------------------- +_last_request_time = 0.0 + + +async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, + id_list: List[str] = None) -> Tuple[Optional[List[Dict]], Optional[str]]: + """ + Search arXiv API. Returns (papers, error_message). + - papers: list of paper dicts, or None on failure. + - error_message: None on success, otherwise a user-friendly error string. + """ + global _last_request_time + + # ----- Throttle ----- + now = time.monotonic() + wait = _last_request_time + MIN_REQUEST_INTERVAL - now + if wait > 0: + logging.debug(f"arXiv throttling: waiting {wait:.1f}s") + await asyncio.sleep(wait) + _last_request_time = time.monotonic() + base_url = "http://export.arxiv.org/api/query" + headers = {"User-Agent": USER_AGENT} if id_list: - id_query = "+OR+".join([f"id:{pid}" for pid in id_list]) + id_query = "+OR+".join(f"id:{pid}" for pid in id_list) params = {"search_query": id_query, "max_results": max_results} else: params = { @@ -110,19 +134,29 @@ async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list: } try: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(headers=headers) as session: async with session.get(base_url, params=params) as response: if response.status == 200: text = await response.text() - return _parse_arxiv_response(text) - return None + papers = _parse_arxiv_response(text) + logging.info(f"arXiv returned {len(papers)} papers for query: {query[:60]}") + return papers, None + elif response.status == 429: + retry_after = response.headers.get("Retry-After", "unknown") + logging.error(f"arXiv rate limited (429). Retry-After: {retry_after}") + return None, "⚠️ arXiv rate limit exceeded. Please wait a moment and try again." + else: + text = await response.text() + logging.error(f"arXiv API error {response.status}: {text[:300]}") + return None, f"❌ arXiv API error (HTTP {response.status})." except Exception as e: logging.error(f"Error searching arXiv: {e}") - return None + return None, "❌ Network or internal error while contacting arXiv." async def _get_category_papers(category: str, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]: - return await _search_arxiv(f"cat:{category}", limit) + papers, _ = await _search_arxiv(f"cat:{category}", limit) + return papers async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[List[Dict]]: @@ -131,14 +165,16 @@ async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[Li query = f"cat:{category} AND submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" else: query = f"submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" - return await _search_arxiv(query, DEFAULT_RESULTS) + papers, _ = await _search_arxiv(query, DEFAULT_RESULTS) + return papers async def _get_random_paper() -> Optional[Dict]: + """Returns a single random paper or None.""" terms = ["machine learning", "quantum", "neural network", "optimization", "algorithm", "security"] query = random.choice(terms) - results = await _search_arxiv(query, max_results=MAX_RESULTS) - return random.choice(results) if results else None + papers, _ = await _search_arxiv(query, max_results=MAX_RESULTS) + return random.choice(papers) if papers else None def _parse_arxiv_response(xml_text: str) -> List[Dict]: @@ -188,7 +224,6 @@ def _parse_arxiv_response(xml_text: str) -> List[Dict]: 'categories': categories, 'published': pub_date }) - return papers @@ -200,12 +235,12 @@ async def handle_command(room, message, bot, prefix, config): import simplematrixbotlib as botlib match = botlib.MessageMatch(room, message, bot, prefix) - if not (match.is_not_from_this_bot() and match.prefix() and match.command("arxiv")): return args = match.args() + # No arguments → show help if not args: help_content = ( "Commands:

    " @@ -225,6 +260,7 @@ async def handle_command(room, message, bot, prefix, config): limit = DEFAULT_RESULTS include_abstract = True + # Extract optional numeric limit (first or last argument) if args and args[0].isdigit(): limit = min(int(args[0]), MAX_RESULTS) args = args[1:] @@ -234,6 +270,7 @@ async def handle_command(room, message, bot, prefix, config): args = args[:-1] cmd = args[0].lower() if args else None + # ---- LIST ---- if cmd == "list": include_abstract = False if len(args) >= 2: @@ -241,18 +278,29 @@ async def handle_command(room, message, bot, prefix, config): else: await bot.api.send_text_message(room.room_id, "Usage: !arxiv list ") return + await bot.api.send_text_message(room.room_id, f"🔍 Listing: *{query[:50]}*...") + papers, error_msg = await _search_arxiv(query, limit) + if error_msg: + await bot.api.send_text_message(room.room_id, error_msg) + return + title = f"Search: '{query[:50]}'" + # ---- CATEGORY ---- elif cmd == "category" and len(args) >= 2: cat_key = args[1].lower() - if cat_key in CATEGORIES: - category = CATEGORIES[cat_key] - await bot.api.send_text_message(room.room_id, f"📚 Fetching {cat_key.upper()} papers...") - papers = await _get_category_papers(category, limit) - title = f"Recent Papers in {cat_key.upper()}" - else: - await bot.api.send_text_message(room.room_id, f"Unknown category. Available: {', '.join(CATEGORIES.keys())}") + if cat_key not in CATEGORIES: + await bot.api.send_text_message(room.room_id, + f"Unknown category. Available: {', '.join(CATEGORIES.keys())}") return + category = CATEGORIES[cat_key] + await bot.api.send_text_message(room.room_id, f"📚 Fetching {cat_key.upper()} papers...") + papers, error_msg = await _search_arxiv(f"cat:{category}", limit) + if error_msg: + await bot.api.send_text_message(room.room_id, error_msg) + return + title = f"Recent Papers in {cat_key.upper()}" + # ---- RECENT ---- elif cmd == "recent": category = None if len(args) >= 2 and args[1].lower() in CATEGORIES: @@ -262,8 +310,16 @@ async def handle_command(room, message, bot, prefix, config): else: await bot.api.send_text_message(room.room_id, "📚 Fetching recent papers...") title = "Recent Papers (Last 7 Days)" - papers = await _get_recent_papers(category, limit) + papers, error_msg = await _search_arxiv( + f"cat:{category} AND submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" if category + else f"submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]", + limit + ) + if error_msg: + await bot.api.send_text_message(room.room_id, error_msg) + return + # ---- RANDOM ---- elif cmd == "random": await bot.api.send_text_message(room.room_id, "🎲 Fetching random paper...") paper = await _get_random_paper() @@ -272,25 +328,33 @@ async def handle_command(room, message, bot, prefix, config): response = _format_collapsible("Random Paper", content, True) await bot.api.send_markdown_message(room.room_id, response) else: - await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper.") - return + await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper (rate limit or API error).") + return # early return – we already sent the result + # ---- ID LOOKUP ---- elif cmd and (cmd[0].isdigit() or ('.' in cmd and len(cmd.split('.')) == 2)): paper_ids = [cmd] + [arg for arg in args[1:] if arg[0].isdigit() or ('.' in arg and len(arg.split('.')) == 2)] - if paper_ids: - await bot.api.send_text_message(room.room_id, f"📚 Fetching paper(s)...") - papers = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids) - title = "Paper Details" - else: + if not paper_ids: await bot.api.send_text_message(room.room_id, "❌ Invalid arXiv ID.") return + await bot.api.send_text_message(room.room_id, "📚 Fetching paper(s)...") + papers, error_msg = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids) + if error_msg: + await bot.api.send_text_message(room.room_id, error_msg) + return + title = "Paper Details" + # ---- DEFAULT SEARCH ---- else: query = " ".join(args) await bot.api.send_text_message(room.room_id, f"🔍 Searching: *{query[:50]}*...") - papers = await _search_arxiv(query, limit) + papers, error_msg = await _search_arxiv(query, limit) + if error_msg: + await bot.api.send_text_message(room.room_id, error_msg) + return title = f"Search: '{query[:50]}'" + # If we get here, papers is a list (possibly empty) if not papers: await bot.api.send_text_message(room.room_id, "❌ No papers found.") return @@ -302,7 +366,7 @@ async def handle_command(room, message, bot, prefix, config): response = _format_collapsible(title, content, False) await bot.api.send_markdown_message(room.room_id, response) - logging.info(f"Sent arXiv search results") + logging.info("Sent arXiv search results") # --------------------------------------------------------------------------- @@ -317,9 +381,9 @@ def setup(bot): # Plugin Metadata # --------------------------------------------------------------------------- -__version__ = "1.0.0" +__version__ = "1.0.2" __author__ = "Funguy Bot" -__description__ = "arXiv academic paper search" +__description__ = "arXiv academic paper search (with rate limiting and error reporting)" __help__ = """
    !arxiv – Search academic papers on arXiv diff --git a/plugins/lastfm.py b/plugins/lastfm.py index 1e61ca6..90d2f3c 100644 --- a/plugins/lastfm.py +++ b/plugins/lastfm.py @@ -450,6 +450,39 @@ async def cmd_np(room, message, bot, args): if youtube_link: message_text += f" | [YouTube]({youtube_link})" + # ---- New: fetch track genres ---- + # --- Fetch genres: try track-level first, fall back to artist --- + genre_tags = [] + # 1) Try track top tags + track_tag_data = await call_lastfm_api("track.getTopTags", {"artist": artist, "track": name, "autocorrect": "1"}) + if track_tag_data: + tags = track_tag_data.get("toptags", {}).get("tag", []) + genre_tags = [safe_text(t, "name") for t in tags if safe_text(t, "name")] + # 2) If empty, fall back to artist top tags + if not genre_tags: + artist_tag_data = await call_lastfm_api("artist.getTopTags", {"artist": artist, "autocorrect": "1"}) + if artist_tag_data: + tags = artist_tag_data.get("toptags", {}).get("tag", []) + genre_tags = [safe_text(t, "name") for t in tags if safe_text(t, "name")] + # 3) Append to message if we got anything + if genre_tags: + genre_str = " | 🏷️ " + ", ".join(genre_tags[:3]) + message_text += genre_str + + # ---- Fetch track duration (new) ---- + track_info = await call_lastfm_api("track.getInfo", { + "artist": artist, + "track": name, + "autocorrect": "1" + }) + if track_info: + track_obj = track_info.get("track", {}) + duration_ms = safe_int(track_obj, "duration") + if duration_ms > 0: + mins = duration_ms // 60000 + secs = (duration_ms % 60000) // 1000 + message_text += f" | ⏱️ {mins}:{secs:02d}" + await bot.api.send_markdown_message(room.room_id, message_text) logging.info(f"Sent now playing for {lastfm_user}")