Fixed arxiv plugin to respect rate limits. Fixed lastfm np to show genre tags and song length

This commit is contained in:
2026-05-07 03:49:33 -05:00
parent dba205685b
commit 972f34a25a
2 changed files with 140 additions and 43 deletions
+105 -41
View File
@@ -13,11 +13,13 @@ Commands:
!arxiv <id> - Get paper by arXiv ID
"""
import asyncio
import logging
import time
import aiohttp
import xml.etree.ElementTree as ET
import random
from typing import Optional, Dict, List
from typing import Optional, Dict, List, Tuple
from datetime import datetime, timedelta
# ---------------------------------------------------------------------------
@@ -27,6 +29,15 @@ from datetime import datetime, timedelta
DEFAULT_RESULTS = 3
MAX_RESULTS = 10
# REQUIRED by arXiv API terms: identify your bot.
# Use a descriptive string with contact info. A Firefox User-Agent is
# also accepted, but the bot-specific one is recommended.
# Example Firefox UA: "Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0"
USER_AGENT = "FunguyBot/1.0 (mailto:your-email@example.com)"
# Minimum delay between successive API calls (arXiv asks for ≥3 seconds)
MIN_REQUEST_INTERVAL = 5
CATEGORIES = {
"ai": "cs.AI",
"ml": "cs.LG",
@@ -42,13 +53,11 @@ CATEGORIES = {
"software": "cs.SE"
}
# ---------------------------------------------------------------------------
# Helper Functions
# Helpers
# ---------------------------------------------------------------------------
def _format_collapsible(title: str, content: str, expanded: bool = False) -> str:
"""Format content in a collapsible details/summary block."""
open_attr = ' open' if expanded else ''
return f"<details{open_attr}>\n<summary>📚 {title}</summary>\n\n{content}\n\n</details>"
@@ -64,27 +73,19 @@ def _oxford_comma(items):
def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str:
"""Format a paper as an HTML list item."""
result = f"<li>\n<strong>{index}. {paper['title']}</strong><br/>\n"
# Authors
result += f"👥 <strong>Authors:</strong> {_oxford_comma(paper['authors'][:3])}"
if len(paper['authors']) > 3:
result += f" and {len(paper['authors']) - 3} others"
result += "<br/>\n"
# Metadata
result += f"📅 <strong>Published:</strong> {paper['published']}<br/>\n"
result += f"🏷️ <strong>Categories:</strong> {', '.join(paper['categories'][:3])}"
if len(paper['categories']) > 3:
result += f" +{len(paper['categories']) - 3}"
result += "<br/>\n"
# Links
result += f"🔗 <strong>arXiv ID:</strong> {paper['id']}<br/>\n"
result += f"📄 <strong>PDF:</strong> <a href='{paper['pdf_url']}'>{paper['pdf_url']}</a><br/>\n"
# Abstract
if include_abstract and paper['summary'] != "No abstract":
abstract = paper['summary']
if len(abstract) > 500:
@@ -95,11 +96,34 @@ def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str
return result
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list: List[str] = None) -> Optional[List[Dict]]:
# ---------------------------------------------------------------------------
# Module-level (in-memory) timestamp of the last API request, used for rate limiting
# ---------------------------------------------------------------------------
_last_request_time = 0.0
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS,
id_list: List[str] = None) -> Tuple[Optional[List[Dict]], Optional[str]]:
"""
Search arXiv API. Returns (papers, error_message).
- papers: list of paper dicts, or None on failure.
- error_message: None on success, otherwise a user-friendly error string.
"""
global _last_request_time
# ----- Throttle -----
now = time.monotonic()
wait = _last_request_time + MIN_REQUEST_INTERVAL - now
if wait > 0:
logging.debug(f"arXiv throttling: waiting {wait:.1f}s")
await asyncio.sleep(wait)
_last_request_time = time.monotonic()
base_url = "http://export.arxiv.org/api/query"
headers = {"User-Agent": USER_AGENT}
if id_list:
id_query = "+OR+".join([f"id:{pid}" for pid in id_list])
id_query = "+OR+".join(f"id:{pid}" for pid in id_list)
params = {"search_query": id_query, "max_results": max_results}
else:
params = {
@@ -110,19 +134,29 @@ async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list:
}
try:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(headers=headers) as session:
async with session.get(base_url, params=params) as response:
if response.status == 200:
text = await response.text()
return _parse_arxiv_response(text)
return None
papers = _parse_arxiv_response(text)
logging.info(f"arXiv returned {len(papers)} papers for query: {query[:60]}")
return papers, None
elif response.status == 429:
retry_after = response.headers.get("Retry-After", "unknown")
logging.error(f"arXiv rate limited (429). Retry-After: {retry_after}")
return None, "⚠️ arXiv rate limit exceeded. Please wait a moment and try again."
else:
text = await response.text()
logging.error(f"arXiv API error {response.status}: {text[:300]}")
return None, f"❌ arXiv API error (HTTP {response.status})."
except Exception as e:
logging.error(f"Error searching arXiv: {e}")
return None
return None, "❌ Network or internal error while contacting arXiv."
async def _get_category_papers(category: str, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]:
return await _search_arxiv(f"cat:{category}", limit)
papers, _ = await _search_arxiv(f"cat:{category}", limit)
return papers
async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[List[Dict]]:
@@ -131,14 +165,16 @@ async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[Li
query = f"cat:{category} AND submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]"
else:
query = f"submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]"
return await _search_arxiv(query, DEFAULT_RESULTS)
papers, _ = await _search_arxiv(query, DEFAULT_RESULTS)
return papers
async def _get_random_paper() -> Optional[Dict]:
"""Returns a single random paper or None."""
terms = ["machine learning", "quantum", "neural network", "optimization", "algorithm", "security"]
query = random.choice(terms)
results = await _search_arxiv(query, max_results=MAX_RESULTS)
return random.choice(results) if results else None
papers, _ = await _search_arxiv(query, max_results=MAX_RESULTS)
return random.choice(papers) if papers else None
def _parse_arxiv_response(xml_text: str) -> List[Dict]:
@@ -188,7 +224,6 @@ def _parse_arxiv_response(xml_text: str) -> List[Dict]:
'categories': categories,
'published': pub_date
})
return papers
@@ -200,12 +235,12 @@ async def handle_command(room, message, bot, prefix, config):
import simplematrixbotlib as botlib
match = botlib.MessageMatch(room, message, bot, prefix)
if not (match.is_not_from_this_bot() and match.prefix() and match.command("arxiv")):
return
args = match.args()
# No arguments → show help
if not args:
help_content = (
"<strong>Commands:</strong><br/><br/>"
@@ -225,6 +260,7 @@ async def handle_command(room, message, bot, prefix, config):
limit = DEFAULT_RESULTS
include_abstract = True
# Extract optional numeric limit (first or last argument)
if args and args[0].isdigit():
limit = min(int(args[0]), MAX_RESULTS)
args = args[1:]
@@ -234,6 +270,7 @@ async def handle_command(room, message, bot, prefix, config):
args = args[:-1]
cmd = args[0].lower() if args else None
# ---- LIST ----
if cmd == "list":
include_abstract = False
if len(args) >= 2:
@@ -241,18 +278,29 @@ async def handle_command(room, message, bot, prefix, config):
else:
await bot.api.send_text_message(room.room_id, "Usage: !arxiv list <query>")
return
await bot.api.send_text_message(room.room_id, f"🔍 Listing: *{query[:50]}*...")
papers, error_msg = await _search_arxiv(query, limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Search: '{query[:50]}'"
# ---- CATEGORY ----
elif cmd == "category" and len(args) >= 2:
cat_key = args[1].lower()
if cat_key in CATEGORIES:
if cat_key not in CATEGORIES:
await bot.api.send_text_message(room.room_id,
f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
return
category = CATEGORIES[cat_key]
await bot.api.send_text_message(room.room_id, f"📚 Fetching {cat_key.upper()} papers...")
papers = await _get_category_papers(category, limit)
title = f"Recent Papers in {cat_key.upper()}"
else:
await bot.api.send_text_message(room.room_id, f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
papers, error_msg = await _search_arxiv(f"cat:{category}", limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Recent Papers in {cat_key.upper()}"
# ---- RECENT ----
elif cmd == "recent":
category = None
if len(args) >= 2 and args[1].lower() in CATEGORIES:
@@ -262,8 +310,16 @@ async def handle_command(room, message, bot, prefix, config):
else:
await bot.api.send_text_message(room.room_id, "📚 Fetching recent papers...")
title = "Recent Papers (Last 7 Days)"
papers = await _get_recent_papers(category, limit)
papers, error_msg = await _search_arxiv(
f"cat:{category} AND submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" if category
else f"submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]",
limit
)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
# ---- RANDOM ----
elif cmd == "random":
await bot.api.send_text_message(room.room_id, "🎲 Fetching random paper...")
paper = await _get_random_paper()
@@ -272,25 +328,33 @@ async def handle_command(room, message, bot, prefix, config):
response = _format_collapsible("Random Paper", content, True)
await bot.api.send_markdown_message(room.room_id, response)
else:
await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper.")
return
await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper (rate limit or API error).")
return # early return we already sent the result
# ---- ID LOOKUP ----
elif cmd and (cmd[0].isdigit() or ('.' in cmd and len(cmd.split('.')) == 2)):
paper_ids = [cmd] + [arg for arg in args[1:] if arg[0].isdigit() or ('.' in arg and len(arg.split('.')) == 2)]
if paper_ids:
await bot.api.send_text_message(room.room_id, f"📚 Fetching paper(s)...")
papers = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
title = "Paper Details"
else:
if not paper_ids:
await bot.api.send_text_message(room.room_id, "❌ Invalid arXiv ID.")
return
await bot.api.send_text_message(room.room_id, "📚 Fetching paper(s)...")
papers, error_msg = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = "Paper Details"
# ---- DEFAULT SEARCH ----
else:
query = " ".join(args)
await bot.api.send_text_message(room.room_id, f"🔍 Searching: *{query[:50]}*...")
papers = await _search_arxiv(query, limit)
papers, error_msg = await _search_arxiv(query, limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Search: '{query[:50]}'"
# If we get here, papers is a list (possibly empty)
if not papers:
await bot.api.send_text_message(room.room_id, "❌ No papers found.")
return
@@ -302,7 +366,7 @@ async def handle_command(room, message, bot, prefix, config):
response = _format_collapsible(title, content, False)
await bot.api.send_markdown_message(room.room_id, response)
logging.info(f"Sent arXiv search results")
logging.info("Sent arXiv search results")
# ---------------------------------------------------------------------------
@@ -317,9 +381,9 @@ def setup(bot):
# Plugin Metadata
# ---------------------------------------------------------------------------
__version__ = "1.0.0"
__version__ = "1.0.2"
__author__ = "Funguy Bot"
__description__ = "arXiv academic paper search"
__description__ = "arXiv academic paper search (with rate limiting and error reporting)"
__help__ = """
<details>
<summary><strong>!arxiv</strong> Search academic papers on arXiv</summary>
+33
View File
@@ -450,6 +450,39 @@ async def cmd_np(room, message, bot, args):
if youtube_link:
message_text += f" | [YouTube]({youtube_link})"
# ---- New: fetch track genres ----
# --- Fetch genres: try track-level first, fall back to artist ---
genre_tags = []
# 1) Try track top tags
track_tag_data = await call_lastfm_api("track.getTopTags", {"artist": artist, "track": name, "autocorrect": "1"})
if track_tag_data:
tags = track_tag_data.get("toptags", {}).get("tag", [])
genre_tags = [safe_text(t, "name") for t in tags if safe_text(t, "name")]
# 2) If empty, fall back to artist top tags
if not genre_tags:
artist_tag_data = await call_lastfm_api("artist.getTopTags", {"artist": artist, "autocorrect": "1"})
if artist_tag_data:
tags = artist_tag_data.get("toptags", {}).get("tag", [])
genre_tags = [safe_text(t, "name") for t in tags if safe_text(t, "name")]
# 3) Append to message if we got anything
if genre_tags:
genre_str = " | 🏷️ " + ", ".join(genre_tags[:3])
message_text += genre_str
# ---- Fetch track duration (new) ----
track_info = await call_lastfm_api("track.getInfo", {
"artist": artist,
"track": name,
"autocorrect": "1"
})
if track_info:
track_obj = track_info.get("track", {})
duration_ms = safe_int(track_obj, "duration")
if duration_ms > 0:
mins = duration_ms // 60000
secs = (duration_ms % 60000) // 1000
message_text += f" | ⏱️ {mins}:{secs:02d}"
await bot.api.send_markdown_message(room.room_id, message_text)
logging.info(f"Sent now playing for {lastfm_user}")