Fixed arxiv plugin to respect rate limits. Fixed lastfm np to fetch and show genre tags and song length

This commit is contained in:
2026-05-07 03:49:33 -05:00
parent dba205685b
commit 972f34a25a
2 changed files with 140 additions and 43 deletions
+107 -43
View File
@@ -13,11 +13,13 @@ Commands:
!arxiv <id> - Get paper by arXiv ID !arxiv <id> - Get paper by arXiv ID
""" """
import asyncio
import logging import logging
import time
import aiohttp import aiohttp
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import random import random
from typing import Optional, Dict, List from typing import Optional, Dict, List, Tuple
from datetime import datetime, timedelta from datetime import datetime, timedelta
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -27,6 +29,15 @@ from datetime import datetime, timedelta
DEFAULT_RESULTS = 3 DEFAULT_RESULTS = 3
MAX_RESULTS = 10 MAX_RESULTS = 10
# REQUIRED by the arXiv API terms: identify your bot.
# Use a descriptive string with contact info. A Firefox User-Agent is
# also accepted, but the bot-specific one is recommended.
# Example Firefox UA: "Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0"
USER_AGENT = "FunguyBot/1.0 (mailto:your-email@example.com)"
# Minimum delay between successive API calls (arXiv asks for ≥3 seconds)
MIN_REQUEST_INTERVAL = 5
CATEGORIES = { CATEGORIES = {
"ai": "cs.AI", "ai": "cs.AI",
"ml": "cs.LG", "ml": "cs.LG",
@@ -42,13 +53,11 @@ CATEGORIES = {
"software": "cs.SE" "software": "cs.SE"
} }
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Helper Functions # Helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _format_collapsible(title: str, content: str, expanded: bool = False) -> str: def _format_collapsible(title: str, content: str, expanded: bool = False) -> str:
"""Format content in a collapsible details/summary block."""
open_attr = ' open' if expanded else '' open_attr = ' open' if expanded else ''
return f"<details{open_attr}>\n<summary>📚 {title}</summary>\n\n{content}\n\n</details>" return f"<details{open_attr}>\n<summary>📚 {title}</summary>\n\n{content}\n\n</details>"
@@ -64,27 +73,19 @@ def _oxford_comma(items):
def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str: def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str:
"""Format a paper as an HTML list item."""
result = f"<li>\n<strong>{index}. {paper['title']}</strong><br/>\n" result = f"<li>\n<strong>{index}. {paper['title']}</strong><br/>\n"
# Authors
result += f"👥 <strong>Authors:</strong> {_oxford_comma(paper['authors'][:3])}" result += f"👥 <strong>Authors:</strong> {_oxford_comma(paper['authors'][:3])}"
if len(paper['authors']) > 3: if len(paper['authors']) > 3:
result += f" and {len(paper['authors']) - 3} others" result += f" and {len(paper['authors']) - 3} others"
result += "<br/>\n" result += "<br/>\n"
# Metadata
result += f"📅 <strong>Published:</strong> {paper['published']}<br/>\n" result += f"📅 <strong>Published:</strong> {paper['published']}<br/>\n"
result += f"🏷️ <strong>Categories:</strong> {', '.join(paper['categories'][:3])}" result += f"🏷️ <strong>Categories:</strong> {', '.join(paper['categories'][:3])}"
if len(paper['categories']) > 3: if len(paper['categories']) > 3:
result += f" +{len(paper['categories']) - 3}" result += f" +{len(paper['categories']) - 3}"
result += "<br/>\n" result += "<br/>\n"
# Links
result += f"🔗 <strong>arXiv ID:</strong> {paper['id']}<br/>\n" result += f"🔗 <strong>arXiv ID:</strong> {paper['id']}<br/>\n"
result += f"📄 <strong>PDF:</strong> <a href='{paper['pdf_url']}'>{paper['pdf_url']}</a><br/>\n" result += f"📄 <strong>PDF:</strong> <a href='{paper['pdf_url']}'>{paper['pdf_url']}</a><br/>\n"
# Abstract
if include_abstract and paper['summary'] != "No abstract": if include_abstract and paper['summary'] != "No abstract":
abstract = paper['summary'] abstract = paper['summary']
if len(abstract) > 500: if len(abstract) > 500:
@@ -95,11 +96,34 @@ def _format_paper(paper: Dict, index: int, include_abstract: bool = True) -> str
return result return result
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list: List[str] = None) -> Optional[List[Dict]]: # ---------------------------------------------------------------------------
# Persist last request timestamp for rate limiting
# ---------------------------------------------------------------------------
_last_request_time = 0.0
async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS,
id_list: List[str] = None) -> Tuple[Optional[List[Dict]], Optional[str]]:
"""
Search arXiv API. Returns (papers, error_message).
- papers: list of paper dicts, or None on failure.
- error_message: None on success, otherwise a user-friendly error string.
"""
global _last_request_time
# ----- Throttle -----
now = time.monotonic()
wait = _last_request_time + MIN_REQUEST_INTERVAL - now
if wait > 0:
logging.debug(f"arXiv throttling: waiting {wait:.1f}s")
await asyncio.sleep(wait)
_last_request_time = time.monotonic()
base_url = "http://export.arxiv.org/api/query" base_url = "http://export.arxiv.org/api/query"
headers = {"User-Agent": USER_AGENT}
if id_list: if id_list:
id_query = "+OR+".join([f"id:{pid}" for pid in id_list]) id_query = "+OR+".join(f"id:{pid}" for pid in id_list)
params = {"search_query": id_query, "max_results": max_results} params = {"search_query": id_query, "max_results": max_results}
else: else:
params = { params = {
@@ -110,19 +134,29 @@ async def _search_arxiv(query: str, max_results: int = DEFAULT_RESULTS, id_list:
} }
try: try:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(headers=headers) as session:
async with session.get(base_url, params=params) as response: async with session.get(base_url, params=params) as response:
if response.status == 200: if response.status == 200:
text = await response.text() text = await response.text()
return _parse_arxiv_response(text) papers = _parse_arxiv_response(text)
return None logging.info(f"arXiv returned {len(papers)} papers for query: {query[:60]}")
return papers, None
elif response.status == 429:
retry_after = response.headers.get("Retry-After", "unknown")
logging.error(f"arXiv rate limited (429). Retry-After: {retry_after}")
return None, "⚠️ arXiv rate limit exceeded. Please wait a moment and try again."
else:
text = await response.text()
logging.error(f"arXiv API error {response.status}: {text[:300]}")
return None, f"❌ arXiv API error (HTTP {response.status})."
except Exception as e: except Exception as e:
logging.error(f"Error searching arXiv: {e}") logging.error(f"Error searching arXiv: {e}")
return None return None, "❌ Network or internal error while contacting arXiv."
async def _get_category_papers(category: str, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]: async def _get_category_papers(category: str, limit: int = DEFAULT_RESULTS) -> Optional[List[Dict]]:
return await _search_arxiv(f"cat:{category}", limit) papers, _ = await _search_arxiv(f"cat:{category}", limit)
return papers
async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[List[Dict]]: async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[List[Dict]]:
@@ -131,14 +165,16 @@ async def _get_recent_papers(category: str = None, days: int = 7) -> Optional[Li
query = f"cat:{category} AND submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" query = f"cat:{category} AND submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]"
else: else:
query = f"submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" query = f"submittedDate:[{date}000000 TO {datetime.now().strftime('%Y%m%d')}235959]"
return await _search_arxiv(query, DEFAULT_RESULTS) papers, _ = await _search_arxiv(query, DEFAULT_RESULTS)
return papers
async def _get_random_paper() -> Optional[Dict]: async def _get_random_paper() -> Optional[Dict]:
"""Returns a single random paper or None."""
terms = ["machine learning", "quantum", "neural network", "optimization", "algorithm", "security"] terms = ["machine learning", "quantum", "neural network", "optimization", "algorithm", "security"]
query = random.choice(terms) query = random.choice(terms)
results = await _search_arxiv(query, max_results=MAX_RESULTS) papers, _ = await _search_arxiv(query, max_results=MAX_RESULTS)
return random.choice(results) if results else None return random.choice(papers) if papers else None
def _parse_arxiv_response(xml_text: str) -> List[Dict]: def _parse_arxiv_response(xml_text: str) -> List[Dict]:
@@ -188,7 +224,6 @@ def _parse_arxiv_response(xml_text: str) -> List[Dict]:
'categories': categories, 'categories': categories,
'published': pub_date 'published': pub_date
}) })
return papers return papers
@@ -200,12 +235,12 @@ async def handle_command(room, message, bot, prefix, config):
import simplematrixbotlib as botlib import simplematrixbotlib as botlib
match = botlib.MessageMatch(room, message, bot, prefix) match = botlib.MessageMatch(room, message, bot, prefix)
if not (match.is_not_from_this_bot() and match.prefix() and match.command("arxiv")): if not (match.is_not_from_this_bot() and match.prefix() and match.command("arxiv")):
return return
args = match.args() args = match.args()
# No arguments → show help
if not args: if not args:
help_content = ( help_content = (
"<strong>Commands:</strong><br/><br/>" "<strong>Commands:</strong><br/><br/>"
@@ -225,6 +260,7 @@ async def handle_command(room, message, bot, prefix, config):
limit = DEFAULT_RESULTS limit = DEFAULT_RESULTS
include_abstract = True include_abstract = True
# Extract optional numeric limit (first or last argument)
if args and args[0].isdigit(): if args and args[0].isdigit():
limit = min(int(args[0]), MAX_RESULTS) limit = min(int(args[0]), MAX_RESULTS)
args = args[1:] args = args[1:]
@@ -234,6 +270,7 @@ async def handle_command(room, message, bot, prefix, config):
args = args[:-1] args = args[:-1]
cmd = args[0].lower() if args else None cmd = args[0].lower() if args else None
# ---- LIST ----
if cmd == "list": if cmd == "list":
include_abstract = False include_abstract = False
if len(args) >= 2: if len(args) >= 2:
@@ -241,18 +278,29 @@ async def handle_command(room, message, bot, prefix, config):
else: else:
await bot.api.send_text_message(room.room_id, "Usage: !arxiv list <query>") await bot.api.send_text_message(room.room_id, "Usage: !arxiv list <query>")
return return
await bot.api.send_text_message(room.room_id, f"🔍 Listing: *{query[:50]}*...")
papers, error_msg = await _search_arxiv(query, limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Search: '{query[:50]}'"
# ---- CATEGORY ----
elif cmd == "category" and len(args) >= 2: elif cmd == "category" and len(args) >= 2:
cat_key = args[1].lower() cat_key = args[1].lower()
if cat_key in CATEGORIES: if cat_key not in CATEGORIES:
category = CATEGORIES[cat_key] await bot.api.send_text_message(room.room_id,
await bot.api.send_text_message(room.room_id, f"📚 Fetching {cat_key.upper()} papers...") f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
papers = await _get_category_papers(category, limit)
title = f"Recent Papers in {cat_key.upper()}"
else:
await bot.api.send_text_message(room.room_id, f"Unknown category. Available: {', '.join(CATEGORIES.keys())}")
return return
category = CATEGORIES[cat_key]
await bot.api.send_text_message(room.room_id, f"📚 Fetching {cat_key.upper()} papers...")
papers, error_msg = await _search_arxiv(f"cat:{category}", limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Recent Papers in {cat_key.upper()}"
# ---- RECENT ----
elif cmd == "recent": elif cmd == "recent":
category = None category = None
if len(args) >= 2 and args[1].lower() in CATEGORIES: if len(args) >= 2 and args[1].lower() in CATEGORIES:
@@ -262,8 +310,16 @@ async def handle_command(room, message, bot, prefix, config):
else: else:
await bot.api.send_text_message(room.room_id, "📚 Fetching recent papers...") await bot.api.send_text_message(room.room_id, "📚 Fetching recent papers...")
title = "Recent Papers (Last 7 Days)" title = "Recent Papers (Last 7 Days)"
papers = await _get_recent_papers(category, limit) papers, error_msg = await _search_arxiv(
f"cat:{category} AND submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]" if category
else f"submittedDate:[{(datetime.now() - timedelta(days=7)).strftime('%Y%m%d')}000000 TO {datetime.now().strftime('%Y%m%d')}235959]",
limit
)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
# ---- RANDOM ----
elif cmd == "random": elif cmd == "random":
await bot.api.send_text_message(room.room_id, "🎲 Fetching random paper...") await bot.api.send_text_message(room.room_id, "🎲 Fetching random paper...")
paper = await _get_random_paper() paper = await _get_random_paper()
@@ -272,25 +328,33 @@ async def handle_command(room, message, bot, prefix, config):
response = _format_collapsible("Random Paper", content, True) response = _format_collapsible("Random Paper", content, True)
await bot.api.send_markdown_message(room.room_id, response) await bot.api.send_markdown_message(room.room_id, response)
else: else:
await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper.") await bot.api.send_text_message(room.room_id, "❌ Failed to fetch random paper (rate limit or API error).")
return return # early return: we already sent the result
# ---- ID LOOKUP ----
elif cmd and (cmd[0].isdigit() or ('.' in cmd and len(cmd.split('.')) == 2)): elif cmd and (cmd[0].isdigit() or ('.' in cmd and len(cmd.split('.')) == 2)):
paper_ids = [cmd] + [arg for arg in args[1:] if arg[0].isdigit() or ('.' in arg and len(arg.split('.')) == 2)] paper_ids = [cmd] + [arg for arg in args[1:] if arg[0].isdigit() or ('.' in arg and len(arg.split('.')) == 2)]
if paper_ids: if not paper_ids:
await bot.api.send_text_message(room.room_id, f"📚 Fetching paper(s)...")
papers = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
title = "Paper Details"
else:
await bot.api.send_text_message(room.room_id, "❌ Invalid arXiv ID.") await bot.api.send_text_message(room.room_id, "❌ Invalid arXiv ID.")
return return
await bot.api.send_text_message(room.room_id, "📚 Fetching paper(s)...")
papers, error_msg = await _search_arxiv("", max_results=len(paper_ids), id_list=paper_ids)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = "Paper Details"
# ---- DEFAULT SEARCH ----
else: else:
query = " ".join(args) query = " ".join(args)
await bot.api.send_text_message(room.room_id, f"🔍 Searching: *{query[:50]}*...") await bot.api.send_text_message(room.room_id, f"🔍 Searching: *{query[:50]}*...")
papers = await _search_arxiv(query, limit) papers, error_msg = await _search_arxiv(query, limit)
if error_msg:
await bot.api.send_text_message(room.room_id, error_msg)
return
title = f"Search: '{query[:50]}'" title = f"Search: '{query[:50]}'"
# If we get here, papers is a list (possibly empty)
if not papers: if not papers:
await bot.api.send_text_message(room.room_id, "❌ No papers found.") await bot.api.send_text_message(room.room_id, "❌ No papers found.")
return return
@@ -302,7 +366,7 @@ async def handle_command(room, message, bot, prefix, config):
response = _format_collapsible(title, content, False) response = _format_collapsible(title, content, False)
await bot.api.send_markdown_message(room.room_id, response) await bot.api.send_markdown_message(room.room_id, response)
logging.info(f"Sent arXiv search results") logging.info("Sent arXiv search results")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -317,9 +381,9 @@ def setup(bot):
# Plugin Metadata # Plugin Metadata
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
__version__ = "1.0.0" __version__ = "1.0.2"
__author__ = "Funguy Bot" __author__ = "Funguy Bot"
__description__ = "arXiv academic paper search" __description__ = "arXiv academic paper search (with rate limiting and error reporting)"
__help__ = """ __help__ = """
<details> <details>
<summary><strong>!arxiv</strong> Search academic papers on arXiv</summary> <summary><strong>!arxiv</strong> Search academic papers on arXiv</summary>
+33
View File
@@ -450,6 +450,39 @@ async def cmd_np(room, message, bot, args):
if youtube_link: if youtube_link:
message_text += f" | [YouTube]({youtube_link})" message_text += f" | [YouTube]({youtube_link})"
# ---- New: fetch track genres ----
# --- Fetch genres: try track-level first, fall back to artist ---
genre_tags = []
# 1) Try track top tags
track_tag_data = await call_lastfm_api("track.getTopTags", {"artist": artist, "track": name, "autocorrect": "1"})
if track_tag_data:
tags = track_tag_data.get("toptags", {}).get("tag", [])
genre_tags = [safe_text(t, "name") for t in tags if safe_text(t, "name")]
# 2) If empty, fall back to artist top tags
if not genre_tags:
artist_tag_data = await call_lastfm_api("artist.getTopTags", {"artist": artist, "autocorrect": "1"})
if artist_tag_data:
tags = artist_tag_data.get("toptags", {}).get("tag", [])
genre_tags = [safe_text(t, "name") for t in tags if safe_text(t, "name")]
# 3) Append to message if we got anything
if genre_tags:
genre_str = " | 🏷️ " + ", ".join(genre_tags[:3])
message_text += genre_str
# ---- Fetch track duration (new) ----
track_info = await call_lastfm_api("track.getInfo", {
"artist": artist,
"track": name,
"autocorrect": "1"
})
if track_info:
track_obj = track_info.get("track", {})
duration_ms = safe_int(track_obj, "duration")
if duration_ms > 0:
mins = duration_ms // 60000
secs = (duration_ms % 60000) // 1000
message_text += f" | ⏱️ {mins}:{secs:02d}"
await bot.api.send_markdown_message(room.room_id, message_text) await bot.api.send_markdown_message(room.room_id, message_text)
logging.info(f"Sent now playing for {lastfm_user}") logging.info(f"Sent now playing for {lastfm_user}")