quote plugin added. Fixed funguy.py and plugins.py for plugin list.
This commit is contained in:
@@ -0,0 +1,199 @@
|
||||
"""
|
||||
Goodreads Quote Scraper – Playwright (headless Chromium)
|
||||
No external APIs, no keys; scrapes directly from goodreads.com
|
||||
"""
|
||||
|
||||
import logging
|
||||
import random
|
||||
import re
|
||||
import asyncio
|
||||
import simplematrixbotlib as botlib
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlencode
|
||||
|
||||
# Module-level logger for this plugin.
logger = logging.getLogger("quote")

# Goodreads endpoints scraped directly (no API keys required).
GR_POPULAR = "https://www.goodreads.com/quotes"
GR_SEARCH = "https://www.goodreads.com/quotes/search"
# Goodreads renders 30 quotes per results page; a shorter page means the last one.
QUOTES_PER_PAGE = 30
# Cap on search pagination to keep scrapes fast and polite.
MAX_SEARCH_PAGES = 3

# ---------------------------------------------------------------------------
# Playwright browser (shared, launched once)
# ---------------------------------------------------------------------------
# Lazily-initialised module singletons; created on first use and torn down together.
_browser = None
_playwright = None
async def _get_browser():
    """Return the shared headless Chromium instance, launching it on first use."""
    global _browser, _playwright
    if _browser is not None:
        return _browser
    # Import deferred so the module loads even when Playwright is absent.
    from playwright.async_api import async_playwright
    _playwright = await async_playwright().start()
    _browser = await _playwright.chromium.launch(headless=True)
    logger.info("Playwright browser started")
    return _browser
||||
async def _close_browser():
    """Release the shared Playwright resources (browser first, then the driver)."""
    global _browser, _playwright
    if _browser is not None:
        await _browser.close()
        _browser = None
    if _playwright is not None:
        await _playwright.stop()
        _playwright = None
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTML parsing (Goodreads specific)
|
||||
# ---------------------------------------------------------------------------
|
||||
def _extract_quotes(html: str) -> list[dict]:
    """Parse Goodreads HTML and return a list of {content, author} dicts.

    Tries the primary ``quoteText`` layout first; only if that yields
    nothing does it fall back to the alternative ``quoteDetails`` layout.
    (Previously both loops always ran, and since ``quoteDetails`` wraps
    ``quoteText``, every quote was collected twice.)
    """
    soup = BeautifulSoup(html, "lxml")
    quotes = []

    for div in soup.find_all("div", class_="quoteText"):
        full_text = div.get_text(" ", strip=True)
        # The quote body is wrapped in curly quotes; fall back to the text
        # preceding the horizontal-bar attribution marker.
        m = re.search(r"“(.+?)”", full_text)
        if not m:
            m = re.search(r"(.+?)\s*―", full_text)
        if not m:
            continue
        content = m.group(1).strip()

        author_span = div.find("span", class_="authorOrTitle")
        # Goodreads suffixes the author name with a trailing comma.
        author = author_span.get_text(strip=True).rstrip(",") if author_span else "Unknown"
        quotes.append({"content": content, "author": author})

    # Alternative layout — only when the first method yielded nothing,
    # otherwise quotes would be duplicated (quoteDetails contains quoteText).
    if not quotes:
        for div in soup.find_all("div", class_="quoteDetails"):
            text_elem = div.find("div", class_="quoteText")
            if not text_elem:
                continue
            content = text_elem.get_text(strip=True).strip("“”")
            author_elem = div.find("span", class_="authorOrTitle")
            author = author_elem.get_text(strip=True).rstrip(",") if author_elem else "Unknown"
            quotes.append({"content": content, "author": author})

    return quotes
|
||||
# ---------------------------------------------------------------------------
|
||||
# Page fetching
|
||||
# ---------------------------------------------------------------------------
|
||||
async def _scrape(url: str, params: dict | None = None) -> str:
    """Load *url* (optionally with query *params*) in a fresh browser
    context and return the rendered HTML, or "" on failure.

    A new context per request keeps cookies/session state isolated
    between scrapes.
    """
    browser = await _get_browser()
    context = await browser.new_context(
        # Real desktop-Chrome UA so Goodreads serves the normal page.
        user_agent=(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/125.0.0.0 Safari/537.36"
        )
    )
    page = await context.new_page()
    # Build the URL before the try block so the except handler can always
    # reference it safely.
    full_url = f"{url}?{urlencode(params)}" if params else url
    try:
        await page.goto(full_url, wait_until="networkidle", timeout=15000)
        return await page.content()
    except Exception as e:
        # Best-effort: a failed page load degrades to "no quotes", not a crash.
        logger.error("Failed to load %s: %s", full_url, e)
        return ""
    finally:
        await page.close()
        await context.close()
|
||||
async def get_random_popular() -> list[dict]:
    """Scrape the Goodreads popular-quotes page and return the parsed quotes."""
    return _extract_quotes(await _scrape(GR_POPULAR))
||||
async def get_author_quotes(author: str) -> list[dict]:
    """Search Goodreads for *author* and collect quotes across up to
    MAX_SEARCH_PAGES result pages, stopping early when a page comes back
    short (i.e. it was the last page of results)."""
    collected: list[dict] = []
    page_no = 1
    while page_no <= MAX_SEARCH_PAGES:
        html = await _scrape(GR_SEARCH, {"q": author, "commit": "Search", "page": page_no})
        batch = _extract_quotes(html)
        collected.extend(batch)
        if len(batch) < QUOTES_PER_PAGE:
            break
        page_no += 1
    return collected
|
||||
# ---------------------------------------------------------------------------
|
||||
# Formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
def format_quote(q: dict) -> str:
    """Render a {content, author} quote dict as a display string:
    the quoted text, a blank line, then an em-dash attribution."""
    content = q["content"]
    author = q["author"]
    return '"{}"\n\n— {}'.format(content, author)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Command handler
|
||||
# ---------------------------------------------------------------------------
|
||||
async def handle_command(room, message, bot, prefix, config):
    """Handle the ``!quote`` command.

    - ``!quote``           → random popular quote from Goodreads
    - ``!quote <author>``  → random quote by that author
    - ``!quote help``      → usage help

    *config* is accepted for plugin-interface compatibility but is unused
    here. Errors are reported to the room rather than crashing the bot.
    """
    match = botlib.MessageMatch(room, message, bot, prefix)
    if not (match.is_not_from_this_bot() and match.prefix() and match.command("quote")):
        return

    args = match.args()

    # Help
    if args and args[0].lower() in ("help", "-h", "--help"):
        help_html = (
            "<details><summary><strong>📖 !quote help</strong></summary>"
            "<ul>"
            "<li><code>!quote</code> – random popular quote from Goodreads</li>"
            "<li><code>!quote <author></code> – random quote by that author</li>"
            "<li><code>!quote help</code> – this</li>"
            "</ul>"
            "<p><b>Examples:</b><br><code>!quote</code><br>"
            "<code>!quote Terence McKenna</code><br>"
            "<code>!quote Oscar Wilde</code></p>"
            "<p>Scraped with Playwright (headless browser).</p>"
            "</details>"
        )
        await bot.api.send_markdown_message(room.room_id, help_html)
        return

    try:
        if args:
            author = " ".join(args).strip()
            await bot.api.send_text_message(
                room.room_id, f"🔍 Searching Goodreads for quotes by **{author}**…"
            )
            quotes = await get_author_quotes(author)
            if not quotes:
                await bot.api.send_text_message(
                    room.room_id,
                    f"❌ No quotes found for '**{author}**'. Try a different spelling."
                )
                return
        else:
            await bot.api.send_text_message(room.room_id, "✨ Fetching a random popular quote…")
            quotes = await get_random_popular()
            if not quotes:
                await bot.api.send_text_message(room.room_id, "❌ Could not fetch any quotes.")
                return

        # Both branches guarantee a non-empty list here; pick one at random.
        chosen = random.choice(quotes)
        await bot.api.send_markdown_message(room.room_id, format_quote(chosen))
        logger.info("Quote sent: %s", chosen["author"])

    except Exception as e:
        # Top-level boundary: log the traceback, then tell the room what went wrong.
        logger.exception("Unexpected error in quote plugin")
        await bot.api.send_text_message(
            room.room_id, f"❌ Scraping error: {e}"
        )
|
||||
# ---------------------------------------------------------------------------
# Plugin metadata
# ---------------------------------------------------------------------------
# Dunder attributes read by the bot's plugin loader to list/describe plugins.
__version__ = "1.0.1"
__author__ = "Funguy Bot"
__description__ = "Goodreads quotes via Playwright (headless browser)"
# HTML help blurb rendered by the bot's global help listing.
__help__ = """
<details>
<summary><strong>!quote</strong> – Quotes from Goodreads (scraped with Playwright)</summary>
<ul>
<li><code>!quote</code> – random popular quote</li>
<li><code>!quote <author></code> – random quote by that author</li>
<li><code>!quote help</code></li>
</ul>
<p>No API keys, no JSON files – just a real browser fetching from Goodreads.</p>
</details>
"""
||||
Reference in New Issue
Block a user