""" Goodreads Quote Scraper – Playwright (headless Chromium) """ import logging import random import re import asyncio import simplematrixbotlib as botlib from bs4 import BeautifulSoup from plugins.common import html_escape, collapsible_summary GR_POPULAR = "https://www.goodreads.com/quotes" GR_SEARCH = "https://www.goodreads.com/quotes/search" QUOTES_PER_PAGE = 30 MAX_SEARCH_PAGES = 3 _browser = None _playwright = None async def _get_browser(): global _browser, _playwright if _browser is None: from playwright.async_api import async_playwright _playwright = await async_playwright().start() _browser = await _playwright.chromium.launch(headless=True) logging.info("Playwright browser started") return _browser def _extract_quotes(html: str) -> list: soup = BeautifulSoup(html, "lxml") quotes = [] for div in soup.find_all("div", class_="quoteText"): full_text = div.get_text(" ", strip=True) m = re.search(r"“(.+?)”", full_text) if not m: m = re.search(r"(.+?)\s*―", full_text) if not m: continue content = m.group(1).strip() author_span = div.find("span", class_="authorOrTitle") author = author_span.get_text(strip=True).rstrip(",") if author_span else "Unknown" quotes.append({"content": content, "author": author}) return quotes async def _scrape(url: str, params: dict = None) -> str: browser = await _get_browser() context = await browser.new_context(user_agent="Mozilla/5.0 ...") page = await context.new_page() try: if params: from urllib.parse import urlencode full_url = f"{url}?{urlencode(params)}" else: full_url = url await page.goto(full_url, wait_until="networkidle", timeout=15000) html = await page.content() return html except Exception as e: logging.error(f"Scrape error: {e}") return "" finally: await page.close() await context.close() async def get_random_popular() -> list: html = await _scrape(GR_POPULAR) return _extract_quotes(html) async def get_author_quotes(author: str) -> list: all_quotes = [] for page in range(1, MAX_SEARCH_PAGES + 1): html = await _scrape(GR_SEARCH, {"q": author, "commit": "Search", "page": page}) page_quotes = _extract_quotes(html) all_quotes.extend(page_quotes) if len(page_quotes) < QUOTES_PER_PAGE: break return all_quotes def format_quote(q): safe_content = html_escape(q["content"]) safe_author = html_escape(q["author"]) return f'"{safe_content}"\n\n— {safe_author}' async def handle_command(room, message, bot, prefix, config): match = botlib.MessageMatch(room, message, bot, prefix) if not (match.is_not_from_this_bot() and match.prefix() and match.command("quote")): return args = match.args() if args and args[0].lower() in ("help", "-h", "--help"): help_html = collapsible_summary("📖 !quote help", "") await bot.api.send_markdown_message(room.room_id, help_html) return try: if args: author = " ".join(args).strip() safe_author = html_escape(author) await bot.api.send_text_message(room.room_id, f"🔍 Searching Goodreads for quotes by **{safe_author}**…") quotes = await get_author_quotes(author) if not quotes: await bot.api.send_text_message(room.room_id, f"❌ No quotes found for '{safe_author}'.") return chosen = random.choice(quotes) else: await bot.api.send_text_message(room.room_id, "✨ Fetching a random popular quote…") quotes = await get_random_popular() if not quotes: await bot.api.send_text_message(room.room_id, "❌ Could not fetch any quotes.") return chosen = random.choice(quotes) await bot.api.send_markdown_message(room.room_id, format_quote(chosen)) logging.info(f"Quote sent: {chosen['author']}") except Exception as e: logging.exception("Unexpected error in quote plugin") await bot.api.send_text_message(room.room_id, f"❌ Scraping error: {e}") __version__ = "1.0.2" __author__ = "Funguy Bot" __description__ = "Fetch Goodreads quotes" __help__ = """
!quote – Quotes from Goodreads

!quote random, !quote <author>.

"""