""" Goodreads Quote Scraper – Playwright (headless Chromium) No external APIs, no keys; scrapes directly from goodreads.com """ import logging import random import re import asyncio import simplematrixbotlib as botlib from bs4 import BeautifulSoup from urllib.parse import urlencode logger = logging.getLogger("quote") GR_POPULAR = "https://www.goodreads.com/quotes" GR_SEARCH = "https://www.goodreads.com/quotes/search" QUOTES_PER_PAGE = 30 MAX_SEARCH_PAGES = 3 # --------------------------------------------------------------------------- # Playwright browser (shared, launched once) # --------------------------------------------------------------------------- _browser = None _playwright = None async def _get_browser(): global _browser, _playwright if _browser is None: from playwright.async_api import async_playwright _playwright = await async_playwright().start() _browser = await _playwright.chromium.launch(headless=True) logger.info("Playwright browser started") return _browser async def _close_browser(): global _browser, _playwright if _browser: await _browser.close() _browser = None if _playwright: await _playwright.stop() _playwright = None # --------------------------------------------------------------------------- # HTML parsing (Goodreads specific) # --------------------------------------------------------------------------- def _extract_quotes(html: str) -> list[dict]: """Parse Goodreads HTML and return a list of {content, author} dicts.""" soup = BeautifulSoup(html, "lxml") quotes = [] for div in soup.find_all("div", class_="quoteText"): full_text = div.get_text(" ", strip=True) # Try curly quotes m = re.search(r"“(.+?)”", full_text) if not m: m = re.search(r"(.+?)\s*―", full_text) if not m: continue content = m.group(1).strip() author_span = div.find("span", class_="authorOrTitle") author = author_span.get_text(strip=True).rstrip(",") if author_span else "Unknown" quotes.append({"content": content, "author": author}) # Alternative layout (if first method 
yielded nothing) for div in soup.find_all("div", class_="quoteDetails"): text_elem = div.find("div", class_="quoteText") author_elem = div.find("span", class_="authorOrTitle") if text_elem: content = text_elem.get_text(strip=True).strip("“”") else: continue author = author_elem.get_text(strip=True).rstrip(",") if author_elem else "Unknown" quotes.append({"content": content, "author": author}) return quotes # --------------------------------------------------------------------------- # Page fetching # --------------------------------------------------------------------------- async def _scrape(url: str, params: dict = None) -> str: browser = await _get_browser() context = await browser.new_context( user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36" ) page = await context.new_page() try: if params: full_url = f"{url}?{urlencode(params)}" else: full_url = url await page.goto(full_url, wait_until="networkidle", timeout=15000) html = await page.content() return html except Exception as e: logger.error(f"Failed to load {full_url}: {e}") return "" finally: await page.close() await context.close() async def get_random_popular() -> list[dict]: html = await _scrape(GR_POPULAR) return _extract_quotes(html) async def get_author_quotes(author: str) -> list[dict]: all_quotes = [] for page in range(1, MAX_SEARCH_PAGES + 1): html = await _scrape(GR_SEARCH, {"q": author, "commit": "Search", "page": page}) page_quotes = _extract_quotes(html) all_quotes.extend(page_quotes) if len(page_quotes) < QUOTES_PER_PAGE: break return all_quotes # --------------------------------------------------------------------------- # Formatting # --------------------------------------------------------------------------- def format_quote(q: dict) -> str: return f'"{q["content"]}"\n\n— {q["author"]}' # --------------------------------------------------------------------------- # Command handler # 
# ---------------------------------------------------------------------------
async def handle_command(room, message, bot, prefix, config):
    """Handle the !quote command.

    - ``!quote``                → one random quote from the popular page
    - ``!quote <author name>``  → one random quote by that author
    - ``!quote help``           → usage message

    Errors are reported back to the room rather than raised.
    """
    match = botlib.MessageMatch(room, message, bot, prefix)
    if not (match.is_not_from_this_bot() and match.prefix() and match.command("quote")):
        return

    args = match.args()

    # Help
    if args and args[0].lower() in ("help", "-h", "--help"):
        # NOTE(review): the original literal was corrupted in extraction;
        # reconstructed as markdown from the surviving fragments.
        help_html = (
            "**📖 !quote help**\n\n"
            "Examples:\n"
            "- `!quote`\n"
            "- `!quote Terence McKenna`\n"
            "- `!quote Oscar Wilde`\n\n"
            "Scraped with Playwright (headless browser).\n"
        )
        await bot.api.send_markdown_message(room.room_id, help_html)
        return

    try:
        if args:
            # Author search: join all args into a single name.
            author = " ".join(args).strip()
            await bot.api.send_text_message(
                room.room_id,
                f"🔍 Searching Goodreads for quotes by **{author}**…"
            )
            quotes = await get_author_quotes(author)
            if not quotes:
                await bot.api.send_text_message(
                    room.room_id,
                    f"❌ No quotes found for '**{author}**'. Try a different spelling."
                )
                return
            chosen = random.choice(quotes)
        else:
            await bot.api.send_text_message(room.room_id, "✨ Fetching a random popular quote…")
            quotes = await get_random_popular()
            if not quotes:
                await bot.api.send_text_message(room.room_id, "❌ Could not fetch any quotes.")
                return
            chosen = random.choice(quotes)

        await bot.api.send_markdown_message(room.room_id, format_quote(chosen))
        logger.info(f"Quote sent: {chosen['author']}")

    except Exception as e:
        # Top-level boundary: log the full traceback, report a short error.
        logger.exception("Unexpected error in quote plugin")
        await bot.api.send_text_message(
            room.room_id,
            f"❌ Scraping error: {e}"
        )


# ---------------------------------------------------------------------------
# Plugin metadata
# ---------------------------------------------------------------------------
__version__ = "1.0.1"
__author__ = "Funguy Bot"
__description__ = "Goodreads quotes via Playwright (headless browser)"
__help__ = """
!quote – Quotes from Goodreads (scraped with Playwright)

No API keys, no JSON files – just a real browser fetching from Goodreads.
"""