""" Goodreads Quote Scraper – Playwright (headless Chromium) """ import logging import random import re import asyncio import simplematrixbotlib as botlib from bs4 import BeautifulSoup from plugins.common import html_escape, collapsible_summary GR_POPULAR = "https://www.goodreads.com/quotes" GR_SEARCH = "https://www.goodreads.com/quotes/search" QUOTES_PER_PAGE = 30 MAX_SEARCH_PAGES = 3 _browser = None _playwright = None async def _get_browser(): global _browser, _playwright if _browser is None: from playwright.async_api import async_playwright _playwright = await async_playwright().start() _browser = await _playwright.chromium.launch(headless=True) logging.info("Playwright browser started") return _browser def _extract_quotes(html: str) -> list: soup = BeautifulSoup(html, "lxml") quotes = [] for div in soup.find_all("div", class_="quoteText"): full_text = div.get_text(" ", strip=True) m = re.search(r"“(.+?)”", full_text) if not m: m = re.search(r"(.+?)\s*―", full_text) if not m: continue content = m.group(1).strip() author_span = div.find("span", class_="authorOrTitle") author = author_span.get_text(strip=True).rstrip(",") if author_span else "Unknown" quotes.append({"content": content, "author": author}) return quotes async def _scrape(url: str, params: dict = None) -> str: browser = await _get_browser() context = await browser.new_context(user_agent="Mozilla/5.0 ...") page = await context.new_page() try: if params: from urllib.parse import urlencode full_url = f"{url}?{urlencode(params)}" else: full_url = url await page.goto(full_url, wait_until="networkidle", timeout=15000) html = await page.content() return html except Exception as e: logging.error(f"Scrape error: {e}") return "" finally: await page.close() await context.close() async def get_random_popular() -> list: html = await _scrape(GR_POPULAR) return _extract_quotes(html) async def get_author_quotes(author: str) -> list: all_quotes = [] for page in range(1, MAX_SEARCH_PAGES + 1): html = await _scrape(GR_SEARCH, {"q": author, "commit": "Search", "page": page}) page_quotes = _extract_quotes(html) all_quotes.extend(page_quotes) if len(page_quotes) < QUOTES_PER_PAGE: break return all_quotes def format_quote(q): safe_content = html_escape(q["content"]) safe_author = html_escape(q["author"]) return f'"{safe_content}"\n\n— {safe_author}' async def handle_command(room, message, bot, prefix, config): match = botlib.MessageMatch(room, message, bot, prefix) if not (match.is_not_from_this_bot() and match.prefix() and match.command("quote")): return args = match.args() if args and args[0].lower() in ("help", "-h", "--help"): help_html = collapsible_summary("📖 !quote help", "
!quote – random popular quote!quote <author> – quote by author!quote random, !quote <author>.