126 lines
4.6 KiB
Python
126 lines
4.6 KiB
Python
"""
|
||
Goodreads Quote Scraper – Playwright (headless Chromium)
|
||
"""
|
||
import logging
|
||
import random
|
||
import re
|
||
import asyncio
|
||
import simplematrixbotlib as botlib
|
||
from bs4 import BeautifulSoup
|
||
from plugins.common import html_escape, collapsible_summary
|
||
|
||
# Goodreads endpoints: the quote search page and the popular-quotes listing.
GR_SEARCH: str = "https://www.goodreads.com/quotes/search"
GR_POPULAR: str = "https://www.goodreads.com/quotes"

# Search paging: at most MAX_SEARCH_PAGES result pages are fetched, and a page
# holding fewer than QUOTES_PER_PAGE quotes is treated as the last one.
MAX_SEARCH_PAGES: int = 3
QUOTES_PER_PAGE: int = 30

# Shared Playwright driver and Chromium browser; both stay None until the
# first scrape creates them.
_playwright = None
_browser = None
|
||
|
||
async def _get_browser():
    """Return the shared headless Chromium browser, launching it when needed.

    The Playwright driver and the browser are cached in the module-level
    ``_playwright`` and ``_browser`` globals. A browser is (re)launched when
    none exists yet or when the cached one reports it is no longer connected,
    so a crashed Chromium does not break every later scrape.

    Returns:
        The Playwright browser object.

    Raises:
        Whatever Playwright raises if the driver or the browser cannot start.
    """
    global _browser, _playwright
    if _browser is not None and _browser.is_connected():
        return _browser

    # Imported lazily so the module can be loaded without Playwright present.
    from playwright.async_api import async_playwright

    # Reuse an already running driver: if a previous launch() failed after
    # start() succeeded, starting a second driver would orphan the first one.
    if _playwright is None:
        _playwright = await async_playwright().start()
    _browser = await _playwright.chromium.launch(headless=True)
    logging.info("Playwright browser started")
    return _browser
|
||
|
||
# Patterns tried in order against the flattened text of one quote element.
# DOTALL lets a quote span line breaks that survive inside a text node.
_QUOTE_PATTERNS = (
    # Curly-quoted text directly followed by the attribution dash. Greedy, so
    # quotation marks nested inside the quote do not cut it short.
    re.compile(r"“(.+)”\s*―", re.DOTALL),
    # Curly-quoted text with no attribution dash after it.
    re.compile(r"“(.+?)”", re.DOTALL),
    # No curly quotes at all: everything before the attribution dash.
    re.compile(r"(.+?)\s*―", re.DOTALL),
)


def _extract_quotes(html: str) -> list:
    """Parse quote/author pairs out of a Goodreads quotes page.

    Args:
        html: Page markup. An empty string simply yields an empty list.

    Returns:
        A list of ``{"content": str, "author": str}`` dicts, one for each
        ``<div class="quoteText">`` whose text matched a quote pattern.
        ``author`` is ``"Unknown"`` when no ``authorOrTitle`` span is found
        or the span is empty.
    """
    soup = BeautifulSoup(html, "lxml")
    quotes = []
    for div in soup.find_all("div", class_="quoteText"):
        full_text = div.get_text(" ", strip=True)

        content = ""
        for pattern in _QUOTE_PATTERNS:
            m = pattern.search(full_text)
            if m:
                content = m.group(1).strip()
                break
        if not content:
            # Nothing recognisable (or only whitespace) in this element.
            continue

        author_span = div.find("span", class_="authorOrTitle")
        author = ""
        if author_span:
            # The author name may carry a trailing comma (before a title).
            author = author_span.get_text(strip=True).rstrip(",").strip()
        quotes.append({"content": content, "author": author or "Unknown"})
    return quotes
|
||
|
||
async def _scrape(url: str, params: dict = None) -> str:
    """Load a page in a fresh browser context and return its rendered HTML.

    Args:
        url: Address to open.
        params: Optional query parameters; when given they are URL-encoded
            and appended to ``url`` after a ``?``.

    Returns:
        The page markup, or an empty string if building the URL, opening the
        page or loading it failed (the error is logged).

    Raises:
        Errors from obtaining the browser, creating the context or closing
        the context are not caught here and propagate to the caller.
    """
    browser = await _get_browser()
    context = await browser.new_context(user_agent="Mozilla/5.0 ...")
    try:
        if params:
            from urllib.parse import urlencode
            full_url = f"{url}?{urlencode(params)}"
        else:
            full_url = url
        # The page is created inside the try so that a failure here still
        # reaches the finally clause and the context is released.
        page = await context.new_page()
        await page.goto(full_url, wait_until="networkidle", timeout=15000)
        return await page.content()
    except Exception as e:
        logging.error(f"Scrape error: {e}")
        return ""
    finally:
        # Closing the context also closes every page that belongs to it, so
        # a single call releases everything even if the page never opened.
        await context.close()
|
||
|
||
async def get_random_popular() -> list:
    """Scrape the Goodreads popular-quotes page and return all quotes on it.

    Returns:
        The list produced by ``_extract_quotes`` for the ``GR_POPULAR`` page;
        picking one quote from it is left to the caller.
    """
    return _extract_quotes(await _scrape(GR_POPULAR))
|
||
|
||
async def get_author_quotes(author: str) -> list:
    """Collect quotes from the Goodreads quote search for ``author``.

    Result pages are fetched one after another, starting at page 1 and
    stopping after ``MAX_SEARCH_PAGES`` pages or as soon as a page yields
    fewer than ``QUOTES_PER_PAGE`` quotes.

    Args:
        author: Search text sent as the ``q`` query parameter.

    Returns:
        All quotes gathered from the fetched pages, in page order.
    """
    collected = []
    page_no = 1
    while page_no <= MAX_SEARCH_PAGES:
        query = {"q": author, "commit": "Search", "page": page_no}
        found = _extract_quotes(await _scrape(GR_SEARCH, query))
        collected.extend(found)
        if len(found) < QUOTES_PER_PAGE:
            # A short page means there is nothing further to fetch.
            break
        page_no += 1
    return collected
|
||
|
||
def format_quote(q):
    """Render a quote dict as the message text sent to the room.

    Args:
        q: Mapping with ``"content"`` and ``"author"`` entries.

    Returns:
        The content in double quotes, a blank line, then an em dash and the
        author. Both values are passed through ``html_escape`` first.
    """
    content, author = (html_escape(q[key]) for key in ("content", "author"))
    return f'"{content}"\n\n— {author}'
|
||
|
||
async def handle_command(room, message, bot, prefix, config):
    """Handle the ``quote`` command and reply with one Goodreads quote.

    With no arguments a random quote from the popular-quotes page is sent.
    With arguments they are joined into a search string and a random quote
    from the search results is sent. ``help``, ``-h`` or ``--help`` as the
    first argument sends usage information instead.

    Args:
        room: Room the message arrived in; replies go to ``room.room_id``.
        message: The incoming message event.
        bot: Bot instance whose ``api`` is used to send replies.
        prefix: Command prefix handed to ``botlib.MessageMatch``.
        config: Not used by this handler.

    Any exception raised while fetching or sending the quote is logged and
    reported to the room as a scraping error.
    """
    match = botlib.MessageMatch(room, message, bot, prefix)
    if not (match.is_not_from_this_bot() and match.prefix() and match.command("quote")):
        return

    args = match.args()
    if args and args[0].lower() in ("help", "-h", "--help"):
        # The placeholder is written with entities: a raw <author> inside this
        # HTML would be parsed as an unknown tag and vanish from the output.
        help_html = collapsible_summary(
            "📖 !quote help",
            "<ul><li><code>!quote</code> – random popular quote</li>"
            "<li><code>!quote &lt;author&gt;</code> – quote by author</li></ul>",
        )
        await bot.api.send_markdown_message(room.room_id, help_html)
        return

    try:
        if args:
            author = " ".join(args).strip()
            # This notice uses markdown bold, so it is sent as markdown and
            # the name is escaped (html_escape presumably HTML-escapes it;
            # confirm against plugins.common).
            await bot.api.send_markdown_message(
                room.room_id,
                f"🔍 Searching Goodreads for quotes by **{html_escape(author)}**…",
            )
            quotes = await get_author_quotes(author)
            # Plain-text notice: the raw name is used so that characters such
            # as & or ' are not shown as HTML entities.
            not_found = f"❌ No quotes found for '{author}'."
        else:
            await bot.api.send_text_message(room.room_id, "✨ Fetching a random popular quote…")
            quotes = await get_random_popular()
            not_found = "❌ Could not fetch any quotes."

        if not quotes:
            await bot.api.send_text_message(room.room_id, not_found)
            return

        chosen = random.choice(quotes)
        await bot.api.send_markdown_message(room.room_id, format_quote(chosen))
        logging.info(f"Quote sent: {chosen['author']}")
    except Exception as e:
        logging.exception("Unexpected error in quote plugin")
        await bot.api.send_text_message(room.room_id, f"❌ Scraping error: {e}")
|
||
|
||
# Plugin metadata (presumably read by the bot's plugin loader; confirm).
__version__ = "1.0.2"
__author__ = "Funguy Bot"
__description__ = "Goodreads quotes via Playwright (headless)"
# HTML help snippet. The argument placeholder uses entities so it is shown as
# literal text instead of being parsed as an unknown <author> tag.
__help__ = """<details><summary><strong>!quote</strong> – Quotes from Goodreads</summary>
<p><code>!quote</code> random, <code>!quote &lt;author&gt;</code>.</p></details>"""
|