Files
FunguyBot/plugins/quote.py
T

126 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Goodreads quote scraper using Playwright (headless Chromium).
"""
import logging
import random
import re
import asyncio
import simplematrixbotlib as botlib
from bs4 import BeautifulSoup
from plugins.common import html_escape, collapsible_summary
# Goodreads page listing popular quotes; fetched when no author is given.
GR_POPULAR = "https://www.goodreads.com/quotes"
# Goodreads quote search endpoint; queried with the author name as ``q``.
GR_SEARCH = "https://www.goodreads.com/quotes/search"
# Page size assumed by the search pager: a page yielding fewer quotes than
# this is treated as the last one.
QUOTES_PER_PAGE = 30
# Upper bound on the number of search result pages fetched per query.
MAX_SEARCH_PAGES = 3
# Lazily initialised Playwright handles; populated by _get_browser().
_browser = None
_playwright = None
async def _get_browser():
    """Return the shared headless Chromium browser, launching it if needed.

    The Playwright driver and the browser are cached in the module-level
    ``_playwright`` and ``_browser`` globals so later calls reuse them. If
    the cached browser is no longer connected (for example Chromium crashed
    or was closed), a new browser is launched from the existing driver
    instead of returning a dead handle.

    Returns:
        A connected Playwright ``Browser`` instance.
    """
    global _browser, _playwright
    if _browser is not None and _browser.is_connected():
        return _browser

    # Imported lazily; presumably so that loading the plugin does not
    # require Playwright up front -- confirm.
    from playwright.async_api import async_playwright

    # Reuse an already-started driver rather than starting (and leaking) a
    # second one when only the browser needs to be relaunched.
    if _playwright is None:
        _playwright = await async_playwright().start()
    _browser = await _playwright.chromium.launch(headless=True)
    logging.info("Playwright browser started")
    return _browser
def _extract_quotes(html: str) -> list:
    """Pull quotes out of a Goodreads HTML page.

    Every ``<div class="quoteText">`` is inspected. The quote body is the
    text between curly quotes, or failing that, whatever precedes the
    attribution bar. Divs matching neither pattern are skipped.

    Args:
        html: Raw HTML of a Goodreads quotes page.

    Returns:
        A list of ``{"content": ..., "author": ...}`` dicts; the author is
        ``"Unknown"`` when no ``authorOrTitle`` span is present.
    """
    parsed = BeautifulSoup(html, "lxml")
    results = []
    for block in parsed.find_all("div", class_="quoteText"):
        text = block.get_text(" ", strip=True)
        # Curly-quoted body first; fall back to the text before the bar.
        hit = re.search(r"“(.+?)”", text) or re.search(r"(.+?)\s*―", text)
        if not hit:
            continue
        byline = block.find("span", class_="authorOrTitle")
        if byline:
            # Drop the trailing comma that separates author from book title.
            who = byline.get_text(strip=True).rstrip(",")
        else:
            who = "Unknown"
        results.append({"content": hit.group(1).strip(), "author": who})
    return results
async def _scrape(url: str, params: dict = None) -> str:
    """Load a URL in a fresh browser context and return the rendered HTML.

    Args:
        url: Base URL to open.
        params: Optional query parameters; when given they are URL-encoded
            and appended to ``url`` after a ``?``.

    Returns:
        The page HTML once the network is idle, or ``""`` if building the
        URL, navigating, or reading the content fails (the error is logged).

    Raises:
        Exception: Errors from obtaining the browser, creating the context
            or opening the page are not swallowed and propagate to the
            caller.
    """
    from urllib.parse import urlencode

    browser = await _get_browser()
    context = await browser.new_context(user_agent="Mozilla/5.0 ...")
    try:
        # Opened inside the try so the context is released even if the page
        # cannot be created.
        page = await context.new_page()
        try:
            full_url = f"{url}?{urlencode(params)}" if params else url
            await page.goto(full_url, wait_until="networkidle", timeout=15000)
            return await page.content()
        except Exception as e:
            logging.error(f"Scrape error: {e}")
            return ""
    finally:
        # Closing the context also closes every page that belongs to it, so
        # no separate page.close() is needed and nothing can skip this step.
        await context.close()
async def get_random_popular() -> list:
    """Fetch the Goodreads popular-quotes page and return its quotes.

    Returns:
        Every quote parsed from the page (the caller picks one at random);
        empty when the page could not be scraped or holds no quotes.
    """
    return _extract_quotes(await _scrape(GR_POPULAR))
async def get_author_quotes(author: str) -> list:
    """Collect quotes from the Goodreads quote search for ``author``.

    Result pages are fetched in order, up to ``MAX_SEARCH_PAGES``. Paging
    stops early as soon as a page yields fewer than ``QUOTES_PER_PAGE``
    quotes.

    Args:
        author: Search text sent as the ``q`` query parameter.

    Returns:
        All quotes gathered across the fetched pages, in page order.
    """
    collected = []
    page_no = 1
    while page_no <= MAX_SEARCH_PAGES:
        query = {"q": author, "commit": "Search", "page": page_no}
        found = _extract_quotes(await _scrape(GR_SEARCH, query))
        collected += found
        # A short page means there is nothing further to fetch.
        if len(found) < QUOTES_PER_PAGE:
            break
        page_no += 1
    return collected
def format_quote(q):
    """Render a quote dict as the message text sent to the room.

    Args:
        q: Mapping with ``"content"`` and ``"author"`` keys.

    Returns:
        The HTML-escaped content wrapped in double quotes, a blank line,
        then the HTML-escaped author.
    """
    body = html_escape(q["content"])
    byline = html_escape(q["author"])
    return '"{}"\n\n{}'.format(body, byline)
async def handle_command(room, message, bot, prefix, config):
    """Handle the ``quote`` command for an incoming room message.

    With no arguments a random quote from the Goodreads popular page is
    posted. With ``help``/``-h``/``--help`` a usage summary is posted.
    Any other arguments are joined into an author search string and a random
    quote from the search results is posted.

    Args:
        room: Room the message arrived in; replies go to ``room.room_id``.
        message: The incoming message event.
        bot: Bot instance whose ``api`` is used to send replies.
        prefix: Command prefix the message must start with.
        config: Not used by this handler.

    Returns:
        None. Messages not addressed to this command are ignored.
    """
    match = botlib.MessageMatch(room, message, bot, prefix)
    if not (match.is_not_from_this_bot() and match.prefix() and match.command("quote")):
        return

    args = match.args()
    if args and args[0].lower() in ("help", "-h", "--help"):
        help_html = collapsible_summary(
            "📖 !quote help",
            "<ul><li><code>!quote</code> random popular quote</li>"
            "<li><code>!quote &lt;author&gt;</code> quote by author</li></ul>",
        )
        await bot.api.send_markdown_message(room.room_id, help_html)
        return

    try:
        if args:
            author = " ".join(args).strip()
            # Sent as Markdown so the ** emphasis is rendered rather than
            # shown literally; the author is escaped because the rendered
            # message is HTML.
            await bot.api.send_markdown_message(
                room.room_id,
                f"🔍 Searching Goodreads for quotes by **{html_escape(author)}**…",
            )
            quotes = await get_author_quotes(author)
            if not quotes:
                # Plain-text message: use the raw author, since HTML
                # entities would be displayed verbatim to the user.
                await bot.api.send_text_message(
                    room.room_id, f"❌ No quotes found for '{author}'."
                )
                return
        else:
            await bot.api.send_text_message(room.room_id, "✨ Fetching a random popular quote…")
            quotes = await get_random_popular()
            if not quotes:
                await bot.api.send_text_message(room.room_id, "❌ Could not fetch any quotes.")
                return

        chosen = random.choice(quotes)
        await bot.api.send_markdown_message(room.room_id, format_quote(chosen))
        logging.info("Quote sent: %s", chosen["author"])
    except Exception as e:
        # Top-level boundary for the command: log the traceback and tell
        # the room instead of letting the handler crash.
        logging.exception("Unexpected error in quote plugin")
        await bot.api.send_text_message(room.room_id, f"❌ Scraping error: {e}")
# Plugin metadata.
# NOTE(review): presumably read by the bot's plugin loader / help listing -- confirm.
__version__ = "1.0.2"
__author__ = "Funguy Bot"
__description__ = "Fetch Goodreads quotes"
# HTML snippet describing the command's usage.
__help__ = """<details><summary><strong>!quote</strong> Quotes from Goodreads</summary>
<p><code>!quote</code> random, <code>!quote &lt;author&gt;</code>.</p></details>"""