quote plugin added. Fixed funguy.py and plugins.py for plugin list.
This commit is contained in:
@@ -0,0 +1,199 @@
|
||||
"""
|
||||
Goodreads Quote Scraper – Playwright (headless Chromium)
|
||||
No external APIs, no keys; scrapes directly from goodreads.com
|
||||
"""
|
||||
|
||||
import logging
|
||||
import random
|
||||
import re
|
||||
import asyncio
|
||||
import simplematrixbotlib as botlib
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlencode
|
||||
|
||||
# Module-level logger for this plugin.
logger = logging.getLogger("quote")

# Goodreads endpoints scraped directly (no API keys required).
GR_POPULAR = "https://www.goodreads.com/quotes"
GR_SEARCH = "https://www.goodreads.com/quotes/search"
# Goodreads renders 30 quotes per results page; a shorter page means the last one.
QUOTES_PER_PAGE = 30
# Cap on search pagination to keep scrapes fast and polite.
MAX_SEARCH_PAGES = 3

# ---------------------------------------------------------------------------
# Playwright browser (shared, launched once)
# ---------------------------------------------------------------------------
# Lazily-initialised module singletons; created on first use and torn down together.
_browser = None
_playwright = None
async def _get_browser():
    """Return the shared headless Chromium instance, launching it on first use."""
    global _browser, _playwright
    if _browser is not None:
        return _browser
    # Import deferred so the module loads even when Playwright is absent.
    from playwright.async_api import async_playwright
    _playwright = await async_playwright().start()
    _browser = await _playwright.chromium.launch(headless=True)
    logger.info("Playwright browser started")
    return _browser
||||
async def _close_browser():
    """Release the shared Playwright resources (browser first, then the driver)."""
    global _browser, _playwright
    if _browser is not None:
        await _browser.close()
        _browser = None
    if _playwright is not None:
        await _playwright.stop()
        _playwright = None
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTML parsing (Goodreads specific)
|
||||
# ---------------------------------------------------------------------------
|
||||
def _extract_quotes(html: str) -> list[dict]:
    """Parse Goodreads HTML and return a list of {content, author} dicts.

    Tries the primary ``quoteText`` layout first; only if that yields
    nothing does it fall back to the alternative ``quoteDetails`` layout.
    (Previously both loops always ran, and since ``quoteDetails`` wraps
    ``quoteText``, every quote was collected twice.)
    """
    soup = BeautifulSoup(html, "lxml")
    quotes = []

    for div in soup.find_all("div", class_="quoteText"):
        full_text = div.get_text(" ", strip=True)
        # The quote body is wrapped in curly quotes; fall back to the text
        # preceding the horizontal-bar attribution marker.
        m = re.search(r"“(.+?)”", full_text)
        if not m:
            m = re.search(r"(.+?)\s*―", full_text)
        if not m:
            continue
        content = m.group(1).strip()

        author_span = div.find("span", class_="authorOrTitle")
        # Goodreads suffixes the author name with a trailing comma.
        author = author_span.get_text(strip=True).rstrip(",") if author_span else "Unknown"
        quotes.append({"content": content, "author": author})

    # Alternative layout — only when the first method yielded nothing,
    # otherwise quotes would be duplicated (quoteDetails contains quoteText).
    if not quotes:
        for div in soup.find_all("div", class_="quoteDetails"):
            text_elem = div.find("div", class_="quoteText")
            if not text_elem:
                continue
            content = text_elem.get_text(strip=True).strip("“”")
            author_elem = div.find("span", class_="authorOrTitle")
            author = author_elem.get_text(strip=True).rstrip(",") if author_elem else "Unknown"
            quotes.append({"content": content, "author": author})

    return quotes
|
||||
# ---------------------------------------------------------------------------
|
||||
# Page fetching
|
||||
# ---------------------------------------------------------------------------
|
||||
async def _scrape(url: str, params: dict | None = None) -> str:
    """Load *url* (optionally with query *params*) in a fresh browser
    context and return the rendered HTML, or "" on failure.

    A new context per request keeps cookies/session state isolated
    between scrapes.
    """
    browser = await _get_browser()
    context = await browser.new_context(
        # Real desktop-Chrome UA so Goodreads serves the normal page.
        user_agent=(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/125.0.0.0 Safari/537.36"
        )
    )
    page = await context.new_page()
    # Build the URL before the try block so the except handler can always
    # reference it safely.
    full_url = f"{url}?{urlencode(params)}" if params else url
    try:
        await page.goto(full_url, wait_until="networkidle", timeout=15000)
        return await page.content()
    except Exception as e:
        # Best-effort: a failed page load degrades to "no quotes", not a crash.
        logger.error("Failed to load %s: %s", full_url, e)
        return ""
    finally:
        await page.close()
        await context.close()
|
||||
async def get_random_popular() -> list[dict]:
    """Scrape the Goodreads popular-quotes page and return the parsed quotes."""
    return _extract_quotes(await _scrape(GR_POPULAR))
||||
async def get_author_quotes(author: str) -> list[dict]:
    """Search Goodreads for *author* and collect quotes across up to
    MAX_SEARCH_PAGES result pages, stopping early when a page comes back
    short (i.e. it was the last page of results)."""
    collected: list[dict] = []
    page_no = 1
    while page_no <= MAX_SEARCH_PAGES:
        html = await _scrape(GR_SEARCH, {"q": author, "commit": "Search", "page": page_no})
        batch = _extract_quotes(html)
        collected.extend(batch)
        if len(batch) < QUOTES_PER_PAGE:
            break
        page_no += 1
    return collected
|
||||
# ---------------------------------------------------------------------------
|
||||
# Formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
def format_quote(q: dict) -> str:
    """Render a {content, author} quote dict as a display string:
    the quoted text, a blank line, then an em-dash attribution."""
    content = q["content"]
    author = q["author"]
    return '"{}"\n\n— {}'.format(content, author)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Command handler
|
||||
# ---------------------------------------------------------------------------
|
||||
async def handle_command(room, message, bot, prefix, config):
    """Handle the ``!quote`` command.

    - ``!quote``           → random popular quote from Goodreads
    - ``!quote <author>``  → random quote by that author
    - ``!quote help``      → usage help

    *config* is accepted for plugin-interface compatibility but is unused
    here. Errors are reported to the room rather than crashing the bot.
    """
    match = botlib.MessageMatch(room, message, bot, prefix)
    if not (match.is_not_from_this_bot() and match.prefix() and match.command("quote")):
        return

    args = match.args()

    # Help
    if args and args[0].lower() in ("help", "-h", "--help"):
        help_html = (
            "<details><summary><strong>📖 !quote help</strong></summary>"
            "<ul>"
            "<li><code>!quote</code> – random popular quote from Goodreads</li>"
            "<li><code>!quote <author></code> – random quote by that author</li>"
            "<li><code>!quote help</code> – this</li>"
            "</ul>"
            "<p><b>Examples:</b><br><code>!quote</code><br>"
            "<code>!quote Terence McKenna</code><br>"
            "<code>!quote Oscar Wilde</code></p>"
            "<p>Scraped with Playwright (headless browser).</p>"
            "</details>"
        )
        await bot.api.send_markdown_message(room.room_id, help_html)
        return

    try:
        if args:
            author = " ".join(args).strip()
            await bot.api.send_text_message(
                room.room_id, f"🔍 Searching Goodreads for quotes by **{author}**…"
            )
            quotes = await get_author_quotes(author)
            if not quotes:
                await bot.api.send_text_message(
                    room.room_id,
                    f"❌ No quotes found for '**{author}**'. Try a different spelling."
                )
                return
        else:
            await bot.api.send_text_message(room.room_id, "✨ Fetching a random popular quote…")
            quotes = await get_random_popular()
            if not quotes:
                await bot.api.send_text_message(room.room_id, "❌ Could not fetch any quotes.")
                return

        # Both branches guarantee a non-empty list here; pick one at random.
        chosen = random.choice(quotes)
        await bot.api.send_markdown_message(room.room_id, format_quote(chosen))
        logger.info("Quote sent: %s", chosen["author"])

    except Exception as e:
        # Top-level boundary: log the traceback, then tell the room what went wrong.
        logger.exception("Unexpected error in quote plugin")
        await bot.api.send_text_message(
            room.room_id, f"❌ Scraping error: {e}"
        )
|
||||
# ---------------------------------------------------------------------------
# Plugin metadata
# ---------------------------------------------------------------------------
# Dunder attributes read by the bot's plugin loader to list/describe plugins.
__version__ = "1.0.1"
__author__ = "Funguy Bot"
__description__ = "Goodreads quotes via Playwright (headless browser)"
# HTML help blurb rendered by the bot's global help listing.
__help__ = """
<details>
<summary><strong>!quote</strong> – Quotes from Goodreads (scraped with Playwright)</summary>
<ul>
<li><code>!quote</code> – random popular quote</li>
<li><code>!quote <author></code> – random quote by that author</li>
<li><code>!quote help</code></li>
</ul>
<p>No API keys, no JSON files – just a real browser fetching from Goodreads.</p>
</details>
"""
||||
Reference in New Issue
Block a user