Nightly: refactor readability, progress callbacks, and resource handling

This commit is contained in:
2026-02-23 16:47:00 +01:00
parent 7a330c9bc0
commit d414fac022
13 changed files with 668 additions and 455 deletions

View File

@@ -17,7 +17,7 @@ import os
import re
import time
import unicodedata
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote
try: # pragma: no cover - optional dependency
@@ -80,6 +80,16 @@ SESSION_CACHE_MAX_TITLE_URLS = 800
CATALOG_SEARCH_TTL_SECONDS = 600
CATALOG_SEARCH_CACHE_KEY = "catalog_index"
_CATALOG_INDEX_MEMORY: tuple[float, List["SeriesResult"]] = (0.0, [])
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
if not callable(callback):
return
try:
callback(str(message or ""), None if percent is None else int(percent))
except Exception:
return
@dataclass
@@ -398,37 +408,56 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
_ensure_requests()
_log_visit(url)
sess = session or get_requests_session("serienstream", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
_log_response_html(url, response.text)
if _looks_like_cloudflare_challenge(response.text):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return BeautifulSoup(response.text, "html.parser")
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return BeautifulSoup(body, "html.parser")
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_html_simple(url: str) -> str:
_ensure_requests()
_log_visit(url)
sess = get_requests_session("serienstream", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
body = response.text
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return body
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return body
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_soup_simple(url: str) -> BeautifulSoupT:
@@ -472,6 +501,7 @@ def _search_series_api(query: str) -> List[SeriesResult]:
terms.extend([token for token in query.split() if token])
seen_urls: set[str] = set()
for term in terms:
response = None
try:
response = sess.get(
f"{_get_base_url()}/api/search/suggest",
@@ -486,6 +516,12 @@ def _search_series_api(query: str) -> List[SeriesResult]:
payload = response.json()
except Exception:
continue
finally:
if response is not None:
try:
response.close()
except Exception:
pass
shows = payload.get("shows") if isinstance(payload, dict) else None
if not isinstance(shows, list):
continue
@@ -558,7 +594,7 @@ def _search_series_server(query: str) -> List[SeriesResult]:
return []
def _extract_catalog_index_from_html(body: str) -> List[SeriesResult]:
def _extract_catalog_index_from_html(body: str, *, progress_callback: ProgressCallback = None) -> List[SeriesResult]:
items: List[SeriesResult] = []
if not body:
return items
@@ -569,7 +605,9 @@ def _extract_catalog_index_from_html(body: str) -> List[SeriesResult]:
)
anchor_re = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
data_search_re = re.compile(r"data-search=[\"']([^\"']*)[\"']", re.IGNORECASE)
for match in item_re.finditer(body):
for idx, match in enumerate(item_re.finditer(body), start=1):
if idx == 1 or idx % 200 == 0:
_emit_progress(progress_callback, f"Katalog parsen {idx}", 62)
block = match.group(0)
inner = match.group(1) or ""
anchor_match = anchor_re.search(inner)
@@ -651,26 +689,33 @@ def _store_catalog_index_in_cache(items: List[SeriesResult]) -> None:
_session_cache_set(CATALOG_SEARCH_CACHE_KEY, payload, ttl_seconds=CATALOG_SEARCH_TTL_SECONDS)
def search_series(query: str) -> List[SeriesResult]:
def search_series(query: str, *, progress_callback: ProgressCallback = None) -> List[SeriesResult]:
"""Sucht Serien im (/serien)-Katalog nach Titel. Nutzt Cache + Ein-Pass-Filter."""
_ensure_requests()
if not _normalize_search_text(query):
return []
_emit_progress(progress_callback, "Server-Suche", 15)
server_results = _search_series_server(query)
if server_results:
_emit_progress(progress_callback, f"Server-Treffer: {len(server_results)}", 35)
return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
_emit_progress(progress_callback, "Pruefe Such-Cache", 42)
cached = _load_catalog_index_from_cache()
if cached is not None:
_emit_progress(progress_callback, f"Cache-Treffer: {len(cached)}", 52)
return [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]
_emit_progress(progress_callback, "Lade Katalogseite", 58)
catalog_url = f"{_get_base_url()}/serien?by=genre"
body = _get_html_simple(catalog_url)
items = _extract_catalog_index_from_html(body)
items = _extract_catalog_index_from_html(body, progress_callback=progress_callback)
if not items:
_emit_progress(progress_callback, "Fallback-Parser", 70)
soup = BeautifulSoup(body, "html.parser")
items = _catalog_index_from_soup(soup)
if items:
_store_catalog_index_in_cache(items)
_emit_progress(progress_callback, f"Filtere Treffer ({len(items)})", 85)
return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
@@ -989,15 +1034,23 @@ def resolve_redirect(target_url: str) -> Optional[str]:
_get_soup(_get_base_url(), session=session)
except Exception:
pass
response = session.get(
normalized_url,
headers=HEADERS,
timeout=DEFAULT_TIMEOUT,
allow_redirects=True,
)
if response.url:
_log_url(response.url, kind="RESOLVED")
return response.url if response.url else None
response = None
try:
response = session.get(
normalized_url,
headers=HEADERS,
timeout=DEFAULT_TIMEOUT,
allow_redirects=True,
)
if response.url:
_log_url(response.url, kind="RESOLVED")
return response.url if response.url else None
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def scrape_series_detail(
@@ -1681,7 +1734,7 @@ class SerienstreamPlugin(BasisPlugin):
return self._episode_label_cache.get(cache_key, {}).get(episode_label)
return None
async def search_titles(self, query: str) -> List[str]:
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
query = query.strip()
if not query:
self._series_results.clear()
@@ -1695,7 +1748,8 @@ class SerienstreamPlugin(BasisPlugin):
try:
# Nutzt den Katalog (/serien), der jetzt nach Genres gruppiert ist.
# Alternativ gäbe es ein Ajax-Endpoint, aber der ist nicht immer zuverlässig erreichbar.
results = search_series(query)
_emit_progress(progress_callback, "Serienstream Suche startet", 10)
results = search_series(query, progress_callback=progress_callback)
except Exception as exc: # pragma: no cover - defensive logging
self._series_results.clear()
self._season_cache.clear()
@@ -1708,6 +1762,7 @@ class SerienstreamPlugin(BasisPlugin):
self._season_cache.clear()
self._season_links_cache.clear()
self._episode_label_cache.clear()
_emit_progress(progress_callback, f"Treffer aufbereitet: {len(results)}", 95)
return [result.title for result in results]
def _ensure_seasons(self, title: str) -> List[SeasonInfo]: