dev: bump to 0.1.66 and harden resolveurl + serienstream

2026-02-25 16:35:16 +01:00
parent 74d15cb25e
commit 73f07d20b4
20 changed files with 522 additions and 232 deletions


@@ -43,8 +43,10 @@ except ImportError: # pragma: no cover - allow running outside Kodi
 from plugin_interface import BasisPlugin
 from plugin_helpers import dump_response_html, get_setting_bool, get_setting_string, log_error, log_url, notify_url
-from http_session_pool import get_requests_session
+from http_session_pool import close_all_sessions, get_requests_session
 from regex_patterns import SEASON_EPISODE_TAG, SEASON_EPISODE_URL
+from search_utils import matches_query as _matches_query, normalize_search_text as _normalize_search_text
+from genre_utils import normalize_genre_label as _normalize_genre_label
 
 if TYPE_CHECKING: # pragma: no cover
     from requests import Session as RequestsSession
@@ -293,27 +295,6 @@ def _normalize_text(value: str) -> str:
     return value
 
-def _normalize_search_text(value: str) -> str:
-    """Normalizes text for search without "fusing" word boundaries.
-
-    Important: non-alphanumeric characters are replaced with spaces instead of
-    being removed. This prevents artificial matches across word boundaries
-    (e.g. "an" + "na" -> "anna").
-    """
-    value = (value or "").casefold()
-    value = re.sub(r"[^a-z0-9]+", " ", value)
-    value = re.sub(r"\s+", " ", value).strip()
-    return value
-
-def _matches_query(query: str, *, title: str) -> bool:
-    normalized_query = _normalize_search_text(query)
-    if not normalized_query:
-        return False
-    haystack = f" {_normalize_search_text(title)} "
-    return f" {normalized_query} " in haystack
-
 def _is_episode_tba(title: str, original_title: str) -> bool:
     combined = f"{title} {original_title}".casefold()
     markers = ("tba", "demnächst", "demnaechst", "coming soon", "to be announced")
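Note: _normalize_search_text and _matches_query moved to search_utils (see the new import above); the deleted docstring documents the word-boundary trick. A standalone sketch of that property, assuming the extracted helpers are behavior-identical (the sample titles are invented):

import re

def normalize_search_text(value: str) -> str:
    # Replace non-alphanumerics with spaces instead of deleting them,
    # so neighbouring words never fuse into a new token.
    value = (value or "").casefold()
    value = re.sub(r"[^a-z0-9]+", " ", value)
    return re.sub(r"\s+", " ", value).strip()

def matches_query(query: str, *, title: str) -> bool:
    normalized = normalize_search_text(query)
    if not normalized:
        return False
    return f" {normalized} " in f" {normalize_search_text(title)} "

# Deleting the hyphen would turn "Plan-Nacht" into "plannacht", which
# contains the substring "anna"; replacing it with a space does not.
assert not matches_query("anna", title="Plan-Nacht")
assert matches_query("anna", title="Anna und die Liebe")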
@@ -392,6 +373,42 @@ def _ensure_requests() -> None:
         raise RuntimeError("requests/bs4 sind nicht verfuegbar.")
 
+def _is_retryable_http_error(exc: Exception) -> bool:
+    text = str(exc or "").casefold()
+    markers = (
+        "connection reset by peer",
+        "connection aborted",
+        "remote end closed connection",
+        "temporarily unavailable",
+        "timed out",
+        "read timeout",
+    )
+    return any(marker in text for marker in markers)
+
+
+def _http_get(url: str, *, timeout: int, params: Optional[dict[str, str]] = None):
+    _ensure_requests()
+    last_exc: Exception | None = None
+    for attempt in range(2):
+        sess = get_requests_session("serienstream", headers=HEADERS)
+        try:
+            response = sess.get(url, params=params, headers=HEADERS, timeout=timeout)
+            response.raise_for_status()
+            return response
+        except Exception as exc:
+            last_exc = exc
+            if attempt >= 1 or not _is_retryable_http_error(exc):
+                raise
+            _log_error(f"GET {url} retry nach Fehler: {exc}")
+            try:
+                close_all_sessions()
+            except Exception:
+                pass
+    if last_exc is not None:
+        raise last_exc
+    raise RuntimeError(f"GET {url} fehlgeschlagen")
+
+
 def _looks_like_cloudflare_challenge(body: str) -> bool:
     lower = body.lower()
     markers = (
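The new _http_get retries exactly once, only for transport-level failures, and drops the pooled sessions before the second attempt. A minimal standalone sketch of that retry-once contract (the fake transport below is invented test scaffolding, not plugin code):

calls: list[str] = []

def fake_get(url: str) -> str:
    # Fails with a transient transport error on the first call only.
    calls.append(url)
    if len(calls) == 1:
        raise OSError("connection reset by peer")
    return "ok"

def get_with_one_retry(url: str) -> str:
    last_exc: Exception | None = None
    for attempt in range(2):
        try:
            return fake_get(url)
        except Exception as exc:
            last_exc = exc
            # Second failure, or a non-transient error: propagate immediately.
            if attempt >= 1 or "connection reset by peer" not in str(exc).casefold():
                raise
    raise last_exc  # unreachable here; mirrors the defensive tail above

assert get_with_one_retry("https://example.org") == "ok"
assert len(calls) == 2  # first attempt failed, second succeeded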
@@ -409,11 +426,13 @@ def _looks_like_cloudflare_challenge(body: str) -> bool:
 def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> BeautifulSoupT:
     _ensure_requests()
     _log_visit(url)
-    sess = session or get_requests_session("serienstream", headers=HEADERS)
     response = None
     try:
-        response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
-        response.raise_for_status()
+        if session is not None:
+            response = session.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
+            response.raise_for_status()
+        else:
+            response = _http_get(url, timeout=DEFAULT_TIMEOUT)
     except Exception as exc:
         _log_error(f"GET {url} failed: {exc}")
         raise
@@ -437,11 +456,9 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
 def _get_html_simple(url: str) -> str:
     _ensure_requests()
     _log_visit(url)
-    sess = get_requests_session("serienstream", headers=HEADERS)
     response = None
     try:
-        response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
-        response.raise_for_status()
+        response = _http_get(url, timeout=DEFAULT_TIMEOUT)
     except Exception as exc:
         _log_error(f"GET {url} failed: {exc}")
         raise
@@ -468,27 +485,6 @@ def _get_soup_simple(url: str) -> BeautifulSoupT:
 def _extract_genre_names_from_html(body: str) -> List[str]:
-    def _normalize_genre_label(raw: str) -> str:
-        text = unescape(re.sub(r"\s+", " ", str(raw or ""))).strip()
-        if not text:
-            return ""
-        key_prefix = "filter.genre_"
-        if text.casefold().startswith(key_prefix):
-            slug = text[len(key_prefix) :].strip().casefold()
-            slug = slug.replace("_", "-")
-            slug = re.sub(r"[^a-z0-9-]+", "-", slug).strip("-")
-            if not slug:
-                return ""
-            special = {
-                "doku-soap": "Doku-Soap",
-                "scifi": "SciFi",
-                "fighting-shounen": "Fighting-Shounen",
-            }
-            if slug in special:
-                return special[slug]
-            return " ".join(chunk.capitalize() for chunk in slug.split("-") if chunk)
-        return text
-
     names: List[str] = []
     seen: set[str] = set()
     pattern = re.compile(
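This nested helper is deduplicated into genre_utils (imported above as _normalize_genre_label). A condensed sketch of the mapping it implements, assuming the extracted version is behavior-identical (the sample labels are invented):

import re
from html import unescape

def normalize_genre_label(raw: str) -> str:
    # Collapse whitespace and decode HTML entities, then turn
    # "filter.genre_*" keys into display labels.
    text = unescape(re.sub(r"\s+", " ", str(raw or ""))).strip()
    if not text:
        return ""
    key_prefix = "filter.genre_"
    if text.casefold().startswith(key_prefix):
        slug = text[len(key_prefix):].strip().casefold().replace("_", "-")
        slug = re.sub(r"[^a-z0-9-]+", "-", slug).strip("-")
        if not slug:
            return ""
        special = {"doku-soap": "Doku-Soap", "scifi": "SciFi", "fighting-shounen": "Fighting-Shounen"}
        return special.get(slug) or " ".join(chunk.capitalize() for chunk in slug.split("-") if chunk)
    return text

assert normalize_genre_label("filter.genre_doku_soap") == "Doku-Soap"
assert normalize_genre_label("filter.genre_action_adventure") == "Action Adventure"
assert normalize_genre_label("Drama") == "Drama"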
@@ -577,9 +573,6 @@ def _search_series_api(query: str) -> List[SeriesResult]:
 def _search_series_server(query: str) -> List[SeriesResult]:
     if not query:
         return []
-    api_results = _search_series_api(query)
-    if api_results:
-        return api_results
     base = _get_base_url()
     search_url = f"{base}/search?q={quote(query)}"
     alt_url = f"{base}/suche?q={quote(query)}"
@@ -614,6 +607,9 @@ def _search_series_server(query: str) -> List[SeriesResult]:
             results.append(SeriesResult(title=title, description="", url=url_abs))
     if results:
         return results
+    api_results = _search_series_api(query)
+    if api_results:
+        return api_results
     return []
@@ -718,56 +714,45 @@ def search_series(query: str, *, progress_callback: ProgressCallback = None) ->
     _ensure_requests()
     if not _normalize_search_text(query):
         return []
-    _emit_progress(progress_callback, "Server-Suche", 15)
-    server_results = _search_series_server(query)
-    if server_results:
-        _emit_progress(progress_callback, f"Server-Treffer: {len(server_results)}", 35)
-        return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
-    _emit_progress(progress_callback, "Pruefe Such-Cache", 42)
+    _emit_progress(progress_callback, "Pruefe Such-Cache", 15)
     cached = _load_catalog_index_from_cache()
     if cached is not None:
-        _emit_progress(progress_callback, f"Cache-Treffer: {len(cached)}", 52)
-        return [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]
+        matched_from_cache = [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]
+        _emit_progress(progress_callback, f"Cache-Treffer: {len(cached)}", 35)
+        if matched_from_cache:
+            return matched_from_cache
-    _emit_progress(progress_callback, "Lade Katalogseite", 58)
+    _emit_progress(progress_callback, "Lade Katalogseite", 42)
     catalog_url = f"{_get_base_url()}/serien?by=genre"
-    body = _get_html_simple(catalog_url)
-    items = _extract_catalog_index_from_html(body, progress_callback=progress_callback)
-    if not items:
-        _emit_progress(progress_callback, "Fallback-Parser", 70)
-        soup = BeautifulSoup(body, "html.parser")
-        items = _catalog_index_from_soup(soup)
+    items: List[SeriesResult] = []
+    try:
+        # Prefer the soup helper so tests can inject HTML easily.
+        soup = _get_soup_simple(catalog_url)
+        items = _catalog_index_from_soup(soup)
+    except Exception:
+        body = _get_html_simple(catalog_url)
+        items = _extract_catalog_index_from_html(body, progress_callback=progress_callback)
+        if not items:
+            _emit_progress(progress_callback, "Fallback-Parser", 58)
+            soup = BeautifulSoup(body, "html.parser")
+            items = _catalog_index_from_soup(soup)
     if items:
         _store_catalog_index_in_cache(items)
-    _emit_progress(progress_callback, f"Filtere Treffer ({len(items)})", 85)
-    return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
+        _emit_progress(progress_callback, f"Filtere Treffer ({len(items)})", 70)
+        return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
+    _emit_progress(progress_callback, "Server-Suche", 85)
+    server_results = _search_series_server(query)
+    if server_results:
+        _emit_progress(progress_callback, f"Server-Treffer: {len(server_results)}", 95)
+        return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
+    return []
 
 def parse_series_catalog(soup: BeautifulSoupT) -> Dict[str, List[SeriesResult]]:
     """Parses the series overview page (/serien) and returns genre -> series list."""
     catalog: Dict[str, List[SeriesResult]] = {}
 
-    def _normalize_genre_label(raw: str) -> str:
-        text = re.sub(r"\s+", " ", str(raw or "")).strip()
-        if not text:
-            return ""
-        key_prefix = "filter.genre_"
-        if text.casefold().startswith(key_prefix):
-            slug = text[len(key_prefix) :].strip().casefold()
-            slug = slug.replace("_", "-")
-            slug = re.sub(r"[^a-z0-9-]+", "-", slug).strip("-")
-            if not slug:
-                return ""
-            special = {
-                "doku-soap": "Doku-Soap",
-                "scifi": "SciFi",
-                "fighting-shounen": "Fighting-Shounen",
-            }
-            if slug in special:
-                return special[slug]
-            return " ".join(chunk.capitalize() for chunk in slug.split("-") if chunk)
-        return text
 
     # New layout (as of 2026-01): group header + list.
     # - Header: `div.background-1 ...` with `h3`
     # - Entries: `ul.series-list` -> `li.series-item[data-search]` -> `a[href]`
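For illustration, markup matching the layout described in the comment above, parsed with the selectors it names (the HTML sample is invented, not captured from the site):

from bs4 import BeautifulSoup

html = """
<div class="background-1"><h3>Action</h3></div>
<ul class="series-list">
  <li class="series-item" data-search="beispiel serie">
    <a href="/serie/stream/beispiel-serie">Beispiel-Serie</a>
  </li>
</ul>
"""
soup = BeautifulSoup(html, "html.parser")
for header in soup.select("div.background-1 h3"):
    print("genre:", header.get_text(strip=True))
for item in soup.select("ul.series-list li.series-item[data-search] a[href]"):
    print("series:", item.get_text(strip=True), "->", item["href"])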