nightly: playback fast-path, windows asyncio fix, v0.1.56

2026-02-19 14:10:09 +01:00
parent 307df97d74
commit 4409f9432c
8 changed files with 382 additions and 44 deletions


@@ -17,7 +17,8 @@ import os
import re
import time
import unicodedata
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeAlias
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from urllib.parse import quote
try: # pragma: no cover - optional dependency
import requests
@@ -49,14 +50,15 @@ if TYPE_CHECKING: # pragma: no cover
from requests import Session as RequestsSession
from bs4 import BeautifulSoup as BeautifulSoupT # type: ignore[import-not-found]
else: # pragma: no cover
RequestsSession: TypeAlias = Any
BeautifulSoupT: TypeAlias = Any
RequestsSession = Any
BeautifulSoupT = Any
SETTING_BASE_URL = "serienstream_base_url"
DEFAULT_BASE_URL = "https://s.to"
DEFAULT_PREFERRED_HOSTERS = ["voe"]
DEFAULT_TIMEOUT = 20
SEARCH_TIMEOUT = 8
ADDON_ID = "plugin.video.viewit"
GLOBAL_SETTING_LOG_URLS = "debug_log_urls"
GLOBAL_SETTING_DUMP_HTML = "debug_dump_html"
@@ -75,6 +77,9 @@ HEADERS = {
SESSION_CACHE_TTL_SECONDS = 300
SESSION_CACHE_PREFIX = "viewit.serienstream"
SESSION_CACHE_MAX_TITLE_URLS = 800
CATALOG_SEARCH_TTL_SECONDS = 600
CATALOG_SEARCH_CACHE_KEY = "catalog_index"
_CATALOG_INDEX_MEMORY: Tuple[float, List["SeriesResult"]] = (0.0, [])
@dataclass
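# Note on the cache constants above: _CATALOG_INDEX_MEMORY is a process-local
# (expires_at, items) tuple that fronts the persisted session cache, so repeat
# searches within CATALOG_SEARCH_TTL_SECONDS skip both the HTTP fetch and the
# cache deserialization. The (0.0, []) initializer guarantees a miss on the
# first read, because any time.time() value exceeds 0.0.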
@@ -451,20 +456,222 @@ def _extract_genre_names_from_html(body: str) -> List[str]:
return names
def _strip_tags(value: str) -> str:
return re.sub(r"<[^>]+>", " ", value or "")
def _search_series_api(query: str) -> List[SeriesResult]:
query = (query or "").strip()
if not query:
return []
_ensure_requests()
sess = get_requests_session("serienstream", headers=HEADERS)
terms = [query]
if " " in query:
# Fallback: individual tokens often yield better matches from the API.
terms.extend([token for token in query.split() if token])
seen_urls: set[str] = set()
for term in terms:
try:
response = sess.get(
f"{_get_base_url()}/api/search/suggest",
params={"term": term},
headers=HEADERS,
timeout=SEARCH_TIMEOUT,
)
response.raise_for_status()
except Exception:
continue
try:
payload = response.json()
except Exception:
continue
shows = payload.get("shows") if isinstance(payload, dict) else None
if not isinstance(shows, list):
continue
results: List[SeriesResult] = []
for item in shows:
if not isinstance(item, dict):
continue
title = (item.get("name") or "").strip()
href = (item.get("url") or "").strip()
if not title or not href:
continue
url_abs = _absolute_url(href)
if not url_abs or url_abs in seen_urls:
continue
if "/staffel-" in url_abs or "/episode-" in url_abs:
continue
seen_urls.add(url_abs)
results.append(SeriesResult(title=title, description="", url=url_abs))
if not results:
continue
filtered = [entry for entry in results if _matches_query(query, title=entry.title)]
if filtered:
return filtered
# If only the token search produced results, return those at least.
if term != query:
return results
return []
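# For reference, the suggest-API response shape that _search_series_api
# assumes, per the parser above (field names from the code, values invented):
#
#   {"shows": [{"name": "Example Show", "url": "/serie/example-show"}, ...]}
#
# Season/episode deep links ("/staffel-", "/episode-") are filtered out so
# only series landing pages survive deduplication.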
def _search_series_server(query: str) -> List[SeriesResult]:
if not query:
return []
api_results = _search_series_api(query)
if api_results:
return api_results
base = _get_base_url()
search_url = f"{base}/search?q={quote(query)}"
alt_url = f"{base}/suche?q={quote(query)}"
for url in (search_url, alt_url):
try:
body = _get_html_simple(url)
except Exception:
continue
if not body:
continue
soup = BeautifulSoup(body, "html.parser")
root = soup.select_one(".search-results-list")
if root is None:
continue
seen_urls: set[str] = set()
results: List[SeriesResult] = []
for card in root.select(".cover-card"):
anchor = card.select_one("a[href*='/serie/']")
if not anchor:
continue
href = (anchor.get("href") or "").strip()
url_abs = _absolute_url(href)
if not url_abs or url_abs in seen_urls:
continue
if "/staffel-" in url_abs or "/episode-" in url_abs:
continue
title_tag = card.select_one(".show-title") or card.select_one("h3") or card.select_one("h4")
title = (title_tag.get_text(" ", strip=True) if title_tag else anchor.get_text(" ", strip=True)).strip()
if not title:
continue
seen_urls.add(url_abs)
results.append(SeriesResult(title=title, description="", url=url_abs))
if results:
return results
return []
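# The HTML fallback above expects result markup of roughly this shape (an
# assumed example; only the selectors come from the code):
#
#   <div class="search-results-list">
#     <div class="cover-card">
#       <a href="/serie/example-show"><h3 class="show-title">Example Show</h3></a>
#     </div>
#   </div>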
def _extract_catalog_index_from_html(body: str) -> List[SeriesResult]:
items: List[SeriesResult] = []
if not body:
return items
seen_urls: set[str] = set()
item_re = re.compile(
r"<li[^>]*class=[\"'][^\"']*series-item[^\"']*[\"'][^>]*>(.*?)</li>",
re.IGNORECASE | re.DOTALL,
)
anchor_re = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
data_search_re = re.compile(r"data-search=[\"']([^\"']*)[\"']", re.IGNORECASE)
for match in item_re.finditer(body):
block = match.group(0)
inner = match.group(1) or ""
anchor_match = anchor_re.search(inner)
if not anchor_match:
continue
href = (anchor_match.group(1) or "").strip()
url = _absolute_url(href)
if not url or "/serie/" not in url or "/staffel-" in url or "/episode-" in url:
continue
if url in seen_urls:
continue
seen_urls.add(url)
title_raw = anchor_match.group(2) or ""
title = unescape(re.sub(r"\s+", " ", _strip_tags(title_raw))).strip()
if not title:
continue
search_match = data_search_re.search(block)
description = (search_match.group(1) or "").strip() if search_match else ""
items.append(SeriesResult(title=title, description=description, url=url))
return items
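# The regex pass above targets catalog entries of roughly this shape (an
# assumed example; the class and attribute names come from the patterns):
#
#   <li class="series-item" data-search="example show alternative title">
#     <a href="/serie/example-show">Example <em>Show</em></a>
#   </li>
#
# _strip_tags() plus unescape() flatten the anchor body to "Example Show",
# and data-search is carried along as the entry's description.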
def _catalog_index_from_soup(soup: BeautifulSoupT) -> List[SeriesResult]:
items: List[SeriesResult] = []
if not soup:
return items
seen_urls: set[str] = set()
for item in soup.select("li.series-item"):
anchor = item.find("a", href=True)
if not anchor:
continue
href = (anchor.get("href") or "").strip()
url = _absolute_url(href)
if not url or "/serie/" not in url or "/staffel-" in url or "/episode-" in url:
continue
if url in seen_urls:
continue
seen_urls.add(url)
title = (anchor.get_text(" ", strip=True) or "").strip()
if not title:
continue
description = (item.get("data-search") or "").strip()
items.append(SeriesResult(title=title, description=description, url=url))
return items
def _load_catalog_index_from_cache() -> Optional[List[SeriesResult]]:
global _CATALOG_INDEX_MEMORY
expires_at, cached = _CATALOG_INDEX_MEMORY
if cached and expires_at > time.time():
return list(cached)
raw = _session_cache_get(CATALOG_SEARCH_CACHE_KEY)
if not isinstance(raw, list):
return None
items: List[SeriesResult] = []
for entry in raw:
if not isinstance(entry, list) or len(entry) < 2:
continue
title = str(entry[0] or "").strip()
url = str(entry[1] or "").strip()
description = str(entry[2] or "") if len(entry) > 2 else ""
if title and url:
items.append(SeriesResult(title=title, description=description, url=url))
if items:
_CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(items))
return items or None
def _store_catalog_index_in_cache(items: List[SeriesResult]) -> None:
global _CATALOG_INDEX_MEMORY
if not items:
return
_CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(items))
payload: List[List[str]] = []
for entry in items:
if not entry.title or not entry.url:
continue
payload.append([entry.title, entry.url, entry.description])
_session_cache_set(CATALOG_SEARCH_CACHE_KEY, payload, ttl_seconds=CATALOG_SEARCH_TTL_SECONDS)
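# Round-trip sketch for the two cache helpers above (illustrative only):
#
#   items = [SeriesResult(title="Example Show", description="",
#                         url="https://s.to/serie/example-show")]
#   _store_catalog_index_in_cache(items)   # memory + session cache, shared TTL
#   assert _load_catalog_index_from_cache()[0].url == items[0].url
#
# Entries persist as plain [title, url, description] lists, so the session
# cache backend only has to handle strings.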
def search_series(query: str) -> List[SeriesResult]:
"""Sucht Serien im (/serien)-Katalog (Genre-liste) nach Titel/Alt-Titel."""
"""Sucht Serien im (/serien)-Katalog nach Titel. Nutzt Cache + Ein-Pass-Filter."""
_ensure_requests()
if not _normalize_search_text(query):
return []
# Direct fetch, as in fetch_serien.py.
server_results = _search_series_server(query)
if server_results:
return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
cached = _load_catalog_index_from_cache()
if cached is not None:
return [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]
catalog_url = f"{_get_base_url()}/serien?by=genre"
soup = _get_soup_simple(catalog_url)
results: List[SeriesResult] = []
for series in parse_series_catalog(soup).values():
for entry in series:
if entry.title and _matches_query(query, title=entry.title):
results.append(entry)
return results
body = _get_html_simple(catalog_url)
items = _extract_catalog_index_from_html(body)
if not items:
soup = BeautifulSoup(body, "html.parser")
items = _catalog_index_from_soup(soup)
if items:
_store_catalog_index_in_cache(items)
return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
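# Resolution order of search_series after this change:
#   1. suggest API, then /search and /suche HTML (_search_series_server)
#   2. in-memory / session-cached catalog index
#   3. full /serien?by=genre catalog fetch (regex pass, bs4 fallback)
# Each stage that yields results short-circuits the ones below it.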
def parse_series_catalog(soup: BeautifulSoupT) -> Dict[str, List[SeriesResult]]:
@@ -1569,6 +1776,18 @@ class SerienstreamPlugin(BasisPlugin):
except Exception as exc: # pragma: no cover - defensive logging
raise RuntimeError(f"Stream-Link konnte nicht geladen werden: {exc}") from exc
def episode_url_for(self, title: str, season: str, episode: str) -> str:
cache_key = (title, season)
cached = self._episode_label_cache.get(cache_key)
if cached:
info = cached.get(episode)
if info and info.url:
return info.url
episode_info = self._lookup_episode(title, season, episode)
if episode_info and episode_info.url:
return episode_info.url
return ""
def available_hosters_for(self, title: str, season: str, episode: str) -> List[str]:
if not self._requests_available:
raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 keine Hoster laden.")