main: consolidate integrated changes after v0.1.54

This commit is contained in:
2026-02-23 20:38:36 +01:00
parent 6ce1bf71c1
commit 22b9ae9c31
38 changed files with 3139 additions and 1306 deletions

View File

@@ -17,7 +17,8 @@ import os
import re
import time
import unicodedata
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeAlias
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote
try: # pragma: no cover - optional dependency
import requests
@@ -49,14 +50,15 @@ if TYPE_CHECKING: # pragma: no cover
from requests import Session as RequestsSession
from bs4 import BeautifulSoup as BeautifulSoupT # type: ignore[import-not-found]
else: # pragma: no cover
RequestsSession: TypeAlias = Any
BeautifulSoupT: TypeAlias = Any
RequestsSession = Any
BeautifulSoupT = Any
SETTING_BASE_URL = "serienstream_base_url"
DEFAULT_BASE_URL = "https://s.to"
DEFAULT_PREFERRED_HOSTERS = ["voe"]
DEFAULT_TIMEOUT = 20
SEARCH_TIMEOUT = 8
ADDON_ID = "plugin.video.viewit"
GLOBAL_SETTING_LOG_URLS = "debug_log_urls"
GLOBAL_SETTING_DUMP_HTML = "debug_dump_html"
@@ -75,6 +77,19 @@ HEADERS = {
SESSION_CACHE_TTL_SECONDS = 300
SESSION_CACHE_PREFIX = "viewit.serienstream"
SESSION_CACHE_MAX_TITLE_URLS = 800
CATALOG_SEARCH_TTL_SECONDS = 600
CATALOG_SEARCH_CACHE_KEY = "catalog_index"
_CATALOG_INDEX_MEMORY: tuple[float, List["SeriesResult"]] = (0.0, [])
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
if not callable(callback):
return
try:
callback(str(message or ""), None if percent is None else int(percent))
except Exception:
return
@dataclass
@@ -111,6 +126,57 @@ class SeasonInfo:
episodes: List[EpisodeInfo]
def _extract_series_metadata(soup: BeautifulSoupT) -> Tuple[Dict[str, str], Dict[str, str]]:
info: Dict[str, str] = {}
art: Dict[str, str] = {}
if not soup:
return info, art
title_tag = soup.select_one("h1")
title = (title_tag.get_text(" ", strip=True) if title_tag else "").strip()
if title:
info["title"] = title
description = ""
desc_tag = soup.select_one(".series-description .description-text")
if desc_tag:
description = (desc_tag.get_text(" ", strip=True) or "").strip()
if not description:
meta_desc = soup.select_one("meta[property='og:description'], meta[name='description']")
if meta_desc:
description = (meta_desc.get("content") or "").strip()
if description:
info["plot"] = description
poster = ""
poster_tag = soup.select_one(
".show-cover-mobile img[data-src], .show-cover-mobile img[src], .col-3 img[data-src], .col-3 img[src]"
)
if poster_tag:
poster = (poster_tag.get("data-src") or poster_tag.get("src") or "").strip()
if not poster:
for candidate in soup.select("img[data-src], img[src]"):
url = (candidate.get("data-src") or candidate.get("src") or "").strip()
if "/media/images/channel/" in url:
poster = url
break
if poster:
poster = _absolute_url(poster)
art["poster"] = poster
art["thumb"] = poster
fanart = ""
fanart_tag = soup.select_one("meta[property='og:image']")
if fanart_tag:
fanart = (fanart_tag.get("content") or "").strip()
if fanart:
fanart = _absolute_url(fanart)
art["fanart"] = fanart
art["landscape"] = fanart
return info, art
def _get_base_url() -> str:
base = get_setting_string(ADDON_ID, SETTING_BASE_URL, default=DEFAULT_BASE_URL).strip()
if not base:
@@ -342,37 +408,56 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
_ensure_requests()
_log_visit(url)
sess = session or get_requests_session("serienstream", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
_log_response_html(url, response.text)
if _looks_like_cloudflare_challenge(response.text):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return BeautifulSoup(response.text, "html.parser")
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return BeautifulSoup(body, "html.parser")
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_html_simple(url: str) -> str:
_ensure_requests()
_log_visit(url)
sess = get_requests_session("serienstream", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
body = response.text
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return body
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return body
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_soup_simple(url: str) -> BeautifulSoupT:
@@ -400,20 +485,238 @@ def _extract_genre_names_from_html(body: str) -> List[str]:
return names
def search_series(query: str) -> List[SeriesResult]:
"""Sucht Serien im (/serien)-Katalog (Genre-liste) nach Titel/Alt-Titel."""
def _strip_tags(value: str) -> str:
return re.sub(r"<[^>]+>", " ", value or "")
def _search_series_api(query: str) -> List[SeriesResult]:
query = (query or "").strip()
if not query:
return []
_ensure_requests()
sess = get_requests_session("serienstream", headers=HEADERS)
terms = [query]
if " " in query:
# Fallback: einzelne Tokens liefern in der API oft bessere Treffer.
terms.extend([token for token in query.split() if token])
seen_urls: set[str] = set()
for term in terms:
response = None
try:
response = sess.get(
f"{_get_base_url()}/api/search/suggest",
params={"term": term},
headers=HEADERS,
timeout=SEARCH_TIMEOUT,
)
response.raise_for_status()
except Exception:
continue
try:
payload = response.json()
except Exception:
continue
finally:
if response is not None:
try:
response.close()
except Exception:
pass
shows = payload.get("shows") if isinstance(payload, dict) else None
if not isinstance(shows, list):
continue
results: List[SeriesResult] = []
for item in shows:
if not isinstance(item, dict):
continue
title = (item.get("name") or "").strip()
href = (item.get("url") or "").strip()
if not title or not href:
continue
url_abs = _absolute_url(href)
if not url_abs or url_abs in seen_urls:
continue
if "/staffel-" in url_abs or "/episode-" in url_abs:
continue
seen_urls.add(url_abs)
results.append(SeriesResult(title=title, description="", url=url_abs))
if not results:
continue
filtered = [entry for entry in results if _matches_query(query, title=entry.title)]
if filtered:
return filtered
# Falls nur Token-Suche möglich war, zumindest die Ergebnisse liefern.
if term != query:
return results
return []
def _search_series_server(query: str) -> List[SeriesResult]:
if not query:
return []
api_results = _search_series_api(query)
if api_results:
return api_results
base = _get_base_url()
search_url = f"{base}/search?q={quote(query)}"
alt_url = f"{base}/suche?q={quote(query)}"
for url in (search_url, alt_url):
try:
body = _get_html_simple(url)
except Exception:
continue
if not body:
continue
soup = BeautifulSoup(body, "html.parser")
root = soup.select_one(".search-results-list")
if root is None:
continue
seen_urls: set[str] = set()
results: List[SeriesResult] = []
for card in root.select(".cover-card"):
anchor = card.select_one("a[href*='/serie/']")
if not anchor:
continue
href = (anchor.get("href") or "").strip()
url_abs = _absolute_url(href)
if not url_abs or url_abs in seen_urls:
continue
if "/staffel-" in url_abs or "/episode-" in url_abs:
continue
title_tag = card.select_one(".show-title") or card.select_one("h3") or card.select_one("h4")
title = (title_tag.get_text(" ", strip=True) if title_tag else anchor.get_text(" ", strip=True)).strip()
if not title:
continue
seen_urls.add(url_abs)
results.append(SeriesResult(title=title, description="", url=url_abs))
if results:
return results
return []
def _extract_catalog_index_from_html(body: str, *, progress_callback: ProgressCallback = None) -> List[SeriesResult]:
items: List[SeriesResult] = []
if not body:
return items
seen_urls: set[str] = set()
item_re = re.compile(
r"<li[^>]*class=[\"'][^\"']*series-item[^\"']*[\"'][^>]*>(.*?)</li>",
re.IGNORECASE | re.DOTALL,
)
anchor_re = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
data_search_re = re.compile(r"data-search=[\"']([^\"']*)[\"']", re.IGNORECASE)
for idx, match in enumerate(item_re.finditer(body), start=1):
if idx == 1 or idx % 200 == 0:
_emit_progress(progress_callback, f"Katalog parsen {idx}", 62)
block = match.group(0)
inner = match.group(1) or ""
anchor_match = anchor_re.search(inner)
if not anchor_match:
continue
href = (anchor_match.group(1) or "").strip()
url = _absolute_url(href)
if not url or "/serie/" not in url or "/staffel-" in url or "/episode-" in url:
continue
if url in seen_urls:
continue
seen_urls.add(url)
title_raw = anchor_match.group(2) or ""
title = unescape(re.sub(r"\s+", " ", _strip_tags(title_raw))).strip()
if not title:
continue
search_match = data_search_re.search(block)
description = (search_match.group(1) or "").strip() if search_match else ""
items.append(SeriesResult(title=title, description=description, url=url))
return items
def _catalog_index_from_soup(soup: BeautifulSoupT) -> List[SeriesResult]:
items: List[SeriesResult] = []
if not soup:
return items
seen_urls: set[str] = set()
for item in soup.select("li.series-item"):
anchor = item.find("a", href=True)
if not anchor:
continue
href = (anchor.get("href") or "").strip()
url = _absolute_url(href)
if not url or "/serie/" not in url or "/staffel-" in url or "/episode-" in url:
continue
if url in seen_urls:
continue
seen_urls.add(url)
title = (anchor.get_text(" ", strip=True) or "").strip()
if not title:
continue
description = (item.get("data-search") or "").strip()
items.append(SeriesResult(title=title, description=description, url=url))
return items
def _load_catalog_index_from_cache() -> Optional[List[SeriesResult]]:
global _CATALOG_INDEX_MEMORY
expires_at, cached = _CATALOG_INDEX_MEMORY
if cached and expires_at > time.time():
return list(cached)
raw = _session_cache_get(CATALOG_SEARCH_CACHE_KEY)
if not isinstance(raw, list):
return None
items: List[SeriesResult] = []
for entry in raw:
if not isinstance(entry, list) or len(entry) < 2:
continue
title = str(entry[0] or "").strip()
url = str(entry[1] or "").strip()
description = str(entry[2] or "") if len(entry) > 2 else ""
if title and url:
items.append(SeriesResult(title=title, description=description, url=url))
if items:
_CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(items))
return items or None
def _store_catalog_index_in_cache(items: List[SeriesResult]) -> None:
global _CATALOG_INDEX_MEMORY
if not items:
return
_CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(items))
payload: List[List[str]] = []
for entry in items:
if not entry.title or not entry.url:
continue
payload.append([entry.title, entry.url, entry.description])
_session_cache_set(CATALOG_SEARCH_CACHE_KEY, payload, ttl_seconds=CATALOG_SEARCH_TTL_SECONDS)
def search_series(query: str, *, progress_callback: ProgressCallback = None) -> List[SeriesResult]:
"""Sucht Serien im (/serien)-Katalog nach Titel. Nutzt Cache + Ein-Pass-Filter."""
_ensure_requests()
if not _normalize_search_text(query):
return []
# Direkter Abruf wie in fetch_serien.py.
_emit_progress(progress_callback, "Server-Suche", 15)
server_results = _search_series_server(query)
if server_results:
_emit_progress(progress_callback, f"Server-Treffer: {len(server_results)}", 35)
return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
_emit_progress(progress_callback, "Pruefe Such-Cache", 42)
cached = _load_catalog_index_from_cache()
if cached is not None:
_emit_progress(progress_callback, f"Cache-Treffer: {len(cached)}", 52)
return [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]
_emit_progress(progress_callback, "Lade Katalogseite", 58)
catalog_url = f"{_get_base_url()}/serien?by=genre"
soup = _get_soup_simple(catalog_url)
results: List[SeriesResult] = []
for series in parse_series_catalog(soup).values():
for entry in series:
if entry.title and _matches_query(query, title=entry.title):
results.append(entry)
return results
body = _get_html_simple(catalog_url)
items = _extract_catalog_index_from_html(body, progress_callback=progress_callback)
if not items:
_emit_progress(progress_callback, "Fallback-Parser", 70)
soup = BeautifulSoup(body, "html.parser")
items = _catalog_index_from_soup(soup)
if items:
_store_catalog_index_in_cache(items)
_emit_progress(progress_callback, f"Filtere Treffer ({len(items)})", 85)
return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
def parse_series_catalog(soup: BeautifulSoupT) -> Dict[str, List[SeriesResult]]:
@@ -731,15 +1034,23 @@ def resolve_redirect(target_url: str) -> Optional[str]:
_get_soup(_get_base_url(), session=session)
except Exception:
pass
response = session.get(
normalized_url,
headers=HEADERS,
timeout=DEFAULT_TIMEOUT,
allow_redirects=True,
)
if response.url:
_log_url(response.url, kind="RESOLVED")
return response.url if response.url else None
response = None
try:
response = session.get(
normalized_url,
headers=HEADERS,
timeout=DEFAULT_TIMEOUT,
allow_redirects=True,
)
if response.url:
_log_url(response.url, kind="RESOLVED")
return response.url if response.url else None
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def scrape_series_detail(
@@ -785,7 +1096,7 @@ class SerienstreamPlugin(BasisPlugin):
name = "Serienstream"
version = "1.0.0"
POPULAR_GENRE_LABEL = "⭐ Beliebte Serien"
POPULAR_GENRE_LABEL = "Haeufig gesehen"
def __init__(self) -> None:
self._series_results: Dict[str, SeriesResult] = {}
@@ -805,6 +1116,7 @@ class SerienstreamPlugin(BasisPlugin):
self._hoster_cache: Dict[Tuple[str, str, str], List[str]] = {}
self._latest_cache: Dict[int, List[LatestEpisode]] = {}
self._latest_hoster_cache: Dict[str, List[str]] = {}
self._series_metadata_cache: Dict[str, Tuple[Dict[str, str], Dict[str, str]]] = {}
self.is_available = True
self.unavailable_reason: Optional[str] = None
if not self._requests_available: # pragma: no cover - optional dependency
@@ -851,12 +1163,30 @@ class SerienstreamPlugin(BasisPlugin):
cache_key = title.casefold()
if self._title_url_cache.get(cache_key) != url:
self._title_url_cache[cache_key] = url
self._save_title_url_cache()
self._save_title_url_cache()
if url:
return
current = self._series_results.get(title)
if current is None:
self._series_results[title] = SeriesResult(title=title, description=description, url="")
@staticmethod
def _metadata_cache_key(title: str) -> str:
return (title or "").strip().casefold()
def _series_for_title(self, title: str) -> Optional[SeriesResult]:
direct = self._series_results.get(title)
if direct and direct.url:
return direct
lookup_key = (title or "").strip().casefold()
for item in self._series_results.values():
if item.title.casefold().strip() == lookup_key and item.url:
return item
cached_url = self._title_url_cache.get(lookup_key, "")
if cached_url:
return SeriesResult(title=title, description="", url=cached_url)
return None
@staticmethod
def _season_links_cache_name(series_url: str) -> str:
digest = hashlib.sha1((series_url or "").encode("utf-8")).hexdigest()[:20]
@@ -1274,7 +1604,28 @@ class SerienstreamPlugin(BasisPlugin):
self._season_links_cache[title] = list(session_links)
return list(session_links)
try:
seasons = scrape_series_detail(series.url, load_episodes=False)
series_soup = _get_soup(series.url, session=get_requests_session("serienstream", headers=HEADERS))
info_labels, art = _extract_series_metadata(series_soup)
if series.description and "plot" not in info_labels:
info_labels["plot"] = series.description
cache_key = self._metadata_cache_key(title)
if info_labels or art:
self._series_metadata_cache[cache_key] = (info_labels, art)
base_series_url = _series_root_url(_extract_canonical_url(series_soup, series.url))
season_links = _extract_season_links(series_soup)
season_count = _extract_number_of_seasons(series_soup)
if season_count and (not season_links or len(season_links) < season_count):
existing = {number for number, _ in season_links}
for number in range(1, season_count + 1):
if number in existing:
continue
season_url = f"{base_series_url}/staffel-{number}"
_log_parsed_url(season_url)
season_links.append((number, season_url))
season_links.sort(key=lambda item: item[0])
seasons = [SeasonInfo(number=number, url=url, episodes=[]) for number, url in season_links]
seasons.sort(key=lambda s: s.number)
except Exception as exc: # pragma: no cover - defensive logging
raise RuntimeError(f"Serienstream-Staffeln konnten nicht geladen werden: {exc}") from exc
self._season_links_cache[title] = list(seasons)
@@ -1288,6 +1639,41 @@ class SerienstreamPlugin(BasisPlugin):
return
self._remember_series_result(title, series_url)
def metadata_for(self, title: str) -> Tuple[Dict[str, str], Dict[str, str], Optional[List[Any]]]:
title = (title or "").strip()
if not title or not self._requests_available:
return {}, {}, None
cache_key = self._metadata_cache_key(title)
cached = self._series_metadata_cache.get(cache_key)
if cached is not None:
info, art = cached
return dict(info), dict(art), None
series = self._series_for_title(title)
if series is None or not series.url:
info = {"title": title}
self._series_metadata_cache[cache_key] = (dict(info), {})
return info, {}, None
info: Dict[str, str] = {"title": title}
art: Dict[str, str] = {}
if series.description:
info["plot"] = series.description
try:
soup = _get_soup(series.url, session=get_requests_session("serienstream", headers=HEADERS))
parsed_info, parsed_art = _extract_series_metadata(soup)
if parsed_info:
info.update(parsed_info)
if parsed_art:
art.update(parsed_art)
except Exception:
pass
self._series_metadata_cache[cache_key] = (dict(info), dict(art))
return info, art, None
def series_url_for_title(self, title: str) -> str:
title = (title or "").strip()
if not title:
@@ -1348,7 +1734,7 @@ class SerienstreamPlugin(BasisPlugin):
return self._episode_label_cache.get(cache_key, {}).get(episode_label)
return None
async def search_titles(self, query: str) -> List[str]:
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
query = query.strip()
if not query:
self._series_results.clear()
@@ -1362,7 +1748,8 @@ class SerienstreamPlugin(BasisPlugin):
try:
# Nutzt den Katalog (/serien), der jetzt nach Genres gruppiert ist.
# Alternativ gäbe es ein Ajax-Endpoint, aber der ist nicht immer zuverlässig erreichbar.
results = search_series(query)
_emit_progress(progress_callback, "Serienstream Suche startet", 10)
results = search_series(query, progress_callback=progress_callback)
except Exception as exc: # pragma: no cover - defensive logging
self._series_results.clear()
self._season_cache.clear()
@@ -1375,6 +1762,7 @@ class SerienstreamPlugin(BasisPlugin):
self._season_cache.clear()
self._season_links_cache.clear()
self._episode_label_cache.clear()
_emit_progress(progress_callback, f"Treffer aufbereitet: {len(results)}", 95)
return [result.title for result in results]
def _ensure_seasons(self, title: str) -> List[SeasonInfo]:
@@ -1443,6 +1831,18 @@ class SerienstreamPlugin(BasisPlugin):
except Exception as exc: # pragma: no cover - defensive logging
raise RuntimeError(f"Stream-Link konnte nicht geladen werden: {exc}") from exc
def episode_url_for(self, title: str, season: str, episode: str) -> str:
cache_key = (title, season)
cached = self._episode_label_cache.get(cache_key)
if cached:
info = cached.get(episode)
if info and info.url:
return info.url
episode_info = self._lookup_episode(title, season, episode)
if episode_info and episode_info.url:
return episode_info.url
return ""
def available_hosters_for(self, title: str, season: str, episode: str) -> List[str]:
if not self._requests_available:
raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 keine Hoster laden.")