Core & Architektur: - Neues Verzeichnis addon/core/ mit router.py, trakt.py, metadata.py, gui.py, playstate.py, plugin_manager.py, updater.py - Tests-Verzeichnis hinzugefügt (24 Tests, pytest + Coverage) Trakt-Integration: - OAuth Device Flow, Scrobbling, Watchlist, History, Calendar - Upcoming Episodes, Weiterschauen (Continue Watching) - Watched-Status in Episodenlisten - _trakt_find_in_plugins() mit 5-Min-Cache Serienstream-Suche: - API-Ergebnisse werden immer mit Katalog-Cache ergänzt (serverseitiges 10-Treffer-Limit) - Katalog-Cache wird beim Addon-Start im Daemon-Thread vorgewärmt - Notification nach Cache-Load via xbmc.executebuiltin() (thread-sicher) Bugfixes (Code-Review): - Race Condition auf _TRAKT_WATCHED_CACHE: _TRAKT_WATCHED_CACHE_LOCK hinzugefügt - GUI-Dialog aus Daemon-Thread: xbmcgui -> xbmc.executebuiltin() - ValueError in Trakt-Watchlist-Routen abgesichert - Token expires_at==0 Check korrigiert - get_setting_bool() Kontrollfluss in gui.py bereinigt - topstreamfilm_plugin: try-finally um xbmcvfs.File.close() Cleanup: - default.py.bak und refactor_router.py entfernt - .gitignore: /tests/ Eintrag entfernt - Type-Hints vereinheitlicht (Dict/List/Tuple -> dict/list/tuple)
2134 lines
81 KiB
Python
2134 lines
81 KiB
Python
"""Serienstream (s.to) Integration als Downloader-Plugin.
|
|
|
|
Hinweise:
|
|
- Diese Integration nutzt optional `requests` + `beautifulsoup4` (bs4).
|
|
- In Kodi koennen zusaetzliche Debug-Funktionen ueber Addon-Settings aktiviert werden
|
|
(URL-Logging, HTML-Dumps, Benachrichtigungen).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from html import unescape
|
|
import json
|
|
import hashlib
|
|
import os
|
|
import re
|
|
import time
|
|
import unicodedata
|
|
from typing import TYPE_CHECKING, Any, Callable, Optional
|
|
from urllib.parse import quote
|
|
|
|
try: # pragma: no cover - optional dependency
|
|
import requests
|
|
from bs4 import BeautifulSoup # type: ignore[import-not-found]
|
|
except ImportError as exc: # pragma: no cover - optional dependency
|
|
requests = None
|
|
BeautifulSoup = None
|
|
REQUESTS_AVAILABLE = False
|
|
REQUESTS_IMPORT_ERROR = exc
|
|
else:
|
|
REQUESTS_AVAILABLE = True
|
|
REQUESTS_IMPORT_ERROR = None
|
|
|
|
try: # pragma: no cover - optional Kodi helpers
|
|
import xbmcaddon # type: ignore[import-not-found]
|
|
import xbmcvfs # type: ignore[import-not-found]
|
|
import xbmcgui # type: ignore[import-not-found]
|
|
except ImportError: # pragma: no cover - allow running outside Kodi
|
|
xbmcaddon = None
|
|
xbmcvfs = None
|
|
xbmcgui = None
|
|
|
|
from plugin_interface import BasisPlugin
|
|
from plugin_helpers import dump_response_html, get_setting_bool, get_setting_string, log_error, log_url, notify_url
|
|
from http_session_pool import close_all_sessions, get_requests_session
|
|
from regex_patterns import SEASON_EPISODE_TAG, SEASON_EPISODE_URL
|
|
from search_utils import matches_query as _matches_query, normalize_search_text as _normalize_search_text
|
|
from genre_utils import normalize_genre_label as _normalize_genre_label
|
|
|
|
if TYPE_CHECKING: # pragma: no cover
|
|
from requests import Session as RequestsSession
|
|
from bs4 import BeautifulSoup as BeautifulSoupT # type: ignore[import-not-found]
|
|
else: # pragma: no cover
|
|
RequestsSession = Any
|
|
BeautifulSoupT = Any
|
|
|
|
|
|
# Addon setting id that holds the site base URL, and the URL used when it is empty.
SETTING_BASE_URL = "serienstream_base_url"
DEFAULT_BASE_URL = "https://s.to"
# Hoster names tried first when picking a stream link (compared lower-cased).
DEFAULT_PREFERRED_HOSTERS = ["voe"]
# Timeouts handed to the HTTP client: regular page fetches vs. the search-suggest API.
DEFAULT_TIMEOUT = 20
SEARCH_TIMEOUT = 8
# Addon id passed to the plugin_helpers setting/logging functions.
ADDON_ID = "plugin.video.viewit"
# Setting ids of the addon-wide debug switches.
GLOBAL_SETTING_LOG_URLS = "debug_log_urls"
GLOBAL_SETTING_DUMP_HTML = "debug_dump_html"
GLOBAL_SETTING_SHOW_URL_INFO = "debug_show_url_info"
GLOBAL_SETTING_LOG_ERRORS = "debug_log_errors"
# Setting ids of the Serienstream-specific debug switches.
SETTING_LOG_URLS = "log_urls_serienstream"
SETTING_DUMP_HTML = "dump_html_serienstream"
SETTING_SHOW_URL_INFO = "show_url_info_serienstream"
SETTING_LOG_ERRORS = "log_errors_serienstream"
# HTTP headers sent with every request issued by this module.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Kodi; ViewIt) AppleWebKit/537.36 (KHTML, like Gecko)",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
}
# Default lifetime (seconds) of entries written by _session_cache_set.
SESSION_CACHE_TTL_SECONDS = 300
# Prefix of the window-property keys built by _session_cache_key.
SESSION_CACHE_PREFIX = "viewit.serienstream"
# NOTE(review): not referenced in the visible part of the file; presumably caps
# the number of cached title->URL entries -- confirm against the remaining code.
SESSION_CACHE_MAX_TITLE_URLS = 800
# Lifetime (seconds) and cache key of the catalog index used by search_series.
CATALOG_SEARCH_TTL_SECONDS = 600
CATALOG_SEARCH_CACHE_KEY = "catalog_index"
# NOTE(review): not referenced in the visible part of the file; presumably the
# page size of genre listings -- confirm against the remaining code.
GENRE_LIST_PAGE_SIZE = 20
# In-process copy of the catalog index: (expiry as epoch seconds, entries).
_CATALOG_INDEX_MEMORY: tuple[float, list["SeriesResult"]] = (0.0, [])
# Optional progress callback, invoked as callback(message, percent_or_None).
ProgressCallback = Optional[Callable[[str, int | None], Any]]
|
|
|
|
|
|
def _emit_progress(callback: ProgressCallback, message: str, percent: int | None = None) -> None:
|
|
if not callable(callback):
|
|
return
|
|
try:
|
|
callback(str(message or ""), None if percent is None else int(percent))
|
|
except Exception:
|
|
return
|
|
|
|
|
|
@dataclass
class SeriesResult:
    """A series entry as produced by the search and catalog parsers."""

    # Display title of the series.
    title: str
    # Free-text description; the catalog parsers fill it from `data-search`.
    description: str
    # Absolute URL of the series page.
    url: str
    # Cover image URL; empty when none is known.
    cover: str = ""
|
|
|
|
|
|
@dataclass
class EpisodeInfo:
    """A single episode row parsed from a season page."""

    # Episode number within its season.
    number: int
    # Title taken from the `.episode-title-ger` cell (or "Episode <n>" as fallback).
    title: str
    # Title taken from the `.episode-title-eng` cell; may be empty.
    original_title: str
    # Absolute URL of the episode page; may be empty when no link was found.
    url: str
    # Heading text of the season section the episode was found in.
    season_label: str = ""
    # Language codes derived from the row's flag icons (e.g. "DE", "EN").
    languages: list[str] = field(default_factory=list)
    # Hoster names read from the row's watch-cell icons.
    hosters: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class LatestEpisode:
    """An entry of the "latest episodes" list on the start page."""

    # Title of the series the episode belongs to.
    series_title: str
    # Season number.
    season: int
    # Episode number.
    episode: int
    # Absolute URL of the linked page.
    url: str
    # Air date as text; NOTE(review): format is not visible here -- confirm against the parser.
    airdate: str
|
|
|
|
|
|
@dataclass
class SeasonInfo:
    """A season together with its parsed episodes."""

    # Season number.
    number: int
    # URL of the season page.
    url: str
    # Episodes belonging to this season.
    episodes: list[EpisodeInfo]
|
|
|
|
|
|
def _extract_series_metadata(soup: BeautifulSoupT) -> tuple[dict[str, str], dict[str, str]]:
|
|
info: dict[str, str] = {}
|
|
art: dict[str, str] = {}
|
|
if not soup:
|
|
return info, art
|
|
|
|
title_tag = soup.select_one("h1")
|
|
title = (title_tag.get_text(" ", strip=True) if title_tag else "").strip()
|
|
if title:
|
|
info["title"] = title
|
|
|
|
description = ""
|
|
desc_tag = soup.select_one(".series-description .description-text")
|
|
if desc_tag:
|
|
description = (desc_tag.get_text(" ", strip=True) or "").strip()
|
|
if not description:
|
|
meta_desc = soup.select_one("meta[property='og:description'], meta[name='description']")
|
|
if meta_desc:
|
|
description = (meta_desc.get("content") or "").strip()
|
|
if description:
|
|
info["plot"] = description
|
|
|
|
poster = ""
|
|
poster_tag = soup.select_one(
|
|
".show-cover-mobile img[data-src], .show-cover-mobile img[src], .col-3 img[data-src], .col-3 img[src]"
|
|
)
|
|
if poster_tag:
|
|
poster = (poster_tag.get("data-src") or poster_tag.get("src") or "").strip()
|
|
if not poster:
|
|
for candidate in soup.select("img[data-src], img[src]"):
|
|
url = (candidate.get("data-src") or candidate.get("src") or "").strip()
|
|
if "/media/images/channel/" in url:
|
|
poster = url
|
|
break
|
|
if poster:
|
|
poster = _absolute_url(poster)
|
|
art["poster"] = poster
|
|
art["thumb"] = poster
|
|
|
|
fanart = ""
|
|
fanart_tag = soup.select_one("meta[property='og:image']")
|
|
if fanart_tag:
|
|
fanart = (fanart_tag.get("content") or "").strip()
|
|
if fanart:
|
|
fanart = _absolute_url(fanart)
|
|
art["fanart"] = fanart
|
|
art["landscape"] = fanart
|
|
|
|
return info, art
|
|
|
|
|
|
def _get_base_url() -> str:
    """Return the configured site base URL without a trailing slash.

    Falls back to ``DEFAULT_BASE_URL`` when the setting is blank.
    """
    configured = get_setting_string(ADDON_ID, SETTING_BASE_URL, default=DEFAULT_BASE_URL).strip()
    return (configured or DEFAULT_BASE_URL).rstrip("/")
|
|
|
|
|
|
def _series_base_url() -> str:
    """Return the URL prefix under which series pages live."""
    return _get_base_url() + "/serie/stream"
|
|
|
|
|
|
def _popular_series_url() -> str:
    """Return the URL of the "popular series" page."""
    return _get_base_url() + "/beliebte-serien"
|
|
|
|
|
|
def _latest_episodes_url() -> str:
    """Return the URL listing the latest episodes (the site's start page)."""
    start_page = _get_base_url()
    return str(start_page)
|
|
|
|
|
|
def _absolute_url(href: str) -> str:
|
|
return f"{_get_base_url()}{href}" if href.startswith("/") else href
|
|
|
|
|
|
def _session_window() -> Any:
    """Return the Kodi window with id 10000, or ``None`` when unavailable.

    ``None`` is returned when ``xbmcgui`` could not be imported or when
    creating the window object fails.
    """
    if xbmcgui is not None:
        try:
            return xbmcgui.Window(10000)
        except Exception:
            pass
    return None
|
|
|
|
|
|
def _session_cache_key(name: str) -> str:
    """Build the window-property key for cache entry ``name``.

    The key embeds the first 12 hex characters of the SHA-1 of the current
    base URL, so entries stored for one base URL are not read for another.
    """
    digest = hashlib.sha1(_get_base_url().encode("utf-8")).hexdigest()
    base_tag = digest[:12]
    return f"{SESSION_CACHE_PREFIX}.{base_tag}.{name}"
|
|
|
|
|
|
def _session_cache_get(name: str) -> Any:
    """Read cache entry ``name`` from the session window.

    Returns the stored ``data`` value, or ``None`` when no window is
    available, the property is empty or unreadable, the stored JSON is not
    an object, or the entry has expired.
    """
    window = _session_window()
    if window is None:
        return None
    try:
        serialized = window.getProperty(_session_cache_key(name)) or ""
    except Exception:
        return None
    if not serialized:
        return None
    try:
        envelope = json.loads(serialized)
    except Exception:
        return None
    if not isinstance(envelope, dict):
        return None
    try:
        # A missing or zero expiry counts as already expired.
        expired = float(envelope.get("expires_at") or 0) <= time.time()
    except Exception:
        return None
    return None if expired else envelope.get("data")
|
|
|
|
|
|
def _session_cache_set(name: str, data: Any, *, ttl_seconds: int = SESSION_CACHE_TTL_SECONDS) -> None:
    """Store ``data`` as cache entry ``name`` in the session window.

    The value is JSON-encoded together with its expiry time (now plus
    ``ttl_seconds``, at least one second). Nothing is stored when no window
    is available, the data is not JSON-serialisable, the encoded payload is
    longer than 240,000 characters, or setting the property fails.
    """
    window = _session_window()
    if window is None:
        return
    lifetime = max(1, int(ttl_seconds))
    envelope = {"expires_at": float(time.time() + lifetime), "data": data}
    try:
        serialized = json.dumps(envelope, ensure_ascii=False, separators=(",", ":"))
    except Exception:
        return
    # Kodi window properties are not durable storage; cap the size so the UI stays stable.
    if len(serialized) > 240_000:
        return
    try:
        window.setProperty(_session_cache_key(name), serialized)
    except Exception:
        pass
|
|
|
|
|
|
def _normalize_series_url(identifier: str) -> str:
|
|
if identifier.startswith("http://") or identifier.startswith("https://"):
|
|
return identifier.rstrip("/")
|
|
slug = identifier.strip("/")
|
|
return f"{_series_base_url()}/{slug}"
|
|
|
|
|
|
def _series_root_url(url: str) -> str:
|
|
"""Normalisiert eine Serien-URL auf die Root-URL (ohne /staffel-x oder /episode-x)."""
|
|
normalized = (url or "").strip().rstrip("/")
|
|
normalized = re.sub(r"/staffel-\d+(?:/.*)?$", "", normalized)
|
|
normalized = re.sub(r"/episode-\d+(?:/.*)?$", "", normalized)
|
|
return normalized.rstrip("/")
|
|
|
|
|
|
def _log_visit(url: str) -> None:
    """Record that ``url`` is about to be fetched.

    Writes a ``VISIT`` entry through ``_log_url``, triggers ``_notify_url``
    and, when ``xbmcaddon`` is not importable (running outside Kodi), also
    prints the URL to stdout.
    """
    _log_url(url, kind="VISIT")
    _notify_url(url)
    running_outside_kodi = xbmcaddon is None
    if running_outside_kodi:
        print(f"Visiting: {url}")
|
|
|
|
|
|
def _normalize_text(value: str) -> str:
|
|
"""Legacy normalization (kept for backwards compatibility)."""
|
|
value = value.casefold()
|
|
value = re.sub(r"[^a-z0-9]+", "", value)
|
|
return value
|
|
|
|
|
|
def _is_episode_tba(title: str, original_title: str) -> bool:
|
|
combined = f"{title} {original_title}".casefold()
|
|
markers = ("tba", "demnächst", "demnaechst", "coming soon", "to be announced")
|
|
return any(marker in combined for marker in markers)
|
|
|
|
|
|
def _row_is_upcoming(row: BeautifulSoupT) -> bool:
    """Tell whether an episode table row describes a not-yet-available episode.

    A row counts as upcoming when it carries the CSS class ``upcoming``,
    contains a non-empty ``.badge-upcoming`` element, or the text of its
    ``.episode-watch-cell`` contains "tba" (case-insensitive).
    """
    css_classes = row.get("class") or []
    if isinstance(css_classes, str):
        css_classes = css_classes.split()
    if "upcoming" in css_classes:
        return True

    upcoming_badge = row.select_one(".badge-upcoming")
    if upcoming_badge and (upcoming_badge.get_text(" ", strip=True) or "").strip():
        return True

    watch_cell = row.select_one(".episode-watch-cell")
    if not watch_cell:
        return False
    return "tba" in watch_cell.get_text(" ", strip=True).casefold()
|
|
|
|
|
|
def _get_setting_bool(setting_id: str, *, default: bool = False) -> bool:
    """Read boolean addon setting ``setting_id`` for this addon's ``ADDON_ID``."""
    value = get_setting_bool(ADDON_ID, setting_id, default=default)
    return value
|
|
|
|
|
|
def _notify_url(url: str) -> None:
    """Forward ``url`` to ``notify_url`` under the heading "Serienstream".

    Passes the global and the plugin-specific "show URL info" setting ids;
    how they are evaluated is up to the helper.
    """
    options = {
        "heading": "Serienstream",
        "url": url,
        "enabled_setting_id": GLOBAL_SETTING_SHOW_URL_INFO,
        "plugin_setting_id": SETTING_SHOW_URL_INFO,
    }
    notify_url(ADDON_ID, **options)
|
|
|
|
|
|
def _log_url(url: str, *, kind: str = "VISIT") -> None:
    """Forward ``url`` to ``log_url`` with the given ``kind`` tag.

    Targets the log file ``serienstream_urls.log`` and passes the global and
    plugin-specific URL-logging setting ids; how they are evaluated is up to
    the helper.
    """
    options = {
        "enabled_setting_id": GLOBAL_SETTING_LOG_URLS,
        "plugin_setting_id": SETTING_LOG_URLS,
        "log_filename": "serienstream_urls.log",
        "url": url,
        "kind": kind,
    }
    log_url(ADDON_ID, **options)
|
|
|
|
|
|
def _log_parsed_url(url: str) -> None:
    """Log ``url`` as one that was extracted from a page (kind ``PARSE``)."""
    entry_kind = "PARSE"
    _log_url(url, kind=entry_kind)
|
|
|
|
|
|
def _log_response_html(url: str, body: str) -> None:
    """Forward a response body to ``dump_response_html``.

    Uses the file name prefix ``s_to_response`` and passes the global and
    plugin-specific HTML-dump setting ids; how they are evaluated is up to
    the helper.
    """
    options = {
        "enabled_setting_id": GLOBAL_SETTING_DUMP_HTML,
        "plugin_setting_id": SETTING_DUMP_HTML,
        "url": url,
        "body": body,
        "filename_prefix": "s_to_response",
    }
    dump_response_html(ADDON_ID, **options)
|
|
|
|
|
|
def _log_error(message: str) -> None:
    """Forward ``message`` to ``log_error``.

    Targets the log file ``serienstream_errors.log`` and passes the global
    and plugin-specific error-logging setting ids; how they are evaluated is
    up to the helper.
    """
    options = {
        "enabled_setting_id": GLOBAL_SETTING_LOG_ERRORS,
        "plugin_setting_id": SETTING_LOG_ERRORS,
        "log_filename": "serienstream_errors.log",
        "message": message,
    }
    log_error(ADDON_ID, **options)
|
|
|
|
|
|
def _ensure_requests() -> None:
    """Raise ``RuntimeError`` unless both ``requests`` and ``bs4`` were imported."""
    dependencies = (requests, BeautifulSoup)
    if any(dependency is None for dependency in dependencies):
        raise RuntimeError("requests/bs4 sind nicht verfuegbar.")
|
|
|
|
|
|
def _is_retryable_http_error(exc: Exception) -> bool:
|
|
text = str(exc or "").casefold()
|
|
markers = (
|
|
"connection reset by peer",
|
|
"connection aborted",
|
|
"remote end closed connection",
|
|
"temporarily unavailable",
|
|
"timed out",
|
|
"read timeout",
|
|
)
|
|
return any(marker in text for marker in markers)
|
|
|
|
|
|
def _http_get(url: str, *, timeout: int, params: Optional[dict[str, str]] = None):
    """GET ``url`` through the pooled "serienstream" session, retrying once.

    Args:
        url: Address to fetch.
        timeout: Timeout handed to the session's ``get``.
        params: Optional query parameters.

    Returns:
        The response object, after ``raise_for_status()`` succeeded.

    Raises:
        RuntimeError: When requests/bs4 are unavailable.
        Exception: Whatever the request raised, either immediately for
            non-transient errors or after the single retry failed as well.
    """
    _ensure_requests()
    final_attempt = 1  # zero-based: two attempts in total
    attempt = 0
    while True:
        session = get_requests_session("serienstream", headers=HEADERS)
        try:
            reply = session.get(url, params=params, headers=HEADERS, timeout=timeout)
            reply.raise_for_status()
            return reply
        except Exception as exc:
            if attempt >= final_attempt or not _is_retryable_http_error(exc):
                raise
            _log_error(f"GET {url} retry nach Fehler: {exc}")
            # Drop all pooled sessions so the retry starts on a fresh connection.
            try:
                close_all_sessions()
            except Exception:
                pass
        attempt += 1
|
|
|
|
|
|
def _looks_like_cloudflare_challenge(body: str) -> bool:
|
|
lower = body.lower()
|
|
markers = (
|
|
"cf-browser-verification",
|
|
"cf-challenge",
|
|
"cf_chl",
|
|
"challenge-platform",
|
|
"attention required! | cloudflare",
|
|
"just a moment...",
|
|
"cloudflare ray id",
|
|
)
|
|
return any(marker in lower for marker in markers)
|
|
|
|
|
|
def _get_soup(url: str, *, session: RequestsSession | None = None) -> BeautifulSoupT:
    """Fetch ``url`` and return the parsed HTML.

    Args:
        url: Page to fetch.
        session: Session to use for the request. Without one the request
            goes through ``_http_get`` (pooled session with one retry).

    Returns:
        The page parsed with ``html.parser``.

    Raises:
        RuntimeError: When requests/bs4 are unavailable or the body looks
            like a Cloudflare challenge page.
        Exception: Request errors are logged and re-raised unchanged.
    """
    _ensure_requests()
    _log_visit(url)
    try:
        if session is None:
            reply = _http_get(url, timeout=DEFAULT_TIMEOUT)
        else:
            reply = session.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
            reply.raise_for_status()
    except Exception as exc:
        _log_error(f"GET {url} failed: {exc}")
        raise

    try:
        if reply is None:
            landed_url, html = url, ""
        else:
            landed_url = reply.url or url
            html = reply.text or ""
        if landed_url != url:
            _log_url(landed_url, kind="REDIRECT")
        _log_response_html(url, html)
        if _looks_like_cloudflare_challenge(html):
            raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
        return BeautifulSoup(html, "html.parser")
    finally:
        # Always release the response, even when parsing or the challenge check fails.
        if reply is not None:
            try:
                reply.close()
            except Exception:
                pass
|
|
|
|
|
|
def _get_html_simple(url: str) -> str:
    """Fetch ``url`` via ``_http_get`` and return the response body as text.

    Raises:
        RuntimeError: When requests/bs4 are unavailable or the body looks
            like a Cloudflare challenge page.
        Exception: Request errors are logged and re-raised unchanged.
    """
    _ensure_requests()
    _log_visit(url)
    try:
        reply = _http_get(url, timeout=DEFAULT_TIMEOUT)
    except Exception as exc:
        _log_error(f"GET {url} failed: {exc}")
        raise

    try:
        if reply is None:
            landed_url, html = url, ""
        else:
            landed_url = reply.url or url
            html = reply.text or ""
        if landed_url != url:
            _log_url(landed_url, kind="REDIRECT")
        _log_response_html(url, html)
        if _looks_like_cloudflare_challenge(html):
            raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
        return html
    finally:
        # Always release the response, even when the challenge check raises.
        if reply is not None:
            try:
                reply.close()
            except Exception:
                pass
|
|
|
|
|
|
def _get_soup_simple(url: str) -> BeautifulSoupT:
    """Fetch ``url`` with ``_get_html_simple`` and parse it with ``html.parser``."""
    return BeautifulSoup(_get_html_simple(url), "html.parser")
|
|
|
|
|
|
def _extract_genre_names_from_html(body: str) -> list[str]:
|
|
names: list[str] = []
|
|
seen: set[str] = set()
|
|
pattern = re.compile(
|
|
r"<div[^>]*class=[\"'][^\"']*background-1[^\"']*[\"'][^>]*>.*?<h3[^>]*>(.*?)</h3>",
|
|
re.IGNORECASE | re.DOTALL,
|
|
)
|
|
for match in pattern.finditer(body or ""):
|
|
text = re.sub(r"<[^>]+>", " ", match.group(1) or "")
|
|
text = _normalize_genre_label(text)
|
|
if not text:
|
|
continue
|
|
key = text.casefold()
|
|
if key in seen:
|
|
continue
|
|
seen.add(key)
|
|
names.append(text)
|
|
return names
|
|
|
|
|
|
def _strip_tags(value: str) -> str:
|
|
return re.sub(r"<[^>]+>", " ", value or "")
|
|
|
|
|
|
def _search_series_api(query: str) -> list[SeriesResult]:
    """Search series through the site's ``/api/search/suggest`` endpoint.

    The full query is tried first; for multi-word queries each word is then
    tried on its own. The first term that yields usable entries decides the
    outcome:

    * entries whose title matches the full query are returned if there are any;
    * otherwise, for a single-word fallback term, all entries of that term
      are returned;
    * otherwise the next term is tried.

    Request and JSON errors for a term are ignored. Returns ``[]`` for an
    empty query or when no term produced a result.
    """
    query = (query or "").strip()
    if not query:
        return []
    _ensure_requests()
    session = get_requests_session("serienstream", headers=HEADERS)

    search_terms = [query]
    if " " in query:
        # Fallback: single tokens often give better hits in the API.
        search_terms += [part for part in query.split() if part]

    known_urls: set[str] = set()  # shared across all terms
    for search_term in search_terms:
        try:
            reply = session.get(
                f"{_get_base_url()}/api/search/suggest",
                params={"term": search_term},
                headers=HEADERS,
                timeout=SEARCH_TIMEOUT,
            )
            reply.raise_for_status()
        except Exception:
            continue
        try:
            data = reply.json()
        except Exception:
            continue
        finally:
            try:
                reply.close()
            except Exception:
                pass

        shows = data.get("shows") if isinstance(data, dict) else None
        if not isinstance(shows, list):
            continue

        hits: list[SeriesResult] = []
        for show in shows:
            if not isinstance(show, dict):
                continue
            name = (show.get("name") or "").strip()
            link = (show.get("url") or "").strip()
            if not (name and link):
                continue
            absolute = _absolute_url(link)
            if not absolute or absolute in known_urls:
                continue
            # Only series root pages are wanted, no season/episode links.
            if "/staffel-" in absolute or "/episode-" in absolute:
                continue
            known_urls.add(absolute)
            hits.append(SeriesResult(title=name, description="", url=absolute))
        if not hits:
            continue

        matching = [hit for hit in hits if _matches_query(query, title=hit.title)]
        if matching:
            return matching
        # If only the token search produced something, return at least those results.
        if search_term != query:
            return hits
    return []
|
|
|
|
|
|
def _search_series_server(query: str) -> list[SeriesResult]:
|
|
if not query:
|
|
return []
|
|
base = _get_base_url()
|
|
search_url = f"{base}/search?q={quote(query)}"
|
|
alt_url = f"{base}/suche?q={quote(query)}"
|
|
for url in (search_url, alt_url):
|
|
try:
|
|
body = _get_html_simple(url)
|
|
except Exception:
|
|
continue
|
|
if not body:
|
|
continue
|
|
soup = BeautifulSoup(body, "html.parser")
|
|
root = soup.select_one(".search-results-list")
|
|
if root is None:
|
|
continue
|
|
seen_urls: set[str] = set()
|
|
results: list[SeriesResult] = []
|
|
for card in root.select(".cover-card"):
|
|
anchor = card.select_one("a[href*='/serie/']")
|
|
if not anchor:
|
|
continue
|
|
href = (anchor.get("href") or "").strip()
|
|
url_abs = _absolute_url(href)
|
|
if not url_abs or url_abs in seen_urls:
|
|
continue
|
|
if "/staffel-" in url_abs or "/episode-" in url_abs:
|
|
continue
|
|
title_tag = card.select_one(".show-title") or card.select_one("h3") or card.select_one("h4")
|
|
title = (title_tag.get_text(" ", strip=True) if title_tag else anchor.get_text(" ", strip=True)).strip()
|
|
if not title:
|
|
continue
|
|
seen_urls.add(url_abs)
|
|
results.append(SeriesResult(title=title, description="", url=url_abs))
|
|
if results:
|
|
return results
|
|
api_results = _search_series_api(query)
|
|
if api_results:
|
|
return api_results
|
|
return []
|
|
|
|
|
|
def _extract_catalog_index_from_html(body: str, *, progress_callback: ProgressCallback = None) -> list[SeriesResult]:
|
|
items: list[SeriesResult] = []
|
|
if not body:
|
|
return items
|
|
seen_urls: set[str] = set()
|
|
item_re = re.compile(
|
|
r"<li[^>]*class=[\"'][^\"']*series-item[^\"']*[\"'][^>]*>(.*?)</li>",
|
|
re.IGNORECASE | re.DOTALL,
|
|
)
|
|
anchor_re = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
|
|
data_search_re = re.compile(r"data-search=[\"']([^\"']*)[\"']", re.IGNORECASE)
|
|
for idx, match in enumerate(item_re.finditer(body), start=1):
|
|
if idx == 1 or idx % 200 == 0:
|
|
_emit_progress(progress_callback, f"Katalog parsen {idx}", 62)
|
|
block = match.group(0)
|
|
inner = match.group(1) or ""
|
|
anchor_match = anchor_re.search(inner)
|
|
if not anchor_match:
|
|
continue
|
|
href = (anchor_match.group(1) or "").strip()
|
|
url = _absolute_url(href)
|
|
if not url or "/serie/" not in url or "/staffel-" in url or "/episode-" in url:
|
|
continue
|
|
if url in seen_urls:
|
|
continue
|
|
seen_urls.add(url)
|
|
title_raw = anchor_match.group(2) or ""
|
|
title = unescape(re.sub(r"\s+", " ", _strip_tags(title_raw))).strip()
|
|
if not title:
|
|
continue
|
|
search_match = data_search_re.search(block)
|
|
description = (search_match.group(1) or "").strip() if search_match else ""
|
|
items.append(SeriesResult(title=title, description=description, url=url))
|
|
return items
|
|
|
|
|
|
def _catalog_index_from_soup(soup: BeautifulSoupT) -> list[SeriesResult]:
|
|
items: list[SeriesResult] = []
|
|
if not soup:
|
|
return items
|
|
seen_urls: set[str] = set()
|
|
for item in soup.select("li.series-item"):
|
|
anchor = item.find("a", href=True)
|
|
if not anchor:
|
|
continue
|
|
href = (anchor.get("href") or "").strip()
|
|
url = _absolute_url(href)
|
|
if not url or "/serie/" not in url or "/staffel-" in url or "/episode-" in url:
|
|
continue
|
|
if url in seen_urls:
|
|
continue
|
|
seen_urls.add(url)
|
|
title = (anchor.get_text(" ", strip=True) or "").strip()
|
|
if not title:
|
|
continue
|
|
description = (item.get("data-search") or "").strip()
|
|
items.append(SeriesResult(title=title, description=description, url=url))
|
|
return items
|
|
|
|
|
|
def _load_catalog_index_from_cache() -> Optional[list[SeriesResult]]:
    """Return the cached catalog index, or ``None`` when there is none.

    The in-process copy is used while it is non-empty and unexpired.
    Otherwise the session cache is read; entries there are lists of
    ``[title, url, description?, cover?]``. Malformed entries and entries
    without title or URL are dropped. A successful read refreshes the
    in-process copy.
    """
    global _CATALOG_INDEX_MEMORY
    memory_expiry, memory_items = _CATALOG_INDEX_MEMORY
    if memory_items and memory_expiry > time.time():
        return list(memory_items)

    stored = _session_cache_get(CATALOG_SEARCH_CACHE_KEY)
    if not isinstance(stored, list):
        return None

    restored: list[SeriesResult] = []
    for row in stored:
        if not isinstance(row, list) or len(row) < 2:
            continue
        title = str(row[0] or "").strip()
        url = str(row[1] or "").strip()
        if not (title and url):
            continue
        description = str(row[2] or "") if len(row) > 2 else ""
        cover = str(row[3] or "").strip() if len(row) > 3 else ""
        restored.append(SeriesResult(title=title, description=description, url=url, cover=cover))

    if not restored:
        return None
    _CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(restored))
    return restored
|
|
|
|
|
|
def _store_catalog_index_in_cache(items: list[SeriesResult]) -> None:
|
|
global _CATALOG_INDEX_MEMORY
|
|
if not items:
|
|
return
|
|
_CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(items))
|
|
payload: list[list[str]] = []
|
|
for entry in items:
|
|
if not entry.title or not entry.url:
|
|
continue
|
|
payload.append([entry.title, entry.url, entry.description, entry.cover])
|
|
_session_cache_set(CATALOG_SEARCH_CACHE_KEY, payload, ttl_seconds=CATALOG_SEARCH_TTL_SECONDS)
|
|
|
|
|
|
def search_series(query: str, *, progress_callback: ProgressCallback = None) -> list[SeriesResult]:
    """Search the (/serien) catalog for series by title, using the cache and a one-pass filter.

    Order of attempts:

    1. the cached catalog index, if it contains matches;
    2. the freshly downloaded catalog page (stored in the cache afterwards);
    3. the server-side search, when the catalog yielded no entries at all.

    Args:
        query: Search text; a query that normalises to nothing returns ``[]``.
        progress_callback: Optional ``callback(message, percent)``.

    Raises:
        RuntimeError: When requests/bs4 are unavailable.
        Exception: When the catalog page cannot be fetched by either loader.
    """
    _ensure_requests()
    if not _normalize_search_text(query):
        return []

    def matching(candidates: list[SeriesResult]) -> list[SeriesResult]:
        # Keep the entries whose (non-empty) title matches the query.
        return [item for item in candidates if item.title and _matches_query(query, title=item.title)]

    _emit_progress(progress_callback, "Pruefe Such-Cache", 15)
    cached_index = _load_catalog_index_from_cache()
    if cached_index is not None:
        cache_hits = matching(cached_index)
        _emit_progress(progress_callback, f"Cache-Treffer: {len(cached_index)}", 35)
        if cache_hits:
            return cache_hits

    _emit_progress(progress_callback, "Lade Katalogseite", 42)
    catalog_url = f"{_get_base_url()}/serien?by=genre"
    catalog: list[SeriesResult] = []
    try:
        # Prefer the soup helper so that tests can inject HTML easily.
        catalog = _catalog_index_from_soup(_get_soup_simple(catalog_url))
    except Exception:
        html = _get_html_simple(catalog_url)
        catalog = _extract_catalog_index_from_html(html, progress_callback=progress_callback)
        if not catalog:
            _emit_progress(progress_callback, "Fallback-Parser", 58)
            catalog = _catalog_index_from_soup(BeautifulSoup(html, "html.parser"))

    if catalog:
        _store_catalog_index_in_cache(catalog)
        _emit_progress(progress_callback, f"Filtere Treffer ({len(catalog)})", 70)
        return matching(catalog)

    _emit_progress(progress_callback, "Server-Suche", 85)
    server_hits = _search_series_server(query)
    if not server_hits:
        return []
    _emit_progress(progress_callback, f"Server-Treffer: {len(server_hits)}", 95)
    return matching(server_hits)
|
|
|
|
|
|
def parse_series_catalog(soup: BeautifulSoupT) -> dict[str, list[SeriesResult]]:
    """Parse the series overview (/serien) into a genre -> series list mapping.

    Genres without any valid series entry are omitted.
    """
    genres: dict[str, list[SeriesResult]] = {}

    # New layout (as of 2026-01): group header followed by a list.
    # - Header: `div.background-1 ...` containing an `h3`
    # - Entries: `ul.series-list` -> `li.series-item[data-search]` -> `a[href]`
    for heading in soup.select("div.background-1 h3"):
        genre = _normalize_genre_label(heading.get_text(strip=True))
        if not genre:
            continue
        series_list = heading.parent.find_next_sibling("ul", class_="series-list")
        if not series_list:
            continue

        members: list[SeriesResult] = []
        for entry in series_list.select("li.series-item"):
            link = entry.find("a", href=True)
            if not link:
                continue
            target = _absolute_url((link.get("href") or "").strip())
            if target:
                _log_parsed_url(target)
            # Only series root pages are wanted, no season/episode links.
            if "/serie/" not in target or "/staffel-" in target or "/episode-" in target:
                continue
            label = (link.get_text(" ", strip=True) or "").strip()
            blurb = (entry.get("data-search") or "").strip()
            if label:
                members.append(SeriesResult(title=label, description=blurb, url=target))
        if members:
            genres[genre] = members

    return genres
|
|
|
|
|
|
def _extract_season_links(soup: BeautifulSoupT) -> list[tuple[int, str]]:
    """Collect ``(season number, season URL)`` pairs from the season navigation.

    The number is taken from ``/staffel-<n>`` in the link, else from the
    ``data-season-pill`` attribute, else from a purely numeric link text.
    Links to episodes and links without a usable number are skipped; for
    duplicate numbers the first link wins. The result is sorted by number.
    """
    # Insertion happens only for unseen numbers, so the first link per season is kept.
    url_by_season: dict[int, str] = {}
    for pill in soup.select("ul.nav.list-items-nav a[data-season-pill][href]"):
        href = pill.get("href") or ""
        if "/episode-" in href:
            continue

        pill_value = (pill.get("data-season-pill") or "").strip()
        href_number = re.search(r"/staffel-(\d+)", href)
        if href_number:
            number = int(href_number.group(1))
        elif pill_value.isdigit():
            number = int(pill_value)
        else:
            link_text = pill.get_text(strip=True)
            if not link_text.isdigit():
                continue
            number = int(link_text)

        if number in url_by_season:
            continue
        target = _absolute_url(href)
        if target:
            _log_parsed_url(target)
        url_by_season[number] = target

    return sorted(url_by_season.items(), key=lambda pair: pair[0])
|
|
|
|
|
|
def _extract_number_of_seasons(soup: BeautifulSoupT) -> int | None:
    """Read the season count from the ``numberOfSeasons`` meta tag.

    Returns ``None`` when the tag is missing, its content is not purely
    numeric, or the count is zero.
    """
    meta = soup.select_one('meta[itemprop="numberOfSeasons"]')
    if meta:
        raw = (meta.get("content") or "").strip()
        if raw.isdigit():
            seasons = int(raw)
            if seasons > 0:
                return seasons
    return None
|
|
|
|
|
|
def _extract_canonical_url(soup: BeautifulSoupT, fallback: str) -> str:
    """Return the page's canonical URL, or ``fallback`` when it has none.

    Only absolute http(s) canonical links are accepted. Trailing slashes are
    removed from whichever URL is returned.
    """
    link = soup.select_one('link[rel="canonical"][href]')
    target = ((link.get("href") if link else "") or "").strip()
    chosen = target if target.startswith(("http://", "https://")) else fallback
    return chosen.rstrip("/")
|
|
|
|
|
|
def _extract_episodes(soup: BeautifulSoupT) -> list[EpisodeInfo]:
    """Parse the episode table of a season page.

    Rows flagged as upcoming and rows whose titles look like "to be
    announced" placeholders are skipped. For each remaining row the episode
    URL (from ``onclick`` or the first link), number (from the URL, the
    number cell, or the row position), titles, hoster names and language
    codes are collected.
    """
    season_heading = soup.select_one("section.episode-section h2") or soup.select_one("h2.h3")
    season_label = (season_heading.get_text(" ", strip=True) or "").strip() if season_heading else ""

    # Flag class suffix -> language code; unknown suffixes are upper-cased as-is.
    flag_to_code = {
        "german": "DE",
        "english": "EN",
        "japanese": "JP",
        "turkish": "TR",
        "spanish": "ES",
        "italian": "IT",
        "french": "FR",
        "korean": "KO",
        "russian": "RU",
        "polish": "PL",
        "portuguese": "PT",
        "chinese": "ZH",
        "arabic": "AR",
        "thai": "TH",
    }

    collected: list[EpisodeInfo] = []
    # New layout (as of 2026-01): episode table with rows carrying an onclick URL.
    table_rows = soup.select("table.episode-table tbody tr.episode-row")
    for position, row in enumerate(table_rows, start=1):
        if _row_is_upcoming(row):
            continue

        # Episode URL: onclick handler first, first link in the row as fallback.
        episode_url = ""
        onclick = (row.get("onclick") or "").strip()
        if onclick:
            target = re.search(r"location=['\\\"]([^'\\\"]+)['\\\"]", onclick)
            if target:
                episode_url = _absolute_url(target.group(1))
        if not episode_url:
            link = row.find("a", href=True)
            episode_url = _absolute_url(link.get("href")) if link else ""
        if episode_url:
            _log_parsed_url(episode_url)

        # Episode number: URL beats the number cell, which beats the row position.
        number_cell = row.select_one(".episode-number-cell")
        cell_text = (number_cell.get_text(strip=True) if number_cell else "").strip()
        url_number = re.search(r"/episode-(\d+)", episode_url) if episode_url else None
        if url_number:
            number = int(url_number.group(1))
        else:
            digits = "".join(filter(str.isdigit, cell_text))
            number = int(digits) if digits else position

        german_node = row.select_one(".episode-title-ger")
        english_node = row.select_one(".episode-title-eng")
        title = (german_node.get_text(strip=True) if german_node else "").strip()
        original_title = (english_node.get_text(strip=True) if english_node else "").strip()
        if not title:
            title = f"Episode {number}"
        if _is_episode_tba(title, original_title):
            continue

        # Hoster names from the icon labels, first occurrence wins.
        icon_labels = (
            (icon.get("alt") or icon.get("title") or "").strip()
            for icon in row.select(".episode-watch-cell img")
        )
        hosters: list[str] = list(dict.fromkeys(label for label in icon_labels if label))

        languages: list[str] = []
        for flag in row.select(".episode-language-cell .watch-language"):
            flag_classes = flag.get("class") or []
            if isinstance(flag_classes, str):
                flag_classes = flag_classes.split()
            for css_class in flag_classes:
                if not css_class.startswith("svg-flag-"):
                    continue
                key = css_class.replace("svg-flag-", "").strip()
                if not key:
                    continue
                code = flag_to_code.get(key, key.upper())
                if code and code not in languages:
                    languages.append(code)

        collected.append(
            EpisodeInfo(
                number=number,
                title=title,
                original_title=original_title,
                url=episode_url,
                season_label=season_label,
                languages=languages,
                hosters=hosters,
            )
        )
    return collected
|
|
|
|
|
|
def fetch_episode_stream_link(
    episode_url: str,
    *,
    preferred_hosters: Optional[list[str]] = None,
) -> str | None:
    """Pick a hoster play URL from an episode page.

    Args:
        episode_url: Episode page, absolute or site-relative.
        preferred_hosters: Hoster names in order of preference (compared
            case-insensitively); defaults to ``DEFAULT_PREFERRED_HOSTERS``.

    Returns:
        The play URL of the first preferred hoster that is offered, else the
        first offered play URL, or ``None`` when the page offers none.

    Raises:
        RuntimeError: When requests/bs4 are unavailable or a Cloudflare
            challenge is detected on the episode page.
        Exception: Request errors for the episode page.
    """
    _ensure_requests()
    page_url = _absolute_url(episode_url)
    wanted = [name.lower() for name in (preferred_hosters or DEFAULT_PREFERRED_HOSTERS)]
    session = get_requests_session("serienstream", headers=HEADERS)

    # Optional preflight: the start page may return 5xx while the target page still works.
    try:
        _get_soup(_get_base_url(), session=session)
    except Exception:
        pass

    page = _get_soup(page_url, session=session)
    offers: list[tuple[str, str]] = []
    for button in page.select("button.link-box[data-play-url]"):
        raw_target = (button.get("data-play-url") or "").strip()
        provider = (button.get("data-provider-name") or "").strip()
        target = _absolute_url(raw_target)
        if not target:
            continue
        _log_parsed_url(target)
        if provider:
            offers.append((provider, target))

    if not offers:
        return None
    for wanted_name in wanted:
        preferred_link = next(
            (link for provider, link in offers if provider.lower() == wanted_name),
            None,
        )
        if preferred_link is not None:
            return preferred_link
    return offers[0][1]
|
|
|
|
|
|
def fetch_episode_hoster_names(episode_url: str) -> list[str]:
    """Read the available hoster names for an episode.

    Collects the ``data-provider-name`` of every
    ``button.link-box[data-provider-name]`` element in page order, skipping
    empty names and names already seen (compared casefolded). Each play URL
    found and each accepted name is logged.

    Args:
        episode_url: Episode page URL; passed through ``_absolute_url``.

    Returns:
        The distinct hoster names as written on the page.
    """
    _ensure_requests()
    page_url = _absolute_url(episode_url)
    http = get_requests_session("serienstream", headers=HEADERS)
    # Optional preflight: the start page may answer 5xx while the target page still works.
    try:
        _get_soup(_get_base_url(), session=http)
    except Exception:
        pass
    page = _get_soup(page_url, session=http)

    hosters: list[str] = []
    known: set[str] = set()
    for button in page.select("button.link-box[data-provider-name]"):
        hoster = (button.get("data-provider-name") or "").strip()
        link = _absolute_url((button.get("data-play-url") or "").strip())
        if link:
            _log_parsed_url(link)
        folded = hoster.casefold().strip()
        if folded and folded not in known:
            known.add(folded)
            hosters.append(hoster)
            _log_url(hoster, kind="HOSTER")
    if hosters:
        _log_url(f"{page_url}#hosters={','.join(hosters)}", kind="HOSTERS")
    return hosters
|
|
|
|
|
|
# Case-insensitive pattern compiled from SEASON_EPISODE_TAG.
_LATEST_EPISODE_TAG_RE = re.compile(SEASON_EPISODE_TAG, re.IGNORECASE)
# Case-insensitive pattern compiled from SEASON_EPISODE_URL; groups 1 and 2 are
# read as season and episode number when parsing latest-episode links.
_LATEST_EPISODE_URL_RE = re.compile(SEASON_EPISODE_URL, re.IGNORECASE)
|
|
|
|
|
|
def _extract_latest_episodes(soup: BeautifulSoupT) -> list[LatestEpisode]:
    """Parse the latest episodes from the start page.

    Each ``a.latest-episode-row[href]`` whose href contains ``/serie/`` is
    turned into a ``LatestEpisode``. Season and episode numbers come from the
    ``.ep-season`` / ``.ep-episode`` texts; if either is missing, both are
    taken from the href via ``_LATEST_EPISODE_URL_RE``. Rows without a series
    title or without both numbers are skipped, as are duplicates of an
    already emitted (url, season, episode) combination.
    """
    found: list[LatestEpisode] = []
    known_keys: set[str] = set()

    def _node_text(row, selector: str) -> str:
        # Stripped text of the first node matching *selector*, or "".
        node = row.select_one(selector)
        return (node.get_text(strip=True) if node else "").strip()

    for row in soup.select("a.latest-episode-row[href]"):
        href = (row.get("href") or "").strip()
        if not href or "/serie/" not in href:
            continue
        url = _absolute_url(href)
        if not url:
            continue

        title_node = row.select_one(".ep-title")
        series_title = ""
        if title_node:
            # Prefer the title attribute, fall back to the visible text.
            series_title = (title_node.get("title") or "").strip() or title_node.get_text(strip=True).strip()
        if not series_title:
            continue

        season_hit = re.search(r"S\s*(\d+)", _node_text(row, ".ep-season"), re.IGNORECASE)
        episode_hit = re.search(r"E\s*(\d+)", _node_text(row, ".ep-episode"), re.IGNORECASE)
        season_number: int | None = int(season_hit.group(1)) if season_hit else None
        episode_number: int | None = int(episode_hit.group(1)) if episode_hit else None
        if season_number is None or episode_number is None:
            url_hit = _LATEST_EPISODE_URL_RE.search(href)
            if url_hit:
                season_number = int(url_hit.group(1))
                episode_number = int(url_hit.group(2))
        if season_number is None or episode_number is None:
            continue

        airdate_node = row.select_one(".ep-time")
        airdate = (airdate_node.get_text(" ", strip=True) if airdate_node else "").strip()

        dedupe_key = f"{url}\\t{season_number}\\t{episode_number}"
        if dedupe_key in known_keys:
            continue
        known_keys.add(dedupe_key)

        _log_parsed_url(url)
        found.append(
            LatestEpisode(
                series_title=series_title,
                season=int(season_number),
                episode=int(episode_number),
                url=url,
                airdate=airdate,
            )
        )

    return found
|
|
|
|
|
|
def resolve_redirect(target_url: str) -> str | None:
    """Follow redirects for ``target_url`` and return the final URL.

    Issues a GET with ``allow_redirects=True`` on the shared "serienstream"
    session and always closes the response afterwards.

    Args:
        target_url: URL to resolve; passed through ``_absolute_url``.

    Returns:
        ``response.url`` after redirects, or ``None`` when it is empty.
    """
    _ensure_requests()
    request_url = _absolute_url(target_url)
    _log_visit(request_url)
    http = get_requests_session("serienstream", headers=HEADERS)
    # Optional preflight: the start page may answer 5xx while the target page still works.
    try:
        _get_soup(_get_base_url(), session=http)
    except Exception:
        pass

    response = None
    try:
        response = http.get(
            request_url,
            headers=HEADERS,
            timeout=DEFAULT_TIMEOUT,
            allow_redirects=True,
        )
        if not response.url:
            return None
        _log_url(response.url, kind="RESOLVED")
        return response.url
    finally:
        if response is not None:
            # Closing is best effort; a failure here must not mask the result.
            try:
                response.close()
            except Exception:
                pass
|
|
|
|
|
|
def scrape_series_detail(
    series_identifier: str,
    max_seasons: int | None = None,
    *,
    load_episodes: bool = True,
) -> list[SeasonInfo]:
    """Scrape the seasons (and optionally episodes) of one series.

    Season links are read from the series page. When the page announces more
    seasons than links were found, the missing ``/staffel-N`` URLs are
    synthesized from the canonical series URL.

    Args:
        series_identifier: Series reference, normalized via
            ``_normalize_series_url`` and ``_series_root_url``.
        max_seasons: If given, only the first ``max_seasons`` links are used.
        load_episodes: When true, each season page is fetched and parsed with
            ``_extract_episodes``; otherwise seasons carry an empty list.

    Returns:
        ``SeasonInfo`` objects sorted by season number.
    """
    _ensure_requests()
    series_url = _series_root_url(_normalize_series_url(series_identifier))
    _log_url(series_url, kind="SERIES")
    _notify_url(series_url)
    http = get_requests_session("serienstream", headers=HEADERS)
    overview = _get_soup(series_url, session=http)

    root_url = _series_root_url(_extract_canonical_url(overview, series_url))
    links = _extract_season_links(overview)
    declared = _extract_number_of_seasons(overview)
    if declared and (not links or len(links) < declared):
        known_numbers = {number for number, _ in links}
        for missing in range(1, declared + 1):
            if missing in known_numbers:
                continue
            guessed_url = f"{root_url}/staffel-{missing}"
            _log_parsed_url(guessed_url)
            links.append((missing, guessed_url))
        links.sort(key=lambda pair: pair[0])
    if max_seasons is not None:
        links = links[:max_seasons]

    result: list[SeasonInfo] = []
    for number, url in links:
        episodes: list[EpisodeInfo] = []
        if load_episodes:
            episodes = _extract_episodes(_get_soup(url, session=http))
        result.append(SeasonInfo(number=number, url=url, episodes=episodes))
    result.sort(key=lambda season: season.number)
    return result
|
|
|
|
|
|
class SerienstreamPlugin(BasisPlugin):
|
|
"""Downloader-Plugin, das Serien von s.to ueber requests/bs4 bereitstellt."""
|
|
|
|
name = "Serienstream"
|
|
version = "1.0.0"
|
|
POPULAR_GENRE_LABEL = "Haeufig gesehen"
|
|
|
|
def __init__(self) -> None:
    """Set up empty in-memory caches and record dependency availability.

    The title→URL cache is seeded from the session cache. When
    ``REQUESTS_AVAILABLE`` is false the plugin marks itself unavailable,
    stores a reason and prints a hint (plus the import error, if any).
    """
    # Series lookup.
    self._series_results: dict[str, SeriesResult] = {}
    self._title_url_cache: dict[str, str] = self._load_title_url_cache()
    # Genres and catalog.
    self._genre_names_cache: Optional[list[str]] = None
    # Seasons and episodes.
    self._season_cache: dict[str, list[SeasonInfo]] = {}
    self._season_links_cache: dict[str, list[SeasonInfo]] = {}
    self._episode_label_cache: dict[tuple[str, str], dict[str, EpisodeInfo]] = {}
    self._catalog_cache: Optional[dict[str, list[SeriesResult]]] = None
    self._genre_group_cache: dict[str, dict[str, list[str]]] = {}
    self._genre_page_entries_cache: dict[tuple[str, int], list[SeriesResult]] = {}
    self._genre_page_has_more_cache: dict[tuple[str, int], bool] = {}
    self._popular_cache: Optional[list[SeriesResult]] = None
    self._requests_available = REQUESTS_AVAILABLE
    # Hoster preferences and per-episode hoster lists.
    self._default_preferred_hosters: list[str] = list(DEFAULT_PREFERRED_HOSTERS)
    self._preferred_hosters: list[str] = list(self._default_preferred_hosters)
    self._hoster_cache: dict[tuple[str, str, str], list[str]] = {}
    self._latest_cache: dict[int, list[LatestEpisode]] = {}
    self._latest_hoster_cache: dict[str, list[str]] = {}
    # Metadata; `_series_metadata_full` holds the keys loaded from a detail page.
    self._series_metadata_cache: dict[str, tuple[dict[str, str], dict[str, str]]] = {}
    self._series_metadata_full: set[str] = set()
    self.is_available = True
    self.unavailable_reason: str | None = None
    if self._requests_available:
        return
    # pragma: no cover - optional dependency
    self.is_available = False
    self.unavailable_reason = (
        "requests/bs4 fehlen. Installiere 'requests' und 'beautifulsoup4'."
    )
    print(
        "SerienstreamPlugin deaktiviert: requests/bs4 fehlen. "
        "Installiere 'requests' und 'beautifulsoup4'."
    )
    if REQUESTS_IMPORT_ERROR:
        print(f"Importfehler: {REQUESTS_IMPORT_ERROR}")
|
|
|
|
def _load_title_url_cache(self) -> dict[str, str]:
    """Load the ``title_urls`` session-cache entry as a clean title→URL map.

    Keys are stripped and casefolded, values stripped; pairs with an empty
    key or value are dropped. A non-dict cache entry yields an empty map.
    """
    stored = _session_cache_get("title_urls")
    if not isinstance(stored, dict):
        return {}
    cleaned: dict[str, str] = {}
    for raw_title, raw_url in stored.items():
        title_key = str(raw_title or "").strip().casefold()
        url = str(raw_url or "").strip()
        if title_key and url:
            cleaned[title_key] = url
    return cleaned
|
|
|
|
def _save_title_url_cache(self) -> None:
|
|
if not self._title_url_cache:
|
|
return
|
|
# Begrenzt die Session-Daten auf die jüngsten Einträge.
|
|
while len(self._title_url_cache) > SESSION_CACHE_MAX_TITLE_URLS:
|
|
self._title_url_cache.pop(next(iter(self._title_url_cache)))
|
|
_session_cache_set("title_urls", self._title_url_cache)
|
|
|
|
def _remember_series_result(self, title: str, url: str, description: str = "") -> None:
|
|
title = (title or "").strip()
|
|
url = (url or "").strip()
|
|
if not title:
|
|
return
|
|
if url:
|
|
self._series_results[title] = SeriesResult(title=title, description=description, url=url)
|
|
cache_key = title.casefold()
|
|
if self._title_url_cache.get(cache_key) != url:
|
|
self._title_url_cache[cache_key] = url
|
|
self._save_title_url_cache()
|
|
if url:
|
|
return
|
|
current = self._series_results.get(title)
|
|
if current is None:
|
|
self._series_results[title] = SeriesResult(title=title, description=description, url="")
|
|
|
|
@staticmethod
|
|
def _metadata_cache_key(title: str) -> str:
|
|
return (title or "").strip().casefold()
|
|
|
|
def _series_for_title(self, title: str) -> SeriesResult | None:
|
|
direct = self._series_results.get(title)
|
|
if direct and direct.url:
|
|
return direct
|
|
lookup_key = (title or "").strip().casefold()
|
|
for item in self._series_results.values():
|
|
if item.title.casefold().strip() == lookup_key and item.url:
|
|
return item
|
|
cached_url = self._title_url_cache.get(lookup_key, "")
|
|
if cached_url:
|
|
return SeriesResult(title=title, description="", url=cached_url)
|
|
return None
|
|
|
|
@staticmethod
|
|
def _season_links_cache_name(series_url: str) -> str:
|
|
digest = hashlib.sha1((series_url or "").encode("utf-8")).hexdigest()[:20]
|
|
return f"season_links.{digest}"
|
|
|
|
@staticmethod
|
|
def _season_episodes_cache_name(season_url: str) -> str:
|
|
digest = hashlib.sha1((season_url or "").encode("utf-8")).hexdigest()[:20]
|
|
return f"season_episodes.{digest}"
|
|
|
|
def _load_session_season_links(self, series_url: str) -> Optional[list[SeasonInfo]]:
    """Restore the season links of a series from the session cache.

    Records that are not dicts, whose ``number`` cannot be converted to a
    positive int, or that lack a URL are ignored.

    Returns:
        Seasons (without episodes) sorted by number, or ``None`` when the
        cache entry is missing, malformed or yields no usable record.
    """
    stored = _session_cache_get(self._season_links_cache_name(series_url))
    if not isinstance(stored, list):
        return None
    restored: list[SeasonInfo] = []
    for record in stored:
        if not isinstance(record, dict):
            continue
        try:
            number = int(record.get("number"))
        except Exception:
            continue
        url = str(record.get("url") or "").strip()
        if number > 0 and url:
            restored.append(SeasonInfo(number=number, url=url, episodes=[]))
    if not restored:
        return None
    restored.sort(key=lambda season: season.number)
    return restored
|
|
|
|
def _save_session_season_links(self, series_url: str, seasons: list[SeasonInfo]) -> None:
|
|
payload = [{"number": int(season.number), "url": season.url} for season in seasons if season.url]
|
|
if payload:
|
|
_session_cache_set(self._season_links_cache_name(series_url), payload)
|
|
|
|
def _load_session_season_episodes(self, season_url: str) -> Optional[list[EpisodeInfo]]:
    """Restore the episode list of a season from the session cache.

    Records that are not dicts or whose ``number`` is not a positive int are
    ignored. A missing title becomes ``"Episode <number>"``. ``languages``
    and ``hosters`` are read defensively: a list/tuple is cleaned entry by
    entry, a single string counts as one entry, and any other value is
    treated as empty so that a corrupt cache entry cannot raise.

    Returns:
        Episodes sorted by number, or ``None`` when the cache entry is
        missing, malformed or yields no usable record.
    """

    def _text_list(value: Any) -> list[str]:
        # A lone string is one entry, not a sequence of characters; any other
        # non-sequence is treated as missing instead of raising TypeError.
        if isinstance(value, str):
            value = [value]
        elif not isinstance(value, (list, tuple)):
            return []
        return [text for text in (str(entry).strip() for entry in value) if text]

    raw = _session_cache_get(self._season_episodes_cache_name(season_url))
    if not isinstance(raw, list):
        return None
    episodes: list[EpisodeInfo] = []
    for item in raw:
        if not isinstance(item, dict):
            continue
        try:
            number = int(item.get("number"))
        except Exception:
            continue
        if number <= 0:
            continue
        title = str(item.get("title") or "").strip()
        episodes.append(
            EpisodeInfo(
                number=number,
                title=title or f"Episode {number}",
                original_title=str(item.get("original_title") or "").strip(),
                url=str(item.get("url") or "").strip(),
                season_label=str(item.get("season_label") or "").strip(),
                languages=_text_list(item.get("languages")),
                hosters=_text_list(item.get("hosters")),
            )
        )
    if not episodes:
        return None
    episodes.sort(key=lambda item: item.number)
    return episodes
|
|
|
|
def _save_session_season_episodes(self, season_url: str, episodes: list[EpisodeInfo]) -> None:
|
|
payload = []
|
|
for item in episodes:
|
|
payload.append(
|
|
{
|
|
"number": int(item.number),
|
|
"title": item.title,
|
|
"original_title": item.original_title,
|
|
"url": item.url,
|
|
"season_label": item.season_label,
|
|
"languages": list(item.languages or []),
|
|
"hosters": list(item.hosters or []),
|
|
}
|
|
)
|
|
if payload:
|
|
_session_cache_set(self._season_episodes_cache_name(season_url), payload)
|
|
|
|
def _ensure_catalog(self) -> dict[str, list[SeriesResult]]:
|
|
if self._catalog_cache is not None:
|
|
return self._catalog_cache
|
|
# Stand: 2026-01 liefert `?by=genre` konsistente Gruppen für `genres()`.
|
|
catalog_url = f"{_get_base_url()}/serien?by=genre"
|
|
soup = _get_soup_simple(catalog_url)
|
|
self._catalog_cache = parse_series_catalog(soup)
|
|
_session_cache_set("genres", sorted(self._catalog_cache.keys(), key=str.casefold))
|
|
return self._catalog_cache
|
|
|
|
def _ensure_genre_names(self) -> list[str]:
|
|
if self._genre_names_cache is not None:
|
|
return list(self._genre_names_cache)
|
|
|
|
def _normalize_cached_genre(raw: str) -> str:
|
|
text = re.sub(r"\s+", " ", str(raw or "")).strip()
|
|
key_prefix = "filter.genre_"
|
|
if text.casefold().startswith(key_prefix):
|
|
slug = text[len(key_prefix) :].strip().casefold()
|
|
slug = slug.replace("_", "-")
|
|
slug = re.sub(r"[^a-z0-9-]+", "-", slug).strip("-")
|
|
if not slug:
|
|
return ""
|
|
if slug == "doku-soap":
|
|
return "Doku-Soap"
|
|
if slug == "scifi":
|
|
return "SciFi"
|
|
if slug == "fighting-shounen":
|
|
return "Fighting-Shounen"
|
|
return " ".join(chunk.capitalize() for chunk in slug.split("-") if chunk)
|
|
return text
|
|
|
|
cached = _session_cache_get("genres")
|
|
if isinstance(cached, list):
|
|
genres: list[str] = []
|
|
for value in cached:
|
|
normalized = _normalize_cached_genre(value)
|
|
if normalized:
|
|
genres.append(normalized)
|
|
if genres:
|
|
self._genre_names_cache = sorted(set(genres), key=str.casefold)
|
|
return list(self._genre_names_cache)
|
|
catalog_url = f"{_get_base_url()}/serien?by=genre"
|
|
try:
|
|
body = _get_html_simple(catalog_url)
|
|
genres = _extract_genre_names_from_html(body)
|
|
except Exception:
|
|
genres = []
|
|
if not genres:
|
|
catalog = self._ensure_catalog()
|
|
genres = sorted(catalog.keys(), key=str.casefold)
|
|
else:
|
|
genres = sorted(set(genres), key=str.casefold)
|
|
self._genre_names_cache = list(genres)
|
|
_session_cache_set("genres", self._genre_names_cache)
|
|
return list(self._genre_names_cache)
|
|
|
|
def genres(self) -> list[str]:
|
|
"""Optional: Liefert alle Genres aus dem Serien-Katalog."""
|
|
if not self._requests_available:
|
|
return []
|
|
return self._ensure_genre_names()
|
|
|
|
def capabilities(self) -> set[str]:
    """Report the supported features for router menus."""
    supported = {"popular_series", "genres", "latest_episodes"}
    return supported
|
|
|
|
def popular_series(self) -> list[str]:
|
|
"""Liefert die Titel der beliebten Serien (Quelle: `/beliebte-serien`)."""
|
|
if not self._requests_available:
|
|
return []
|
|
entries = self._ensure_popular()
|
|
for entry in entries:
|
|
self._remember_series_result(entry.title, entry.url, entry.description)
|
|
return [entry.title for entry in entries if entry.title]
|
|
|
|
def titles_for_genre(self, genre: str) -> list[str]:
|
|
"""Optional: Liefert Titel für ein Genre."""
|
|
if not self._requests_available:
|
|
return []
|
|
genre = (genre or "").strip()
|
|
if not genre:
|
|
return []
|
|
if genre == self.POPULAR_GENRE_LABEL:
|
|
return self.popular_series()
|
|
catalog = self._ensure_catalog()
|
|
entries = catalog.get(genre, [])
|
|
for entry in entries:
|
|
self._remember_series_result(entry.title, entry.url, entry.description)
|
|
return [entry.title for entry in entries if entry.title]
|
|
|
|
@staticmethod
|
|
def _title_group_key(title: str) -> str:
|
|
raw = (title or "").strip()
|
|
if not raw:
|
|
return "#"
|
|
for char in raw:
|
|
if char.isdigit():
|
|
return "0-9"
|
|
if char.isalpha():
|
|
normalized = char.casefold()
|
|
if normalized == "ä":
|
|
normalized = "a"
|
|
elif normalized == "ö":
|
|
normalized = "o"
|
|
elif normalized == "ü":
|
|
normalized = "u"
|
|
elif normalized == "ß":
|
|
normalized = "s"
|
|
return normalized.upper()
|
|
return "#"
|
|
|
|
@classmethod
|
|
def _group_matches(cls, group_code: str, title: str) -> bool:
|
|
key = cls._title_group_key(title)
|
|
if group_code == "0-9":
|
|
return key == "0-9"
|
|
if key == "0-9" or key == "#":
|
|
return False
|
|
if group_code == "A-E":
|
|
return "A" <= key <= "E"
|
|
if group_code == "F-J":
|
|
return "F" <= key <= "J"
|
|
if group_code == "K-O":
|
|
return "K" <= key <= "O"
|
|
if group_code == "P-T":
|
|
return "P" <= key <= "T"
|
|
if group_code == "U-Z":
|
|
return "U" <= key <= "Z"
|
|
return False
|
|
|
|
def _ensure_genre_group_cache(self, genre: str) -> dict[str, list[str]]:
|
|
cached = self._genre_group_cache.get(genre)
|
|
if cached is not None:
|
|
return cached
|
|
titles = self.titles_for_genre(genre)
|
|
grouped: dict[str, list[str]] = {}
|
|
for title in titles:
|
|
for code in ("A-E", "F-J", "K-O", "P-T", "U-Z", "0-9"):
|
|
if self._group_matches(code, title):
|
|
grouped.setdefault(code, []).append(title)
|
|
break
|
|
for code in grouped:
|
|
grouped[code].sort(key=str.casefold)
|
|
self._genre_group_cache[genre] = grouped
|
|
return grouped
|
|
|
|
@staticmethod
|
|
def _genre_slug(genre: str) -> str:
|
|
value = (genre or "").strip().casefold()
|
|
value = value.replace("&", " und ")
|
|
value = unicodedata.normalize("NFKD", value)
|
|
value = "".join(ch for ch in value if not unicodedata.combining(ch))
|
|
value = re.sub(r"[^a-z0-9]+", "-", value).strip("-")
|
|
return value
|
|
|
|
def _cache_list_metadata(self, title: str, description: str = "", cover: str = "") -> None:
|
|
key = self._metadata_cache_key(title)
|
|
cached = self._series_metadata_cache.get(key)
|
|
info = dict(cached[0]) if cached else {}
|
|
art = dict(cached[1]) if cached else {}
|
|
info.setdefault("title", title)
|
|
description = (description or "").strip()
|
|
if description and not info.get("plot"):
|
|
info["plot"] = description
|
|
cover = _absolute_url((cover or "").strip()) if cover else ""
|
|
if cover:
|
|
art.setdefault("thumb", cover)
|
|
art.setdefault("poster", cover)
|
|
self._series_metadata_cache[key] = (info, art)
|
|
|
|
@staticmethod
|
|
def _card_description(anchor: BeautifulSoupT) -> str:
|
|
if not anchor:
|
|
return ""
|
|
candidates: list[str] = []
|
|
direct = (anchor.get("data-search") or "").strip()
|
|
if direct:
|
|
candidates.append(direct)
|
|
title_attr = (anchor.get("data-title") or "").strip()
|
|
if title_attr:
|
|
candidates.append(title_attr)
|
|
for selector in ("p", ".description", ".desc", ".text-muted", ".small", ".overview"):
|
|
node = anchor.select_one(selector)
|
|
if node is None:
|
|
continue
|
|
text = (node.get_text(" ", strip=True) or "").strip()
|
|
if text:
|
|
candidates.append(text)
|
|
parent = anchor.parent if anchor else None
|
|
if parent is not None:
|
|
parent_data = (parent.get("data-search") or "").strip()
|
|
if parent_data:
|
|
candidates.append(parent_data)
|
|
parent_text = ""
|
|
try:
|
|
parent_text = (parent.get_text(" ", strip=True) or "").strip()
|
|
except Exception:
|
|
parent_text = ""
|
|
if parent_text and len(parent_text) > 24:
|
|
candidates.append(parent_text)
|
|
for value in candidates:
|
|
cleaned = re.sub(r"\s+", " ", str(value or "")).strip()
|
|
if cleaned and len(cleaned) > 12:
|
|
return cleaned
|
|
return ""
|
|
|
|
def _parse_genre_entries_from_soup(self, soup: BeautifulSoupT) -> list[SeriesResult]:
    """Collect the series listed on a genre page.

    ``a.show-card[href]`` cards are parsed first; only when they yield
    nothing are ``li.series-item`` rows used instead. Links must point to a
    series root (``/serie/`` without season or episode segment), are
    de-duplicated by URL and need a title. Every accepted entry is also
    remembered and its list metadata cached.
    """
    collected: list[SeriesResult] = []
    known_urls: set[str] = set()

    def _image_source(img) -> str:
        # Prefer the lazy-loading attribute over the plain source.
        if not img:
            return ""
        return img.get("data-src") or img.get("src")

    def _add_entry(title: str, description: str, href: str, cover: str) -> None:
        series_url = _absolute_url(href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
        if not series_url or "/serie/" not in series_url:
            return
        if "/staffel-" in series_url or "/episode-" in series_url:
            return
        if series_url in known_urls:
            return
        title = (title or "").strip()
        if not title:
            return
        description = (description or "").strip()
        cover_url = _absolute_url((cover or "").strip()) if cover else ""
        known_urls.add(series_url)
        self._remember_series_result(title, series_url, description)
        self._cache_list_metadata(title, description=description, cover=cover_url)
        collected.append(SeriesResult(title=title, description=description, url=series_url, cover=cover_url))

    for card in soup.select("a.show-card[href]"):
        href = (card.get("href") or "").strip()
        if not href:
            continue
        img = card.select_one("img")
        card_title = (
            (img.get("alt") if img else "")
            or (card.get("title") or "")
            or (card.get_text(" ", strip=True) or "")
        ).strip()
        _add_entry(card_title, self._card_description(card), href, _image_source(img))

    if collected:
        return collected

    for row in soup.select("li.series-item"):
        link = row.find("a", href=True)
        if not link:
            continue
        href = (link.get("href") or "").strip()
        row_title = (link.get_text(" ", strip=True) or "").strip()
        row_description = (row.get("data-search") or "").strip()
        _add_entry(row_title, row_description, href, _image_source(link.find("img")))
    return collected
|
|
|
|
def _fetch_genre_page_entries(self, genre: str, page: int) -> tuple[list[SeriesResult], bool]:
|
|
slug = self._genre_slug(genre)
|
|
if not slug:
|
|
return [], False
|
|
cache_key = (slug, page)
|
|
cached_entries = self._genre_page_entries_cache.get(cache_key)
|
|
cached_has_more = self._genre_page_has_more_cache.get(cache_key)
|
|
if cached_entries is not None and cached_has_more is not None:
|
|
return list(cached_entries), bool(cached_has_more)
|
|
url = f"{_get_base_url()}/genre/{slug}"
|
|
if page > 1:
|
|
url = f"{url}?page={int(page)}"
|
|
soup = _get_soup_simple(url)
|
|
entries = self._parse_genre_entries_from_soup(soup)
|
|
|
|
has_more = False
|
|
for anchor in soup.select("a[rel='next'][href], a[href*='?page=']"):
|
|
href = (anchor.get("href") or "").strip()
|
|
if not href:
|
|
continue
|
|
match = re.search(r"[?&]page=(\d+)", href)
|
|
if not match:
|
|
if "next" in href.casefold():
|
|
has_more = True
|
|
continue
|
|
try:
|
|
if int(match.group(1)) > int(page):
|
|
has_more = True
|
|
break
|
|
except Exception:
|
|
continue
|
|
if len(entries) > GENRE_LIST_PAGE_SIZE:
|
|
has_more = True
|
|
entries = entries[:GENRE_LIST_PAGE_SIZE]
|
|
|
|
self._genre_page_entries_cache[cache_key] = list(entries)
|
|
self._genre_page_has_more_cache[cache_key] = bool(has_more)
|
|
return list(entries), bool(has_more)
|
|
|
|
def titles_for_genre_page(self, genre: str, page: int) -> list[str]:
|
|
genre = (genre or "").strip()
|
|
page = max(1, int(page or 1))
|
|
entries, _ = self._fetch_genre_page_entries(genre, page)
|
|
return [entry.title for entry in entries if entry.title]
|
|
|
|
def genre_has_more(self, genre: str, page: int) -> bool:
|
|
genre = (genre or "").strip()
|
|
page = max(1, int(page or 1))
|
|
slug = self._genre_slug(genre)
|
|
if not slug:
|
|
return False
|
|
cache_key = (slug, page)
|
|
cached = self._genre_page_has_more_cache.get(cache_key)
|
|
if cached is not None:
|
|
return bool(cached)
|
|
_, has_more = self._fetch_genre_page_entries(genre, page)
|
|
return bool(has_more)
|
|
|
|
def titles_for_genre_group_page(self, genre: str, group_code: str, page: int = 1, page_size: int = 10) -> list[str]:
|
|
genre = (genre or "").strip()
|
|
group_code = (group_code or "").strip()
|
|
page = max(1, int(page or 1))
|
|
page_size = max(1, int(page_size or 10))
|
|
needed = page * page_size + 1
|
|
matched: list[str] = []
|
|
try:
|
|
page_index = 1
|
|
has_more = True
|
|
while has_more:
|
|
page_entries, has_more = self._fetch_genre_page_entries(genre, page_index)
|
|
for entry in page_entries:
|
|
title = entry.title
|
|
if self._group_matches(group_code, title):
|
|
matched.append(title)
|
|
if len(matched) >= needed:
|
|
break
|
|
page_index += 1
|
|
start = (page - 1) * page_size
|
|
end = start + page_size
|
|
return list(matched[start:end])
|
|
except Exception:
|
|
grouped = self._ensure_genre_group_cache(genre)
|
|
titles = grouped.get(group_code, [])
|
|
start = (page - 1) * page_size
|
|
end = start + page_size
|
|
return list(titles[start:end])
|
|
|
|
def genre_group_has_more(self, genre: str, group_code: str, page: int = 1, page_size: int = 10) -> bool:
|
|
genre = (genre or "").strip()
|
|
group_code = (group_code or "").strip()
|
|
page = max(1, int(page or 1))
|
|
page_size = max(1, int(page_size or 10))
|
|
needed = page * page_size + 1
|
|
count = 0
|
|
try:
|
|
page_index = 1
|
|
has_more = True
|
|
while has_more:
|
|
page_entries, has_more = self._fetch_genre_page_entries(genre, page_index)
|
|
for entry in page_entries:
|
|
title = entry.title
|
|
if self._group_matches(group_code, title):
|
|
count += 1
|
|
if count >= needed:
|
|
return True
|
|
page_index += 1
|
|
return False
|
|
except Exception:
|
|
grouped = self._ensure_genre_group_cache(genre)
|
|
titles = grouped.get(group_code, [])
|
|
return len(titles) > (page * page_size)
|
|
|
|
def _ensure_popular(self) -> list[SeriesResult]:
|
|
"""Laedt und cached die Liste der beliebten Serien aus `/beliebte-serien`."""
|
|
if self._popular_cache is not None:
|
|
return list(self._popular_cache)
|
|
soup = _get_soup_simple(_popular_series_url())
|
|
results: list[SeriesResult] = []
|
|
seen: set[str] = set()
|
|
|
|
# Neues Layout (Stand: 2026-01): Abschnitt "Meistgesehen" hat Karten mit
|
|
# `a.show-card` und Titel im `img alt=...`.
|
|
anchors = None
|
|
for section in soup.select("div.mb-5"):
|
|
h2 = section.select_one("h2")
|
|
label = (h2.get_text(" ", strip=True) if h2 else "").casefold()
|
|
if "meistgesehen" in label:
|
|
anchors = section.select("a.show-card[href]")
|
|
break
|
|
if anchors is None:
|
|
anchors = soup.select("a.show-card[href]")
|
|
|
|
for anchor in anchors:
|
|
href = (anchor.get("href") or "").strip()
|
|
if not href or "/serie/" not in href:
|
|
continue
|
|
img = anchor.select_one("img[alt]")
|
|
title = ((img.get("alt") if img else "") or "").strip()
|
|
if not title or title in seen:
|
|
continue
|
|
url = _absolute_url(href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
|
|
url = re.sub(r"/staffel-\d+(?:/.*)?$", "", url).rstrip("/")
|
|
if not url:
|
|
continue
|
|
_log_parsed_url(url)
|
|
seen.add(title)
|
|
results.append(SeriesResult(title=title, description="", url=url))
|
|
|
|
|
|
self._popular_cache = list(results)
|
|
return list(results)
|
|
|
|
@staticmethod
|
|
def _season_label(number: int) -> str:
|
|
return f"Staffel {number}"
|
|
|
|
@staticmethod
|
|
def _episode_label(info: EpisodeInfo) -> str:
|
|
suffix_parts: list[str] = []
|
|
if info.original_title:
|
|
suffix_parts.append(info.original_title)
|
|
# Staffel nicht im Episoden-Label anzeigen (wird im UI bereits gesetzt).
|
|
suffix = f" ({' | '.join(suffix_parts)})" if suffix_parts else ""
|
|
|
|
return f"Episode {info.number}: {info.title}{suffix}"
|
|
|
|
@staticmethod
|
|
def _parse_season_number(label: str) -> int | None:
|
|
digits = "".join(ch for ch in label if ch.isdigit())
|
|
if not digits:
|
|
return None
|
|
return int(digits)
|
|
|
|
def _clear_episode_cache_for_title(self, title: str) -> None:
|
|
keys_to_remove = [key for key in self._episode_label_cache if key[0] == title]
|
|
for key in keys_to_remove:
|
|
self._episode_label_cache.pop(key, None)
|
|
keys_to_remove = [key for key in self._hoster_cache if key[0] == title]
|
|
for key in keys_to_remove:
|
|
self._hoster_cache.pop(key, None)
|
|
|
|
def _cache_episode_labels(self, title: str, season_label: str, season_info: SeasonInfo) -> None:
|
|
cache_key = (title, season_label)
|
|
self._episode_label_cache[cache_key] = {
|
|
self._episode_label(info): info for info in season_info.episodes
|
|
}
|
|
|
|
def _ensure_season_links(self, title: str) -> list[SeasonInfo]:
|
|
cached = self._season_links_cache.get(title)
|
|
if cached is not None:
|
|
return list(cached)
|
|
series = self._series_results.get(title)
|
|
if not series:
|
|
cached_url = self._title_url_cache.get(title.casefold().strip(), "")
|
|
if cached_url:
|
|
series = SeriesResult(title=title, description="", url=cached_url)
|
|
self._series_results[title] = series
|
|
if not series:
|
|
catalog = self._ensure_catalog()
|
|
lookup_key = title.casefold().strip()
|
|
for entries in catalog.values():
|
|
for entry in entries:
|
|
if entry.title.casefold().strip() == lookup_key:
|
|
series = entry
|
|
self._remember_series_result(entry.title, entry.url, entry.description)
|
|
break
|
|
if series:
|
|
break
|
|
if not series:
|
|
return []
|
|
session_links = self._load_session_season_links(series.url)
|
|
if session_links:
|
|
self._season_links_cache[title] = list(session_links)
|
|
return list(session_links)
|
|
try:
|
|
series_soup = _get_soup(series.url, session=get_requests_session("serienstream", headers=HEADERS))
|
|
info_labels, art = _extract_series_metadata(series_soup)
|
|
if series.description and "plot" not in info_labels:
|
|
info_labels["plot"] = series.description
|
|
cache_key = self._metadata_cache_key(title)
|
|
if info_labels or art:
|
|
self._series_metadata_cache[cache_key] = (info_labels, art)
|
|
self._series_metadata_full.add(cache_key)
|
|
|
|
base_series_url = _series_root_url(_extract_canonical_url(series_soup, series.url))
|
|
season_links = _extract_season_links(series_soup)
|
|
season_count = _extract_number_of_seasons(series_soup)
|
|
if season_count and (not season_links or len(season_links) < season_count):
|
|
existing = {number for number, _ in season_links}
|
|
for number in range(1, season_count + 1):
|
|
if number in existing:
|
|
continue
|
|
season_url = f"{base_series_url}/staffel-{number}"
|
|
_log_parsed_url(season_url)
|
|
season_links.append((number, season_url))
|
|
season_links.sort(key=lambda item: item[0])
|
|
seasons = [SeasonInfo(number=number, url=url, episodes=[]) for number, url in season_links]
|
|
seasons.sort(key=lambda s: s.number)
|
|
except Exception as exc: # pragma: no cover - defensive logging
|
|
raise RuntimeError(f"Serienstream-Staffeln konnten nicht geladen werden: {exc}") from exc
|
|
self._season_links_cache[title] = list(seasons)
|
|
self._save_session_season_links(series.url, seasons)
|
|
return list(seasons)
|
|
|
|
def remember_series_url(self, title: str, series_url: str) -> None:
|
|
title = (title or "").strip()
|
|
series_url = (series_url or "").strip()
|
|
if not title or not series_url:
|
|
return
|
|
self._remember_series_result(title, series_url)
|
|
|
|
def metadata_for(self, title: str) -> tuple[dict[str, str], dict[str, str], Optional[list[Any]]]:
|
|
title = (title or "").strip()
|
|
if not title or not self._requests_available:
|
|
return {}, {}, None
|
|
|
|
cache_key = self._metadata_cache_key(title)
|
|
cached = self._series_metadata_cache.get(cache_key)
|
|
if cached is not None and cache_key in self._series_metadata_full:
|
|
info, art = cached
|
|
return dict(info), dict(art), None
|
|
|
|
series = self._series_for_title(title)
|
|
if series is None or not series.url:
|
|
info = {"title": title}
|
|
self._series_metadata_cache[cache_key] = (dict(info), {})
|
|
return info, {}, None
|
|
|
|
info: dict[str, str] = dict(cached[0]) if cached else {"title": title}
|
|
art: dict[str, str] = dict(cached[1]) if cached else {}
|
|
info.setdefault("title", title)
|
|
if series.description:
|
|
info.setdefault("plot", series.description)
|
|
|
|
# Fuer Listenansichten laden wir pro Seite die Detail-Metadaten vollstaendig nach.
|
|
loaded_full = False
|
|
try:
|
|
soup = _get_soup(series.url, session=get_requests_session("serienstream", headers=HEADERS))
|
|
parsed_info, parsed_art = _extract_series_metadata(soup)
|
|
if parsed_info:
|
|
info.update(parsed_info)
|
|
if parsed_art:
|
|
art.update(parsed_art)
|
|
loaded_full = True
|
|
except Exception:
|
|
pass
|
|
|
|
self._series_metadata_cache[cache_key] = (dict(info), dict(art))
|
|
if loaded_full:
|
|
self._series_metadata_full.add(cache_key)
|
|
return info, art, None
|
|
|
|
def series_url_for_title(self, title: str) -> str:
    """Resolve the series URL for ``title`` from the in-memory caches.

    Lookup order: the exact (stripped) title in ``_series_results``, the
    casefolded title in ``_title_url_cache``, then a case-insensitive scan
    over the titles stored in ``_series_results``.  Returns ``""`` when the
    title is empty or nothing with a non-empty URL matches.
    """
    title = (title or "").strip()
    if not title:
        return ""

    exact = self._series_results.get(title)
    exact_url = exact.url if exact else ""
    if exact_url:
        return exact_url

    folded = title.casefold().strip()
    remembered_url = self._title_url_cache.get(folded, "")
    if remembered_url:
        return remembered_url

    return next(
        (
            candidate.url
            for candidate in self._series_results.values()
            if candidate.title.casefold().strip() == folded and candidate.url
        ),
        "",
    )
|
|
|
|
def _ensure_season_episodes(self, title: str, season_number: int) -> SeasonInfo | None:
    """Return the season ``season_number`` of ``title`` including its episodes.

    Sources are tried in this order:

    1. ``_season_cache`` - an entry with a non-empty episode list is returned as is.
    2. ``_load_session_season_episodes`` for the season URL obtained from
       ``_ensure_season_links``.
    3. Fetching and parsing the season page; the parsed episodes are then
       passed to ``_save_session_season_episodes``.

    Returns ``None`` when the season links contain no such season number.

    Raises:
        RuntimeError: when fetching or parsing the season page fails.
    """
    known_seasons = self._season_cache.get(title) or []
    for known in known_seasons:
        if known.number == season_number and known.episodes:
            return known

    target = next(
        (link for link in self._ensure_season_links(title) if link.number == season_number),
        None,
    )
    if not target:
        return None

    def _remember(info):
        # Replace any older entry for this season and keep the list ordered by number.
        merged = [entry for entry in known_seasons if entry.number != season_number]
        merged.append(info)
        self._season_cache[title] = sorted(merged, key=lambda entry: entry.number)

    session_episodes = self._load_session_season_episodes(target.url)
    if session_episodes:
        restored = SeasonInfo(number=target.number, url=target.url, episodes=list(session_episodes))
        _remember(restored)
        return restored

    try:
        session = get_requests_session("serienstream", headers=HEADERS)
        season_soup = _get_soup(target.url, session=session)
        fetched = SeasonInfo(number=target.number, url=target.url, episodes=_extract_episodes(season_soup))
    except Exception as exc:  # pragma: no cover - defensive logging
        raise RuntimeError(f"Serienstream-Episoden konnten nicht geladen werden: {exc}") from exc

    _remember(fetched)
    self._save_session_season_episodes(target.url, fetched.episodes)
    return fetched
|
|
|
|
def _lookup_episode(self, title: str, season_label: str, episode_label: str) -> EpisodeInfo | None:
    """Return the episode stored under ``episode_label`` for a title/season pair.

    A non-empty entry in ``_episode_label_cache`` answers the lookup directly.
    Otherwise the season label is parsed, the season is loaded through
    ``_ensure_season_episodes`` and its labels are cached before looking up
    the episode.  Returns ``None`` when the label cannot be parsed, the
    season cannot be loaded, or the episode label is unknown.
    """
    cache_key = (title, season_label)
    label_map = self._episode_label_cache.get(cache_key)
    if label_map:
        return label_map.get(episode_label)

    season_number = self._parse_season_number(season_label)
    if season_number is None:
        return None

    season_info = self._ensure_season_episodes(title, season_number)
    if not season_info:
        return None

    self._cache_episode_labels(title, season_label, season_info)
    return self._episode_label_cache.get(cache_key, {}).get(episode_label)
|
|
|
|
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> list[str]:
    """Search Serienstream for ``query`` and return the titles of all hits.

    The body contains no ``await``; the search itself runs synchronously
    through ``search_series``.

    Args:
        query: Search text.  ``None`` or a blank string resets every search
            related cache and returns ``[]``.
        progress_callback: Optional callback handed to ``_emit_progress`` and
            ``search_series``.

    Returns:
        The titles of the results, in the order ``search_series`` returned them.
        On success ``_series_results`` is rebuilt from the hits and the
        season, season-link and episode-label caches are cleared.

    Raises:
        RuntimeError: when requests/bs4 are unavailable or the search fails.
            On a failed search all search related caches are dropped.
    """

    def _drop_cached_state() -> None:
        # Forget everything derived from earlier searches.  The season-link
        # cache is included so stale links cannot outlive a failed search.
        self._series_results.clear()
        self._season_cache.clear()
        self._season_links_cache.clear()
        self._episode_label_cache.clear()
        self._catalog_cache = None
        self._series_metadata_cache.clear()
        self._series_metadata_full.clear()

    # Tolerate ``None`` like the sibling methods do for their title arguments.
    query = (query or "").strip()
    if not query:
        _drop_cached_state()
        return []
    if not self._requests_available:
        raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 nicht suchen.")

    try:
        # Uses the catalogue (/serien), which is now grouped by genre.
        # An Ajax endpoint exists as an alternative, but it is not always reliably reachable.
        _emit_progress(progress_callback, "Serienstream Suche startet", 10)
        results = search_series(query, progress_callback=progress_callback)
    except Exception as exc:  # pragma: no cover - defensive logging
        _drop_cached_state()
        raise RuntimeError(f"Serienstream-Suche fehlgeschlagen: {exc}") from exc

    self._series_results = {}
    for result in results:
        self._remember_series_result(result.title, result.url, result.description)
    self._season_cache.clear()
    self._season_links_cache.clear()
    self._episode_label_cache.clear()
    _emit_progress(progress_callback, f"Treffer aufbereitet: {len(results)}", 95)
    return [result.title for result in results]
|
|
|
|
def _ensure_seasons(self, title: str) -> list[SeasonInfo]:
    """Return the seasons known for ``title``, loading the season links if needed.

    A hit in ``_season_cache`` returns the cached list itself (optionally
    logging the series and season URLs).  On a miss the series is taken from
    ``_series_results`` or, failing that, looked up case-insensitively in the
    catalogue; an unknown title yields ``[]``.  Freshly loaded season links
    are stored in ``_season_cache`` and a copy is returned.
    """
    if title in self._season_cache:
        cached_seasons = self._season_cache[title]
        # Log the URLs on cache hits as well, so it stays traceable which
        # pages are relevant for the season/episode lists.
        if _get_setting_bool(GLOBAL_SETTING_LOG_URLS, default=False):
            known = self._series_results.get(title)
            urls = [known.url] if known and known.url else []
            urls.extend(item.url for item in cached_seasons if item.url)
            for url in urls:
                _log_url(url, kind="CACHE")
        return cached_seasons

    series = self._series_results.get(title)
    if not series:
        # Kodi restarts the plugin for every navigation step, so the in-memory
        # search cache is lost.  Resolve the title against the catalogue again
        # to obtain the series URL.
        catalog = self._ensure_catalog()
        wanted = title.casefold().strip()
        series = next(
            (
                entry
                for entries in catalog.values()
                for entry in entries
                if entry.title.casefold().strip() == wanted
            ),
            None,
        )
        if series is not None:
            self._remember_series_result(series.title, series.url, series.description)
    if not series:
        return []

    season_links = self._ensure_season_links(title)
    self._clear_episode_cache_for_title(title)
    self._season_cache[title] = list(season_links)
    return list(season_links)
|
|
|
|
def seasons_for(self, title: str) -> list[str]:
    """Return one display label per season of ``title``.

    Seasons come from ``_ensure_seasons``; each season number is turned into
    its label by ``_season_label``.
    """
    season_numbers = (entry.number for entry in self._ensure_seasons(title))
    return [self._season_label(number) for number in season_numbers]
|
|
|
|
def episodes_for(self, title: str, season: str) -> list[str]:
    """Return the episode labels of one season of ``title``.

    ``season`` is a season label that ``_parse_season_number`` converts to a
    number.  An unparsable label or a season that cannot be loaded yields
    ``[]``.  On success the label mapping is also stored through
    ``_cache_episode_labels``.
    """
    season_number = self._parse_season_number(season)
    if season_number is None:
        return []

    season_info = self._ensure_season_episodes(title, season_number)
    if not season_info:
        return []

    episode_labels = [self._episode_label(episode) for episode in season_info.episodes]
    self._cache_episode_labels(title, season, season_info)
    return episode_labels
|
|
|
|
def stream_link_for(self, title: str, season: str, episode: str) -> str | None:
    """Return the stream link for one episode, or ``None`` if the episode is unknown.

    The episode is resolved through ``_lookup_episode``; its URL and the
    configured preferred hosters are passed to ``fetch_episode_stream_link``.
    A non-empty link is logged with kind ``"FOUND"``.

    Raises:
        RuntimeError: when requests/bs4 are unavailable or fetching the link fails.
    """
    if not self._requests_available:
        raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 keine Stream-Links liefern.")

    episode_info = self._lookup_episode(title, season, episode)
    if not episode_info:
        return None

    try:
        stream_link = fetch_episode_stream_link(
            episode_info.url,
            preferred_hosters=self._preferred_hosters,
        )
        if stream_link:
            _log_url(stream_link, kind="FOUND")
    except Exception as exc:  # pragma: no cover - defensive logging
        raise RuntimeError(f"Stream-Link konnte nicht geladen werden: {exc}") from exc
    return stream_link
|
|
|
|
def episode_url_for(self, title: str, season: str, episode: str) -> str:
    """Return the page URL of one episode, or ``""`` when it is not known.

    The label cache for ``(title, season)`` is consulted first; if it has no
    entry with a URL for ``episode`` the lookup falls back to
    ``_lookup_episode``.
    """
    label_map = self._episode_label_cache.get((title, season))
    if label_map:
        cached_episode = label_map.get(episode)
        if cached_episode and cached_episode.url:
            return cached_episode.url

    looked_up = self._lookup_episode(title, season, episode)
    if not looked_up:
        return ""
    return looked_up.url or ""
|
|
|
|
def available_hosters_for(self, title: str, season: str, episode: str) -> list[str]:
    """Return the hoster names offered for one episode.

    Results are cached in ``_hoster_cache`` per ``(title, season, episode)``
    and always returned as a fresh list.  An episode that ``_lookup_episode``
    cannot resolve yields ``[]`` (and is not cached).

    Raises:
        RuntimeError: when requests/bs4 are unavailable or loading the hosters fails.
    """
    if not self._requests_available:
        raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 keine Hoster laden.")

    cache_key = (title, season, episode)
    known_names = self._hoster_cache.get(cache_key)
    if known_names is None:
        episode_info = self._lookup_episode(title, season, episode)
        if not episode_info:
            return []
        try:
            fetched_names = fetch_episode_hoster_names(episode_info.url)
        except Exception as exc:  # pragma: no cover - defensive logging
            raise RuntimeError(f"Hoster konnten nicht geladen werden: {exc}") from exc
        self._hoster_cache[cache_key] = list(fetched_names)
        return list(fetched_names)
    return list(known_names)
|
|
|
|
def latest_episodes(self, page: int = 1) -> list[LatestEpisode]:
    """Return the newest episodes listed on `/neue-episoden`.

    ``page`` is coerced to an integer of at least 1; anything that cannot be
    converted falls back to page 1.  Parsed pages are kept in
    ``_latest_cache`` and a fresh list is returned on every call.  Without
    requests/bs4 the result is ``[]``.
    """
    if not self._requests_available:
        return []

    try:
        page_number = max(1, int(page or 1))
    except Exception:
        page_number = 1

    cached_page = self._latest_cache.get(page_number)
    if cached_page is not None:
        return list(cached_page)

    base_url = _latest_episodes_url()
    # Only pages after the first carry an explicit page parameter.
    page_url = f"{base_url}?page={page_number}" if page_number > 1 else base_url
    episodes = _extract_latest_episodes(_get_soup_simple(page_url))
    self._latest_cache[page_number] = list(episodes)
    return list(episodes)
|
|
|
|
def available_hosters_for_url(self, episode_url: str) -> list[str]:
    """Return the hoster names offered on the episode page at ``episode_url``.

    The URL is normalised with ``_absolute_url`` and used as the key of
    ``_latest_hoster_cache``; a fresh list is returned on every call.

    Raises:
        RuntimeError: when requests/bs4 are unavailable or loading the hosters fails.
    """
    if not self._requests_available:
        raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 keine Hoster laden.")

    page_url = _absolute_url(episode_url)
    known_names = self._latest_hoster_cache.get(page_url)
    if known_names is None:
        try:
            fetched_names = fetch_episode_hoster_names(page_url)
        except Exception as exc:  # pragma: no cover - defensive logging
            raise RuntimeError(f"Hoster konnten nicht geladen werden: {exc}") from exc
        self._latest_hoster_cache[page_url] = list(fetched_names)
        return list(fetched_names)
    return list(known_names)
|
|
|
|
def stream_link_for_url(self, episode_url: str) -> str | None:
    """Return the stream link for the episode page at ``episode_url``.

    The URL is normalised with ``_absolute_url`` and passed, together with
    the configured preferred hosters, to ``fetch_episode_stream_link``.  A
    non-empty link is logged with kind ``"FOUND"``; the fetched value is
    returned unchanged.

    Raises:
        RuntimeError: when requests/bs4 are unavailable or fetching the link fails.
    """
    if not self._requests_available:
        raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 keine Stream-Links liefern.")

    page_url = _absolute_url(episode_url)
    try:
        stream_link = fetch_episode_stream_link(
            page_url,
            preferred_hosters=self._preferred_hosters,
        )
        if stream_link:
            _log_url(stream_link, kind="FOUND")
    except Exception as exc:  # pragma: no cover - defensive logging
        raise RuntimeError(f"Stream-Link konnte nicht geladen werden: {exc}") from exc
    return stream_link
|
|
|
|
def resolve_stream_link(self, link: str) -> str | None:
    """Follow ``link`` to its final target and return the resulting URL.

    ``resolve_redirect`` is applied first; an empty result yields ``None``.
    If ``resolveurl_backend.resolve`` can be imported and returns a non-empty
    value for the redirect target, that value is returned (logged as
    ``"MEDIA"``).  Otherwise the redirect target itself is returned (logged
    as ``"FINAL"``).

    Raises:
        RuntimeError: when requests/bs4 are unavailable or any step fails.
    """
    if not self._requests_available:
        raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 keine Stream-Links aufloesen.")

    try:
        redirect_target = resolve_redirect(link)
        if not redirect_target:
            return None

        # The ResolveURL backend is optional; any import problem disables it.
        try:
            from resolveurl_backend import resolve as resolveurl_resolve
        except Exception:
            resolveurl_resolve = None

        media_url = resolveurl_resolve(redirect_target) if callable(resolveurl_resolve) else None
        if media_url:
            _log_url("ResolveURL", kind="HOSTER_RESOLVER")
            _log_url(media_url, kind="MEDIA")
            return media_url

        _log_url(redirect_target, kind="FINAL")
        return redirect_target
    except Exception as exc:  # pragma: no cover - defensive logging
        raise RuntimeError(f"Stream-Link konnte nicht verfolgt werden: {exc}") from exc
|
|
|
|
def set_preferred_hosters(self, hosters: list[str]) -> None:
    """Replace the preferred hoster order with a normalised copy of ``hosters``.

    Names are stripped and lower-cased.  Blank, ``None`` and non-string
    entries are skipped, and duplicates are dropped so that the first
    occurrence decides the position.  When nothing usable remains - including
    ``hosters`` being ``None`` or empty - the current preference is kept.
    """
    cleaned = (
        hoster.strip().lower()
        for hoster in (hosters or [])
        if isinstance(hoster, str)
    )
    # dict.fromkeys removes duplicates while preserving insertion order.
    normalized = list(dict.fromkeys(name for name in cleaned if name))
    if normalized:
        self._preferred_hosters = normalized
|
|
|
|
def reset_preferred_hosters(self) -> None:
    """Restore the preferred hosters to a fresh copy of the default list."""
    self._preferred_hosters = [*self._default_preferred_hosters]
|
|
|
|
|
|
# Alias for the automatic plugin discovery.
Plugin = SerienstreamPlugin
|