dev: bump to 0.1.66 and harden resolveurl + serienstream
This commit is contained in:
@@ -43,8 +43,10 @@ except ImportError: # pragma: no cover - allow running outside Kodi
|
||||
|
||||
from plugin_interface import BasisPlugin
|
||||
from plugin_helpers import dump_response_html, get_setting_bool, get_setting_string, log_error, log_url, notify_url
|
||||
from http_session_pool import get_requests_session
|
||||
from http_session_pool import close_all_sessions, get_requests_session
|
||||
from regex_patterns import SEASON_EPISODE_TAG, SEASON_EPISODE_URL
|
||||
from search_utils import matches_query as _matches_query, normalize_search_text as _normalize_search_text
|
||||
from genre_utils import normalize_genre_label as _normalize_genre_label
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from requests import Session as RequestsSession
|
||||
@@ -293,27 +295,6 @@ def _normalize_text(value: str) -> str:
|
||||
return value
|
||||
|
||||
|
||||
def _normalize_search_text(value: str) -> str:
|
||||
"""Normalisiert Text für die Suche ohne Wortgrenzen zu "verschmelzen".
|
||||
|
||||
Wichtig: Wir ersetzen Nicht-Alphanumerisches durch Leerzeichen, statt es zu entfernen.
|
||||
Dadurch entstehen keine künstlichen Treffer über Wortgrenzen hinweg (z.B. "an" + "na" -> "anna").
|
||||
"""
|
||||
|
||||
value = (value or "").casefold()
|
||||
value = re.sub(r"[^a-z0-9]+", " ", value)
|
||||
value = re.sub(r"\s+", " ", value).strip()
|
||||
return value
|
||||
|
||||
|
||||
def _matches_query(query: str, *, title: str) -> bool:
    """Return True when the normalized query occurs in the title as whole words.

    Both strings are passed through ``_normalize_search_text``. Query and
    title are each wrapped in single spaces before the substring test, so a
    match must start and end on a word boundary of the normalized title.
    A query that normalizes to the empty string never matches.
    """
    needle = _normalize_search_text(query)
    if needle:
        padded_title = f" {_normalize_search_text(title)} "
        padded_needle = f" {needle} "
        return padded_needle in padded_title
    return False
|
||||
|
||||
|
||||
def _is_episode_tba(title: str, original_title: str) -> bool:
|
||||
combined = f"{title} {original_title}".casefold()
|
||||
markers = ("tba", "demnächst", "demnaechst", "coming soon", "to be announced")
|
||||
@@ -392,6 +373,42 @@ def _ensure_requests() -> None:
|
||||
raise RuntimeError("requests/bs4 sind nicht verfuegbar.")
|
||||
|
||||
|
||||
def _is_retryable_http_error(exc: Exception) -> bool:
|
||||
text = str(exc or "").casefold()
|
||||
markers = (
|
||||
"connection reset by peer",
|
||||
"connection aborted",
|
||||
"remote end closed connection",
|
||||
"temporarily unavailable",
|
||||
"timed out",
|
||||
"read timeout",
|
||||
)
|
||||
return any(marker in text for marker in markers)
|
||||
|
||||
|
||||
def _http_get(url: str, *, timeout: int, params: Optional[dict[str, str]] = None):
    """Perform a GET request with one retry on transient connection errors.

    A session is obtained from ``get_requests_session("serienstream", ...)``
    for every attempt. The response is returned once ``raise_for_status()``
    has passed.

    If the first attempt fails with an error that
    ``_is_retryable_http_error`` accepts, the failure is logged,
    ``close_all_sessions()`` is called (presumably so the second attempt gets
    a fresh connection; confirm against http_session_pool) and the request is
    tried once more. Any other error, and any error on the second attempt, is
    re-raised unchanged.
    """
    _ensure_requests()
    max_attempts = 2
    failure: Exception | None = None
    for attempt_index in range(max_attempts):
        is_last_attempt = attempt_index >= max_attempts - 1
        pooled = get_requests_session("serienstream", headers=HEADERS)
        try:
            reply = pooled.get(url, params=params, headers=HEADERS, timeout=timeout)
            reply.raise_for_status()
        except Exception as exc:
            failure = exc
            if is_last_attempt or not _is_retryable_http_error(exc):
                raise
            _log_error(f"GET {url} retry nach Fehler: {exc}")
            # Best effort: a failure while closing sessions must not mask the
            # retry itself.
            try:
                close_all_sessions()
            except Exception:
                pass
        else:
            return reply
    # Defensive tail: the loop above either returns or raises on the last
    # attempt, so this is only a safety net.
    if failure is not None:
        raise failure
    raise RuntimeError(f"GET {url} fehlgeschlagen")
|
||||
|
||||
|
||||
def _looks_like_cloudflare_challenge(body: str) -> bool:
|
||||
lower = body.lower()
|
||||
markers = (
|
||||
@@ -409,11 +426,13 @@ def _looks_like_cloudflare_challenge(body: str) -> bool:
|
||||
def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> BeautifulSoupT:
|
||||
_ensure_requests()
|
||||
_log_visit(url)
|
||||
sess = session or get_requests_session("serienstream", headers=HEADERS)
|
||||
response = None
|
||||
try:
|
||||
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
|
||||
response.raise_for_status()
|
||||
if session is not None:
|
||||
response = session.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
|
||||
response.raise_for_status()
|
||||
else:
|
||||
response = _http_get(url, timeout=DEFAULT_TIMEOUT)
|
||||
except Exception as exc:
|
||||
_log_error(f"GET {url} failed: {exc}")
|
||||
raise
|
||||
@@ -437,11 +456,9 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
|
||||
def _get_html_simple(url: str) -> str:
|
||||
_ensure_requests()
|
||||
_log_visit(url)
|
||||
sess = get_requests_session("serienstream", headers=HEADERS)
|
||||
response = None
|
||||
try:
|
||||
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
|
||||
response.raise_for_status()
|
||||
response = _http_get(url, timeout=DEFAULT_TIMEOUT)
|
||||
except Exception as exc:
|
||||
_log_error(f"GET {url} failed: {exc}")
|
||||
raise
|
||||
@@ -468,27 +485,6 @@ def _get_soup_simple(url: str) -> BeautifulSoupT:
|
||||
|
||||
|
||||
def _extract_genre_names_from_html(body: str) -> List[str]:
|
||||
def _normalize_genre_label(raw: str) -> str:
|
||||
text = unescape(re.sub(r"\s+", " ", str(raw or ""))).strip()
|
||||
if not text:
|
||||
return ""
|
||||
key_prefix = "filter.genre_"
|
||||
if text.casefold().startswith(key_prefix):
|
||||
slug = text[len(key_prefix) :].strip().casefold()
|
||||
slug = slug.replace("_", "-")
|
||||
slug = re.sub(r"[^a-z0-9-]+", "-", slug).strip("-")
|
||||
if not slug:
|
||||
return ""
|
||||
special = {
|
||||
"doku-soap": "Doku-Soap",
|
||||
"scifi": "SciFi",
|
||||
"fighting-shounen": "Fighting-Shounen",
|
||||
}
|
||||
if slug in special:
|
||||
return special[slug]
|
||||
return " ".join(chunk.capitalize() for chunk in slug.split("-") if chunk)
|
||||
return text
|
||||
|
||||
names: List[str] = []
|
||||
seen: set[str] = set()
|
||||
pattern = re.compile(
|
||||
@@ -577,9 +573,6 @@ def _search_series_api(query: str) -> List[SeriesResult]:
|
||||
def _search_series_server(query: str) -> List[SeriesResult]:
|
||||
if not query:
|
||||
return []
|
||||
api_results = _search_series_api(query)
|
||||
if api_results:
|
||||
return api_results
|
||||
base = _get_base_url()
|
||||
search_url = f"{base}/search?q={quote(query)}"
|
||||
alt_url = f"{base}/suche?q={quote(query)}"
|
||||
@@ -614,6 +607,9 @@ def _search_series_server(query: str) -> List[SeriesResult]:
|
||||
results.append(SeriesResult(title=title, description="", url=url_abs))
|
||||
if results:
|
||||
return results
|
||||
api_results = _search_series_api(query)
|
||||
if api_results:
|
||||
return api_results
|
||||
return []
|
||||
|
||||
|
||||
@@ -718,56 +714,45 @@ def search_series(query: str, *, progress_callback: ProgressCallback = None) ->
|
||||
_ensure_requests()
|
||||
if not _normalize_search_text(query):
|
||||
return []
|
||||
_emit_progress(progress_callback, "Server-Suche", 15)
|
||||
server_results = _search_series_server(query)
|
||||
if server_results:
|
||||
_emit_progress(progress_callback, f"Server-Treffer: {len(server_results)}", 35)
|
||||
return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
|
||||
_emit_progress(progress_callback, "Pruefe Such-Cache", 42)
|
||||
_emit_progress(progress_callback, "Pruefe Such-Cache", 15)
|
||||
cached = _load_catalog_index_from_cache()
|
||||
if cached is not None:
|
||||
_emit_progress(progress_callback, f"Cache-Treffer: {len(cached)}", 52)
|
||||
return [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]
|
||||
matched_from_cache = [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]
|
||||
_emit_progress(progress_callback, f"Cache-Treffer: {len(cached)}", 35)
|
||||
if matched_from_cache:
|
||||
return matched_from_cache
|
||||
|
||||
_emit_progress(progress_callback, "Lade Katalogseite", 58)
|
||||
_emit_progress(progress_callback, "Lade Katalogseite", 42)
|
||||
catalog_url = f"{_get_base_url()}/serien?by=genre"
|
||||
body = _get_html_simple(catalog_url)
|
||||
items = _extract_catalog_index_from_html(body, progress_callback=progress_callback)
|
||||
if not items:
|
||||
_emit_progress(progress_callback, "Fallback-Parser", 70)
|
||||
soup = BeautifulSoup(body, "html.parser")
|
||||
items: List[SeriesResult] = []
|
||||
try:
|
||||
# Bevorzugt den Soup-Helper, damit Tests HTML einfache injizieren koennen.
|
||||
soup = _get_soup_simple(catalog_url)
|
||||
items = _catalog_index_from_soup(soup)
|
||||
except Exception:
|
||||
body = _get_html_simple(catalog_url)
|
||||
items = _extract_catalog_index_from_html(body, progress_callback=progress_callback)
|
||||
if not items:
|
||||
_emit_progress(progress_callback, "Fallback-Parser", 58)
|
||||
soup = BeautifulSoup(body, "html.parser")
|
||||
items = _catalog_index_from_soup(soup)
|
||||
if items:
|
||||
_store_catalog_index_in_cache(items)
|
||||
_emit_progress(progress_callback, f"Filtere Treffer ({len(items)})", 85)
|
||||
return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
|
||||
_emit_progress(progress_callback, f"Filtere Treffer ({len(items)})", 70)
|
||||
return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
|
||||
|
||||
_emit_progress(progress_callback, "Server-Suche", 85)
|
||||
server_results = _search_series_server(query)
|
||||
if server_results:
|
||||
_emit_progress(progress_callback, f"Server-Treffer: {len(server_results)}", 95)
|
||||
return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
|
||||
return []
|
||||
|
||||
|
||||
def parse_series_catalog(soup: BeautifulSoupT) -> Dict[str, List[SeriesResult]]:
|
||||
"""Parst die Serien-Übersicht (/serien) und liefert Genre -> Serienliste."""
|
||||
catalog: Dict[str, List[SeriesResult]] = {}
|
||||
|
||||
def _normalize_genre_label(raw: str) -> str:
|
||||
text = re.sub(r"\s+", " ", str(raw or "")).strip()
|
||||
if not text:
|
||||
return ""
|
||||
key_prefix = "filter.genre_"
|
||||
if text.casefold().startswith(key_prefix):
|
||||
slug = text[len(key_prefix) :].strip().casefold()
|
||||
slug = slug.replace("_", "-")
|
||||
slug = re.sub(r"[^a-z0-9-]+", "-", slug).strip("-")
|
||||
if not slug:
|
||||
return ""
|
||||
special = {
|
||||
"doku-soap": "Doku-Soap",
|
||||
"scifi": "SciFi",
|
||||
"fighting-shounen": "Fighting-Shounen",
|
||||
}
|
||||
if slug in special:
|
||||
return special[slug]
|
||||
return " ".join(chunk.capitalize() for chunk in slug.split("-") if chunk)
|
||||
return text
|
||||
|
||||
# Neues Layout (Stand: 2026-01): Gruppen-Header + Liste.
|
||||
# - Header: `div.background-1 ...` mit `h3`
|
||||
# - Einträge: `ul.series-list` -> `li.series-item[data-search]` -> `a[href]`
|
||||
|
||||
Reference in New Issue
Block a user