Nightly: refactor readability, progress callbacks, and resource handling

This commit is contained in:
2026-02-23 16:47:00 +01:00
parent 7a330c9bc0
commit d414fac022
13 changed files with 668 additions and 455 deletions

View File

@@ -9,7 +9,7 @@ Zum Verwenden:
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, List, Optional
from typing import TYPE_CHECKING, Any, Callable, List, Optional
try: # pragma: no cover - optional dependency
import requests
@@ -88,9 +88,13 @@ class TemplatePlugin(BasisPlugin):
self._session = session
return self._session
async def search_titles(self, query: str) -> List[str]:
async def search_titles(
self,
query: str,
progress_callback: Optional[Callable[[str, Optional[int]], Any]] = None,
) -> List[str]:
"""TODO: Suche auf der Zielseite implementieren."""
_ = query
_ = (query, progress_callback)
return []
def seasons_for(self, title: str) -> List[str]:

View File

@@ -13,7 +13,8 @@ import hashlib
import json
import re
import time
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote
try: # pragma: no cover - optional dependency
import requests
@@ -69,6 +70,16 @@ HEADERS = {
SESSION_CACHE_TTL_SECONDS = 300
SESSION_CACHE_PREFIX = "viewit.aniworld"
SESSION_CACHE_MAX_TITLE_URLS = 800
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
if not callable(callback):
return
try:
callback(str(message or ""), None if percent is None else int(percent))
except Exception:
return
@dataclass
@@ -126,7 +137,7 @@ def _latest_episodes_url() -> str:
def _search_url(query: str) -> str:
return f"{_get_base_url()}/search?q={query}"
return f"{_get_base_url()}/search?q={quote((query or '').strip())}"
def _search_api_url() -> str:
@@ -289,37 +300,56 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
_ensure_requests()
_log_visit(url)
sess = session or get_requests_session("aniworld", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
_log_response_html(url, response.text)
if _looks_like_cloudflare_challenge(response.text):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return BeautifulSoup(response.text, "html.parser")
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return BeautifulSoup(body, "html.parser")
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_html_simple(url: str) -> str:
_ensure_requests()
_log_visit(url)
sess = get_requests_session("aniworld", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
body = response.text
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return body
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return body
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_soup_simple(url: str) -> BeautifulSoupT:
@@ -351,17 +381,27 @@ def _post_json(url: str, *, payload: Dict[str, str], session: Optional[RequestsS
_ensure_requests()
_log_visit(url)
sess = session or get_requests_session("aniworld", headers=HEADERS)
response = sess.post(url, data=payload, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
_log_response_html(url, response.text)
if _looks_like_cloudflare_challenge(response.text):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
response = None
try:
return response.json()
except Exception:
return None
response = sess.post(url, data=payload, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
try:
return response.json()
except Exception:
return None
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _extract_canonical_url(soup: BeautifulSoupT, fallback: str) -> str:
@@ -555,10 +595,18 @@ def resolve_redirect(target_url: str) -> Optional[str]:
_log_visit(normalized_url)
session = get_requests_session("aniworld", headers=HEADERS)
_get_soup(_get_base_url(), session=session)
response = session.get(normalized_url, headers=HEADERS, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
if response.url:
_log_url(response.url, kind="RESOLVED")
return response.url if response.url else None
response = None
try:
response = session.get(normalized_url, headers=HEADERS, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
if response.url:
_log_url(response.url, kind="RESOLVED")
return response.url if response.url else None
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def fetch_episode_hoster_names(episode_url: str) -> List[str]:
@@ -629,11 +677,12 @@ def fetch_episode_stream_link(
return resolved
def search_animes(query: str) -> List[SeriesResult]:
def search_animes(query: str, *, progress_callback: ProgressCallback = None) -> List[SeriesResult]:
_ensure_requests()
query = (query or "").strip()
if not query:
return []
_emit_progress(progress_callback, "AniWorld API-Suche", 15)
session = get_requests_session("aniworld", headers=HEADERS)
try:
session.get(_get_base_url(), headers=HEADERS, timeout=DEFAULT_TIMEOUT)
@@ -643,7 +692,9 @@ def search_animes(query: str) -> List[SeriesResult]:
results: List[SeriesResult] = []
seen: set[str] = set()
if isinstance(data, list):
for entry in data:
for idx, entry in enumerate(data, start=1):
if idx == 1 or idx % 50 == 0:
_emit_progress(progress_callback, f"API auswerten {idx}/{len(data)}", 35)
if not isinstance(entry, dict):
continue
title = _strip_html((entry.get("title") or "").strip())
@@ -665,10 +716,16 @@ def search_animes(query: str) -> List[SeriesResult]:
seen.add(key)
description = (entry.get("description") or "").strip()
results.append(SeriesResult(title=title, description=description, url=url))
_emit_progress(progress_callback, f"API-Treffer: {len(results)}", 85)
return results
soup = _get_soup_simple(_search_url(requests.utils.quote(query)))
for anchor in soup.select("a[href^='/anime/stream/'][href]"):
_emit_progress(progress_callback, "HTML-Suche (Fallback)", 55)
soup = _get_soup_simple(_search_url(query))
anchors = soup.select("a[href^='/anime/stream/'][href]")
total_anchors = max(1, len(anchors))
for idx, anchor in enumerate(anchors, start=1):
if idx == 1 or idx % 100 == 0:
_emit_progress(progress_callback, f"HTML auswerten {idx}/{total_anchors}", 70)
href = (anchor.get("href") or "").strip()
if not href or "/staffel-" in href or "/episode-" in href:
continue
@@ -686,6 +743,7 @@ def search_animes(query: str) -> List[SeriesResult]:
continue
seen.add(key)
results.append(SeriesResult(title=title, description="", url=url))
_emit_progress(progress_callback, f"HTML-Treffer: {len(results)}", 85)
return results
@@ -1151,7 +1209,7 @@ class AniworldPlugin(BasisPlugin):
return self._episode_label_cache.get(cache_key, {}).get(episode_label)
return None
async def search_titles(self, query: str) -> List[str]:
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
query = (query or "").strip()
if not query:
self._anime_results.clear()
@@ -1163,7 +1221,8 @@ class AniworldPlugin(BasisPlugin):
if not self._requests_available:
raise RuntimeError("AniworldPlugin kann ohne requests/bs4 nicht suchen.")
try:
results = search_animes(query)
_emit_progress(progress_callback, "AniWorld Suche startet", 10)
results = search_animes(query, progress_callback=progress_callback)
except Exception as exc: # pragma: no cover
self._anime_results.clear()
self._season_cache.clear()
@@ -1178,6 +1237,7 @@ class AniworldPlugin(BasisPlugin):
self._season_cache.clear()
self._season_links_cache.clear()
self._episode_label_cache.clear()
_emit_progress(progress_callback, f"Treffer aufbereitet: {len(results)}", 95)
return [result.title for result in results]
def _ensure_seasons(self, title: str) -> List[SeasonInfo]:

View File

@@ -5,7 +5,7 @@ from __future__ import annotations
from dataclasses import dataclass
import re
from urllib.parse import quote
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
try: # pragma: no cover - optional dependency
import requests
@@ -44,6 +44,16 @@ SETTING_LOG_URLS = "log_urls_dokustreams"
SETTING_DUMP_HTML = "dump_html_dokustreams"
SETTING_SHOW_URL_INFO = "show_url_info_dokustreams"
SETTING_LOG_ERRORS = "log_errors_dokustreams"
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
if not callable(callback):
return
try:
callback(str(message or ""), None if percent is None else int(percent))
except Exception:
return
HEADERS = {
"User-Agent": "Mozilla/5.0 (Kodi; ViewIt) AppleWebKit/537.36 (KHTML, like Gecko)",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
@@ -213,16 +223,26 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
raise RuntimeError("requests/bs4 sind nicht verfuegbar.")
_log_visit(url)
sess = session or get_requests_session("dokustreams", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error_message(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url_event(response.url, kind="REDIRECT")
_log_response_html(url, response.text)
return BeautifulSoup(response.text, "html.parser")
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url_event(final_url, kind="REDIRECT")
_log_response_html(url, body)
return BeautifulSoup(body, "html.parser")
finally:
if response is not None:
try:
response.close()
except Exception:
pass
class DokuStreamsPlugin(BasisPlugin):
@@ -247,14 +267,17 @@ class DokuStreamsPlugin(BasisPlugin):
if REQUESTS_IMPORT_ERROR:
print(f"DokuStreamsPlugin Importfehler: {REQUESTS_IMPORT_ERROR}")
async def search_titles(self, query: str) -> List[str]:
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
_emit_progress(progress_callback, "Doku-Streams Suche", 15)
hits = self._search_hits(query)
_emit_progress(progress_callback, f"Treffer verarbeiten ({len(hits)})", 70)
self._title_to_url = {hit.title: hit.url for hit in hits if hit.title and hit.url}
for hit in hits:
if hit.title:
self._title_meta[hit.title] = (hit.plot, hit.poster)
titles = [hit.title for hit in hits if hit.title]
titles.sort(key=lambda value: value.casefold())
_emit_progress(progress_callback, f"Fertig: {len(titles)} Treffer", 95)
return titles
def _search_hits(self, query: str) -> List[SearchHit]:

View File

@@ -11,7 +11,7 @@ from __future__ import annotations
import json
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Set
from typing import Any, Callable, Dict, List, Optional, Set
from urllib.parse import urlencode, urljoin, urlsplit
try: # pragma: no cover - optional dependency (Kodi dependency)
@@ -56,6 +56,16 @@ HEADERS = {
"Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
"Connection": "keep-alive",
}
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
if not callable(callback):
return
try:
callback(str(message or ""), None if percent is None else int(percent))
except Exception:
return
@dataclass(frozen=True)
@@ -526,6 +536,34 @@ class EinschaltenPlugin(BasisPlugin):
self._session = requests.Session()
return self._session
def _http_get_text(self, url: str, *, timeout: int = 20) -> tuple[str, str]:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=timeout)
response.raise_for_status()
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
_log_url(final_url, kind="OK")
_log_response_html(final_url, body)
return final_url, body
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _http_get_json(self, url: str, *, timeout: int = 20) -> tuple[str, Any]:
final_url, body = self._http_get_text(url, timeout=timeout)
try:
payload = json.loads(body or "{}")
except Exception:
payload = {}
return final_url, payload
def _get_base_url(self) -> str:
base = _get_setting_text(SETTING_BASE_URL, default=DEFAULT_BASE_URL).strip()
return base.rstrip("/")
@@ -646,15 +684,9 @@ class EinschaltenPlugin(BasisPlugin):
if not url:
return ""
try:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
resp = sess.get(url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or url, kind="OK")
_log_response_html(resp.url or url, resp.text)
self._detail_html_by_id[movie_id] = resp.text or ""
return resp.text or ""
_, body = self._http_get_text(url, timeout=20)
self._detail_html_by_id[movie_id] = body
return body
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
return ""
@@ -667,16 +699,8 @@ class EinschaltenPlugin(BasisPlugin):
if not url:
return {}
try:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
resp = sess.get(url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or url, kind="OK")
# Some backends may return JSON with a JSON content-type; for debugging we still dump text.
_log_response_html(resp.url or url, resp.text)
data = resp.json()
return dict(data) if isinstance(data, dict) else {}
_, data = self._http_get_json(url, timeout=20)
return data
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
return {}
@@ -741,14 +765,8 @@ class EinschaltenPlugin(BasisPlugin):
if not url:
return []
try:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
resp = sess.get(url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or url, kind="OK")
_log_response_html(resp.url or url, resp.text)
payload = _extract_ng_state_payload(resp.text)
_, body = self._http_get_text(url, timeout=20)
payload = _extract_ng_state_payload(body)
return _parse_ng_state_movies(payload)
except Exception:
return []
@@ -759,14 +777,8 @@ class EinschaltenPlugin(BasisPlugin):
if not url:
return []
try:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
resp = sess.get(url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or url, kind="OK")
_log_response_html(resp.url or url, resp.text)
payload = _extract_ng_state_payload(resp.text)
_, body = self._http_get_text(url, timeout=20)
payload = _extract_ng_state_payload(body)
movies = _parse_ng_state_movies(payload)
_log_debug_line(f"parse_ng_state_movies:count={len(movies)}")
if movies:
@@ -784,14 +796,8 @@ class EinschaltenPlugin(BasisPlugin):
if page > 1:
url = f"{url}?{urlencode({'page': str(page)})}"
try:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
resp = sess.get(url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or url, kind="OK")
_log_response_html(resp.url or url, resp.text)
payload = _extract_ng_state_payload(resp.text)
_, body = self._http_get_text(url, timeout=20)
payload = _extract_ng_state_payload(body)
movies, has_more, current_page = _parse_ng_state_movies_with_pagination(payload)
_log_debug_line(f"parse_ng_state_movies_page:page={page} count={len(movies)}")
if has_more is not None:
@@ -844,14 +850,8 @@ class EinschaltenPlugin(BasisPlugin):
if not url:
return []
try:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
resp = sess.get(url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or url, kind="OK")
_log_response_html(resp.url or url, resp.text)
payload = _extract_ng_state_payload(resp.text)
_, body = self._http_get_text(url, timeout=20)
payload = _extract_ng_state_payload(body)
results = _parse_ng_state_search_results(payload)
return _filter_movies_by_title(query, results)
except Exception:
@@ -867,13 +867,7 @@ class EinschaltenPlugin(BasisPlugin):
api_url = self._api_genres_url()
if api_url:
try:
_log_url(api_url, kind="GET")
_notify_url(api_url)
sess = self._get_session()
resp = sess.get(api_url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or api_url, kind="OK")
payload = resp.json()
_, payload = self._http_get_json(api_url, timeout=20)
if isinstance(payload, list):
parsed: Dict[str, int] = {}
for item in payload:
@@ -900,14 +894,8 @@ class EinschaltenPlugin(BasisPlugin):
if not url:
return
try:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
resp = sess.get(url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or url, kind="OK")
_log_response_html(resp.url or url, resp.text)
payload = _extract_ng_state_payload(resp.text)
_, body = self._http_get_text(url, timeout=20)
payload = _extract_ng_state_payload(body)
parsed = _parse_ng_state_genres(payload)
if parsed:
self._genre_id_by_name.clear()
@@ -915,7 +903,7 @@ class EinschaltenPlugin(BasisPlugin):
except Exception:
return
async def search_titles(self, query: str) -> List[str]:
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
if not REQUESTS_AVAILABLE:
return []
query = (query or "").strip()
@@ -924,9 +912,12 @@ class EinschaltenPlugin(BasisPlugin):
if not self._get_base_url():
return []
_emit_progress(progress_callback, "Einschalten Suche", 15)
movies = self._fetch_search_movies(query)
if not movies:
_emit_progress(progress_callback, "Fallback: Index filtern", 45)
movies = _filter_movies_by_title(query, self._load_movies())
_emit_progress(progress_callback, f"Treffer verarbeiten ({len(movies)})", 75)
titles: List[str] = []
seen: set[str] = set()
for movie in movies:
@@ -936,6 +927,7 @@ class EinschaltenPlugin(BasisPlugin):
self._id_by_title[movie.title] = movie.id
titles.append(movie.title)
titles.sort(key=lambda value: value.casefold())
_emit_progress(progress_callback, f"Fertig: {len(titles)} Treffer", 95)
return titles
def genres(self) -> List[str]:
@@ -971,14 +963,8 @@ class EinschaltenPlugin(BasisPlugin):
if not url:
return []
try:
_log_url(url, kind="GET")
_notify_url(url)
sess = self._get_session()
resp = sess.get(url, headers=HEADERS, timeout=20)
resp.raise_for_status()
_log_url(resp.url or url, kind="OK")
_log_response_html(resp.url or url, resp.text)
payload = _extract_ng_state_payload(resp.text)
_, body = self._http_get_text(url, timeout=20)
payload = _extract_ng_state_payload(body)
except Exception:
return []
if not isinstance(payload, dict):

View File

@@ -11,7 +11,7 @@ from dataclasses import dataclass
import re
from urllib.parse import quote, urlencode
from urllib.parse import urljoin
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
try: # pragma: no cover - optional dependency
import requests
@@ -53,6 +53,16 @@ SETTING_LOG_URLS = "log_urls_filmpalast"
SETTING_DUMP_HTML = "dump_html_filmpalast"
SETTING_SHOW_URL_INFO = "show_url_info_filmpalast"
SETTING_LOG_ERRORS = "log_errors_filmpalast"
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
if not callable(callback):
return
try:
callback(str(message or ""), None if percent is None else int(percent))
except Exception:
return
HEADERS = {
"User-Agent": "Mozilla/5.0 (Kodi; ViewIt) AppleWebKit/537.36 (KHTML, like Gecko)",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
@@ -206,16 +216,26 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
raise RuntimeError("requests/bs4 sind nicht verfuegbar.")
_log_visit(url)
sess = session or get_requests_session("filmpalast", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error_message(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url_event(response.url, kind="REDIRECT")
_log_response_html(url, response.text)
return BeautifulSoup(response.text, "html.parser")
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url_event(final_url, kind="REDIRECT")
_log_response_html(url, body)
return BeautifulSoup(body, "html.parser")
finally:
if response is not None:
try:
response.close()
except Exception:
pass
class FilmpalastPlugin(BasisPlugin):
@@ -352,6 +372,7 @@ class FilmpalastPlugin(BasisPlugin):
seen_titles: set[str] = set()
seen_urls: set[str] = set()
for base_url, params in search_requests:
response = None
try:
request_url = base_url if not params else f"{base_url}?{urlencode(params)}"
_log_url_event(request_url, kind="GET")
@@ -365,6 +386,12 @@ class FilmpalastPlugin(BasisPlugin):
except Exception as exc:
_log_error_message(f"search request failed ({base_url}): {exc}")
continue
finally:
if response is not None:
try:
response.close()
except Exception:
pass
anchors = soup.select("article.liste h2 a[href], article.liste h3 a[href]")
if not anchors:
@@ -466,9 +493,13 @@ class FilmpalastPlugin(BasisPlugin):
titles.sort(key=lambda value: value.casefold())
return titles
async def search_titles(self, query: str) -> List[str]:
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
_emit_progress(progress_callback, "Filmpalast Suche", 15)
hits = self._search_hits(query)
return self._apply_hits_to_title_index(hits)
_emit_progress(progress_callback, f"Treffer verarbeiten ({len(hits)})", 70)
titles = self._apply_hits_to_title_index(hits)
_emit_progress(progress_callback, f"Fertig: {len(titles)} Treffer", 95)
return titles
def _parse_genres(self, soup: BeautifulSoupT) -> Dict[str, str]:
genres: Dict[str, str] = {}
@@ -913,6 +944,7 @@ class FilmpalastPlugin(BasisPlugin):
redirected = link
if self._requests_available:
response = None
try:
session = get_requests_session("filmpalast", headers=HEADERS)
response = session.get(link, headers=HEADERS, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
@@ -920,6 +952,12 @@ class FilmpalastPlugin(BasisPlugin):
redirected = (response.url or link).strip() or link
except Exception:
redirected = link
finally:
if response is not None:
try:
response.close()
except Exception:
pass
# 2) Danach optional die Redirect-URL nochmals auflösen.
if callable(resolve_with_resolveurl) and redirected and redirected != link:

View File

@@ -17,7 +17,7 @@ import os
import re
import time
import unicodedata
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote
try: # pragma: no cover - optional dependency
@@ -80,6 +80,16 @@ SESSION_CACHE_MAX_TITLE_URLS = 800
CATALOG_SEARCH_TTL_SECONDS = 600
CATALOG_SEARCH_CACHE_KEY = "catalog_index"
_CATALOG_INDEX_MEMORY: tuple[float, List["SeriesResult"]] = (0.0, [])
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
if not callable(callback):
return
try:
callback(str(message or ""), None if percent is None else int(percent))
except Exception:
return
@dataclass
@@ -398,37 +408,56 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
_ensure_requests()
_log_visit(url)
sess = session or get_requests_session("serienstream", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
_log_response_html(url, response.text)
if _looks_like_cloudflare_challenge(response.text):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return BeautifulSoup(response.text, "html.parser")
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return BeautifulSoup(body, "html.parser")
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_html_simple(url: str) -> str:
_ensure_requests()
_log_visit(url)
sess = get_requests_session("serienstream", headers=HEADERS)
response = None
try:
response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
_log_error(f"GET {url} failed: {exc}")
raise
if response.url and response.url != url:
_log_url(response.url, kind="REDIRECT")
body = response.text
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return body
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
if final_url != url:
_log_url(final_url, kind="REDIRECT")
_log_response_html(url, body)
if _looks_like_cloudflare_challenge(body):
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
return body
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_soup_simple(url: str) -> BeautifulSoupT:
@@ -472,6 +501,7 @@ def _search_series_api(query: str) -> List[SeriesResult]:
terms.extend([token for token in query.split() if token])
seen_urls: set[str] = set()
for term in terms:
response = None
try:
response = sess.get(
f"{_get_base_url()}/api/search/suggest",
@@ -486,6 +516,12 @@ def _search_series_api(query: str) -> List[SeriesResult]:
payload = response.json()
except Exception:
continue
finally:
if response is not None:
try:
response.close()
except Exception:
pass
shows = payload.get("shows") if isinstance(payload, dict) else None
if not isinstance(shows, list):
continue
@@ -558,7 +594,7 @@ def _search_series_server(query: str) -> List[SeriesResult]:
return []
def _extract_catalog_index_from_html(body: str) -> List[SeriesResult]:
def _extract_catalog_index_from_html(body: str, *, progress_callback: ProgressCallback = None) -> List[SeriesResult]:
items: List[SeriesResult] = []
if not body:
return items
@@ -569,7 +605,9 @@ def _extract_catalog_index_from_html(body: str) -> List[SeriesResult]:
)
anchor_re = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
data_search_re = re.compile(r"data-search=[\"']([^\"']*)[\"']", re.IGNORECASE)
for match in item_re.finditer(body):
for idx, match in enumerate(item_re.finditer(body), start=1):
if idx == 1 or idx % 200 == 0:
_emit_progress(progress_callback, f"Katalog parsen {idx}", 62)
block = match.group(0)
inner = match.group(1) or ""
anchor_match = anchor_re.search(inner)
@@ -651,26 +689,33 @@ def _store_catalog_index_in_cache(items: List[SeriesResult]) -> None:
_session_cache_set(CATALOG_SEARCH_CACHE_KEY, payload, ttl_seconds=CATALOG_SEARCH_TTL_SECONDS)
def search_series(query: str) -> List[SeriesResult]:
def search_series(query: str, *, progress_callback: ProgressCallback = None) -> List[SeriesResult]:
"""Sucht Serien im (/serien)-Katalog nach Titel. Nutzt Cache + Ein-Pass-Filter."""
_ensure_requests()
if not _normalize_search_text(query):
return []
_emit_progress(progress_callback, "Server-Suche", 15)
server_results = _search_series_server(query)
if server_results:
_emit_progress(progress_callback, f"Server-Treffer: {len(server_results)}", 35)
return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
_emit_progress(progress_callback, "Pruefe Such-Cache", 42)
cached = _load_catalog_index_from_cache()
if cached is not None:
_emit_progress(progress_callback, f"Cache-Treffer: {len(cached)}", 52)
return [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]
_emit_progress(progress_callback, "Lade Katalogseite", 58)
catalog_url = f"{_get_base_url()}/serien?by=genre"
body = _get_html_simple(catalog_url)
items = _extract_catalog_index_from_html(body)
items = _extract_catalog_index_from_html(body, progress_callback=progress_callback)
if not items:
_emit_progress(progress_callback, "Fallback-Parser", 70)
soup = BeautifulSoup(body, "html.parser")
items = _catalog_index_from_soup(soup)
if items:
_store_catalog_index_in_cache(items)
_emit_progress(progress_callback, f"Filtere Treffer ({len(items)})", 85)
return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
@@ -989,15 +1034,23 @@ def resolve_redirect(target_url: str) -> Optional[str]:
_get_soup(_get_base_url(), session=session)
except Exception:
pass
response = session.get(
normalized_url,
headers=HEADERS,
timeout=DEFAULT_TIMEOUT,
allow_redirects=True,
)
if response.url:
_log_url(response.url, kind="RESOLVED")
return response.url if response.url else None
response = None
try:
response = session.get(
normalized_url,
headers=HEADERS,
timeout=DEFAULT_TIMEOUT,
allow_redirects=True,
)
if response.url:
_log_url(response.url, kind="RESOLVED")
return response.url if response.url else None
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def scrape_series_detail(
@@ -1681,7 +1734,7 @@ class SerienstreamPlugin(BasisPlugin):
return self._episode_label_cache.get(cache_key, {}).get(episode_label)
return None
async def search_titles(self, query: str) -> List[str]:
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
query = query.strip()
if not query:
self._series_results.clear()
@@ -1695,7 +1748,8 @@ class SerienstreamPlugin(BasisPlugin):
try:
# Nutzt den Katalog (/serien), der jetzt nach Genres gruppiert ist.
# Alternativ gäbe es ein Ajax-Endpoint, aber der ist nicht immer zuverlässig erreichbar.
results = search_series(query)
_emit_progress(progress_callback, "Serienstream Suche startet", 10)
results = search_series(query, progress_callback=progress_callback)
except Exception as exc: # pragma: no cover - defensive logging
self._series_results.clear()
self._season_cache.clear()
@@ -1708,6 +1762,7 @@ class SerienstreamPlugin(BasisPlugin):
self._season_cache.clear()
self._season_links_cache.clear()
self._episode_label_cache.clear()
_emit_progress(progress_callback, f"Treffer aufbereitet: {len(results)}", 95)
return [result.title for result in results]
def _ensure_seasons(self, title: str) -> List[SeasonInfo]:

View File

@@ -19,7 +19,7 @@ import hashlib
import os
import re
import json
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
from urllib.parse import urlencode, urljoin
try: # pragma: no cover - optional dependency
@@ -78,6 +78,16 @@ HEADERS = {
"Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
"Connection": "keep-alive",
}
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
if not callable(callback):
return
try:
callback(str(message or ""), None if percent is None else int(percent))
except Exception:
return
@dataclass(frozen=True)
@@ -584,15 +594,25 @@ class TopstreamfilmPlugin(BasisPlugin):
session = self._get_session()
self._log_url(url, kind="VISIT")
self._notify_url(url)
response = None
try:
response = session.get(url, timeout=DEFAULT_TIMEOUT)
response.raise_for_status()
except Exception as exc:
self._log_error(f"GET {url} failed: {exc}")
raise
self._log_url(response.url, kind="OK")
self._log_response_html(response.url, response.text)
return BeautifulSoup(response.text, "html.parser")
try:
final_url = (response.url or url) if response is not None else url
body = (response.text or "") if response is not None else ""
self._log_url(final_url, kind="OK")
self._log_response_html(final_url, body)
return BeautifulSoup(body, "html.parser")
finally:
if response is not None:
try:
response.close()
except Exception:
pass
def _get_detail_soup(self, title: str) -> Optional[BeautifulSoupT]:
title = (title or "").strip()
@@ -814,7 +834,7 @@ class TopstreamfilmPlugin(BasisPlugin):
# Sonst: Serie via Streams-Accordion parsen (falls vorhanden).
self._parse_stream_accordion(soup, title=title)
async def search_titles(self, query: str) -> List[str]:
async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
"""Sucht Titel ueber eine HTML-Suche.
Erwartetes HTML (Snippet):
@@ -827,6 +847,7 @@ class TopstreamfilmPlugin(BasisPlugin):
query = (query or "").strip()
if not query:
return []
_emit_progress(progress_callback, "Topstreamfilm Suche", 15)
session = self._get_session()
url = self._get_base_url() + "/"
@@ -834,6 +855,7 @@ class TopstreamfilmPlugin(BasisPlugin):
request_url = f"{url}?{urlencode(params)}"
self._log_url(request_url, kind="GET")
self._notify_url(request_url)
response = None
try:
response = session.get(
url,
@@ -844,15 +866,28 @@ class TopstreamfilmPlugin(BasisPlugin):
except Exception as exc:
self._log_error(f"GET {request_url} failed: {exc}")
raise
self._log_url(response.url, kind="OK")
self._log_response_html(response.url, response.text)
try:
final_url = (response.url or request_url) if response is not None else request_url
body = (response.text or "") if response is not None else ""
self._log_url(final_url, kind="OK")
self._log_response_html(final_url, body)
if BeautifulSoup is None:
return []
soup = BeautifulSoup(response.text, "html.parser")
if BeautifulSoup is None:
return []
soup = BeautifulSoup(body, "html.parser")
finally:
if response is not None:
try:
response.close()
except Exception:
pass
hits: List[SearchHit] = []
for item in soup.select("li.TPostMv"):
items = soup.select("li.TPostMv")
total_items = max(1, len(items))
for idx, item in enumerate(items, start=1):
if idx == 1 or idx % 20 == 0:
_emit_progress(progress_callback, f"Treffer pruefen {idx}/{total_items}", 55)
anchor = item.select_one("a[href]")
if not anchor:
continue
@@ -885,6 +920,7 @@ class TopstreamfilmPlugin(BasisPlugin):
self._title_to_url[hit.title] = hit.url
titles.append(hit.title)
self._save_title_url_cache()
_emit_progress(progress_callback, f"Fertig: {len(titles)} Treffer", 95)
return titles
def genres(self) -> List[str]: