nightly: playback fast-path, windows asyncio fix, v0.1.56
This commit is contained in:
@@ -17,7 +17,8 @@ import os
|
||||
import re
|
||||
import time
|
||||
import unicodedata
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeAlias
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import quote
|
||||
|
||||
try: # pragma: no cover - optional dependency
|
||||
import requests
|
||||
@@ -49,14 +50,15 @@ if TYPE_CHECKING: # pragma: no cover
|
||||
from requests import Session as RequestsSession
|
||||
from bs4 import BeautifulSoup as BeautifulSoupT # type: ignore[import-not-found]
|
||||
else: # pragma: no cover
|
||||
RequestsSession: TypeAlias = Any
|
||||
BeautifulSoupT: TypeAlias = Any
|
||||
RequestsSession = Any
|
||||
BeautifulSoupT = Any
|
||||
|
||||
|
||||
SETTING_BASE_URL = "serienstream_base_url"
|
||||
DEFAULT_BASE_URL = "https://s.to"
|
||||
DEFAULT_PREFERRED_HOSTERS = ["voe"]
|
||||
DEFAULT_TIMEOUT = 20
|
||||
SEARCH_TIMEOUT = 8
|
||||
ADDON_ID = "plugin.video.viewit"
|
||||
GLOBAL_SETTING_LOG_URLS = "debug_log_urls"
|
||||
GLOBAL_SETTING_DUMP_HTML = "debug_dump_html"
|
||||
@@ -75,6 +77,9 @@ HEADERS = {
|
||||
SESSION_CACHE_TTL_SECONDS = 300
|
||||
SESSION_CACHE_PREFIX = "viewit.serienstream"
|
||||
SESSION_CACHE_MAX_TITLE_URLS = 800
|
||||
CATALOG_SEARCH_TTL_SECONDS = 600
|
||||
CATALOG_SEARCH_CACHE_KEY = "catalog_index"
|
||||
# In-process copy of the catalog index as (expiry timestamp, entries); starts out empty.
# typing.Tuple is used instead of the builtin generic because a module-level annotation
# is evaluated at import time (unless postponed evaluation is enabled), and
# ``tuple[...]`` raises TypeError on interpreters older than Python 3.9.
_CATALOG_INDEX_MEMORY: Tuple[float, List["SeriesResult"]] = (0.0, [])
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -451,20 +456,222 @@ def _extract_genre_names_from_html(body: str) -> List[str]:
|
||||
return names
|
||||
|
||||
|
||||
def _strip_tags(value: str) -> str:
    """Return *value* with every ``<...>`` tag replaced by a single space.

    A falsy *value* (``None`` or ``""``) is treated as the empty string.
    """
    text = value if value else ""
    return re.sub(r"<[^>]+>", " ", text)
|
||||
|
||||
|
||||
def _search_series_api(query: str) -> List[SeriesResult]:
    """Look up series through the site's ``/api/search/suggest`` endpoint.

    The full query is tried first; for a query containing a space, each
    whitespace-separated token is tried afterwards. A term is skipped when the
    request fails, the body is not JSON, or the payload has no ``shows`` list.

    Args:
        query: Search text; ``None`` or blank input yields an empty list.

    Returns:
        The entries of the first term that pass ``_matches_query`` against the
        original query. If a token term produced entries but none pass that
        filter, the unfiltered entries of that term are returned. Otherwise an
        empty list.
    """
    query = (query or "").strip()
    if not query:
        return []
    _ensure_requests()
    sess = get_requests_session("serienstream", headers=HEADERS)
    terms = [query]
    if " " in query:
        # Fallback: individual tokens often yield better hits from the API.
        terms.extend(query.split())
    # URLs are de-duplicated across all terms, not only within one response.
    seen_urls: set[str] = set()
    for term in terms:
        try:
            response = sess.get(
                f"{_get_base_url()}/api/search/suggest",
                params={"term": term},
                headers=HEADERS,
                timeout=SEARCH_TIMEOUT,
            )
            response.raise_for_status()
            payload = response.json()
        except Exception:
            # Best effort: a failing term must not abort the remaining terms.
            continue
        shows = payload.get("shows") if isinstance(payload, dict) else None
        if not isinstance(shows, list):
            continue
        results: List[SeriesResult] = []
        for item in shows:
            if not isinstance(item, dict):
                continue
            raw_title = item.get("name")
            raw_href = item.get("url")
            # JSON values may be numbers or objects; calling .strip() on those
            # would raise and abort the whole search, so skip such items.
            if not isinstance(raw_title, str) or not isinstance(raw_href, str):
                continue
            title = raw_title.strip()
            href = raw_href.strip()
            if not title or not href:
                continue
            url_abs = _absolute_url(href)
            if not url_abs or url_abs in seen_urls:
                continue
            # Season and episode links are not series entries.
            if "/staffel-" in url_abs or "/episode-" in url_abs:
                continue
            seen_urls.add(url_abs)
            results.append(SeriesResult(title=title, description="", url=url_abs))
        if not results:
            continue
        filtered = [entry for entry in results if _matches_query(query, title=entry.title)]
        if filtered:
            return filtered
        # If only the token search produced anything, at least return those results.
        if term != query:
            return results
    return []
|
||||
|
||||
|
||||
def _search_series_server(query: str) -> List[SeriesResult]:
    """Search for series on the server: suggest API first, then HTML search pages.

    When ``_search_series_api`` returns nothing, the ``/search`` and ``/suche``
    pages are fetched in that order and their ``.cover-card`` entries inside
    ``.search-results-list`` are parsed. The first page that yields at least
    one entry wins.

    Returns:
        The series found, or an empty list for a falsy query or no hits.
    """
    if not query:
        return []
    hits_from_api = _search_series_api(query)
    if hits_from_api:
        return hits_from_api
    site = _get_base_url()
    encoded = quote(query)
    for page_url in (f"{site}/search?q={encoded}", f"{site}/suche?q={encoded}"):
        try:
            markup = _get_html_simple(page_url)
        except Exception:
            # A failing page just means the next candidate URL is tried.
            continue
        if not markup:
            continue
        container = BeautifulSoup(markup, "html.parser").select_one(".search-results-list")
        if container is None:
            continue
        known: set[str] = set()
        found: List[SeriesResult] = []
        for card in container.select(".cover-card"):
            link = card.select_one("a[href*='/serie/']")
            if not link:
                continue
            target = _absolute_url((link.get("href") or "").strip())
            if not target or target in known:
                continue
            # Season and episode links are not series entries.
            if "/staffel-" in target or "/episode-" in target:
                continue
            heading = card.select_one(".show-title") or card.select_one("h3") or card.select_one("h4")
            # Fall back to the link text when the card has no heading element.
            label_source = heading if heading else link
            label = label_source.get_text(" ", strip=True).strip()
            if not label:
                continue
            known.add(target)
            found.append(SeriesResult(title=label, description="", url=target))
        if found:
            return found
    return []
|
||||
|
||||
|
||||
def _extract_catalog_index_from_html(body: str) -> List[SeriesResult]:
    """Extract the series index from catalog HTML with regular expressions.

    Every ``<li>`` whose class attribute contains ``series-item`` is scanned
    for its first ``<a href=...>``. Entries are kept when the absolute URL
    contains ``/serie/`` but neither ``/staffel-`` nor ``/episode-``, has not
    been seen before, and the anchor text is non-empty after tag stripping.

    Args:
        body: Raw HTML of the catalog page; a falsy value yields an empty list.

    Returns:
        One ``SeriesResult`` per unique series URL, in document order. The
        description is the first ``data-search`` attribute value found in the
        matched ``<li>`` block, or ``""`` when there is none.
    """
    items: List[SeriesResult] = []
    if not body:
        return items
    seen_urls: set[str] = set()
    item_re = re.compile(
        r"<li[^>]*class=[\"'][^\"']*series-item[^\"']*[\"'][^>]*>(.*?)</li>",
        re.IGNORECASE | re.DOTALL,
    )
    anchor_re = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
    data_search_re = re.compile(r"data-search=[\"']([^\"']*)[\"']", re.IGNORECASE)
    for match in item_re.finditer(body):
        block = match.group(0)
        inner = match.group(1) or ""
        anchor_match = anchor_re.search(inner)
        if not anchor_match:
            continue
        # Attribute values are HTML-escaped in the raw markup (e.g. "&amp;");
        # decode them so this path agrees with the parser-based extraction.
        href = unescape(anchor_match.group(1) or "").strip()
        url = _absolute_url(href)
        if not url or "/serie/" not in url or "/staffel-" in url or "/episode-" in url:
            continue
        if url in seen_urls:
            continue
        seen_urls.add(url)
        title_raw = anchor_match.group(2) or ""
        title = unescape(re.sub(r"\s+", " ", _strip_tags(title_raw))).strip()
        if not title:
            continue
        search_match = data_search_re.search(block)
        description = unescape(search_match.group(1) or "").strip() if search_match else ""
        items.append(SeriesResult(title=title, description=description, url=url))
    return items
|
||||
|
||||
|
||||
def _catalog_index_from_soup(soup: BeautifulSoupT) -> List[SeriesResult]:
    """Build the series index from a parsed catalog page.

    Walks every ``li.series-item`` element and takes its first anchor that has
    an ``href``. An entry is kept when its absolute URL contains ``/serie/``
    but neither ``/staffel-`` nor ``/episode-``, is not a duplicate, and the
    anchor text is non-empty. The ``data-search`` attribute of the ``<li>``
    becomes the description.

    Returns:
        The collected entries in document order; empty for a falsy *soup*.
    """
    collected: List[SeriesResult] = []
    if not soup:
        return collected
    visited: set[str] = set()
    for node in soup.select("li.series-item"):
        link = node.find("a", href=True)
        if not link:
            continue
        target = _absolute_url((link.get("href") or "").strip())
        is_series_page = (
            bool(target)
            and "/serie/" in target
            and "/staffel-" not in target
            and "/episode-" not in target
        )
        if not is_series_page or target in visited:
            continue
        # The URL is marked as seen before the title check.
        visited.add(target)
        label = (link.get_text(" ", strip=True) or "").strip()
        if not label:
            continue
        blurb = (node.get("data-search") or "").strip()
        collected.append(SeriesResult(title=label, description=blurb, url=target))
    return collected
|
||||
|
||||
|
||||
def _load_catalog_index_from_cache() -> Optional[List[SeriesResult]]:
    """Return the cached catalog index, or ``None`` when nothing usable is cached.

    The module-level ``_CATALOG_INDEX_MEMORY`` tuple is consulted first; a
    non-empty, unexpired entry is returned as a fresh list. Otherwise the value
    stored under ``CATALOG_SEARCH_CACHE_KEY`` is read via ``_session_cache_get``
    and rebuilt into ``SeriesResult`` objects, which also refreshes the
    in-memory copy.
    """
    global _CATALOG_INDEX_MEMORY
    deadline, in_memory = _CATALOG_INDEX_MEMORY
    if in_memory and deadline > time.time():
        return list(in_memory)
    stored = _session_cache_get(CATALOG_SEARCH_CACHE_KEY)
    if not isinstance(stored, list):
        return None
    restored: List[SeriesResult] = []
    for row in stored:
        # Each row is expected as [title, url] or [title, url, description].
        if not (isinstance(row, list) and len(row) >= 2):
            continue
        name = str(row[0] or "").strip()
        link = str(row[1] or "").strip()
        blurb = str(row[2] or "") if len(row) > 2 else ""
        if not (name and link):
            continue
        restored.append(SeriesResult(title=name, description=blurb, url=link))
    if not restored:
        return None
    _CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(restored))
    return restored
|
||||
|
||||
|
||||
def _store_catalog_index_in_cache(items: List[SeriesResult]) -> None:
    """Remember *items* in memory and hand them to ``_session_cache_set``.

    Nothing happens for an empty *items*. The in-memory copy keeps every
    entry; the payload passed to ``_session_cache_set`` contains only entries
    with both a title and a URL, as ``[title, url, description]`` rows. Both
    use ``CATALOG_SEARCH_TTL_SECONDS`` as their lifetime.
    """
    global _CATALOG_INDEX_MEMORY
    if not items:
        return
    _CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(items))
    rows: List[List[str]] = [
        [series.title, series.url, series.description]
        for series in items
        if series.title and series.url
    ]
    _session_cache_set(CATALOG_SEARCH_CACHE_KEY, rows, ttl_seconds=CATALOG_SEARCH_TTL_SECONDS)
|
||||
|
||||
|
||||
def search_series(query: str) -> List[SeriesResult]:
    """Search series by title, preferring the server search and a cached catalog.

    Lookup order:
      1. ``_search_series_server``; if it returns anything, those entries are
         filtered with ``_matches_query`` and returned.
      2. The cached catalog index from ``_load_catalog_index_from_cache``.
      3. A fresh download of ``/serien?by=genre``, parsed with the regex
         extractor and, if that finds nothing, with BeautifulSoup. A non-empty
         index is stored in the cache before filtering.

    Args:
        query: Search text; a query that normalizes to nothing yields ``[]``.

    Returns:
        Entries with a title that matches *query*.
    """
    _ensure_requests()
    if not _normalize_search_text(query):
        return []
    # Direct fetch as in fetch_serien.py.
    server_results = _search_series_server(query)
    if server_results:
        return [entry for entry in server_results if entry.title and _matches_query(query, title=entry.title)]
    cached = _load_catalog_index_from_cache()
    if cached is not None:
        return [entry for entry in cached if entry.title and _matches_query(query, title=entry.title)]

    catalog_url = f"{_get_base_url()}/serien?by=genre"
    body = _get_html_simple(catalog_url)
    items = _extract_catalog_index_from_html(body)
    if not items:
        # The regex pass found nothing; fall back to the HTML parser.
        soup = BeautifulSoup(body, "html.parser")
        items = _catalog_index_from_soup(soup)
    if items:
        _store_catalog_index_in_cache(items)
    return [entry for entry in items if entry.title and _matches_query(query, title=entry.title)]
|
||||
|
||||
|
||||
def parse_series_catalog(soup: BeautifulSoupT) -> Dict[str, List[SeriesResult]]:
|
||||
@@ -1569,6 +1776,18 @@ class SerienstreamPlugin(BasisPlugin):
|
||||
except Exception as exc: # pragma: no cover - defensive logging
|
||||
raise RuntimeError(f"Stream-Link konnte nicht geladen werden: {exc}") from exc
|
||||
|
||||
def episode_url_for(self, title: str, season: str, episode: str) -> str:
    """Return the URL of an episode, or ``""`` when none is known.

    The ``(title, season)`` entry of ``self._episode_label_cache`` is checked
    first; if it has no URL for *episode*, ``self._lookup_episode`` is asked.
    """
    season_labels = self._episode_label_cache.get((title, season))
    cached_entry = season_labels.get(episode) if season_labels else None
    if cached_entry and cached_entry.url:
        return cached_entry.url
    looked_up = self._lookup_episode(title, season, episode)
    if looked_up and looked_up.url:
        return looked_up.url
    return ""
|
||||
|
||||
def available_hosters_for(self, title: str, season: str, episode: str) -> List[str]:
|
||||
if not self._requests_available:
|
||||
raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 keine Hoster laden.")
|
||||
|
||||
Reference in New Issue
Block a user