|
|
|
|
@@ -79,6 +79,7 @@ SESSION_CACHE_PREFIX = "viewit.serienstream"
|
|
|
|
|
SESSION_CACHE_MAX_TITLE_URLS = 800
|
|
|
|
|
CATALOG_SEARCH_TTL_SECONDS = 600
|
|
|
|
|
CATALOG_SEARCH_CACHE_KEY = "catalog_index"
|
|
|
|
|
GENRE_LIST_PAGE_SIZE = 20
|
|
|
|
|
_CATALOG_INDEX_MEMORY: tuple[float, List["SeriesResult"]] = (0.0, [])
|
|
|
|
|
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
|
|
|
|
|
|
|
|
|
|
@@ -97,6 +98,7 @@ class SeriesResult:
|
|
|
|
|
title: str
|
|
|
|
|
description: str
|
|
|
|
|
url: str
|
|
|
|
|
cover: str = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
|
@@ -669,8 +671,9 @@ def _load_catalog_index_from_cache() -> Optional[List[SeriesResult]]:
|
|
|
|
|
title = str(entry[0] or "").strip()
|
|
|
|
|
url = str(entry[1] or "").strip()
|
|
|
|
|
description = str(entry[2] or "") if len(entry) > 2 else ""
|
|
|
|
|
cover = str(entry[3] or "").strip() if len(entry) > 3 else ""
|
|
|
|
|
if title and url:
|
|
|
|
|
items.append(SeriesResult(title=title, description=description, url=url))
|
|
|
|
|
items.append(SeriesResult(title=title, description=description, url=url, cover=cover))
|
|
|
|
|
if items:
|
|
|
|
|
_CATALOG_INDEX_MEMORY = (time.time() + CATALOG_SEARCH_TTL_SECONDS, list(items))
|
|
|
|
|
return items or None
|
|
|
|
|
@@ -685,7 +688,7 @@ def _store_catalog_index_in_cache(items: List[SeriesResult]) -> None:
|
|
|
|
|
for entry in items:
|
|
|
|
|
if not entry.title or not entry.url:
|
|
|
|
|
continue
|
|
|
|
|
payload.append([entry.title, entry.url, entry.description])
|
|
|
|
|
payload.append([entry.title, entry.url, entry.description, entry.cover])
|
|
|
|
|
_session_cache_set(CATALOG_SEARCH_CACHE_KEY, payload, ttl_seconds=CATALOG_SEARCH_TTL_SECONDS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1107,8 +1110,8 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
self._episode_label_cache: Dict[Tuple[str, str], Dict[str, EpisodeInfo]] = {}
|
|
|
|
|
self._catalog_cache: Optional[Dict[str, List[SeriesResult]]] = None
|
|
|
|
|
self._genre_group_cache: Dict[str, Dict[str, List[str]]] = {}
|
|
|
|
|
self._genre_page_titles_cache: Dict[Tuple[str, int], List[str]] = {}
|
|
|
|
|
self._genre_page_count_cache: Dict[str, int] = {}
|
|
|
|
|
self._genre_page_entries_cache: Dict[Tuple[str, int], List[SeriesResult]] = {}
|
|
|
|
|
self._genre_page_has_more_cache: Dict[Tuple[str, int], bool] = {}
|
|
|
|
|
self._popular_cache: Optional[List[SeriesResult]] = None
|
|
|
|
|
self._requests_available = REQUESTS_AVAILABLE
|
|
|
|
|
self._default_preferred_hosters: List[str] = list(DEFAULT_PREFERRED_HOSTERS)
|
|
|
|
|
@@ -1117,6 +1120,7 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
self._latest_cache: Dict[int, List[LatestEpisode]] = {}
|
|
|
|
|
self._latest_hoster_cache: Dict[str, List[str]] = {}
|
|
|
|
|
self._series_metadata_cache: Dict[str, Tuple[Dict[str, str], Dict[str, str]]] = {}
|
|
|
|
|
self._series_metadata_full: set[str] = set()
|
|
|
|
|
self.is_available = True
|
|
|
|
|
self.unavailable_reason: Optional[str] = None
|
|
|
|
|
if not self._requests_available: # pragma: no cover - optional dependency
|
|
|
|
|
@@ -1409,49 +1413,165 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
value = re.sub(r"[^a-z0-9]+", "-", value).strip("-")
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
def _fetch_genre_page_titles(self, genre: str, page: int) -> Tuple[List[str], int]:
|
|
|
|
|
def _cache_list_metadata(self, title: str, description: str = "", cover: str = "") -> None:
|
|
|
|
|
key = self._metadata_cache_key(title)
|
|
|
|
|
cached = self._series_metadata_cache.get(key)
|
|
|
|
|
info = dict(cached[0]) if cached else {}
|
|
|
|
|
art = dict(cached[1]) if cached else {}
|
|
|
|
|
info.setdefault("title", title)
|
|
|
|
|
description = (description or "").strip()
|
|
|
|
|
if description and not info.get("plot"):
|
|
|
|
|
info["plot"] = description
|
|
|
|
|
cover = _absolute_url((cover or "").strip()) if cover else ""
|
|
|
|
|
if cover:
|
|
|
|
|
art.setdefault("thumb", cover)
|
|
|
|
|
art.setdefault("poster", cover)
|
|
|
|
|
self._series_metadata_cache[key] = (info, art)
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _card_description(anchor: BeautifulSoupT) -> str:
|
|
|
|
|
if not anchor:
|
|
|
|
|
return ""
|
|
|
|
|
candidates: List[str] = []
|
|
|
|
|
direct = (anchor.get("data-search") or "").strip()
|
|
|
|
|
if direct:
|
|
|
|
|
candidates.append(direct)
|
|
|
|
|
title_attr = (anchor.get("data-title") or "").strip()
|
|
|
|
|
if title_attr:
|
|
|
|
|
candidates.append(title_attr)
|
|
|
|
|
for selector in ("p", ".description", ".desc", ".text-muted", ".small", ".overview"):
|
|
|
|
|
node = anchor.select_one(selector)
|
|
|
|
|
if node is None:
|
|
|
|
|
continue
|
|
|
|
|
text = (node.get_text(" ", strip=True) or "").strip()
|
|
|
|
|
if text:
|
|
|
|
|
candidates.append(text)
|
|
|
|
|
parent = anchor.parent if anchor else None
|
|
|
|
|
if parent is not None:
|
|
|
|
|
parent_data = (parent.get("data-search") or "").strip()
|
|
|
|
|
if parent_data:
|
|
|
|
|
candidates.append(parent_data)
|
|
|
|
|
parent_text = ""
|
|
|
|
|
try:
|
|
|
|
|
parent_text = (parent.get_text(" ", strip=True) or "").strip()
|
|
|
|
|
except Exception:
|
|
|
|
|
parent_text = ""
|
|
|
|
|
if parent_text and len(parent_text) > 24:
|
|
|
|
|
candidates.append(parent_text)
|
|
|
|
|
for value in candidates:
|
|
|
|
|
cleaned = re.sub(r"\s+", " ", str(value or "")).strip()
|
|
|
|
|
if cleaned and len(cleaned) > 12:
|
|
|
|
|
return cleaned
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
def _parse_genre_entries_from_soup(self, soup: BeautifulSoupT) -> List[SeriesResult]:
|
|
|
|
|
entries: List[SeriesResult] = []
|
|
|
|
|
seen_urls: set[str] = set()
|
|
|
|
|
|
|
|
|
|
def _add_entry(title: str, description: str, href: str, cover: str) -> None:
|
|
|
|
|
series_url = _absolute_url(href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
|
|
|
|
|
if not series_url or "/serie/" not in series_url:
|
|
|
|
|
return
|
|
|
|
|
if "/staffel-" in series_url or "/episode-" in series_url:
|
|
|
|
|
return
|
|
|
|
|
if series_url in seen_urls:
|
|
|
|
|
return
|
|
|
|
|
title = (title or "").strip()
|
|
|
|
|
if not title:
|
|
|
|
|
return
|
|
|
|
|
description = (description or "").strip()
|
|
|
|
|
cover_url = _absolute_url((cover or "").strip()) if cover else ""
|
|
|
|
|
seen_urls.add(series_url)
|
|
|
|
|
self._remember_series_result(title, series_url, description)
|
|
|
|
|
self._cache_list_metadata(title, description=description, cover=cover_url)
|
|
|
|
|
entries.append(SeriesResult(title=title, description=description, url=series_url, cover=cover_url))
|
|
|
|
|
|
|
|
|
|
for anchor in soup.select("a.show-card[href]"):
|
|
|
|
|
href = (anchor.get("href") or "").strip()
|
|
|
|
|
if not href:
|
|
|
|
|
continue
|
|
|
|
|
img = anchor.select_one("img")
|
|
|
|
|
title = (
|
|
|
|
|
(img.get("alt") if img else "")
|
|
|
|
|
or (anchor.get("title") or "")
|
|
|
|
|
or (anchor.get_text(" ", strip=True) or "")
|
|
|
|
|
).strip()
|
|
|
|
|
description = self._card_description(anchor)
|
|
|
|
|
cover = (img.get("data-src") if img else "") or (img.get("src") if img else "")
|
|
|
|
|
_add_entry(title, description, href, cover)
|
|
|
|
|
|
|
|
|
|
if entries:
|
|
|
|
|
return entries
|
|
|
|
|
|
|
|
|
|
for item in soup.select("li.series-item"):
|
|
|
|
|
anchor = item.find("a", href=True)
|
|
|
|
|
if not anchor:
|
|
|
|
|
continue
|
|
|
|
|
href = (anchor.get("href") or "").strip()
|
|
|
|
|
title = (anchor.get_text(" ", strip=True) or "").strip()
|
|
|
|
|
description = (item.get("data-search") or "").strip()
|
|
|
|
|
img = anchor.find("img")
|
|
|
|
|
cover = (img.get("data-src") if img else "") or (img.get("src") if img else "")
|
|
|
|
|
_add_entry(title, description, href, cover)
|
|
|
|
|
return entries
|
|
|
|
|
|
|
|
|
|
def _fetch_genre_page_entries(self, genre: str, page: int) -> Tuple[List[SeriesResult], bool]:
|
|
|
|
|
slug = self._genre_slug(genre)
|
|
|
|
|
if not slug:
|
|
|
|
|
return [], 1
|
|
|
|
|
return [], False
|
|
|
|
|
cache_key = (slug, page)
|
|
|
|
|
cached = self._genre_page_titles_cache.get(cache_key)
|
|
|
|
|
cached_pages = self._genre_page_count_cache.get(slug)
|
|
|
|
|
if cached is not None and cached_pages is not None:
|
|
|
|
|
return list(cached), int(cached_pages)
|
|
|
|
|
cached_entries = self._genre_page_entries_cache.get(cache_key)
|
|
|
|
|
cached_has_more = self._genre_page_has_more_cache.get(cache_key)
|
|
|
|
|
if cached_entries is not None and cached_has_more is not None:
|
|
|
|
|
return list(cached_entries), bool(cached_has_more)
|
|
|
|
|
url = f"{_get_base_url()}/genre/{slug}"
|
|
|
|
|
if page > 1:
|
|
|
|
|
url = f"{url}?page={int(page)}"
|
|
|
|
|
soup = _get_soup_simple(url)
|
|
|
|
|
titles: List[str] = []
|
|
|
|
|
seen: set[str] = set()
|
|
|
|
|
for anchor in soup.select("a.show-card[href]"):
|
|
|
|
|
entries = self._parse_genre_entries_from_soup(soup)
|
|
|
|
|
|
|
|
|
|
has_more = False
|
|
|
|
|
for anchor in soup.select("a[rel='next'][href], a[href*='?page=']"):
|
|
|
|
|
href = (anchor.get("href") or "").strip()
|
|
|
|
|
series_url = _absolute_url(href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
|
|
|
|
|
if "/serie/" not in series_url:
|
|
|
|
|
if not href:
|
|
|
|
|
continue
|
|
|
|
|
img = anchor.select_one("img[alt]")
|
|
|
|
|
title = ((img.get("alt") if img else "") or "").strip()
|
|
|
|
|
if not title:
|
|
|
|
|
continue
|
|
|
|
|
key = title.casefold()
|
|
|
|
|
if key in seen:
|
|
|
|
|
continue
|
|
|
|
|
seen.add(key)
|
|
|
|
|
self._remember_series_result(title, series_url)
|
|
|
|
|
titles.append(title)
|
|
|
|
|
max_page = 1
|
|
|
|
|
for anchor in soup.select("a[href*='?page=']"):
|
|
|
|
|
href = (anchor.get("href") or "").strip()
|
|
|
|
|
match = re.search(r"[?&]page=(\d+)", href)
|
|
|
|
|
if not match:
|
|
|
|
|
if "next" in href.casefold():
|
|
|
|
|
has_more = True
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
max_page = max(max_page, int(match.group(1)))
|
|
|
|
|
if int(match.group(1)) > int(page):
|
|
|
|
|
has_more = True
|
|
|
|
|
break
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
self._genre_page_titles_cache[cache_key] = list(titles)
|
|
|
|
|
self._genre_page_count_cache[slug] = max_page
|
|
|
|
|
return list(titles), max_page
|
|
|
|
|
if len(entries) > GENRE_LIST_PAGE_SIZE:
|
|
|
|
|
has_more = True
|
|
|
|
|
entries = entries[:GENRE_LIST_PAGE_SIZE]
|
|
|
|
|
|
|
|
|
|
self._genre_page_entries_cache[cache_key] = list(entries)
|
|
|
|
|
self._genre_page_has_more_cache[cache_key] = bool(has_more)
|
|
|
|
|
return list(entries), bool(has_more)
|
|
|
|
|
|
|
|
|
|
def titles_for_genre_page(self, genre: str, page: int) -> List[str]:
    """Return the series titles listed on one page of a genre.

    Args:
        genre: Genre name; surrounding whitespace is ignored and ``None``
            is treated as an empty name.
        page: 1-based page number; falsy values and values below 1 are
            treated as page 1.

    Returns:
        The non-empty titles of the entries returned by
        ``self._fetch_genre_page_entries`` for that page, in order.
    """
    genre = (genre or "").strip()
    page = max(1, int(page or 1))
    entries, _ = self._fetch_genre_page_entries(genre, page)
    return [entry.title for entry in entries if entry.title]
|
|
|
|
|
|
|
|
|
|
def genre_has_more(self, genre: str, page: int) -> bool:
    """Report whether a genre has further pages after *page*.

    The answer is taken from ``self._genre_page_has_more_cache`` when the
    page was seen before; otherwise ``self._fetch_genre_page_entries`` is
    called to determine it.

    Args:
        genre: Genre name; surrounding whitespace is ignored and ``None``
            is treated as an empty name.
        page: 1-based page number; falsy values and values below 1 are
            treated as page 1.

    Returns:
        ``True`` when more pages follow, ``False`` otherwise, including
        when the genre does not map to a slug.
    """
    genre = (genre or "").strip()
    page = max(1, int(page or 1))
    slug = self._genre_slug(genre)
    if not slug:
        return False

    cached = self._genre_page_has_more_cache.get((slug, page))
    if cached is not None:
        return bool(cached)

    _, has_more = self._fetch_genre_page_entries(genre, page)
    return bool(has_more)
|
|
|
|
|
|
|
|
|
|
def titles_for_genre_group_page(self, genre: str, group_code: str, page: int = 1, page_size: int = 10) -> List[str]:
|
|
|
|
|
genre = (genre or "").strip()
|
|
|
|
|
@@ -1461,14 +1581,17 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
needed = page * page_size + 1
|
|
|
|
|
matched: List[str] = []
|
|
|
|
|
try:
|
|
|
|
|
_, max_pages = self._fetch_genre_page_titles(genre, 1)
|
|
|
|
|
for page_index in range(1, max_pages + 1):
|
|
|
|
|
page_titles, _ = self._fetch_genre_page_titles(genre, page_index)
|
|
|
|
|
for title in page_titles:
|
|
|
|
|
page_index = 1
|
|
|
|
|
has_more = True
|
|
|
|
|
while has_more:
|
|
|
|
|
page_entries, has_more = self._fetch_genre_page_entries(genre, page_index)
|
|
|
|
|
for entry in page_entries:
|
|
|
|
|
title = entry.title
|
|
|
|
|
if self._group_matches(group_code, title):
|
|
|
|
|
matched.append(title)
|
|
|
|
|
if len(matched) >= needed:
|
|
|
|
|
break
|
|
|
|
|
page_index += 1
|
|
|
|
|
start = (page - 1) * page_size
|
|
|
|
|
end = start + page_size
|
|
|
|
|
return list(matched[start:end])
|
|
|
|
|
@@ -1487,14 +1610,17 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
needed = page * page_size + 1
|
|
|
|
|
count = 0
|
|
|
|
|
try:
|
|
|
|
|
_, max_pages = self._fetch_genre_page_titles(genre, 1)
|
|
|
|
|
for page_index in range(1, max_pages + 1):
|
|
|
|
|
page_titles, _ = self._fetch_genre_page_titles(genre, page_index)
|
|
|
|
|
for title in page_titles:
|
|
|
|
|
page_index = 1
|
|
|
|
|
has_more = True
|
|
|
|
|
while has_more:
|
|
|
|
|
page_entries, has_more = self._fetch_genre_page_entries(genre, page_index)
|
|
|
|
|
for entry in page_entries:
|
|
|
|
|
title = entry.title
|
|
|
|
|
if self._group_matches(group_code, title):
|
|
|
|
|
count += 1
|
|
|
|
|
if count >= needed:
|
|
|
|
|
return True
|
|
|
|
|
page_index += 1
|
|
|
|
|
return False
|
|
|
|
|
except Exception:
|
|
|
|
|
grouped = self._ensure_genre_group_cache(genre)
|
|
|
|
|
@@ -1611,6 +1737,7 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
cache_key = self._metadata_cache_key(title)
|
|
|
|
|
if info_labels or art:
|
|
|
|
|
self._series_metadata_cache[cache_key] = (info_labels, art)
|
|
|
|
|
self._series_metadata_full.add(cache_key)
|
|
|
|
|
|
|
|
|
|
base_series_url = _series_root_url(_extract_canonical_url(series_soup, series.url))
|
|
|
|
|
season_links = _extract_season_links(series_soup)
|
|
|
|
|
@@ -1646,7 +1773,7 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
|
|
|
|
|
cache_key = self._metadata_cache_key(title)
|
|
|
|
|
cached = self._series_metadata_cache.get(cache_key)
|
|
|
|
|
if cached is not None:
|
|
|
|
|
if cached is not None and cache_key in self._series_metadata_full:
|
|
|
|
|
info, art = cached
|
|
|
|
|
return dict(info), dict(art), None
|
|
|
|
|
|
|
|
|
|
@@ -1656,11 +1783,14 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
self._series_metadata_cache[cache_key] = (dict(info), {})
|
|
|
|
|
return info, {}, None
|
|
|
|
|
|
|
|
|
|
info: Dict[str, str] = {"title": title}
|
|
|
|
|
art: Dict[str, str] = {}
|
|
|
|
|
info: Dict[str, str] = dict(cached[0]) if cached else {"title": title}
|
|
|
|
|
art: Dict[str, str] = dict(cached[1]) if cached else {}
|
|
|
|
|
info.setdefault("title", title)
|
|
|
|
|
if series.description:
|
|
|
|
|
info["plot"] = series.description
|
|
|
|
|
info.setdefault("plot", series.description)
|
|
|
|
|
|
|
|
|
|
# Fuer Listenansichten laden wir pro Seite die Detail-Metadaten vollstaendig nach.
|
|
|
|
|
loaded_full = False
|
|
|
|
|
try:
|
|
|
|
|
soup = _get_soup(series.url, session=get_requests_session("serienstream", headers=HEADERS))
|
|
|
|
|
parsed_info, parsed_art = _extract_series_metadata(soup)
|
|
|
|
|
@@ -1668,10 +1798,13 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
info.update(parsed_info)
|
|
|
|
|
if parsed_art:
|
|
|
|
|
art.update(parsed_art)
|
|
|
|
|
loaded_full = True
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
self._series_metadata_cache[cache_key] = (dict(info), dict(art))
|
|
|
|
|
if loaded_full:
|
|
|
|
|
self._series_metadata_full.add(cache_key)
|
|
|
|
|
return info, art, None
|
|
|
|
|
|
|
|
|
|
def series_url_for_title(self, title: str) -> str:
|
|
|
|
|
@@ -1742,6 +1875,8 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
self._season_links_cache.clear()
|
|
|
|
|
self._episode_label_cache.clear()
|
|
|
|
|
self._catalog_cache = None
|
|
|
|
|
self._series_metadata_cache.clear()
|
|
|
|
|
self._series_metadata_full.clear()
|
|
|
|
|
return []
|
|
|
|
|
if not self._requests_available:
|
|
|
|
|
raise RuntimeError("SerienstreamPlugin kann ohne requests/bs4 nicht suchen.")
|
|
|
|
|
@@ -1755,6 +1890,8 @@ class SerienstreamPlugin(BasisPlugin):
|
|
|
|
|
self._season_cache.clear()
|
|
|
|
|
self._episode_label_cache.clear()
|
|
|
|
|
self._catalog_cache = None
|
|
|
|
|
self._series_metadata_cache.clear()
|
|
|
|
|
self._series_metadata_full.clear()
|
|
|
|
|
raise RuntimeError(f"Serienstream-Suche fehlgeschlagen: {exc}") from exc
|
|
|
|
|
self._series_results = {}
|
|
|
|
|
for result in results:
|
|
|
|
|
|