nightly: fix movie search flow and add source metadata fallbacks

This commit is contained in:
2026-02-23 17:52:44 +01:00
parent d414fac022
commit d5a1125e03
4 changed files with 445 additions and 62 deletions

View File

@@ -97,6 +97,7 @@ class SearchHit:
title: str
url: str
description: str = ""
poster: str = ""
def _normalize_search_text(value: str) -> str:
@@ -149,6 +150,7 @@ class TopstreamfilmPlugin(BasisPlugin):
self._season_to_episode_numbers: Dict[tuple[str, str], List[int]] = {}
self._episode_title_by_number: Dict[tuple[str, int, int], str] = {}
self._detail_html_cache: Dict[str, str] = {}
self._title_meta: Dict[str, tuple[str, str]] = {}
self._popular_cache: List[str] | None = None
self._default_preferred_hosters: List[str] = list(DEFAULT_PREFERRED_HOSTERS)
self._preferred_hosters: List[str] = list(self._default_preferred_hosters)
@@ -429,6 +431,7 @@ class TopstreamfilmPlugin(BasisPlugin):
continue
seen.add(hit.title)
self._title_to_url[hit.title] = hit.url
self._store_title_meta(hit.title, plot=hit.description, poster=hit.poster)
titles.append(hit.title)
if titles:
self._save_title_url_cache()
@@ -487,6 +490,69 @@ class TopstreamfilmPlugin(BasisPlugin):
except Exception:
return ""
def _pick_image_from_node(self, node: Any) -> str:
if node is None:
return ""
image = node.select_one("img")
if image is None:
return ""
for attr in ("data-src", "src"):
value = (image.get(attr) or "").strip()
if value and "lazy_placeholder" not in value.casefold():
return self._absolute_external_url(value, base=self._get_base_url())
srcset = (image.get("data-srcset") or image.get("srcset") or "").strip()
if srcset:
first = srcset.split(",")[0].strip().split(" ", 1)[0].strip()
if first:
return self._absolute_external_url(first, base=self._get_base_url())
return ""
def _store_title_meta(self, title: str, *, plot: str = "", poster: str = "") -> None:
title = (title or "").strip()
if not title:
return
old_plot, old_poster = self._title_meta.get(title, ("", ""))
merged_plot = (plot or old_plot or "").strip()
merged_poster = (poster or old_poster or "").strip()
self._title_meta[title] = (merged_plot, merged_poster)
def _extract_detail_metadata(self, soup: BeautifulSoupT) -> tuple[str, str]:
if not soup:
return "", ""
plot = ""
poster = ""
for selector in ("meta[property='og:description']", "meta[name='description']"):
node = soup.select_one(selector)
if node is None:
continue
content = (node.get("content") or "").strip()
if content:
plot = content
break
if not plot:
candidates: list[str] = []
for paragraph in soup.select("article p, .TPost p, .Description p, .entry-content p"):
text = (paragraph.get_text(" ", strip=True) or "").strip()
if len(text) >= 60:
candidates.append(text)
if candidates:
plot = max(candidates, key=len)
for selector in ("meta[property='og:image']", "meta[name='twitter:image']"):
node = soup.select_one(selector)
if node is None:
continue
content = (node.get("content") or "").strip()
if content:
poster = self._absolute_external_url(content, base=self._get_base_url())
break
if not poster:
for selector in ("article", ".TPost", ".entry-content"):
poster = self._pick_image_from_node(soup.select_one(selector))
if poster:
break
return plot, poster
def _clear_stream_index_for_title(self, title: str) -> None:
for key in list(self._season_to_episode_numbers.keys()):
if key[0] == title:
@@ -721,7 +787,17 @@ class TopstreamfilmPlugin(BasisPlugin):
continue
if is_movie_hint:
self._movie_title_hint.add(title)
hits.append(SearchHit(title=title, url=self._absolute_url(href), description=""))
description_tag = item.select_one(".TPMvCn .Description, .Description, .entry-summary")
description = (description_tag.get_text(" ", strip=True) or "").strip() if description_tag else ""
poster = self._pick_image_from_node(item)
hits.append(
SearchHit(
title=title,
url=self._absolute_url(href),
description=description,
poster=poster,
)
)
return hits
def is_movie(self, title: str) -> bool:
@@ -794,6 +870,7 @@ class TopstreamfilmPlugin(BasisPlugin):
continue
seen.add(hit.title)
self._title_to_url[hit.title] = hit.url
self._store_title_meta(hit.title, plot=hit.description, poster=hit.poster)
titles.append(hit.title)
if titles:
self._save_title_url_cache()
@@ -905,7 +982,8 @@ class TopstreamfilmPlugin(BasisPlugin):
self._movie_title_hint.add(title)
description_tag = item.select_one(".TPMvCn .Description")
description = description_tag.get_text(" ", strip=True) if description_tag else ""
hit = SearchHit(title=title, url=self._absolute_url(href), description=description)
poster = self._pick_image_from_node(item)
hit = SearchHit(title=title, url=self._absolute_url(href), description=description, poster=poster)
if _matches_query(query, title=hit.title, description=hit.description):
hits.append(hit)
@@ -918,11 +996,41 @@ class TopstreamfilmPlugin(BasisPlugin):
continue
seen.add(hit.title)
self._title_to_url[hit.title] = hit.url
self._store_title_meta(hit.title, plot=hit.description, poster=hit.poster)
titles.append(hit.title)
self._save_title_url_cache()
_emit_progress(progress_callback, f"Fertig: {len(titles)} Treffer", 95)
return titles
def metadata_for(self, title: str) -> tuple[dict[str, str], dict[str, str], list[object] | None]:
title = (title or "").strip()
if not title:
return {}, {}, None
info: dict[str, str] = {"title": title}
art: dict[str, str] = {}
cached_plot, cached_poster = self._title_meta.get(title, ("", ""))
if cached_plot:
info["plot"] = cached_plot
if cached_poster:
art = {"thumb": cached_poster, "poster": cached_poster}
if "plot" in info and art:
return info, art, None
soup = self._get_detail_soup(title)
if soup is None:
return info, art, None
plot, poster = self._extract_detail_metadata(soup)
if plot:
info["plot"] = plot
if poster:
art = {"thumb": poster, "poster": poster}
self._store_title_meta(title, plot=plot, poster=poster)
return info, art, None
def genres(self) -> List[str]:
if not REQUESTS_AVAILABLE or BeautifulSoup is None:
return []