Add Filmpalast genre browsing and paged genre titles

This commit is contained in:
2026-02-02 23:13:23 +01:00
parent 4f7b0eba0c
commit 951e99cb4c

View File

@@ -10,6 +10,7 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
import re import re
from urllib.parse import quote, urlencode from urllib.parse import quote, urlencode
from urllib.parse import urljoin
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeAlias from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeAlias
try: # pragma: no cover - optional dependency try: # pragma: no cover - optional dependency
@@ -223,6 +224,8 @@ class FilmpalastPlugin(BasisPlugin):
self._title_to_url: Dict[str, str] = {} self._title_to_url: Dict[str, str] = {}
self._series_entries: Dict[str, Dict[int, Dict[int, EpisodeEntry]]] = {} self._series_entries: Dict[str, Dict[int, Dict[int, EpisodeEntry]]] = {}
self._hoster_cache: Dict[str, Dict[str, str]] = {} self._hoster_cache: Dict[str, Dict[str, str]] = {}
self._genre_to_url: Dict[str, str] = {}
self._genre_page_count_cache: Dict[str, int] = {}
self._requests_available = REQUESTS_AVAILABLE self._requests_available = REQUESTS_AVAILABLE
self._default_preferred_hosters: List[str] = list(DEFAULT_PREFERRED_HOSTERS) self._default_preferred_hosters: List[str] = list(DEFAULT_PREFERRED_HOSTERS)
self._preferred_hosters: List[str] = list(self._default_preferred_hosters) self._preferred_hosters: List[str] = list(self._default_preferred_hosters)
@@ -391,8 +394,41 @@ class FilmpalastPlugin(BasisPlugin):
return hits return hits
async def search_titles(self, query: str) -> List[str]: def _parse_listing_hits(self, soup: BeautifulSoupT, *, query: str = "") -> List[SearchHit]:
hits = self._search_hits(query) hits: List[SearchHit] = []
if not soup:
return hits
seen_titles: set[str] = set()
seen_urls: set[str] = set()
anchors = soup.select("article.liste h2 a[href], article.liste h3 a[href]")
if not anchors:
anchors = soup.select("a[href*='/stream/'][title], a[href*='/stream/']")
for anchor in anchors:
href = (anchor.get("href") or "").strip()
if not href:
continue
url = _absolute_url(href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
if not _is_probably_content_url(url):
continue
title = (anchor.get("title") or anchor.get_text(" ", strip=True)).strip()
if not title:
continue
if title.casefold() in {"details/play", "play", "details"}:
continue
if query and not _matches_query(query, title=title):
continue
title_key = title.casefold()
url_key = url.casefold()
if title_key in seen_titles or url_key in seen_urls:
continue
seen_titles.add(title_key)
seen_urls.add(url_key)
_log_url_event(url, kind="PARSE")
hits.append(SearchHit(title=title, url=url))
return hits
def _apply_hits_to_title_index(self, hits: List[SearchHit]) -> List[str]:
self._title_to_url = {} self._title_to_url = {}
self._series_entries = {} self._series_entries = {}
self._hoster_cache.clear() self._hoster_cache.clear()
@@ -425,6 +461,97 @@ class FilmpalastPlugin(BasisPlugin):
titles.sort(key=lambda value: value.casefold()) titles.sort(key=lambda value: value.casefold())
return titles return titles
async def search_titles(self, query: str) -> List[str]:
    """Search for *query* and return the resulting title list.

    The hits produced by ``_search_hits`` are handed to
    ``_apply_hits_to_title_index``; its return value is passed through
    unchanged.
    """
    return self._apply_hits_to_title_index(self._search_hits(query))
def _parse_genres(self, soup: BeautifulSoupT) -> Dict[str, str]:
genres: Dict[str, str] = {}
if not soup:
return genres
for anchor in soup.select("section#genre a[href], #genre a[href], aside #genre a[href]"):
name = (anchor.get_text(" ", strip=True) or "").strip()
href = (anchor.get("href") or "").strip()
if not name or not href:
continue
if "/search/genre/" not in href:
continue
genres[name] = _absolute_url(href)
return genres
def _extract_last_page(self, soup: BeautifulSoupT) -> int:
max_page = 1
if not soup:
return max_page
for anchor in soup.select("#paging a[href], .paging a[href], a.pageing[href]"):
text = (anchor.get_text(" ", strip=True) or "").strip()
for candidate in (text, (anchor.get("href") or "").strip()):
for value in re.findall(r"(\d+)", candidate):
try:
max_page = max(max_page, int(value))
except Exception:
continue
return max_page
def capabilities(self) -> set[str]:
    """Return the set of optional feature names this plugin advertises."""
    supported = {"genres"}
    return supported
def genres(self) -> List[str]:
    """Return the known genre names, sorted case-insensitively.

    The genre map is loaded from the site's start page on first use and kept
    in ``self._genre_to_url``; later calls answer from that cache. A failed
    fetch, or ``_requests_available`` being false, yields an empty list. An
    empty parse result is not stored, so the next call tries again.
    """
    if not self._requests_available:
        return []

    if not self._genre_to_url:
        try:
            home_url = _absolute_url("/")
            session = get_requests_session("filmpalast", headers=HEADERS)
            soup = _get_soup(home_url, session=session)
        except Exception:
            return []
        found = self._parse_genres(soup)
        if found:
            self._genre_to_url = dict(found)

    return sorted(self._genre_to_url, key=str.casefold)
def genre_page_count(self, genre: str) -> int:
    """Return how many result pages *genre* has, never less than 1.

    Results are memoised per genre in ``self._genre_page_count_cache``. The
    genre map is loaded via ``genres()`` when it is still empty. An empty or
    unknown genre, or a failed fetch, yields ``1``; those fallbacks are not
    cached.
    """
    genre = (genre or "").strip()
    if not genre:
        return 1

    cache = self._genre_page_count_cache
    if genre in cache:
        return max(1, int(cache.get(genre, 1)))

    if not self._genre_to_url:
        self.genres()
    base_url = self._genre_to_url.get(genre, "")
    if not base_url:
        return 1

    try:
        session = get_requests_session("filmpalast", headers=HEADERS)
        soup = _get_soup(base_url, session=session)
    except Exception:
        return 1

    cache[genre] = max(1, self._extract_last_page(soup))
    return cache[genre]
def titles_for_genre_page(self, genre: str, page: int) -> List[str]:
    """Return the titles listed on one result page of *genre*.

    Args:
        genre: Genre name as returned by ``genres()``; surrounding
            whitespace is ignored.
        page: 1-based page number. Falsy values and values below 1 are
            treated as page 1.

    Returns:
        Whatever ``_apply_hits_to_title_index`` returns for the parsed hits.
        The list is empty when the genre is empty or unknown, requests is
        unavailable, or the fetch fails.
    """
    genre = (genre or "").strip()
    if not (genre and self._requests_available):
        return []

    if not self._genre_to_url:
        self.genres()
    base_url = self._genre_to_url.get(genre, "")
    if not base_url:
        return []

    page = max(1, int(page or 1))
    if page == 1:
        url = base_url
    else:
        # Later pages live under "<genre url>/page/<n>".
        url = urljoin(base_url.rstrip("/") + "/", f"page/{page}")

    try:
        session = get_requests_session("filmpalast", headers=HEADERS)
        soup = _get_soup(url, session=session)
    except Exception:
        return []

    return self._apply_hits_to_title_index(self._parse_listing_hits(soup))
def titles_for_genre(self, genre: str) -> List[str]:
    """Return the first-page titles of *genre*, sorted case-insensitively."""
    return sorted(self.titles_for_genre_page(genre, 1), key=str.casefold)
def _ensure_title_url(self, title: str) -> str: def _ensure_title_url(self, title: str) -> str:
title = (title or "").strip() title = (title or "").strip()
if not title: if not title: