def _parse_listing_hits(self, soup: BeautifulSoupT, *, query: str = "") -> List[SearchHit]:
    """Collect unique (title, url) hits from a listing/search results page.

    Walks the listing anchors, normalizes each href to an absolute URL with
    fragment/query stripped, filters out navigation labels and non-content
    URLs, optionally restricts to *query* matches, and de-duplicates by
    case-insensitive title and URL.
    """
    results: List[SearchHit] = []
    if not soup:
        return results

    known_titles: set[str] = set()
    known_urls: set[str] = set()

    # Primary layout: article cards with an h2/h3 heading link.
    links = soup.select("article.liste h2 a[href], article.liste h3 a[href]")
    if not links:
        # Fallback: any anchor pointing at a /stream/ detail page.
        links = soup.select("a[href*='/stream/'][title], a[href*='/stream/']")

    for link in links:
        raw_href = (link.get("href") or "").strip()
        if not raw_href:
            continue

        # Canonicalize: absolute URL, no fragment, no query string, no trailing slash.
        page_url = _absolute_url(raw_href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
        if not _is_probably_content_url(page_url):
            continue

        label = (link.get("title") or link.get_text(" ", strip=True)).strip()
        if not label:
            continue
        # Skip pure navigation captions that sometimes carry content hrefs.
        if label.casefold() in {"details/play", "play", "details"}:
            continue
        if query and not _matches_query(query, title=label):
            continue

        label_key = label.casefold()
        page_key = page_url.casefold()
        if label_key in known_titles or page_key in known_urls:
            continue
        known_titles.add(label_key)
        known_urls.add(page_key)

        _log_url_event(page_url, kind="PARSE")
        results.append(SearchHit(title=label, url=page_url))

    return results

async def search_titles(self, query: str) -> List[str]:
    """Search the site for *query* and rebuild the internal title index."""
    return self._apply_hits_to_title_index(self._search_hits(query))

def _parse_genres(self, soup: BeautifulSoupT) -> Dict[str, str]:
    """Map genre display names to absolute genre-listing URLs found in *soup*."""
    found: Dict[str, str] = {}
    if not soup:
        return found
    for link in soup.select("section#genre a[href], #genre a[href], aside #genre a[href]"):
        label = (link.get_text(" ", strip=True) or "").strip()
        target = (link.get("href") or "").strip()
        if not label or not target:
            continue
        # Only accept real genre search links; the #genre box may hold other anchors.
        if "/search/genre/" not in target:
            continue
        found[label] = _absolute_url(target)
    return found

def _extract_last_page(self, soup: BeautifulSoupT) -> int:
    """Return the highest page number advertised by the pagination widget (>= 1)."""
    highest = 1
    if not soup:
        return highest
    for link in soup.select("#paging a[href], .paging a[href], a.pageing[href]"):
        label = (link.get_text(" ", strip=True) or "").strip()
        # Numbers may appear in the anchor text or only in its href.
        for source in (label, (link.get("href") or "").strip()):
            for digits in re.findall(r"(\d+)", source):
                try:
                    highest = max(highest, int(digits))
                except Exception:
                    continue
    return highest
def capabilities(self) -> set[str]:
    """Advertise optional plugin features; this plugin supports genre browsing."""
    return {"genres"}

def genres(self) -> List[str]:
    """Return all known genre names, case-insensitively sorted.

    Fetches and parses the site homepage on first use; the resulting
    name -> URL mapping is cached on the instance. Returns an empty list
    when requests is unavailable or the homepage cannot be fetched.
    """
    if not self._requests_available:
        return []
    if not self._genre_to_url:
        try:
            soup = _get_soup(_absolute_url("/"), session=get_requests_session("filmpalast", headers=HEADERS))
        except Exception:
            return []
        parsed = self._parse_genres(soup)
        if parsed:
            self._genre_to_url = dict(parsed)
    return sorted(self._genre_to_url.keys(), key=lambda value: value.casefold())

def genre_page_count(self, genre: str) -> int:
    """Return how many listing pages exist for *genre* (always >= 1).

    Results are cached per genre; unknown genres and fetch failures
    report a single page.
    """
    name = (genre or "").strip()
    if not name:
        return 1
    cached = self._genre_page_count_cache.get(name)
    if cached is not None:
        return max(1, int(cached))
    if not self._genre_to_url:
        self.genres()  # lazily populate the genre -> URL map
    listing_url = self._genre_to_url.get(name, "")
    if not listing_url:
        return 1
    try:
        soup = _get_soup(listing_url, session=get_requests_session("filmpalast", headers=HEADERS))
    except Exception:
        return 1
    self._genre_page_count_cache[name] = max(1, self._extract_last_page(soup))
    return self._genre_page_count_cache[name]

def titles_for_genre_page(self, genre: str, page: int) -> List[str]:
    """Load one listing page of *genre* and refresh the title index.

    Page 1 uses the genre's base URL; later pages append ``page/<n>``.
    Returns an empty list for unknown genres or fetch failures.
    """
    name = (genre or "").strip()
    if not name or not self._requests_available:
        return []
    if not self._genre_to_url:
        self.genres()
    listing_url = self._genre_to_url.get(name, "")
    if not listing_url:
        return []
    page_number = max(1, int(page or 1))
    if page_number == 1:
        target = listing_url
    else:
        target = urljoin(listing_url.rstrip("/") + "/", f"page/{page_number}")
    try:
        soup = _get_soup(target, session=get_requests_session("filmpalast", headers=HEADERS))
    except Exception:
        return []
    return self._apply_hits_to_title_index(self._parse_listing_hits(soup))

def titles_for_genre(self, genre: str) -> List[str]:
    """Convenience wrapper: first listing page of *genre*, sorted case-insensitively."""
    first_page = self.titles_for_genre_page(genre, 1)
    first_page.sort(key=lambda value: value.casefold())
    return first_page
not title: