Add Filmpalast genre browsing and paged genre titles
This commit is contained in:
@@ -10,6 +10,7 @@ from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
from urllib.parse import quote, urlencode
|
||||
from urllib.parse import urljoin
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeAlias
|
||||
|
||||
try: # pragma: no cover - optional dependency
|
||||
@@ -223,6 +224,8 @@ class FilmpalastPlugin(BasisPlugin):
|
||||
self._title_to_url: Dict[str, str] = {}
|
||||
self._series_entries: Dict[str, Dict[int, Dict[int, EpisodeEntry]]] = {}
|
||||
self._hoster_cache: Dict[str, Dict[str, str]] = {}
|
||||
self._genre_to_url: Dict[str, str] = {}
|
||||
self._genre_page_count_cache: Dict[str, int] = {}
|
||||
self._requests_available = REQUESTS_AVAILABLE
|
||||
self._default_preferred_hosters: List[str] = list(DEFAULT_PREFERRED_HOSTERS)
|
||||
self._preferred_hosters: List[str] = list(self._default_preferred_hosters)
|
||||
@@ -391,8 +394,41 @@ class FilmpalastPlugin(BasisPlugin):
|
||||
|
||||
return hits
|
||||
|
||||
async def search_titles(self, query: str) -> List[str]:
|
||||
hits = self._search_hits(query)
|
||||
def _parse_listing_hits(self, soup: BeautifulSoupT, *, query: str = "") -> List[SearchHit]:
    """Extract deduplicated (title, url) hits from a listing page.

    Prefers the structured ``article.liste`` heading anchors and falls back
    to any ``/stream/`` anchors when none are present. Skips navigation
    pseudo-titles, optionally filters by *query*, and logs every accepted
    URL before recording it.
    """
    results: List[SearchHit] = []
    if not soup:
        return results
    known_titles: set[str] = set()
    known_urls: set[str] = set()
    candidates = soup.select("article.liste h2 a[href], article.liste h3 a[href]")
    if not candidates:
        candidates = soup.select("a[href*='/stream/'][title], a[href*='/stream/']")
    for link in candidates:
        raw_href = (link.get("href") or "").strip()
        if not raw_href:
            continue
        # Normalise: absolutise, drop fragment/query, strip trailing slash.
        cleaned = _absolute_url(raw_href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
        if not _is_probably_content_url(cleaned):
            continue
        label = (link.get("title") or link.get_text(" ", strip=True)).strip()
        if not label:
            continue
        # Navigation anchors masquerading as titles.
        if label.casefold() in {"details/play", "play", "details"}:
            continue
        if query and not _matches_query(query, title=label):
            continue
        label_key = label.casefold()
        link_key = cleaned.casefold()
        if label_key in known_titles or link_key in known_urls:
            continue
        known_titles.add(label_key)
        known_urls.add(link_key)
        _log_url_event(cleaned, kind="PARSE")
        results.append(SearchHit(title=label, url=cleaned))
    return results
|
||||
|
||||
def _apply_hits_to_title_index(self, hits: List[SearchHit]) -> List[str]:
|
||||
self._title_to_url = {}
|
||||
self._series_entries = {}
|
||||
self._hoster_cache.clear()
|
||||
@@ -425,6 +461,97 @@ class FilmpalastPlugin(BasisPlugin):
|
||||
titles.sort(key=lambda value: value.casefold())
|
||||
return titles
|
||||
|
||||
async def search_titles(self, query: str) -> List[str]:
    """Search Filmpalast for *query* and return the matching titles.

    Runs the regular search, then rebuilds the plugin's title index
    (title -> URL map, series entries, hoster cache) from the hits.
    """
    return self._apply_hits_to_title_index(self._search_hits(query))
|
||||
|
||||
def _parse_genres(self, soup: BeautifulSoupT) -> Dict[str, str]:
    """Map genre display names to absolute genre-listing URLs.

    Only anchors inside the ``#genre`` navigation whose href points at
    ``/search/genre/`` are accepted; everything else is ignored.
    """
    mapping: Dict[str, str] = {}
    if not soup:
        return mapping
    for link in soup.select("section#genre a[href], #genre a[href], aside #genre a[href]"):
        label = (link.get_text(" ", strip=True) or "").strip()
        target = (link.get("href") or "").strip()
        if label and target and "/search/genre/" in target:
            mapping[label] = _absolute_url(target)
    return mapping
|
||||
|
||||
def _extract_last_page(self, soup: BeautifulSoupT) -> int:
|
||||
max_page = 1
|
||||
if not soup:
|
||||
return max_page
|
||||
for anchor in soup.select("#paging a[href], .paging a[href], a.pageing[href]"):
|
||||
text = (anchor.get_text(" ", strip=True) or "").strip()
|
||||
for candidate in (text, (anchor.get("href") or "").strip()):
|
||||
for value in re.findall(r"(\d+)", candidate):
|
||||
try:
|
||||
max_page = max(max_page, int(value))
|
||||
except Exception:
|
||||
continue
|
||||
return max_page
|
||||
|
||||
def capabilities(self) -> set[str]:
    """Advertise optional plugin features; this plugin supports genre browsing."""
    supported: set[str] = set()
    supported.add("genres")
    return supported
|
||||
|
||||
def genres(self) -> List[str]:
    """Return the available genre names, case-insensitively sorted.

    The name->URL mapping is scraped from the start page on first use and
    cached in ``self._genre_to_url``. Network failures yield an empty list
    so a later call can retry; an empty parse result is not cached either.
    """
    if not self._requests_available:
        return []
    if not self._genre_to_url:
        try:
            session = get_requests_session("filmpalast", headers=HEADERS)
            soup = _get_soup(_absolute_url("/"), session=session)
        except Exception:
            return []
        parsed = self._parse_genres(soup)
        if parsed:
            self._genre_to_url = dict(parsed)
    return sorted(self._genre_to_url, key=str.casefold)
|
||||
|
||||
def genre_page_count(self, genre: str) -> int:
    """Return how many listing pages exist for *genre* (always >= 1).

    Results are cached per genre. Unknown genres and fetch failures
    report a single page. Populates the genre map on demand via
    :meth:`genres`.
    """
    name = (genre or "").strip()
    if not name:
        return 1
    cached = self._genre_page_count_cache.get(name)
    if cached is not None:
        return max(1, int(cached))
    if not self._genre_to_url:
        self.genres()  # lazily discover the genre -> URL map
    listing_url = self._genre_to_url.get(name, "")
    if not listing_url:
        return 1
    try:
        session = get_requests_session("filmpalast", headers=HEADERS)
        soup = _get_soup(listing_url, session=session)
    except Exception:
        return 1
    count = max(1, self._extract_last_page(soup))
    self._genre_page_count_cache[name] = count
    return count
|
||||
|
||||
def titles_for_genre_page(self, genre: str, page: int) -> List[str]:
    """Fetch one listing page of *genre* and return its titles.

    Page 1 uses the genre's base URL; later pages append ``page/<n>``.
    The parsed hits replace the plugin's title index. Returns [] for
    unknown genres, missing requests support, or fetch failures.
    """
    name = (genre or "").strip()
    if not name or not self._requests_available:
        return []
    if not self._genre_to_url:
        self.genres()  # make sure the genre map is populated
    listing_url = self._genre_to_url.get(name, "")
    if not listing_url:
        return []
    page_no = max(1, int(page or 1))
    if page_no == 1:
        target = listing_url
    else:
        target = urljoin(listing_url.rstrip("/") + "/", f"page/{page_no}")
    try:
        session = get_requests_session("filmpalast", headers=HEADERS)
        soup = _get_soup(target, session=session)
    except Exception:
        return []
    return self._apply_hits_to_title_index(self._parse_listing_hits(soup))
|
||||
|
||||
def titles_for_genre(self, genre: str) -> List[str]:
    """Return the case-insensitively sorted titles on the first page of *genre*."""
    return sorted(self.titles_for_genre_page(genre, 1), key=str.casefold)
|
||||
|
||||
def _ensure_title_url(self, title: str) -> str:
|
||||
title = (title or "").strip()
|
||||
if not title:
|
||||
|
||||
Reference in New Issue
Block a user