"""HDFilme plugin for ViewIT.

HTML scraping of hdfilme.garden.
Movies and series, hoster resolution via ResolveURL.

Note: the domain changes occasionally; it is configurable through the
DOMAIN constant.
"""

from __future__ import annotations

import re
from typing import Any, Callable, List, Optional
from urllib.parse import quote_plus

try:  # pragma: no cover
    import requests
except ImportError as exc:  # pragma: no cover
    requests = None
    REQUESTS_AVAILABLE = False
    REQUESTS_IMPORT_ERROR = exc
else:
    REQUESTS_AVAILABLE = True
    REQUESTS_IMPORT_ERROR = None

from plugin_interface import BasisPlugin

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

DOMAIN = "hdfilme.garden"
BASE_URL = "https://" + DOMAIN
DEFAULT_TIMEOUT = 20

HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Referer": BASE_URL + "/",
}

_URL_SEARCH = BASE_URL + "/index.php?do=search&subaction=search&story={query}"
_URL_NEW = BASE_URL + "/kinofilme-online/"
_URL_SERIES = BASE_URL + "/serienstream-deutsch/"

# HTML parsing patterns.
#
# NOTE(review): in the copy of this file under review the HTML-matching
# literals had been damaged (everything between a "<" and the next ">" was
# missing). The tag-bearing parts below are reconstructions and MUST be
# checked against version control / the live site markup before release.

# Groups: (1) detail-page URL, (2) title text, (3) thumbnail URL.
# NOTE(review): only the part from "[^>]*>" onwards survived; the leading
# href group is the minimal reconstruction that supplies the third group
# _parse_entries() reads. The original may have been anchored on a container
# element - confirm.
_RE_ENTRIES = re.compile(
    r'href="([^"]+)"[^>]*>([^<]+).*?data-src="([^"]+)',
    re.DOTALL,
)
# Group 1: episode label.
# NOTE(review): only '>([^<]+)' survived, which would match every text node
# on the page; the '<a href="#">' anchor is a reconstruction - confirm.
_RE_EPISODES = re.compile(r'><a href="#">([^<]+)')
# Group 1: hoster URL stored in a link="..." attribute.
_RE_HOSTERS = re.compile(r'link="([^"]+)"')
_RE_THUMB_STANDALONE = re.compile(r'data-src="([^"]+)"')

# Hosters whose links are never offered as stream candidates.
_SKIP_HOSTERS = {"youtube", "dropload"}

ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]


def _absolute_url(url: str) -> str:
    """Return *url* with site-relative and protocol-relative forms expanded.

    ``//host/path`` gets an ``https:`` scheme, ``/path`` is prefixed with
    BASE_URL, anything else is returned unchanged.
    """
    if url.startswith("//"):
        return "https:" + url
    if url.startswith("/"):
        return BASE_URL + url
    return url


def _host_labels(link: str) -> List[str]:
    """Return the lower-cased dot-separated labels of the host in *link*.

    Works with and without a scheme. Only the authority part is inspected,
    so a ``//`` or ``.`` later in the path cannot be mistaken for the host.
    """
    authority = link.split("//", 1)[-1].split("/", 1)[0]
    # Drop optional userinfo and port.
    host = authority.rsplit("@", 1)[-1].split(":", 1)[0]
    return host.lower().split(".")


# ---------------------------------------------------------------------------
# Plugin class
# ---------------------------------------------------------------------------


class HDFilmePlugin(BasisPlugin):
    """HDFilme integration for ViewIT (hdfilme.garden).

    Titles discovered by search/browse calls are cached on the instance
    (URL, artwork, series flag); the per-title methods look them up there,
    so a title must have been listed before it can be resolved.
    """

    name = "HDFilme"

    def __init__(self) -> None:
        # title -> detail page URL
        self._title_to_url: dict[str, str] = {}
        # title -> (plot, poster, fanart)
        self._title_meta: dict[str, tuple[str, str, str]] = {}
        # title -> True if the entry is a series
        self._is_series: dict[str, bool] = {}

    # ------------------------------------------------------------------
    # Availability
    # ------------------------------------------------------------------

    @property
    def is_available(self) -> bool:
        """True when the ``requests`` package could be imported."""
        return REQUESTS_AVAILABLE

    @property
    def unavailable_reason(self) -> str:
        """Reason the plugin is unusable, or "" when it is available."""
        if REQUESTS_AVAILABLE:
            return ""
        return f"requests nicht verfügbar: {REQUESTS_IMPORT_ERROR}"

    # ------------------------------------------------------------------
    # HTTP
    # ------------------------------------------------------------------

    def _get_session(self):  # type: ignore[return]
        """Fetch the session registered as "hdfilme" from http_session_pool."""
        from http_session_pool import get_requests_session

        return get_requests_session("hdfilme", headers=HEADERS)

    def _get_html(self, url: str) -> str:
        """GET *url* and return the response body, or "" on any request error.

        Failures are swallowed on purpose: callers treat an empty string as
        "nothing found". The response is always closed.
        """
        session = self._get_session()
        response = None
        try:
            response = session.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
            response.raise_for_status()
            return response.text
        except Exception:
            return ""
        finally:
            if response is not None:
                try:
                    response.close()
                except Exception:
                    pass

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _parse_entries(self, html: str) -> List[str]:
        """Parse a listing page, cache its entries and return their titles.

        For every match of _RE_ENTRIES the detail URL, the series flag and
        the thumbnail are stored under the stripped title. Entries with an
        empty title or a URL that does not end up starting with "http" are
        skipped.
        """
        titles: list[str] = []
        for match in _RE_ENTRIES.finditer(html):
            raw_url, raw_title, raw_thumb = match.group(1), match.group(2), match.group(3)
            title = raw_title.strip()
            if not title:
                continue

            # Make sure the URL is absolute (handles "/path" and "//host/path").
            url = _absolute_url(raw_url.strip())
            if not url.startswith("http"):
                continue

            thumb = _absolute_url(raw_thumb.strip())

            # Matches "Staffel" regardless of the first letter's case
            # (xStream convention).
            is_series = "taffel" in title
            self._title_to_url[title] = url
            self._is_series[title] = is_series
            self._title_meta[title] = ("", thumb, "")
            titles.append(title)
        return titles

    def _get_hoster_links(self, html: str, episode: str = "") -> List[str]:
        """Extract hoster URLs from *html*, optionally limited to one episode.

        When *episode* is given and its section can be located, only that
        section is scanned; otherwise the whole page is. Protocol-relative
        links get an ``https:`` scheme. Links whose hostname contains a
        label listed in _SKIP_HOSTERS (e.g. ``www.youtube.com``) are dropped.
        """
        search_area = html
        if episode:
            # Isolate the episode section.
            # NOTE(review): the closing "</ul>" terminator is a
            # reconstruction; the reviewed copy ended in a bare lazy ".*?",
            # which always matches empty and so never contains a link.
            section = re.search(re.escape(episode) + r"<.*?</ul>", html, re.DOTALL)
            if section:
                search_area = section.group(0)

        links: list[str] = []
        for match in _RE_HOSTERS.finditer(search_area):
            link = match.group(1).strip()
            if not link:
                continue
            if link.startswith("//"):
                link = "https:" + link
            # Check every hostname label so that sub-domains such as "www."
            # cannot bypass the skip list.
            if _SKIP_HOSTERS.intersection(_host_labels(link)):
                continue
            links.append(link)
        return links

    # ------------------------------------------------------------------
    # Mandatory methods
    # ------------------------------------------------------------------

    async def search_titles(
        self, query: str, progress_callback: ProgressCallback = None
    ) -> List[str]:
        """Search the site for *query* and return the matching titles.

        Returns [] for an empty query, when ``requests`` is unavailable or
        when the page could not be fetched. *progress_callback* is accepted
        for interface compatibility and not used here. The HTTP request
        itself is made synchronously through _get_html().
        """
        query = (query or "").strip()
        if not query or not REQUESTS_AVAILABLE:
            return []

        url = _URL_SEARCH.format(query=quote_plus(query))
        html = self._get_html(url)
        if not html:
            return []

        # The result page is filtered again locally by title substring.
        q_lower = query.lower()
        all_titles = self._parse_entries(html)
        return [t for t in all_titles if q_lower in t.lower()]

    def seasons_for(self, title: str) -> List[str]:
        """Return the season labels for *title*.

        Titles not flagged as a series yield ["Film"]. For a series the
        season number is taken from the title ("Staffel N"); "Staffel 1" is
        used when the title carries no number.
        """
        title = (title or "").strip()
        if not title:
            return []
        if self._is_series.get(title):
            # Derive the season number from the title, if present.
            m = re.search(r"Staffel\s*(\d+)", title, re.IGNORECASE)
            if m:
                return [f"Staffel {m.group(1)}"]
            return ["Staffel 1"]
        return ["Film"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        """Return the episode labels of *title*.

        A movie (season "Film") is its own single episode. For a series the
        detail page is fetched and scanned with _RE_EPISODES; when the page
        cannot be fetched or yields no label, [title] is returned. A title
        without a cached URL yields [].
        """
        title = (title or "").strip()
        if not title:
            return []

        if season == "Film":
            return [title]

        url = self._title_to_url.get(title, "")
        if not url:
            return []

        html = self._get_html(url)
        if not html:
            return [title]

        episodes = _RE_EPISODES.findall(html)
        return [ep.strip() for ep in episodes if ep.strip()] or [title]

    # ------------------------------------------------------------------
    # Stream
    # ------------------------------------------------------------------

    def stream_link_for(
        self, title: str, season: str, episode: str
    ) -> Optional[str]:
        """Return the first usable hoster link for the given item, or None.

        None is returned when the title has no cached URL, the detail page
        cannot be fetched, or no hoster link is left after filtering.
        """
        title = (title or "").strip()
        url = self._title_to_url.get(title, "")
        if not url:
            return None

        html = self._get_html(url)
        if not html:
            return None

        # Series: restrict to the episode's section (when episode != title).
        ep_filter = "" if (season == "Film" or episode == title) else episode
        links = self._get_hoster_links(html, ep_filter)
        return links[0] if links else None

    def resolve_stream_link(self, link: str) -> Optional[str]:
        """Pass *link* to plugin_helpers.resolve_via_resolveurl.

        Returns None for an empty link or when the helper (or its import)
        raises; otherwise returns whatever the helper returns.
        """
        link = (link or "").strip()
        if not link:
            return None
        try:
            from plugin_helpers import resolve_via_resolveurl

            return resolve_via_resolveurl(link, fallback_to_link=False)
        except Exception:
            return None

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------

    def metadata_for(
        self, title: str
    ) -> tuple[dict[str, str], dict[str, str], list | None]:
        """Return ``(info, art, None)`` for *title* from the instance cache.

        *info* always carries "title" and, if cached, "plot". *art* carries
        "thumb"/"poster" and "fanart" when cached. The third element is
        always None. An empty title yields ``({}, {}, None)``.
        """
        title = (title or "").strip()
        if not title:
            return {}, {}, None

        info: dict[str, str] = {"title": title}
        art: dict[str, str] = {}

        cached = self._title_meta.get(title)
        if cached:
            plot, poster, fanart = cached
            if plot:
                info["plot"] = plot
            if poster:
                art["thumb"] = poster
                art["poster"] = poster
            if fanart:
                art["fanart"] = fanart

        return info, art, None

    # ------------------------------------------------------------------
    # Browsing
    # ------------------------------------------------------------------

    def latest_titles(self, page: int = 1) -> List[str]:
        """Return the titles listed on the "new movies" page.

        *page* is currently ignored: only the first listing page is fetched.
        """
        html = self._get_html(_URL_NEW)
        return self._parse_entries(html) if html else []

    def popular_series(self) -> List[str]:
        """Return the titles listed on the series overview page."""
        html = self._get_html(_URL_SERIES)
        return self._parse_entries(html) if html else []

    def capabilities(self) -> set[str]:
        """Names of the optional browse features this plugin implements."""
        return {"latest_titles", "popular_series"}