"""HDFilme Plugin für ViewIT. HTML-Scraping von hdfilme-tv.cc (ehemals hdfilme.garden). Filme und Serien, Hoster-Auflösung via ResolveURL. """ from __future__ import annotations import re from typing import Any, Callable, List, Optional from urllib.parse import quote_plus try: # pragma: no cover import requests from bs4 import BeautifulSoup except ImportError as exc: # pragma: no cover requests = None BeautifulSoup = None REQUESTS_AVAILABLE = False REQUESTS_IMPORT_ERROR = exc else: REQUESTS_AVAILABLE = True REQUESTS_IMPORT_ERROR = None from plugin_interface import BasisPlugin # --------------------------------------------------------------------------- # Konstanten # --------------------------------------------------------------------------- BASE_URL = "https://hdfilme-tv.cc" DEFAULT_TIMEOUT = 20 HEADERS = { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "de-DE,de;q=0.9,en;q=0.8", "Connection": "keep-alive", } _URL_SEARCH = BASE_URL + "/?do=search&subaction=search&story={query}" _URL_NEW = BASE_URL + "/kinofilme-online/" _URL_SERIES = BASE_URL + "/serienstream-deutsch/" # Genre-Slug → URL-Pfad GENRE_SLUGS: dict[str, str] = { "Abenteuer": "abenteuer", "Action": "action", "Animation": "animation", "Biographie": "biographie", "Dokumentation": "dokumentation", "Drama": "drama", "Erotik": "erotikfilme", "Familie": "familie", "Fantasy": "fantasy", "Historienfilm": "historien", "Horror": "horror", "Komödie": "komodie", "Krieg": "krieg", "Krimi": "krimi", "Musikfilm": "musikfilme", "Mystery": "mystery", "Romantik": "romantik", "Sci-Fi": "sci-fi", "Sport": "sport", "Thriller": "thriller", "Western": "western", } # Hoster die übersprungen werden (kein Stream / nur Trailer) _SKIP_LINK_KEYWORDS = ("youtube.com", "youtu.be", "hdfilme-tv.cc") ProgressCallback = Optional[Callable[[str, Optional[int]], Any]] # --------------------------------------------------------------------------- # Hilfsfunktionen # --------------------------------------------------------------------------- def _absolute_url(url: str) -> str: """Macht eine relative oder protokoll-relative URL absolut.""" url = (url or "").strip() if url.startswith("//"): return "https:" + url if url.startswith("/"): return BASE_URL + url return url def _clean_title(raw: str) -> str: """Bereinigt einen Rohtitel von Seiten-Suffixen.""" title = (raw or "").strip() for suffix in (" stream", " Stream", " kostenlos", " Deutsch", " German", " online"): if title.endswith(suffix): title = title[: -len(suffix)].strip() return title def _get_soup(url: str) -> Any: """HTTP-GET und BeautifulSoup-Parsing. Gibt None bei Fehler.""" if requests is None or BeautifulSoup is None: return None try: response = requests.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT) response.raise_for_status() return BeautifulSoup(response.text, "html.parser") except Exception: return None # --------------------------------------------------------------------------- # Plugin-Klasse # --------------------------------------------------------------------------- class HdfilmePlugin(BasisPlugin): """HDFilme Integration für ViewIT. HTML-Scraping via BeautifulSoup.""" name = "HDFilme" def __init__(self) -> None: self._title_to_url: dict[str, str] = {} self._is_series: dict[str, bool] = {} self._title_meta: dict[str, tuple[str, str]] = {} # title → (plot, poster) self._episode_cache: dict[str, list[str]] = {} # detail_url → episode labels self._preferred_hosters: list[str] = [] # ------------------------------------------------------------------ # Verfügbarkeit # ------------------------------------------------------------------ @property def is_available(self) -> bool: return REQUESTS_AVAILABLE @property def unavailable_reason(self) -> str: if REQUESTS_AVAILABLE: return "" return f"requests/bs4 nicht verfügbar: {REQUESTS_IMPORT_ERROR}" # ------------------------------------------------------------------ # Internes Parsing # ------------------------------------------------------------------ def _parse_entries(self, soup: Any) -> list[str]: """Parst eine Listing-Seite und gibt Titel zurück (cached).""" if soup is None: return [] titles: list[str] = [] seen: set[str] = set() for box in soup.select("div.box-product"): # URL aus erstem Link link = box.find("a", href=True) if not link: continue url = _absolute_url(link["href"]) if not url.endswith(".html"): continue # Titel aus h3 h3_a = box.select_one("h3 a") if not h3_a: continue raw_title = h3_a.get_text(strip=True) title = _clean_title(raw_title) if not title or title in seen: continue seen.add(title) # Thumbnail img = box.select_one("img.lazyload") poster = "" if img and img.get("data-src"): poster = _absolute_url(img["data-src"]) # Serien-Erkennung via Titel is_series = bool(re.search(r"\bStaffel\b|\bSeason\b", raw_title, re.I)) self._title_to_url[title] = url self._is_series[title] = is_series if poster: self._title_meta[title] = ("", poster) titles.append(title) return titles def _ensure_detail_url(self, title: str) -> str: """Gibt die Detail-URL für einen Titel zurück. Sucht zuerst im Cache, dann live über die Suchfunktion. """ url = self._title_to_url.get(title, "") if url: return url # Fallback: Live-Suche (nötig wenn Plugin-Instanz neu, Cache leer) search_url = _URL_SEARCH.format(query=quote_plus(title.strip())) soup = _get_soup(search_url) if soup: self._parse_entries(soup) url = self._title_to_url.get(title, "") return url def _get_detail_soup(self, title: str) -> Any: """Lädt die Detailseite eines Titels.""" url = self._ensure_detail_url(title) if not url: return None return _get_soup(url) def _extract_hoster_links(self, soup: Any, episode_id: str = "") -> dict[str, str]: """Extrahiert Hoster-Links aus einer Detailseite. Gibt dict {Hoster-Name → URL} zurück. episode_id: wenn gesetzt, nur Links aus dem `
  • ` Block. """ if soup is None: return {} hosters: dict[str, str] = {} if episode_id: container = soup.select_one(f"li#{episode_id}") if container is None: return {} candidates = container.select("a[data-link]") else: candidates = soup.select(".mirrors [data-link]") seen_names: set[str] = set() for el in candidates: href = _absolute_url((el.get("data-link") or "").strip()) if not href: continue if any(kw in href for kw in _SKIP_LINK_KEYWORDS): continue name = el.get_text(strip=True) or "Hoster" # Eindeutiger Name bei Duplikaten base_name = name i = 2 while name in seen_names: name = f"{base_name} {i}" i += 1 seen_names.add(name) hosters[name] = href return hosters def _staffel_nr(self, season: str) -> int: """Extrahiert die Staffelnummer aus einem Label wie 'Staffel 2'.""" m = re.search(r"\d+", season or "") return int(m.group()) if m else 1 def _ep_index(self, episode: str) -> int: """Extrahiert den Episode-Index aus einem Label wie 'Episode 3'.""" m = re.search(r"\d+", episode or "") return int(m.group()) if m else 1 # ------------------------------------------------------------------ # Pflicht-Methoden # ------------------------------------------------------------------ async def search_titles( self, query: str, progress_callback: ProgressCallback = None, ) -> List[str]: if not query or not REQUESTS_AVAILABLE: return [] url = _URL_SEARCH.format(query=quote_plus(query.strip())) soup = _get_soup(url) return self._parse_entries(soup) def seasons_for(self, title: str) -> List[str]: title = (title or "").strip() if not title: return [] if self._is_series.get(title) is False: return ["Film"] if self._is_series.get(title) is True: m = re.search(r"Staffel\s*(\d+)|Season\s*(\d+)", title, re.I) nr = int(m.group(1) or m.group(2)) if m else 1 return [f"Staffel {nr}"] # Unbekannt: Detailseite laden und prüfen soup = self._get_detail_soup(title) if soup and soup.select_one("div.series"): self._is_series[title] = True m = re.search(r"Staffel\s*(\d+)|Season\s*(\d+)", title, re.I) nr = int(m.group(1) or m.group(2)) if m else 1 return [f"Staffel {nr}"] self._is_series[title] = False return ["Film"] def episodes_for(self, title: str, season: str) -> List[str]: title = (title or "").strip() season = (season or "").strip() if not title: return [] if season == "Film": return [title] detail_url = self._ensure_detail_url(title) cached = self._episode_cache.get(detail_url) if cached is not None: return cached staffel_nr = self._staffel_nr(season) soup = self._get_detail_soup(title) if soup is None: return [title] # li IDs: "serie-{staffel}_{episode}" pattern = f"serie-{staffel_nr}_" episode_items = [li for li in soup.select("li[id]") if li.get("id", "").startswith(pattern)] labels: list[str] = [] for li in episode_items: ep_id = li.get("id", "") # z.B. "serie-1_3" ep_num_str = ep_id.split("_")[-1] # Episodentitel aus erstem a = li.find("a", href="#") if a: raw = a.get_text(strip=True) # "Episoden 3" → "Episode 3" ep_label = re.sub(r"^Episoden?\s*", "", raw, flags=re.I).strip() label = f"Episode {ep_label}" if ep_label else f"Episode {ep_num_str}" else: label = f"Episode {ep_num_str}" labels.append(label) result = labels if labels else [title] if detail_url: self._episode_cache[detail_url] = result return result def _hosters_for(self, title: str, season: str, episode: str) -> dict[str, str]: """Gibt alle verfügbaren Hoster {Name → URL} für Titel/Staffel/Episode zurück.""" soup = self._get_detail_soup(title) if soup is None: return {} if season == "Film" or not self._is_series.get(title, False): return self._extract_hoster_links(soup) staffel_nr = self._staffel_nr(season) ep_idx = self._ep_index(episode) episode_id = f"serie-{staffel_nr}_{ep_idx}" return self._extract_hoster_links(soup, episode_id) def available_hosters_for(self, title: str, season: str, episode: str) -> List[str]: return list(self._hosters_for(title, season, episode).keys()) def set_preferred_hosters(self, hosters: List[str]) -> None: self._preferred_hosters = [h for h in hosters if h] def stream_link_for(self, title: str, season: str, episode: str) -> Optional[str]: title = (title or "").strip() season = (season or "").strip() if not title: return None hosters = self._hosters_for(title, season, episode) if not hosters: return None # Bevorzugten Hoster nutzen falls gesetzt for preferred in self._preferred_hosters: key = preferred.casefold() for name, url in hosters.items(): if key in name.casefold() or key in url.casefold(): return url # Fallback: erster Hoster return next(iter(hosters.values())) def resolve_stream_link(self, link: str) -> Optional[str]: link = (link or "").strip() if not link: return None try: from plugin_helpers import resolve_via_resolveurl return resolve_via_resolveurl(link, fallback_to_link=False) except Exception: return None # ------------------------------------------------------------------ # Metadaten # ------------------------------------------------------------------ def metadata_for( self, title: str ) -> tuple[dict[str, str], dict[str, str], list[object] | None]: title = (title or "").strip() if not title: return {}, {}, None info: dict[str, str] = {"title": title} art: dict[str, str] = {} # Cache-Hit – nur zurückgeben wenn Plot vorhanden (sonst Detailseite laden) cached = self._title_meta.get(title) if cached: plot, poster = cached if plot: info["plot"] = plot if poster: art["thumb"] = art["poster"] = poster if plot: return info, art, None # Detailseite laden soup = self._get_detail_soup(title) if soup is None: return info, art, None og_desc = soup.find("meta", attrs={"property": "og:description"}) if og_desc and og_desc.get("content"): info["plot"] = og_desc["content"].strip() og_img = soup.find("meta", attrs={"property": "og:image"}) poster = "" if og_img and og_img.get("content"): poster = _absolute_url(og_img["content"].strip()) art["thumb"] = art["poster"] = poster # Jahr aus Textabschnitt "Titel YYYY" year_el = soup.select_one("p.text-capitalize") if year_el: m = re.search(r"\b(19|20)\d{2}\b", year_el.get_text()) if m: info["year"] = m.group() self._title_meta[title] = (info.get("plot", ""), poster) return info, art, None # ------------------------------------------------------------------ # Browsing # ------------------------------------------------------------------ def new_titles(self) -> List[str]: if not REQUESTS_AVAILABLE: return [] return self._parse_entries(_get_soup(_URL_NEW)) def new_titles_page(self, page: int = 1) -> List[str]: if not REQUESTS_AVAILABLE: return [] page = max(1, int(page or 1)) url = _URL_NEW if page == 1 else f"{_URL_NEW}page/{page}/" return self._parse_entries(_get_soup(url)) def popular_series(self) -> List[str]: if not REQUESTS_AVAILABLE: return [] return self._parse_entries(_get_soup(_URL_SERIES)) def genres(self) -> List[str]: return sorted(GENRE_SLUGS.keys()) def titles_for_genre(self, genre: str) -> List[str]: return self.titles_for_genre_page(genre, 1) def titles_for_genre_page(self, genre: str, page: int = 1) -> List[str]: slug = GENRE_SLUGS.get(genre, "") if not slug or not REQUESTS_AVAILABLE: return [] page = max(1, int(page or 1)) url = f"{BASE_URL}/{slug}/" if page == 1 else f"{BASE_URL}/{slug}/page/{page}/" return self._parse_entries(_get_soup(url)) def capabilities(self) -> set[str]: return {"new_titles", "popular_series", "genres"}