diff --git a/addon/addon.xml b/addon/addon.xml index d361f00..6687472 100644 --- a/addon/addon.xml +++ b/addon/addon.xml @@ -1,5 +1,5 @@ - + diff --git a/addon/plugins/hdfilme_plugin.py b/addon/plugins/hdfilme_plugin.py index 37f916c..1b62eff 100644 --- a/addon/plugins/hdfilme_plugin.py +++ b/addon/plugins/hdfilme_plugin.py @@ -1,9 +1,7 @@ """HDFilme Plugin für ViewIT. -HTML-Scraping von hdfilme.garden. +HTML-Scraping von hdfilme-tv.cc (ehemals hdfilme.garden). Filme und Serien, Hoster-Auflösung via ResolveURL. - -Hinweis: Die Domain ändert sich gelegentlich – als DOMAIN-Konstante konfigurierbar. """ from __future__ import annotations @@ -14,8 +12,10 @@ from urllib.parse import quote_plus try: # pragma: no cover import requests + from bs4 import BeautifulSoup except ImportError as exc: # pragma: no cover requests = None + BeautifulSoup = None REQUESTS_AVAILABLE = False REQUESTS_IMPORT_ERROR = exc else: @@ -28,51 +28,100 @@ from plugin_interface import BasisPlugin # Konstanten # --------------------------------------------------------------------------- -DOMAIN = "hdfilme.garden" -BASE_URL = "https://" + DOMAIN +BASE_URL = "https://hdfilme-tv.cc" DEFAULT_TIMEOUT = 20 HEADERS = { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "de-DE,de;q=0.9,en;q=0.8", - "Referer": BASE_URL + "/", + "Connection": "keep-alive", } -_URL_SEARCH = BASE_URL + "/index.php?do=search&subaction=search&story={query}" +_URL_SEARCH = BASE_URL + "/?s={query}" _URL_NEW = BASE_URL + "/kinofilme-online/" _URL_SERIES = BASE_URL + "/serienstream-deutsch/" -# HTML-Parsing-Muster -_RE_ENTRIES = re.compile( - r'
def _clean_title(raw: str) -> str:
    """Strip site-specific marketing suffixes from a scraped title.

    The known suffixes are removed from the end of the title repeatedly
    until none of them matches any more, so stacked suffixes such as
    ``"Name Stream Deutsch"`` are removed completely regardless of the
    order in which they appear.

    Args:
        raw: Title text as scraped from the page; ``None`` or an empty
            string is treated as an empty title.

    Returns:
        The title without surrounding whitespace and without any trailing
        known suffix.
    """
    suffixes = (" stream", " Stream", " kostenlos", " Deutsch", " German", " online")
    title = (raw or "").strip()
    stripped_any = True
    # A single pass misses a suffix that only becomes the tail after a later
    # entry of the tuple was removed, so loop until a pass changes nothing.
    # Every removal shortens the title, which guarantees termination.
    while stripped_any:
        stripped_any = False
        for suffix in suffixes:
            if title.endswith(suffix):
                title = title[: -len(suffix)].strip()
                stripped_any = True
    return title
def __init__(self) -> None:
    """Create the empty per-instance caches that scraping fills later."""
    # Detail-page URL -> episode labels parsed from that page.
    self._episode_cache: dict[str, list[str]] = {}
    # Title -> (plot, poster URL).
    self._title_meta: dict[str, tuple[str, str]] = {}
    # Title -> True for a series, False for a film.
    self._is_series: dict[str, bool] = {}
    # Title -> absolute URL of its detail page.
    self._title_to_url: dict[str, str] = {}
def _parse_entries(self, soup: Any) -> list[str]:
    """Collect the titles shown on a listing page and cache their data.

    A ``div.box-product`` card is accepted when its first link resolves to
    a URL ending in ``.html`` and it contains an ``h3 a`` element with a
    non-empty cleaned title that was not seen earlier on the same page.
    For each accepted card the detail URL and the series flag are stored
    on the instance; the poster is stored only when one was found.

    Args:
        soup: Parsed listing page, or ``None`` when loading failed.

    Returns:
        The cleaned titles in page order, each at most once.
    """
    collected: list[str] = []
    if soup is None:
        return collected

    series_marker = re.compile(r"\bStaffel\b|\bSeason\b", re.I)
    already_listed: set[str] = set()
    for card in soup.select("div.box-product"):
        # The first link of the card is taken as the detail-page link.
        anchor = card.find("a", href=True)
        detail_url = _absolute_url(anchor["href"]) if anchor else ""
        if not detail_url.endswith(".html"):
            continue

        heading = card.select_one("h3 a")
        if not heading:
            continue
        heading_text = heading.get_text(strip=True)
        name = _clean_title(heading_text)
        if not name or name in already_listed:
            continue
        already_listed.add(name)

        # The poster address is read from the data-src attribute.
        thumb = card.select_one("img.lazyload")
        poster_url = ""
        if thumb and thumb.get("data-src"):
            poster_url = _absolute_url(thumb["data-src"])

        self._title_to_url[name] = detail_url
        # The season marker is looked up in the uncleaned heading text.
        self._is_series[name] = bool(series_marker.search(heading_text))
        if poster_url:
            self._title_meta[name] = ("", poster_url)
        collected.append(name)
    return collected
def _get_detail_soup(self, title: str) -> Any:
    """Load and parse the detail page cached for *title*.

    Args:
        title: Title whose detail URL is looked up in ``_title_to_url``.

    Returns:
        ``None`` when no (non-empty) URL is cached for the title,
        otherwise whatever ``_get_soup`` returns for that URL.
    """
    detail_url = self._title_to_url.get(title, "")
    return _get_soup(detail_url) if detail_url else None
  • ` Block. + """ + if soup is None: + return [] links: list[str] = [] - for m in _RE_HOSTERS.finditer(search_area): - link = m.group(1).strip() - if not link: + + if episode_id: + # Serien-Episode: Links aus dem spezifischen Episode-Container + container = soup.select_one(f"li#{episode_id}") + if container is None: + return [] + candidates = container.select("a[data-link]") + else: + # Film: Links aus .mirrors + candidates = soup.select(".mirrors [data-link]") + + for el in candidates: + href = _absolute_url((el.get("data-link") or "").strip()) + if not href: continue - if link.startswith("//"): - link = "https:" + link - name = link.split("//")[-1].split(".")[0].lower() - if name in _SKIP_HOSTERS: + if any(kw in href for kw in _SKIP_LINK_KEYWORDS): continue - links.append(link) + links.append(href) return links + def _staffel_nr(self, season: str) -> int: + """Extrahiert die Staffelnummer aus einem Label wie 'Staffel 2'.""" + m = re.search(r"\d+", season or "") + return int(m.group()) if m else 1 + + def _ep_index(self, episode: str) -> int: + """Extrahiert den Episode-Index aus einem Label wie 'Episode 3'.""" + m = re.search(r"\d+", episode or "") + return int(m.group()) if m else 1 + # ------------------------------------------------------------------ # Pflicht-Methoden # ------------------------------------------------------------------ async def search_titles( - self, query: str, progress_callback: ProgressCallback = None + self, + query: str, + progress_callback: ProgressCallback = None, ) -> List[str]: - query = (query or "").strip() if not query or not REQUESTS_AVAILABLE: return [] - url = _URL_SEARCH.format(query=quote_plus(query)) - html = self._get_html(url) - if not html: - return [] - # Suche filtert clientseitig nach Titel - q_lower = query.lower() - all_titles = self._parse_entries(html) - return [t for t in all_titles if q_lower in t.lower()] + url = _URL_SEARCH.format(query=quote_plus(query.strip())) + soup = _get_soup(url) + return 
def episodes_for(self, title: str, season: str) -> List[str]:
    """Return the episode labels of one season of *title*.

    Episodes are read from the ``li`` elements of the detail page whose id
    starts with ``serie-{season number}_``. The label is built from the
    text of the item's ``a[href="#"]`` link (a leading "Episode"/"Episoden"
    is dropped) or, when that yields nothing, from the number at the end
    of the id.

    Results are cached per detail URL *and* season number, so asking for a
    different season of the same page never returns another season's
    labels. Cached lists are copied on the way in and out, so callers may
    modify the returned list freely.

    Args:
        title: Title as returned by one of the listing methods.
        season: Season label; the literal ``"Film"`` marks a movie.

    Returns:
        ``[]`` for an empty title, ``[title]`` for a film, when the detail
        page cannot be loaded, or when no episode is found; otherwise the
        labels in page order.
    """
    title = (title or "").strip()
    season = (season or "").strip()
    if not title:
        return []
    if season == "Film":
        return [title]

    detail_url = self._title_to_url.get(title, "")
    staffel_nr = self._staffel_nr(season)
    # The season number is part of the key; keeping the key a plain string
    # leaves the cache's declared dict[str, list[str]] shape intact.
    cache_key = f"{detail_url}#staffel-{staffel_nr}"
    if detail_url:
        cached = self._episode_cache.get(cache_key)
        if cached is not None:
            return list(cached)

    soup = self._get_detail_soup(title)
    if soup is None:
        # Not cached on purpose: a failed load should be retried next time.
        return [title]

    # Episode items carry ids of the form "serie-{season}_{episode}".
    id_prefix = f"serie-{staffel_nr}_"
    labels: List[str] = []
    for item in soup.select("li[id]"):
        item_id = item.get("id", "")
        if not item_id.startswith(id_prefix):
            continue
        episode_number = item_id.split("_")[-1]
        anchor = item.find("a", href="#")
        anchor_text = anchor.get_text(strip=True) if anchor else ""
        # "Episoden 3" -> "3"; an empty remainder falls back to the id number.
        remainder = re.sub(r"^Episoden?\s*", "", anchor_text, flags=re.I).strip()
        labels.append(f"Episode {remainder or episode_number}")

    result = labels or [title]
    if detail_url:
        self._episode_cache[cache_key] = list(result)
    return result
def latest_titles(self, page: int = 1) -> List[str]:
    """Return the titles listed on one page of the new-movies listing.

    Args:
        page: 1-based page number; falsy values and values below 1 are
            treated as page 1.

    Returns:
        The parsed titles, or ``[]`` when the HTTP dependencies are
        unavailable.
    """
    if REQUESTS_AVAILABLE:
        page_no = max(1, int(page or 1))
        listing_url = _URL_NEW
        if page_no != 1:
            # Pages after the first live under ".../page/<n>/".
            listing_url = f"{_URL_NEW}page/{page_no}/"
        return self._parse_entries(_get_soup(listing_url))
    return []
def capabilities(self) -> set[str]:
    """Name the optional listing features this plugin implements."""
    supported = ("latest_titles", "popular_series", "genres")
    return set(supported)