Files
ViewIT/addon/plugins/hdfilme_plugin.py

464 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""HDFilme Plugin für ViewIT.
HTML-Scraping von hdfilme-tv.cc (ehemals hdfilme.garden).
Filme und Serien, Hoster-Auflösung via ResolveURL.
"""
from __future__ import annotations
import re
from typing import Any, Callable, List, Optional
from urllib.parse import quote_plus
try: # pragma: no cover
import requests
from bs4 import BeautifulSoup
except ImportError as exc: # pragma: no cover
requests = None
BeautifulSoup = None
REQUESTS_AVAILABLE = False
REQUESTS_IMPORT_ERROR = exc
else:
REQUESTS_AVAILABLE = True
REQUESTS_IMPORT_ERROR = None
from plugin_interface import BasisPlugin
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
BASE_URL = "https://hdfilme-tv.cc"  # root of the scraped site
DEFAULT_TIMEOUT = 20  # seconds per HTTP request
# Browser-like headers so the site serves its regular HTML pages.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
}
# URL templates: search ({query} is URL-encoded), newest movies, series listing.
_URL_SEARCH = BASE_URL + "/?do=search&subaction=search&story={query}"
_URL_NEW = BASE_URL + "/kinofilme-online/"
_URL_SERIES = BASE_URL + "/serienstream-deutsch/"
# Genre display name -> URL path slug on the site
GENRE_SLUGS: dict[str, str] = {
    "Abenteuer": "abenteuer",
    "Action": "action",
    "Animation": "animation",
    "Biographie": "biographie",
    "Dokumentation": "dokumentation",
    "Drama": "drama",
    "Erotik": "erotikfilme",
    "Familie": "familie",
    "Fantasy": "fantasy",
    "Historienfilm": "historien",
    "Horror": "horror",
    "Komödie": "komodie",
    "Krieg": "krieg",
    "Krimi": "krimi",
    "Musikfilm": "musikfilme",
    "Mystery": "mystery",
    "Romantik": "romantik",
    "Sci-Fi": "sci-fi",
    "Sport": "sport",
    "Thriller": "thriller",
    "Western": "western",
}
# Hoster links that are skipped (no real stream / trailer only)
_SKIP_LINK_KEYWORDS = ("youtube.com", "youtu.be", "hdfilme-tv.cc")
# Optional progress callback: (message, percent-or-None) -> Any
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
# ---------------------------------------------------------------------------
# Hilfsfunktionen
# ---------------------------------------------------------------------------
def _absolute_url(url: str) -> str:
"""Macht eine relative oder protokoll-relative URL absolut."""
url = (url or "").strip()
if url.startswith("//"):
return "https:" + url
if url.startswith("/"):
return BASE_URL + url
return url
def _clean_title(raw: str) -> str:
"""Bereinigt einen Rohtitel von Seiten-Suffixen."""
title = (raw or "").strip()
for suffix in (" stream", " Stream", " kostenlos", " Deutsch", " German", " online"):
if title.endswith(suffix):
title = title[: -len(suffix)].strip()
return title
def _get_soup(url: str) -> Any:
    """Fetch *url* and parse it with BeautifulSoup; return None on any failure."""
    if requests is None or BeautifulSoup is None:
        # Dependencies missing -- the plugin reports this via is_available.
        return None
    try:
        resp = requests.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
        resp.raise_for_status()
        return BeautifulSoup(resp.text, "html.parser")
    except Exception:
        # Network errors, timeouts, HTTP errors and parse failures all
        # degrade to None; callers treat a missing soup as "no results".
        return None
# ---------------------------------------------------------------------------
# Plugin-Klasse
# ---------------------------------------------------------------------------
class HdfilmePlugin(BasisPlugin):
    """HDFilme integration for ViewIT (HTML scraping via BeautifulSoup).

    Titles discovered through search/browsing are cached in instance
    dictionaries so later calls (seasons, episodes, streams, metadata)
    can resolve a plain title string back to its detail-page URL.
    """

    name = "HDFilme"

    def __init__(self) -> None:
        # title -> absolute detail-page URL
        self._title_to_url: dict[str, str] = {}
        # title -> True (series) / False (movie); missing key = unknown
        self._is_series: dict[str, bool] = {}
        # title -> (plot, poster URL); plot is "" until the detail page was read
        self._title_meta: dict[str, tuple[str, str]] = {}
        # "detail_url#season_nr" -> episode labels. Keyed per season (fix:
        # the original keyed on the URL alone, so two seasons served by the
        # same page would have shared one cache entry).
        self._episode_cache: dict[str, list[str]] = {}
        # hoster names preferred by the user, in priority order
        self._preferred_hosters: list[str] = []

    # ------------------------------------------------------------------
    # Availability
    # ------------------------------------------------------------------
    @property
    def is_available(self) -> bool:
        """True when requests + bs4 could be imported."""
        return REQUESTS_AVAILABLE

    @property
    def unavailable_reason(self) -> str:
        """Human-readable reason when the plugin cannot run, else ""."""
        if REQUESTS_AVAILABLE:
            return ""
        return f"requests/bs4 nicht verfügbar: {REQUESTS_IMPORT_ERROR}"

    # ------------------------------------------------------------------
    # Internal parsing
    # ------------------------------------------------------------------
    def _parse_entries(self, soup: Any) -> list[str]:
        """Parse a listing page and return its titles (side effect: caches
        URL, series flag and poster per title)."""
        if soup is None:
            return []
        titles: list[str] = []
        seen: set[str] = set()
        for box in soup.select("div.box-product"):
            # Detail URL comes from the first link in the box.
            link = box.find("a", href=True)
            if not link:
                continue
            url = _absolute_url(link["href"])
            if not url.endswith(".html"):
                continue
            # Title from the h3 anchor.
            h3_a = box.select_one("h3 a")
            if not h3_a:
                continue
            raw_title = h3_a.get_text(strip=True)
            title = _clean_title(raw_title)
            if not title or title in seen:
                continue
            seen.add(title)
            # Thumbnail (lazy-loaded images carry the URL in data-src).
            img = box.select_one("img.lazyload")
            poster = ""
            if img and img.get("data-src"):
                poster = _absolute_url(img["data-src"])
            # Series detection via the raw title ("Staffel"/"Season").
            is_series = bool(re.search(r"\bStaffel\b|\bSeason\b", raw_title, re.I))
            self._title_to_url[title] = url
            self._is_series[title] = is_series
            if poster:
                self._title_meta[title] = ("", poster)
            titles.append(title)
        return titles

    def _ensure_detail_url(self, title: str) -> str:
        """Return the detail URL for *title*.

        Checks the cache first, then falls back to a live search (needed
        when the plugin instance is fresh and the cache is empty).
        """
        url = self._title_to_url.get(title, "")
        if url:
            return url
        search_url = _URL_SEARCH.format(query=quote_plus(title.strip()))
        soup = _get_soup(search_url)
        if soup:
            self._parse_entries(soup)
            url = self._title_to_url.get(title, "")
        return url

    def _get_detail_soup(self, title: str) -> Any:
        """Load and parse the detail page of *title*; None if unresolvable."""
        url = self._ensure_detail_url(title)
        if not url:
            return None
        return _get_soup(url)

    def _extract_hoster_links(self, soup: Any, episode_id: str = "") -> dict[str, str]:
        """Extract hoster links from a detail page.

        Returns {hoster name -> URL}. When *episode_id* is set, only links
        inside the `<li id="{episode_id}">` block are considered; otherwise
        all ".mirrors [data-link]" elements (movie pages).
        """
        if soup is None:
            return {}
        hosters: dict[str, str] = {}
        if episode_id:
            container = soup.select_one(f"li#{episode_id}")
            if container is None:
                return {}
            candidates = container.select("a[data-link]")
        else:
            candidates = soup.select(".mirrors [data-link]")
        seen_names: set[str] = set()
        for el in candidates:
            href = _absolute_url((el.get("data-link") or "").strip())
            if not href:
                continue
            # Skip trailers / self-links (no actual stream).
            if any(kw in href for kw in _SKIP_LINK_KEYWORDS):
                continue
            name = el.get_text(strip=True) or "Hoster"
            # Disambiguate duplicate hoster names: "Name", "Name 2", ...
            base_name = name
            i = 2
            while name in seen_names:
                name = f"{base_name} {i}"
                i += 1
            seen_names.add(name)
            hosters[name] = href
        return hosters

    def _staffel_nr(self, season: str) -> int:
        """Extract the season number from a label like 'Staffel 2' (default 1)."""
        m = re.search(r"\d+", season or "")
        return int(m.group()) if m else 1

    def _ep_index(self, episode: str) -> int:
        """Extract the episode index from a label like 'Episode 3' (default 1)."""
        m = re.search(r"\d+", episode or "")
        return int(m.group()) if m else 1

    # ------------------------------------------------------------------
    # Required methods
    # ------------------------------------------------------------------
    async def search_titles(
        self,
        query: str,
        progress_callback: ProgressCallback = None,
    ) -> List[str]:
        """Search the site for *query* and return matching titles.

        NOTE(review): the HTTP request is blocking despite the async
        signature -- presumably accepted by the caller; confirm.
        """
        if not query or not REQUESTS_AVAILABLE:
            return []
        url = _URL_SEARCH.format(query=quote_plus(query.strip()))
        soup = _get_soup(url)
        return self._parse_entries(soup)

    def seasons_for(self, title: str) -> List[str]:
        """Return ["Film"] for movies or ["Staffel N"] for series.

        Each detail page covers a single season; the number is taken from
        the title itself (defaulting to 1).
        """
        title = (title or "").strip()
        if not title:
            return []
        if self._is_series.get(title) is False:
            return ["Film"]
        if self._is_series.get(title) is True:
            m = re.search(r"Staffel\s*(\d+)|Season\s*(\d+)", title, re.I)
            nr = int(m.group(1) or m.group(2)) if m else 1
            return [f"Staffel {nr}"]
        # Unknown: load the detail page and check for a series marker.
        soup = self._get_detail_soup(title)
        if soup and soup.select_one("div.series"):
            self._is_series[title] = True
            m = re.search(r"Staffel\s*(\d+)|Season\s*(\d+)", title, re.I)
            nr = int(m.group(1) or m.group(2)) if m else 1
            return [f"Staffel {nr}"]
        self._is_series[title] = False
        return ["Film"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        """Return episode labels ("Episode N") for *title*/*season*.

        Movies return a single pseudo-episode equal to the title. Results
        are cached per detail URL AND season number (fix: the original
        cache ignored the season).
        """
        title = (title or "").strip()
        season = (season or "").strip()
        if not title:
            return []
        if season == "Film":
            return [title]
        detail_url = self._ensure_detail_url(title)
        staffel_nr = self._staffel_nr(season)
        cache_key = f"{detail_url}#{staffel_nr}"
        cached = self._episode_cache.get(cache_key)
        if cached is not None:
            return cached
        soup = self._get_detail_soup(title)
        if soup is None:
            return [title]
        # Episode blocks carry li IDs of the form "serie-{season}_{episode}".
        prefix = f"serie-{staffel_nr}_"
        episode_items = [li for li in soup.select("li[id]") if li.get("id", "").startswith(prefix)]
        labels: list[str] = []
        for li in episode_items:
            ep_id = li.get("id", "")  # e.g. "serie-1_3"
            ep_num_str = ep_id.split("_")[-1]
            # Episode label from the first <a href="#"> inside the block.
            a = li.find("a", href="#")
            if a:
                raw = a.get_text(strip=True)
                # Normalise "Episoden 3" / "Episode 3" to the bare remainder.
                ep_label = re.sub(r"^Episoden?\s*", "", raw, flags=re.I).strip()
                label = f"Episode {ep_label}" if ep_label else f"Episode {ep_num_str}"
            else:
                label = f"Episode {ep_num_str}"
            labels.append(label)
        result = labels if labels else [title]
        if detail_url:
            self._episode_cache[cache_key] = result
        return result

    def _hosters_for(self, title: str, season: str, episode: str) -> dict[str, str]:
        """Return all available hosters {name -> URL} for title/season/episode."""
        soup = self._get_detail_soup(title)
        if soup is None:
            return {}
        if season == "Film" or not self._is_series.get(title, False):
            return self._extract_hoster_links(soup)
        staffel_nr = self._staffel_nr(season)
        ep_idx = self._ep_index(episode)
        episode_id = f"serie-{staffel_nr}_{ep_idx}"
        return self._extract_hoster_links(soup, episode_id)

    def available_hosters_for(self, title: str, season: str, episode: str) -> List[str]:
        """Return the hoster names available for the given selection."""
        return list(self._hosters_for(title, season, episode).keys())

    def set_preferred_hosters(self, hosters: List[str]) -> None:
        """Store the user's preferred hosters (empty entries dropped)."""
        self._preferred_hosters = [h for h in hosters if h]

    def stream_link_for(self, title: str, season: str, episode: str) -> Optional[str]:
        """Return the (unresolved) hoster URL for the given selection.

        Preferred hosters (substring match against name or URL, case-
        insensitive) win; otherwise the first hoster found is returned.
        """
        title = (title or "").strip()
        season = (season or "").strip()
        if not title:
            return None
        hosters = self._hosters_for(title, season, episode)
        if not hosters:
            return None
        for preferred in self._preferred_hosters:
            key = preferred.casefold()
            for name, url in hosters.items():
                if key in name.casefold() or key in url.casefold():
                    return url
        # Fallback: first hoster in document order.
        return next(iter(hosters.values()))

    def resolve_stream_link(self, link: str) -> Optional[str]:
        """Resolve a hoster URL to a playable stream via ResolveURL.

        Returns None when resolution fails or the helper is unavailable.
        """
        link = (link or "").strip()
        if not link:
            return None
        try:
            from plugin_helpers import resolve_via_resolveurl
            return resolve_via_resolveurl(link, fallback_to_link=False)
        except Exception:
            # Missing helper or resolver failure both mean "no stream".
            return None

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------
    def metadata_for(
        self, title: str
    ) -> tuple[dict[str, str], dict[str, str], list[object] | None]:
        """Return (info, art, cast) for *title*; cast is always None.

        info may carry "title", "plot" and "year"; art maps "thumb"/
        "poster" to the poster URL. A cache hit is only returned directly
        when a plot is present; otherwise the detail page is fetched.
        """
        title = (title or "").strip()
        if not title:
            return {}, {}, None
        info: dict[str, str] = {"title": title}
        art: dict[str, str] = {}
        cached = self._title_meta.get(title)
        if cached:
            plot, poster = cached
            if plot:
                info["plot"] = plot
            if poster:
                art["thumb"] = art["poster"] = poster
            if plot:
                # Complete cache hit -- skip the network round-trip.
                return info, art, None
        soup = self._get_detail_soup(title)
        if soup is None:
            return info, art, None
        og_desc = soup.find("meta", attrs={"property": "og:description"})
        if og_desc and og_desc.get("content"):
            info["plot"] = og_desc["content"].strip()
        og_img = soup.find("meta", attrs={"property": "og:image"})
        poster = ""
        if og_img and og_img.get("content"):
            poster = _absolute_url(og_img["content"].strip())
            art["thumb"] = art["poster"] = poster
        elif cached:
            # Fix: the detail page has no og:image -- keep the listing
            # thumbnail instead of overwriting the cached poster with "".
            poster = cached[1]
        # Year from a text section like "Titel YYYY".
        year_el = soup.select_one("p.text-capitalize")
        if year_el:
            m = re.search(r"\b(19|20)\d{2}\b", year_el.get_text())
            if m:
                info["year"] = m.group()
        self._title_meta[title] = (info.get("plot", ""), poster)
        return info, art, None

    # ------------------------------------------------------------------
    # Browsing
    # ------------------------------------------------------------------
    def new_titles(self) -> List[str]:
        """Return titles from the first page of the newest-movies listing."""
        if not REQUESTS_AVAILABLE:
            return []
        return self._parse_entries(_get_soup(_URL_NEW))

    def new_titles_page(self, page: int = 1) -> List[str]:
        """Return titles from page *page* of the newest-movies listing."""
        if not REQUESTS_AVAILABLE:
            return []
        page = max(1, int(page or 1))
        url = _URL_NEW if page == 1 else f"{_URL_NEW}page/{page}/"
        return self._parse_entries(_get_soup(url))

    def popular_series(self) -> List[str]:
        """Return titles from the series listing."""
        if not REQUESTS_AVAILABLE:
            return []
        return self._parse_entries(_get_soup(_URL_SERIES))

    def genres(self) -> List[str]:
        """Return the supported genre names, sorted alphabetically."""
        return sorted(GENRE_SLUGS.keys())

    def titles_for_genre(self, genre: str) -> List[str]:
        """Return the first page of titles for *genre*."""
        return self.titles_for_genre_page(genre, 1)

    def titles_for_genre_page(self, genre: str, page: int = 1) -> List[str]:
        """Return titles from page *page* of the genre listing."""
        slug = GENRE_SLUGS.get(genre, "")
        if not slug or not REQUESTS_AVAILABLE:
            return []
        page = max(1, int(page or 1))
        url = f"{BASE_URL}/{slug}/" if page == 1 else f"{BASE_URL}/{slug}/page/{page}/"
        return self._parse_entries(_get_soup(url))

    def capabilities(self) -> set[str]:
        """Declare which optional browsing features this plugin supports."""
        return {"new_titles", "popular_series", "genres"}