464 lines
16 KiB
Python
464 lines
16 KiB
Python
"""HDFilme Plugin für ViewIT.
|
||
|
||
HTML-Scraping von hdfilme-tv.cc (ehemals hdfilme.garden).
|
||
Filme und Serien, Hoster-Auflösung via ResolveURL.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
from typing import Any, Callable, List, Optional
|
||
from urllib.parse import quote_plus
|
||
|
||
try: # pragma: no cover
|
||
import requests
|
||
from bs4 import BeautifulSoup
|
||
except ImportError as exc: # pragma: no cover
|
||
requests = None
|
||
BeautifulSoup = None
|
||
REQUESTS_AVAILABLE = False
|
||
REQUESTS_IMPORT_ERROR = exc
|
||
else:
|
||
REQUESTS_AVAILABLE = True
|
||
REQUESTS_IMPORT_ERROR = None
|
||
|
||
from plugin_interface import BasisPlugin
|
||
|
||
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

BASE_URL = "https://hdfilme-tv.cc"
DEFAULT_TIMEOUT = 20

# Browser-like request headers so the site serves its regular HTML pages.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
}

# URL templates: search endpoint and the two listing pages.
_URL_SEARCH = BASE_URL + "/?do=search&subaction=search&story={query}"
_URL_NEW = BASE_URL + "/kinofilme-online/"
_URL_SERIES = BASE_URL + "/serienstream-deutsch/"

# Genre display name → URL path slug
GENRE_SLUGS: dict[str, str] = {
    "Abenteuer": "abenteuer",
    "Action": "action",
    "Animation": "animation",
    "Biographie": "biographie",
    "Dokumentation": "dokumentation",
    "Drama": "drama",
    "Erotik": "erotikfilme",
    "Familie": "familie",
    "Fantasy": "fantasy",
    "Historienfilm": "historien",
    "Horror": "horror",
    "Komödie": "komodie",
    "Krieg": "krieg",
    "Krimi": "krimi",
    "Musikfilm": "musikfilme",
    "Mystery": "mystery",
    "Romantik": "romantik",
    "Sci-Fi": "sci-fi",
    "Sport": "sport",
    "Thriller": "thriller",
    "Western": "western",
}

# Hoster links that are skipped (no real stream / trailer only / self links)
_SKIP_LINK_KEYWORDS = ("youtube.com", "youtu.be", "hdfilme-tv.cc")

# Progress callback signature: (message, optional percentage) → anything
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Hilfsfunktionen
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _absolute_url(url: str) -> str:
|
||
"""Macht eine relative oder protokoll-relative URL absolut."""
|
||
url = (url or "").strip()
|
||
if url.startswith("//"):
|
||
return "https:" + url
|
||
if url.startswith("/"):
|
||
return BASE_URL + url
|
||
return url
|
||
|
||
|
||
def _clean_title(raw: str) -> str:
|
||
"""Bereinigt einen Rohtitel von Seiten-Suffixen."""
|
||
title = (raw or "").strip()
|
||
for suffix in (" stream", " Stream", " kostenlos", " Deutsch", " German", " online"):
|
||
if title.endswith(suffix):
|
||
title = title[: -len(suffix)].strip()
|
||
return title
|
||
|
||
|
||
def _get_soup(url: str) -> Any:
    """Fetch *url* via HTTP GET and parse it with BeautifulSoup.

    Returns the parsed document, or None on any failure (missing
    dependencies, network errors, non-2xx responses, parse errors).
    """
    if requests is None or BeautifulSoup is None:
        # Dependencies missing — the plugin reports itself unavailable.
        return None
    try:
        resp = requests.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
        resp.raise_for_status()
        parsed = BeautifulSoup(resp.text, "html.parser")
    except Exception:
        # Best effort: scraping errors degrade to "no result".
        return None
    return parsed
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Plugin-Klasse
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class HdfilmePlugin(BasisPlugin):
    """HDFilme integration for ViewIT (HTML scraping via BeautifulSoup).

    All network access is best-effort: the scraping helpers return empty
    results instead of raising, so the UI can degrade gracefully.
    """

    name = "HDFilme"

    def __init__(self) -> None:
        # title → detail-page URL (filled by _parse_entries / live search)
        self._title_to_url: dict[str, str] = {}
        # title → True if the listing entry looked like a series
        self._is_series: dict[str, bool] = {}
        # title → (plot, poster URL)
        self._title_meta: dict[str, tuple[str, str]] = {}
        # "{detail_url}#{staffel}" → episode labels. The season number is
        # part of the key so two seasons that share one detail URL cannot
        # return each other's cached episode lists.
        self._episode_cache: dict[str, list[str]] = {}
        # hoster names preferred by the user, in priority order
        self._preferred_hosters: list[str] = []

    # ------------------------------------------------------------------
    # Availability
    # ------------------------------------------------------------------

    @property
    def is_available(self) -> bool:
        """True when requests + BeautifulSoup could be imported."""
        return REQUESTS_AVAILABLE

    @property
    def unavailable_reason(self) -> str:
        """Human-readable reason why the plugin is disabled ("" if usable)."""
        if REQUESTS_AVAILABLE:
            return ""
        return f"requests/bs4 nicht verfügbar: {REQUESTS_IMPORT_ERROR}"

    # ------------------------------------------------------------------
    # Internal parsing
    # ------------------------------------------------------------------

    def _parse_entries(self, soup: Any) -> list[str]:
        """Parse a listing page; returns titles and fills the caches."""
        if soup is None:
            return []
        titles: list[str] = []
        seen: set[str] = set()
        for box in soup.select("div.box-product"):
            # Detail URL from the first link in the box
            link = box.find("a", href=True)
            if not link:
                continue
            url = _absolute_url(link["href"])
            if not url.endswith(".html"):
                continue

            # Title from the h3 heading
            h3_a = box.select_one("h3 a")
            if not h3_a:
                continue
            raw_title = h3_a.get_text(strip=True)
            title = _clean_title(raw_title)
            if not title or title in seen:
                continue
            seen.add(title)

            # Thumbnail (lazy-loaded images carry the URL in data-src)
            img = box.select_one("img.lazyload")
            poster = ""
            if img and img.get("data-src"):
                poster = _absolute_url(img["data-src"])

            # Series detection via keywords in the raw title
            is_series = bool(re.search(r"\bStaffel\b|\bSeason\b", raw_title, re.I))

            self._title_to_url[title] = url
            self._is_series[title] = is_series
            if poster:
                # Keep an already-cached plot instead of clobbering it with ""
                prior_plot = self._title_meta.get(title, ("", ""))[0]
                self._title_meta[title] = (prior_plot, poster)
            titles.append(title)
        return titles

    def _ensure_detail_url(self, title: str) -> str:
        """Return the detail URL for *title*.

        Checks the cache first, then falls back to a live search (needed
        when the plugin instance is fresh and the cache is empty).
        """
        url = self._title_to_url.get(title, "")
        if url:
            return url
        search_url = _URL_SEARCH.format(query=quote_plus(title.strip()))
        soup = _get_soup(search_url)
        if soup:
            self._parse_entries(soup)
            url = self._title_to_url.get(title, "")
        return url

    def _get_detail_soup(self, title: str) -> Any:
        """Load and parse the detail page of *title* (None on failure)."""
        url = self._ensure_detail_url(title)
        if not url:
            return None
        return _get_soup(url)

    def _extract_hoster_links(self, soup: Any, episode_id: str = "") -> dict[str, str]:
        """Extract hoster links from a detail page.

        Returns {hoster name → URL}. If *episode_id* is given, only links
        inside the `<li id="{episode_id}">` block are considered.
        """
        if soup is None:
            return {}
        hosters: dict[str, str] = {}

        if episode_id:
            container = soup.select_one(f"li#{episode_id}")
            if container is None:
                return {}
            candidates = container.select("a[data-link]")
        else:
            candidates = soup.select(".mirrors [data-link]")

        seen_names: set[str] = set()
        for el in candidates:
            href = _absolute_url((el.get("data-link") or "").strip())
            if not href:
                continue
            if any(kw in href for kw in _SKIP_LINK_KEYWORDS):
                continue  # trailer/self links, not real streams
            name = el.get_text(strip=True) or "Hoster"
            # Disambiguate duplicate hoster names ("Name 2", "Name 3", …)
            base_name = name
            i = 2
            while name in seen_names:
                name = f"{base_name} {i}"
                i += 1
            seen_names.add(name)
            hosters[name] = href
        return hosters

    def _staffel_nr(self, season: str) -> int:
        """Extract the season number from a label like 'Staffel 2' (default 1)."""
        m = re.search(r"\d+", season or "")
        return int(m.group()) if m else 1

    def _ep_index(self, episode: str) -> int:
        """Extract the episode index from a label like 'Episode 3' (default 1)."""
        m = re.search(r"\d+", episode or "")
        return int(m.group()) if m else 1

    def _season_label(self, title: str) -> str:
        """Build the single season label from a series title (default 'Staffel 1')."""
        m = re.search(r"Staffel\s*(\d+)|Season\s*(\d+)", title, re.I)
        nr = int(m.group(1) or m.group(2)) if m else 1
        return f"Staffel {nr}"

    # ------------------------------------------------------------------
    # Required interface methods
    # ------------------------------------------------------------------

    async def search_titles(
        self,
        query: str,
        progress_callback: ProgressCallback = None,
    ) -> List[str]:
        """Search the site for *query* and return matching titles."""
        if not query or not REQUESTS_AVAILABLE:
            return []
        url = _URL_SEARCH.format(query=quote_plus(query.strip()))
        soup = _get_soup(url)
        return self._parse_entries(soup)

    def seasons_for(self, title: str) -> List[str]:
        """Return ["Film"] for movies or ["Staffel N"] for series."""
        title = (title or "").strip()
        if not title:
            return []
        if self._is_series.get(title) is False:
            return ["Film"]
        if self._is_series.get(title) is True:
            return [self._season_label(title)]
        # Unknown: load the detail page and look for a series marker
        soup = self._get_detail_soup(title)
        if soup and soup.select_one("div.series"):
            self._is_series[title] = True
            return [self._season_label(title)]
        self._is_series[title] = False
        return ["Film"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        """Return episode labels for *title*/*season* ("Film" → [title])."""
        title = (title or "").strip()
        season = (season or "").strip()
        if not title:
            return []
        if season == "Film":
            return [title]

        detail_url = self._ensure_detail_url(title)
        staffel_nr = self._staffel_nr(season)
        # Season-aware cache key — a URL-only key would let one season's
        # episode list shadow another's.
        cache_key = f"{detail_url}#{staffel_nr}"
        cached = self._episode_cache.get(cache_key)
        if cached is not None:
            return cached

        soup = self._get_detail_soup(title)
        if soup is None:
            return [title]

        # li IDs follow the pattern "serie-{staffel}_{episode}"
        pattern = f"serie-{staffel_nr}_"
        episode_items = [li for li in soup.select("li[id]") if li.get("id", "").startswith(pattern)]

        labels: list[str] = []
        for li in episode_items:
            ep_id = li.get("id", "")  # e.g. "serie-1_3"
            ep_num_str = ep_id.split("_")[-1]
            # Episode title from the first <a href="#">
            a = li.find("a", href="#")
            if a:
                raw = a.get_text(strip=True)
                # "Episoden 3" → "3", then rebuilt as "Episode 3"
                ep_label = re.sub(r"^Episoden?\s*", "", raw, flags=re.I).strip()
                label = f"Episode {ep_label}" if ep_label else f"Episode {ep_num_str}"
            else:
                label = f"Episode {ep_num_str}"
            labels.append(label)

        result = labels if labels else [title]
        if detail_url:
            self._episode_cache[cache_key] = result
        return result

    def _hosters_for(self, title: str, season: str, episode: str) -> dict[str, str]:
        """Return all available hosters {name → URL} for title/season/episode."""
        soup = self._get_detail_soup(title)
        if soup is None:
            return {}
        if season == "Film" or not self._is_series.get(title, False):
            return self._extract_hoster_links(soup)
        staffel_nr = self._staffel_nr(season)
        ep_idx = self._ep_index(episode)
        episode_id = f"serie-{staffel_nr}_{ep_idx}"
        return self._extract_hoster_links(soup, episode_id)

    def available_hosters_for(self, title: str, season: str, episode: str) -> List[str]:
        """Hoster names offered for the given title/season/episode."""
        return list(self._hosters_for(title, season, episode).keys())

    def set_preferred_hosters(self, hosters: List[str]) -> None:
        """Store the user's hoster priority list (empty entries dropped)."""
        self._preferred_hosters = [h for h in hosters if h]

    def stream_link_for(self, title: str, season: str, episode: str) -> Optional[str]:
        """Pick one hoster URL, honouring the preferred-hoster list."""
        title = (title or "").strip()
        season = (season or "").strip()
        if not title:
            return None
        hosters = self._hosters_for(title, season, episode)
        if not hosters:
            return None
        # Use a preferred hoster if one matches by name or URL substring
        for preferred in self._preferred_hosters:
            key = preferred.casefold()
            for name, url in hosters.items():
                if key in name.casefold() or key in url.casefold():
                    return url
        # Fallback: first hoster found on the page
        return next(iter(hosters.values()))

    def resolve_stream_link(self, link: str) -> Optional[str]:
        """Resolve a hoster page URL to a direct stream URL via ResolveURL."""
        link = (link or "").strip()
        if not link:
            return None
        try:
            # Imported lazily: ResolveURL support is optional at runtime
            from plugin_helpers import resolve_via_resolveurl

            return resolve_via_resolveurl(link, fallback_to_link=False)
        except Exception:
            return None

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------

    def metadata_for(
        self, title: str
    ) -> tuple[dict[str, str], dict[str, str], list[object] | None]:
        """Return (info, art, None) for *title*; loads the detail page lazily."""
        title = (title or "").strip()
        if not title:
            return {}, {}, None

        info: dict[str, str] = {"title": title}
        art: dict[str, str] = {}

        # Cache hit — return immediately only when a plot is present,
        # otherwise fall through and load the detail page
        cached_plot, cached_poster = self._title_meta.get(title, ("", ""))
        if cached_plot:
            info["plot"] = cached_plot
        if cached_poster:
            art["thumb"] = art["poster"] = cached_poster
        if cached_plot:
            return info, art, None

        soup = self._get_detail_soup(title)
        if soup is None:
            return info, art, None

        og_desc = soup.find("meta", attrs={"property": "og:description"})
        if og_desc and og_desc.get("content"):
            info["plot"] = og_desc["content"].strip()

        og_img = soup.find("meta", attrs={"property": "og:image"})
        poster = ""
        if og_img and og_img.get("content"):
            poster = _absolute_url(og_img["content"].strip())
            art["thumb"] = art["poster"] = poster

        # Year from a text section "Title YYYY"
        year_el = soup.select_one("p.text-capitalize")
        if year_el:
            m = re.search(r"\b(19|20)\d{2}\b", year_el.get_text())
            if m:
                info["year"] = m.group()

        # Keep a previously cached poster when the page has no og:image
        # (the old code overwrote it with "").
        self._title_meta[title] = (info.get("plot", ""), poster or cached_poster)
        return info, art, None

    # ------------------------------------------------------------------
    # Browsing
    # ------------------------------------------------------------------

    def new_titles(self) -> List[str]:
        """Titles from the first page of the cinema listing."""
        if not REQUESTS_AVAILABLE:
            return []
        return self._parse_entries(_get_soup(_URL_NEW))

    def new_titles_page(self, page: int = 1) -> List[str]:
        """Titles from page *page* (clamped to ≥ 1) of the cinema listing."""
        if not REQUESTS_AVAILABLE:
            return []
        page = max(1, int(page or 1))
        url = _URL_NEW if page == 1 else f"{_URL_NEW}page/{page}/"
        return self._parse_entries(_get_soup(url))

    def popular_series(self) -> List[str]:
        """Titles from the series listing."""
        if not REQUESTS_AVAILABLE:
            return []
        return self._parse_entries(_get_soup(_URL_SERIES))

    def genres(self) -> List[str]:
        """All known genre names, sorted alphabetically."""
        return sorted(GENRE_SLUGS.keys())

    def titles_for_genre(self, genre: str) -> List[str]:
        """First page of titles for *genre*."""
        return self.titles_for_genre_page(genre, 1)

    def titles_for_genre_page(self, genre: str, page: int = 1) -> List[str]:
        """Titles from page *page* of the genre listing (unknown genre → [])."""
        slug = GENRE_SLUGS.get(genre, "")
        if not slug or not REQUESTS_AVAILABLE:
            return []
        page = max(1, int(page or 1))
        url = f"{BASE_URL}/{slug}/" if page == 1 else f"{BASE_URL}/{slug}/page/{page}/"
        return self._parse_entries(_get_soup(url))

    def capabilities(self) -> set[str]:
        """Optional browsing features this plugin supports."""
        return {"new_titles", "popular_series", "genres"}
|