"""HDFilme plugin for ViewIT.

HTML scraping of hdfilme.garden: movies and series, with hoster
resolution via ResolveURL.

Note: the domain changes occasionally – configurable via the DOMAIN
constant.
"""
|
||
|
||
from __future__ import annotations

import re
from typing import Any, Callable, List, Optional
from urllib.parse import quote_plus

# Optional dependency guard: "requests" powers all HTTP traffic.  When it
# is missing the plugin stays importable and reports itself unavailable.
REQUESTS_AVAILABLE = False
REQUESTS_IMPORT_ERROR: ImportError | None = None
try:  # pragma: no cover
    import requests

    REQUESTS_AVAILABLE = True
except ImportError as exc:  # pragma: no cover
    requests = None
    REQUESTS_IMPORT_ERROR = exc
|
||
|
||
from plugin_interface import BasisPlugin
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
DOMAIN = "hdfilme.garden"
|
||
BASE_URL = "https://" + DOMAIN
|
||
DEFAULT_TIMEOUT = 20
|
||
|
||
HEADERS = {
|
||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||
"Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
|
||
"Referer": BASE_URL + "/",
|
||
}
|
||
|
||
_URL_SEARCH = BASE_URL + "/index.php?do=search&subaction=search&story={query}"
|
||
_URL_NEW = BASE_URL + "/kinofilme-online/"
|
||
_URL_SERIES = BASE_URL + "/serienstream-deutsch/"
|
||
|
||
# HTML-Parsing-Muster
|
||
_RE_ENTRIES = re.compile(
|
||
r'<div class="box-product.*?href="([^"]+)[^>]*>([^<]+).*?data-src="([^"]+)',
|
||
re.DOTALL,
|
||
)
|
||
_RE_EPISODES = re.compile(r'><a href="#">([^<]+)')
|
||
_RE_HOSTERS = re.compile(r'link="([^"]+)"')
|
||
_RE_THUMB_STANDALONE = re.compile(r'data-src="([^"]+)"')
|
||
|
||
_SKIP_HOSTERS = {"youtube", "dropload"}
|
||
|
||
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
# ---------------------------------------------------------------------------
# Plugin class
# ---------------------------------------------------------------------------
class HDFilmePlugin(BasisPlugin):
    """HDFilme integration for ViewIT (hdfilme.garden).

    Scrapes the site's HTML listings, keeps per-title in-memory caches
    (detail URL, poster, series flag) and resolves hoster pages via
    ResolveURL.
    """

    name = "HDFilme"

    def __init__(self) -> None:
        # title -> detail page URL
        self._title_to_url: dict[str, str] = {}
        # title -> (plot, poster, fanart); the listing parser only ever
        # fills the poster slot, plot/fanart stay "".
        self._title_meta: dict[str, tuple[str, str, str]] = {}
        # title -> True when the entry looks like a series
        self._is_series: dict[str, bool] = {}

    # ------------------------------------------------------------------
    # Availability
    # ------------------------------------------------------------------

    @property
    def is_available(self) -> bool:
        """True when the optional ``requests`` dependency is importable."""
        return REQUESTS_AVAILABLE

    @property
    def unavailable_reason(self) -> str:
        """Explain why the plugin is disabled; empty string when available."""
        if REQUESTS_AVAILABLE:
            return ""
        return f"requests nicht verfügbar: {REQUESTS_IMPORT_ERROR}"

    # ------------------------------------------------------------------
    # HTTP
    # ------------------------------------------------------------------

    def _get_session(self):  # type: ignore[return]
        """Return the shared pooled requests session for this plugin."""
        from http_session_pool import get_requests_session

        return get_requests_session("hdfilme", headers=HEADERS)

    def _get_html(self, url: str) -> str:
        """Fetch *url* and return the body text, or "" on any failure.

        Best-effort by design: callers treat an empty string as
        "page unavailable" rather than an error.
        """
        session = self._get_session()
        response = None
        try:
            response = session.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
            response.raise_for_status()
            return response.text
        except Exception:
            return ""
        finally:
            # Always release the connection back to the pool.
            if response is not None:
                try:
                    response.close()
                except Exception:
                    pass

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _parse_entries(self, html: str) -> List[str]:
        """Parse a result page, cache each entry, and return titles in order."""
        titles: list[str] = []
        for m in _RE_ENTRIES.finditer(html):
            raw_url, raw_title, raw_thumb = m.group(1), m.group(2), m.group(3)
            title = raw_title.strip()
            if not title:
                continue

            # Ensure the detail URL is absolute; drop anything non-http.
            url = raw_url.strip()
            if url.startswith("/"):
                url = BASE_URL + url
            if not url.startswith("http"):
                continue

            thumb = raw_thumb.strip()
            if thumb.startswith("/"):
                thumb = BASE_URL + thumb

            # "taffel" matches both "Staffel" and "staffel" (xStream convention).
            is_series = "taffel" in title
            self._title_to_url[title] = url
            self._is_series[title] = is_series
            self._title_meta[title] = ("", thumb, "")
            titles.append(title)
        return titles

    @staticmethod
    def _hoster_name(link: str) -> str:
        """Lowercase hoster name: first host label, with a leading "www." stripped."""
        host = link.split("//")[-1].split("/")[0].lower()
        if host.startswith("www."):
            host = host[4:]
        return host.split(".")[0]

    def _get_hoster_links(self, html: str, episode: str = "") -> List[str]:
        """Extract hoster URLs from *html*, optionally limited to *episode*.

        BUGFIX: the hoster name was previously taken as the first dotted
        label of everything after "//", so a host like "www.youtube.com"
        yielded "www" and slipped past _SKIP_HOSTERS; the name is now
        derived from the bare host with any leading "www." removed.
        """
        search_area = html
        if episode:
            # Narrow to the episode's section (up to its closing </ul>).
            m = re.search(re.escape(episode) + r"<.*?</ul>", html, re.DOTALL)
            if m:
                search_area = m.group(0)

        links: list[str] = []
        for m in _RE_HOSTERS.finditer(search_area):
            link = m.group(1).strip()
            if not link:
                continue
            if link.startswith("//"):
                link = "https:" + link
            if self._hoster_name(link) in _SKIP_HOSTERS:
                continue
            links.append(link)
        return links

    # ------------------------------------------------------------------
    # Required methods
    # ------------------------------------------------------------------

    async def search_titles(
        self, query: str, progress_callback: ProgressCallback = None
    ) -> List[str]:
        """Search the site for *query* and return matching titles.

        *progress_callback* is part of the plugin interface but currently
        unused here (the search is a single request).
        """
        query = (query or "").strip()
        if not query or not REQUESTS_AVAILABLE:
            return []
        url = _URL_SEARCH.format(query=quote_plus(query))
        html = self._get_html(url)
        if not html:
            return []
        # The site search is fuzzy; filter client-side on the title text.
        q_lower = query.lower()
        all_titles = self._parse_entries(html)
        return [t for t in all_titles if q_lower in t.lower()]

    def seasons_for(self, title: str) -> List[str]:
        """Return season labels for *title*; movies yield ["Film"]."""
        title = (title or "").strip()
        if not title:
            return []
        if self._is_series.get(title):
            # Derive the season number from the title when present.
            m = re.search(r"Staffel\s*(\d+)", title, re.IGNORECASE)
            if m:
                return [f"Staffel {m.group(1)}"]
            return ["Staffel 1"]
        return ["Film"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        """Return episode labels; movies (and parse failures) yield [title]."""
        title = (title or "").strip()
        if not title:
            return []

        if season == "Film":
            return [title]

        url = self._title_to_url.get(title, "")
        if not url:
            return []

        html = self._get_html(url)
        if not html:
            return [title]

        episodes = _RE_EPISODES.findall(html)
        return [ep.strip() for ep in episodes if ep.strip()] or [title]

    # ------------------------------------------------------------------
    # Stream
    # ------------------------------------------------------------------

    def stream_link_for(
        self, title: str, season: str, episode: str
    ) -> Optional[str]:
        """Return the first usable hoster link for the selection, or None."""
        title = (title or "").strip()
        url = self._title_to_url.get(title, "")
        if not url:
            return None

        html = self._get_html(url)
        if not html:
            return None

        # Series: scope link extraction to the episode section; episode ==
        # title means "whole entry", same as a movie.
        ep_filter = "" if (season == "Film" or episode == title) else episode
        links = self._get_hoster_links(html, ep_filter)
        return links[0] if links else None

    def resolve_stream_link(self, link: str) -> Optional[str]:
        """Resolve a hoster page URL into a playable URL via ResolveURL."""
        link = (link or "").strip()
        if not link:
            return None
        try:
            from plugin_helpers import resolve_via_resolveurl

            return resolve_via_resolveurl(link, fallback_to_link=False)
        except Exception:
            # ResolveURL unavailable or resolution failed -> no stream.
            return None

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------

    def metadata_for(
        self, title: str
    ) -> tuple[dict[str, str], dict[str, str], list | None]:
        """Return (info, art, None) assembled from the per-title cache."""
        title = (title or "").strip()
        if not title:
            return {}, {}, None

        info: dict[str, str] = {"title": title}
        art: dict[str, str] = {}

        cached = self._title_meta.get(title)
        if cached:
            plot, poster, fanart = cached
            if plot:
                info["plot"] = plot
            if poster:
                art["thumb"] = poster
                art["poster"] = poster
            if fanart:
                art["fanart"] = fanart

        return info, art, None

    # ------------------------------------------------------------------
    # Browsing
    # ------------------------------------------------------------------

    def latest_titles(self, page: int = 1) -> List[str]:
        """Titles from the new-movies listing.

        NOTE(review): *page* is accepted for interface compatibility but
        currently ignored — the listing URL's pagination scheme is not
        modelled here; confirm before wiring paging through.
        """
        html = self._get_html(_URL_NEW)
        return self._parse_entries(html) if html else []

    def popular_series(self) -> List[str]:
        """Titles from the German series listing."""
        html = self._get_html(_URL_SERIES)
        return self._parse_entries(html) if html else []

    def capabilities(self) -> set[str]:
        """Optional browsing capabilities this plugin implements."""
        return {"latest_titles", "popular_series"}
|