Files
ViewIT/addon/plugins/hdfilme_plugin.py

290 lines
9.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""HDFilme Plugin für ViewIT.
HTML-Scraping von hdfilme.garden.
Filme und Serien, Hoster-Auflösung via ResolveURL.
Hinweis: Die Domain ändert sich gelegentlich und ist als DOMAIN-Konstante konfigurierbar.
"""
from __future__ import annotations

import re
from typing import Any, Callable, List, Optional
from urllib.parse import quote_plus

# requests is an optional dependency: when it is missing the plugin stays
# importable and reports itself unavailable instead of crashing the host app.
try:  # pragma: no cover
    import requests
except ImportError as exc:  # pragma: no cover
    requests = None
    REQUESTS_AVAILABLE = False
    REQUESTS_IMPORT_ERROR = exc
else:
    REQUESTS_AVAILABLE = True
    REQUESTS_IMPORT_ERROR = None

from plugin_interface import BasisPlugin
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# The site rotates domains from time to time; only this constant needs updating.
DOMAIN = "hdfilme.garden"
BASE_URL = "https://" + DOMAIN
# HTTP timeout in seconds for every page fetch.
DEFAULT_TIMEOUT = 20
# Browser-like headers; the site may reject requests without a Referer/UA.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Referer": BASE_URL + "/",
}
# Endpoint templates: search (query pre-encoded with quote_plus), newest
# movies listing, and the German series-stream listing.
_URL_SEARCH = BASE_URL + "/index.php?do=search&subaction=search&story={query}"
_URL_NEW = BASE_URL + "/kinofilme-online/"
_URL_SERIES = BASE_URL + "/serienstream-deutsch/"
# HTML parsing patterns
# One listing entry: captures (detail URL, title text, lazy-loaded thumb URL).
_RE_ENTRIES = re.compile(
    r'<div class="box-product.*?href="([^"]+)[^>]*>([^<]+).*?data-src="([^"]+)',
    re.DOTALL,
)
# Episode labels on a series detail page (anchor text of href="#" links).
_RE_EPISODES = re.compile(r'><a href="#">([^<]+)')
# Hoster stream URLs embedded as link="..." attributes.
_RE_HOSTERS = re.compile(r'link="([^"]+)"')
# Standalone lazy-image URL (poster/thumb).
_RE_THUMB_STANDALONE = re.compile(r'data-src="([^"]+)"')
# Hosters (matched by domain name) that are never usable for playback.
_SKIP_HOSTERS = {"youtube", "dropload"}
# Optional callback(message, percent) used to report search progress.
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
# ---------------------------------------------------------------------------
# Plugin-Klasse
# ---------------------------------------------------------------------------
class HDFilmePlugin(BasisPlugin):
    """HDFilme integration for ViewIT (hdfilme.garden).

    Scrapes the site's HTML listing and detail pages with regex patterns and
    resolves hoster links via ResolveURL.  All network access is best-effort:
    fetch errors yield empty results rather than exceptions.
    """

    name = "HDFilme"

    def __init__(self) -> None:
        # title -> detail-page URL
        self._title_to_url: dict[str, str] = {}
        # title -> (plot, poster, fanart); plot/fanart are currently always ""
        self._title_meta: dict[str, tuple[str, str, str]] = {}
        # title -> True when the entry is a series
        self._is_series: dict[str, bool] = {}

    # ------------------------------------------------------------------
    # Availability
    # ------------------------------------------------------------------
    @property
    def is_available(self) -> bool:
        """True when the optional ``requests`` dependency was importable."""
        return REQUESTS_AVAILABLE

    @property
    def unavailable_reason(self) -> str:
        """Human-readable reason the plugin is disabled, or "" if enabled."""
        if REQUESTS_AVAILABLE:
            return ""
        return f"requests nicht verfügbar: {REQUESTS_IMPORT_ERROR}"

    # ------------------------------------------------------------------
    # HTTP
    # ------------------------------------------------------------------
    def _get_session(self):  # type: ignore[return]
        """Return the pooled requests session for this plugin."""
        from http_session_pool import get_requests_session
        return get_requests_session("hdfilme", headers=HEADERS)

    def _get_html(self, url: str) -> str:
        """Fetch *url* and return the response body, or "" on any error.

        Deliberately swallows all exceptions (best-effort scraping); the
        response is always closed.
        """
        session = self._get_session()
        response = None
        try:
            response = session.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
            response.raise_for_status()
            return response.text
        except Exception:
            return ""
        finally:
            if response is not None:
                try:
                    response.close()
                except Exception:
                    pass

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _parse_entries(self, html: str) -> List[str]:
        """Parse a listing page, cache each entry, and return the title list.

        Populates ``_title_to_url``, ``_is_series`` and ``_title_meta`` as a
        side effect.  Entries without a title or a usable absolute URL are
        skipped.
        """
        titles: list[str] = []
        for m in _RE_ENTRIES.finditer(html):
            raw_url, raw_title, raw_thumb = m.group(1), m.group(2), m.group(3)
            title = raw_title.strip()
            if not title:
                continue
            # Ensure an absolute URL.
            url = raw_url.strip()
            if url.startswith("/"):
                url = BASE_URL + url
            if not url.startswith("http"):
                continue
            thumb = raw_thumb.strip()
            if thumb.startswith("/"):
                thumb = BASE_URL + thumb
            # "taffel" matches both "Staffel" and "staffel" with a
            # case-sensitive substring test (xStream convention).
            is_series = "taffel" in title
            self._title_to_url[title] = url
            self._is_series[title] = is_series
            self._title_meta[title] = ("", thumb, "")
            titles.append(title)
        return titles

    def _get_hoster_links(self, html: str, episode: str = "") -> List[str]:
        """Extract hoster URLs from *html*, optionally narrowed to *episode*.

        Links whose registrable domain name is in ``_SKIP_HOSTERS`` are
        dropped.  Protocol-relative links are upgraded to https.
        """
        search_area = html
        if episode:
            # Isolate the episode's section so only its hosters are matched.
            m = re.search(re.escape(episode) + r"<.*?</ul>", html, re.DOTALL)
            if m:
                search_area = m.group(0)
        links: list[str] = []
        for m in _RE_HOSTERS.finditer(search_area):
            link = m.group(1).strip()
            if not link:
                continue
            if link.startswith("//"):
                link = "https:" + link
            # BUGFIX: derive the hoster name from the registrable domain
            # (second-to-last DNS label) after stripping the URL path.  The
            # previous first-label heuristic turned "www.youtube.com" into
            # "www", so skip-listed hosters behind a subdomain were never
            # filtered, and a "." in the path could corrupt the name.
            host = link.split("//")[-1].split("/")[0].lower()
            labels = host.split(".")
            name = labels[-2] if len(labels) >= 2 else labels[0]
            if name in _SKIP_HOSTERS:
                continue
            links.append(link)
        return links

    # ------------------------------------------------------------------
    # Required methods
    # ------------------------------------------------------------------
    async def search_titles(
        self, query: str, progress_callback: ProgressCallback = None
    ) -> List[str]:
        """Search the site for *query* and return matching cached titles.

        Note: the HTTP call is blocking despite the async signature (kept
        for interface compatibility).  Results are additionally filtered
        client-side by case-insensitive substring match.
        """
        query = (query or "").strip()
        if not query or not REQUESTS_AVAILABLE:
            return []
        url = _URL_SEARCH.format(query=quote_plus(query))
        html = self._get_html(url)
        if not html:
            return []
        q_lower = query.lower()
        all_titles = self._parse_entries(html)
        return [t for t in all_titles if q_lower in t.lower()]

    def seasons_for(self, title: str) -> List[str]:
        """Return season labels for *title*: "Staffel N" for series, else "Film"."""
        title = (title or "").strip()
        if not title:
            return []
        if self._is_series.get(title):
            # Derive the season number from the title when present.
            m = re.search(r"Staffel\s*(\d+)", title, re.IGNORECASE)
            if m:
                return [f"Staffel {m.group(1)}"]
            return ["Staffel 1"]
        return ["Film"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        """Return episode labels; for movies (season "Film") just the title."""
        title = (title or "").strip()
        if not title:
            return []
        if season == "Film":
            return [title]
        url = self._title_to_url.get(title, "")
        if not url:
            return []
        html = self._get_html(url)
        if not html:
            # Fall back to a single pseudo-episode so playback can proceed.
            return [title]
        episodes = _RE_EPISODES.findall(html)
        return [ep.strip() for ep in episodes if ep.strip()] or [title]

    # ------------------------------------------------------------------
    # Stream
    # ------------------------------------------------------------------
    def stream_link_for(
        self, title: str, season: str, episode: str
    ) -> Optional[str]:
        """Return the first usable (unresolved) hoster link, or None."""
        title = (title or "").strip()
        url = self._title_to_url.get(title, "")
        if not url:
            return None
        html = self._get_html(url)
        if not html:
            return None
        # Series: filter by episode section (only when episode != title,
        # i.e. when episodes_for did not fall back to the bare title).
        ep_filter = "" if (season == "Film" or episode == title) else episode
        links = self._get_hoster_links(html, ep_filter)
        return links[0] if links else None

    def resolve_stream_link(self, link: str) -> Optional[str]:
        """Resolve a hoster link to a playable URL via ResolveURL, or None."""
        link = (link or "").strip()
        if not link:
            return None
        try:
            from plugin_helpers import resolve_via_resolveurl
            return resolve_via_resolveurl(link, fallback_to_link=False)
        except Exception:
            # Missing resolver or resolution failure both mean "no stream".
            return None

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------
    def metadata_for(
        self, title: str
    ) -> tuple[dict[str, str], dict[str, str], list | None]:
        """Return (info, art, None) built from the cached listing data."""
        title = (title or "").strip()
        if not title:
            return {}, {}, None
        info: dict[str, str] = {"title": title}
        art: dict[str, str] = {}
        cached = self._title_meta.get(title)
        if cached:
            plot, poster, fanart = cached
            if plot:
                info["plot"] = plot
            if poster:
                art["thumb"] = poster
                art["poster"] = poster
            if fanart:
                art["fanart"] = fanart
        return info, art, None

    # ------------------------------------------------------------------
    # Browsing
    # ------------------------------------------------------------------
    def latest_titles(self, page: int = 1) -> List[str]:
        """Return titles from the newest-movies listing (pagination unused)."""
        html = self._get_html(_URL_NEW)
        return self._parse_entries(html) if html else []

    def popular_series(self) -> List[str]:
        """Return titles from the German series-stream listing."""
        html = self._get_html(_URL_SERIES)
        return self._parse_entries(html) if html else []

    def capabilities(self) -> set[str]:
        """Advertise the optional browsing capabilities this plugin supports."""
        return {"latest_titles", "popular_series"}