Files
ViewIT/addon/plugins/hdfilme_plugin.py

290 lines
9.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""HDFilme Plugin für ViewIT.
HTML-Scraping von hdfilme.garden.
Filme und Serien, Hoster-Auflösung via ResolveURL.
Hinweis: Die Domain ändert sich gelegentlich und ist als DOMAIN-Konstante konfigurierbar.
"""
from __future__ import annotations

import re
from typing import Any, Callable, List, Optional
from urllib.parse import quote_plus

# requests is an optional dependency: when it is missing the plugin stays
# importable and reports itself unavailable instead of crashing the host app.
try:  # pragma: no cover
    import requests
except ImportError as exc:  # pragma: no cover
    requests = None
    REQUESTS_AVAILABLE = False
    REQUESTS_IMPORT_ERROR = exc
else:
    REQUESTS_AVAILABLE = True
    REQUESTS_IMPORT_ERROR = None

from plugin_interface import BasisPlugin
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# The site rotates domains from time to time; only this constant needs updating.
DOMAIN = "hdfilme.garden"
BASE_URL = "https://" + DOMAIN
# HTTP timeout in seconds for every page fetch.
DEFAULT_TIMEOUT = 20
# Browser-like headers; the site may reject requests without a Referer/UA.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Referer": BASE_URL + "/",
}
# Endpoint templates: search (query pre-encoded with quote_plus), newest
# movies listing, and the German series-stream listing.
_URL_SEARCH = BASE_URL + "/index.php?do=search&subaction=search&story={query}"
_URL_NEW = BASE_URL + "/kinofilme-online/"
_URL_SERIES = BASE_URL + "/serienstream-deutsch/"
# HTML parsing patterns
# One listing entry: captures (detail URL, title text, lazy-loaded thumb URL).
_RE_ENTRIES = re.compile(
    r'<div class="box-product.*?href="([^"]+)[^>]*>([^<]+).*?data-src="([^"]+)',
    re.DOTALL,
)
# Episode labels on a series detail page (anchor text of href="#" links).
_RE_EPISODES = re.compile(r'><a href="#">([^<]+)')
# Hoster stream URLs embedded as link="..." attributes.
_RE_HOSTERS = re.compile(r'link="([^"]+)"')
# Standalone lazy-image URL (poster/thumb).
_RE_THUMB_STANDALONE = re.compile(r'data-src="([^"]+)"')
# Hosters (matched by domain name) that are never usable for playback.
_SKIP_HOSTERS = {"youtube", "dropload"}
# Optional callback(message, percent) used to report search progress.
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]
# ---------------------------------------------------------------------------
# Plugin-Klasse
# ---------------------------------------------------------------------------
class HDFilmePlugin(BasisPlugin):
    """HDFilme integration for ViewIT (hdfilme.garden).

    Scrapes the site's HTML listing and detail pages with regex patterns and
    resolves hoster links via ResolveURL.  All network access is best-effort:
    fetch errors yield empty results rather than exceptions.
    """

    name = "HDFilme"

    def __init__(self) -> None:
        # title -> detail-page URL
        self._title_to_url: dict[str, str] = {}
        # title -> (plot, poster, fanart); plot/fanart are currently always ""
        self._title_meta: dict[str, tuple[str, str, str]] = {}
        # title -> True when the entry is a series
        self._is_series: dict[str, bool] = {}

    # ------------------------------------------------------------------
    # Availability
    # ------------------------------------------------------------------
    @property
    def is_available(self) -> bool:
        """True when the optional ``requests`` dependency was importable."""
        return REQUESTS_AVAILABLE

    @property
    def unavailable_reason(self) -> str:
        """Human-readable reason the plugin is disabled, or "" if enabled."""
        if REQUESTS_AVAILABLE:
            return ""
        return f"requests nicht verfügbar: {REQUESTS_IMPORT_ERROR}"

    # ------------------------------------------------------------------
    # HTTP
    # ------------------------------------------------------------------
    def _get_session(self):  # type: ignore[return]
        """Return the pooled requests session for this plugin."""
        from http_session_pool import get_requests_session
        return get_requests_session("hdfilme", headers=HEADERS)

    def _get_html(self, url: str) -> str:
        """Fetch *url* and return the response body, or "" on any error.

        Deliberately swallows all exceptions (best-effort scraping); the
        response is always closed.
        """
        session = self._get_session()
        response = None
        try:
            response = session.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
            response.raise_for_status()
            return response.text
        except Exception:
            return ""
        finally:
            if response is not None:
                try:
                    response.close()
                except Exception:
                    pass

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _parse_entries(self, html: str) -> List[str]:
        """Parse a listing page, cache each entry, and return the title list.

        Populates ``_title_to_url``, ``_is_series`` and ``_title_meta`` as a
        side effect.  Entries without a title or a usable absolute URL are
        skipped.
        """
        titles: list[str] = []
        for m in _RE_ENTRIES.finditer(html):
            raw_url, raw_title, raw_thumb = m.group(1), m.group(2), m.group(3)
            title = raw_title.strip()
            if not title:
                continue
            # Ensure an absolute URL.
            url = raw_url.strip()
            if url.startswith("/"):
                url = BASE_URL + url
            if not url.startswith("http"):
                continue
            thumb = raw_thumb.strip()
            if thumb.startswith("/"):
                thumb = BASE_URL + thumb
            # "taffel" matches both "Staffel" and "staffel" with a
            # case-sensitive substring test (xStream convention).
            is_series = "taffel" in title
            self._title_to_url[title] = url
            self._is_series[title] = is_series
            self._title_meta[title] = ("", thumb, "")
            titles.append(title)
        return titles

    def _get_hoster_links(self, html: str, episode: str = "") -> List[str]:
        """Extract hoster URLs from *html*, optionally narrowed to *episode*.

        Links whose registrable domain name is in ``_SKIP_HOSTERS`` are
        dropped.  Protocol-relative links are upgraded to https.
        """
        search_area = html
        if episode:
            # Isolate the episode's section so only its hosters are matched.
            m = re.search(re.escape(episode) + r"<.*?</ul>", html, re.DOTALL)
            if m:
                search_area = m.group(0)
        links: list[str] = []
        for m in _RE_HOSTERS.finditer(search_area):
            link = m.group(1).strip()
            if not link:
                continue
            if link.startswith("//"):
                link = "https:" + link
            # BUGFIX: derive the hoster name from the registrable domain
            # (second-to-last DNS label) after stripping the URL path.  The
            # previous first-label heuristic turned "www.youtube.com" into
            # "www", so skip-listed hosters behind a subdomain were never
            # filtered, and a "." in the path could corrupt the name.
            host = link.split("//")[-1].split("/")[0].lower()
            labels = host.split(".")
            name = labels[-2] if len(labels) >= 2 else labels[0]
            if name in _SKIP_HOSTERS:
                continue
            links.append(link)
        return links

    # ------------------------------------------------------------------
    # Required methods
    # ------------------------------------------------------------------
    async def search_titles(
        self, query: str, progress_callback: ProgressCallback = None
    ) -> List[str]:
        """Search the site for *query* and return matching cached titles.

        Note: the HTTP call is blocking despite the async signature (kept
        for interface compatibility).  Results are additionally filtered
        client-side by case-insensitive substring match.
        """
        query = (query or "").strip()
        if not query or not REQUESTS_AVAILABLE:
            return []
        url = _URL_SEARCH.format(query=quote_plus(query))
        html = self._get_html(url)
        if not html:
            return []
        q_lower = query.lower()
        all_titles = self._parse_entries(html)
        return [t for t in all_titles if q_lower in t.lower()]

    def seasons_for(self, title: str) -> List[str]:
        """Return season labels for *title*: "Staffel N" for series, else "Film"."""
        title = (title or "").strip()
        if not title:
            return []
        if self._is_series.get(title):
            # Derive the season number from the title when present.
            m = re.search(r"Staffel\s*(\d+)", title, re.IGNORECASE)
            if m:
                return [f"Staffel {m.group(1)}"]
            return ["Staffel 1"]
        return ["Film"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        """Return episode labels; for movies (season "Film") just the title."""
        title = (title or "").strip()
        if not title:
            return []
        if season == "Film":
            return [title]
        url = self._title_to_url.get(title, "")
        if not url:
            return []
        html = self._get_html(url)
        if not html:
            # Fall back to a single pseudo-episode so playback can proceed.
            return [title]
        episodes = _RE_EPISODES.findall(html)
        return [ep.strip() for ep in episodes if ep.strip()] or [title]

    # ------------------------------------------------------------------
    # Stream
    # ------------------------------------------------------------------
    def stream_link_for(
        self, title: str, season: str, episode: str
    ) -> Optional[str]:
        """Return the first usable (unresolved) hoster link, or None."""
        title = (title or "").strip()
        url = self._title_to_url.get(title, "")
        if not url:
            return None
        html = self._get_html(url)
        if not html:
            return None
        # Series: filter by episode section (only when episode != title,
        # i.e. when episodes_for did not fall back to the bare title).
        ep_filter = "" if (season == "Film" or episode == title) else episode
        links = self._get_hoster_links(html, ep_filter)
        return links[0] if links else None

    def resolve_stream_link(self, link: str) -> Optional[str]:
        """Resolve a hoster link to a playable URL via ResolveURL, or None."""
        link = (link or "").strip()
        if not link:
            return None
        try:
            from plugin_helpers import resolve_via_resolveurl
            return resolve_via_resolveurl(link, fallback_to_link=False)
        except Exception:
            # Missing resolver or resolution failure both mean "no stream".
            return None

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------
    def metadata_for(
        self, title: str
    ) -> tuple[dict[str, str], dict[str, str], list | None]:
        """Return (info, art, None) built from the cached listing data."""
        title = (title or "").strip()
        if not title:
            return {}, {}, None
        info: dict[str, str] = {"title": title}
        art: dict[str, str] = {}
        cached = self._title_meta.get(title)
        if cached:
            plot, poster, fanart = cached
            if plot:
                info["plot"] = plot
            if poster:
                art["thumb"] = poster
                art["poster"] = poster
            if fanart:
                art["fanart"] = fanart
        return info, art, None

    # ------------------------------------------------------------------
    # Browsing
    # ------------------------------------------------------------------
    def latest_titles(self, page: int = 1) -> List[str]:
        """Return titles from the newest-movies listing (pagination unused)."""
        html = self._get_html(_URL_NEW)
        return self._parse_entries(html) if html else []

    def popular_series(self) -> List[str]:
        """Return titles from the German series-stream listing."""
        html = self._get_html(_URL_SERIES)
        return self._parse_entries(html) if html else []

    def capabilities(self) -> set[str]:
        """Advertise the optional browsing capabilities this plugin supports."""
        return {"latest_titles", "popular_series"}