# ViewIT/addon/plugins/dokustreams_plugin.py

"""Doku-Streams (doku-streams.com) Integration."""

from __future__ import annotations

from dataclasses import dataclass
import re
from urllib.parse import quote
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional

try:  # pragma: no cover - optional dependency
    import requests
    from bs4 import BeautifulSoup  # type: ignore[import-not-found]
except ImportError as exc:  # pragma: no cover - optional dependency
    requests = None
    BeautifulSoup = None
    REQUESTS_AVAILABLE = False
    REQUESTS_IMPORT_ERROR = exc
else:
    REQUESTS_AVAILABLE = True
    REQUESTS_IMPORT_ERROR = None

from plugin_interface import BasisPlugin
from plugin_helpers import dump_response_html, get_setting_bool, get_setting_string, log_error, log_url, notify_url
from http_session_pool import get_requests_session

if TYPE_CHECKING:  # pragma: no cover
    from requests import Session as RequestsSession
    from bs4 import BeautifulSoup as BeautifulSoupT  # type: ignore[import-not-found]
else:  # pragma: no cover
    RequestsSession = Any
    BeautifulSoupT = Any


# Add-on id; passed as the first argument to every settings/logging helper.
ADDON_ID = "plugin.video.viewit"
# Setting id that holds the configurable site base URL, and its fallback value.
SETTING_BASE_URL = "doku_streams_base_url"
DEFAULT_BASE_URL = "https://doku-streams.com"
# Site-relative path of the listing fetched by popular_series().
MOST_VIEWED_PATH = "/meistgesehene/"
# Passed as ``timeout=`` to every GET issued by _get_soup (seconds, per requests).
DEFAULT_TIMEOUT = 20
# Setting ids handed to the logging helpers as ``enabled_setting_id``.
GLOBAL_SETTING_LOG_URLS = "debug_log_urls"
GLOBAL_SETTING_DUMP_HTML = "debug_dump_html"
GLOBAL_SETTING_SHOW_URL_INFO = "debug_show_url_info"
GLOBAL_SETTING_LOG_ERRORS = "debug_log_errors"
# Setting ids handed to the same helpers as ``plugin_setting_id``.
SETTING_LOG_URLS = "log_urls_dokustreams"
SETTING_DUMP_HTML = "dump_html_dokustreams"
SETTING_SHOW_URL_INFO = "show_url_info_dokustreams"
SETTING_LOG_ERRORS = "log_errors_dokustreams"
# Optional progress hook: called with (message, percent or None).
ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]


def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None:
    """Report progress to ``callback`` on a best-effort basis.

    Nothing happens when ``callback`` is not callable. Otherwise it is called
    with ``message`` coerced to ``str`` (empty string for falsy values) and
    ``percent`` coerced to ``int`` (``None`` is passed through unchanged).
    Any exception raised while converting or calling is swallowed.
    """
    if callable(callback):
        try:
            text = str(message or "")
            value = int(percent) if percent is not None else None
            callback(text, value)
        except Exception:
            # Progress reporting must never interrupt the actual work.
            pass
# Default request headers; passed both to get_requests_session() and to each
# individual GET issued by _get_soup().
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Kodi; ViewIt) AppleWebKit/537.36 (KHTML, like Gecko)",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
}


@dataclass(frozen=True)
class SearchHit:
    """One entry parsed from a listing or search-result page."""

    # Text of the entry's headline link.
    title: str
    # Absolute entry URL with fragment, query string and trailing slash removed.
    url: str
    # First non-empty paragraph of the entry summary, or "" when there is none.
    plot: str = ""
    # Absolute URL of the entry thumbnail, or "" when there is none.
    poster: str = ""

def _extract_last_page(soup: BeautifulSoupT) -> int:
    """Return the highest page number advertised by the pagination links.

    Every pagination anchor contributes two kinds of candidates: numbers that
    appear in its visible text (e.g. ``"7"``) and the number in a
    ``/page/<n>`` segment of its ``href``. Bare digits elsewhere in the href
    are ignored on purpose, because slugs such as ``/top-2024/`` would
    otherwise be mistaken for page numbers.

    Args:
        soup: Parsed listing page; any falsy value yields ``1``.

    Returns:
        The largest page number found, never less than ``1``.
    """
    max_page = 1
    if not soup:
        return max_page
    for anchor in soup.select("nav.navigation a[href], nav.pagination a[href], a.page-numbers[href]"):
        text = (anchor.get_text(" ", strip=True) or "").strip()
        href = (anchor.get("href") or "").strip()
        # Single backslashes are required here: inside a raw string "\\d"
        # would match a literal backslash followed by "d", never a digit.
        candidates = re.findall(r"/page/(\d+)", href) + re.findall(r"\d+", text)
        for value in candidates:
            max_page = max(max_page, int(value))
    return max_page


def _extract_summary_and_poster(article: BeautifulSoupT) -> tuple[str, str]:
    """Pull the teaser text and thumbnail URL out of one listing entry.

    Args:
        article: The entry element; a falsy value yields ``("", "")``.

    Returns:
        ``(summary, poster)`` where ``summary`` is the first non-empty ``<p>``
        inside ``div.entry-summary`` and ``poster`` is the absolute image URL
        from ``div.entry-thumb img`` (``data-src`` wins over ``src``). Either
        part is ``""`` when it cannot be found.
    """
    if not article:
        return "", ""

    summary_box = article.select_one("div.entry-summary")
    paragraphs = summary_box.find_all("p") if summary_box is not None else []
    texts = ((node.get_text(" ", strip=True) or "").strip() for node in paragraphs)
    summary = next((text for text in texts if text), "")

    poster = ""
    image = article.select_one("div.entry-thumb img")
    if image is not None:
        lazy_source = (image.get("data-src") or "").strip()
        poster = lazy_source or (image.get("src") or "").strip()
        # A placeholder image is replaced by whatever data-src holds.
        if "lazy_placeholder" in poster and image.get("data-src"):
            poster = lazy_source
        poster = _absolute_url(poster)
    return summary, poster


def _parse_listing_hits(soup: BeautifulSoupT, *, query: str = "") -> List[SearchHit]:
    """Turn the ``article`` entries of a listing page into ``SearchHit`` objects.

    Args:
        soup: Parsed listing or search page; a falsy value yields ``[]``.
        query: When non-empty, only entries whose title matches the query
            (see ``_matches_query``) are kept.

    Returns:
        Hits in document order. Entries without headline link, href or title
        are skipped, as are entries whose case-folded title or URL was already
        seen earlier on the page.
    """
    results: List[SearchHit] = []
    if not soup:
        return results

    known_titles: set[str] = set()
    known_urls: set[str] = set()
    for entry in soup.select("article[id^='post-']"):
        link = entry.select_one("h2.entry-title a[href]")
        if link is None:
            continue
        raw_href = (link.get("href") or "").strip()
        heading = (link.get_text(" ", strip=True) or "").strip()
        if not (raw_href and heading):
            continue
        if query and not _matches_query(query, title=heading):
            continue

        # Canonical URL: no fragment, no query string, no trailing slash.
        without_fragment = _absolute_url(raw_href).split("#", 1)[0]
        canonical = without_fragment.split("?", 1)[0].rstrip("/")

        heading_key, url_key = heading.casefold(), canonical.casefold()
        if heading_key in known_titles or url_key in known_urls:
            continue
        known_titles.add(heading_key)
        known_urls.add(url_key)

        _log_url_event(canonical, kind="PARSE")
        plot, poster = _extract_summary_and_poster(entry)
        results.append(SearchHit(title=heading, url=canonical, plot=plot, poster=poster))
    return results


def _get_base_url() -> str:
    """Return the configured site base URL without a trailing slash.

    Falls back to ``DEFAULT_BASE_URL`` when the setting is blank.
    """
    configured = get_setting_string(ADDON_ID, SETTING_BASE_URL, default=DEFAULT_BASE_URL).strip()
    return (configured or DEFAULT_BASE_URL).rstrip("/")


def _absolute_url(url: str) -> str:
    """Resolve ``url`` against the site base URL.

    Returns ``""`` for empty input, leaves ``http(s)://`` URLs untouched,
    prefixes protocol-relative URLs (``//host/...``) with ``https:`` and joins
    everything else onto the base URL with exactly one slash in between.
    """
    cleaned = (url or "").strip()
    if not cleaned:
        return ""
    if cleaned.startswith(("http://", "https://")):
        return cleaned
    if cleaned.startswith("//"):
        return "https:" + cleaned
    # Site-absolute ("/x") and relative ("x") paths end up identical.
    path = cleaned if cleaned.startswith("/") else "/" + cleaned
    return _get_base_url() + path


def _normalize_search_text(value: str) -> str:
    """Reduce ``value`` to case-folded words separated by single spaces.

    Every run of characters that is neither a letter nor a digit (underscores
    included) becomes one space. Letters are matched Unicode-aware, so
    umlauts and other non-ASCII letters stay part of their word instead of
    splitting it ("Flüsse" -> "flüsse", not "fl sse").

    Args:
        value: Arbitrary text; ``None`` or empty input yields ``""``.

    Returns:
        The normalised text without leading or trailing whitespace.
    """
    folded = (value or "").casefold()
    # [\W_] covers whitespace too, so no separate whitespace pass is needed.
    return re.sub(r"[\W_]+", " ", folded).strip()


def _matches_query(query: str, *, title: str) -> bool:
    """Tell whether ``query`` occurs in ``title`` as a whole-word phrase.

    Both strings are normalised with ``_normalize_search_text`` first. A query
    that normalises to nothing never matches.
    """
    needle = _normalize_search_text(query)
    if not needle:
        return False
    # Padding both sides with spaces anchors the phrase on word boundaries.
    padded_title = " " + _normalize_search_text(title) + " "
    return (" " + needle + " ") in padded_title


def _log_url_event(url: str, *, kind: str = "VISIT") -> None:
    """Forward ``url`` to the shared ``log_url`` helper, tagged with ``kind``.

    The helper receives the global and the plugin-specific URL-logging setting
    ids plus the target file name ``dokustreams_urls.log``; presumably it
    decides from those settings whether anything is written.
    """
    log_url(
        ADDON_ID,
        enabled_setting_id=GLOBAL_SETTING_LOG_URLS,
        plugin_setting_id=SETTING_LOG_URLS,
        log_filename="dokustreams_urls.log",
        url=url,
        kind=kind,
    )


def _log_visit(url: str) -> None:
    """Record a page visit: log ``url`` as a VISIT event and call ``notify_url``.

    ``notify_url`` is given the heading "Doku-Streams" and the global and
    plugin-specific "show URL info" setting ids.
    """
    _log_url_event(url, kind="VISIT")
    notify_url(
        ADDON_ID,
        heading="Doku-Streams",
        url=url,
        enabled_setting_id=GLOBAL_SETTING_SHOW_URL_INFO,
        plugin_setting_id=SETTING_SHOW_URL_INFO,
    )


def _log_response_html(url: str, body: str) -> None:
    """Hand the response ``body`` for ``url`` to the shared ``dump_response_html`` helper.

    The helper receives the global and plugin-specific HTML-dump setting ids
    and the file name prefix ``dokustreams_response``.
    """
    dump_response_html(
        ADDON_ID,
        enabled_setting_id=GLOBAL_SETTING_DUMP_HTML,
        plugin_setting_id=SETTING_DUMP_HTML,
        url=url,
        body=body,
        filename_prefix="dokustreams_response",
    )


def _log_error_message(message: str) -> None:
    """Forward ``message`` to the shared ``log_error`` helper.

    The helper receives the global and plugin-specific error-logging setting
    ids and the target file name ``dokustreams_errors.log``.
    """
    log_error(
        ADDON_ID,
        enabled_setting_id=GLOBAL_SETTING_LOG_ERRORS,
        plugin_setting_id=SETTING_LOG_ERRORS,
        log_filename="dokustreams_errors.log",
        message=message,
    )


def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> BeautifulSoupT:
    """Fetch ``url`` and return its body parsed with ``html.parser``.

    The visit is logged before the request, a differing final URL is logged as
    a REDIRECT event and the body is passed to the HTML dump helper.

    Args:
        url: Absolute URL to request.
        session: Session to use; when omitted the pooled "dokustreams"
            session is obtained from ``get_requests_session``.

    Returns:
        The parsed document.

    Raises:
        RuntimeError: If ``requests`` or ``bs4`` could not be imported.
        Exception: Whatever the GET or ``raise_for_status()`` raises; the
            error is logged first and then re-raised unchanged.
    """
    if requests is None or BeautifulSoup is None:
        raise RuntimeError("requests/bs4 sind nicht verfuegbar.")
    _log_visit(url)
    sess = session or get_requests_session("dokustreams", headers=HEADERS)
    response = None
    # One outer try/finally so the response is also released when
    # raise_for_status() rejects it, not only after successful parsing.
    try:
        try:
            response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
            response.raise_for_status()
        except Exception as exc:
            _log_error_message(f"GET {url} failed: {exc}")
            raise
        final_url = response.url or url
        body = response.text or ""
        if final_url != url:
            _log_url_event(final_url, kind="REDIRECT")
        _log_response_html(url, body)
        return BeautifulSoup(body, "html.parser")
    finally:
        if response is not None:
            try:
                response.close()
            except Exception:
                # Closing is best effort; a failure here must not mask the result.
                pass


class DokuStreamsPlugin(BasisPlugin):
    """Plugin that lists and resolves entries from doku-streams.com.

    Titles are discovered through search, genre listings and the "most viewed"
    page. Each discovered title is remembered together with its detail page
    URL, summary and poster, so that the later calls (``metadata_for``,
    ``seasons_for``, ``episodes_for``, ``stream_link_for``) can be answered
    from the title alone. All network failures are swallowed and reported as
    empty results.
    """

    name = "Doku-Streams"
    version = "1.0.0"
    prefer_source_metadata = True

    # Tried in order by stream_link_for(). They are separate selectors because
    # a single selector group returns the first match in document order, which
    # would let any unrelated iframe win over a YouTube/Vimeo embed.
    _IFRAME_SELECTORS = (
        "div.fluid-width-video-wrapper iframe[src]",
        "iframe[src*='youtube'], iframe[src*='vimeo']",
        "iframe[src]",
    )

    def __init__(self) -> None:
        """Create empty caches and record whether requests/bs4 are importable."""
        # Title -> detail page URL, filled by search and listing calls.
        self._title_to_url: Dict[str, str] = {}
        # Category label -> listing URL; holds nested genre labels from
        # genres() as well as top-level names from categories().
        self._category_to_url: Dict[str, str] = {}
        # Sorted nested genre labels once genres() has loaded them. Tracked
        # separately so entries added by categories() are not mistaken for an
        # already loaded genre list.
        self._genre_labels: Optional[List[str]] = None
        self._category_page_count_cache: Dict[str, int] = {}
        self._popular_cache: Optional[List[SearchHit]] = None
        # Title -> (plot, poster).
        self._title_meta: Dict[str, tuple[str, str]] = {}
        self._requests_available = REQUESTS_AVAILABLE
        self.is_available = True
        self.unavailable_reason: Optional[str] = None
        if not self._requests_available:  # pragma: no cover - optional dependency
            self.is_available = False
            self.unavailable_reason = (
                "requests/bs4 fehlen. Installiere 'requests' und 'beautifulsoup4'."
            )
            if REQUESTS_IMPORT_ERROR:
                print(f"DokuStreamsPlugin Importfehler: {REQUESTS_IMPORT_ERROR}")

    def _remember_hits(self, hits: List[SearchHit], *, replace_urls: bool = False) -> List[str]:
        """Cache URL and metadata of ``hits`` and return their titles in order.

        Args:
            hits: Parsed listing entries.
            replace_urls: When true the title -> URL map is replaced by the
                new hits instead of being extended.
        """
        urls = {hit.title: hit.url for hit in hits if hit.title and hit.url}
        if replace_urls:
            self._title_to_url = urls
        else:
            self._title_to_url.update(urls)
        for hit in hits:
            if hit.title:
                self._title_meta[hit.title] = (hit.plot, hit.poster)
        return [hit.title for hit in hits if hit.title]

    async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
        """Search the site for ``query`` and return the matching titles sorted case-insensitively.

        The title -> URL map is replaced by the search result. Progress is
        reported through ``progress_callback`` when one is given.
        """
        _emit_progress(progress_callback, "Doku-Streams Suche", 15)
        hits = self._search_hits(query)
        _emit_progress(progress_callback, f"Treffer verarbeiten ({len(hits)})", 70)
        titles = self._remember_hits(hits, replace_urls=True)
        titles.sort(key=lambda value: value.casefold())
        _emit_progress(progress_callback, f"Fertig: {len(titles)} Treffer", 95)
        return titles

    def _search_hits(self, query: str) -> List[SearchHit]:
        """Fetch the site's search page for ``query`` and parse the title-matching hits.

        Returns ``[]`` for a blank query, when requests/bs4 are missing, or
        when the request fails.
        """
        query = (query or "").strip()
        if not query or not self._requests_available:
            return []
        search_url = _absolute_url(f"/?s={quote(query)}")
        session = get_requests_session("dokustreams", headers=HEADERS)
        try:
            soup = _get_soup(search_url, session=session)
        except Exception:
            return []
        return _parse_listing_hits(soup, query=query)

    def capabilities(self) -> set[str]:
        """Return the optional features this plugin supports."""
        return {"genres", "popular_series"}

    def _categories_url(self) -> str:
        """Return the absolute URL of the category overview page."""
        return _absolute_url("/kategorien/")

    def _parse_categories(self, soup: BeautifulSoupT) -> Dict[str, str]:
        """Collect the leaf categories of the nested category list.

        Only leaves that have at least one parent are returned. Their label is
        the path of names joined by an arrow, and a trailing "(<count>)" is
        stripped from every name.

        Returns:
            Mapping of label to absolute listing URL; empty when the list
            element is missing.
        """
        categories: Dict[str, str] = {}
        if not soup:
            return categories
        root = soup.select_one("ul.nested-category-list")
        if root is None:
            return categories

        def clean_name(value: str) -> str:
            value = (value or "").strip()
            # Single backslashes: the doubled form inside a raw string matched
            # literal backslashes and therefore never removed the count.
            return re.sub(r"\s*\(\d+\)\s*$", "", value).strip()

        def walk(ul, parents: List[str]) -> None:
            for li in ul.find_all("li", recursive=False):
                anchor = li.find("a", href=True)
                if anchor is None:
                    continue
                name = clean_name(anchor.get_text(" ", strip=True) or "")
                href = (anchor.get("href") or "").strip()
                if not name or not href:
                    continue
                child_ul = li.find("ul", class_="nested-category-list")
                if child_ul is not None:
                    walk(child_ul, parents + [name])
                elif parents:
                    label = " \u2192 ".join(parents + [name])
                    categories[label] = _absolute_url(href)

        walk(root, [])
        return categories

    def _parse_top_categories(self, soup: BeautifulSoupT) -> Dict[str, str]:
        """Collect the first-level entries of the nested category list.

        Returns:
            Mapping of the anchor text (unmodified apart from trimming) to the
            absolute listing URL; empty when the list element is missing.
        """
        categories: Dict[str, str] = {}
        if not soup:
            return categories
        root = soup.select_one("ul.nested-category-list")
        if root is None:
            return categories
        for li in root.find_all("li", recursive=False):
            anchor = li.find("a", href=True)
            if anchor is None:
                continue
            name = (anchor.get_text(" ", strip=True) or "").strip()
            href = (anchor.get("href") or "").strip()
            if not name or not href:
                continue
            categories[name] = _absolute_url(href)
        return categories

    def genres(self) -> List[str]:
        """Return the nested genre labels, sorted case-insensitively.

        The category page is fetched on the first successful call only. A
        failed request or an empty parse result yields ``[]`` and is not
        cached, so a later call retries.
        """
        if not self._requests_available:
            return []
        if self._genre_labels is None:
            try:
                soup = _get_soup(self._categories_url(), session=get_requests_session("dokustreams", headers=HEADERS))
            except Exception:
                return []
            parsed = self._parse_categories(soup)
            if not parsed:
                return []
            self._category_to_url.update(parsed)
            self._genre_labels = sorted(parsed, key=lambda value: value.casefold())
        return list(self._genre_labels)

    def categories(self) -> List[str]:
        """Fetch and return the top-level category names in page order.

        The names are also added to the category URL lookup (without
        overriding existing entries), so they can be used with the genre
        listing methods.
        """
        if not self._requests_available:
            return []
        try:
            soup = _get_soup(self._categories_url(), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        parsed = self._parse_top_categories(soup)
        for key, value in parsed.items():
            self._category_to_url.setdefault(key, value)
        return list(parsed)

    def _category_url(self, genre: str) -> str:
        """Return the listing URL for ``genre`` or ``""`` when it is unknown.

        The genre list is loaded on demand when the label is not known yet
        and genres() has not succeeded before.
        """
        if genre not in self._category_to_url and self._genre_labels is None:
            self.genres()
        return self._category_to_url.get(genre, "")

    def genre_page_count(self, genre: str) -> int:
        """Return the number of listing pages for ``genre`` (at least 1).

        Successful lookups are cached per genre. Unknown genres and failed
        requests yield ``1`` without being cached.
        """
        genre = (genre or "").strip()
        if not genre:
            return 1
        cached = self._category_page_count_cache.get(genre)
        if cached is not None:
            return max(1, int(cached))
        base_url = self._category_url(genre)
        if not base_url:
            return 1
        try:
            soup = _get_soup(base_url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return 1
        pages = max(1, _extract_last_page(soup))
        self._category_page_count_cache[genre] = pages
        return pages

    def titles_for_genre_page(self, genre: str, page: int) -> List[str]:
        """Return the titles on listing page ``page`` of ``genre`` in page order.

        Pages below 1 are treated as page 1. URL and metadata of every title
        are remembered. Unknown genres and failed requests yield ``[]``.
        """
        genre = (genre or "").strip()
        if not genre or not self._requests_available:
            return []
        base_url = self._category_url(genre)
        if not base_url:
            return []
        page = max(1, int(page or 1))
        url = base_url if page == 1 else f"{base_url.rstrip('/')}/page/{page}/"
        try:
            soup = _get_soup(url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        return self._remember_hits(_parse_listing_hits(soup))

    def titles_for_genre(self, genre: str) -> List[str]:
        """Return the titles of the first listing page of ``genre``, sorted case-insensitively."""
        titles = self.titles_for_genre_page(genre, 1)
        titles.sort(key=lambda value: value.casefold())
        return titles

    def _most_viewed_url(self) -> str:
        """Return the absolute URL of the "most viewed" listing."""
        return _absolute_url(MOST_VIEWED_PATH)

    def popular_series(self) -> List[str]:
        """Return the titles of the "most viewed" listing, sorted case-insensitively.

        The parsed hits are cached after the first successful request. A
        failed request yields ``[]`` and is retried on the next call.
        """
        if not self._requests_available:
            return []
        if self._popular_cache is not None:
            titles = [hit.title for hit in self._popular_cache if hit.title]
            titles.sort(key=lambda value: value.casefold())
            return titles
        try:
            soup = _get_soup(self._most_viewed_url(), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        hits = _parse_listing_hits(soup)
        self._popular_cache = list(hits)
        titles = self._remember_hits(hits)
        titles.sort(key=lambda value: value.casefold())
        return titles

    def metadata_for(self, title: str) -> tuple[dict[str, str], dict[str, str], list[object] | None]:
        """Return ``(info, art, None)`` for ``title`` from the remembered metadata.

        ``info`` always contains the title and, when known, the plot. ``art``
        contains ``thumb`` and ``poster`` when a poster is known. A blank
        title yields ``({}, {}, None)``.
        """
        title = (title or "").strip()
        if not title:
            return {}, {}, None
        plot, poster = self._title_meta.get(title, ("", ""))
        info: dict[str, str] = {"title": title}
        if plot:
            info["plot"] = plot
        art: dict[str, str] = {}
        if poster:
            art = {"thumb": poster, "poster": poster}
        return info, art, None

    def seasons_for(self, title: str) -> List[str]:
        """Return the single pseudo season "Stream" for a known title, else ``[]``."""
        title = (title or "").strip()
        if not title or title not in self._title_to_url:
            return []
        return ["Stream"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        """Return the title itself as the only episode of a known title, else ``[]``."""
        title = (title or "").strip()
        if not title or title not in self._title_to_url:
            return []
        return [title]

    def stream_link_for(self, title: str, season: str, episode: str) -> Optional[str]:
        """Return the absolute ``src`` of the video iframe on the title's detail page.

        The responsive video wrapper is preferred, then YouTube/Vimeo embeds,
        then any iframe with a ``src``. ``season`` and ``episode`` are not
        used. Returns ``None`` when the title is unknown, the request fails,
        or no usable iframe is found.
        """
        title = (title or "").strip()
        if not title:
            return None
        url = self._title_to_url.get(title)
        if not url:
            return None
        if not self._requests_available:
            return None
        try:
            soup = _get_soup(url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return None
        iframe = None
        for selector in self._IFRAME_SELECTORS:
            iframe = soup.select_one(selector)
            if iframe is not None:
                break
        if iframe is None:
            return None
        src = (iframe.get("src") or "").strip()
        if not src:
            return None
        return _absolute_url(src)


# Alias used for automatic plugin discovery.
Plugin = DokuStreamsPlugin