"""Doku-Streams (doku-streams.com) Integration.""" from __future__ import annotations from dataclasses import dataclass import re from urllib.parse import quote from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional try: # pragma: no cover - optional dependency import requests from bs4 import BeautifulSoup # type: ignore[import-not-found] except ImportError as exc: # pragma: no cover - optional dependency requests = None BeautifulSoup = None REQUESTS_AVAILABLE = False REQUESTS_IMPORT_ERROR = exc else: REQUESTS_AVAILABLE = True REQUESTS_IMPORT_ERROR = None from plugin_interface import BasisPlugin from plugin_helpers import dump_response_html, get_setting_bool, get_setting_string, log_error, log_url, notify_url from search_utils import matches_query as _shared_matches_query, normalize_search_text as _shared_normalize_search_text from http_session_pool import get_requests_session if TYPE_CHECKING: # pragma: no cover from requests import Session as RequestsSession from bs4 import BeautifulSoup as BeautifulSoupT # type: ignore[import-not-found] else: # pragma: no cover RequestsSession = Any BeautifulSoupT = Any ADDON_ID = "plugin.video.viewit" SETTING_BASE_URL = "doku_streams_base_url" DEFAULT_BASE_URL = "https://doku-streams.com" MOST_VIEWED_PATH = "/meistgesehene/" RANDOM_PATH = "/zufaellige-doku/" TAGS_BASE_PATH = "/tag/" DEFAULT_TIMEOUT = 20 GLOBAL_SETTING_LOG_URLS = "debug_log_urls" GLOBAL_SETTING_DUMP_HTML = "debug_dump_html" GLOBAL_SETTING_SHOW_URL_INFO = "debug_show_url_info" GLOBAL_SETTING_LOG_ERRORS = "debug_log_errors" SETTING_LOG_URLS = "log_urls_dokustreams" SETTING_DUMP_HTML = "dump_html_dokustreams" SETTING_SHOW_URL_INFO = "show_url_info_dokustreams" SETTING_LOG_ERRORS = "log_errors_dokustreams" ProgressCallback = Optional[Callable[[str, Optional[int]], Any]] def _emit_progress(callback: ProgressCallback, message: str, percent: Optional[int] = None) -> None: if not callable(callback): return try: callback(str(message or ""), None if percent is None else int(percent)) except Exception: return HEADERS = { "User-Agent": "Mozilla/5.0 (Kodi; ViewIt) AppleWebKit/537.36 (KHTML, like Gecko)", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "de-DE,de;q=0.9,en;q=0.8", "Connection": "keep-alive", } @dataclass(frozen=True) class SearchHit: title: str url: str plot: str = "" poster: str = "" def _extract_last_page(soup: BeautifulSoupT) -> int: max_page = 1 if not soup: return max_page for anchor in soup.select("nav.navigation a[href], nav.pagination a[href], a.page-numbers[href]"): text = (anchor.get_text(" ", strip=True) or "").strip() for candidate in (text, (anchor.get("href") or "").strip()): for value in re.findall(r"/page/(\d+)/", candidate): try: max_page = max(max_page, int(value)) except Exception: continue for value in re.findall(r"(\d+)", candidate): try: max_page = max(max_page, int(value)) except Exception: continue return max_page def _extract_summary_and_poster(article: BeautifulSoupT) -> tuple[str, str]: summary = "" if article: summary_box = article.select_one("div.entry-summary") if summary_box is not None: for p in summary_box.find_all("p"): text = (p.get_text(" ", strip=True) or "").strip() if text: summary = text break poster = "" if article: img = article.select_one("div.entry-thumb img") if img is not None: poster = (img.get("data-src") or "").strip() or (img.get("src") or "").strip() if "lazy_placeholder" in poster and img.get("data-src"): poster = (img.get("data-src") or "").strip() poster = 
def _parse_listing_hits(soup: BeautifulSoupT, *, query: str = "") -> List[SearchHit]:
    hits: List[SearchHit] = []
    if not soup:
        return hits
    seen_titles: set[str] = set()
    seen_urls: set[str] = set()
    for article in soup.select("article[id^='post-']"):
        anchor = article.select_one("h2.entry-title a[href]")
        if anchor is None:
            continue
        href = (anchor.get("href") or "").strip()
        title = (anchor.get_text(" ", strip=True) or "").strip()
        if not href or not title:
            continue
        if query and not _matches_query(query, title=title):
            continue
        url = _absolute_url(href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
        title_key = title.casefold()
        url_key = url.casefold()
        if title_key in seen_titles or url_key in seen_urls:
            continue
        seen_titles.add(title_key)
        seen_urls.add(url_key)
        _log_url_event(url, kind="PARSE")
        summary, poster = _extract_summary_and_poster(article)
        hits.append(SearchHit(title=title, url=url, plot=summary, poster=poster))
    return hits


def _get_base_url() -> str:
    base = get_setting_string(ADDON_ID, SETTING_BASE_URL, default=DEFAULT_BASE_URL).strip()
    if not base:
        base = DEFAULT_BASE_URL
    return base.rstrip("/")


def _absolute_url(url: str) -> str:
    url = (url or "").strip()
    if not url:
        return ""
    if url.startswith("http://") or url.startswith("https://"):
        return url
    if url.startswith("//"):
        return f"https:{url}"
    if url.startswith("/"):
        return f"{_get_base_url()}{url}"
    return f"{_get_base_url()}/{url.lstrip('/')}"


def _normalize_search_text(value: str) -> str:
    return _shared_normalize_search_text(value)


def _matches_query(query: str, *, title: str) -> bool:
    return _shared_matches_query(query, title=title)


def _log_url_event(url: str, *, kind: str = "VISIT") -> None:
    log_url(
        ADDON_ID,
        enabled_setting_id=GLOBAL_SETTING_LOG_URLS,
        plugin_setting_id=SETTING_LOG_URLS,
        log_filename="dokustreams_urls.log",
        url=url,
        kind=kind,
    )


def _log_visit(url: str) -> None:
    _log_url_event(url, kind="VISIT")
    notify_url(
        ADDON_ID,
        heading="Doku-Streams",
        url=url,
        enabled_setting_id=GLOBAL_SETTING_SHOW_URL_INFO,
        plugin_setting_id=SETTING_SHOW_URL_INFO,
    )


def _log_response_html(url: str, body: str) -> None:
    dump_response_html(
        ADDON_ID,
        enabled_setting_id=GLOBAL_SETTING_DUMP_HTML,
        plugin_setting_id=SETTING_DUMP_HTML,
        url=url,
        body=body,
        filename_prefix="dokustreams_response",
    )


def _log_error_message(message: str) -> None:
    log_error(
        ADDON_ID,
        enabled_setting_id=GLOBAL_SETTING_LOG_ERRORS,
        plugin_setting_id=SETTING_LOG_ERRORS,
        log_filename="dokustreams_errors.log",
        message=message,
    )


def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> BeautifulSoupT:
    if requests is None or BeautifulSoup is None:
        raise RuntimeError("requests/bs4 sind nicht verfuegbar.")
    _log_visit(url)
    sess = session or get_requests_session("dokustreams", headers=HEADERS)
    response = None
    try:
        response = sess.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
        response.raise_for_status()
    except Exception as exc:
        _log_error_message(f"GET {url} failed: {exc}")
        raise
    try:
        final_url = (response.url or url) if response is not None else url
        body = (response.text or "") if response is not None else ""
        if final_url != url:
            _log_url_event(final_url, kind="REDIRECT")
        _log_response_html(url, body)
        return BeautifulSoup(body, "html.parser")
    finally:
        if response is not None:
            try:
                response.close()
            except Exception:
                pass
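

# Doku-Streams has no season/episode hierarchy, so the plugin models every
# documentary as a single pseudo-season ("Stream") with one episode named
# after the title (see seasons_for/episodes_for below).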
class DokuStreamsPlugin(BasisPlugin):
    name = "Doku-Streams"
    version = "1.0.0"
    prefer_source_metadata = True

    def __init__(self) -> None:
        self._title_to_url: Dict[str, str] = {}
        self._category_to_url: Dict[str, str] = {}
        self._category_page_count_cache: Dict[str, int] = {}
        self._popular_cache: Optional[List[SearchHit]] = None
        self._title_meta: Dict[str, tuple[str, str]] = {}
        self._requests_available = REQUESTS_AVAILABLE
        self.is_available = True
        self.unavailable_reason: Optional[str] = None
        if not self._requests_available:  # pragma: no cover - optional dependency
            self.is_available = False
            self.unavailable_reason = (
                "requests/bs4 fehlen. Installiere 'requests' und 'beautifulsoup4'."
            )
            if REQUESTS_IMPORT_ERROR:
                print(f"DokuStreamsPlugin Importfehler: {REQUESTS_IMPORT_ERROR}")

    async def search_titles(self, query: str, progress_callback: ProgressCallback = None) -> List[str]:
        _emit_progress(progress_callback, "Doku-Streams Suche", 15)
        hits = self._search_hits(query)
        _emit_progress(progress_callback, f"Treffer verarbeiten ({len(hits)})", 70)
        self._title_to_url = {hit.title: hit.url for hit in hits if hit.title and hit.url}
        for hit in hits:
            if hit.title:
                self._title_meta[hit.title] = (hit.plot, hit.poster)
        titles = [hit.title for hit in hits if hit.title]
        titles.sort(key=lambda value: value.casefold())
        _emit_progress(progress_callback, f"Fertig: {len(titles)} Treffer", 95)
        return titles

    def _search_hits(self, query: str) -> List[SearchHit]:
        query = (query or "").strip()
        if not query or not self._requests_available:
            return []
        search_url = _absolute_url(f"/?s={quote(query)}")
        session = get_requests_session("dokustreams", headers=HEADERS)
        try:
            soup = _get_soup(search_url, session=session)
        except Exception:
            return []
        return _parse_listing_hits(soup)

    def capabilities(self) -> set[str]:
        return {"genres", "popular_series", "tags", "random"}

    def _categories_url(self) -> str:
        return _absolute_url("/kategorien/")

    def _parse_categories(self, soup: BeautifulSoupT) -> Dict[str, str]:
        categories: Dict[str, str] = {}
        if not soup:
            return categories
        root = soup.select_one("ul.nested-category-list")
        if root is None:
            return categories

        def clean_name(value: str) -> str:
            value = (value or "").strip()
            return re.sub(r"\s*\(\d+\)\s*$", "", value).strip()

        def walk(ul, parents: List[str]) -> None:
            for li in ul.find_all("li", recursive=False):
                anchor = li.find("a", href=True)
                if anchor is None:
                    continue
                name = clean_name(anchor.get_text(" ", strip=True) or "")
                href = (anchor.get("href") or "").strip()
                if not name or not href:
                    continue
                child_ul = li.find("ul", class_="nested-category-list")
                if child_ul is not None:
                    walk(child_ul, parents + [name])
                else:
                    if parents:
                        label = " \u2192 ".join(parents + [name])
                        categories[label] = _absolute_url(href)

        walk(root, [])
        return categories

    def _parse_top_categories(self, soup: BeautifulSoupT) -> Dict[str, str]:
        categories: Dict[str, str] = {}
        if not soup:
            return categories
        root = soup.select_one("ul.nested-category-list")
        if root is None:
            return categories
        for li in root.find_all("li", recursive=False):
            anchor = li.find("a", href=True)
            if anchor is None:
                continue
            name = (anchor.get_text(" ", strip=True) or "").strip()
            href = (anchor.get("href") or "").strip()
            if not name or not href:
                continue
            categories[name] = _absolute_url(href)
        return categories

    def genres(self) -> List[str]:
        if not self._requests_available:
            return []
        if self._category_to_url:
            return sorted(self._category_to_url.keys(), key=lambda value: value.casefold())
        try:
            soup = _get_soup(self._categories_url(), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        parsed = self._parse_categories(soup)
        if parsed:
            self._category_to_url = dict(parsed)
        return sorted(self._category_to_url.keys(), key=lambda value: value.casefold())
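
    # Listing URLs use WordPress-style pagination: page 1 is the bare
    # category/tag URL, later pages live under .../page/<n>/, and the highest
    # page number is read from the pagination nav by _extract_last_page.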
    def categories(self) -> List[str]:
        if not self._requests_available:
            return []
        try:
            soup = _get_soup(self._categories_url(), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        parsed = self._parse_top_categories(soup)
        if parsed:
            for key, value in parsed.items():
                self._category_to_url.setdefault(key, value)
        return list(parsed.keys())

    def genre_page_count(self, genre: str) -> int:
        genre = (genre or "").strip()
        if not genre:
            return 1
        if genre in self._category_page_count_cache:
            return max(1, int(self._category_page_count_cache.get(genre, 1)))
        if not self._category_to_url:
            self.genres()
        base_url = self._category_to_url.get(genre, "")
        if not base_url:
            return 1
        try:
            soup = _get_soup(base_url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return 1
        pages = _extract_last_page(soup)
        self._category_page_count_cache[genre] = max(1, pages)
        return self._category_page_count_cache[genre]

    def titles_for_genre_page(self, genre: str, page: int) -> List[str]:
        genre = (genre or "").strip()
        if not genre or not self._requests_available:
            return []
        if not self._category_to_url:
            self.genres()
        base_url = self._category_to_url.get(genre, "")
        if not base_url:
            return []
        page = max(1, int(page or 1))
        url = base_url if page == 1 else f"{base_url.rstrip('/')}/page/{page}/"
        try:
            soup = _get_soup(url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        hits = _parse_listing_hits(soup)
        for hit in hits:
            if hit.title:
                self._title_meta[hit.title] = (hit.plot, hit.poster)
        titles = [hit.title for hit in hits if hit.title]
        self._title_to_url.update({hit.title: hit.url for hit in hits if hit.title and hit.url})
        return titles

    def titles_for_genre(self, genre: str) -> List[str]:
        titles = self.titles_for_genre_page(genre, 1)
        titles.sort(key=lambda value: value.casefold())
        return titles

    def _most_viewed_url(self) -> str:
        return _absolute_url(MOST_VIEWED_PATH)

    def popular_series(self) -> List[str]:
        if not self._requests_available:
            return []
        if self._popular_cache is not None:
            titles = [hit.title for hit in self._popular_cache if hit.title]
            titles.sort(key=lambda value: value.casefold())
            return titles
        try:
            soup = _get_soup(self._most_viewed_url(), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        hits = _parse_listing_hits(soup)
        self._popular_cache = list(hits)
        self._title_to_url.update({hit.title: hit.url for hit in hits if hit.title and hit.url})
        for hit in hits:
            if hit.title:
                self._title_meta[hit.title] = (hit.plot, hit.poster)
        titles = [hit.title for hit in hits if hit.title]
        titles.sort(key=lambda value: value.casefold())
        return titles
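
    # metadata_for serves the plot/poster pairs cached in _title_meta while
    # listing pages were parsed; it never issues an extra per-title request.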
    def metadata_for(self, title: str) -> tuple[dict[str, str], dict[str, str], list[object] | None]:
        title = (title or "").strip()
        if not title:
            return {}, {}, None
        plot, poster = self._title_meta.get(title, ("", ""))
        info: dict[str, str] = {"title": title}
        if plot:
            info["plot"] = plot
        art: dict[str, str] = {}
        if poster:
            art = {"thumb": poster, "poster": poster}
        return info, art, None

    def series_url_for_title(self, title: str) -> Optional[str]:
        return self._title_to_url.get((title or "").strip())

    def remember_series_url(self, title: str, url: str) -> None:
        title = (title or "").strip()
        url = (url or "").strip()
        if title and url:
            self._title_to_url[title] = url

    def seasons_for(self, title: str) -> List[str]:
        title = (title or "").strip()
        if not title:
            return []
        return ["Stream"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        title = (title or "").strip()
        if not title:
            return []
        return [title]

    def tags(self) -> List[str]:
        """Return keywords/tags scraped from the homepage."""
        if not self._requests_available:
            return []
        try:
            soup = _get_soup(_absolute_url("/"), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        tag_list: list[str] = []
        for anchor in soup.select("a[href*='/tag/']"):
            name = (anchor.get_text(" ", strip=True) or "").strip()
            href = (anchor.get("href") or "").strip()
            if name and TAGS_BASE_PATH in href and name not in tag_list:
                tag_list.append(name)
        return sorted(tag_list, key=lambda t: t.casefold())

    def titles_for_tag(self, tag: str, page: int = 1) -> List[str]:
        """Return titles for a keyword/tag."""
        tag = (tag or "").strip()
        if not tag or not self._requests_available:
            return []
        page = max(1, int(page or 1))
        slug = tag.lower().replace(" ", "-")
        base = _absolute_url(f"{TAGS_BASE_PATH}{slug}/")
        url = base if page == 1 else f"{base}page/{page}/"
        try:
            soup = _get_soup(url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        hits = _parse_listing_hits(soup)
        self._title_to_url.update({hit.title: hit.url for hit in hits if hit.title and hit.url})
        for hit in hits:
            if hit.title:
                self._title_meta[hit.title] = (hit.plot, hit.poster)
        return [hit.title for hit in hits if hit.title]

    def random_title(self) -> Optional[str]:
        """Return a random documentary title via the redirect endpoint."""
        if not self._requests_available:
            return None
        try:
            session = get_requests_session("dokustreams", headers=HEADERS)
            resp = session.get(_absolute_url(RANDOM_PATH), headers=HEADERS, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
            resp.raise_for_status()
            final_url = (resp.url or "").strip()
            if not final_url or final_url.rstrip("/").endswith(RANDOM_PATH.rstrip("/")):
                return None
            soup = _get_soup(final_url, session=session)
            hits = _parse_listing_hits(soup)
            if not hits:
                # Single post page: read the title from the H1 heading.
                h1 = soup.select_one("h1.entry-title, h1")
                title = (h1.get_text(" ", strip=True) if h1 else "").strip()
                if title:
                    self._title_to_url[title] = final_url
                    return title
                return None
            hit = hits[0]
            if hit.title:
                self._title_to_url[hit.title] = hit.url
                return hit.title
        except Exception:
            return None
        return None

    def resolve_stream_link(self, link: str) -> Optional[str]:
        """Follow redirects and try ResolveURL for hoster links."""
        if not link:
            return None
        # Resolve YouTube URLs via yt-dlp.
        from ytdlp_helper import extract_youtube_id, resolve_youtube_url

        yt_id = extract_youtube_id(link)
        if yt_id:
            resolved = resolve_youtube_url(yt_id)
            if resolved:
                return resolved
            return None
        from plugin_helpers import resolve_via_resolveurl

        resolved = resolve_via_resolveurl(link, fallback_to_link=False)
        if resolved:
            return resolved
        if self._requests_available:
            try:
                session = get_requests_session("dokustreams", headers=HEADERS)
                resp = session.get(link, headers=HEADERS, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
                resp.raise_for_status()
                return (resp.url or link).strip() or link
            except Exception:
                pass
        return link
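
    # Detail pages embed the player in an <iframe> (typically wrapped in
    # div.fluid-width-video-wrapper); stream_link_for returns that iframe src,
    # which resolve_stream_link above then turns into a playable URL.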
iframe[src]") if iframe is None: iframe = soup.select_one("iframe[src*='youtube'], iframe[src*='vimeo'], iframe[src]") if iframe is None: return None src = (iframe.get("src") or "").strip() if not src: return None return _absolute_url(src) # Alias für die automatische Plugin-Erkennung. Plugin = DokuStreamsPlugin