Add Doku-Streams plugin and prefer source metadata
This commit is contained in:
476
addon/plugins/dokustreams_plugin.py
Normal file
476
addon/plugins/dokustreams_plugin.py
Normal file
@@ -0,0 +1,476 @@
|
||||
"""Doku-Streams (doku-streams.com) Integration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
from urllib.parse import quote
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeAlias
|
||||
|
||||
# Optional third-party dependencies: the plugin degrades gracefully (reports
# itself unavailable in __init__) when requests/bs4 are not installed.
try: # pragma: no cover - optional dependency
    import requests
    from bs4 import BeautifulSoup  # type: ignore[import-not-found]
except ImportError as exc: # pragma: no cover - optional dependency
    requests = None
    BeautifulSoup = None
    REQUESTS_AVAILABLE = False
    # Keep the original ImportError so it can be surfaced for debugging later.
    REQUESTS_IMPORT_ERROR = exc
else:
    REQUESTS_AVAILABLE = True
    REQUESTS_IMPORT_ERROR = None

# Project-local helpers: plugin framework base class, debug/log helpers, and
# the shared HTTP session pool.
from plugin_interface import BasisPlugin
from plugin_helpers import dump_response_html, get_setting_bool, get_setting_string, log_error, log_url, notify_url
from http_session_pool import get_requests_session

# Type-only aliases: real types for static analysis, Any at runtime so the
# module still imports when requests/bs4 are missing.
if TYPE_CHECKING: # pragma: no cover
    from requests import Session as RequestsSession
    from bs4 import BeautifulSoup as BeautifulSoupT  # type: ignore[import-not-found]
else: # pragma: no cover
    RequestsSession: TypeAlias = Any
    BeautifulSoupT: TypeAlias = Any
|
||||
|
||||
# Kodi add-on id used to resolve settings and log file locations.
ADDON_ID = "plugin.video.viewit"
# Settings key that lets the user override the site's base URL.
SETTING_BASE_URL = "doku_streams_base_url"
DEFAULT_BASE_URL = "https://doku-streams.com"
# Path of the "most viewed" listing, used by popular_series().
MOST_VIEWED_PATH = "/meistgesehene/"
# HTTP timeout (seconds) applied to every request.
DEFAULT_TIMEOUT = 20
# Add-on wide debug switches...
GLOBAL_SETTING_LOG_URLS = "debug_log_urls"
GLOBAL_SETTING_DUMP_HTML = "debug_dump_html"
GLOBAL_SETTING_SHOW_URL_INFO = "debug_show_url_info"
GLOBAL_SETTING_LOG_ERRORS = "debug_log_errors"
# ...and their per-plugin counterparts scoped to Doku-Streams only.
SETTING_LOG_URLS = "log_urls_dokustreams"
SETTING_DUMP_HTML = "dump_html_dokustreams"
SETTING_SHOW_URL_INFO = "show_url_info_dokustreams"
SETTING_LOG_ERRORS = "log_errors_dokustreams"
# Browser-like request headers; German Accept-Language matches the site.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Kodi; ViewIt) AppleWebKit/537.36 (KHTML, like Gecko)",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SearchHit:
    """One documentary scraped from a listing or search results page."""

    title: str  # display title of the documentary
    url: str  # absolute detail-page URL (normalized, no query/fragment)
    plot: str = ""  # short summary text; may be empty
    poster: str = ""  # absolute poster image URL; may be empty
||||
|
||||
|
||||
def _extract_last_page(soup: BeautifulSoupT) -> int:
|
||||
max_page = 1
|
||||
if not soup:
|
||||
return max_page
|
||||
for anchor in soup.select("nav.navigation a[href], nav.pagination a[href], a.page-numbers[href]"):
|
||||
text = (anchor.get_text(" ", strip=True) or "").strip()
|
||||
for candidate in (text, (anchor.get("href") or "").strip()):
|
||||
for value in re.findall(r"/page/(\\d+)/", candidate):
|
||||
try:
|
||||
max_page = max(max_page, int(value))
|
||||
except Exception:
|
||||
continue
|
||||
for value in re.findall(r"(\\d+)", candidate):
|
||||
try:
|
||||
max_page = max(max_page, int(value))
|
||||
except Exception:
|
||||
continue
|
||||
return max_page
|
||||
|
||||
|
||||
def _extract_summary_and_poster(article: BeautifulSoupT) -> tuple[str, str]:
    """Pull a short plot text and a poster URL out of one listing article.

    The summary is the first non-empty ``<p>`` inside ``div.entry-summary``;
    the poster comes from the thumbnail ``<img>``, preferring ``data-src``
    over ``src`` (lazy-loading placeholders are skipped).  Both values may be
    empty strings.
    """
    summary = ""
    poster = ""
    if article:
        summary_box = article.select_one("div.entry-summary")
        if summary_box is not None:
            # First paragraph with actual text wins.
            paragraphs = summary_box.find_all("p")
            stripped = ((p.get_text(" ", strip=True) or "").strip() for p in paragraphs)
            summary = next((text for text in stripped if text), "")
        img = article.select_one("div.entry-thumb img")
        if img is not None:
            raw_data_src = img.get("data-src")
            poster = (raw_data_src or "").strip() or (img.get("src") or "").strip()
            # Lazy-load placeholder in src: fall back to the real data-src.
            if "lazy_placeholder" in poster and raw_data_src:
                poster = (raw_data_src or "").strip()
    return summary, _absolute_url(poster)
|
||||
|
||||
|
||||
def _parse_listing_hits(soup: BeautifulSoupT, *, query: str = "") -> List[SearchHit]:
    """Convert a listing/search page into a deduplicated list of SearchHit.

    When *query* is given, only titles matching it (see ``_matches_query``)
    are kept.  Duplicates are dropped case-insensitively by both title and
    normalized URL; each accepted URL is logged with kind "PARSE".
    """
    results: List[SearchHit] = []
    if not soup:
        return results
    known_titles: set[str] = set()
    known_urls: set[str] = set()
    for post in soup.select("article[id^='post-']"):
        link = post.select_one("h2.entry-title a[href]")
        if link is None:
            continue
        href = (link.get("href") or "").strip()
        name = (link.get_text(" ", strip=True) or "").strip()
        if not (href and name):
            continue
        if query and not _matches_query(query, title=name):
            continue
        # Normalize: absolute, no fragment, no query string, no trailing slash.
        page_url = _absolute_url(href).split("#", 1)[0].split("?", 1)[0].rstrip("/")
        name_key = name.casefold()
        url_key = page_url.casefold()
        if name_key in known_titles or url_key in known_urls:
            continue
        known_titles.add(name_key)
        known_urls.add(url_key)
        _log_url_event(page_url, kind="PARSE")
        plot, poster = _extract_summary_and_poster(post)
        results.append(SearchHit(title=name, url=page_url, plot=plot, poster=poster))
    return results
|
||||
|
||||
|
||||
def _get_base_url() -> str:
    """Return the configured site base URL without a trailing slash.

    Falls back to DEFAULT_BASE_URL when the setting is empty/blank.
    """
    configured = get_setting_string(ADDON_ID, SETTING_BASE_URL, default=DEFAULT_BASE_URL).strip()
    return (configured or DEFAULT_BASE_URL).rstrip("/")
|
||||
|
||||
|
||||
def _absolute_url(url: str) -> str:
    """Resolve *url* against the configured base URL.

    Empty/None input yields "".  Absolute http(s) URLs pass through,
    protocol-relative URLs get "https:", and everything else is joined onto
    the base URL.
    """
    cleaned = (url or "").strip()
    if not cleaned:
        return ""
    if cleaned.startswith(("http://", "https://")):
        return cleaned
    if cleaned.startswith("//"):
        return "https:" + cleaned
    base = _get_base_url()
    if cleaned.startswith("/"):
        return base + cleaned
    return f"{base}/{cleaned.lstrip('/')}"
|
||||
|
||||
|
||||
def _normalize_search_text(value: str) -> str:
|
||||
value = (value or "").casefold()
|
||||
value = re.sub(r"[^a-z0-9]+", " ", value)
|
||||
value = re.sub(r"\s+", " ", value).strip()
|
||||
return value
|
||||
|
||||
|
||||
def _matches_query(query: str, *, title: str) -> bool:
    """Return True when the normalized *query* occurs as a whole phrase in *title*.

    Both strings are normalized via ``_normalize_search_text``; the query must
    appear on word boundaries (space-padded substring check).  An empty
    normalized query never matches.
    """
    needle = _normalize_search_text(query)
    if not needle:
        return False
    haystack = f" {_normalize_search_text(title)} "
    return f" {needle} " in haystack
|
||||
|
||||
|
||||
def _log_url_event(url: str, *, kind: str = "VISIT") -> None:
    """Append *url* (tagged with *kind*) to the plugin's URL log when enabled."""
    options = {
        "enabled_setting_id": GLOBAL_SETTING_LOG_URLS,
        "plugin_setting_id": SETTING_LOG_URLS,
        "log_filename": "dokustreams_urls.log",
        "url": url,
        "kind": kind,
    }
    log_url(ADDON_ID, **options)
|
||||
|
||||
|
||||
def _log_visit(url: str) -> None:
    """Record a page visit: write it to the URL log, then optionally show it
    to the user as a notification (controlled by the show-url settings)."""
    _log_url_event(url, kind="VISIT")
    notification = {
        "heading": "Doku-Streams",
        "url": url,
        "enabled_setting_id": GLOBAL_SETTING_SHOW_URL_INFO,
        "plugin_setting_id": SETTING_SHOW_URL_INFO,
    }
    notify_url(ADDON_ID, **notification)
|
||||
|
||||
|
||||
def _log_response_html(url: str, body: str) -> None:
    """Dump the raw HTML *body* of a response to disk when dumping is enabled."""
    options = {
        "enabled_setting_id": GLOBAL_SETTING_DUMP_HTML,
        "plugin_setting_id": SETTING_DUMP_HTML,
        "url": url,
        "body": body,
        "filename_prefix": "dokustreams_response",
    }
    dump_response_html(ADDON_ID, **options)
|
||||
|
||||
|
||||
def _log_error_message(message: str) -> None:
    """Append *message* to the plugin's error log when error logging is enabled."""
    options = {
        "enabled_setting_id": GLOBAL_SETTING_LOG_ERRORS,
        "plugin_setting_id": SETTING_LOG_ERRORS,
        "log_filename": "dokustreams_errors.log",
        "message": message,
    }
    log_error(ADDON_ID, **options)
|
||||
|
||||
|
||||
def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> BeautifulSoupT:
    """Fetch *url* and return it parsed with BeautifulSoup (html.parser).

    Logs the visit, any redirect target, and (optionally) the raw HTML.
    Raises RuntimeError when requests/bs4 are missing; HTTP/network errors
    are logged to the error log and re-raised.
    """
    if requests is None or BeautifulSoup is None:
        raise RuntimeError("requests/bs4 sind nicht verfuegbar.")
    _log_visit(url)
    http = session or get_requests_session("dokustreams", headers=HEADERS)
    try:
        response = http.get(url, headers=HEADERS, timeout=DEFAULT_TIMEOUT)
        response.raise_for_status()
    except Exception as exc:
        _log_error_message(f"GET {url} failed: {exc}")
        raise
    final_url = response.url
    if final_url and final_url != url:
        _log_url_event(final_url, kind="REDIRECT")
    body = response.text
    _log_response_html(url, body)
    return BeautifulSoup(body, "html.parser")
|
||||
|
||||
|
||||
class DokuStreamsPlugin(BasisPlugin):
    """ViewIt plugin that scrapes documentaries from doku-streams.com.

    Every documentary maps to exactly one stream, so the season/episode
    hierarchy is flattened: one pseudo season ("Stream") containing a single
    episode named after the title.  Listing pages already carry plot and
    poster data, hence ``prefer_source_metadata``.
    """

    name = "Doku-Streams"
    version = "1.0.0"
    # Metadata scraped from the site should win over external lookups.
    prefer_source_metadata = True

    def __init__(self) -> None:
        # title -> absolute detail-page URL, filled by search/listing calls.
        self._title_to_url: Dict[str, str] = {}
        # category label -> category listing URL.
        self._category_to_url: Dict[str, str] = {}
        # category label -> cached number of listing pages.
        self._category_page_count_cache: Dict[str, int] = {}
        # Cached "most viewed" hits; None until fetched once.
        self._popular_cache: Optional[List[SearchHit]] = None
        # title -> (plot, poster), consumed by metadata_for().
        self._title_meta: Dict[str, tuple[str, str]] = {}
        self._requests_available = REQUESTS_AVAILABLE
        self.is_available = True
        self.unavailable_reason: Optional[str] = None
        if not self._requests_available:  # pragma: no cover - optional dependency
            self.is_available = False
            self.unavailable_reason = (
                "requests/bs4 fehlen. Installiere 'requests' und 'beautifulsoup4'."
            )
            if REQUESTS_IMPORT_ERROR:
                print(f"DokuStreamsPlugin Importfehler: {REQUESTS_IMPORT_ERROR}")

    async def search_titles(self, query: str) -> List[str]:
        """Search the site for *query*; return matching titles sorted
        case-insensitively.

        Side effects: REPLACES the title->URL map with this search's results
        (the genre/popular listings merge via ``update`` instead) and caches
        plot/poster metadata per title.
        """
        hits = self._search_hits(query)
        self._title_to_url = {hit.title: hit.url for hit in hits if hit.title and hit.url}
        for hit in hits:
            if hit.title:
                self._title_meta[hit.title] = (hit.plot, hit.poster)
        titles = [hit.title for hit in hits if hit.title]
        titles.sort(key=lambda value: value.casefold())
        return titles

    def _search_hits(self, query: str) -> List[SearchHit]:
        """Run the site search endpoint (``/?s=``) and parse the result list."""
        query = (query or "").strip()
        if not query or not self._requests_available:
            return []
        search_url = _absolute_url(f"/?s={quote(query)}")
        session = get_requests_session("dokustreams", headers=HEADERS)
        try:
            soup = _get_soup(search_url, session=session)
        except Exception:
            # Errors were already logged inside _get_soup.
            return []
        return _parse_listing_hits(soup, query=query)

    def capabilities(self) -> set[str]:
        """Optional features this plugin supports."""
        return {"genres", "popular_series"}

    def _categories_url(self) -> str:
        """Absolute URL of the category overview page."""
        return _absolute_url("/kategorien/")

    def _parse_categories(self, soup: BeautifulSoupT) -> Dict[str, str]:
        """Parse the nested category tree into ``{"Parent → Leaf": url}``.

        Only leaf categories below at least one parent are emitted; plain
        top-level entries are covered by :meth:`_parse_top_categories`.
        """
        categories: Dict[str, str] = {}
        if not soup:
            return categories
        root = soup.select_one("ul.nested-category-list")
        if root is None:
            return categories

        def clean_name(value: str) -> str:
            # Strip a trailing post-count suffix such as " (12)".
            value = (value or "").strip()
            # BUG FIX: the previous pattern used doubled backslashes inside a
            # raw string (r"\\s*\\(\\d+\\)\\s*$"), matching literal
            # backslashes, so the "(count)" suffix was never removed.
            return re.sub(r"\s*\(\d+\)\s*$", "", value).strip()

        def walk(ul, parents: List[str]) -> None:
            # Depth-first traversal collecting breadcrumb labels.
            for li in ul.find_all("li", recursive=False):
                anchor = li.find("a", href=True)
                if anchor is None:
                    continue
                name = clean_name(anchor.get_text(" ", strip=True) or "")
                href = (anchor.get("href") or "").strip()
                if not name or not href:
                    continue
                child_ul = li.find("ul", class_="nested-category-list")
                if child_ul is not None:
                    walk(child_ul, parents + [name])
                else:
                    if parents:
                        label = " \u2192 ".join(parents + [name])
                        categories[label] = _absolute_url(href)

        walk(root, [])
        return categories

    def _parse_top_categories(self, soup: BeautifulSoupT) -> Dict[str, str]:
        """Parse only the first level of the category tree into name -> URL."""
        categories: Dict[str, str] = {}
        if not soup:
            return categories
        root = soup.select_one("ul.nested-category-list")
        if root is None:
            return categories
        for li in root.find_all("li", recursive=False):
            anchor = li.find("a", href=True)
            if anchor is None:
                continue
            name = (anchor.get_text(" ", strip=True) or "").strip()
            href = (anchor.get("href") or "").strip()
            if not name or not href:
                continue
            categories[name] = _absolute_url(href)
        return categories

    def genres(self) -> List[str]:
        """Return all nested category labels, sorted case-insensitively.

        Results are cached in ``_category_to_url`` after the first fetch.
        """
        if not self._requests_available:
            return []
        if self._category_to_url:
            return sorted(self._category_to_url.keys(), key=lambda value: value.casefold())
        try:
            soup = _get_soup(self._categories_url(), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        parsed = self._parse_categories(soup)
        if parsed:
            self._category_to_url = dict(parsed)
        return sorted(self._category_to_url.keys(), key=lambda value: value.casefold())

    def categories(self) -> List[str]:
        """Return the top-level category names in page order (unsorted)."""
        if not self._requests_available:
            return []
        try:
            soup = _get_soup(self._categories_url(), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        parsed = self._parse_top_categories(soup)
        if parsed:
            for key, value in parsed.items():
                # setdefault: do not clobber URLs already learned via genres().
                self._category_to_url.setdefault(key, value)
        return list(parsed.keys())

    def genre_page_count(self, genre: str) -> int:
        """Return how many listing pages exist for *genre* (at least 1, cached)."""
        genre = (genre or "").strip()
        if not genre:
            return 1
        if genre in self._category_page_count_cache:
            return max(1, int(self._category_page_count_cache.get(genre, 1)))
        if not self._category_to_url:
            self.genres()  # lazily populate the category map
        base_url = self._category_to_url.get(genre, "")
        if not base_url:
            return 1
        try:
            soup = _get_soup(base_url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return 1
        pages = _extract_last_page(soup)
        self._category_page_count_cache[genre] = max(1, pages)
        return self._category_page_count_cache[genre]

    def titles_for_genre_page(self, genre: str, page: int) -> List[str]:
        """Return the titles on listing page *page* (1-based) of *genre*.

        Side effects: caches plot/poster metadata and merges title->URL
        mappings for every hit.
        """
        genre = (genre or "").strip()
        if not genre or not self._requests_available:
            return []
        if not self._category_to_url:
            self.genres()
        base_url = self._category_to_url.get(genre, "")
        if not base_url:
            return []
        page = max(1, int(page or 1))
        # WordPress-style pagination: /<category>/page/<n>/ for n > 1.
        url = base_url if page == 1 else f"{base_url.rstrip('/')}/page/{page}/"
        try:
            soup = _get_soup(url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        hits = _parse_listing_hits(soup)
        for hit in hits:
            if hit.title:
                self._title_meta[hit.title] = (hit.plot, hit.poster)
        titles = [hit.title for hit in hits if hit.title]
        self._title_to_url.update({hit.title: hit.url for hit in hits if hit.title and hit.url})
        return titles

    def titles_for_genre(self, genre: str) -> List[str]:
        """Return the first listing page of *genre*, sorted case-insensitively."""
        titles = self.titles_for_genre_page(genre, 1)
        titles.sort(key=lambda value: value.casefold())
        return titles

    def _most_viewed_url(self) -> str:
        """Absolute URL of the "most viewed" listing."""
        return _absolute_url(MOST_VIEWED_PATH)

    def popular_series(self) -> List[str]:
        """Return the "most viewed" titles, sorted case-insensitively.

        The scraped hits are cached, so the page is fetched at most once per
        plugin instance.
        """
        if not self._requests_available:
            return []
        if self._popular_cache is not None:
            titles = [hit.title for hit in self._popular_cache if hit.title]
            titles.sort(key=lambda value: value.casefold())
            return titles
        try:
            soup = _get_soup(self._most_viewed_url(), session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return []
        hits = _parse_listing_hits(soup)
        self._popular_cache = list(hits)
        self._title_to_url.update({hit.title: hit.url for hit in hits if hit.title and hit.url})
        for hit in hits:
            if hit.title:
                self._title_meta[hit.title] = (hit.plot, hit.poster)
        titles = [hit.title for hit in hits if hit.title]
        titles.sort(key=lambda value: value.casefold())
        return titles

    def metadata_for(self, title: str) -> tuple[dict[str, str], dict[str, str], list[object] | None]:
        """Return (info labels, artwork, cast) for *title* from the scrape cache.

        Cast data is never scraped from this site, so the third element is
        always None.
        """
        title = (title or "").strip()
        if not title:
            return {}, {}, None
        plot, poster = self._title_meta.get(title, ("", ""))
        info: dict[str, str] = {"title": title}
        if plot:
            info["plot"] = plot
        art: dict[str, str] = {}
        if poster:
            art = {"thumb": poster, "poster": poster}
        return info, art, None

    def seasons_for(self, title: str) -> List[str]:
        """Every known title has exactly one pseudo season named "Stream"."""
        title = (title or "").strip()
        if not title or title not in self._title_to_url:
            return []
        return ["Stream"]

    def episodes_for(self, title: str, season: str) -> List[str]:
        """The single pseudo episode is named after the title itself."""
        title = (title or "").strip()
        if not title or title not in self._title_to_url:
            return []
        return [title]

    def stream_link_for(self, title: str, season: str, episode: str) -> Optional[str]:
        """Resolve the embedded player iframe URL for *title*.

        Returns None when the title is unknown, dependencies are missing, the
        page cannot be fetched, or no iframe is present.
        """
        title = (title or "").strip()
        if not title:
            return None
        url = self._title_to_url.get(title)
        if not url:
            return None
        if not self._requests_available:
            return None
        try:
            soup = _get_soup(url, session=get_requests_session("dokustreams", headers=HEADERS))
        except Exception:
            return None
        iframe = soup.select_one("div.fluid-width-video-wrapper iframe[src]")
        if iframe is None:
            # Fallback: any embedded player iframe on the page.
            iframe = soup.select_one("iframe[src*='youtube'], iframe[src*='vimeo'], iframe[src]")
        if iframe is None:
            return None
        src = (iframe.get("src") or "").strip()
        if not src:
            return None
        return _absolute_url(src)
|
||||
|
||||
|
||||
# Alias for automatic plugin discovery by the host application.
Plugin = DokuStreamsPlugin
|
||||
Reference in New Issue
Block a user