"""YouTube plugin for ViewIT.

Search and playback of YouTube videos via HTML scraping and yt-dlp.
Requires script.module.yt-dlp (optional).

Video entries are encoded as "Title||VIDEO_ID".
"""

from __future__ import annotations

import json
import re
from typing import Any, Callable, Dict, List, Optional, Set

try:
    import requests
except ImportError:
    requests = None  # type: ignore

from plugin_interface import BasisPlugin
from ytdlp_helper import ensure_ytdlp_in_path, resolve_youtube_url

try:
    import xbmc  # type: ignore

    def _log(msg: str) -> None:
        """Write *msg* to the Kodi log at warning level with the plugin prefix."""
        xbmc.log(f"[ViewIt][YouTube] {msg}", xbmc.LOGWARNING)

except ImportError:

    def _log(msg: str) -> None:
        """No-op logger used when the ``xbmc`` module is not importable."""
        pass


# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

DEFAULT_TIMEOUT = 20
_SEP = "||"  # separator between title and video ID
BASE_URL = "https://www.youtube.com"

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
}

ProgressCallback = Optional[Callable[[str, Optional[int]], Any]]

# Locates an "ytInitialData = {" assignment; the match ends right before the
# opening brace so a JSON decoder can take over from there.
_YT_INITIAL_DATA_RE = re.compile(r"ytInitialData\s*=\s*(?=\{)")


# ---------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------

def _encode(title: str, video_id: str) -> str:
    """Return the list entry ``"<title>||<video_id>"`` for a video."""
    return f"{title}{_SEP}{video_id}"


def _decode_id(entry: str) -> Optional[str]:
    """Extract the video ID from an encoded entry.

    The ID is everything after the LAST separator, so titles that themselves
    contain ``||`` still decode correctly. Without a separator, a trailing
    11-character YouTube-style ID is used as a fallback.

    Returns the (stripped) ID, or ``None`` if nothing ID-like is found.
    """
    if _SEP in entry:
        return entry.rsplit(_SEP, 1)[1].strip()
    # Fallback: 11-character YouTube ID at the end of the string
    m = re.search(r"([A-Za-z0-9_-]{11})$", entry)
    return m.group(1) if m else None


def _decode_title(entry: str) -> str:
    """Return the title part of an encoded entry.

    Splits on the LAST separator (mirroring ``_decode_id``); an entry without
    a separator is returned unchanged.
    """
    if _SEP in entry:
        return entry.rsplit(_SEP, 1)[0].strip()
    return entry


def _get_session() -> Any:
    """Return an HTTP session preconfigured with ``HEADERS``.

    Prefers the shared session from ``http_session_pool``; if that fails for
    any reason, falls back to a fresh ``requests.Session``. Returns ``None``
    when ``requests`` is not available either.
    """
    try:
        from http_session_pool import get_requests_session

        return get_requests_session("youtube", headers=HEADERS)
    except Exception:
        # Deliberate best effort: any failure of the pool falls back to a
        # plain requests session.
        if requests:
            s = requests.Session()
            s.headers.update(HEADERS)
            return s
        return None


def _extract_yt_initial_data(html: str) -> Optional[dict]:
    """Extract the ``ytInitialData`` JSON object from an HTML page source.

    The object is parsed with ``JSONDecoder.raw_decode`` starting at the
    opening brace. Unlike a non-greedy ``{...};`` regex, this cannot be cut
    short by a ``};`` sequence that appears inside a JSON string.

    Returns the decoded dict, or ``None`` if no assignment is found or none
    of the candidates decodes to a JSON object.
    """
    decoder = json.JSONDecoder()
    for m in _YT_INITIAL_DATA_RE.finditer(html):
        try:
            data, _end = decoder.raw_decode(html, m.end())
        except (ValueError, RecursionError):
            # Not valid JSON at this position; try the next occurrence.
            continue
        if isinstance(data, dict):
            return data
    return None


def _videos_from_search_data(data: dict) -> List[str]:
    """Extract video entries from ``ytInitialData`` of a search results page.

    Walks ``contents.twoColumnSearchResultsRenderer.primaryContents
    .sectionListRenderer.contents[*].itemSectionRenderer.contents[*]`` and
    collects every ``videoRenderer`` / ``compactVideoRenderer`` that has both
    a video ID and a title.

    Returns a list of ``"Title||VIDEO_ID"`` entries. Errors caused by an
    unexpected structure are logged; entries collected so far are returned.
    """
    results: List[str] = []
    try:
        sections = (
            data
            .get("contents", {})
            .get("twoColumnSearchResultsRenderer", {})
            .get("primaryContents", {})
            .get("sectionListRenderer", {})
            .get("contents", [])
        )
        for section in sections:
            items = section.get("itemSectionRenderer", {}).get("contents", [])
            for item in items:
                vr = item.get("videoRenderer") or item.get("compactVideoRenderer")
                if not vr:
                    continue
                video_id = vr.get("videoId", "").strip()
                if not video_id:
                    continue
                # The title is either a list of text runs or a simpleText.
                title_runs = vr.get("title", {}).get("runs", [])
                title = "".join(r.get("text", "") for r in title_runs).strip()
                if not title:
                    title = vr.get("title", {}).get("simpleText", "").strip()
                if title:
                    results.append(_encode(title, video_id))
    except Exception as exc:
        _log(f"[YouTube] _videos_from_search_data Fehler: {exc}")
    return results


def _search_with_ytdlp(query: str, count: int = 20) -> List[str]:
    """Search YouTube videos via the yt-dlp ``ytsearch`` extractor.

    Args:
        query: Search text.
        count: Number of results requested (``ytsearch<count>:<query>``).

    Returns:
        A list of ``"Title||VIDEO_ID"`` entries; empty if yt-dlp is not
        available, nothing was found, or the search failed (logged).
    """
    if not ensure_ytdlp_in_path():
        return []
    try:
        from yt_dlp import YoutubeDL  # type: ignore
    except ImportError:
        return []

    ydl_opts = {"quiet": True, "no_warnings": True, "extract_flat": True}
    try:
        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(f"ytsearch{count}:{query}", download=False)
        if not info:
            return []
        return [
            _encode(e["title"], e["id"])
            for e in (info.get("entries") or [])
            if e.get("id") and e.get("title")
        ]
    except Exception as exc:
        _log(f"[YouTube] yt-dlp Suche Fehler: {exc}")
        return []


def _fetch_search_videos(url: str) -> List[str]:
    """Fetch videos from a YouTube search page via its ``ytInitialData``.

    Returns a list of ``"Title||VIDEO_ID"`` entries; empty if no HTTP session
    is available, the page has no parsable data, or the request fails
    (logged).
    """
    session = _get_session()
    if session is None:
        return []
    try:
        resp = session.get(url, timeout=DEFAULT_TIMEOUT)
        resp.raise_for_status()
        data = _extract_yt_initial_data(resp.text)
        if not data:
            return []
        return _videos_from_search_data(data)
    except Exception as exc:
        _log(f"[YouTube] _fetch_search_videos ({url}): {exc}")
        return []


# ---------------------------------------------------------------------------
# Plugin
# ---------------------------------------------------------------------------

class YoutubePlugin(BasisPlugin):
    """ViewIT plugin that exposes YouTube search results as playable titles.

    Each video is presented as a title with the single season ``"Stream"``
    whose only episode is the encoded title entry itself.
    """

    name = "YouTube"

    # Every video has exactly one pseudo-season.
    _SEASONS = ["Stream"]

    def capabilities(self) -> Set[str]:
        """Return the plugin's capability set (empty for this plugin)."""
        return set()

    async def search_titles(
        self,
        query: str,
        progress_callback: ProgressCallback = None,
    ) -> List[str]:
        """Search YouTube for *query*.

        Args:
            query: Search text; a blank query yields no results.
            progress_callback: Accepted for interface compatibility; not used
                here.

        Returns:
            A list of ``"Title||VIDEO_ID"`` entries.
        """
        if not query.strip():
            return []
        # Primary: yt-dlp (robust, no HTML scraping)
        results = _search_with_ytdlp(query)
        if results:
            return results
        # Fallback: HTML scraping
        if requests is None:
            return []
        url = f"{BASE_URL}/results?search_query={requests.utils.quote(query)}"  # type: ignore
        return _fetch_search_videos(url)

    def seasons_for(self, title: str) -> List[str]:
        """Return a fresh copy of the fixed season list (``["Stream"]``)."""
        return list(self._SEASONS)

    def episodes_for(self, title: str, season: str) -> List[str]:
        """Return ``[title]`` for the ``"Stream"`` season, else an empty list."""
        if season == "Stream":
            return [title]
        return []

    def stream_link_for(self, title: str, season: str, episode: str) -> Optional[str]:
        """Resolve the stream URL for a video.

        The video ID is taken from *episode*, falling back to *title*.
        Returns ``None`` if neither yields an ID; otherwise the result of
        ``resolve_youtube_url``.
        """
        video_id = _decode_id(episode) or _decode_id(title)
        if not video_id:
            return None
        return resolve_youtube_url(video_id)

    def resolve_stream_link(self, link: str) -> Optional[str]:
        """Return *link* unchanged."""
        return link  # already a direct URL

    def metadata_for(self, title: str):
        """Derive metadata and thumbnail URLs from the encoded title.

        Returns:
            A tuple ``(info, art, None)``: ``info`` holds the decoded title
            under ``"title"``; ``art`` holds ``"thumb"`` and ``"poster"``
            image URLs when a video ID could be decoded, and is empty
            otherwise.
        """
        video_id = _decode_id(title)
        clean_title = _decode_title(title)
        info: Dict[str, str] = {"title": clean_title}
        art: Dict[str, str] = {}
        if video_id:
            art["thumb"] = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
            art["poster"] = f"https://i.ytimg.com/vi/{video_id}/maxresdefault.jpg"
        return info, art, None


Plugin = YoutubePlugin