From 1e3c6ffdf67d1795ebdee9af8b45495885daddf5 Mon Sep 17 00:00:00 2001 From: "itdrui.de" Date: Sun, 1 Feb 2026 23:14:10 +0100 Subject: [PATCH] Refine title search to whole-word matching and bump 0.1.49 --- addon/addon.xml | 2 +- addon/plugins/aniworld_plugin.py | 322 ++++++++++++++++++++++++-- addon/plugins/serienstream_plugin.py | 14 +- addon/plugins/topstreamfilm_plugin.py | 6 +- 4 files changed, 318 insertions(+), 26 deletions(-) diff --git a/addon/addon.xml b/addon/addon.xml index eb36d17..9785181 100644 --- a/addon/addon.xml +++ b/addon/addon.xml @@ -1,5 +1,5 @@ - + diff --git a/addon/plugins/aniworld_plugin.py b/addon/plugins/aniworld_plugin.py index 6b852f5..9749478 100644 --- a/addon/plugins/aniworld_plugin.py +++ b/addon/plugins/aniworld_plugin.py @@ -8,7 +8,11 @@ Dieses Plugin ist weitgehend kompatibel zur Serienstream-Integration: from __future__ import annotations from dataclasses import dataclass +from html import unescape +import hashlib +import json import re +import time from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeAlias try: # pragma: no cover - optional dependency @@ -25,8 +29,10 @@ else: try: # pragma: no cover - optional Kodi helpers import xbmcaddon # type: ignore[import-not-found] + import xbmcgui # type: ignore[import-not-found] except ImportError: # pragma: no cover - allow running outside Kodi xbmcaddon = None + xbmcgui = None from plugin_interface import BasisPlugin from plugin_helpers import dump_response_html, get_setting_bool, get_setting_string, log_error, log_url, notify_url @@ -60,6 +66,9 @@ HEADERS = { "Accept-Language": "de-DE,de;q=0.9,en;q=0.8", "Connection": "keep-alive", } +SESSION_CACHE_TTL_SECONDS = 300 +SESSION_CACHE_PREFIX = "viewit.aniworld" +SESSION_CACHE_MAX_TITLE_URLS = 800 @dataclass @@ -128,6 +137,67 @@ def _absolute_url(href: str) -> str: return f"{_get_base_url()}{href}" if href.startswith("/") else href +def _session_window() -> Any: + if xbmcgui is None: + return None + try: + return xbmcgui.Window(10000) + except Exception: + return None + + +def _session_cache_key(name: str) -> str: + base_hash = hashlib.sha1(_get_base_url().encode("utf-8")).hexdigest()[:12] + return f"{SESSION_CACHE_PREFIX}.{base_hash}.{name}" + + +def _session_cache_get(name: str) -> Any: + window = _session_window() + if window is None: + return None + raw = "" + try: + raw = window.getProperty(_session_cache_key(name)) or "" + except Exception: + return None + if not raw: + return None + try: + payload = json.loads(raw) + except Exception: + return None + if not isinstance(payload, dict): + return None + expires_at = payload.get("expires_at") + data = payload.get("data") + try: + if float(expires_at or 0) <= time.time(): + return None + except Exception: + return None + return data + + +def _session_cache_set(name: str, data: Any, *, ttl_seconds: int = SESSION_CACHE_TTL_SECONDS) -> None: + window = _session_window() + if window is None: + return + payload = { + "expires_at": float(time.time() + max(1, int(ttl_seconds))), + "data": data, + } + try: + raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":")) + except Exception: + return + if len(raw) > 240_000: + return + try: + window.setProperty(_session_cache_key(name), raw) + except Exception: + return + + def _log_url(url: str, *, kind: str = "VISIT") -> None: log_url( ADDON_ID, @@ -192,10 +262,8 @@ def _matches_query(query: str, *, title: str) -> bool: normalized_query = _normalize_search_text(query) if not normalized_query: return False - haystack = _normalize_search_text(title) - if not haystack: - return False - return normalized_query in haystack + haystack = f" {_normalize_search_text(title)} " + return f" {normalized_query} " in haystack def _ensure_requests() -> None: @@ -235,7 +303,7 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif return BeautifulSoup(response.text, "html.parser") -def _get_soup_simple(url: str) -> BeautifulSoupT: +def _get_html_simple(url: str) -> str: _ensure_requests() _log_visit(url) sess = get_requests_session("aniworld", headers=HEADERS) @@ -247,10 +315,36 @@ def _get_soup_simple(url: str) -> BeautifulSoupT: raise if response.url and response.url != url: _log_url(response.url, kind="REDIRECT") - _log_response_html(url, response.text) - if _looks_like_cloudflare_challenge(response.text): + body = response.text + _log_response_html(url, body) + if _looks_like_cloudflare_challenge(body): raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.") - return BeautifulSoup(response.text, "html.parser") + return body + + +def _get_soup_simple(url: str) -> BeautifulSoupT: + body = _get_html_simple(url) + return BeautifulSoup(body, "html.parser") + + +def _extract_genre_names_from_html(body: str) -> List[str]: + names: List[str] = [] + seen: set[str] = set() + pattern = re.compile( + r"]*class=[\"'][^\"']*seriesGenreList[^\"']*[\"'][^>]*>.*?]*>(.*?)", + re.IGNORECASE | re.DOTALL, + ) + for match in pattern.finditer(body or ""): + text = re.sub(r"<[^>]+>", " ", match.group(1) or "") + text = unescape(re.sub(r"\s+", " ", text)).strip() + if not text: + continue + key = text.casefold() + if key in seen: + continue + seen.add(key) + names.append(text) + return names def _post_json(url: str, *, payload: Dict[str, str], session: Optional[RequestsSession] = None) -> Any: @@ -600,6 +694,8 @@ class AniworldPlugin(BasisPlugin): def __init__(self) -> None: self._anime_results: Dict[str, SeriesResult] = {} + self._title_url_cache: Dict[str, str] = self._load_title_url_cache() + self._genre_names_cache: Optional[List[str]] = None self._season_cache: Dict[str, List[SeasonInfo]] = {} self._season_links_cache: Dict[str, List[SeasonInfo]] = {} self._episode_label_cache: Dict[Tuple[str, str], Dict[str, EpisodeInfo]] = {} @@ -619,6 +715,132 @@ class AniworldPlugin(BasisPlugin): if REQUESTS_IMPORT_ERROR: print(f"AniworldPlugin Importfehler: {REQUESTS_IMPORT_ERROR}") + def _load_title_url_cache(self) -> Dict[str, str]: + raw = _session_cache_get("title_urls") + if not isinstance(raw, dict): + return {} + result: Dict[str, str] = {} + for key, value in raw.items(): + key_text = str(key or "").strip().casefold() + url_text = str(value or "").strip() + if not key_text or not url_text: + continue + result[key_text] = url_text + return result + + def _save_title_url_cache(self) -> None: + if not self._title_url_cache: + return + while len(self._title_url_cache) > SESSION_CACHE_MAX_TITLE_URLS: + self._title_url_cache.pop(next(iter(self._title_url_cache))) + _session_cache_set("title_urls", self._title_url_cache) + + def _remember_anime_result( + self, + title: str, + url: str, + description: str = "", + *, + persist: bool = True, + ) -> bool: + title = (title or "").strip() + url = (url or "").strip() + if not title: + return False + changed = False + current = self._anime_results.get(title) + if current is None or (url and current.url != url) or (description and current.description != description): + self._anime_results[title] = SeriesResult(title=title, description=description, url=url) + changed = True + if url: + key = title.casefold() + if self._title_url_cache.get(key) != url: + self._title_url_cache[key] = url + changed = True + if changed and persist: + self._save_title_url_cache() + return changed + + @staticmethod + def _season_links_cache_name(series_url: str) -> str: + digest = hashlib.sha1((series_url or "").encode("utf-8")).hexdigest()[:20] + return f"season_links.{digest}" + + @staticmethod + def _season_episodes_cache_name(season_url: str) -> str: + digest = hashlib.sha1((season_url or "").encode("utf-8")).hexdigest()[:20] + return f"season_episodes.{digest}" + + def _load_session_season_links(self, series_url: str) -> Optional[List[SeasonInfo]]: + raw = _session_cache_get(self._season_links_cache_name(series_url)) + if not isinstance(raw, list): + return None + seasons: List[SeasonInfo] = [] + for item in raw: + if not isinstance(item, dict): + continue + try: + number = int(item.get("number")) + except Exception: + continue + url = str(item.get("url") or "").strip() + if number <= 0 or not url: + continue + seasons.append(SeasonInfo(number=number, url=url, episodes=[])) + if not seasons: + return None + seasons.sort(key=lambda s: s.number) + return seasons + + def _save_session_season_links(self, series_url: str, seasons: List[SeasonInfo]) -> None: + payload = [{"number": int(season.number), "url": season.url} for season in seasons if season.url] + if payload: + _session_cache_set(self._season_links_cache_name(series_url), payload) + + def _load_session_season_episodes(self, season_url: str) -> Optional[List[EpisodeInfo]]: + raw = _session_cache_get(self._season_episodes_cache_name(season_url)) + if not isinstance(raw, list): + return None + episodes: List[EpisodeInfo] = [] + for item in raw: + if not isinstance(item, dict): + continue + try: + number = int(item.get("number")) + except Exception: + continue + title = str(item.get("title") or "").strip() + original_title = str(item.get("original_title") or "").strip() + url = str(item.get("url") or "").strip() + if number <= 0: + continue + episodes.append( + EpisodeInfo( + number=number, + title=title or f"Episode {number}", + original_title=original_title, + url=url, + ) + ) + if not episodes: + return None + episodes.sort(key=lambda item: item.number) + return episodes + + def _save_session_season_episodes(self, season_url: str, episodes: List[EpisodeInfo]) -> None: + payload = [] + for item in episodes: + payload.append( + { + "number": int(item.number), + "title": item.title, + "original_title": item.original_title, + "url": item.url, + } + ) + if payload: + _session_cache_set(self._season_episodes_cache_name(season_url), payload) + def capabilities(self) -> set[str]: return {"popular_series", "genres", "latest_episodes"} @@ -633,6 +855,12 @@ class AniworldPlugin(BasisPlugin): wanted = title.casefold().strip() + cached_url = self._title_url_cache.get(wanted, "") + if cached_url: + result = SeriesResult(title=title, description="", url=cached_url) + self._anime_results[title] = result + return result + for candidate in self._anime_results.values(): if candidate.title and candidate.title.casefold().strip() == wanted: return candidate @@ -640,7 +868,7 @@ class AniworldPlugin(BasisPlugin): try: for entry in self._ensure_popular(): if entry.title and entry.title.casefold().strip() == wanted: - self._anime_results[entry.title] = entry + self._remember_anime_result(entry.title, entry.url, entry.description) return entry except Exception: pass @@ -649,7 +877,7 @@ class AniworldPlugin(BasisPlugin): for entries in self._ensure_genres().values(): for entry in entries: if entry.title and entry.title.casefold().strip() == wanted: - self._anime_results[entry.title] = entry + self._remember_anime_result(entry.title, entry.url, entry.description) return entry except Exception: pass @@ -657,7 +885,7 @@ class AniworldPlugin(BasisPlugin): try: for entry in search_animes(title): if entry.title and entry.title.casefold().strip() == wanted: - self._anime_results[entry.title] = entry + self._remember_anime_result(entry.title, entry.url, entry.description) return entry except Exception: pass @@ -669,6 +897,7 @@ class AniworldPlugin(BasisPlugin): return list(self._popular_cache) soup = _get_soup_simple(_popular_animes_url()) results: List[SeriesResult] = [] + cache_dirty = False seen: set[str] = set() for anchor in soup.select("div.seriesListContainer a[href^='/anime/stream/']"): href = (anchor.get("href") or "").strip() @@ -690,6 +919,9 @@ class AniworldPlugin(BasisPlugin): continue seen.add(key) results.append(SeriesResult(title=title, description=description, url=url)) + cache_dirty = self._remember_anime_result(title, url, description, persist=False) or cache_dirty + if cache_dirty: + self._save_title_url_cache() self._popular_cache = list(results) return list(results) @@ -697,7 +929,11 @@ class AniworldPlugin(BasisPlugin): if not self._requests_available: return [] entries = self._ensure_popular() - self._anime_results.update({entry.title: entry for entry in entries if entry.title}) + cache_dirty = False + for entry in entries: + cache_dirty = self._remember_anime_result(entry.title, entry.url, entry.description, persist=False) or cache_dirty + if cache_dirty: + self._save_title_url_cache() return [entry.title for entry in entries if entry.title] def latest_episodes(self, page: int = 1) -> List[LatestEpisode]: @@ -727,6 +963,7 @@ class AniworldPlugin(BasisPlugin): return {key: list(value) for key, value in self._genre_cache.items()} soup = _get_soup_simple(_genres_url()) results: Dict[str, List[SeriesResult]] = {} + cache_dirty = False genre_blocks = soup.select("#seriesContainer div.genre") if not genre_blocks: genre_blocks = soup.select("div.genre") @@ -752,9 +989,14 @@ class AniworldPlugin(BasisPlugin): continue seen.add(key) entries.append(SeriesResult(title=title, description="", url=url)) + cache_dirty = self._remember_anime_result(title, url, persist=False) or cache_dirty if entries: results[genre_name] = entries + if cache_dirty: + self._save_title_url_cache() self._genre_cache = {key: list(value) for key, value in results.items()} + self._genre_names_cache = sorted(self._genre_cache.keys(), key=str.casefold) + _session_cache_set("genres", self._genre_names_cache) # Für spätere Auflösung (Seasons/Episoden) die Titel->URL Zuordnung auffüllen. for entries in results.values(): for entry in entries: @@ -764,11 +1006,31 @@ class AniworldPlugin(BasisPlugin): self._anime_results[entry.title] = entry return {key: list(value) for key, value in results.items()} + def _ensure_genre_names(self) -> List[str]: + if self._genre_names_cache is not None: + return list(self._genre_names_cache) + cached = _session_cache_get("genres") + if isinstance(cached, list): + names = [str(value).strip() for value in cached if str(value).strip()] + if names: + self._genre_names_cache = sorted(set(names), key=str.casefold) + return list(self._genre_names_cache) + try: + body = _get_html_simple(_genres_url()) + names = _extract_genre_names_from_html(body) + except Exception: + names = [] + if not names: + mapping = self._ensure_genres() + names = list(mapping.keys()) + self._genre_names_cache = sorted({name for name in names if name}, key=str.casefold) + _session_cache_set("genres", self._genre_names_cache) + return list(self._genre_names_cache) + def genres(self) -> List[str]: if not self._requests_available: return [] - genres = list(self._ensure_genres().keys()) - return [g for g in genres if g] + return self._ensure_genre_names() def titles_for_genre(self, genre: str) -> List[str]: genre = (genre or "").strip() @@ -785,7 +1047,11 @@ class AniworldPlugin(BasisPlugin): if not entries: return [] # Zusätzlich sicherstellen, dass die Titel im Cache sind. - self._anime_results.update({entry.title: entry for entry in entries if entry.title and entry.title not in self._anime_results}) + cache_dirty = False + for entry in entries: + cache_dirty = self._remember_anime_result(entry.title, entry.url, entry.description, persist=False) or cache_dirty + if cache_dirty: + self._save_title_url_cache() return [entry.title for entry in entries if entry.title] def _season_label(self, number: int) -> str: @@ -810,7 +1076,7 @@ class AniworldPlugin(BasisPlugin): series_url = (series_url or "").strip() if not title or not series_url: return - self._anime_results[title] = SeriesResult(title=title, description="", url=series_url) + self._remember_anime_result(title, series_url) def series_url_for_title(self, title: str) -> str: title = (title or "").strip() @@ -820,6 +1086,9 @@ class AniworldPlugin(BasisPlugin): if direct and direct.url: return direct.url wanted = title.casefold().strip() + cached_url = self._title_url_cache.get(wanted, "") + if cached_url: + return cached_url for candidate in self._anime_results.values(): if candidate.title and candidate.title.casefold().strip() == wanted and candidate.url: return candidate.url @@ -832,8 +1101,13 @@ class AniworldPlugin(BasisPlugin): anime = self._find_series_by_title(title) if not anime: return [] + session_links = self._load_session_season_links(anime.url) + if session_links: + self._season_links_cache[title] = list(session_links) + return list(session_links) seasons = scrape_anime_detail(anime.url, load_episodes=False) self._season_links_cache[title] = list(seasons) + self._save_session_season_links(anime.url, seasons) return list(seasons) def _ensure_season_episodes(self, title: str, season_number: int) -> Optional[SeasonInfo]: @@ -845,12 +1119,21 @@ class AniworldPlugin(BasisPlugin): target = next((season for season in links if season.number == season_number), None) if not target: return None + cached_episodes = self._load_session_season_episodes(target.url) + if cached_episodes: + season_info = SeasonInfo(number=target.number, url=target.url, episodes=list(cached_episodes)) + updated = [season for season in seasons if season.number != season_number] + updated.append(season_info) + updated.sort(key=lambda item: item.number) + self._season_cache[title] = updated + return season_info season_soup = _get_soup(target.url, session=get_requests_session("aniworld", headers=HEADERS)) season_info = SeasonInfo(number=target.number, url=target.url, episodes=_extract_episodes(season_soup)) updated = [season for season in seasons if season.number != season_number] updated.append(season_info) updated.sort(key=lambda item: item.number) self._season_cache[title] = updated + self._save_session_season_episodes(target.url, season_info.episodes) return season_info def _lookup_episode(self, title: str, season_label: str, episode_label: str) -> Optional[EpisodeInfo]: @@ -885,7 +1168,12 @@ class AniworldPlugin(BasisPlugin): self._season_cache.clear() self._episode_label_cache.clear() raise RuntimeError(f"AniWorld-Suche fehlgeschlagen: {exc}") from exc - self._anime_results = {result.title: result for result in results} + self._anime_results = {} + cache_dirty = False + for result in results: + cache_dirty = self._remember_anime_result(result.title, result.url, result.description, persist=False) or cache_dirty + if cache_dirty: + self._save_title_url_cache() self._season_cache.clear() self._season_links_cache.clear() self._episode_label_cache.clear() diff --git a/addon/plugins/serienstream_plugin.py b/addon/plugins/serienstream_plugin.py index 9c4b803..c8dfa13 100644 --- a/addon/plugins/serienstream_plugin.py +++ b/addon/plugins/serienstream_plugin.py @@ -238,6 +238,14 @@ def _normalize_search_text(value: str) -> str: return value +def _matches_query(query: str, *, title: str) -> bool: + normalized_query = _normalize_search_text(query) + if not normalized_query: + return False + haystack = f" {_normalize_search_text(title)} " + return f" {normalized_query} " in haystack + + def _is_episode_tba(title: str, original_title: str) -> bool: combined = f"{title} {original_title}".casefold() markers = ("tba", "demnächst", "demnaechst", "coming soon", "to be announced") @@ -395,8 +403,7 @@ def _extract_genre_names_from_html(body: str) -> List[str]: def search_series(query: str) -> List[SeriesResult]: """Sucht Serien im (/serien)-Katalog (Genre-liste) nach Titel/Alt-Titel.""" _ensure_requests() - normalized_query = _normalize_search_text(query) - if not normalized_query: + if not _normalize_search_text(query): return [] # Direkter Abruf wie in fetch_serien.py. catalog_url = f"{_get_base_url()}/serien?by=genre" @@ -404,8 +411,7 @@ def search_series(query: str) -> List[SeriesResult]: results: List[SeriesResult] = [] for series in parse_series_catalog(soup).values(): for entry in series: - haystack = _normalize_search_text(entry.title) - if entry.title and normalized_query in haystack: + if entry.title and _matches_query(query, title=entry.title): results.append(entry) return results diff --git a/addon/plugins/topstreamfilm_plugin.py b/addon/plugins/topstreamfilm_plugin.py index 3334f59..469dff3 100644 --- a/addon/plugins/topstreamfilm_plugin.py +++ b/addon/plugins/topstreamfilm_plugin.py @@ -106,10 +106,8 @@ def _matches_query(query: str, *, title: str, description: str) -> bool: normalized_query = _normalize_search_text(query) if not normalized_query: return False - haystack = _normalize_search_text(title) - if not haystack: - return False - return normalized_query in haystack + haystack = f" {_normalize_search_text(title)} " + return f" {normalized_query} " in haystack def _strip_der_film_suffix(title: str) -> str: