Refine title search to whole-word matching and bump 0.1.49
This commit is contained in:
@@ -8,7 +8,11 @@ Dieses Plugin ist weitgehend kompatibel zur Serienstream-Integration:
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from html import unescape
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeAlias
|
||||
|
||||
try: # pragma: no cover - optional dependency
|
||||
@@ -25,8 +29,10 @@ else:
|
||||
|
||||
try: # pragma: no cover - optional Kodi helpers
|
||||
import xbmcaddon # type: ignore[import-not-found]
|
||||
import xbmcgui # type: ignore[import-not-found]
|
||||
except ImportError: # pragma: no cover - allow running outside Kodi
|
||||
xbmcaddon = None
|
||||
xbmcgui = None
|
||||
|
||||
from plugin_interface import BasisPlugin
|
||||
from plugin_helpers import dump_response_html, get_setting_bool, get_setting_string, log_error, log_url, notify_url
|
||||
@@ -60,6 +66,9 @@ HEADERS = {
|
||||
"Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
|
||||
"Connection": "keep-alive",
|
||||
}
|
||||
SESSION_CACHE_TTL_SECONDS = 300
|
||||
SESSION_CACHE_PREFIX = "viewit.aniworld"
|
||||
SESSION_CACHE_MAX_TITLE_URLS = 800
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -128,6 +137,67 @@ def _absolute_url(href: str) -> str:
|
||||
return f"{_get_base_url()}{href}" if href.startswith("/") else href
|
||||
|
||||
|
||||
def _session_window() -> Any:
    """Return the Kodi window with id 10000, or ``None`` when it is unavailable.

    ``None`` is returned both when ``xbmcgui`` is not importable (running
    outside Kodi) and when constructing the window raises.
    """
    window = None
    if xbmcgui is not None:
        try:
            window = xbmcgui.Window(10000)
        except Exception:
            # Best effort: callers treat a missing window as "no session cache".
            window = None
    return window
|
||||
|
||||
|
||||
def _session_cache_key(name: str) -> str:
    """Build the window-property key used to store the cache entry ``name``.

    The key is ``<SESSION_CACHE_PREFIX>.<fingerprint>.<name>``, where the
    fingerprint is the first 12 hex characters of the SHA-1 of the current
    base URL, so entries written for one base URL are not read under another.
    """
    encoded_base = _get_base_url().encode("utf-8")
    fingerprint = hashlib.sha1(encoded_base).hexdigest()[:12]
    return f"{SESSION_CACHE_PREFIX}.{fingerprint}.{name}"
|
||||
|
||||
|
||||
def _session_cache_get(name: str) -> Any:
    """Return the unexpired cached value stored under ``name``, else ``None``.

    The value is read from a window property holding a JSON object with the
    keys ``expires_at`` (epoch seconds) and ``data``. ``None`` is returned when
    no window is available, the property is empty or unreadable, the content
    is not a JSON object, or ``expires_at`` is missing, invalid or not in the
    future.
    """
    window = _session_window()
    if window is None:
        return None

    try:
        serialized = window.getProperty(_session_cache_key(name)) or ""
        envelope = json.loads(serialized) if serialized else None
    except Exception:
        return None
    if not isinstance(envelope, dict):
        return None

    try:
        expired = float(envelope.get("expires_at") or 0) <= time.time()
    except Exception:
        # A non-numeric expiry is treated like an expired entry.
        return None
    return None if expired else envelope.get("data")
|
||||
|
||||
|
||||
def _session_cache_set(name: str, data: Any, *, ttl_seconds: int = SESSION_CACHE_TTL_SECONDS) -> None:
    """Store ``data`` under ``name`` as a JSON window property with an expiry.

    The stored JSON object has the keys ``expires_at`` (now plus at least one
    second) and ``data``. The call silently does nothing when no window is
    available, ``data`` cannot be serialised, the serialised text exceeds
    240,000 characters, or setting the property raises.
    """
    window = _session_window()
    if window is None:
        return

    lifetime = max(1, int(ttl_seconds))
    envelope = {"expires_at": float(time.time() + lifetime), "data": data}
    try:
        serialized = json.dumps(envelope, ensure_ascii=False, separators=(",", ":"))
    except Exception:
        return

    # Oversized payloads are dropped instead of being written to the window.
    if len(serialized) > 240_000:
        return

    try:
        window.setProperty(_session_cache_key(name), serialized)
    except Exception:
        pass
|
||||
|
||||
|
||||
def _log_url(url: str, *, kind: str = "VISIT") -> None:
|
||||
log_url(
|
||||
ADDON_ID,
|
||||
@@ -192,10 +262,8 @@ def _matches_query(query: str, *, title: str) -> bool:
|
||||
normalized_query = _normalize_search_text(query)
|
||||
if not normalized_query:
|
||||
return False
|
||||
haystack = _normalize_search_text(title)
|
||||
if not haystack:
|
||||
return False
|
||||
return normalized_query in haystack
|
||||
haystack = f" {_normalize_search_text(title)} "
|
||||
return f" {normalized_query} " in haystack
|
||||
|
||||
|
||||
def _ensure_requests() -> None:
|
||||
@@ -235,7 +303,7 @@ def _get_soup(url: str, *, session: Optional[RequestsSession] = None) -> Beautif
|
||||
return BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
|
||||
def _get_soup_simple(url: str) -> BeautifulSoupT:
|
||||
def _get_html_simple(url: str) -> str:
|
||||
_ensure_requests()
|
||||
_log_visit(url)
|
||||
sess = get_requests_session("aniworld", headers=HEADERS)
|
||||
@@ -247,10 +315,36 @@ def _get_soup_simple(url: str) -> BeautifulSoupT:
|
||||
raise
|
||||
if response.url and response.url != url:
|
||||
_log_url(response.url, kind="REDIRECT")
|
||||
_log_response_html(url, response.text)
|
||||
if _looks_like_cloudflare_challenge(response.text):
|
||||
body = response.text
|
||||
_log_response_html(url, body)
|
||||
if _looks_like_cloudflare_challenge(body):
|
||||
raise RuntimeError("Cloudflare-Schutz erkannt. requests reicht ggf. nicht aus.")
|
||||
return BeautifulSoup(response.text, "html.parser")
|
||||
return body
|
||||
|
||||
|
||||
def _get_soup_simple(url: str) -> BeautifulSoupT:
    """Fetch ``url`` through ``_get_html_simple`` and parse it with ``html.parser``."""
    return BeautifulSoup(_get_html_simple(url), "html.parser")
|
||||
|
||||
|
||||
def _extract_genre_names_from_html(body: str) -> List[str]:
|
||||
names: List[str] = []
|
||||
seen: set[str] = set()
|
||||
pattern = re.compile(
|
||||
r"<div[^>]*class=[\"'][^\"']*seriesGenreList[^\"']*[\"'][^>]*>.*?<h3[^>]*>(.*?)</h3>",
|
||||
re.IGNORECASE | re.DOTALL,
|
||||
)
|
||||
for match in pattern.finditer(body or ""):
|
||||
text = re.sub(r"<[^>]+>", " ", match.group(1) or "")
|
||||
text = unescape(re.sub(r"\s+", " ", text)).strip()
|
||||
if not text:
|
||||
continue
|
||||
key = text.casefold()
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
names.append(text)
|
||||
return names
|
||||
|
||||
|
||||
def _post_json(url: str, *, payload: Dict[str, str], session: Optional[RequestsSession] = None) -> Any:
|
||||
@@ -600,6 +694,8 @@ class AniworldPlugin(BasisPlugin):
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._anime_results: Dict[str, SeriesResult] = {}
|
||||
self._title_url_cache: Dict[str, str] = self._load_title_url_cache()
|
||||
self._genre_names_cache: Optional[List[str]] = None
|
||||
self._season_cache: Dict[str, List[SeasonInfo]] = {}
|
||||
self._season_links_cache: Dict[str, List[SeasonInfo]] = {}
|
||||
self._episode_label_cache: Dict[Tuple[str, str], Dict[str, EpisodeInfo]] = {}
|
||||
@@ -619,6 +715,132 @@ class AniworldPlugin(BasisPlugin):
|
||||
if REQUESTS_IMPORT_ERROR:
|
||||
print(f"AniworldPlugin Importfehler: {REQUESTS_IMPORT_ERROR}")
|
||||
|
||||
def _load_title_url_cache(self) -> Dict[str, str]:
    """Load the title-to-URL mapping from the ``title_urls`` session-cache entry.

    Returns an empty dict when the cached value is not a dict. Keys are
    stripped and case-folded, URLs are stripped, and pairs with an empty key
    or URL are dropped.
    """
    stored = _session_cache_get("title_urls")
    if not isinstance(stored, dict):
        return {}

    cleaned: Dict[str, str] = {}
    for raw_title, raw_url in stored.items():
        normalized_title = str(raw_title or "").strip().casefold()
        normalized_url = str(raw_url or "").strip()
        if normalized_title and normalized_url:
            cleaned[normalized_title] = normalized_url
    return cleaned
|
||||
|
||||
def _save_title_url_cache(self) -> None:
    """Write the title-to-URL mapping to the ``title_urls`` session-cache entry.

    Nothing is written when the mapping is empty. Before writing, the
    earliest-inserted entries are removed until at most
    ``SESSION_CACHE_MAX_TITLE_URLS`` remain.
    """
    cache = self._title_url_cache
    if not cache:
        return

    overflow = len(cache) - SESSION_CACHE_MAX_TITLE_URLS
    if overflow > 0:
        # Dicts iterate in insertion order, so the first keys are the oldest.
        for stale_key in list(cache)[:overflow]:
            cache.pop(stale_key)
    _session_cache_set("title_urls", cache)
|
||||
|
||||
def _remember_anime_result(
    self,
    title: str,
    url: str,
    description: str = "",
    *,
    persist: bool = True,
) -> bool:
    """Record a series in the in-memory results and the title-to-URL cache.

    Args:
        title: Series title; surrounding whitespace is stripped. An empty
            title is ignored.
        url: Series URL; surrounding whitespace is stripped. May be empty.
        description: Optional description text.
        persist: When true and something changed, the title-to-URL cache is
            written to the session cache immediately. Callers that record
            many entries pass ``False`` and save once afterwards.

    Returns:
        ``True`` when the stored result or the title-to-URL cache changed,
        otherwise ``False``.
    """
    title = (title or "").strip()
    url = (url or "").strip()
    if not title:
        return False

    changed = False
    current = self._anime_results.get(title)
    if current is None:
        self._anime_results[title] = SeriesResult(title=title, description=description, url=url)
        changed = True
    else:
        # Merge instead of overwrite: an empty incoming field must not erase
        # a URL or description that is already known for this title.
        merged_url = url or current.url
        merged_description = description or current.description
        if merged_url != current.url or merged_description != current.description:
            self._anime_results[title] = SeriesResult(
                title=title,
                description=merged_description,
                url=merged_url,
            )
            changed = True

    if url:
        key = title.casefold()
        if self._title_url_cache.get(key) != url:
            self._title_url_cache[key] = url
            changed = True

    if changed and persist:
        self._save_title_url_cache()
    return changed
|
||||
|
||||
@staticmethod
|
||||
def _season_links_cache_name(series_url: str) -> str:
|
||||
digest = hashlib.sha1((series_url or "").encode("utf-8")).hexdigest()[:20]
|
||||
return f"season_links.{digest}"
|
||||
|
||||
@staticmethod
|
||||
def _season_episodes_cache_name(season_url: str) -> str:
|
||||
digest = hashlib.sha1((season_url or "").encode("utf-8")).hexdigest()[:20]
|
||||
return f"season_episodes.{digest}"
|
||||
|
||||
def _load_session_season_links(self, series_url: str) -> Optional[List[SeasonInfo]]:
    """Load the cached season links for ``series_url`` from the session cache.

    Cached items that are not dicts, whose ``number`` is not convertible to a
    positive integer, or whose ``url`` is empty are skipped.

    Returns:
        Seasons sorted by number, each with an empty episode list, or
        ``None`` when nothing usable is cached.
    """
    stored = _session_cache_get(self._season_links_cache_name(series_url))
    if not isinstance(stored, list):
        return None

    seasons: List[SeasonInfo] = []
    for entry in stored:
        if not isinstance(entry, dict):
            continue
        try:
            season_number = int(entry.get("number"))
        except Exception:
            continue
        season_url = str(entry.get("url") or "").strip()
        if season_number > 0 and season_url:
            seasons.append(SeasonInfo(number=season_number, url=season_url, episodes=[]))

    if not seasons:
        return None
    return sorted(seasons, key=lambda season: season.number)
|
||||
|
||||
def _save_session_season_links(self, series_url: str, seasons: List[SeasonInfo]) -> None:
    """Store number and URL of each season with a URL in the session cache.

    Nothing is written when no season has a URL.
    """
    records = []
    for season in seasons:
        if season.url:
            records.append({"number": int(season.number), "url": season.url})
    if not records:
        return
    _session_cache_set(self._season_links_cache_name(series_url), records)
|
||||
|
||||
def _load_session_season_episodes(self, season_url: str) -> Optional[List[EpisodeInfo]]:
    """Load the cached episode list for ``season_url`` from the session cache.

    Cached items that are not dicts or whose ``number`` is not convertible to
    a positive integer are skipped. A missing title falls back to
    ``Episode <number>``.

    Returns:
        Episodes sorted by number, or ``None`` when nothing usable is cached.
    """
    stored = _session_cache_get(self._season_episodes_cache_name(season_url))
    if not isinstance(stored, list):
        return None

    episodes: List[EpisodeInfo] = []
    for entry in stored:
        if not isinstance(entry, dict):
            continue
        try:
            episode_number = int(entry.get("number"))
        except Exception:
            continue
        if episode_number <= 0:
            continue
        episode_title = str(entry.get("title") or "").strip()
        episode = EpisodeInfo(
            number=episode_number,
            title=episode_title or f"Episode {episode_number}",
            original_title=str(entry.get("original_title") or "").strip(),
            url=str(entry.get("url") or "").strip(),
        )
        episodes.append(episode)

    if not episodes:
        return None
    return sorted(episodes, key=lambda episode: episode.number)
|
||||
|
||||
def _save_session_season_episodes(self, season_url: str, episodes: List[EpisodeInfo]) -> None:
    """Store number, titles and URL of each episode in the session cache.

    Nothing is written when ``episodes`` is empty.
    """
    records = [
        {
            "number": int(episode.number),
            "title": episode.title,
            "original_title": episode.original_title,
            "url": episode.url,
        }
        for episode in episodes
    ]
    if not records:
        return
    _session_cache_set(self._season_episodes_cache_name(season_url), records)
|
||||
|
||||
def capabilities(self) -> set[str]:
    """Return the names of the optional features this plugin reports."""
    supported = {"genres", "latest_episodes", "popular_series"}
    return supported
|
||||
|
||||
@@ -633,6 +855,12 @@ class AniworldPlugin(BasisPlugin):
|
||||
|
||||
wanted = title.casefold().strip()
|
||||
|
||||
cached_url = self._title_url_cache.get(wanted, "")
|
||||
if cached_url:
|
||||
result = SeriesResult(title=title, description="", url=cached_url)
|
||||
self._anime_results[title] = result
|
||||
return result
|
||||
|
||||
for candidate in self._anime_results.values():
|
||||
if candidate.title and candidate.title.casefold().strip() == wanted:
|
||||
return candidate
|
||||
@@ -640,7 +868,7 @@ class AniworldPlugin(BasisPlugin):
|
||||
try:
|
||||
for entry in self._ensure_popular():
|
||||
if entry.title and entry.title.casefold().strip() == wanted:
|
||||
self._anime_results[entry.title] = entry
|
||||
self._remember_anime_result(entry.title, entry.url, entry.description)
|
||||
return entry
|
||||
except Exception:
|
||||
pass
|
||||
@@ -649,7 +877,7 @@ class AniworldPlugin(BasisPlugin):
|
||||
for entries in self._ensure_genres().values():
|
||||
for entry in entries:
|
||||
if entry.title and entry.title.casefold().strip() == wanted:
|
||||
self._anime_results[entry.title] = entry
|
||||
self._remember_anime_result(entry.title, entry.url, entry.description)
|
||||
return entry
|
||||
except Exception:
|
||||
pass
|
||||
@@ -657,7 +885,7 @@ class AniworldPlugin(BasisPlugin):
|
||||
try:
|
||||
for entry in search_animes(title):
|
||||
if entry.title and entry.title.casefold().strip() == wanted:
|
||||
self._anime_results[entry.title] = entry
|
||||
self._remember_anime_result(entry.title, entry.url, entry.description)
|
||||
return entry
|
||||
except Exception:
|
||||
pass
|
||||
@@ -669,6 +897,7 @@ class AniworldPlugin(BasisPlugin):
|
||||
return list(self._popular_cache)
|
||||
soup = _get_soup_simple(_popular_animes_url())
|
||||
results: List[SeriesResult] = []
|
||||
cache_dirty = False
|
||||
seen: set[str] = set()
|
||||
for anchor in soup.select("div.seriesListContainer a[href^='/anime/stream/']"):
|
||||
href = (anchor.get("href") or "").strip()
|
||||
@@ -690,6 +919,9 @@ class AniworldPlugin(BasisPlugin):
|
||||
continue
|
||||
seen.add(key)
|
||||
results.append(SeriesResult(title=title, description=description, url=url))
|
||||
cache_dirty = self._remember_anime_result(title, url, description, persist=False) or cache_dirty
|
||||
if cache_dirty:
|
||||
self._save_title_url_cache()
|
||||
self._popular_cache = list(results)
|
||||
return list(results)
|
||||
|
||||
@@ -697,7 +929,11 @@ class AniworldPlugin(BasisPlugin):
|
||||
if not self._requests_available:
|
||||
return []
|
||||
entries = self._ensure_popular()
|
||||
self._anime_results.update({entry.title: entry for entry in entries if entry.title})
|
||||
cache_dirty = False
|
||||
for entry in entries:
|
||||
cache_dirty = self._remember_anime_result(entry.title, entry.url, entry.description, persist=False) or cache_dirty
|
||||
if cache_dirty:
|
||||
self._save_title_url_cache()
|
||||
return [entry.title for entry in entries if entry.title]
|
||||
|
||||
def latest_episodes(self, page: int = 1) -> List[LatestEpisode]:
|
||||
@@ -727,6 +963,7 @@ class AniworldPlugin(BasisPlugin):
|
||||
return {key: list(value) for key, value in self._genre_cache.items()}
|
||||
soup = _get_soup_simple(_genres_url())
|
||||
results: Dict[str, List[SeriesResult]] = {}
|
||||
cache_dirty = False
|
||||
genre_blocks = soup.select("#seriesContainer div.genre")
|
||||
if not genre_blocks:
|
||||
genre_blocks = soup.select("div.genre")
|
||||
@@ -752,9 +989,14 @@ class AniworldPlugin(BasisPlugin):
|
||||
continue
|
||||
seen.add(key)
|
||||
entries.append(SeriesResult(title=title, description="", url=url))
|
||||
cache_dirty = self._remember_anime_result(title, url, persist=False) or cache_dirty
|
||||
if entries:
|
||||
results[genre_name] = entries
|
||||
if cache_dirty:
|
||||
self._save_title_url_cache()
|
||||
self._genre_cache = {key: list(value) for key, value in results.items()}
|
||||
self._genre_names_cache = sorted(self._genre_cache.keys(), key=str.casefold)
|
||||
_session_cache_set("genres", self._genre_names_cache)
|
||||
# Für spätere Auflösung (Seasons/Episoden) die Titel->URL Zuordnung auffüllen.
|
||||
for entries in results.values():
|
||||
for entry in entries:
|
||||
@@ -764,11 +1006,31 @@ class AniworldPlugin(BasisPlugin):
|
||||
self._anime_results[entry.title] = entry
|
||||
return {key: list(value) for key, value in results.items()}
|
||||
|
||||
def _ensure_genre_names(self) -> List[str]:
    """Return the sorted genre names, using the cheapest available source.

    Sources are tried in order: the in-memory list, the ``genres``
    session-cache entry, genre headings extracted from the raw genre-page
    HTML, and finally the keys of the full genre mapping from
    ``_ensure_genres``. A freshly fetched result is stored in memory and in
    the session cache.

    Returns:
        A new list of genre names sorted case-insensitively.
    """
    if self._genre_names_cache is not None:
        return list(self._genre_names_cache)

    stored = _session_cache_get("genres")
    if isinstance(stored, list):
        stripped = (str(value).strip() for value in stored)
        names = [text for text in stripped if text]
        if names:
            self._genre_names_cache = sorted(set(names), key=str.casefold)
            return list(self._genre_names_cache)

    try:
        names = _extract_genre_names_from_html(_get_html_simple(_genres_url()))
    except Exception:
        names = []
    if not names:
        # The lightweight extraction found nothing; fall back to the full parse.
        names = list(self._ensure_genres().keys())

    self._genre_names_cache = sorted({name for name in names if name}, key=str.casefold)
    _session_cache_set("genres", self._genre_names_cache)
    return list(self._genre_names_cache)
|
||||
|
||||
def genres(self) -> List[str]:
|
||||
if not self._requests_available:
|
||||
return []
|
||||
genres = list(self._ensure_genres().keys())
|
||||
return [g for g in genres if g]
|
||||
return self._ensure_genre_names()
|
||||
|
||||
def titles_for_genre(self, genre: str) -> List[str]:
|
||||
genre = (genre or "").strip()
|
||||
@@ -785,7 +1047,11 @@ class AniworldPlugin(BasisPlugin):
|
||||
if not entries:
|
||||
return []
|
||||
# Zusätzlich sicherstellen, dass die Titel im Cache sind.
|
||||
self._anime_results.update({entry.title: entry for entry in entries if entry.title and entry.title not in self._anime_results})
|
||||
cache_dirty = False
|
||||
for entry in entries:
|
||||
cache_dirty = self._remember_anime_result(entry.title, entry.url, entry.description, persist=False) or cache_dirty
|
||||
if cache_dirty:
|
||||
self._save_title_url_cache()
|
||||
return [entry.title for entry in entries if entry.title]
|
||||
|
||||
def _season_label(self, number: int) -> str:
|
||||
@@ -810,7 +1076,7 @@ class AniworldPlugin(BasisPlugin):
|
||||
series_url = (series_url or "").strip()
|
||||
if not title or not series_url:
|
||||
return
|
||||
self._anime_results[title] = SeriesResult(title=title, description="", url=series_url)
|
||||
self._remember_anime_result(title, series_url)
|
||||
|
||||
def series_url_for_title(self, title: str) -> str:
|
||||
title = (title or "").strip()
|
||||
@@ -820,6 +1086,9 @@ class AniworldPlugin(BasisPlugin):
|
||||
if direct and direct.url:
|
||||
return direct.url
|
||||
wanted = title.casefold().strip()
|
||||
cached_url = self._title_url_cache.get(wanted, "")
|
||||
if cached_url:
|
||||
return cached_url
|
||||
for candidate in self._anime_results.values():
|
||||
if candidate.title and candidate.title.casefold().strip() == wanted and candidate.url:
|
||||
return candidate.url
|
||||
@@ -832,8 +1101,13 @@ class AniworldPlugin(BasisPlugin):
|
||||
anime = self._find_series_by_title(title)
|
||||
if not anime:
|
||||
return []
|
||||
session_links = self._load_session_season_links(anime.url)
|
||||
if session_links:
|
||||
self._season_links_cache[title] = list(session_links)
|
||||
return list(session_links)
|
||||
seasons = scrape_anime_detail(anime.url, load_episodes=False)
|
||||
self._season_links_cache[title] = list(seasons)
|
||||
self._save_session_season_links(anime.url, seasons)
|
||||
return list(seasons)
|
||||
|
||||
def _ensure_season_episodes(self, title: str, season_number: int) -> Optional[SeasonInfo]:
|
||||
@@ -845,12 +1119,21 @@ class AniworldPlugin(BasisPlugin):
|
||||
target = next((season for season in links if season.number == season_number), None)
|
||||
if not target:
|
||||
return None
|
||||
cached_episodes = self._load_session_season_episodes(target.url)
|
||||
if cached_episodes:
|
||||
season_info = SeasonInfo(number=target.number, url=target.url, episodes=list(cached_episodes))
|
||||
updated = [season for season in seasons if season.number != season_number]
|
||||
updated.append(season_info)
|
||||
updated.sort(key=lambda item: item.number)
|
||||
self._season_cache[title] = updated
|
||||
return season_info
|
||||
season_soup = _get_soup(target.url, session=get_requests_session("aniworld", headers=HEADERS))
|
||||
season_info = SeasonInfo(number=target.number, url=target.url, episodes=_extract_episodes(season_soup))
|
||||
updated = [season for season in seasons if season.number != season_number]
|
||||
updated.append(season_info)
|
||||
updated.sort(key=lambda item: item.number)
|
||||
self._season_cache[title] = updated
|
||||
self._save_session_season_episodes(target.url, season_info.episodes)
|
||||
return season_info
|
||||
|
||||
def _lookup_episode(self, title: str, season_label: str, episode_label: str) -> Optional[EpisodeInfo]:
|
||||
@@ -885,7 +1168,12 @@ class AniworldPlugin(BasisPlugin):
|
||||
self._season_cache.clear()
|
||||
self._episode_label_cache.clear()
|
||||
raise RuntimeError(f"AniWorld-Suche fehlgeschlagen: {exc}") from exc
|
||||
self._anime_results = {result.title: result for result in results}
|
||||
self._anime_results = {}
|
||||
cache_dirty = False
|
||||
for result in results:
|
||||
cache_dirty = self._remember_anime_result(result.title, result.url, result.description, persist=False) or cache_dirty
|
||||
if cache_dirty:
|
||||
self._save_title_url_cache()
|
||||
self._season_cache.clear()
|
||||
self._season_links_cache.clear()
|
||||
self._episode_label_cache.clear()
|
||||
|
||||
Reference in New Issue
Block a user