30 lines
886 B
Python
30 lines
886 B
Python
from __future__ import annotations
|
|
|
|
import re
|
|
from html import unescape
|
|
|
|
|
|
def normalize_genre_label(raw: str) -> str:
    """Normalize a genre label taken from HTML labels or data attributes.

    HTML entities are decoded, every run of whitespace is collapsed to a
    single space, and surrounding whitespace is removed.  If the cleaned
    text is a translation key of the form ``filter.genre_<slug>`` (matched
    case-insensitively), the slug is converted into a display label:
    underscores and any other character outside ``[a-z0-9-]`` become
    hyphens, a few known slugs map to fixed spellings, and every other
    slug is rendered as space-separated capitalized words.

    Args:
        raw: The raw label text.  Falsy values (``None``, ``""``) are
            treated as an empty label; other values are passed through
            ``str()``.

    Returns:
        The normalized label, or ``""`` when the input is empty or the
        translation key carries no usable slug.
    """
    # Decode entities *before* collapsing whitespace so that whitespace
    # produced by decoding (e.g. ``&nbsp;`` or ``&#32;``) is collapsed too.
    text = re.sub(r"\s+", " ", unescape(str(raw or ""))).strip()
    if not text:
        return ""

    key_prefix = "filter.genre_"
    folded = text.casefold()
    if not folded.startswith(key_prefix):
        # Not a translation key: return the cleaned text with its case intact.
        return text

    # Slice the folded string rather than ``text``: case folding can change
    # the string length, so an offset computed on the folded prefix is only
    # guaranteed to be valid for the folded string.
    slug = folded[len(key_prefix):].strip().replace("_", "-")
    slug = re.sub(r"[^a-z0-9-]+", "-", slug).strip("-")
    if not slug:
        return ""

    # Slugs whose display spelling cannot be derived by plain capitalization.
    special = {
        "doku-soap": "Doku-Soap",
        "scifi": "SciFi",
        "fighting-shounen": "Fighting-Shounen",
    }
    if slug in special:
        return special[slug]

    # ``if chunk`` drops empty pieces caused by consecutive hyphens.
    return " ".join(chunk.capitalize() for chunk in slug.split("-") if chunk)
|
|
|