From 088d232bfdac793fcd02977ceab11dc7307ecc1e Mon Sep 17 00:00:00 2001 From: Benexl Date: Tue, 12 Aug 2025 00:39:43 +0300 Subject: [PATCH] feat(normalizer): add user normalizer json --- .../interactive/menu/media/provider_search.py | 8 ++- fastanime/cli/service/download/service.py | 29 ++++++---- fastanime/core/utils/normalizer.py | 55 ++++++++++++++----- 3 files changed, 64 insertions(+), 28 deletions(-) diff --git a/fastanime/cli/interactive/menu/media/provider_search.py b/fastanime/cli/interactive/menu/media/provider_search.py index ce15eb7..d3f0da1 100644 --- a/fastanime/cli/interactive/menu/media/provider_search.py +++ b/fastanime/cli/interactive/menu/media/provider_search.py @@ -7,7 +7,7 @@ from ...state import InternalDirective, MenuName, ProviderState, State @session.menu def provider_search(ctx: Context, state: State) -> State | InternalDirective: from .....core.utils.fuzzy import fuzz - from .....core.utils.normalizer import normalize_title + from .....core.utils.normalizer import normalize_title, update_user_normalizer_json feedback = ctx.feedback media_item = state.media_api.media_item @@ -71,6 +71,12 @@ def provider_search(ctx: Context, state: State) -> State | InternalDirective: if not chosen_title or chosen_title == "Back": return InternalDirective.BACK + if selector.confirm( + f"Would you like to update your local normalizer json with: {chosen_title} for {media_title}" + ): + update_user_normalizer_json( + chosen_title, media_title, config.general.provider.value + ) selected_provider_anime = provider_results_map[chosen_title] with feedback.progress( diff --git a/fastanime/cli/service/download/service.py b/fastanime/cli/service/download/service.py index 4c4807b..cce526b 100644 --- a/fastanime/cli/service/download/service.py +++ b/fastanime/cli/service/download/service.py @@ -34,7 +34,7 @@ class DownloadService: media_api_service: "BaseApiClient", provider_service: "BaseAnimeProvider", ): - self.config = config + self.app_config = config self.registry 
= registry_service self.media_api = media_api_service self.provider = provider_service @@ -157,7 +157,7 @@ class DownloadService: continue if ( episode.download_attempts - <= self.config.downloads.max_retry_attempts + <= self.app_config.downloads.max_retry_attempts ): logger.info( f"Retrying {episode_number} of {record.media_item.title.english}" @@ -187,12 +187,17 @@ class DownloadService: # 1. Search the provider to get the provider-specific ID provider_search_results = self.provider.search( - SearchParams(query=media_title) + SearchParams( + query=normalize_title( + media_title, self.app_config.general.provider.value, True + ), + translation_type=self.app_config.stream.translation_type, + ) ) if not provider_search_results or not provider_search_results.results: raise ValueError( - f"Could not find '{media_title}' on provider '{self.config.general.provider.value}'" + f"Could not find '{media_title}' on provider '{self.app_config.general.provider.value}'" ) # 2. Find the best match using fuzzy logic (like auto-select) @@ -203,7 +208,7 @@ class DownloadService: provider_results_map.keys(), key=lambda p_title: fuzz.ratio( normalize_title( - p_title, self.config.general.provider.value + p_title, self.app_config.general.provider.value ).lower(), media_title.lower(), ), @@ -225,7 +230,7 @@ class DownloadService: anime_id=provider_anime.id, query=media_title, episode=episode_number, - translation_type=self.config.stream.translation_type, + translation_type=self.app_config.stream.translation_type, ) ) if not streams_iterator: @@ -235,11 +240,11 @@ class DownloadService: if not server or not server.links: raise ValueError(f"No stream links found for Episode {episode_number}") - if server.name != self.config.downloads.server.value: + if server.name != self.app_config.downloads.server.value: while True: try: _server = next(streams_iterator) - if _server.name == self.config.downloads.server.value: + if _server.name == self.app_config.downloads.server.value: server = _server 
break except StopIteration: @@ -259,9 +264,9 @@ class DownloadService: silent=False, headers=server.headers, subtitles=[sub.url for sub in server.subtitles], - merge=self.config.downloads.merge_subtitles, - clean=self.config.downloads.cleanup_after_merge, - no_check_certificate=self.config.downloads.no_check_certificate, + merge=self.app_config.downloads.merge_subtitles, + clean=self.app_config.downloads.cleanup_after_merge, + no_check_certificate=self.app_config.downloads.no_check_certificate, ) result = self.downloader.download(download_params) @@ -280,7 +285,7 @@ class DownloadService: file_path=result.merged_path or result.video_path, file_size=file_size, quality=stream_link.quality, - provider_name=self.config.general.provider.value, + provider_name=self.app_config.general.provider.value, server_name=server.name, subtitle_paths=result.subtitle_paths, ) diff --git a/fastanime/core/utils/normalizer.py b/fastanime/core/utils/normalizer.py index c45ac43..5eb8db5 100644 --- a/fastanime/core/utils/normalizer.py +++ b/fastanime/core/utils/normalizer.py @@ -46,14 +46,19 @@ import json import logging from typing import Dict, Optional -from ..constants import ASSETS_DIR +from ..constants import APP_DATA_DIR, ASSETS_DIR logger = logging.getLogger(__name__) # Cache for the normalizer data to avoid repeated file reads _normalizer_cache: Optional[Dict[str, Dict[str, str]]] = None +USER_NORMALIZER_JSON = APP_DATA_DIR / "normalizer.json" +DEFAULT_NORMALIZER_JSON = ASSETS_DIR / "normalizer.json" + + +# will load one in the config dir if available and merge them def _load_normalizer_data() -> Dict[str, Dict[str, str]]: """ Load the normalizer.json file and cache it. 
@@ -70,21 +75,53 @@ def _load_normalizer_data() -> Dict[str, Dict[str, str]]:
     if _normalizer_cache is not None:
         return _normalizer_cache
 
-    normalizer_path = ASSETS_DIR / "normalizer.json"
+    # The bundled defaults are required: a missing or invalid file propagates.
+    with open(DEFAULT_NORMALIZER_JSON, "r", encoding="utf-8") as f:
+        merged: Dict[str, Dict[str, str]] = json.load(f)
 
-    try:
-        with open(normalizer_path, "r", encoding="utf-8") as f:
-            _normalizer_cache = json.load(f)
-        logger.debug("Loaded normalizer data from %s", normalizer_path)
-        # Type checker now knows _normalizer_cache is not None
-        assert _normalizer_cache is not None
-        return _normalizer_cache
-    except FileNotFoundError:
-        logger.error("Normalizer file not found at %s", normalizer_path)
-        raise
-    except json.JSONDecodeError as e:
-        logger.error("Invalid JSON in normalizer file: %s", e)
-        raise
+    # The user file is optional. Its entries override the defaults, and
+    # provider sections that exist only in the user file are kept too.
+    if USER_NORMALIZER_JSON.exists():
+        with open(USER_NORMALIZER_JSON, "r", encoding="utf-8") as f:
+            user_normalizer: Dict[str, Dict[str, str]] = json.load(f)
+        for provider, mappings in user_normalizer.items():
+            merged.setdefault(provider, {}).update(mappings)
+
+    _normalizer_cache = merged
+    return _normalizer_cache
+
+
+def update_user_normalizer_json(
+    provider_title: str, media_api_title: str, provider_name: str
+):
+    """
+    Record a provider title -> media API title mapping in the user normalizer.
+
+    The mapping is added to the in-memory normalizer data and the whole
+    merged mapping is then written to USER_NORMALIZER_JSON.
+
+    Args:
+        provider_title: Title as it appears on the provider.
+        media_api_title: Matching title from the media API.
+        provider_name: Provider section the mapping belongs to.
+    """
+    import time
+
+    from .file import AtomicWriter
+
+    print(
+        "UPDATING USER NORMALIZER JSON. PLEASE CONTRIBUTE TO THE PROJECT BY OPENING A PR ON GITHUB TO MERGE YOUR NORMALIZER JSON TO MAIN. MAEMOTTE KANSHA SHIMASU :)"
+    )
+    print(f"NORMALIZER JSON PATH IS: {USER_NORMALIZER_JSON}")
+    # NOTE(review): presumably pauses so the notice above can be read.
+    time.sleep(5)
+
+    # Load on demand instead of failing when nothing has filled the cache yet.
+    normalizer = _load_normalizer_data()
+    # setdefault: the provider may not have a section in either file yet.
+    normalizer.setdefault(provider_name, {})[provider_title] = media_api_title
+    with AtomicWriter(USER_NORMALIZER_JSON) as f:
+        json.dump(normalizer, f, indent=2)
 
 
 def provider_title_to_media_api_title(provider_title: str, provider_name: str) -> str: