From 088d232bfdac793fcd02977ceab11dc7307ecc1e Mon Sep 17 00:00:00 2001
From: Benexl <benextempest@gmail.com>
Date: Tue, 12 Aug 2025 00:39:43 +0300
Subject: [PATCH] feat(normalizer): add user normalizer json

---
 .../interactive/menu/media/provider_search.py |  8 ++-
 fastanime/cli/service/download/service.py     | 29 ++++++----
 fastanime/core/utils/normalizer.py            | 55 ++++++++++++++-----
 3 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/fastanime/cli/interactive/menu/media/provider_search.py b/fastanime/cli/interactive/menu/media/provider_search.py
index ce15eb7..d3f0da1 100644
--- a/fastanime/cli/interactive/menu/media/provider_search.py
+++ b/fastanime/cli/interactive/menu/media/provider_search.py
@@ -7,7 +7,7 @@ from ...state import InternalDirective, MenuName, ProviderState, State
 @session.menu
 def provider_search(ctx: Context, state: State) -> State | InternalDirective:
     from .....core.utils.fuzzy import fuzz
-    from .....core.utils.normalizer import normalize_title
+    from .....core.utils.normalizer import normalize_title, update_user_normalizer_json
 
     feedback = ctx.feedback
     media_item = state.media_api.media_item
@@ -71,6 +71,12 @@ def provider_search(ctx: Context, state: State) -> State | InternalDirective:
         if not chosen_title or chosen_title == "Back":
             return InternalDirective.BACK
 
+        if selector.confirm(
+            f"Would you like to update your local normalizer json with: {chosen_title} for {media_title}"
+        ):
+            update_user_normalizer_json(
+                chosen_title, media_title, config.general.provider.value
+            )
         selected_provider_anime = provider_results_map[chosen_title]
 
     with feedback.progress(
diff --git a/fastanime/cli/service/download/service.py b/fastanime/cli/service/download/service.py
index 4c4807b..cce526b 100644
--- a/fastanime/cli/service/download/service.py
+++ b/fastanime/cli/service/download/service.py
@@ -34,7 +34,7 @@ class DownloadService:
         media_api_service: "BaseApiClient",
         provider_service: "BaseAnimeProvider",
     ):
-        self.config = config
+        self.app_config = config
         self.registry = registry_service
         self.media_api = media_api_service
         self.provider = provider_service
@@ -157,7 +157,7 @@ class DownloadService:
                         continue
                     if (
                         episode.download_attempts
-                        <= self.config.downloads.max_retry_attempts
+                        <= self.app_config.downloads.max_retry_attempts
                     ):
                         logger.info(
                             f"Retrying {episode_number} of {record.media_item.title.english}"
@@ -187,12 +187,17 @@ class DownloadService:
 
             # 1. Search the provider to get the provider-specific ID
             provider_search_results = self.provider.search(
-                SearchParams(query=media_title)
+                SearchParams(
+                    query=normalize_title(
+                        media_title, self.app_config.general.provider.value, True
+                    ),
+                    translation_type=self.app_config.stream.translation_type,
+                )
             )
 
             if not provider_search_results or not provider_search_results.results:
                 raise ValueError(
-                    f"Could not find '{media_title}' on provider '{self.config.general.provider.value}'"
+                    f"Could not find '{media_title}' on provider '{self.app_config.general.provider.value}'"
                 )
 
             # 2. Find the best match using fuzzy logic (like auto-select)
@@ -203,7 +208,7 @@ class DownloadService:
                 provider_results_map.keys(),
                 key=lambda p_title: fuzz.ratio(
                     normalize_title(
-                        p_title, self.config.general.provider.value
+                        p_title, self.app_config.general.provider.value
                     ).lower(),
                     media_title.lower(),
                 ),
@@ -225,7 +230,7 @@ class DownloadService:
                     anime_id=provider_anime.id,
                     query=media_title,
                     episode=episode_number,
-                    translation_type=self.config.stream.translation_type,
+                    translation_type=self.app_config.stream.translation_type,
                 )
             )
             if not streams_iterator:
@@ -235,11 +240,11 @@ class DownloadService:
             if not server or not server.links:
                 raise ValueError(f"No stream links found for Episode {episode_number}")
 
-            if server.name != self.config.downloads.server.value:
+            if server.name != self.app_config.downloads.server.value:
                 while True:
                     try:
                         _server = next(streams_iterator)
-                        if _server.name == self.config.downloads.server.value:
+                        if _server.name == self.app_config.downloads.server.value:
                             server = _server
                             break
                     except StopIteration:
@@ -259,9 +264,9 @@ class DownloadService:
                 silent=False,
                 headers=server.headers,
                 subtitles=[sub.url for sub in server.subtitles],
-                merge=self.config.downloads.merge_subtitles,
-                clean=self.config.downloads.cleanup_after_merge,
-                no_check_certificate=self.config.downloads.no_check_certificate,
+                merge=self.app_config.downloads.merge_subtitles,
+                clean=self.app_config.downloads.cleanup_after_merge,
+                no_check_certificate=self.app_config.downloads.no_check_certificate,
             )
 
             result = self.downloader.download(download_params)
@@ -280,7 +285,7 @@ class DownloadService:
                     file_path=result.merged_path or result.video_path,
                     file_size=file_size,
                     quality=stream_link.quality,
-                    provider_name=self.config.general.provider.value,
+                    provider_name=self.app_config.general.provider.value,
                     server_name=server.name,
                     subtitle_paths=result.subtitle_paths,
                 )
diff --git a/fastanime/core/utils/normalizer.py b/fastanime/core/utils/normalizer.py
index c45ac43..5eb8db5 100644
--- a/fastanime/core/utils/normalizer.py
+++ b/fastanime/core/utils/normalizer.py
@@ -46,14 +46,19 @@ import json
 import logging
 from typing import Dict, Optional
 
-from ..constants import ASSETS_DIR
+from ..constants import APP_DATA_DIR, ASSETS_DIR
 
 logger = logging.getLogger(__name__)
 
 # Cache for the normalizer data to avoid repeated file reads
 _normalizer_cache: Optional[Dict[str, Dict[str, str]]] = None
 
+USER_NORMALIZER_JSON = APP_DATA_DIR / "normalizer.json"
 
+DEFAULT_NORMALIZER_JSON = ASSETS_DIR / "normalizer.json"
+
+
+# Loads the bundled normalizer.json and, if a user copy exists in APP_DATA_DIR, overlays its entries per provider.
 def _load_normalizer_data() -> Dict[str, Dict[str, str]]:
     """
     Load the normalizer.json file and cache it.
@@ -70,21 +75,41 @@ def _load_normalizer_data() -> Dict[str, Dict[str, str]]:
     if _normalizer_cache is not None:
         return _normalizer_cache
 
-    normalizer_path = ASSETS_DIR / "normalizer.json"
+    default_normalizer = {}
+    user_normalizer = {}
+    with open(DEFAULT_NORMALIZER_JSON, "r", encoding="utf-8") as f:
+        default_normalizer: dict = json.load(f)
+    if USER_NORMALIZER_JSON.exists():
+        with open(USER_NORMALIZER_JSON, "r", encoding="utf-8") as f:
+            user_normalizer: dict = json.load(f)
 
-    try:
-        with open(normalizer_path, "r", encoding="utf-8") as f:
-            _normalizer_cache = json.load(f)
-        logger.debug("Loaded normalizer data from %s", normalizer_path)
-        # Type checker now knows _normalizer_cache is not None
-        assert _normalizer_cache is not None
-        return _normalizer_cache
-    except FileNotFoundError:
-        logger.error("Normalizer file not found at %s", normalizer_path)
-        raise
-    except json.JSONDecodeError as e:
-        logger.error("Invalid JSON in normalizer file: %s", e)
-        raise
+    _normalizer_cache = default_normalizer
+    for key in default_normalizer:
+        if key in user_normalizer:
+            _normalizer_cache[key].update(user_normalizer[key])
+
+    return _normalizer_cache
+
+
+def update_user_normalizer_json(
+    provider_title: str, media_api_title: str, provider_name: str
+):
+    """Save provider_title -> media_api_title for provider_name to the user JSON."""
+    import time
+
+    from .file import AtomicWriter
+
+    print(
+        "UPDATING USER NORMALIZER JSON. PLEASE CONTRIBUTE TO THE PROJECT BY OPENING A PR ON GITHUB TO MERGE YOUR NORMALIZER JSON TO MAIN. MAEMOTTE KANSHA SHIMASU :)"
+    )
+    print(f"NORMALIZER JSON PATH IS: {USER_NORMALIZER_JSON}")
+    time.sleep(5)
+    # Load on demand: an empty or not-yet-loaded cache is valid, not a fatal error.
+    normalizer = _load_normalizer_data()
+    # setdefault avoids a KeyError for a provider with no section in the loaded data.
+    normalizer.setdefault(provider_name, {})[provider_title] = media_api_title
+    with AtomicWriter(USER_NORMALIZER_JSON) as f:
+        json.dump(normalizer, f, indent=2)
 
 
 def provider_title_to_media_api_title(provider_title: str, provider_name: str) -> str: