Mirror of https://github.com/HackTricks-wiki/hacktricks-cloud.git (synced 2025-12-10 06:40:47 -08:00)
fix refs
scripts/compare_and_fix_refs.py (new file, 149 lines)
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
import argparse
import json
import re
from pathlib import Path

SRC_DIR = Path("./src")
REFS_JSON = Path("/tmp/refs.json")

# Matches content between {{#ref}} and {{#endref}}, including newlines, lazily
REF_RE = re.compile(r"{{#ref}}\s*([\s\S]*?)\s*{{#endref}}", re.MULTILINE)


def extract_refs(text: str):
    """Return a list of refs (trimmed) in appearance order."""
    return [m.strip() for m in REF_RE.findall(text)]


def replace_refs_in_text(text: str, new_refs: list):
    """Replace all refs in text with new_refs, maintaining order."""
    matches = list(REF_RE.finditer(text))
    if len(matches) != len(new_refs):
        return text # Can't replace if counts don't match

    # Replace from end to beginning to avoid offset issues
    result = text
    for match, new_ref in zip(reversed(matches), reversed(new_refs)):
        # Get the full match span to replace the entire {{#ref}}...{{#endref}} block
        start, end = match.span()
        # Format the replacement with proper newlines
        formatted_replacement = f"{{{{#ref}}}}\n{new_ref}\n{{{{#endref}}}}"
        result = result[:start] + formatted_replacement + result[end:]

    return result


def main():
    parser = argparse.ArgumentParser(description="Compare and fix refs between current branch and master branch")
    parser.add_argument("--files-unmatched-paths", type=str,
                        help="Path to file where unmatched file paths will be saved (comma-separated on first line)")
    args = parser.parse_args()

    if not SRC_DIR.is_dir():
        raise SystemExit(f"Not a directory: {SRC_DIR}")

    if not REFS_JSON.exists():
        raise SystemExit(f"Reference file not found: {REFS_JSON}")

    # Load the reference refs from master branch
    try:
        with open(REFS_JSON, 'r', encoding='utf-8') as f:
            master_refs = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        raise SystemExit(f"Error reading {REFS_JSON}: {e}")

    print(f"Loaded reference data for {len(master_refs)} files from {REFS_JSON}")

    files_processed = 0
    files_modified = 0
    files_with_differences = 0
    unmatched_files = [] # Track files with unmatched refs

    for md_path in sorted(SRC_DIR.rglob("*.md")):
        rel = md_path.relative_to(SRC_DIR).as_posix()
        rel_with_src = f"{SRC_DIR.name}/{rel}" # Include src/ prefix for output
        files_processed += 1

        try:
            content = md_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Fallback if encoding is odd
            content = md_path.read_text(errors="replace")

        current_refs = extract_refs(content)

        # Check if file exists in master refs
        if rel not in master_refs:
            if current_refs:
                print(f"⚠️ NEW FILE with refs: {rel_with_src} (has {len(current_refs)} refs)")
                files_with_differences += 1
                unmatched_files.append(rel_with_src)
            continue

        master_file_refs = master_refs[rel]

        # Compare ref counts
        if len(current_refs) != len(master_file_refs):
            print(f"📊 REF COUNT MISMATCH: {rel_with_src} -- Master: {len(master_file_refs)} refs, Current: {len(current_refs)} refs")
            files_with_differences += 1
            unmatched_files.append(rel_with_src)
            continue

        # If no refs in either, skip
        if not current_refs and not master_file_refs:
            continue

        # Compare individual refs
        differences_found = False
        for i, (current_ref, master_ref) in enumerate(zip(current_refs, master_file_refs)):
            if current_ref != master_ref:
                if not differences_found:
                    print(f"🔍 REF DIFFERENCES in {rel_with_src}:")
                    differences_found = True
                print(f" Ref {i+1}:")
                print(f" Master: {repr(master_ref)}")
                print(f" Current: {repr(current_ref)}")

        if differences_found:
            files_with_differences += 1
            unmatched_files.append(rel_with_src)

            # Replace current refs with master refs
            try:
                new_content = replace_refs_in_text(content, master_file_refs)
                if new_content != content:
                    md_path.write_text(new_content, encoding="utf-8")
                    files_modified += 1
                    print(f" ✅ Fixed refs in {rel_with_src}")
                else:
                    print(f" ❌ Failed to replace refs in {rel_with_src}")
            except Exception as e:
                print(f" ❌ Error fixing refs in {rel_with_src}: {e}")

    # Save unmatched files to specified path if requested
    if args.files_unmatched_paths and unmatched_files:
        try:
            unmatched_paths_file = Path(args.files_unmatched_paths)
            unmatched_paths_file.parent.mkdir(parents=True, exist_ok=True)
            with open(unmatched_paths_file, 'w', encoding='utf-8') as f:
                f.write(','.join(unmatched_files))
            print(f"📝 Saved {len(unmatched_files)} unmatched file paths to: {unmatched_paths_file}")
        except Exception as e:
            print(f"❌ Error saving unmatched paths to {args.files_unmatched_paths}: {e}")
    elif args.files_unmatched_paths and not unmatched_files:
        # Create empty file if no unmatched files found
        try:
            unmatched_paths_file = Path(args.files_unmatched_paths)
            unmatched_paths_file.parent.mkdir(parents=True, exist_ok=True)
            unmatched_paths_file.write_text('\n', encoding='utf-8')
            print(f"No unmatched files found. Created empty file: {unmatched_paths_file}")
        except Exception as e:
            print(f"❌ Error creating empty unmatched paths file {args.files_unmatched_paths}: {e}")

    print(f"\n📈 SUMMARY:")
    print(f" Files processed: {files_processed}")
    print(f" Files with differences: {files_with_differences}")
    print(f" Files modified: {files_modified}")
    if unmatched_files:
        print(f" Unmatched files: {len(unmatched_files)}")


if __name__ == "__main__":
    main()
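
To make the regex and replacement behaviour concrete, here is a small sanity check (illustration only, not part of the commit; the sample string and paths are invented):

# Illustration only: exercising extract_refs / replace_refs_in_text on an invented snippet.
sample = "Intro\n{{#ref}}\nlinux-hardening/basic.md\n{{#endref}}\ntext\n{{#ref}}old/page.md{{#endref}}\n"
assert extract_refs(sample) == ["linux-hardening/basic.md", "old/page.md"]

# Counts match, so every block is rewritten; iterating the matches in reverse keeps
# the earlier spans' offsets valid while the string is spliced.
fixed = replace_refs_in_text(sample, ["linux-hardening/basic.md", "new/page.md"])
assert "{{#ref}}\nnew/page.md\n{{#endref}}" in fixed

# On a count mismatch the text is returned untouched rather than half-rewritten.
assert replace_refs_in_text(sample, ["only-one.md"]) == sample
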
scripts/get_and_save_refs.py (new file, 38 lines)
@@ -0,0 +1,38 @@
#!/usr/bin/env python3
import json
import re
from pathlib import Path

SRC_DIR = Path("./src")
REFS_JSON = Path("/tmp/refs.json")

# Matches content between {{#ref}} and {{#endref}}, including newlines, lazily
REF_RE = re.compile(r"{{#ref}}\s*([\s\S]*?)\s*{{#endref}}", re.MULTILINE)


def extract_refs(text: str):
    """Return a list of refs (trimmed) in appearance order."""
    return [m.strip() for m in REF_RE.findall(text)]


def main():
    if not SRC_DIR.is_dir():
        raise SystemExit(f"Not a directory: {SRC_DIR}")

    refs_per_path = {} # { "relative/path.md": [ref1, ref2, ...] }

    for md_path in sorted(SRC_DIR.rglob("*.md")):
        rel = md_path.relative_to(SRC_DIR).as_posix()
        try:
            content = md_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Fallback if encoding is odd
            content = md_path.read_text(errors="replace")

        refs = extract_refs(content)
        refs_per_path[rel] = refs # keep order from findall

    REFS_JSON.write_text(json.dumps(refs_per_path, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    print(f"Wrote {REFS_JSON} with {len(refs_per_path)} files.")


if __name__ == "__main__":
    main()
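
get_and_save_refs.py is the producer half of the pair: presumably run on a master checkout, it snapshots every file's refs to /tmp/refs.json, which compare_and_fix_refs.py then treats as ground truth. A minimal sketch of reading that snapshot back (nothing beyond the dict built above is assumed):

import json
from pathlib import Path

# The snapshot maps "relative/path.md" (relative to src/) -> ordered list of ref targets.
refs = json.loads(Path("/tmp/refs.json").read_text(encoding="utf-8"))
total = sum(len(v) for v in refs.values())
print(f"{len(refs)} files, {total} refs in total")
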
@@ -425,7 +425,7 @@ if __name__ == "__main__":
     translate_files = None # Need to initialize it here to avoid error
     if args.file_paths:
         # Translate only the indicated file
-        translate_files = [f for f in args.file_paths.split(' , ') if f]
+        translate_files = [f.strip() for f in args.file_paths.split(',') if f]
         for file_path in translate_files:
             #with tqdm(total=len(all_markdown_files), desc="Translating Files") as pbar:
             with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
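
The one-line change above matters because the old delimiter ' , ' only split when the comma was padded with a space on both sides; a plain "a.md,b.md" or "a.md, b.md" came through as a single bogus path. A quick illustration (invented input):

paths = "src/a.md, src/b.md"
# Old behaviour: no ' , ' substring present, so nothing is split.
print([f for f in paths.split(' , ') if f])       # ['src/a.md, src/b.md']
# New behaviour: split on ',' and strip each entry.
print([f.strip() for f in paths.split(',') if f]) # ['src/a.md', 'src/b.md']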