This commit is contained in:
carlospolop
2025-08-21 00:23:50 +02:00
parent 414eeda035
commit ea3041d9a2
3 changed files with 188 additions and 1 deletions

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
import json
import re
from pathlib import Path
SRC_DIR = Path("./src")
REFS_JSON = Path("/tmp/refs.json")
# Matches content between {{#ref}} and {{#endref}}, including newlines, lazily
REF_RE = re.compile(r"{{#ref}}\s*([\s\S]*?)\s*{{#endref}}", re.MULTILINE)
def extract_refs(text: str):
"""Return a list of refs (trimmed) in appearance order."""
return [m.strip() for m in REF_RE.findall(text)]
def main():
if not SRC_DIR.is_dir():
raise SystemExit(f"Not a directory: {SRC_DIR}")
refs_per_path = {} # { "relative/path.md": [ref1, ref2, ...] }
for md_path in sorted(SRC_DIR.rglob("*.md")):
rel = md_path.relative_to(SRC_DIR).as_posix()
try:
content = md_path.read_text(encoding="utf-8")
except UnicodeDecodeError:
# Fallback if encoding is odd
content = md_path.read_text(errors="replace")
refs = extract_refs(content)
refs_per_path[rel] = refs # keep order from findall
REFS_JSON.write_text(json.dumps(refs_per_path, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
print(f"Wrote {REFS_JSON} with {len(refs_per_path)} files.")
if __name__ == "__main__":
main()