Files
hacktricks-cloud/scripts/get_and_save_refs.py
carlospolop ea3041d9a2 fix refs
2025-08-21 00:23:50 +02:00

39 lines
1.2 KiB
Python

#!/usr/bin/env python3
import json
import re
from pathlib import Path
SRC_DIR = Path("./src")
REFS_JSON = Path("/tmp/refs.json")
# Matches content between {{#ref}} and {{#endref}}, including newlines, lazily
REF_RE = re.compile(r"{{#ref}}\s*([\s\S]*?)\s*{{#endref}}", re.MULTILINE)
def extract_refs(text: str):
"""Return a list of refs (trimmed) in appearance order."""
return [m.strip() for m in REF_RE.findall(text)]
def main():
if not SRC_DIR.is_dir():
raise SystemExit(f"Not a directory: {SRC_DIR}")
refs_per_path = {} # { "relative/path.md": [ref1, ref2, ...] }
for md_path in sorted(SRC_DIR.rglob("*.md")):
rel = md_path.relative_to(SRC_DIR).as_posix()
try:
content = md_path.read_text(encoding="utf-8")
except UnicodeDecodeError:
# Fallback if encoding is odd
content = md_path.read_text(errors="replace")
refs = extract_refs(content)
refs_per_path[rel] = refs # keep order from findall
REFS_JSON.write_text(json.dumps(refs_per_path, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
print(f"Wrote {REFS_JSON} with {len(refs_per_path)} files.")
if __name__ == "__main__":
main()