diff --git a/scripts/clean_for_ai b/scripts/clean_for_ai
new file mode 100644
index 000000000..412b39e85
--- /dev/null
+++ b/scripts/clean_for_ai
@@ -0,0 +1,102 @@
+import os
+import re
+import tempfile
+
def clean_and_merge_md_files(start_folder, exclude_keywords, output_file, *, min_non_empty_lines=10):
    """Clean every ``.md`` file under *start_folder* and merge them into one file.

    Each file is cleaned line by line (see ``clean_file_content``), files that
    end up with fewer than *min_non_empty_lines* non-empty lines are dropped,
    and the remaining ones are appended to *output_file*, each preceded by a
    ``# <file name>`` header and followed by a blank line.

    Args:
        start_folder: Directory that is walked recursively for ``.md`` files.
        exclude_keywords: Iterable of substrings; any line containing one of
            them is removed.
        output_file: Path of the merged file. It is opened in ``"w"`` mode, so
            existing content is overwritten.
        min_non_empty_lines: Minimum number of non-empty cleaned lines a file
            needs in order to be written to the output.

    Raises:
        OSError: If a source file or the output file cannot be opened.
        UnicodeDecodeError: If a source file is not valid UTF-8.
    """
    # NOTE(review): the pattern used to be r".*?", which matches (an empty
    # prefix of) every line, so every line was discarded and the merged file
    # was always empty. The intent appears to be dropping single-line
    # <figure>...</figure> embeds -- confirm the tag name.
    figure_line = re.compile(r"\s*<figure>.*?</figure>")
    hint_open = '{% hint style="success" %}'
    hint_close = "{% endhint %}"
    # Normalised once so the walk can recognise (and skip) the output file.
    output_path = os.path.normcase(os.path.abspath(output_file))

    def clean_file_content(file_path):
        """Return the cleaned lines of *file_path* (trailing whitespace stripped)."""
        cleaned_lines = []
        inside_hint = False
        with open(file_path, "r", encoding="utf-8") as f:
            for line in f:
                # Track hint state before any other filter so that a marker
                # line is never swallowed by the keyword check, which would
                # leave ``inside_hint`` stuck in the wrong state.
                if hint_open in line:
                    inside_hint = True
                if hint_close in line:
                    # Closing tags are always dropped, including those of
                    # hints with another style whose body is kept.
                    inside_hint = False
                    continue
                if inside_hint:
                    continue

                # Skip lines containing excluded keywords
                if any(keyword in line for keyword in exclude_keywords):
                    continue

                # Skip lines that start with a <figure>...</figure> element
                if figure_line.match(line):
                    continue

                cleaned_lines.append(line.rstrip())

        return remove_consecutive_empty_lines(cleaned_lines)

    def remove_consecutive_empty_lines(lines):
        """Collapse every run of blank lines into a single empty line."""
        collapsed = []
        previous_line_empty = False
        for line in lines:
            is_empty = line.strip() == ""
            if is_empty:
                if not previous_line_empty:
                    collapsed.append("")
            else:
                collapsed.append(line)
            previous_line_empty = is_empty
        return collapsed

    def gather_files_in_order(start_folder):
        """Return all ``.md`` paths under *start_folder* in a deterministic order.

        A directory's own files come first (sorted by name), followed by its
        sub-directories in sorted order.
        """
        files = []
        for root, dirnames, filenames in os.walk(start_folder):
            # os.walk recurses into ``dirnames`` in list order; sorting it in
            # place makes the traversal reproducible across platforms.
            dirnames.sort()
            for name in sorted(filenames):
                if not name.endswith(".md"):
                    continue
                path = os.path.join(root, name)
                # Never merge a previous run's output into itself.
                if os.path.normcase(os.path.abspath(path)) == output_path:
                    continue
                files.append(path)
        return files

    # Collect the inputs before the output file is created or truncated.
    all_files = gather_files_in_order(start_folder)

    with open(output_file, "w", encoding="utf-8") as output:
        for file_path in all_files:
            cleaned_content = clean_file_content(file_path)

            # Skip files that are (almost) empty once cleaned.
            non_empty = sum(1 for line in cleaned_content if line.strip())
            if non_empty < min_non_empty_lines:
                continue

            # Header, cleaned content, and two trailing newlines
            file_name = os.path.basename(file_path)
            output.write(f"# {file_name}\n\n")
            output.write("\n".join(cleaned_content))
            output.write("\n\n")
+
def main():
    """Merge the cleaned ``.md`` files of the current directory into one file.

    The merged file is written as ``merged_output.md`` in the system temporary
    directory, and its path is printed once the merge has finished.
    """
    # Lines containing any of these substrings are dropped from the output.
    exclude_keywords = [
        "Keyword1",  # Replace with your keywords
        "Keyword2",
        "HackTricks",  # Example
    ]

    # The current working directory is the root that gets scanned.
    output_file = os.path.join(tempfile.gettempdir(), "merged_output.md")
    clean_and_merge_md_files(os.getcwd(), exclude_keywords, output_file)

    # Tell the user where the result ended up.
    print(f"Merged content has been saved to: {output_file}")
+
if __name__ == "__main__":
    # Run this script from the root of the hacktricks folder: main() scans the
    # current working directory, cleans every .md file it finds and compiles
    # them into a single merged file.
    main()