From b5109928548752f485386d509a8ca37daf7fe122 Mon Sep 17 00:00:00 2001 From: carlospolop Date: Fri, 21 Nov 2025 13:42:46 +0100 Subject: [PATCH] f --- book.toml | 13 +-- hacktricks-preprocessor.py | 26 +++++- hacktricks-preprocessor.py.bak | 166 +++++++++++++++++++++++++++++++++ theme/index.hbs | 28 +++--- 4 files changed, 203 insertions(+), 30 deletions(-) create mode 100644 hacktricks-preprocessor.py.bak diff --git a/book.toml b/book.toml index 15214d764..a01a3d751 100644 --- a/book.toml +++ b/book.toml @@ -8,25 +8,15 @@ title = "HackTricks Cloud" create-missing = false extra-watch-dirs = ["translations"] -[preprocessor.alerts] -after = ["links"] - -[preprocessor.reading-time] - -[preprocessor.pagetoc] - [preprocessor.tabs] -[preprocessor.codename] - [preprocessor.hacktricks] command = "python3 ./hacktricks-preprocessor.py" env = "prod" [output.html] -additional-css = ["theme/pagetoc.css", "theme/tabs.css"] +additional-css = ["theme/tabs.css"] additional-js = [ - "theme/pagetoc.js", "theme/tabs.js", "theme/ht_searcher.js", "theme/sponsor.js", @@ -35,6 +25,7 @@ additional-js = [ no-section-label = true preferred-dark-theme = "hacktricks-dark" default-theme = "hacktricks-light" +hash-files = false [output.html.fold] enable = true # whether or not to enable section folding diff --git a/hacktricks-preprocessor.py b/hacktricks-preprocessor.py index f19dddbe1..25086220e 100644 --- a/hacktricks-preprocessor.py +++ b/hacktricks-preprocessor.py @@ -53,11 +53,17 @@ def ref(matchobj): if href.endswith("/"): href = href+"README.md" # Fix if ref points to a folder if "#" in href: - chapter, _path = findtitle(href.split("#")[0], book, "source_path") + result = findtitle(href.split("#")[0], book, "source_path") + if result is None or result[0] is None: + raise Exception(f"Chapter not found") + chapter, _path = result title = " ".join(href.split("#")[1].split("-")).title() logger.debug(f'Ref has # using title: {title}') else: - chapter, _path = findtitle(href, book, "source_path") + result = findtitle(href, book, "source_path") + if result is None or result[0] is None: + raise Exception(f"Chapter not found") + chapter, _path = result logger.debug(f'Recursive title search result: {chapter["name"]}') title = chapter['name'] except Exception as e: @@ -65,11 +71,17 @@ def ref(matchobj): dir = path.dirname(current_chapter['source_path']) logger.debug(f'Error getting chapter title: {href} trying with relative path {path.normpath(path.join(dir,href))}') if "#" in href: - chapter, _path = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path") + result = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path") + if result is None or result[0] is None: + raise Exception(f"Chapter not found") + chapter, _path = result title = " ".join(href.split("#")[1].split("-")).title() logger.debug(f'Ref has # using title: {title}') else: - chapter, _path = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path") + result = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path") + if result is None or result[0] is None: + raise Exception(f"Chapter not found") + chapter, _path = result title = chapter["name"] logger.debug(f'Recursive title search result: {chapter["name"]}') except Exception as e: @@ -147,8 +159,12 @@ if __name__ == '__main__': context, book = json.load(sys.stdin) logger.debug(f"Context: {context}") + logger.debug(f"Book keys: {book.keys()}") - for chapter in iterate_chapters(book['sections']): + # Handle both old (sections) and new (items) mdbook API + book_items = book.get('sections') or book.get('items', []) + + for chapter in iterate_chapters(book_items): logger.debug(f"Chapter: {chapter['path']}") current_chapter = chapter # regex = r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endref[\s]*}}' diff --git a/hacktricks-preprocessor.py.bak b/hacktricks-preprocessor.py.bak new file mode 100644 index 000000000..f19dddbe1 --- /dev/null +++ b/hacktricks-preprocessor.py.bak @@ -0,0 +1,166 @@ +import json +import os +import sys +import re +import logging +from os import path +from urllib.request import urlopen, Request + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +handler = logging.FileHandler(filename='hacktricks-preprocessor.log', mode='w', encoding='utf-8') +handler.setLevel(logging.DEBUG) +logger.addHandler(handler) + +handler2 = logging.FileHandler(filename='hacktricks-preprocessor-error.log', mode='w', encoding='utf-8') +handler2.setLevel(logging.ERROR) +logger.addHandler(handler2) + + +def findtitle(search ,obj, key, path=(),): + # logger.debug(f"Looking for {search} in {path}") + if isinstance(obj, dict) and key in obj and obj[key] == search: + return obj, path + if isinstance(obj, list): + for k, v in enumerate(obj): + item = findtitle(search, v, key, (*path, k)) + if item is not None: + return item + if isinstance(obj, dict): + for k, v in obj.items(): + item = findtitle(search, v, key, (*path, k)) + if item is not None: + return item + + +def ref(matchobj): + logger.debug(f'Ref match: {matchobj.groups(0)[0].strip()}') + href = matchobj.groups(0)[0].strip() + title = href + if href.startswith("http://") or href.startswith("https://"): + if context['config']['preprocessor']['hacktricks']['env'] == 'dev': + pass + else: + try: + raw_html = str(urlopen(Request(href, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0'})).read()) + match = re.search('(.*?)', raw_html) + title = match.group(1) if match else href + except Exception as e: + logger.error(f'Error opening URL {href}: {e}') + pass #Dont stop on broken link + else: + try: + if href.endswith("/"): + href = href+"README.md" # Fix if ref points to a folder + if "#" in href: + chapter, _path = findtitle(href.split("#")[0], book, "source_path") + title = " ".join(href.split("#")[1].split("-")).title() + logger.debug(f'Ref has # using title: {title}') + else: + chapter, _path = findtitle(href, book, "source_path") + logger.debug(f'Recursive title search result: {chapter["name"]}') + title = chapter['name'] + except Exception as e: + try: + dir = path.dirname(current_chapter['source_path']) + logger.debug(f'Error getting chapter title: {href} trying with relative path {path.normpath(path.join(dir,href))}') + if "#" in href: + chapter, _path = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path") + title = " ".join(href.split("#")[1].split("-")).title() + logger.debug(f'Ref has # using title: {title}') + else: + chapter, _path = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path") + title = chapter["name"] + logger.debug(f'Recursive title search result: {chapter["name"]}') + except Exception as e: + logger.error(f"Error: {e}") + logger.error(f'Error getting chapter title: {path.normpath(path.join(dir,href))}') + sys.exit(1) + + if href.endswith("/README.md"): + href = href.replace("/README.md", "/index.html") + + template = f"""{title}""" + + # translate_table = str.maketrans({"\"":"\\\"","\n":"\\n"}) + # translated_text = template.translate(translate_table) + result = template + + return result + + +def files(matchobj): + logger.debug(f'Files match: {matchobj.groups(0)[0].strip()}') + href = matchobj.groups(0)[0].strip() + title = "" + + try: + for root, dirs, files in os.walk(os.getcwd()+'/src/files'): + logger.debug(root) + logger.debug(files) + if href in files: + title = href + logger.debug(f'File search result: {os.path.join(root, href)}') + + except Exception as e: + logger.error(f"Error: {e}") + logger.error(f'Error searching file: {href}') + sys.exit(1) + + if title=="": + logger.error(f'Error searching file: {href}') + sys.exit(1) + + template = f"""{title}""" + + result = template + + return result + + +def add_read_time(content): + regex = r'(<\/style>\n# .*(?=\n))' + new_content = re.sub(regex, lambda x: x.group(0) + "\n\nReading time: {{ #reading_time }}", content) + return new_content + + +def iterate_chapters(sections): + if isinstance(sections, dict) and "PartTitle" in sections: # Not a chapter section + return + elif isinstance(sections, dict) and "Chapter" in sections: # Is a chapter return it and look into sub items + # logger.debug(f"Chapter {sections['Chapter']}") + yield sections['Chapter'] + yield from iterate_chapters(sections['Chapter']["sub_items"]) + elif isinstance(sections, list): # Iterate through list when in sections and in sub_items + for k, v in enumerate(sections): + yield from iterate_chapters(v) + + +if __name__ == '__main__': + global context, book, current_chapter + if len(sys.argv) > 1: # we check if we received any argument + if sys.argv[1] == "supports": + # then we are good to return an exit status code of 0, since the other argument will just be the renderer's name + sys.exit(0) + logger.debug('Started hacktricks preprocessor') + # load both the context and the book representations from stdin + context, book = json.load(sys.stdin) + + logger.debug(f"Context: {context}") + + for chapter in iterate_chapters(book['sections']): + logger.debug(f"Chapter: {chapter['path']}") + current_chapter = chapter + # regex = r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endref[\s]*}}' + regex = r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n#]*(?:#(.*))?)(?:\n)?{{[\s]*#endref[\s]*}}' + new_content = re.sub(regex, ref, chapter['content']) + regex = r'{{[\s]*#file[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endfile[\s]*}}' + new_content = re.sub(regex, files, new_content) + new_content = add_read_time(new_content) + chapter['content'] = new_content + + content = json.dumps(book) + logger.debug(content) + + + print(content) \ No newline at end of file diff --git a/theme/index.hbs b/theme/index.hbs index 9c7fa3155..a0d4b65bf 100644 --- a/theme/index.hbs +++ b/theme/index.hbs @@ -255,33 +255,33 @@