# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build ``index.html`` for a set of capa rules grouped by modification age.

Command line arguments (positional):

1. a directory that must exist (only its existence is checked here),
2. a text file listing rule paths under ``=== <category> ===`` headings,
3. an existing output directory; ``index.html`` is written into it.
"""

import sys
import html
import random
import logging
import functools
from pathlib import Path

import capa.rules

logger = logging.getLogger(__name__)

# Validate the command line explicitly: ``assert`` is stripped under ``-O``
# and a missing argument would otherwise surface as a bare IndexError.
if len(sys.argv) < 4:
    sys.exit(f"usage: {Path(sys.argv[0]).name} <input-dir> <file-modification-txt> <output-dir>")

start_dir = Path(sys.argv[1])
txt_file_path = Path(sys.argv[2])
out_dir = Path(sys.argv[3])
output_html_path = out_dir / "index.html"

for _required_path, _message in (
    (start_dir, "input directory must exist"),
    (txt_file_path, "file-modification txt file must exist"),
    (out_dir, "output directory must exist"),
):
    if not _required_path.exists():
        sys.exit(_message)

# Palette handed out first, in order, one colour per top-level namespace.
predefined_colors = [
    "#9CAFAA",
    "#577590",
    "#a98467",
    "#D6DAC8",
    "#adc178",
    "#f4d35e",
    "#85182a",
    "#d6c399",
    "#dde5b6",
    "#8da9c4",
    "#fcd5ce",
    "#706993",
    "#FBF3D5",
    "#1a659e",
    "#c71f37",
    "#EFBC9B",
    "#7e7f9a",
]


def read_file_paths(txt_file_path: Path) -> "dict[str, list[Path]]":
    """Group the rule paths listed in *txt_file_path* by category heading.

    A line containing ``===`` is a heading; its text with the ``=`` characters
    and surrounding whitespace stripped selects the current category. Each
    following non-empty line is expected to be ``<path> <rest>``: everything
    before the first space is kept as the path and the rest is ignored.

    Lines under an unrecognized heading, or before any heading, are skipped.
    Lines without a space are skipped with a warning.

    Returns:
        A dict with one (possibly empty) list of paths per known category.
    """
    categorized_files: dict[str, list[Path]] = {
        "modified in the last day": [],
        "modified in the last week": [],
        "modified in the last month": [],
        "modified in the last three months": [],
        "modified in the last year": [],
        "older": [],
    }

    current_category = None
    for raw_line in txt_file_path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if "===" in line:
            category = line.strip("=").strip()
            if category in categorized_files:
                current_category = category
            else:
                logger.warning("Unrecognized category '%s'", category)
                current_category = None
            continue

        if not current_category:
            continue

        # Only the path is needed; the remainder of the line is discarded.
        file_path, separator, _ = line.partition(" ")
        if separator:
            categorized_files[current_category].append(Path(file_path))
        else:
            logger.warning("Skipping line due to unexpected format: %s", line)

    return categorized_files


# Every rule is needed twice (colour assignment, then rendering); memoise so
# each file is only parsed once. Failed parses raise and are not cached.
@functools.lru_cache(maxsize=None)
def parse_rule(file_path: Path) -> dict:
    """Load the rule at *file_path* and return the fields used for its card.

    Returns:
        A dict with the keys ``name``, ``namespace``, ``authors``, ``path``
        and ``filename``. ``namespace`` defaults to ``""`` and ``authors`` to
        ``[]`` when the rule metadata lacks them.

    Raises:
        Whatever ``capa.rules.Rule.from_yaml_file`` raises for this file.
    """
    rule = capa.rules.Rule.from_yaml_file(file_path)
    return {
        "name": rule.name,
        "namespace": rule.meta.get("namespace", ""),
        "authors": rule.meta.get("authors", []),
        "path": file_path,
        "filename": file_path.name,
    }


def generate_color() -> str:
    """Return a random colour as a lower-case ``#rrggbb`` string."""
    return "#{:06x}".format(random.randint(0, 0xFFFFFF))


def get_first_word(namespace: str) -> str:
    """Return the part of *namespace* before the first ``/`` (or all of it)."""
    return namespace.partition("/")[0]


def generate_html(categories_data, color_map) -> None:
    """Render the rule listing and write it to ``output_html_path``.

    Args:
        categories_data: mapping of category title to a list of rule paths;
            categories with no paths are omitted from the output.
        color_map: mapping of top-level namespace to colour; it must contain
            an entry for every rule that parses successfully.

    Rules that fail to parse are logged and left out. After the cards of a
    category, filler cells are appended so the cell count is a multiple of 4.

    Raises:
        KeyError: if a rule's top-level namespace is missing from *color_map*.
    """
    # NOTE(review): the template literals in the text under review contain no
    # markup; they are reproduced verbatim here. Confirm against the canonical
    # file before merging.
    pieces = [" capa rules\n"]

    for category, files in categories_data.items():
        if not files:
            continue

        pieces.append(f"\n\n{html.escape(category)}\n\n")

        cards_data = []
        for file_path in files:
            try:
                cards_data.append(parse_rule(file_path))
            except Exception as e:
                logger.error("error parsing %s: %s", file_path, e)

        for card in cards_data:
            first_word = get_first_word(card["namespace"])
            # Looked up even though the template text below does not
            # interpolate it, so a namespace missing from color_map still
            # raises KeyError here.
            rectangle_color = color_map[first_word]  # noqa: F841

            # Rule metadata comes from YAML files; escape it before it is
            # embedded in the page.
            namespace_text = html.escape(card["namespace"])
            authors_text = html.escape(", ".join(card["authors"]))
            pieces.append(f"\n{namespace_text}\n{authors_text}\n")

        # Pad the final row so every row holds four cells.
        num_empty_cells = (4 - (len(cards_data) % 4)) % 4
        pieces.append("\n" * num_empty_cells)

        pieces.append("\n")

    pieces.append("\n")

    output_html_path.write_text("".join(pieces), encoding="utf-8")


def _build_color_map(file_paths) -> "dict[str, str]":
    """Assign one colour to each top-level namespace found in *file_paths*.

    Colours come from ``predefined_colors`` in order; once those run out, a
    random colour not yet in use is generated. Rules that fail to parse are
    logged and skipped.
    """
    assigned: dict[str, str] = {}
    # generate_color() yields lower-case hex, so compare case-insensitively to
    # keep a generated colour from duplicating a mixed-case palette entry.
    used_colors = {color.lower() for color in predefined_colors}
    palette = iter(predefined_colors)

    for file_path in file_paths:
        try:
            card_data = parse_rule(file_path)
        except Exception as e:
            logger.error("error parsing %s: %s", file_path, e)
            continue

        first_word = get_first_word(card_data["namespace"])
        if first_word in assigned:
            continue

        color = next(palette, None)
        if color is None:
            color = generate_color()
            while color in used_colors:
                color = generate_color()
            used_colors.add(color)
        assigned[first_word] = color

    return assigned


# Without a configured handler the INFO message at the end is never emitted.
logging.basicConfig(level=logging.INFO)

categories_data = read_file_paths(txt_file_path)
all_files = [file for category in categories_data.values() for file in category]
color_map = _build_color_map(all_files)

generate_html(categories_data, color_map)
logger.info("HTML file has been generated: %s", output_html_path)