# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Render each capa rule found under a directory into a static HTML page.

Positional command line arguments:

  1. input directory that is searched recursively for `*.yml` rule files
  2. text file with one `<filepath> <timestamp>` entry per line
  3. output directory that receives `<rule name>/index.html` pages, plus
     `<rule file stem>/index.html` pages that redirect to them
"""

import sys
import html
import logging
import urllib.parse
from typing import Dict, Optional
from pathlib import Path

import pygments
from pygments.lexers import YamlLexer
from pygments.formatters import HtmlFormatter

import capa.rules

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

input_directory = Path(sys.argv[1])
txt_file_path = Path(sys.argv[2])
output_directory = Path(sys.argv[3])

assert input_directory.exists(), "input directory must exist"
assert txt_file_path.exists(), "file-modification txt file must exist"
assert output_directory.exists(), "output directory must exist"


def render_rule(
    timestamps: Dict[str, str],
    path: Path,
    rule: Optional[capa.rules.Rule] = None,
    rule_content: Optional[str] = None,
) -> str:
    """Render the rule stored at `path` as a standalone HTML document.

    Args:
      timestamps: maps a rule file path (in `Path.as_posix()` form) to the
        text shown after "last edited:".
      path: location of the rule's YAML file.
      rule: the already parsed rule for `path`. When omitted, the rule is
        parsed from `rule_content`.
      rule_content: the YAML text of the rule. When omitted, it is read
        from `path`.

    Returns:
      the HTML document: heading, links (namespace search, GitHub source,
      VirusTotal search), the syntax-highlighted rule, and its timestamp.

    Raises:
      KeyError: when `timestamps` has no entry for `path`.
    """
    if rule_content is None:
        rule_content = path.read_text(encoding="utf-8")
    if rule is None:
        rule = capa.rules.Rule.from_yaml(rule_content, use_ruamel=True)

    filename = path.with_suffix("").name
    namespace = rule.meta.get("namespace", "")
    timestamp = timestamps[path.as_posix()]

    # pygments escapes the YAML text itself, so `rendered_rule` is HTML.
    rendered_rule = pygments.highlight(
        rule_content,
        YamlLexer(),
        HtmlFormatter(
            style="xcode",
            noclasses=True,
            wrapcode=True,
            nobackground=True,
        ),
    )

    gh_link = f"https://github.com/mandiant/capa-rules/tree/master/{namespace}/{filename}.yml"

    vt_query = 'behavior_signature:"' + rule.name + '"'
    # quoted twice on purpose, as in the original script.
    vt_fragment = urllib.parse.quote(urllib.parse.quote(vt_query))
    vt_link = f"https://www.virustotal.com/gui/search/{vt_fragment}/files"

    ns_query = f'"namespace: {namespace} "'
    ns_link = f"../?{urllib.parse.urlencode({'q': ns_query})}"

    # everything below is interpolated into markup, so escape text taken
    # from the rule and from the timestamps file.
    title = html.escape(rule.name)
    links = []
    if namespace:
        links.append(f'<a href="{html.escape(ns_link)}">namespace: {html.escape(namespace)}</a>')
    links.append(f'<a href="{html.escape(gh_link)}">view on GitHub</a>')
    links.append(f'<a href="{html.escape(vt_link)}">search on VirusTotal</a>')
    rendered_links = " |\n".join(links)
    last_edited = html.escape(timestamp)

    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>{title}</title>
</head>
<body>
<h1>{title}</h1>
<p>
{rendered_links}
</p>
{rendered_rule}
<p>last edited: {last_edited}</p>
</body>
</html>
"""
    return html_content


def _render_redirect(rule_name: str) -> str:
    """Return a page that meta-refreshes to the sibling directory `rule_name`."""
    # percent-encode for the URL, then escape for the HTML attribute.
    target = html.escape(f"../{urllib.parse.quote(rule_name)}/")
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="refresh" content="0; url={target}">
<link rel="canonical" href="{target}">
</head>
<body>
<a href="{target}">{html.escape(rule_name)}</a>
</body>
</html>
"""


# rule files anywhere below the input directory, skipping anything inside a
# dot-directory (or a dot-file itself). sorted so the log output is stable.
yaml_files = sorted(
    p
    for p in input_directory.glob("**/*.yml")
    if not any(part.startswith(".") for part in p.relative_to(input_directory).parts)
)

# each useful line looks like: <filepath> <timestamp>
# blank lines and lines starting with "===" are ignored.
timestamps = {}
for line in txt_file_path.read_text(encoding="utf-8").splitlines():
    if not line:
        continue
    if line.startswith("==="):
        continue

    filepath, _, timestamp = line.partition(" ")
    timestamps[filepath] = timestamp

for path in yaml_files:
    # read and parse once, then hand both to the renderer.
    rule_content = path.read_text(encoding="utf-8")
    rule = capa.rules.Rule.from_yaml(rule_content, use_ruamel=True)
    html_content = render_rule(timestamps, path, rule=rule, rule_content=rule_content)

    # like: rules/create file/index.html
    #
    # which looks like the URL fragments:
    #
    #     rules/create%20file/index.html
    #     rules/create%20file/
    #     rules/create file/
    html_path = output_directory / rule.name / "index.html"
    html_path.parent.mkdir(parents=True, exist_ok=True)
    html_path.write_text(html_content, encoding="utf-8")
    logger.info("wrote: %s", html_path)

    # like: create-file
    rule_id = path.with_suffix("").name

    # like: rules/create-file/index.html
    #
    # which looks like the URL fragments:
    #
    #     rules/create-file/index.html
    #     rules/create-file/
    #
    # and redirects, via meta refresh, to the canonical path above.
    # since we don't control the GH Pages web server, we can't use HTTP redirects.
    id_path = output_directory / rule_id / "index.html"
    if id_path == html_path:
        # the rule name equals the file stem: writing the redirect would
        # overwrite the page rendered above with one that points at itself.
        continue

    id_path.parent.mkdir(parents=True, exist_ok=True)
    id_path.write_text(_render_redirect(rule.name), encoding="utf-8")
    logger.info("wrote: %s", id_path)