# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: [package root]/LICENSE.txt # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. """ Generate capa linter-data.json, used to validate Att&ck/MBC IDs and names. Use the --extractor option to extract data from Att&ck or MBC (or both) frameworks. Use the --output to choose the output json file. By default, the script will create a linter-data.json in the scripts/ directory for both frameworks. Note: The capa rules linter will try to load from its default location (scripts/linter-data.json). Usage: usage: setup-linter-dependencies.py [-h] [--extractor {both,mbc,att&ck}] [--output OUTPUT] Setup linter dependencies. optional arguments: -h, --help show this help message and exit --extractor {both,mbc,att&ck} Extractor that will be run --output OUTPUT, -o OUTPUT Path to output file (lint.py will be looking for linter-data.json) Example: $ python3 setup-linter-dependencies.py 2022-01-24 22:35:06,901 [INFO] Extracting Mitre Att&ck techniques... 2022-01-24 22:35:06,901 [INFO] Downloading STIX data at: https://raw.githubusercontent.com/mitre-attack/attack-stix-data/master/enterprise-attack/enterprise-attack.json 2022-01-24 22:35:13,001 [INFO] Starting extraction... 2022-01-24 22:35:39,395 [INFO] Extracting MBC behaviors... 2022-01-24 22:35:39,395 [INFO] Downloading STIX data at: https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json 2022-01-24 22:35:39,839 [INFO] Starting extraction... 2022-01-24 22:35:42,632 [INFO] Writing results to linter-data.json """ import json import logging import argparse from sys import argv from typing import Dict, List from pathlib import Path import requests from stix2 import Filter, MemoryStore, AttackPattern logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") class MitreExtractor: """ This class extract Mitre techniques and sub techniques that are represented as "attack-pattern" in STIX format. The STIX data is collected in JSON format by requesting the specified URL. url: must point to json stix location kill_chain_name: mitre-attack, mitre-mbc... """ url = "" kill_chain_name = "" def __init__(self): """Download and store in memory the STIX data on instantiation.""" if self.kill_chain_name == "": raise ValueError(f"Kill chain name not specified in class {self.__class__.__name__}") if self.url == "": raise ValueError(f"URL not specified in class {self.__class__.__name__}") logging.info("Downloading STIX data at: %s", self.url) stix_json = requests.get(self.url).json() self._memory_store = MemoryStore(stix_data=stix_json["objects"]) @staticmethod def _remove_deprecated_objects(stix_objects) -> List[AttackPattern]: """Remove any revoked or deprecated objects from queries made to the data source.""" return list( filter( lambda x: x.get("x_mitre_deprecated", False) is False and x.get("revoked", False) is False, stix_objects, ) ) def _get_tactics(self) -> List[Dict]: """Get tactics IDs from Mitre matrix.""" # Only one matrix for enterprise att&ck framework matrix = self._remove_deprecated_objects( self._memory_store.query( [ Filter("type", "=", "x-mitre-matrix"), ] ) )[0] return list(map(self._memory_store.get, matrix["tactic_refs"])) def _get_techniques_from_tactic(self, tactic: str) -> List[AttackPattern]: """Get techniques and sub techniques from a Mitre tactic (kill_chain_phases->phase_name)""" techniques = self._remove_deprecated_objects( self._memory_store.query( [ Filter("type", "=", "attack-pattern"), Filter("kill_chain_phases.phase_name", "=", tactic), Filter("kill_chain_phases.kill_chain_name", "=", self.kill_chain_name), ] ) ) return techniques def _get_parent_technique_from_subtechnique(self, technique: AttackPattern) -> AttackPattern: """Get parent technique of a sub technique using the technique ID TXXXX.YYY""" sub_id = technique["external_references"][0]["external_id"].split(".")[0] parent_technique = self._remove_deprecated_objects( self._memory_store.query( [ Filter("type", "=", "attack-pattern"), Filter("external_references.external_id", "=", sub_id), ] ) )[0] return parent_technique def run(self) -> Dict[str, Dict[str, str]]: """Iterate over every technique over every tactic. If the technique is a sub technique, then we also search for the parent technique name. """ logging.info("Starting extraction...") data: Dict[str, Dict[str, str]] = {} for tactic in self._get_tactics(): data[tactic["name"]] = {} for technique in sorted( self._get_techniques_from_tactic(tactic["x_mitre_shortname"]), key=lambda x: x["external_references"][0]["external_id"], ): tid = technique["external_references"][0]["external_id"] technique_name = technique["name"].split("::")[0] if technique["x_mitre_is_subtechnique"]: parent_technique = self._get_parent_technique_from_subtechnique(technique) data[tactic["name"]][tid] = f"{parent_technique['name']}::{technique_name}" else: data[tactic["name"]][tid] = technique_name return data class AttckExtractor(MitreExtractor): """Extractor for the Mitre Enterprise Att&ck Framework.""" url = "https://raw.githubusercontent.com/mitre-attack/attack-stix-data/master/enterprise-attack/enterprise-attack.json" kill_chain_name = "mitre-attack" class MbcExtractor(MitreExtractor): """Extractor for the Mitre Malware Behavior Catalog.""" url = "https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json" kill_chain_name = "mitre-mbc" def _get_tactics(self) -> List[Dict]: """Override _get_tactics to edit the tactic name for Micro-objective""" tactics = super()._get_tactics() # We don't want the Micro-objective string inside objective names for tactic in tactics: tactic["name"] = tactic["name"].replace(" Micro-objective", "") return tactics def main(args: argparse.Namespace) -> None: data = {} if args.extractor == "att&ck" or args.extractor == "both": logging.info("Extracting Mitre Att&ck techniques...") data["att&ck"] = AttckExtractor().run() if args.extractor == "mbc" or args.extractor == "both": logging.info("Extracting MBC behaviors...") data["mbc"] = MbcExtractor().run() logging.info("Writing results to %s", args.output) with Path(args.output).open("w", encoding="utf-8") as jf: json.dump(data, jf, indent=2) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Setup linter dependencies.") parser.add_argument( "--extractor", type=str, choices=["both", "mbc", "att&ck"], default="both", help="Extractor that will be run" ) parser.add_argument( "--output", "-o", type=str, default=str(Path(__file__).resolve().parent / "linter-data.json"), help="Path to output file (lint.py will be looking for linter-data.json)", ) main(parser.parse_args(args=argv[1:]))