Files
capa/scripts/setup-linter-dependencies.py
N0stalgikow 0eb4291b25 Updating copyright across all files based on when it was first introduced. (#2027)
* updating copyright, back to the date of origin of file

* updating regex to account for linter violation
2024-03-13 14:04:53 +01:00

198 lines
8.1 KiB
Python

# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
"""
Generate capa linter-data.json, used to validate Att&ck/MBC IDs and names.
Use the --extractor option to extract data from Att&ck or MBC (or both) frameworks.
Use the --output option to choose the output JSON file.
By default, the script will create a linter-data.json in the scripts/ directory for both frameworks.
Note: The capa rules linter will try to load from its default location (scripts/linter-data.json).
Usage:
usage: setup-linter-dependencies.py [-h] [--extractor {both,mbc,att&ck}] [--output OUTPUT]
Setup linter dependencies.
optional arguments:
-h, --help show this help message and exit
--extractor {both,mbc,att&ck}
Extractor that will be run
--output OUTPUT, -o OUTPUT
Path to output file (lint.py will be looking for linter-data.json)
Example:
$ python3 setup-linter-dependencies.py
2022-01-24 22:35:06,901 [INFO] Extracting Mitre Att&ck techniques...
2022-01-24 22:35:06,901 [INFO] Downloading STIX data at: https://raw.githubusercontent.com/mitre-attack/attack-stix-data/master/enterprise-attack/enterprise-attack.json
2022-01-24 22:35:13,001 [INFO] Starting extraction...
2022-01-24 22:35:39,395 [INFO] Extracting MBC behaviors...
2022-01-24 22:35:39,395 [INFO] Downloading STIX data at: https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json
2022-01-24 22:35:39,839 [INFO] Starting extraction...
2022-01-24 22:35:42,632 [INFO] Writing results to linter-data.json
"""
import json
import logging
import argparse
from sys import argv
from typing import Dict, List
from pathlib import Path
import requests
from stix2 import Filter, MemoryStore, AttackPattern
# Configure the root logger at import time: INFO level, lines formatted as "<timestamp> [<LEVEL>] <message>".
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
class MitreExtractor:
    """
    Extract Mitre techniques and sub techniques that are represented as "attack-pattern" in STIX format.

    The STIX data is collected in JSON format by requesting the specified URL.
    Subclasses are expected to override `url` and `kill_chain_name`; instantiation
    fails with ValueError while either is left empty.

    url: must point to json stix location
    kill_chain_name: mitre-attack, mitre-mbc...
    timeout: seconds passed to requests as the download timeout
    """

    url = ""
    kill_chain_name = ""
    # requests applies no timeout unless one is given, so an unresponsive
    # server would otherwise block the script forever.
    timeout = 60

    def __init__(self):
        """Download and store in memory the STIX data on instantiation.

        Raises:
            ValueError: if `kill_chain_name` or `url` is still the empty string.
            requests.HTTPError: if the server answers with an error status code.
        """
        if self.kill_chain_name == "":
            raise ValueError(f"Kill chain name not specified in class {self.__class__.__name__}")

        if self.url == "":
            raise ValueError(f"URL not specified in class {self.__class__.__name__}")

        logging.info("Downloading STIX data at: %s", self.url)
        response = requests.get(self.url, timeout=self.timeout)
        # Fail on HTTP errors here rather than on a confusing JSON/KeyError below.
        response.raise_for_status()
        self._memory_store = MemoryStore(stix_data=response.json()["objects"])

    @staticmethod
    def _remove_deprecated_objects(stix_objects) -> List[AttackPattern]:
        """Remove any revoked or deprecated objects from queries made to the data source."""
        # Both flags are optional; a missing flag counts as "not deprecated" / "not revoked".
        return [
            stix_object
            for stix_object in stix_objects
            if stix_object.get("x_mitre_deprecated", False) is False and stix_object.get("revoked", False) is False
        ]

    @staticmethod
    def _get_external_id(technique) -> str:
        """Return the external ID (TXXXX or TXXXX.YYY) stored in the first external reference."""
        return technique["external_references"][0]["external_id"]

    def _get_tactics(self) -> List[Dict]:
        """Get tactics IDs from Mitre matrix.

        Raises:
            IndexError: if the STIX data holds no active "x-mitre-matrix" object.
        """
        matrices = self._remove_deprecated_objects(
            self._memory_store.query(
                [
                    Filter("type", "=", "x-mitre-matrix"),
                ]
            )
        )
        if not matrices:
            raise IndexError(f"No active x-mitre-matrix object found in STIX data at {self.url}")

        # Only one matrix for enterprise att&ck framework
        matrix = matrices[0]
        return [self._memory_store.get(tactic_ref) for tactic_ref in matrix["tactic_refs"]]

    def _get_techniques_from_tactic(self, tactic: str) -> List[AttackPattern]:
        """Get techniques and sub techniques from a Mitre tactic (kill_chain_phases->phase_name)"""
        return self._remove_deprecated_objects(
            self._memory_store.query(
                [
                    Filter("type", "=", "attack-pattern"),
                    Filter("kill_chain_phases.phase_name", "=", tactic),
                    Filter("kill_chain_phases.kill_chain_name", "=", self.kill_chain_name),
                ]
            )
        )

    def _get_parent_technique_from_subtechnique(self, technique: AttackPattern) -> AttackPattern:
        """Get parent technique of a sub technique using the technique ID TXXXX.YYY

        Raises:
            IndexError: if no active attack-pattern carries the parent ID TXXXX.
        """
        sub_id = self._get_external_id(technique)
        # The parent ID is the part before the dot: TXXXX.YYY -> TXXXX
        parent_id = sub_id.split(".")[0]
        parents = self._remove_deprecated_objects(
            self._memory_store.query(
                [
                    Filter("type", "=", "attack-pattern"),
                    Filter("external_references.external_id", "=", parent_id),
                ]
            )
        )
        if not parents:
            raise IndexError(f"No active parent technique {parent_id} found for sub technique {sub_id}")
        return parents[0]

    def run(self) -> Dict[str, Dict[str, str]]:
        """Iterate over every technique over every tactic. If the technique is a sub technique, then
        we also search for the parent technique name.

        Returns a mapping of tactic name -> {technique ID -> technique name}, where a
        sub technique is named "<parent name>::<sub technique name>".
        """
        logging.info("Starting extraction...")
        data: Dict[str, Dict[str, str]] = {}
        for tactic in self._get_tactics():
            names_by_id: Dict[str, str] = {}
            data[tactic["name"]] = names_by_id
            techniques = sorted(
                self._get_techniques_from_tactic(tactic["x_mitre_shortname"]),
                key=self._get_external_id,
            )
            for technique in techniques:
                tid = self._get_external_id(technique)
                # Keep only the part before any "::" already present in the name.
                technique_name = technique["name"].split("::")[0]
                # The sub technique flag is a custom property; treat its absence as "not a sub technique".
                if technique.get("x_mitre_is_subtechnique", False):
                    parent_technique = self._get_parent_technique_from_subtechnique(technique)
                    names_by_id[tid] = f"{parent_technique['name']}::{technique_name}"
                else:
                    names_by_id[tid] = technique_name
        return data
class AttckExtractor(MitreExtractor):
    """Extractor bound to the Mitre Enterprise Att&ck STIX bundle."""

    # Kill chain name used to filter attack-pattern objects in the bundle.
    kill_chain_name = "mitre-attack"
    # Location of the enterprise Att&ck STIX JSON (split only to keep the line short).
    url = (
        "https://raw.githubusercontent.com/mitre-attack/attack-stix-data"
        "/master/enterprise-attack/enterprise-attack.json"
    )
class MbcExtractor(MitreExtractor):
    """Extractor bound to the Mitre Malware Behavior Catalog STIX bundle."""

    kill_chain_name = "mitre-mbc"
    url = "https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json"

    def _get_tactics(self) -> List[Dict]:
        """Return the parent class's tactics with " Micro-objective" stripped from each name.

        The tactic objects returned by the parent are renamed in place.
        """
        unwanted = " Micro-objective"
        objectives = super()._get_tactics()
        for objective in objectives:
            # Objective names should not carry the Micro-objective marker.
            objective["name"] = objective["name"].replace(unwanted, "")
        return objectives
def main(args: argparse.Namespace) -> None:
    """Run the extractor(s) selected by args.extractor and write the results as JSON to args.output.

    The output is a JSON object with an "att&ck" key and/or an "mbc" key, depending on
    whether args.extractor is "att&ck", "mbc" or "both".
    """
    selection = args.extractor
    results = {}

    if selection in ("att&ck", "both"):
        logging.info("Extracting Mitre Att&ck techniques...")
        results["att&ck"] = AttckExtractor().run()

    if selection in ("mbc", "both"):
        logging.info("Extracting MBC behaviors...")
        results["mbc"] = MbcExtractor().run()

    logging.info("Writing results to %s", args.output)
    destination = Path(args.output)
    with destination.open("w", encoding="utf-8") as handle:
        json.dump(results, handle, indent=2)
if __name__ == "__main__":
    # Default output sits next to this script, under the file name the help text says lint.py looks for.
    default_output = Path(__file__).resolve().parent / "linter-data.json"

    cli = argparse.ArgumentParser(description="Setup linter dependencies.")
    cli.add_argument(
        "--extractor",
        choices=["both", "mbc", "att&ck"],
        default="both",
        type=str,
        help="Extractor that will be run",
    )
    cli.add_argument(
        "--output",
        "-o",
        default=str(default_output),
        type=str,
        help="Path to output file (lint.py will be looking for linter-data.json)",
    )

    # Parse everything after the script name and hand the namespace to main().
    main(cli.parse_args(args=argv[1:]))