Files
capa/scripts/setup-linter-dependencies.py
Ana Maria Martinez Gomez 3cd97ae9f2 [copyright + license] Fix headers
Replace the header from source code files using the following script:
```Python
for dir_path, dir_names, file_names in os.walk("capa"):
    for file_name in file_names:
        # header are only in `.py` and `.toml` files
        if file_name[-3:] not in (".py", "oml"):
            continue
        file_path = f"{dir_path}/{file_name}"
        f = open(file_path, "rb+")
        content = f.read()
        m = re.search(OLD_HEADER, content)
        if not m:
            continue
        print(f"{file_path}: {m.group('year')}")
        content = content.replace(m.group(0), NEW_HEADER % m.group("year"))
        f.seek(0)
        f.write(content)
```

Some files had the copyright headers inside a `"""` comment and needed
manual changes before applying the script. `hook-vivisect.py` and
`pyinstaller.spec` didn't include the license in the header and also
needed manual changes.

The old header had the confusing sentence `All rights reserved`, which
does not make sense for an open source license. Replace the header by
the default Google header that corrects this issue and keep capa
consistent with other Google projects.

Adapt the linter to work with the new header.

Replace also the copyright text in the `web/public/index.html` file for
consistency.
2025-01-15 08:52:42 -07:00

204 lines
8.1 KiB
Python

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Generate capa linter-data.json, used to validate Att&ck/MBC IDs and names.
Use the --extractor option to extract data from Att&ck or MBC (or both) frameworks.
Use the --output to choose the output json file.
By default, the script will create a linter-data.json in the scripts/ directory for both frameworks.
Note: The capa rules linter will try to load from its default location (scripts/linter-data.json).
Usage:
usage: setup-linter-dependencies.py [-h] [--extractor {both,mbc,att&ck}] [--output OUTPUT]
Setup linter dependencies.
optional arguments:
-h, --help show this help message and exit
--extractor {both,mbc,att&ck}
Extractor that will be run
--output OUTPUT, -o OUTPUT
Path to output file (lint.py will be looking for linter-data.json)
Example:
$ python3 setup-linter-dependencies.py
2022-01-24 22:35:06,901 [INFO] Extracting Mitre Att&ck techniques...
2022-01-24 22:35:06,901 [INFO] Downloading STIX data at: https://raw.githubusercontent.com/mitre-attack/attack-stix-data/master/enterprise-attack/enterprise-attack.json
2022-01-24 22:35:13,001 [INFO] Starting extraction...
2022-01-24 22:35:39,395 [INFO] Extracting MBC behaviors...
2022-01-24 22:35:39,395 [INFO] Downloading STIX data at: https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json
2022-01-24 22:35:39,839 [INFO] Starting extraction...
2022-01-24 22:35:42,632 [INFO] Writing results to linter-data.json
"""
import json
import logging
import argparse
from sys import argv
from pathlib import Path
import requests
from stix2 import Filter, MemoryStore, AttackPattern
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
class MitreExtractor:
"""
This class extract Mitre techniques and sub techniques that are represented as "attack-pattern" in STIX format.
The STIX data is collected in JSON format by requesting the specified URL.
url: must point to json stix location
kill_chain_name: mitre-attack, mitre-mbc...
"""
url = ""
kill_chain_name = ""
def __init__(self):
"""Download and store in memory the STIX data on instantiation."""
if self.kill_chain_name == "":
raise ValueError(f"Kill chain name not specified in class {self.__class__.__name__}")
if self.url == "":
raise ValueError(f"URL not specified in class {self.__class__.__name__}")
logging.info("Downloading STIX data at: %s", self.url)
stix_json = requests.get(self.url).json()
self._memory_store = MemoryStore(stix_data=stix_json["objects"])
@staticmethod
def _remove_deprecated_objects(stix_objects) -> list[AttackPattern]:
"""Remove any revoked or deprecated objects from queries made to the data source."""
return list(
filter(
lambda x: x.get("x_mitre_deprecated", False) is False and x.get("revoked", False) is False,
stix_objects,
)
)
def _get_tactics(self) -> list[dict]:
"""Get tactics IDs from Mitre matrix."""
# Only one matrix for enterprise att&ck framework
matrix = self._remove_deprecated_objects(
self._memory_store.query(
[
Filter("type", "=", "x-mitre-matrix"),
]
)
)[0]
return list(map(self._memory_store.get, matrix["tactic_refs"]))
def _get_techniques_from_tactic(self, tactic: str) -> list[AttackPattern]:
"""Get techniques and sub techniques from a Mitre tactic (kill_chain_phases->phase_name)"""
techniques = self._remove_deprecated_objects(
self._memory_store.query(
[
Filter("type", "=", "attack-pattern"),
Filter("kill_chain_phases.phase_name", "=", tactic),
Filter("kill_chain_phases.kill_chain_name", "=", self.kill_chain_name),
]
)
)
return techniques
def _get_parent_technique_from_subtechnique(self, technique: AttackPattern) -> AttackPattern:
"""Get parent technique of a sub technique using the technique ID TXXXX.YYY"""
sub_id = technique["external_references"][0]["external_id"].split(".")[0]
parent_technique = self._remove_deprecated_objects(
self._memory_store.query(
[
Filter("type", "=", "attack-pattern"),
Filter("external_references.external_id", "=", sub_id),
]
)
)[0]
return parent_technique
def run(self) -> dict[str, dict[str, str]]:
"""Iterate over every technique over every tactic. If the technique is a sub technique, then
we also search for the parent technique name.
"""
logging.info("Starting extraction...")
data: dict[str, dict[str, str]] = {}
for tactic in self._get_tactics():
data[tactic["name"]] = {}
for technique in sorted(
self._get_techniques_from_tactic(tactic["x_mitre_shortname"]),
key=lambda x: x["external_references"][0]["external_id"],
):
tid = technique["external_references"][0]["external_id"]
technique_name = technique["name"].split("::")[0]
if technique["x_mitre_is_subtechnique"]:
parent_technique = self._get_parent_technique_from_subtechnique(technique)
data[tactic["name"]][tid] = f"{parent_technique['name']}::{technique_name}"
else:
data[tactic["name"]][tid] = technique_name
return data
class AttckExtractor(MitreExtractor):
"""Extractor for the Mitre Enterprise Att&ck Framework."""
url = "https://raw.githubusercontent.com/mitre-attack/attack-stix-data/master/enterprise-attack/enterprise-attack.json"
kill_chain_name = "mitre-attack"
class MbcExtractor(MitreExtractor):
"""Extractor for the Mitre Malware Behavior Catalog."""
url = "https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json"
kill_chain_name = "mitre-mbc"
def _get_tactics(self) -> list[dict]:
"""Override _get_tactics to edit the tactic name for Micro-objective"""
tactics = super()._get_tactics()
# We don't want the Micro-objective string inside objective names
for tactic in tactics:
tactic["name"] = tactic["name"].replace(" Micro-objective", "")
return tactics
def main(args: argparse.Namespace) -> None:
data = {}
if args.extractor == "att&ck" or args.extractor == "both":
logging.info("Extracting Mitre Att&ck techniques...")
data["att&ck"] = AttckExtractor().run()
if args.extractor == "mbc" or args.extractor == "both":
logging.info("Extracting MBC behaviors...")
data["mbc"] = MbcExtractor().run()
logging.info("Writing results to %s", args.output)
with Path(args.output).open("w", encoding="utf-8") as jf:
json.dump(data, jf, indent=2)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Setup linter dependencies.")
parser.add_argument(
"--extractor", type=str, choices=["both", "mbc", "att&ck"], default="both", help="Extractor that will be run"
)
parser.add_argument(
"--output",
"-o",
type=str,
default=str(Path(__file__).resolve().parent / "linter-data.json"),
help="Path to output file (lint.py will be looking for linter-data.json)",
)
main(parser.parse_args(args=argv[1:]))