mirror of
https://github.com/mandiant/capa.git
synced 2025-12-06 04:41:00 -08:00
Replace the header in the source code files using the following script:
```Python
# Rewrite the license header of every `.py`/`.toml` file below `capa/` in place.
# OLD_HEADER and NEW_HEADER are not defined in this snippet; because the files are
# handled as bytes, OLD_HEADER has to be a bytes regex with a `year` group and
# NEW_HEADER a bytes %-template.
for dir_path, dir_names, file_names in os.walk("capa"):
    for file_name in file_names:
        # headers are only in `.py` and `.toml` files
        if not file_name.endswith((".py", ".toml")):
            continue
        file_path = f"{dir_path}/{file_name}"
        # the context manager closes the handle on every path, including `continue`
        with open(file_path, "rb+") as f:
            content = f.read()
            m = re.search(OLD_HEADER, content)
            if not m:
                continue
            print(f"{file_path}: {m.group('year')}")
            content = content.replace(m.group(0), NEW_HEADER % m.group("year"))
            f.seek(0)
            f.write(content)
            # drop leftover bytes when the new content is shorter than the old one
            f.truncate()
```
Some files had the copyright headers inside a `"""` comment and needed
manual changes before applying the script. `hook-vivisect.py` and
`pyinstaller.spec` didn't include the license in the header and also
needed manual changes.
The old header contained the confusing sentence `All rights reserved`, which
does not make sense for an open source license. Replace the header with
the default Google header, which corrects this issue and keeps capa
consistent with other Google projects.
Adapt the linter to work with the new header.
Also replace the copyright text in the `web/public/index.html` file for
consistency.
204 lines
8.1 KiB
Python
204 lines
8.1 KiB
Python
# Copyright 2022 Google LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Generate capa linter-data.json, used to validate Att&ck/MBC IDs and names.
|
|
|
|
Use the --extractor option to extract data from Att&ck or MBC (or both) frameworks.
|
|
Use the --output to choose the output json file.
|
|
By default, the script will create a linter-data.json in the scripts/ directory for both frameworks.
|
|
|
|
Note: The capa rules linter will try to load from its default location (scripts/linter-data.json).
|
|
|
|
Usage:
|
|
|
|
usage: setup-linter-dependencies.py [-h] [--extractor {both,mbc,att&ck}] [--output OUTPUT]
|
|
|
|
Setup linter dependencies.
|
|
|
|
optional arguments:
|
|
-h, --help show this help message and exit
|
|
--extractor {both,mbc,att&ck}
|
|
Extractor that will be run
|
|
--output OUTPUT, -o OUTPUT
|
|
Path to output file (lint.py will be looking for linter-data.json)
|
|
|
|
|
|
Example:
|
|
|
|
$ python3 setup-linter-dependencies.py
|
|
2022-01-24 22:35:06,901 [INFO] Extracting Mitre Att&ck techniques...
|
|
2022-01-24 22:35:06,901 [INFO] Downloading STIX data at: https://raw.githubusercontent.com/mitre-attack/attack-stix-data/master/enterprise-attack/enterprise-attack.json
|
|
2022-01-24 22:35:13,001 [INFO] Starting extraction...
|
|
2022-01-24 22:35:39,395 [INFO] Extracting MBC behaviors...
|
|
2022-01-24 22:35:39,395 [INFO] Downloading STIX data at: https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json
|
|
2022-01-24 22:35:39,839 [INFO] Starting extraction...
|
|
2022-01-24 22:35:42,632 [INFO] Writing results to linter-data.json
|
|
"""
|
|
import json
|
|
import logging
|
|
import argparse
|
|
from sys import argv
|
|
from pathlib import Path
|
|
|
|
import requests
|
|
from stix2 import Filter, MemoryStore, AttackPattern
|
|
|
|
# Configure the root logger once at import time: INFO level, timestamped lines.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
|
|
|
|
|
|
class MitreExtractor:
    """
    Extract Mitre techniques and sub techniques that are represented as "attack-pattern" in STIX format.
    The STIX data is collected in JSON format by requesting the specified URL.

    Subclasses are expected to override:
      url: must point to json stix location
      kill_chain_name: mitre-attack, mitre-mbc...
      download_timeout: optional, seconds passed to `requests.get` as `timeout`
    """

    url = ""
    kill_chain_name = ""
    # Without a timeout `requests.get` may block forever on a stalled connection.
    # The value bounds the wait for the connection and for each chunk of data,
    # not the duration of the whole download.
    download_timeout = 60

    def __init__(self):
        """Download and store in memory the STIX data on instantiation.

        Raises:
            ValueError: `kill_chain_name` or `url` was left empty by the subclass.
            requests.exceptions.HTTPError: the server answered with an error status.
            requests.exceptions.Timeout: the server stopped responding.
        """
        if self.kill_chain_name == "":
            raise ValueError(f"Kill chain name not specified in class {self.__class__.__name__}")

        if self.url == "":
            raise ValueError(f"URL not specified in class {self.__class__.__name__}")

        logging.info("Downloading STIX data at: %s", self.url)
        response = requests.get(self.url, timeout=self.download_timeout)
        # Fail on 4xx/5xx here instead of trying to parse an error page as STIX JSON.
        response.raise_for_status()
        self._memory_store = MemoryStore(stix_data=response.json()["objects"])

    @staticmethod
    def _remove_deprecated_objects(stix_objects) -> list[AttackPattern]:
        """Remove any revoked or deprecated objects from queries made to the data source.

        An object is kept only when both `x_mitre_deprecated` and `revoked` are
        absent or exactly `False`.
        """
        return [
            stix_object
            for stix_object in stix_objects
            if stix_object.get("x_mitre_deprecated", False) is False and stix_object.get("revoked", False) is False
        ]

    def _get_tactics(self) -> list[dict]:
        """Get the tactic objects referenced by the Mitre matrix, in `tactic_refs` order.

        Raises:
            IndexError: the data contains no non-deprecated `x-mitre-matrix` object.
        """
        # Only one matrix for enterprise att&ck framework
        matrices = self._remove_deprecated_objects(
            self._memory_store.query(
                [
                    Filter("type", "=", "x-mitre-matrix"),
                ]
            )
        )
        matrix = matrices[0]
        return list(map(self._memory_store.get, matrix["tactic_refs"]))

    def _get_techniques_from_tactic(self, tactic: str) -> list[AttackPattern]:
        """Get techniques and sub techniques from a Mitre tactic (kill_chain_phases->phase_name)"""
        return self._remove_deprecated_objects(
            self._memory_store.query(
                [
                    Filter("type", "=", "attack-pattern"),
                    Filter("kill_chain_phases.phase_name", "=", tactic),
                    Filter("kill_chain_phases.kill_chain_name", "=", self.kill_chain_name),
                ]
            )
        )

    def _get_parent_technique_from_subtechnique(self, technique: AttackPattern) -> AttackPattern:
        """Get parent technique of a sub technique using the technique ID TXXXX.YYY

        Raises:
            IndexError: no non-deprecated attack-pattern carries the parent ID.
        """
        # The parent ID is the part before the dot: TXXXX.YYY -> TXXXX
        parent_id = technique["external_references"][0]["external_id"].split(".")[0]
        candidates = self._remove_deprecated_objects(
            self._memory_store.query(
                [
                    Filter("type", "=", "attack-pattern"),
                    Filter("external_references.external_id", "=", parent_id),
                ]
            )
        )
        return candidates[0]

    def run(self) -> dict[str, dict[str, str]]:
        """Iterate over every technique over every tactic. If the technique is a sub technique, then
        we also search for the parent technique name.

        Returns:
            A mapping of tactic name to a mapping of technique ID to technique name,
            where sub techniques are named `<parent name>::<sub technique name>`.
        """
        logging.info("Starting extraction...")
        data: dict[str, dict[str, str]] = {}
        for tactic in self._get_tactics():
            data[tactic["name"]] = {}
            for technique in sorted(
                self._get_techniques_from_tactic(tactic["x_mitre_shortname"]),
                key=lambda x: x["external_references"][0]["external_id"],
            ):
                tid = technique["external_references"][0]["external_id"]
                # Keep only the text before any "::" already present in the name
                technique_name = technique["name"].split("::")[0]
                if technique["x_mitre_is_subtechnique"]:
                    parent_technique = self._get_parent_technique_from_subtechnique(technique)
                    data[tactic["name"]][tid] = f"{parent_technique['name']}::{technique_name}"
                else:
                    data[tactic["name"]][tid] = technique_name
        return data
|
|
|
|
|
|
class AttckExtractor(MitreExtractor):
    """Extractor bound to the Mitre Enterprise Att&ck STIX bundle."""

    # Where the STIX JSON is downloaded from
    url = "https://raw.githubusercontent.com/mitre-attack/attack-stix-data/master/enterprise-attack/enterprise-attack.json"

    # Value matched against `kill_chain_phases.kill_chain_name` when querying techniques
    kill_chain_name = "mitre-attack"
|
|
|
|
|
|
class MbcExtractor(MitreExtractor):
    """Extractor bound to the Mitre Malware Behavior Catalog STIX bundle."""

    # Where the STIX JSON is downloaded from
    url = "https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json"

    # Value matched against `kill_chain_phases.kill_chain_name` when querying techniques
    kill_chain_name = "mitre-mbc"

    def _get_tactics(self) -> list[dict]:
        """Return the parent's tactics with " Micro-objective" removed from each name.

        The tactic objects are renamed in place and the same list is returned.
        """
        objectives = super()._get_tactics()
        # Objective names must not carry the Micro-objective marker
        marker = " Micro-objective"
        for objective in objectives:
            cleaned_name = objective["name"].replace(marker, "")
            objective["name"] = cleaned_name
        return objectives
|
|
|
|
|
|
def main(args: argparse.Namespace) -> None:
    """Run the selected extractor(s) and write their results to a JSON file.

    `args.extractor` picks the frameworks ("att&ck", "mbc" or "both");
    `args.output` is the path of the JSON file to (over)write.
    """
    selected = args.extractor
    results = {}

    if selected in ("att&ck", "both"):
        logging.info("Extracting Mitre Att&ck techniques...")
        results["att&ck"] = AttckExtractor().run()

    if selected in ("mbc", "both"):
        logging.info("Extracting MBC behaviors...")
        results["mbc"] = MbcExtractor().run()

    logging.info("Writing results to %s", args.output)
    output_path = Path(args.output)
    with output_path.open("w", encoding="utf-8") as stream:
        json.dump(results, stream, indent=2)
|
|
|
|
|
|
if __name__ == "__main__":
    # By default the JSON file is written next to this script.
    default_output = Path(__file__).resolve().parent / "linter-data.json"

    cli = argparse.ArgumentParser(description="Setup linter dependencies.")
    cli.add_argument(
        "--extractor",
        type=str,
        choices=["both", "mbc", "att&ck"],
        default="both",
        help="Extractor that will be run",
    )
    cli.add_argument(
        "--output",
        "-o",
        type=str,
        default=str(default_output),
        help="Path to output file (lint.py will be looking for linter-data.json)",
    )

    # Parse the command line (without the program name) and hand over to main()
    main(cli.parse_args(args=argv[1:]))
|