mirror of
https://github.com/mandiant/capa.git
synced 2025-12-06 21:00:57 -08:00
227 lines
8.3 KiB
Python
227 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
# Copyright 2020 Google LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
|
|
import json
|
|
import collections
|
|
from typing import Any
|
|
from pathlib import Path
|
|
|
|
import capa.main
|
|
import capa.rules
|
|
import capa.engine
|
|
import capa.loader
|
|
import capa.features
|
|
import capa.render.json
|
|
import capa.render.utils as rutils
|
|
import capa.render.default
|
|
import capa.capabilities.common
|
|
import capa.render.result_document as rd
|
|
import capa.features.freeze.features as frzf
|
|
from capa.features.common import OS_AUTO, FORMAT_AUTO
|
|
|
|
|
|
# == Render dictionary helpers
|
|
def render_meta(doc: rd.ResultDocument, result):
|
|
result["md5"] = doc.meta.sample.md5
|
|
result["sha1"] = doc.meta.sample.sha1
|
|
result["sha256"] = doc.meta.sample.sha256
|
|
result["path"] = doc.meta.sample.path
|
|
|
|
|
|
def find_subrule_matches(doc: rd.ResultDocument) -> set[str]:
|
|
"""
|
|
collect the rule names that have been matched as a subrule match.
|
|
this way we can avoid displaying entries for things that are too specific.
|
|
"""
|
|
matches = set()
|
|
|
|
def rec(node: rd.Match):
|
|
if not node.success:
|
|
# there's probably a bug here for rules that do `not: match: ...`
|
|
# but we don't have any examples of this yet
|
|
return
|
|
|
|
elif isinstance(node.node, rd.StatementNode):
|
|
for child in node.children:
|
|
rec(child)
|
|
|
|
elif isinstance(node.node, rd.FeatureNode):
|
|
if isinstance(node.node.feature, frzf.MatchFeature):
|
|
matches.add(node.node.feature.match)
|
|
|
|
for rule in rutils.capability_rules(doc):
|
|
for _, node in rule.matches:
|
|
rec(node)
|
|
|
|
return matches
|
|
|
|
|
|
def render_capabilities(doc: rd.ResultDocument, result):
|
|
"""
|
|
example::
|
|
{'CAPABILITY': {'accept command line arguments': 'host-interaction/cli',
|
|
'allocate thread local storage (2 matches)': 'host-interaction/process',
|
|
'check for time delay via GetTickCount': 'anti-analysis/anti-debugging/debugger-detection',
|
|
'check if process is running under wine': 'anti-analysis/anti-emulation/wine',
|
|
'contain a resource (.rsrc) section': 'executable/pe/section/rsrc',
|
|
'write file (3 matches)': 'host-interaction/file-system/write'}
|
|
}
|
|
"""
|
|
subrule_matches = find_subrule_matches(doc)
|
|
|
|
result["CAPABILITY"] = {}
|
|
for rule in rutils.capability_rules(doc):
|
|
if rule.meta.name in subrule_matches:
|
|
# rules that are also matched by other rules should not get rendered by default.
|
|
# this cuts down on the amount of output while giving approx the same detail.
|
|
# see #224
|
|
continue
|
|
|
|
count = len(rule.matches)
|
|
if count == 1:
|
|
capability = rule.meta.name
|
|
else:
|
|
capability = f"{rule.meta.name} ({count} matches)"
|
|
|
|
result["CAPABILITY"].setdefault(rule.meta.namespace, [])
|
|
result["CAPABILITY"][rule.meta.namespace].append(capability)
|
|
|
|
|
|
def render_attack(doc, result):
|
|
"""
|
|
example::
|
|
{'ATT&CK': {'COLLECTION': ['Input Capture::Keylogging [T1056.001]'],
|
|
'DEFENSE EVASION': ['Obfuscated Files or Information [T1027]',
|
|
'Virtualization/Sandbox Evasion::System Checks '
|
|
'[T1497.001]'],
|
|
'DISCOVERY': ['File and Directory Discovery [T1083]',
|
|
'Query Registry [T1012]',
|
|
'System Information Discovery [T1082]'],
|
|
'EXECUTION': ['Shared Modules [T1129]']}
|
|
}
|
|
"""
|
|
result["ATTCK"] = {}
|
|
tactics = collections.defaultdict(set)
|
|
for rule in rutils.capability_rules(doc):
|
|
if not rule.meta.attack:
|
|
continue
|
|
for attack in rule.meta.attack:
|
|
tactics[attack.tactic].add((attack.technique, attack.subtechnique, attack.id))
|
|
|
|
for tactic, techniques in sorted(tactics.items()):
|
|
inner_rows = []
|
|
for technique, subtechnique, id in sorted(techniques):
|
|
if subtechnique is None:
|
|
inner_rows.append(f"{technique} {id}")
|
|
else:
|
|
inner_rows.append(f"{technique}::{subtechnique} {id}")
|
|
result["ATTCK"].setdefault(tactic.upper(), inner_rows)
|
|
|
|
|
|
def render_mbc(doc, result):
|
|
"""
|
|
example::
|
|
{'MBC': {'ANTI-BEHAVIORAL ANALYSIS': ['Debugger Detection::Timing/Delay Check '
|
|
'GetTickCount [B0001.032]',
|
|
'Emulator Detection [B0004]',
|
|
'Virtual Machine Detection::Instruction '
|
|
'Testing [B0009.029]',
|
|
'Virtual Machine Detection [B0009]'],
|
|
'COLLECTION': ['Keylogging::Polling [F0002.002]'],
|
|
'CRYPTOGRAPHY': ['Encrypt Data::RC4 [C0027.009]',
|
|
'Generate Pseudo-random Sequence::RC4 PRGA '
|
|
'[C0021.004]']}
|
|
}
|
|
"""
|
|
result["MBC"] = {}
|
|
objectives = collections.defaultdict(set)
|
|
for rule in rutils.capability_rules(doc):
|
|
if not rule.meta.mbc:
|
|
continue
|
|
|
|
for mbc in rule.meta.mbc:
|
|
objectives[mbc.objective].add((mbc.behavior, mbc.method, mbc.id))
|
|
|
|
for objective, behaviors in sorted(objectives.items()):
|
|
inner_rows = []
|
|
for behavior, method, id in sorted(behaviors):
|
|
if method is None:
|
|
inner_rows.append(f"{behavior} [{id}]")
|
|
else:
|
|
inner_rows.append(f"{behavior}::{method} [{id}]")
|
|
result["MBC"].setdefault(objective.upper(), inner_rows)
|
|
|
|
|
|
def render_dictionary(doc: rd.ResultDocument) -> dict[str, Any]:
|
|
result: dict[str, Any] = {}
|
|
render_meta(doc, result)
|
|
render_attack(doc, result)
|
|
render_mbc(doc, result)
|
|
render_capabilities(doc, result)
|
|
|
|
return result
|
|
|
|
|
|
# ==== render dictionary helpers
|
|
def capa_details(rules_path: Path, input_file: Path, output_format="dictionary"):
|
|
# load rules from disk
|
|
rules = capa.rules.get_rules([rules_path])
|
|
|
|
# extract features and find capabilities
|
|
extractor = capa.loader.get_extractor(
|
|
input_file, FORMAT_AUTO, OS_AUTO, capa.main.BACKEND_VIV, [], should_save_workspace=False, disable_progress=True
|
|
)
|
|
capabilities = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
|
|
|
|
# collect metadata (used only to make rendering more complete)
|
|
meta = capa.loader.collect_metadata([], input_file, FORMAT_AUTO, OS_AUTO, [rules_path], extractor, capabilities)
|
|
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
|
|
|
|
capa_output: Any = False
|
|
|
|
if output_format == "dictionary":
|
|
# ...as python dictionary, simplified as textable but in dictionary
|
|
doc = rd.ResultDocument.from_capa(meta, rules, capabilities.matches)
|
|
capa_output = render_dictionary(doc)
|
|
elif output_format == "json":
|
|
# render results
|
|
# ...as json
|
|
capa_output = json.loads(capa.render.json.render(meta, rules, capabilities.matches))
|
|
elif output_format == "texttable":
|
|
# ...as human readable text table
|
|
capa_output = capa.render.default.render(meta, rules, capabilities.matches)
|
|
|
|
return capa_output
|
|
|
|
|
|
if __name__ == "__main__":
|
|
import sys
|
|
import argparse
|
|
|
|
RULES_PATH = capa.main.get_default_root() / "rules"
|
|
|
|
parser = argparse.ArgumentParser(description="Extract capabilities from a file")
|
|
parser.add_argument("input_file", help="file to extract capabilities from")
|
|
parser.add_argument("--rules", help="path to rules directory", default=RULES_PATH)
|
|
parser.add_argument(
|
|
"--output", help="output format", choices=["dictionary", "json", "texttable"], default="dictionary"
|
|
)
|
|
args = parser.parse_args()
|
|
if args.rules != RULES_PATH:
|
|
args.rules = Path(args.rules)
|
|
print(capa_details(args.rules, Path(args.input_file), args.output))
|
|
sys.exit(0)
|