Files
capa/scripts/capa-as-library.py
Ana Maria Martinez Gomez 3cd97ae9f2 [copyright + license] Fix headers
Replace the header from source code files using the following script:
```Python
for dir_path, dir_names, file_names in os.walk("capa"):
    for file_name in file_names:
        # header are only in `.py` and `.toml` files
        if file_name[-3:] not in (".py", "oml"):
            continue
        file_path = f"{dir_path}/{file_name}"
        f = open(file_path, "rb+")
        content = f.read()
        m = re.search(OLD_HEADER, content)
        if not m:
            continue
        print(f"{file_path}: {m.group('year')}")
        content = content.replace(m.group(0), NEW_HEADER % m.group("year"))
        f.seek(0)
        f.write(content)
```

Some files had the copyright headers inside a `"""` comment and needed
manual changes before applying the script. `hook-vivisect.py` and
`pyinstaller.spec` didn't include the license in the header and also
needed manual changes.

The old header had the confusing sentence `All rights reserved`, which
does not make sense for an open source license. Replace the header by
the default Google header that corrects this issue and keep capa
consistent with other Google projects.

Adapt the linter to work with the new header.

Replace also the copyright text in the `web/public/index.html` file for
consistency.
2025-01-15 08:52:42 -07:00

227 lines
8.3 KiB
Python

#!/usr/bin/env python3
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import collections
from typing import Any
from pathlib import Path
import capa.main
import capa.rules
import capa.engine
import capa.loader
import capa.features
import capa.render.json
import capa.render.utils as rutils
import capa.render.default
import capa.capabilities.common
import capa.render.result_document as rd
import capa.features.freeze.features as frzf
from capa.features.common import OS_AUTO, FORMAT_AUTO
# == Render dictionary helpers
def render_meta(doc: rd.ResultDocument, result):
result["md5"] = doc.meta.sample.md5
result["sha1"] = doc.meta.sample.sha1
result["sha256"] = doc.meta.sample.sha256
result["path"] = doc.meta.sample.path
def find_subrule_matches(doc: rd.ResultDocument) -> set[str]:
"""
collect the rule names that have been matched as a subrule match.
this way we can avoid displaying entries for things that are too specific.
"""
matches = set()
def rec(node: rd.Match):
if not node.success:
# there's probably a bug here for rules that do `not: match: ...`
# but we don't have any examples of this yet
return
elif isinstance(node.node, rd.StatementNode):
for child in node.children:
rec(child)
elif isinstance(node.node, rd.FeatureNode):
if isinstance(node.node.feature, frzf.MatchFeature):
matches.add(node.node.feature.match)
for rule in rutils.capability_rules(doc):
for _, node in rule.matches:
rec(node)
return matches
def render_capabilities(doc: rd.ResultDocument, result):
"""
example::
{'CAPABILITY': {'accept command line arguments': 'host-interaction/cli',
'allocate thread local storage (2 matches)': 'host-interaction/process',
'check for time delay via GetTickCount': 'anti-analysis/anti-debugging/debugger-detection',
'check if process is running under wine': 'anti-analysis/anti-emulation/wine',
'contain a resource (.rsrc) section': 'executable/pe/section/rsrc',
'write file (3 matches)': 'host-interaction/file-system/write'}
}
"""
subrule_matches = find_subrule_matches(doc)
result["CAPABILITY"] = {}
for rule in rutils.capability_rules(doc):
if rule.meta.name in subrule_matches:
# rules that are also matched by other rules should not get rendered by default.
# this cuts down on the amount of output while giving approx the same detail.
# see #224
continue
count = len(rule.matches)
if count == 1:
capability = rule.meta.name
else:
capability = f"{rule.meta.name} ({count} matches)"
result["CAPABILITY"].setdefault(rule.meta.namespace, [])
result["CAPABILITY"][rule.meta.namespace].append(capability)
def render_attack(doc, result):
"""
example::
{'ATT&CK': {'COLLECTION': ['Input Capture::Keylogging [T1056.001]'],
'DEFENSE EVASION': ['Obfuscated Files or Information [T1027]',
'Virtualization/Sandbox Evasion::System Checks '
'[T1497.001]'],
'DISCOVERY': ['File and Directory Discovery [T1083]',
'Query Registry [T1012]',
'System Information Discovery [T1082]'],
'EXECUTION': ['Shared Modules [T1129]']}
}
"""
result["ATTCK"] = {}
tactics = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
if not rule.meta.attack:
continue
for attack in rule.meta.attack:
tactics[attack.tactic].add((attack.technique, attack.subtechnique, attack.id))
for tactic, techniques in sorted(tactics.items()):
inner_rows = []
for technique, subtechnique, id in sorted(techniques):
if subtechnique is None:
inner_rows.append(f"{technique} {id}")
else:
inner_rows.append(f"{technique}::{subtechnique} {id}")
result["ATTCK"].setdefault(tactic.upper(), inner_rows)
def render_mbc(doc, result):
"""
example::
{'MBC': {'ANTI-BEHAVIORAL ANALYSIS': ['Debugger Detection::Timing/Delay Check '
'GetTickCount [B0001.032]',
'Emulator Detection [B0004]',
'Virtual Machine Detection::Instruction '
'Testing [B0009.029]',
'Virtual Machine Detection [B0009]'],
'COLLECTION': ['Keylogging::Polling [F0002.002]'],
'CRYPTOGRAPHY': ['Encrypt Data::RC4 [C0027.009]',
'Generate Pseudo-random Sequence::RC4 PRGA '
'[C0021.004]']}
}
"""
result["MBC"] = {}
objectives = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
if not rule.meta.mbc:
continue
for mbc in rule.meta.mbc:
objectives[mbc.objective].add((mbc.behavior, mbc.method, mbc.id))
for objective, behaviors in sorted(objectives.items()):
inner_rows = []
for behavior, method, id in sorted(behaviors):
if method is None:
inner_rows.append(f"{behavior} [{id}]")
else:
inner_rows.append(f"{behavior}::{method} [{id}]")
result["MBC"].setdefault(objective.upper(), inner_rows)
def render_dictionary(doc: rd.ResultDocument) -> dict[str, Any]:
result: dict[str, Any] = {}
render_meta(doc, result)
render_attack(doc, result)
render_mbc(doc, result)
render_capabilities(doc, result)
return result
# ==== render dictionary helpers
def capa_details(rules_path: Path, input_file: Path, output_format="dictionary"):
# load rules from disk
rules = capa.rules.get_rules([rules_path])
# extract features and find capabilities
extractor = capa.loader.get_extractor(
input_file, FORMAT_AUTO, OS_AUTO, capa.main.BACKEND_VIV, [], should_save_workspace=False, disable_progress=True
)
capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
# collect metadata (used only to make rendering more complete)
meta = capa.loader.collect_metadata([], input_file, FORMAT_AUTO, OS_AUTO, [rules_path], extractor, counts)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)
capa_output: Any = False
if output_format == "dictionary":
# ...as python dictionary, simplified as textable but in dictionary
doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
capa_output = render_dictionary(doc)
elif output_format == "json":
# render results
# ...as json
capa_output = json.loads(capa.render.json.render(meta, rules, capabilities))
elif output_format == "texttable":
# ...as human readable text table
capa_output = capa.render.default.render(meta, rules, capabilities)
return capa_output
if __name__ == "__main__":
import sys
import argparse
RULES_PATH = capa.main.get_default_root() / "rules"
parser = argparse.ArgumentParser(description="Extract capabilities from a file")
parser.add_argument("input_file", help="file to extract capabilities from")
parser.add_argument("--rules", help="path to rules directory", default=RULES_PATH)
parser.add_argument(
"--output", help="output format", choices=["dictionary", "json", "texttable"], default="dictionary"
)
args = parser.parse_args()
if args.rules != RULES_PATH:
args.rules = Path(args.rules)
print(capa_details(args.rules, Path(args.input_file), args.output))
sys.exit(0)