From cd06ee4544d488d4ee734b024256f56e7eeb4670 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 10 Aug 2021 13:05:31 -0600 Subject: [PATCH 1/6] main: correctly extract namespaces matches across scopes closes #721 --- capa/engine.py | 32 +++++++++++++++++++++++--------- capa/main.py | 14 ++++++++------ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/capa/engine.py b/capa/engine.py index 3b969569..aa454d77 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -8,7 +8,7 @@ import copy import collections -from typing import Set, Dict, List, Tuple, Union, Mapping +from typing import Set, Dict, List, Tuple, Union, Mapping, Iterable import capa.rules import capa.features.common @@ -228,6 +228,23 @@ class Subscope(Statement): MatchResults = Mapping[str, List[Tuple[int, Result]]] +def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: Iterable[int]): + """ + record into the given featureset that the given rule matched at the given locations. + + naively, this is just adding a MatchedRule feature; + however, we also want to record matches for the rule's namespaces. + + updates `features` in-place. doesn't modify the remaining arguments. 
+ """ + features[capa.features.common.MatchedRule(rule.name)].update(locations) + namespace = rule.meta.get("namespace") + if namespace: + while namespace: + features[capa.features.common.MatchedRule(namespace)].update(locations) + namespace, _, _ = namespace.rpartition("/") + + def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tuple[FeatureSet, MatchResults]: """ Args: @@ -237,7 +254,7 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tupl Returns: Tuple[FeatureSet, MatchResults]: two-tuple with entries: - - set of features used for matching (which may be greater than argument, due to rule match features), and + - set of features used for matching (which may be a superset of the given `features` argument, due to rule match features), and - mapping from rule name to [(location of match, result object)] """ results = collections.defaultdict(list) # type: MatchResults @@ -252,12 +269,9 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tupl res = rule.evaluate(features) if res: results[rule.name].append((va, res)) - features[capa.features.common.MatchedRule(rule.name)].add(va) - - namespace = rule.meta.get("namespace") - if namespace: - while namespace: - features[capa.features.common.MatchedRule(namespace)].add(va) - namespace, _, _ = namespace.rpartition("/") + # we need to update the current `features` + # because subsequent iterations of this loop may use newly added features, + # such as rule or namespace matches. 
+ index_rule_matches(features, rule, [va]) return (features, results) diff --git a/capa/main.py b/capa/main.py index 32ef811d..8bd2a4a6 100644 --- a/capa/main.py +++ b/capa/main.py @@ -21,7 +21,7 @@ import textwrap import itertools import contextlib import collections -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Tuple, Iterable import halo import tqdm @@ -101,8 +101,9 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f: for rule_name, res in matches.items(): bb_matches[rule_name].extend(res) + rule = ruleset[rule_name] for va, _ in res: - function_features[capa.features.common.MatchedRule(rule_name)].add(va) + capa.engine.index_rule_matches(function_features, rule, [va]) _, function_matches = capa.engine.match(ruleset.function_rules, function_features, int(f)) return function_matches, bb_matches, len(function_features) @@ -175,10 +176,11 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro # collection of features that captures the rule matches within function and BB scopes. # mapping from feature (matched rule) to set of addresses at which it matched. 
- function_and_lower_features = { - capa.features.common.MatchedRule(rule_name): set(map(lambda p: p[0], results)) - for rule_name, results in itertools.chain(all_function_matches.items(), all_bb_matches.items()) - } # type: FeatureSet + function_and_lower_features: FeatureSet = collections.defaultdict(set) + for rule_name, results in itertools.chain(all_function_matches.items(), all_bb_matches.items()): + locations = set(map(lambda p: p[0], results)) + rule = ruleset[rule_name] + capa.engine.index_rule_matches(function_and_lower_features, rule, locations) all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features) meta["feature_counts"]["file"] = feature_count From 7e60162d658f22af9902d9381a7f198410ae6e46 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 10 Aug 2021 13:06:04 -0600 Subject: [PATCH 2/6] result_document: extract only the relevant namespace locations closes #703 --- capa/render/result_document.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 3e4d6c25..1fe556b8 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -198,9 +198,19 @@ def convert_match_to_result_document(rules, capabilities, result): # in the meantime, the above might be sufficient. rule_matches = {address: result for (address, result) in capabilities[rule.name]} for location in doc["locations"]: - doc["children"].append( - convert_match_to_result_document(rules, capabilities, rule_matches[location]) - ) + # doc[locations] contains all matches for the given namespace. + # for example, the feature might be `match: anti-analysis/packer` + # which matches against "generic unpacker" and "UPX". + # in this case, doc[locations] contains locations for *both* of these. + # + # rule_matches contains the matches for the specific rule. + # this is a subset of doc[locations]. 
+ # + # so, grab only the locations for current rule. + if location in rule_matches: + doc["children"].append( + convert_match_to_result_document(rules, capabilities, rule_matches[location]) + ) return doc From 44dc4efe57df897619ad584f7053a65efb21d9c6 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 10 Aug 2021 13:14:00 -0600 Subject: [PATCH 3/6] changlog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3857d512..0a6e09a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,9 @@ ### Bug Fixes +- main: fix `KeyError: 0` when reporting results @williballenthin #703 +- main: fix potential false negatives due to namespaces across scopes @williballenthin #721 + ### capa explorer IDA Pro plugin - explorer: add additional filter logic when displaying matches by function #686 @mike-hunhoff From 5af59cecdac01f01d8fe49b1b3e2fb03f8623184 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Wed, 18 Aug 2021 14:23:36 +0200 Subject: [PATCH 4/6] update capa_as_library for capa v2 --- scripts/capa_as_library.py | 49 +++++++++----------------------------- 1 file changed, 11 insertions(+), 38 deletions(-) diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index 36244a3f..c85445f0 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -85,7 +85,6 @@ def render_capabilities(doc, ostream): ostream["CAPABILITY"].setdefault(rule["meta"]["namespace"], list()) ostream["CAPABILITY"][rule["meta"]["namespace"]].append(capability) - def render_attack(doc, ostream): """ example:: @@ -104,28 +103,16 @@ def render_attack(doc, ostream): for rule in rutils.capability_rules(doc): if not rule["meta"].get("att&ck"): continue - for attack in rule["meta"]["att&ck"]: - tactic, _, rest = attack.partition("::") - if "::" in rest: - technique, _, rest = rest.partition("::") - subtechnique, _, id = rest.rpartition(" ") - tactics[tactic].add((technique, subtechnique, id)) - else: - technique, _, id = 
rest.rpartition(" ") - tactics[tactic].add((technique, id)) + tactics[attack["tactic"]].add((attack["technique"], attack.get("subtechnique"), attack["id"])) for tactic, techniques in sorted(tactics.items()): inner_rows = [] - for spec in sorted(techniques): - if len(spec) == 2: - technique, id = spec + for (technique, subtechnique, id) in sorted(techniques): + if subtechnique is None: inner_rows.append("%s %s" % (technique, id)) - elif len(spec) == 3: - technique, subtechnique, id = spec - inner_rows.append("%s::%s %s" % (technique, subtechnique, id)) else: - raise RuntimeError("unexpected ATT&CK spec format") + inner_rows.append("%s::%s %s" % (technique, subtechnique, id)) ostream["ATTCK"].setdefault(tactic.upper(), inner_rows) @@ -150,34 +137,20 @@ def render_mbc(doc, ostream): if not rule["meta"].get("mbc"): continue - mbcs = rule["meta"]["mbc"] - if not isinstance(mbcs, list): - raise ValueError("invalid rule: MBC mapping is not a list") - - for mbc in mbcs: - objective, _, rest = mbc.partition("::") - if "::" in rest: - behavior, _, rest = rest.partition("::") - method, _, id = rest.rpartition(" ") - objectives[objective].add((behavior, method, id)) - else: - behavior, _, id = rest.rpartition(" ") - objectives[objective].add((behavior, id)) + for mbc in rule["meta"]["mbc"]: + objectives[mbc["objective"]].add((mbc["behavior"], mbc.get("method"), mbc["id"])) for objective, behaviors in sorted(objectives.items()): inner_rows = [] - for spec in sorted(behaviors): - if len(spec) == 2: - behavior, id = spec - inner_rows.append("%s %s" % (behavior, id)) - elif len(spec) == 3: - behavior, method, id = spec - inner_rows.append("%s::%s %s" % (behavior, method, id)) + for (behavior, method, id) in sorted(behaviors): + if method is None: + inner_rows.append("%s [%s]" % (behavior, id)) else: - raise RuntimeError("unexpected MBC spec format") + inner_rows.append("%s::%s [%s]" % (behavior, method, id)) ostream["MBC"].setdefault(objective.upper(), inner_rows) + def 
render_dictionary(doc): ostream = dict() render_meta(doc, ostream) From b1171864e355d57e092cff5b50041d605cbaeb95 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Wed, 18 Aug 2021 14:25:58 +0200 Subject: [PATCH 5/6] black --- scripts/capa_as_library.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index c85445f0..449c35b9 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -85,6 +85,7 @@ def render_capabilities(doc, ostream): ostream["CAPABILITY"].setdefault(rule["meta"]["namespace"], list()) ostream["CAPABILITY"][rule["meta"]["namespace"]].append(capability) + def render_attack(doc, ostream): """ example:: @@ -150,7 +151,6 @@ def render_mbc(doc, ostream): ostream["MBC"].setdefault(objective.upper(), inner_rows) - def render_dictionary(doc): ostream = dict() render_meta(doc, ostream) From 51ac57c65787a04fecaaffa15a42de745ebe7265 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 18 Aug 2021 20:33:02 +0000 Subject: [PATCH 6/6] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 878f3b5b..efc5abd8 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 878f3b5b5b7c6a39269a04e0fbd313e2cfbc0632 +Subproject commit efc5abd88802e38554d1128914a8bcb874eab780