From cd06ee4544d488d4ee734b024256f56e7eeb4670 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Tue, 10 Aug 2021 13:05:31 -0600
Subject: [PATCH 1/6] main: correctly extract namespace matches across scopes

closes #721
---
 capa/engine.py | 32 +++++++++++++++++++++++---------
 capa/main.py   | 14 ++++++++------
 2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index 3b969569..aa454d77 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -8,7 +8,7 @@
 
 import copy
 import collections
-from typing import Set, Dict, List, Tuple, Union, Mapping
+from typing import Set, Dict, List, Tuple, Union, Mapping, Iterable
 
 import capa.rules
 import capa.features.common
@@ -228,6 +228,23 @@ class Subscope(Statement):
 MatchResults = Mapping[str, List[Tuple[int, Result]]]
 
 
+def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: Iterable[int]):
+    """
+    record into the given featureset that the given rule matched at the given locations.
+
+    naively, this is just adding a MatchedRule feature;
+    however, we also want to record matches for the rule's namespaces.
+
+    updates `features` in-place. doesn't modify the remaining arguments.
+    """
+    features[capa.features.common.MatchedRule(rule.name)].update(locations)
+    namespace = rule.meta.get("namespace")
+    if namespace:
+        while namespace:
+            features[capa.features.common.MatchedRule(namespace)].update(locations)
+            namespace, _, _ = namespace.rpartition("/")
+
+
 def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tuple[FeatureSet, MatchResults]:
     """
     Args:
@@ -237,7 +254,7 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tupl
 
     Returns:
       Tuple[FeatureSet, MatchResults]: two-tuple with entries:
-        - set of features used for matching (which may be greater than argument, due to rule match features), and
+        - set of features used for matching (which may be a superset of the given `features` argument, due to rule match features), and
         - mapping from rule name to [(location of match, result object)]
     """
     results = collections.defaultdict(list)  # type: MatchResults
@@ -252,12 +269,9 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tupl
         res = rule.evaluate(features)
         if res:
             results[rule.name].append((va, res))
-            features[capa.features.common.MatchedRule(rule.name)].add(va)
-
-            namespace = rule.meta.get("namespace")
-            if namespace:
-                while namespace:
-                    features[capa.features.common.MatchedRule(namespace)].add(va)
-                    namespace, _, _ = namespace.rpartition("/")
+            # we need to update the current `features`
+            # because subsequent iterations of this loop may use newly added features,
+            # such as rule or namespace matches.
+            index_rule_matches(features, rule, [va])
 
     return (features, results)
diff --git a/capa/main.py b/capa/main.py
index 32ef811d..8bd2a4a6 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -21,7 +21,7 @@ import textwrap
 import itertools
 import contextlib
 import collections
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple, Iterable
 
 import halo
 import tqdm
@@ -101,8 +101,9 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
 
         for rule_name, res in matches.items():
             bb_matches[rule_name].extend(res)
+            rule = ruleset[rule_name]
             for va, _ in res:
-                function_features[capa.features.common.MatchedRule(rule_name)].add(va)
+                capa.engine.index_rule_matches(function_features, rule, [va])
 
     _, function_matches = capa.engine.match(ruleset.function_rules, function_features, int(f))
     return function_matches, bb_matches, len(function_features)
@@ -175,10 +176,11 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
 
     # collection of features that captures the rule matches within function and BB scopes.
     # mapping from feature (matched rule) to set of addresses at which it matched.
-    function_and_lower_features = {
-        capa.features.common.MatchedRule(rule_name): set(map(lambda p: p[0], results))
-        for rule_name, results in itertools.chain(all_function_matches.items(), all_bb_matches.items())
-    }  # type: FeatureSet
+    function_and_lower_features: FeatureSet = collections.defaultdict(set)
+    for rule_name, results in itertools.chain(all_function_matches.items(), all_bb_matches.items()):
+        locations = set(map(lambda p: p[0], results))
+        rule = ruleset[rule_name]
+        capa.engine.index_rule_matches(function_and_lower_features, rule, locations)
 
     all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
     meta["feature_counts"]["file"] = feature_count

From 7e60162d658f22af9902d9381a7f198410ae6e46 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Tue, 10 Aug 2021 13:06:04 -0600
Subject: [PATCH 2/6] result_document: extract only the relevant namespace
 locations

closes #703
---
 capa/render/result_document.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/capa/render/result_document.py b/capa/render/result_document.py
index 3e4d6c25..1fe556b8 100644
--- a/capa/render/result_document.py
+++ b/capa/render/result_document.py
@@ -198,9 +198,19 @@ def convert_match_to_result_document(rules, capabilities, result):
                     # in the meantime, the above might be sufficient.
                     rule_matches = {address: result for (address, result) in capabilities[rule.name]}
                     for location in doc["locations"]:
-                        doc["children"].append(
-                            convert_match_to_result_document(rules, capabilities, rule_matches[location])
-                        )
+                        # doc[locations] contains all matches for the given namespace.
+                        # for example, the feature might be `match: anti-analysis/packer`
+                        # which matches against "generic unpacker" and "UPX".
+                        # in this case, doc[locations] contains locations for *both* of these.
+                        #
+                        # rule_matches contains the matches for the specific rule.
+                        # this is a subset of doc[locations].
+                        #
+                        # so, grab only the locations for current rule.
+                        if location in rule_matches:
+                            doc["children"].append(
+                                convert_match_to_result_document(rules, capabilities, rule_matches[location])
+                            )
 
     return doc
 

From 44dc4efe57df897619ad584f7053a65efb21d9c6 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Tue, 10 Aug 2021 13:14:00 -0600
Subject: [PATCH 3/6] changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3857d512..0a6e09a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,9 @@
 
 ### Bug Fixes
 
+- main: fix `KeyError: 0` when reporting results @williballenthin #703
+- main: fix potential false negatives due to namespaces across scopes @williballenthin #721
+
 ### capa explorer IDA Pro plugin
 
 - explorer: add additional filter logic when displaying matches by function #686 @mike-hunhoff

From 5af59cecdac01f01d8fe49b1b3e2fb03f8623184 Mon Sep 17 00:00:00 2001
From: doomedraven <doomedraven@users.noreply.github.com>
Date: Wed, 18 Aug 2021 14:23:36 +0200
Subject: [PATCH 4/6] update capa_as_library for capa v2

---
 scripts/capa_as_library.py | 49 +++++++++-----------------------------
 1 file changed, 11 insertions(+), 38 deletions(-)

diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py
index 36244a3f..c85445f0 100644
--- a/scripts/capa_as_library.py
+++ b/scripts/capa_as_library.py
@@ -85,7 +85,6 @@ def render_capabilities(doc, ostream):
         ostream["CAPABILITY"].setdefault(rule["meta"]["namespace"], list())
         ostream["CAPABILITY"][rule["meta"]["namespace"]].append(capability)
 
-
 def render_attack(doc, ostream):
     """
     example::
@@ -104,28 +103,16 @@ def render_attack(doc, ostream):
     for rule in rutils.capability_rules(doc):
         if not rule["meta"].get("att&ck"):
             continue
-
         for attack in rule["meta"]["att&ck"]:
-            tactic, _, rest = attack.partition("::")
-            if "::" in rest:
-                technique, _, rest = rest.partition("::")
-                subtechnique, _, id = rest.rpartition(" ")
-                tactics[tactic].add((technique, subtechnique, id))
-            else:
-                technique, _, id = rest.rpartition(" ")
-                tactics[tactic].add((technique, id))
+            tactics[attack["tactic"]].add((attack["technique"], attack.get("subtechnique"), attack["id"]))
 
     for tactic, techniques in sorted(tactics.items()):
         inner_rows = []
-        for spec in sorted(techniques):
-            if len(spec) == 2:
-                technique, id = spec
+        for (technique, subtechnique, id) in sorted(techniques):
+            if subtechnique is None:
                 inner_rows.append("%s %s" % (technique, id))
-            elif len(spec) == 3:
-                technique, subtechnique, id = spec
-                inner_rows.append("%s::%s %s" % (technique, subtechnique, id))
             else:
-                raise RuntimeError("unexpected ATT&CK spec format")
+                inner_rows.append("%s::%s %s" % (technique, subtechnique, id))
         ostream["ATTCK"].setdefault(tactic.upper(), inner_rows)
 
 
@@ -150,34 +137,20 @@ def render_mbc(doc, ostream):
         if not rule["meta"].get("mbc"):
             continue
 
-        mbcs = rule["meta"]["mbc"]
-        if not isinstance(mbcs, list):
-            raise ValueError("invalid rule: MBC mapping is not a list")
-
-        for mbc in mbcs:
-            objective, _, rest = mbc.partition("::")
-            if "::" in rest:
-                behavior, _, rest = rest.partition("::")
-                method, _, id = rest.rpartition(" ")
-                objectives[objective].add((behavior, method, id))
-            else:
-                behavior, _, id = rest.rpartition(" ")
-                objectives[objective].add((behavior, id))
+        for mbc in rule["meta"]["mbc"]:
+            objectives[mbc["objective"]].add((mbc["behavior"], mbc.get("method"), mbc["id"]))
 
     for objective, behaviors in sorted(objectives.items()):
         inner_rows = []
-        for spec in sorted(behaviors):
-            if len(spec) == 2:
-                behavior, id = spec
-                inner_rows.append("%s %s" % (behavior, id))
-            elif len(spec) == 3:
-                behavior, method, id = spec
-                inner_rows.append("%s::%s %s" % (behavior, method, id))
+        for (behavior, method, id) in sorted(behaviors):
+            if method is None:
+                inner_rows.append("%s [%s]" % (behavior, id))
             else:
-                raise RuntimeError("unexpected MBC spec format")
+                inner_rows.append("%s::%s [%s]" % (behavior, method, id))
         ostream["MBC"].setdefault(objective.upper(), inner_rows)
 
 
+
 def render_dictionary(doc):
     ostream = dict()
     render_meta(doc, ostream)

From b1171864e355d57e092cff5b50041d605cbaeb95 Mon Sep 17 00:00:00 2001
From: doomedraven <doomedraven@users.noreply.github.com>
Date: Wed, 18 Aug 2021 14:25:58 +0200
Subject: [PATCH 5/6] black

---
 scripts/capa_as_library.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py
index c85445f0..449c35b9 100644
--- a/scripts/capa_as_library.py
+++ b/scripts/capa_as_library.py
@@ -85,6 +85,7 @@ def render_capabilities(doc, ostream):
         ostream["CAPABILITY"].setdefault(rule["meta"]["namespace"], list())
         ostream["CAPABILITY"][rule["meta"]["namespace"]].append(capability)
 
+
 def render_attack(doc, ostream):
     """
     example::
@@ -150,7 +151,6 @@ def render_mbc(doc, ostream):
         ostream["MBC"].setdefault(objective.upper(), inner_rows)
 
 
-
 def render_dictionary(doc):
     ostream = dict()
     render_meta(doc, ostream)

From 51ac57c65787a04fecaaffa15a42de745ebe7265 Mon Sep 17 00:00:00 2001
From: Capa Bot <capa-dev@fireeye.com>
Date: Wed, 18 Aug 2021 20:33:02 +0000
Subject: [PATCH 6/6] Sync capa-testfiles submodule

---
 tests/data | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/data b/tests/data
index 878f3b5b..efc5abd8 160000
--- a/tests/data
+++ b/tests/data
@@ -1 +1 @@
-Subproject commit 878f3b5b5b7c6a39269a04e0fbd313e2cfbc0632
+Subproject commit efc5abd88802e38554d1128914a8bcb874eab780