fix duplicate rule candidate evaluation in optimized matching engine (#3080)

* fix duplicate rule candidate evaluation in optimized matching engine * update CHANGELOG * update comments
2026-06-12 11:01:31 -07:00 · 2026-05-18 17:40:55 -06:00
parent ced180ddbc
commit a98fd8240e
3 changed files with 88 additions and 2 deletions
@@ -116,6 +116,7 @@
 - fix: incorrect bytes() constructor usage in buf_filled_with @mike-hunhoff #3077
 - fix: remove redundant code related to cli loading @mike-hunhoff #3076
 - fix: optimize all_zeros using fast bytes comparison @mike-hunhoff #3078
+- fix: duplicate rule candidate evaluation in optimized matching engine @mike-hunhoff #3080

 ### capa Explorer Web

@@ -2266,9 +2266,15 @@ class RuleSet:
                    new_features.append(capa.features.common.MatchedRule(namespace))

                if new_features:
-                    new_candidates: list[str] = []
+                    new_candidates: set[str] = set()
                    for new_feature in new_features:
-                        new_candidates.extend(feature_index.rules_by_feature.get(new_feature, ()))
+                        for candidate_name in feature_index.rules_by_feature.get(new_feature, ()):
+                            # Deduplicate candidate rules at two levels:
+                            # 1. Globally: Ensure we don't re-queue rules already evaluated or waiting in `candidate_rule_names`.
+                            # 2. Locally: The `new_candidates` set prevents duplicates if a rule is triggered
+                            #    by both the matched rule name and its namespace in the same pass.
+                            if candidate_name not in candidate_rule_names:
+                                new_candidates.add(candidate_name)

                    if new_candidates:
                        candidate_rule_names.update(new_candidates)
@@ -967,3 +967,82 @@ def test_bytes_prefix_index_mixed_short_and_long_patterns():
    )
    assert "test short pattern rule" not in matches
    assert "test long pattern rule" in matches
+
+
+def test_match_no_duplicate_candidate_evaluations():
+    """
+    Ensure that when a rule has multiple candidate paths to trigger it,
+    it is evaluated only once and does not create duplicate match results.
+    Verifies both global deduplication (avoiding re-queuing rules already
+    evaluated/queued) and local deduplication (avoiding duplicate queueing when
+    multiple features trigger the same candidate in a single pass).
+    """
+    rules = [
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent("""
+                rule:
+                    meta:
+                        name: Dependency Rule 1
+                        scopes:
+                            static: function
+                            dynamic: process
+                    features:
+                        - number: 100
+                """)
+        ),
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent("""
+                rule:
+                    meta:
+                        name: Dependency Rule 2
+                        scopes:
+                            static: function
+                            dynamic: process
+                        namespace: testns
+                    features:
+                        - number: 300
+                """)
+        ),
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent("""
+                rule:
+                    meta:
+                        name: Target Rule
+                        scopes:
+                            static: function
+                            dynamic: process
+                    features:
+                        - or:
+                            # Trigger Case 1 (Global): Target Rule is seeded by number 200,
+                            # and also gets triggered later when Dependency Rule 1 matches.
+                            - match: Dependency Rule 1
+                            - number: 200
+
+                            # Trigger Case 2 (Local): Target Rule depends on both rule name and namespace,
+                            # which will try to add it twice in the same iteration when Dependency Rule 2 matches.
+                            - and:
+                                - match: Dependency Rule 2
+                                - match: testns
+                """)
+        ),
+    ]
+
+    # Seed all features to trigger both Case 1 and Case 2
+    features = {
+        capa.features.insn.Number(100): {0x0},
+        capa.features.insn.Number(200): {0x0},
+        capa.features.insn.Number(300): {0x0},
+    }
+
+    _, matches = match(
+        capa.rules.topologically_order_rules(rules),
+        features,
+        0x0,
+    )
+
+    assert "Dependency Rule 1" in matches
+    assert "Dependency Rule 2" in matches
+    assert "Target Rule" in matches
+
+    # Ensure Target Rule was evaluated and returned exactly ONCE
+    assert len(matches["Target Rule"]) == 1