diff --git a/CHANGELOG.md b/CHANGELOG.md index f4c218e5..140a0a42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -116,6 +116,7 @@ - fix: incorrect bytes() constructor usage in buf_filled_with @mike-hunhoff #3077 - fix: remove redundant code related to cli loading @mike-hunhoff #3076 - fix: optimize all_zeros using fast bytes comparison @mike-hunhoff #3078 +- fix: duplicate rule candidate evaluation in optimized matching engine @mike-hunhoff #3080 ### capa Explorer Web diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 2e1c8fa6..ef4e372c 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -2266,9 +2266,15 @@ class RuleSet: new_features.append(capa.features.common.MatchedRule(namespace)) if new_features: - new_candidates: list[str] = [] + new_candidates: set[str] = set() for new_feature in new_features: - new_candidates.extend(feature_index.rules_by_feature.get(new_feature, ())) + for candidate_name in feature_index.rules_by_feature.get(new_feature, ()): + # Deduplicate candidate rules at two levels: + # 1. Globally: Ensure we don't re-queue rules already evaluated or waiting in `candidate_rule_names`. + # 2. Locally: The `new_candidates` set prevents duplicates if a rule is triggered + # by both the matched rule name and its namespace in the same pass. + if candidate_name not in candidate_rule_names: + new_candidates.add(candidate_name) if new_candidates: candidate_rule_names.update(new_candidates) diff --git a/tests/test_match.py b/tests/test_match.py index 674b71b3..f55e54f8 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -967,3 +967,82 @@ def test_bytes_prefix_index_mixed_short_and_long_patterns(): ) assert "test short pattern rule" not in matches assert "test long pattern rule" in matches + + +def test_match_no_duplicate_candidate_evaluations(): + """ + Ensure that when a rule has multiple candidate paths to trigger it, + it is evaluated only once and does not create duplicate match results. + Verifies both global deduplication (avoiding re-queuing rules already + evaluated/queued) and local deduplication (avoiding duplicate queueing when + multiple features trigger the same candidate in a single pass). + """ + rules = [ + capa.rules.Rule.from_yaml( + textwrap.dedent(""" + rule: + meta: + name: Dependency Rule 1 + scopes: + static: function + dynamic: process + features: + - number: 100 + """) + ), + capa.rules.Rule.from_yaml( + textwrap.dedent(""" + rule: + meta: + name: Dependency Rule 2 + scopes: + static: function + dynamic: process + namespace: testns + features: + - number: 300 + """) + ), + capa.rules.Rule.from_yaml( + textwrap.dedent(""" + rule: + meta: + name: Target Rule + scopes: + static: function + dynamic: process + features: + - or: + # Trigger Case 1 (Global): Target Rule is seeded by number 200, + # and also gets triggered later when Dependency Rule 1 matches. + - match: Dependency Rule 1 + - number: 200 + + # Trigger Case 2 (Local): Target Rule depends on both rule name and namespace, + # which will try to add it twice in the same iteration when Dependency Rule 2 matches. + - and: + - match: Dependency Rule 2 + - match: testns + """) + ), + ] + + # Seed all features to trigger both Case 1 and Case 2 + features = { + capa.features.insn.Number(100): {0x0}, + capa.features.insn.Number(200): {0x0}, + capa.features.insn.Number(300): {0x0}, + } + + _, matches = match( + capa.rules.topologically_order_rules(rules), + features, + 0x0, + ) + + assert "Dependency Rule 1" in matches + assert "Dependency Rule 2" in matches + assert "Target Rule" in matches + + # Ensure Target Rule was evaluated and returned exactly ONCE + assert len(matches["Target Rule"]) == 1