fix duplicate rule candidate evaluation in optimized matching engine (#3080)

* fix duplicate rule candidate evaluation in optimized matching engine

* update CHANGELOG

* update comments
This commit is contained in:
Mike Hunhoff
2026-05-18 17:40:55 -06:00
committed by GitHub
parent ced180ddbc
commit a98fd8240e
3 changed files with 88 additions and 2 deletions
+1
View File
@@ -116,6 +116,7 @@
- fix: incorrect bytes() constructor usage in buf_filled_with @mike-hunhoff #3077
- fix: remove redundant code related to cli loading @mike-hunhoff #3076
- fix: optimize all_zeros using fast bytes comparison @mike-hunhoff #3078
- fix: duplicate rule candidate evaluation in optimized matching engine @mike-hunhoff #3080
### capa Explorer Web
+8 -2
View File
@@ -2266,9 +2266,15 @@ class RuleSet:
new_features.append(capa.features.common.MatchedRule(namespace))
if new_features:
new_candidates: list[str] = []
new_candidates: set[str] = set()
for new_feature in new_features:
new_candidates.extend(feature_index.rules_by_feature.get(new_feature, ()))
for candidate_name in feature_index.rules_by_feature.get(new_feature, ()):
# Deduplicate candidate rules at two levels:
# 1. Globally: Ensure we don't re-queue rules already evaluated or waiting in `candidate_rule_names`.
# 2. Locally: The `new_candidates` set prevents duplicates if a rule is triggered
# by both the matched rule name and its namespace in the same pass.
if candidate_name not in candidate_rule_names:
new_candidates.add(candidate_name)
if new_candidates:
candidate_rule_names.update(new_candidates)
+79
View File
@@ -967,3 +967,82 @@ def test_bytes_prefix_index_mixed_short_and_long_patterns():
)
assert "test short pattern rule" not in matches
assert "test long pattern rule" in matches
def test_match_no_duplicate_candidate_evaluations():
"""
Ensure that when a rule has multiple candidate paths to trigger it,
it is evaluated only once and does not create duplicate match results.
Verifies both global deduplication (avoiding re-queuing rules already
evaluated/queued) and local deduplication (avoiding duplicate queueing when
multiple features trigger the same candidate in a single pass).
"""
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: Dependency Rule 1
scopes:
static: function
dynamic: process
features:
- number: 100
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: Dependency Rule 2
scopes:
static: function
dynamic: process
namespace: testns
features:
- number: 300
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: Target Rule
scopes:
static: function
dynamic: process
features:
- or:
# Trigger Case 1 (Global): Target Rule is seeded by number 200,
# and also gets triggered later when Dependency Rule 1 matches.
- match: Dependency Rule 1
- number: 200
# Trigger Case 2 (Local): Target Rule depends on both rule name and namespace,
# which will try to add it twice in the same iteration when Dependency Rule 2 matches.
- and:
- match: Dependency Rule 2
- match: testns
""")
),
]
# Seed all features to trigger both Case 1 and Case 2
features = {
capa.features.insn.Number(100): {0x0},
capa.features.insn.Number(200): {0x0},
capa.features.insn.Number(300): {0x0},
}
_, matches = match(
capa.rules.topologically_order_rules(rules),
features,
0x0,
)
assert "Dependency Rule 1" in matches
assert "Dependency Rule 2" in matches
assert "Target Rule" in matches
# Ensure Target Rule was evaluated and returned exactly ONCE
assert len(matches["Target Rule"]) == 1