# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import textwrap

import pytest

import capa.rules
import capa.engine
import capa.features.insn
import capa.features.common
from capa.rules import Scope


def match(rules, features, va, scope=Scope.FUNCTION):
    """
    use all matching algorithms and verify that they compute the same result.
    then, return those results to the caller so they can make their asserts.

    the rules are evaluated twice: once via `capa.engine.match` and once via
    `capa.rules.RuleSet.match` at the given scope. every feature and every rule
    match reported by the former must also be reported by the latter
    (same locations, same number of results).

    note: the comparison is one-directional; additional entries that only
    show up in the RuleSet results are not flagged here.

    args:
      rules: list of rules to evaluate.
      features: mapping from feature to the set of locations where it was found.
      va: the address at which the match is evaluated.
      scope: the scope passed to `RuleSet.match`, function scope by default.

    returns:
      the (features, matches) pair computed by `capa.engine.match`.
    """
    features1, matches1 = capa.engine.match(rules, features, va)

    ruleset = capa.rules.RuleSet(rules)
    features2, matches2 = ruleset.match(scope, features, va)

    for feature, locations in features1.items():
        assert feature in features2
        assert locations == features2[feature]

    for rulename, results in matches1.items():
        assert rulename in matches2
        assert len(results) == len(matches2[rulename])

    return features1, matches1


@pytest.mark.parametrize(
    "pattern",
    [
        "/test\\.exe/",
        "/hello/i",
        "/foo\\\\bar/",
    ],
)
def test_regex_get_value_str(pattern):
    """the string form of a Regex feature is exactly the pattern it was built from."""
    # Regex.get_value_str() must return the raw pattern without escaping, see #1909.
    assert capa.features.common.Regex(pattern).get_value_str() == pattern


@pytest.mark.xfail(reason="can't have top level NOT")
def test_match_only_not():
    """
    a rule whose only feature is `not: number: 99`, evaluated against number 100.
    marked xfail, since a top level NOT is not supported.
    """
    rule = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
                namespace: testns1/testns2
            features:
                - not:
                    - number: 99
        """)
    r = capa.rules.Rule.from_yaml(rule)

    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
    assert "test rule" in matches


@pytest.mark.xfail(reason="can't have nested NOT")
def test_match_not_not():
    """
    a rule whose only feature is a doubly-negated `number: 100`, evaluated against number 100.
    marked xfail, since a nested NOT is not supported.
    """
    rule = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
                namespace: testns1/testns2
            features:
                - not:
                    - not:
                        - number: 100
        """)
    r = capa.rules.Rule.from_yaml(rule)

    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
    assert "test rule" in matches


# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_and_unstable():
    """for an AND of a mnemonic and an API feature, the function-scope index has a single entry: the API."""
    rule = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - mnemonic: mov
                    - api: CreateFileW
        """)
    r = capa.rules.Rule.from_yaml(rule)
    rr = capa.rules.RuleSet([r])
    index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]

    # there's a single rule, and it's indexed by a single feature
    assert len(index.rules_by_feature) == 1
    # and we index by the more uncommon API feature, not the common mnemonic feature
    assert capa.features.insn.API("CreateFileW") in index.rules_by_feature

    assert not index.string_rules
    assert not index.bytes_prefix_index


# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_or_unstable():
    """for an OR of a mnemonic and an API feature, the function-scope index has an entry for each of them."""
    rule = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - or:
                    - mnemonic: mov
                    - api: CreateFileW
        """)
    r = capa.rules.Rule.from_yaml(rule)
    rr = capa.rules.RuleSet([r])
    index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]

    # there's a single rule, and it's indexed by both features,
    # because they fall under the single root OR node.
    assert len(index.rules_by_feature) == 2
    assert capa.features.insn.API("CreateFileW") in index.rules_by_feature
    assert capa.features.insn.Mnemonic("mov") in index.rules_by_feature

    assert not index.string_rules
    assert not index.bytes_prefix_index


# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_nested_unstable():
    """for `and(mnemonic, or(api, string))`, the index holds the API and string features but not the mnemonic."""
    rule = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - mnemonic: mov
                    - or:
                        - api: CreateFileW
                        - string: foo
        """)
    r = capa.rules.Rule.from_yaml(rule)
    rr = capa.rules.RuleSet([r])
    index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]

    # there's a single rule, and it's indexed by the two uncommon features,
    # not the single common feature.
    assert len(index.rules_by_feature) == 2
    assert capa.features.insn.API("CreateFileW") in index.rules_by_feature
    assert capa.features.common.String("foo") in index.rules_by_feature
    assert capa.features.insn.Mnemonic("mov") not in index.rules_by_feature

    assert not index.string_rules
    assert not index.bytes_prefix_index


def test_bytes_prefix_index_correctness():
    """Verify that the bytes prefix pre-filter preserves match behavior."""
    rule_text = textwrap.dedent("""
        rule:
            meta:
                name: test bytes prefix index
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
        """)
    r = capa.rules.Rule.from_yaml(rule_text)

    # 16 nop bytes - exact match
    nop16 = b"\x90" * 16
    _, matches = match([r], {capa.features.common.Bytes(nop16): {0x0}}, 0x0)
    assert "test bytes prefix index" in matches

    # 32 nop bytes - startswith match (first 16 bytes are nops)
    nop32 = b"\x90" * 32
    _, matches = match([r], {capa.features.common.Bytes(nop32): {0x0}}, 0x0)
    assert "test bytes prefix index" in matches

    # Different bytes - should not match
    other = b"\x00" * 16
    _, matches = match([r], {capa.features.common.Bytes(other): {0x0}}, 0x0)
    assert "test bytes prefix index" not in matches

    # Bytes shorter than pattern - should not match
    short = b"\x90" * 8
    _, matches = match([r], {capa.features.common.Bytes(short): {0x0}}, 0x0)
    assert "test bytes prefix index" not in matches


def test_bytes_prefix_index_collision():
    """
    Two extracted byte strings share their first four bytes (`ABCD`) with the pattern,
    but only one of them starts with the full pattern; the rule must still match.
    """
    rule_text = textwrap.dedent("""
        rule:
            meta:
                name: test bytes prefix collision
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: 41 42 43 44 45 46 47 48
        """)
    r = capa.rules.Rule.from_yaml(rule_text)

    features = {
        # same leading four bytes as the pattern, but diverges afterwards
        capa.features.common.Bytes(b"ABCD1234"): {0x0},
        # starts with the complete pattern `ABCDEFGH`
        capa.features.common.Bytes(b"ABCDEFGHzz"): {0x1},
    }
    _, matches = match([r], features, 0x0)
    assert "test bytes prefix collision" in matches


def test_bytes_prefix_index_short_pattern_fallback():
    """
    A three-byte pattern (`ABC`) matches extracted bytes that start with it,
    and does not match bytes that merely contain it at a later offset.
    """
    rule_text = textwrap.dedent("""
        rule:
            meta:
                name: test bytes short prefix fallback
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: 41 42 43
        """)
    r = capa.rules.Rule.from_yaml(rule_text)

    _, matches = match([r], {capa.features.common.Bytes(b"ABCDEF"): {0x0}}, 0x0)
    assert "test bytes short prefix fallback" in matches

    _, matches = match([r], {capa.features.common.Bytes(b"XABCDEF"): {0x0}}, 0x0)
    assert "test bytes short prefix fallback" not in matches


def test_bytes_prefix_index_mixed_short_and_long_patterns():
    """Two rules, one with a short (<4B) and one with a long (>=4B) bytes pattern, exercise both code paths."""
    short_rule_text = textwrap.dedent("""
        rule:
            meta:
                name: test short pattern rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: AA BB
        """)
    long_rule_text = textwrap.dedent("""
        rule:
            meta:
                name: test long pattern rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: CC DD EE FF 11 22 33 44
        """)
    short_rule = capa.rules.Rule.from_yaml(short_rule_text)
    long_rule = capa.rules.Rule.from_yaml(long_rule_text)

    # Both rules match their respective extracted values.
    features = {
        capa.features.common.Bytes(b"\xaa\xbb\xcc"): {0x0},
        capa.features.common.Bytes(b"\xcc\xdd\xee\xff\x11\x22\x33\x44\x55"): {0x1},
    }
    _, matches = match([short_rule, long_rule], features, 0x0)
    assert "test short pattern rule" in matches
    assert "test long pattern rule" in matches

    # Only the short rule matches when the long pattern is absent.
    _, matches = match([short_rule, long_rule], {capa.features.common.Bytes(b"\xaa\xbb\xcc"): {0x0}}, 0x0)
    assert "test short pattern rule" in matches
    assert "test long pattern rule" not in matches

    # Only the long rule matches when the short pattern is absent.
    _, matches = match(
        [short_rule, long_rule],
        {capa.features.common.Bytes(b"\xcc\xdd\xee\xff\x11\x22\x33\x44"): {0x0}},
        0x0,
    )
    assert "test short pattern rule" not in matches
    assert "test long pattern rule" in matches


def test_match_no_duplicate_candidate_evaluations():
    """
    Ensure that when a rule has multiple candidate paths to trigger it, it is evaluated only once
    and does not create duplicate match results.

    Verifies both global deduplication (avoiding re-queuing rules already evaluated/queued)
    and local deduplication (avoiding duplicate queueing when multiple features trigger the same
    candidate in a single pass).
    """
    rules = [
        capa.rules.Rule.from_yaml(
            textwrap.dedent("""
                rule:
                    meta:
                        name: Dependency Rule 1
                        scopes:
                            static: function
                            dynamic: process
                    features:
                        - number: 100
                """)
        ),
        capa.rules.Rule.from_yaml(
            textwrap.dedent("""
                rule:
                    meta:
                        name: Dependency Rule 2
                        scopes:
                            static: function
                            dynamic: process
                        namespace: testns
                    features:
                        - number: 300
                """)
        ),
        capa.rules.Rule.from_yaml(
            textwrap.dedent("""
                rule:
                    meta:
                        name: Target Rule
                        scopes:
                            static: function
                            dynamic: process
                    features:
                        - or:
                            # Trigger Case 1 (Global): Target Rule is seeded by number 200,
                            # and also gets triggered later when Dependency Rule 1 matches.
                            - match: Dependency Rule 1
                            - number: 200
                            # Trigger Case 2 (Local): Target Rule depends on both rule name and namespace,
                            # which will try to add it twice in the same iteration when Dependency Rule 2 matches.
                            - and:
                                - match: Dependency Rule 2
                                - match: testns
                """)
        ),
    ]

    # Seed all features to trigger both Case 1 and Case 2
    features = {
        capa.features.insn.Number(100): {0x0},
        capa.features.insn.Number(200): {0x0},
        capa.features.insn.Number(300): {0x0},
    }

    _, matches = match(
        capa.rules.topologically_order_rules(rules),
        features,
        0x0,
    )

    assert "Dependency Rule 1" in matches
    assert "Dependency Rule 2" in matches
    assert "Target Rule" in matches

    # Ensure Target Rule was evaluated and returned exactly ONCE
    assert len(matches["Target Rule"]) == 1