# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import textwrap import pytest import capa.rules import capa.engine import capa.features.insn import capa.features.common from capa.rules import Scope from capa.features.common import OS, OS_ANY, OS_WINDOWS, String, MatchedRule def match(rules, features, va, scope=Scope.FUNCTION): """ use all matching algorithms and verify that they compute the same result. then, return those results to the caller so they can make their asserts. """ features1, matches1 = capa.engine.match(rules, features, va) ruleset = capa.rules.RuleSet(rules) features2, matches2 = ruleset.match(scope, features, va) for feature, locations in features1.items(): assert feature in features2 assert locations == features2[feature] for rulename, results in matches1.items(): assert rulename in matches2 assert len(results) == len(matches2[rulename]) return features1, matches1 def test_match_simple(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process namespace: testns1/testns2 features: - number: 100 """ ) r = capa.rules.Rule.from_yaml(rule) features, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) assert "test rule" in matches assert MatchedRule("test rule") in features assert MatchedRule("testns1") in features assert MatchedRule("testns1/testns2") in features def test_match_range_exact(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - count(number(100)): 2 """ ) r = capa.rules.Rule.from_yaml(rule) # just enough matches _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) assert "test rule" in matches # not enough matches _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0) assert "test rule" not in matches # too many matches _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0) assert "test rule" not in matches def test_match_range_range(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - count(number(100)): (2, 3) """ ) r = capa.rules.Rule.from_yaml(rule) # just enough matches _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) assert "test rule" in matches # enough matches _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0) assert "test rule" in matches # not enough matches _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0) assert "test rule" not in matches # too many matches _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3, 4}}, 0x0) assert "test rule" not in matches def test_match_range_exact_zero(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - count(number(100)): 0 # we can't have `count(foo): 0` at the top level, # since we don't support top level NOT statements. # so we have this additional trivial feature. - mnemonic: mov """ ) r = capa.rules.Rule.from_yaml(rule) # feature isn't indexed - good. _, matches = match([r], {capa.features.insn.Mnemonic("mov"): {}}, 0x0) assert "test rule" in matches # feature is indexed, but no matches. # i don't think we should ever really have this case, but good to check anyways. _, matches = match([r], {capa.features.insn.Number(100): {}, capa.features.insn.Mnemonic("mov"): {}}, 0x0) assert "test rule" in matches # too many matches _, matches = match([r], {capa.features.insn.Number(100): {1}, capa.features.insn.Mnemonic("mov"): {1}}, 0x0) assert "test rule" not in matches def test_match_range_with_zero(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - count(number(100)): (0, 1) # we can't have `count(foo): 0` at the top level, # since we don't support top level NOT statements. # so we have this additional trivial feature. - mnemonic: mov """ ) r = capa.rules.Rule.from_yaml(rule) # ok _, matches = match([r], {capa.features.insn.Mnemonic("mov"): {}}, 0x0) assert "test rule" in matches _, matches = match([r], {capa.features.insn.Number(100): {}, capa.features.insn.Mnemonic("mov"): {}}, 0x0) assert "test rule" in matches _, matches = match([r], {capa.features.insn.Number(100): {1}, capa.features.insn.Mnemonic("mov"): {1}}, 0x0) assert "test rule" in matches # too many matches _, matches = match([r], {capa.features.insn.Number(100): {1, 2}, capa.features.insn.Mnemonic("mov"): {1, 2}}, 0x0) assert "test rule" not in matches def test_match_adds_matched_rule_feature(): """show that using `match` adds a feature for matched rules.""" rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - number: 100 """ ) r = capa.rules.Rule.from_yaml(rule) features, _ = match([r], {capa.features.insn.Number(100): {1}}, 0x0) assert capa.features.common.MatchedRule("test rule") in features def test_match_matched_rules(): """show that using `match` adds a feature for matched rules.""" rules = [ capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: test rule1 scopes: static: function dynamic: process features: - number: 100 """ ) ), capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: test rule2 scopes: static: function dynamic: process features: - match: test rule1 """ ) ), ] features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.insn.Number(100): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule1") in features assert capa.features.common.MatchedRule("test rule2") in features # the ordering of the rules must not matter, # the engine should match rules in an appropriate order. features, _ = match( capa.rules.topologically_order_rules(list(reversed(rules))), {capa.features.insn.Number(100): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule1") in features assert capa.features.common.MatchedRule("test rule2") in features def test_match_namespace(): rules = [ capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: CreateFile API scopes: static: function dynamic: process namespace: file/create/CreateFile features: - api: CreateFile """ ) ), capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: WriteFile API scopes: static: function dynamic: process namespace: file/write features: - api: WriteFile """ ) ), capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: file-create scopes: static: function dynamic: process features: - match: file/create """ ) ), capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: filesystem-any scopes: static: function dynamic: process features: - match: file """ ) ), ] features, matches = match( capa.rules.topologically_order_rules(rules), {capa.features.insn.API("CreateFile"): {1}}, 0x0, ) assert "CreateFile API" in matches assert "file-create" in matches assert "filesystem-any" in matches assert capa.features.common.MatchedRule("file") in features assert capa.features.common.MatchedRule("file/create") in features assert capa.features.common.MatchedRule("file/create/CreateFile") in features features, matches = match( capa.rules.topologically_order_rules(rules), {capa.features.insn.API("WriteFile"): {1}}, 0x0, ) assert "WriteFile API" in matches assert "file-create" not in matches assert "filesystem-any" in matches def test_match_substring(): rules = [ capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - substring: abc """ ) ), ] features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("aaaa"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") not in features features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("abc"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("111abc222"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("111abc"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("abc222"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features def test_match_regex(): rules = [ capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - string: /.*bbbb.*/ """ ) ), capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: rule with implied wildcards scopes: static: function dynamic: process features: - and: - string: /bbbb/ """ ) ), capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: rule with anchor scopes: static: function dynamic: process features: - and: - string: /^bbbb/ """ ) ), ] features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.insn.Number(100): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") not in features features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("aaaa"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") not in features features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("aBBBBa"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") not in features features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("abbbba"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features assert capa.features.common.MatchedRule("rule with implied wildcards") in features assert capa.features.common.MatchedRule("rule with anchor") not in features def test_match_regex_ignorecase(): rules = [ capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - string: /.*bbbb.*/i """ ) ), ] features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("aBBBBa"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features def test_match_regex_complex(): rules = [ capa.rules.Rule.from_yaml( textwrap.dedent( r""" rule: meta: name: test rule scopes: static: function dynamic: process features: - or: - string: /.*HARDWARE\\Key\\key with spaces\\.*/i """ ) ), ] features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String(r"Hardware\Key\key with spaces\some value"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features def test_match_regex_values_always_string(): rules = [ capa.rules.Rule.from_yaml( textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - or: - string: /123/ - string: /0x123/ """ ) ), ] features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("123"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features features, _ = match( capa.rules.topologically_order_rules(rules), {capa.features.common.String("0x123"): {1}}, 0x0, ) assert capa.features.common.MatchedRule("test rule") in features @pytest.mark.xfail(reason="can't have top level NOT") def test_match_only_not(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process namespace: testns1/testns2 features: - not: - number: 99 """ ) r = capa.rules.Rule.from_yaml(rule) _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) assert "test rule" in matches def test_match_not(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process namespace: testns1/testns2 features: - and: - mnemonic: mov - not: - number: 99 """ ) r = capa.rules.Rule.from_yaml(rule) _, matches = match([r], {capa.features.insn.Number(100): {1, 2}, capa.features.insn.Mnemonic("mov"): {1, 2}}, 0x0) assert "test rule" in matches @pytest.mark.xfail(reason="can't have nested NOT") def test_match_not_not(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process namespace: testns1/testns2 features: - not: - not: - number: 100 """ ) r = capa.rules.Rule.from_yaml(rule) _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) assert "test rule" in matches def test_match_operand_number(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - operand[0].number: 0x10 """ ) r = capa.rules.Rule.from_yaml(rule) assert capa.features.insn.OperandNumber(0, 0x10) in {capa.features.insn.OperandNumber(0, 0x10)} _, matches = match([r], {capa.features.insn.OperandNumber(0, 0x10): {1, 2}}, 0x0) assert "test rule" in matches # mismatching index _, matches = match([r], {capa.features.insn.OperandNumber(1, 0x10): {1, 2}}, 0x0) assert "test rule" not in matches # mismatching value _, matches = match([r], {capa.features.insn.OperandNumber(0, 0x11): {1, 2}}, 0x0) assert "test rule" not in matches def test_match_operand_offset(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - operand[0].offset: 0x10 """ ) r = capa.rules.Rule.from_yaml(rule) assert capa.features.insn.OperandOffset(0, 0x10) in {capa.features.insn.OperandOffset(0, 0x10)} _, matches = match([r], {capa.features.insn.OperandOffset(0, 0x10): {1, 2}}, 0x0) assert "test rule" in matches # mismatching index _, matches = match([r], {capa.features.insn.OperandOffset(1, 0x10): {1, 2}}, 0x0) assert "test rule" not in matches # mismatching value _, matches = match([r], {capa.features.insn.OperandOffset(0, 0x11): {1, 2}}, 0x0) assert "test rule" not in matches def test_match_property_access(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - property/read: System.IO.FileInfo::Length """ ) r = capa.rules.Rule.from_yaml(rule) assert capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.READ) in { capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.READ) } _, matches = match( [r], {capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.READ): {1, 2}}, 0x0, ) assert "test rule" in matches # mismatching access _, matches = match( [r], {capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.WRITE): {1, 2}}, 0x0, ) assert "test rule" not in matches # mismatching value _, matches = match( [r], {capa.features.insn.Property("System.IO.FileInfo::Size", capa.features.common.FeatureAccess.READ): {1, 2}}, 0x0, ) assert "test rule" not in matches def test_match_os_any(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - or: - and: - or: - os: windows - os: linux - os: macos - string: "Hello world" - and: - os: any - string: "Goodbye world" """ ) r = capa.rules.Rule.from_yaml(rule) _, matches = match( [r], {OS(OS_ANY): {1}, String("Hello world"): {1}}, 0x0, ) assert "test rule" in matches _, matches = match( [r], {OS(OS_WINDOWS): {1}, String("Hello world"): {1}}, 0x0, ) assert "test rule" in matches _, matches = match( [r], {OS(OS_ANY): {1}, String("Goodbye world"): {1}}, 0x0, ) assert "test rule" in matches _, matches = match( [r], {OS(OS_WINDOWS): {1}, String("Goodbye world"): {1}}, 0x0, ) assert "test rule" in matches # this test demonstrates the behavior of unstable features that may change before the next major release. def test_index_features_and_unstable(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - mnemonic: mov - api: CreateFileW """ ) r = capa.rules.Rule.from_yaml(rule) rr = capa.rules.RuleSet([r]) index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION] # there's a single rule, and its indexed by a single feature assert len(index.rules_by_feature) == 1 # and we index by the more uncommon API feature, not the common mnemonic feature assert capa.features.insn.API("CreateFileW") in index.rules_by_feature assert not index.string_rules assert not index.bytes_rules # this test demonstrates the behavior of unstable features that may change before the next major release. def test_index_features_or_unstable(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - or: - mnemonic: mov - api: CreateFileW """ ) r = capa.rules.Rule.from_yaml(rule) rr = capa.rules.RuleSet([r]) index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION] # there's a single rule, and its indexed by both features, # because they fall under the single root OR node. assert len(index.rules_by_feature) == 2 assert capa.features.insn.API("CreateFileW") in index.rules_by_feature assert capa.features.insn.Mnemonic("mov") in index.rules_by_feature assert not index.string_rules assert not index.bytes_rules # this test demonstrates the behavior of unstable features that may change before the next major release. def test_index_features_nested_unstable(): rule = textwrap.dedent( """ rule: meta: name: test rule scopes: static: function dynamic: process features: - and: - mnemonic: mov - or: - api: CreateFileW - string: foo """ ) r = capa.rules.Rule.from_yaml(rule) rr = capa.rules.RuleSet([r]) index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION] # there's a single rule, and its indexed by the two uncommon features, # not the single common feature. assert len(index.rules_by_feature) == 2 assert capa.features.insn.API("CreateFileW") in index.rules_by_feature assert capa.features.common.String("foo") in index.rules_by_feature assert capa.features.insn.Mnemonic("mov") not in index.rules_by_feature assert not index.string_rules assert not index.bytes_rules