Files
capa/tests/test_match.py
2026-03-12 13:34:13 +00:00

819 lines
26 KiB
Python

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import textwrap
import pytest
import capa.rules
import capa.engine
import capa.features.insn
import capa.features.common
from capa.rules import Scope
from capa.features.common import OS, OS_ANY, OS_WINDOWS, String, MatchedRule
def match(rules, features, va, scope=Scope.FUNCTION):
    """
    run the same inputs through both matching implementations and check that they agree.

    the reference result comes from `capa.engine.match`;
    the other result comes from `capa.rules.RuleSet.match` at the given scope.
    every feature reported by the reference must be reported by the rule set
    with equal locations, and every matched rule name must be reported by the
    rule set with the same number of results.

    returns the (features, matches) pair computed by `capa.engine.match`,
    so that callers can make their own asserts.
    """
    expected_features, expected_matches = capa.engine.match(rules, features, va)
    actual_features, actual_matches = capa.rules.RuleSet(rules).match(scope, features, va)

    for feature in expected_features:
        assert feature in actual_features
        assert expected_features[feature] == actual_features[feature]

    for rulename in expected_matches:
        assert rulename in actual_matches
        assert len(expected_matches[rulename]) == len(actual_matches[rulename])

    return expected_features, expected_matches
def test_match_simple():
    """a rule with one number feature matches, and its name and namespaces become features."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
                namespace: testns1/testns2
            features:
                - number: 100
        """)

    features, matches = match(
        [capa.rules.Rule.from_yaml(yaml_text)],
        {capa.features.insn.Number(100): {1, 2}},
        0x0,
    )

    assert "test rule" in matches
    # the rule name, and each prefix of its namespace, is expected as a matched rule feature.
    for name in ("test rule", "testns1", "testns1/testns2"):
        assert MatchedRule(name) in features
def test_match_range_exact():
    """`count(...): 2` matches only when the feature is found at exactly two locations."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - count(number(100)): 2
        """)
    r = capa.rules.Rule.from_yaml(yaml_text)

    cases = (
        # just enough matches
        ({1, 2}, True),
        # not enough matches
        ({1}, False),
        # too many matches
        ({1, 2, 3}, False),
    )
    for locations, should_match in cases:
        _, matches = match([r], {capa.features.insn.Number(100): locations}, 0x0)
        assert ("test rule" in matches) is should_match
def test_match_range_range():
    """`count(...): (2, 3)` matches when the feature is found at two or three locations."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - count(number(100)): (2, 3)
        """)
    r = capa.rules.Rule.from_yaml(yaml_text)

    cases = (
        # just enough matches
        ({1, 2}, True),
        # enough matches
        ({1, 2, 3}, True),
        # not enough matches
        ({1}, False),
        # too many matches
        ({1, 2, 3, 4}, False),
    )
    for locations, should_match in cases:
        _, matches = match([r], {capa.features.insn.Number(100): locations}, 0x0)
        assert ("test rule" in matches) is should_match
def test_match_range_exact_zero():
    """`count(...): 0` matches only when the counted feature has no locations."""
    rule = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - count(number(100)): 0
                    # we can't have `count(foo): 0` at the top level,
                    # since we don't support top level NOT statements.
                    # so we have this additional trivial feature.
                    - mnemonic: mov
        """)
    r = capa.rules.Rule.from_yaml(rule)

    # note: feature locations are sets everywhere else in this file,
    # so "no locations" is spelled `set()` here; the literal `{}` would be an empty dict.

    # feature isn't indexed - good.
    _, matches = match([r], {capa.features.insn.Mnemonic("mov"): set()}, 0x0)
    assert "test rule" in matches

    # feature is indexed, but no matches.
    # i don't think we should ever really have this case, but good to check anyways.
    _, matches = match(
        [r],
        {capa.features.insn.Number(100): set(), capa.features.insn.Mnemonic("mov"): set()},
        0x0,
    )
    assert "test rule" in matches

    # too many matches
    _, matches = match(
        [r],
        {capa.features.insn.Number(100): {1}, capa.features.insn.Mnemonic("mov"): {1}},
        0x0,
    )
    assert "test rule" not in matches
def test_match_range_with_zero():
    """`count(...): (0, 1)` matches when the counted feature has zero or one location."""
    rule = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - count(number(100)): (0, 1)
                    # we can't have `count(foo): 0` at the top level,
                    # since we don't support top level NOT statements.
                    # so we have this additional trivial feature.
                    - mnemonic: mov
        """)
    r = capa.rules.Rule.from_yaml(rule)

    # note: feature locations are sets everywhere else in this file,
    # so "no locations" is spelled `set()` here; the literal `{}` would be an empty dict.

    # ok
    _, matches = match([r], {capa.features.insn.Mnemonic("mov"): set()}, 0x0)
    assert "test rule" in matches

    _, matches = match(
        [r],
        {capa.features.insn.Number(100): set(), capa.features.insn.Mnemonic("mov"): set()},
        0x0,
    )
    assert "test rule" in matches

    _, matches = match(
        [r],
        {capa.features.insn.Number(100): {1}, capa.features.insn.Mnemonic("mov"): {1}},
        0x0,
    )
    assert "test rule" in matches

    # too many matches
    _, matches = match(
        [r],
        {capa.features.insn.Number(100): {1, 2}, capa.features.insn.Mnemonic("mov"): {1, 2}},
        0x0,
    )
    assert "test rule" not in matches
def test_match_adds_matched_rule_feature():
    """show that using `match` adds a feature for matched rules."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - number: 100
        """)

    features, _ = match(
        [capa.rules.Rule.from_yaml(yaml_text)],
        {capa.features.insn.Number(100): {1}},
        0x0,
    )

    # the returned feature set should now carry the name of the rule that matched.
    assert MatchedRule("test rule") in features
def test_match_matched_rules():
    """show that a rule can reference another rule via `match`, whatever order the rules are given in."""
    base_rule = capa.rules.Rule.from_yaml(textwrap.dedent("""
        rule:
            meta:
                name: test rule1
                scopes:
                    static: function
                    dynamic: process
            features:
                - number: 100
        """))
    dependent_rule = capa.rules.Rule.from_yaml(textwrap.dedent("""
        rule:
            meta:
                name: test rule2
                scopes:
                    static: function
                    dynamic: process
            features:
                - match: test rule1
        """))

    # the ordering of the rules must not matter,
    # the engine should match rules in an appropriate order.
    for ordering in ([base_rule, dependent_rule], [dependent_rule, base_rule]):
        features, _ = match(
            capa.rules.topologically_order_rules(ordering),
            {capa.features.insn.Number(100): {1}},
            0x0,
        )
        assert MatchedRule("test rule1") in features
        assert MatchedRule("test rule2") in features
def test_match_namespace():
    """show that `match: <namespace>` is satisfied by a rule matching under that namespace."""
    rule_texts = (
        """
        rule:
            meta:
                name: CreateFile API
                scopes:
                    static: function
                    dynamic: process
                namespace: file/create/CreateFile
            features:
                - api: CreateFile
        """,
        """
        rule:
            meta:
                name: WriteFile API
                scopes:
                    static: function
                    dynamic: process
                namespace: file/write
            features:
                - api: WriteFile
        """,
        """
        rule:
            meta:
                name: file-create
                scopes:
                    static: function
                    dynamic: process
            features:
                - match: file/create
        """,
        """
        rule:
            meta:
                name: filesystem-any
                scopes:
                    static: function
                    dynamic: process
            features:
                - match: file
        """,
    )
    rules = [capa.rules.Rule.from_yaml(textwrap.dedent(text)) for text in rule_texts]

    # CreateFile lives under file/create, so both namespace rules should fire.
    features, matches = match(
        capa.rules.topologically_order_rules(rules),
        {capa.features.insn.API("CreateFile"): {1}},
        0x0,
    )
    for rulename in ("CreateFile API", "file-create", "filesystem-any"):
        assert rulename in matches
    for namespace in ("file", "file/create", "file/create/CreateFile"):
        assert MatchedRule(namespace) in features

    # WriteFile lives under file/write, so only the broader `file` namespace rule should fire.
    _, matches = match(
        capa.rules.topologically_order_rules(rules),
        {capa.features.insn.API("WriteFile"): {1}},
        0x0,
    )
    assert "WriteFile API" in matches
    assert "file-create" not in matches
    assert "filesystem-any" in matches
def test_match_substring():
    """`substring: abc` matches strings containing `abc` at the start, middle, or end."""
    rules = [
        capa.rules.Rule.from_yaml(textwrap.dedent("""
            rule:
                meta:
                    name: test rule
                    scopes:
                        static: function
                        dynamic: process
                features:
                    - and:
                        - substring: abc
            """)),
    ]

    def matched_features(text):
        # run the rules against a feature set holding just this one string.
        features, _ = match(
            capa.rules.topologically_order_rules(rules),
            {String(text): {1}},
            0x0,
        )
        return features

    # a string that doesn't contain the substring
    assert MatchedRule("test rule") not in matched_features("aaaa")

    # exact, surrounded, suffix, and prefix occurrences
    for text in ("abc", "111abc222", "111abc", "abc222"):
        assert MatchedRule("test rule") in matched_features(text)
def test_match_regex():
    """regex strings are case sensitive, unanchored unless `^` is used, and only apply to string features."""
    rule_texts = (
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - string: /.*bbbb.*/
        """,
        """
        rule:
            meta:
                name: rule with implied wildcards
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - string: /bbbb/
        """,
        """
        rule:
            meta:
                name: rule with anchor
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - string: /^bbbb/
        """,
    )
    rules = [capa.rules.Rule.from_yaml(textwrap.dedent(text)) for text in rule_texts]

    def matched_features(feature):
        # run the rules against a feature set holding just this one feature.
        features, _ = match(
            capa.rules.topologically_order_rules(rules),
            {feature: {1}},
            0x0,
        )
        return features

    non_matching = (
        # not a string feature at all
        capa.features.insn.Number(100),
        # a string without the pattern
        String("aaaa"),
        # the pattern, but in the wrong case
        String("aBBBBa"),
    )
    for feature in non_matching:
        assert MatchedRule("test rule") not in matched_features(feature)

    features = matched_features(String("abbbba"))
    assert MatchedRule("test rule") in features
    assert MatchedRule("rule with implied wildcards") in features
    assert MatchedRule("rule with anchor") not in features
def test_match_regex_ignorecase():
    """a regex with the `/i` suffix matches a string that differs only by case."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - string: /.*bbbb.*/i
        """)
    rules = [capa.rules.Rule.from_yaml(yaml_text)]

    features, _ = match(
        capa.rules.topologically_order_rules(rules),
        {String("aBBBBa"): {1}},
        0x0,
    )
    assert MatchedRule("test rule") in features
def test_match_regex_complex():
    """a regex containing escaped backslashes and spaces matches a registry-style path."""
    # raw string: the doubled backslashes must reach the rule parser unchanged.
    yaml_text = textwrap.dedent(r"""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - or:
                    - string: /.*HARDWARE\\Key\\key with spaces\\.*/i
        """)
    rules = [capa.rules.Rule.from_yaml(yaml_text)]

    features, _ = match(
        capa.rules.topologically_order_rules(rules),
        {String(r"Hardware\Key\key with spaces\some value"): {1}},
        0x0,
    )
    assert MatchedRule("test rule") in features
def test_match_regex_values_always_string():
    """regex patterns that look like numbers (`/123/`, `/0x123/`) still match string features."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - or:
                    - string: /123/
                    - string: /0x123/
        """)
    rules = [capa.rules.Rule.from_yaml(yaml_text)]

    for text in ("123", "0x123"):
        features, _ = match(
            capa.rules.topologically_order_rules(rules),
            {String(text): {1}},
            0x0,
        )
        assert MatchedRule("test rule") in features
@pytest.mark.parametrize(
    "pattern",
    [
        "/test\\.exe/",
        "/hello/i",
        "/foo\\\\bar/",
    ],
)
def test_regex_get_value_str(pattern):
    """the string form of a regex feature is exactly the pattern it was built from."""
    regex = capa.features.common.Regex(pattern)
    rendered = regex.get_value_str()
    # Regex.get_value_str() must return the raw pattern without escaping, see #1909.
    assert rendered == pattern
@pytest.mark.xfail(reason="can't have top level NOT")
def test_match_only_not():
    """a rule consisting solely of a `not` statement; marked xfail since top level NOT isn't supported."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
                namespace: testns1/testns2
            features:
                - not:
                    - number: 99
        """)

    _, matches = match(
        [capa.rules.Rule.from_yaml(yaml_text)],
        {capa.features.insn.Number(100): {1, 2}},
        0x0,
    )
    assert "test rule" in matches
def test_match_not():
    """a `not` nested under `and` matches when the negated feature is absent."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
                namespace: testns1/testns2
            features:
                - and:
                    - mnemonic: mov
                    - not:
                        - number: 99
        """)

    # number 99 is absent and the mnemonic is present, so both branches of the `and` hold.
    available = {
        capa.features.insn.Number(100): {1, 2},
        capa.features.insn.Mnemonic("mov"): {1, 2},
    }
    _, matches = match([capa.rules.Rule.from_yaml(yaml_text)], available, 0x0)
    assert "test rule" in matches
@pytest.mark.xfail(reason="can't have nested NOT")
def test_match_not_not():
    """a rule consisting of a doubly nested `not`; marked xfail since nested NOT isn't supported."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
                namespace: testns1/testns2
            features:
                - not:
                    - not:
                        - number: 100
        """)

    _, matches = match(
        [capa.rules.Rule.from_yaml(yaml_text)],
        {capa.features.insn.Number(100): {1, 2}},
        0x0,
    )
    assert "test rule" in matches
def test_match_operand_number():
    """`operand[0].number: 0x10` matches only when both operand index and value agree."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - operand[0].number: 0x10
        """)
    r = capa.rules.Rule.from_yaml(yaml_text)

    operand_number = capa.features.insn.OperandNumber

    # separately constructed but equal features must be found in a set.
    assert operand_number(0, 0x10) in {operand_number(0, 0x10)}

    cases = (
        ((0, 0x10), True),
        # mismatching index
        ((1, 0x10), False),
        # mismatching value
        ((0, 0x11), False),
    )
    for (index, value), should_match in cases:
        _, matches = match([r], {operand_number(index, value): {1, 2}}, 0x0)
        assert ("test rule" in matches) is should_match
def test_match_operand_offset():
    """`operand[0].offset: 0x10` matches only when both operand index and value agree."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - operand[0].offset: 0x10
        """)
    r = capa.rules.Rule.from_yaml(yaml_text)

    operand_offset = capa.features.insn.OperandOffset

    # separately constructed but equal features must be found in a set.
    assert operand_offset(0, 0x10) in {operand_offset(0, 0x10)}

    cases = (
        ((0, 0x10), True),
        # mismatching index
        ((1, 0x10), False),
        # mismatching value
        ((0, 0x11), False),
    )
    for (index, value), should_match in cases:
        _, matches = match([r], {operand_offset(index, value): {1, 2}}, 0x0)
        assert ("test rule" in matches) is should_match
def test_match_property_access():
    """`property/read: ...` matches only when both the property name and the access kind agree."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - property/read: System.IO.FileInfo::Length
        """)
    r = capa.rules.Rule.from_yaml(yaml_text)

    access = capa.features.common.FeatureAccess

    def prop(name, mode):
        return capa.features.insn.Property(name, mode)

    # separately constructed but equal features must be found in a set.
    assert prop("System.IO.FileInfo::Length", access.READ) in {prop("System.IO.FileInfo::Length", access.READ)}

    cases = (
        (("System.IO.FileInfo::Length", access.READ), True),
        # mismatching access
        (("System.IO.FileInfo::Length", access.WRITE), False),
        # mismatching value
        (("System.IO.FileInfo::Size", access.READ), False),
    )
    for (name, mode), should_match in cases:
        _, matches = match(
            [r],
            {prop(name, mode): {1, 2}},
            0x0,
        )
        assert ("test rule" in matches) is should_match
def test_match_os_any():
    """check each pairing of a concrete/any OS in the rule with a concrete/any OS in the features."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - or:
                    - and:
                        - or:
                            - os: windows
                            - os: linux
                            - os: macos
                        - string: "Hello world"
                    - and:
                        - os: any
                        - string: "Goodbye world"
        """)
    r = capa.rules.Rule.from_yaml(yaml_text)

    # "Hello world" is guarded by concrete OS names in the rule,
    # "Goodbye world" is guarded by `os: any`.
    # each is tried against both an `any` and a concrete OS feature.
    cases = (
        (OS_ANY, "Hello world"),
        (OS_WINDOWS, "Hello world"),
        (OS_ANY, "Goodbye world"),
        (OS_WINDOWS, "Goodbye world"),
    )
    for os_name, text in cases:
        _, matches = match(
            [r],
            {OS(os_name): {1}, String(text): {1}},
            0x0,
        )
        assert "test rule" in matches
# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_and_unstable():
    """a rule with a root AND of a mnemonic and an API is indexed by the API feature alone."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - mnemonic: mov
                    - api: CreateFileW
        """)
    ruleset = capa.rules.RuleSet([capa.rules.Rule.from_yaml(yaml_text)])
    index: capa.rules.RuleSet._RuleFeatureIndex = ruleset._feature_indexes_by_scopes[Scope.FUNCTION]
    indexed = index.rules_by_feature

    # there's a single rule, and it's indexed by a single feature
    assert len(indexed) == 1
    # and we index by the more uncommon API feature, not the common mnemonic feature
    assert capa.features.insn.API("CreateFileW") in indexed

    # the string and bytes rule collections stay empty.
    assert not index.string_rules
    assert not index.bytes_rules
# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_or_unstable():
    """a rule with a root OR of a mnemonic and an API is indexed by both features."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - or:
                    - mnemonic: mov
                    - api: CreateFileW
        """)
    ruleset = capa.rules.RuleSet([capa.rules.Rule.from_yaml(yaml_text)])
    index: capa.rules.RuleSet._RuleFeatureIndex = ruleset._feature_indexes_by_scopes[Scope.FUNCTION]
    indexed = index.rules_by_feature

    # there's a single rule, and it's indexed by both features,
    # because they fall under the single root OR node.
    assert len(indexed) == 2
    for feature in (capa.features.insn.API("CreateFileW"), capa.features.insn.Mnemonic("mov")):
        assert feature in indexed

    # the string and bytes rule collections stay empty.
    assert not index.string_rules
    assert not index.bytes_rules
# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_nested_unstable():
    """a rule with `and(mnemonic, or(api, string))` is indexed by the API and string, not the mnemonic."""
    yaml_text = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - mnemonic: mov
                    - or:
                        - api: CreateFileW
                        - string: foo
        """)
    ruleset = capa.rules.RuleSet([capa.rules.Rule.from_yaml(yaml_text)])
    index: capa.rules.RuleSet._RuleFeatureIndex = ruleset._feature_indexes_by_scopes[Scope.FUNCTION]
    indexed = index.rules_by_feature

    # there's a single rule, and it's indexed by the two uncommon features,
    # not the single common feature.
    assert len(indexed) == 2
    assert capa.features.insn.API("CreateFileW") in indexed
    assert String("foo") in indexed
    assert capa.features.insn.Mnemonic("mov") not in indexed

    # the string and bytes rule collections stay empty.
    assert not index.string_rules
    assert not index.bytes_rules