From 2c12cbb4854265e013f03c42d1bb1404f198adc6 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Thu, 11 Jun 2026 13:42:29 +0200 Subject: [PATCH] tests: add data-driven test fixtures for rule matcher (#2987) --- CHANGELOG.md | 1 + tests/{fixtures.py => fixtures/__init__.py} | 5 +- tests/fixtures/matcher/README.md | 74 ++ tests/fixtures/matcher/dynamic/call.yml | 53 ++ tests/fixtures/matcher/dynamic/process.yml | 40 + tests/fixtures/matcher/dynamic/span.yml | 316 ++++++++ tests/fixtures/matcher/dynamic/thread.yml | 38 + tests/fixtures/matcher/static/composition.yml | 122 +++ tests/fixtures/matcher/static/features.yml | 361 +++++++++ tests/fixtures/matcher/static/logic.yml | 712 ++++++++++++++++++ tests/fixtures/matcher/static/parsing.yml | 33 + tests/fixtures/matcher/static/scopes.yml | 141 ++++ tests/fixtures/matcher/static/strings.yml | 274 +++++++ tests/test_dynamic_span_of_calls_scope.py | 412 ---------- tests/test_engine.py | 132 +--- tests/test_match.py | 660 ---------------- tests/test_match_fixtures.py | 711 +++++++++++++++++ tests/test_rules.py | 242 +----- 18 files changed, 2881 insertions(+), 1446 deletions(-) rename tests/{fixtures.py => fixtures/__init__.py} (99%) create mode 100644 tests/fixtures/matcher/README.md create mode 100644 tests/fixtures/matcher/dynamic/call.yml create mode 100644 tests/fixtures/matcher/dynamic/process.yml create mode 100644 tests/fixtures/matcher/dynamic/span.yml create mode 100644 tests/fixtures/matcher/dynamic/thread.yml create mode 100644 tests/fixtures/matcher/static/composition.yml create mode 100644 tests/fixtures/matcher/static/features.yml create mode 100644 tests/fixtures/matcher/static/logic.yml create mode 100644 tests/fixtures/matcher/static/parsing.yml create mode 100644 tests/fixtures/matcher/static/scopes.yml create mode 100644 tests/fixtures/matcher/static/strings.yml create mode 100644 tests/test_match_fixtures.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a67b77d1..e1381441 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -211,6 +211,7 @@ This release includes Ghidra PyGhidra support, performance improvements, depende ### Development +- tests: add data-driven rule matcher fixtures with a show-features-like DSL and authoring documentation #2985 - doc: document that default output shows top-level matches only; -v/-vv show nested matches @devs6186 #1410 - doc: fix typo in usage.md, add documentation links to README @devs6186 #2274 - doc: add table comparing ways to consume capa output (CLI, IDA, Ghidra, dynamic sandbox, web) @devs6186 #2273 diff --git a/tests/fixtures.py b/tests/fixtures/__init__.py similarity index 99% rename from tests/fixtures.py rename to tests/fixtures/__init__.py index 3396644b..d8d8e25d 100644 --- a/tests/fixtures.py +++ b/tests/fixtures/__init__.py @@ -41,8 +41,9 @@ from capa.features.extractors.base_extractor import ( from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor logger = logging.getLogger(__name__) -CD = Path(__file__).resolve().parent -FIXTURE_MANIFEST_DIR = CD / "fixtures" / "features" +_FIXTURES_DIR = Path(__file__).resolve().parent +CD = _FIXTURES_DIR.parent +FIXTURE_MANIFEST_DIR = _FIXTURES_DIR / "features" DNFILE_TESTFILES = CD / "data" / "dotnet" / "dnfile-testfiles" diff --git a/tests/fixtures/matcher/README.md b/tests/fixtures/matcher/README.md new file mode 100644 index 00000000..c0a0e1d1 --- /dev/null +++ b/tests/fixtures/matcher/README.md @@ -0,0 +1,74 @@ +Data-driven matcher tests. Each test pairs a rule fragment, a synthetic feature listing, and the exact matches that capa should report. These test the matcher itself, not end-to-end binary analysis. Tests for rule parsing and representation (e.g. how a rule looks after deserialization) belong in `test_rules.py`, not here. + +Fixture files live under `static/` and `dynamic/` directories, organized by theme (e.g. `logic.yml`, `scopes.yml`, `strings.yml`). Flavor is inferred from the directory. 
The pytest entrypoint and DSL parser both live in `tests/test_match_fixtures.py`. + +```sh +pytest -q tests/test_match_fixtures.py +pytest -q tests/test_match_fixtures.py -k and-both-present +``` + +## Fixture file format + +Each file is a YAML list. Each element is one test case. + +```yaml +- name: and-both-present + description: and requires all children to match + rules: + - name: and-match + description: should match because the function contains both mov and number 0x10 + scopes: + static: function + features: + - and: + - mnemonic: mov + - number: 0x10 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(mov) + insn: 0x402000: number(0x10) + expect: + matches: + and-match: + - 0x402000 +``` + +The `name` field is a stable human-readable identifier that appears in pytest ids. The `description` explains the behavior under test. Rules are specified under `rules` with `name`, `scopes`, and `features` at the top level (no `meta:` wrapper needed); the loader fills in the missing scope side with `unsupported`. The `features` field is a block string or list of strings in the DSL described below. Expected results go in `expect.matches`, mapping rule names to exact match locations. + +Optional fields: `base address` (static only, defaults to `0`) and `options.span size` (patches `SPAN_SIZE` for that test). + +Keep tests small and focused: each test case should have its own minimal feature set. Prefer many small individual tests over grouped rules sharing features. + +## Feature DSL + +Line prefixes for static tests: `global:`, `file:`, `func:`, `bb:`, `insn:`. +Line prefixes for dynamic tests: `global:`, `file:`, `proc:`, `thread:`, `call:`. 
+ +Static examples: +``` +global: global: os(windows) + file: 0x402345: characteristic(embedded pe) + func: 0x401000 + func: 0x401000: string(hello world) + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + insn: 0x401000: offset(0x402000) -> 0x402000 + insn: 0x401000: string(key: value) +``` + +Dynamic examples: +``` +proc: sample.exe (pid=3052) + thread: 3064 + call: 11: api(LdrGetProcedureAddress) + call: 11: string(AddVectoredExceptionHandler) +``` + +A trailing `-> <address>` overrides the feature location. Feature text may contain `: `. Dynamic call IDs must be unique within a test and can be used as shorthand in `expect.matches`. + +## Address syntax + +String forms: `0x401000`, `base address+0x100`, `file+0x20`, `token(0x1234)`, `token(0x1234)+0x10`, `global`, `process{pid:3052}`, `process{pid:3052,tid:3064}`, `process{pid:3052,tid:3064,call:11}` (with optional `ppid:`). + +Dynamic tests may use a bare integer call ID in `expect.matches` when that call ID is unique within the test. diff --git a/tests/fixtures/matcher/dynamic/call.yml b/tests/fixtures/matcher/dynamic/call.yml new file mode 100644 index 00000000..41a1ea5e --- /dev/null +++ b/tests/fixtures/matcher/dynamic/call.yml @@ -0,0 +1,53 @@ +- name: call-scope-single-api + description: call scope matches a single API at the correct call + rules: + - name: call-api + scopes: + dynamic: call + features: + - api: GetSystemTimeAsFileTime + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 8: api(GetSystemTimeAsFileTime) + call: 9: api(GetSystemInfo) + expect: + matches: + call-api: + - 8 + +- name: call-scope-multiple-matches + description: call scope reports multiple matching calls + rules: + - name: call-multi + scopes: + dynamic: call + features: + - api: LdrGetDllHandle + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + call: 12: api(LdrGetDllHandle) + expect: + matches: + call-multi: + - 10 + - 12 + +- name: 
call-scope-no-match + description: call scope does not match when no call has the required feature + rules: + - name: call-absent + scopes: + dynamic: call + features: + - api: CreateFileW + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 8: api(GetSystemTimeAsFileTime) + call: 9: api(GetSystemInfo) + expect: + matches: {} diff --git a/tests/fixtures/matcher/dynamic/process.yml b/tests/fixtures/matcher/dynamic/process.yml new file mode 100644 index 00000000..308d934a --- /dev/null +++ b/tests/fixtures/matcher/dynamic/process.yml @@ -0,0 +1,40 @@ +- name: process-scope-basic + description: process scope matches features aggregated across threads + rules: + - name: process-apis + scopes: + dynamic: process + features: + - and: + - api: CreateFileW + - api: WriteFile + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 1: api(CreateFileW) + thread: 3065 + call: 2: api(WriteFile) + expect: + matches: + process-apis: + - "process{pid:3052}" + +- name: process-scope-no-match + description: process scope does not match when features are split across processes + rules: + - name: process-split + scopes: + dynamic: process + features: + - and: + - api: CreateFileW + - api: WriteFile + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 1: api(CreateFileW) + proc: other.exe (pid=3053) + thread: 4000 + call: 2: api(WriteFile) + expect: + matches: {} diff --git a/tests/fixtures/matcher/dynamic/span.yml b/tests/fixtures/matcher/dynamic/span.yml new file mode 100644 index 00000000..23536f09 --- /dev/null +++ b/tests/fixtures/matcher/dynamic/span.yml @@ -0,0 +1,316 @@ +- name: span-window-contains-all + description: span-of-calls matches when all features fall within the window + options: + span size: 2 + rules: + - name: span-resolve-add-veh + description: should match the span ending at the call that resolves AddVectoredExceptionHandler + scopes: + dynamic: span of calls + features: + - and: + - api: LdrGetDllHandle + - api: 
LdrGetProcedureAddress + - string: AddVectoredExceptionHandler + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + call: 11: string(AddVectoredExceptionHandler) + call: 12: api(RtlAddVectoredExceptionHandler) + expect: + matches: + span-resolve-add-veh: + - 11 + +- name: span-window-too-small + description: span-of-calls does not match when the window is too small to contain all features + options: + span size: 2 + rules: + - name: span-window-too-small + description: should not match because the configured span window does not include both APIs together + scopes: + dynamic: span of calls + features: + - and: + - api: LdrGetDllHandle + - api: RtlAddVectoredExceptionHandler + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + call: 12: api(RtlAddVectoredExceptionHandler) + expect: + matches: {} + +- name: span-with-count + description: span matches when count constraint is satisfied within the window + rules: + - name: span-count + scopes: + dynamic: span of calls + features: + - and: + - api: GetSystemTimeAsFileTime + - api: GetSystemInfo + - api: LdrGetDllHandle + - api: LdrGetProcedureAddress + - count(api(LdrGetDllHandle)): 2 + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 8: api(GetSystemTimeAsFileTime) + call: 9: api(GetSystemInfo) + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + call: 11: string(AddVectoredExceptionHandler) + call: 12: api(LdrGetDllHandle) + expect: + matches: + span-count: + - 12 + +- name: span-size-exactly-fits + description: span matches when features are exactly at the span window boundary + options: + span size: 3 + rules: + - name: span-boundary + scopes: + dynamic: span of calls + features: + - and: + - api: GetSystemTimeAsFileTime + - api: LdrGetDllHandle + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 8: 
api(GetSystemTimeAsFileTime) + call: 9: api(GetSystemInfo) + call: 10: api(LdrGetDllHandle) + expect: + matches: + span-boundary: + - 10 + +- name: span-size-off-by-one + description: span does not match when features are just outside the window boundary + options: + span size: 2 + rules: + - name: span-off-by-one + scopes: + dynamic: span of calls + features: + - and: + - api: GetSystemTimeAsFileTime + - api: LdrGetDllHandle + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 8: api(GetSystemTimeAsFileTime) + call: 9: api(GetSystemInfo) + call: 10: api(LdrGetDllHandle) + expect: + matches: {} + +- name: span-length-too-short + description: span does not match when features are outside the span window + options: + span size: 5 + rules: + - name: span-length + scopes: + dynamic: span of calls + features: + - and: + - api: GetSystemTimeAsFileTime + - api: RtlAddVectoredExceptionHandler + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 8: api(GetSystemTimeAsFileTime) + call: 9: api(GetSystemInfo) + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + call: 12: api(LdrGetDllHandle) + call: 13: api(LdrGetProcedureAddress) + call: 14: api(RtlAddVectoredExceptionHandler) + expect: + matches: {} + +- name: span-call-subscope + description: call subscope within span matches features at a single call + rules: + - name: span-call-sub + scopes: + dynamic: span of calls + features: + - and: + - call: + - and: + - api: LdrGetProcedureAddress + - string: AddVectoredExceptionHandler + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + call: 11: string(AddVectoredExceptionHandler) + call: 12: api(LdrGetDllHandle) + expect: + matches: + span-call-sub: + - 11 + +- name: span-nested-span-subscopes + description: nested span subscopes match when each sub-span is satisfied + rules: + - name: span-nested + scopes: + dynamic: span of calls + features: + - 
and: + - span of calls: + - description: resolve add VEH + - and: + - api: LdrGetDllHandle + - api: LdrGetProcedureAddress + - string: AddVectoredExceptionHandler + - span of calls: + - description: resolve remove VEH + - and: + - api: LdrGetDllHandle + - api: LdrGetProcedureAddress + - string: RemoveVectoredExceptionHandler + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + call: 11: string(AddVectoredExceptionHandler) + call: 12: api(LdrGetDllHandle) + call: 13: api(LdrGetProcedureAddress) + call: 13: string(RemoveVectoredExceptionHandler) + expect: + matches: + span-nested: + - 13 + +- name: span-example-practical + description: practical span pattern combining call subscopes with direct API feature + rules: + - name: span-practical + scopes: + dynamic: span of calls + features: + - and: + - call: + - and: + - api: LdrGetDllHandle + - string: "kernel32.dll" + - call: + - and: + - api: LdrGetProcedureAddress + - string: "AddVectoredExceptionHandler" + - api: RtlAddVectoredExceptionHandler + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 10: api(LdrGetDllHandle) + call: 10: string(kernel32.dll) + call: 11: api(LdrGetProcedureAddress) + call: 11: string(AddVectoredExceptionHandler) + call: 12: api(LdrGetDllHandle) + call: 12: string(kernel32.dll) + call: 13: api(LdrGetProcedureAddress) + call: 13: string(RemoveVectoredExceptionHandler) + call: 14: api(RtlAddVectoredExceptionHandler) + expect: + matches: + span-practical: + - 14 + +- name: span-overlapping-single-event + description: overlapping spans that match on a single event return only the first match + rules: + - name: span-overlap + scopes: + dynamic: span of calls + features: + - and: + - call: + - and: + - api: LdrGetProcedureAddress + - string: "AddVectoredExceptionHandler" + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + 
call: 11: string(AddVectoredExceptionHandler) + call: 12: api(LdrGetDllHandle) + call: 13: api(LdrGetProcedureAddress) + call: 13: string(RemoveVectoredExceptionHandler) + expect: + matches: + span-overlap: + - 11 + +- name: span-match-statements + description: match statements work within span-of-calls rules including namespace matching + rules: + - name: resolve add VEH + namespace: linking/runtime-linking/veh + scopes: + dynamic: span of calls + features: + - and: + - api: LdrGetDllHandle + - api: LdrGetProcedureAddress + - string: AddVectoredExceptionHandler + - name: resolve remove VEH + namespace: linking/runtime-linking/veh + scopes: + dynamic: span of calls + features: + - and: + - api: LdrGetDllHandle + - api: LdrGetProcedureAddress + - string: RemoveVectoredExceptionHandler + - name: resolve add and remove VEH + scopes: + dynamic: span of calls + features: + - and: + - match: resolve add VEH + - match: resolve remove VEH + - name: has VEH runtime linking + scopes: + dynamic: span of calls + features: + - and: + - match: linking/runtime-linking/veh + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 10: api(LdrGetDllHandle) + call: 11: api(LdrGetProcedureAddress) + call: 11: string(AddVectoredExceptionHandler) + call: 12: api(LdrGetDllHandle) + call: 13: api(LdrGetProcedureAddress) + call: 13: string(RemoveVectoredExceptionHandler) + expect: + matches: + resolve add VEH: + - 11 + - 13 + resolve remove VEH: + - 13 + resolve add and remove VEH: + - 13 + has VEH runtime linking: + - 11 diff --git a/tests/fixtures/matcher/dynamic/thread.yml b/tests/fixtures/matcher/dynamic/thread.yml new file mode 100644 index 00000000..af99e3c7 --- /dev/null +++ b/tests/fixtures/matcher/dynamic/thread.yml @@ -0,0 +1,38 @@ +- name: thread-scope-basic + description: thread scope matches features aggregated across calls within a thread + rules: + - name: thread-apis + scopes: + dynamic: thread + features: + - and: + - api: CreateFileW + - api: WriteFile + 
features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 1: api(CreateFileW) + call: 2: api(WriteFile) + expect: + matches: + thread-apis: + - "process{pid:3052,tid:3064}" + +- name: thread-scope-no-match + description: thread scope does not match when features are split across different threads + rules: + - name: thread-split + scopes: + dynamic: thread + features: + - and: + - api: CreateFileW + - api: WriteFile + features: | + proc: sample.exe (pid=3052) + thread: 3064 + call: 1: api(CreateFileW) + thread: 3065 + call: 2: api(WriteFile) + expect: + matches: {} diff --git a/tests/fixtures/matcher/static/composition.yml b/tests/fixtures/matcher/static/composition.yml new file mode 100644 index 00000000..cffe31dc --- /dev/null +++ b/tests/fixtures/matcher/static/composition.yml @@ -0,0 +1,122 @@ +- name: match-rule-dependency + description: a rule using match can depend on another rule's result + rules: + - name: base rule + scopes: + static: function + features: + - number: 100 + - name: dependent rule + scopes: + static: function + features: + - match: base rule + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + expect: + matches: + base rule: + - 0x401000 + dependent rule: + - 0x401000 + +- name: namespace-match-direct + description: match on a namespace prefix matches rules in that namespace + rules: + - name: CreateFile API + namespace: file/create/CreateFile + scopes: + static: function + features: + - api: CreateFile + - name: file-create + scopes: + static: function + features: + - match: file/create + - name: filesystem-any + scopes: + static: function + features: + - match: file + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: api(CreateFile) + expect: + matches: + CreateFile API: + - 0x401000 + file-create: + - 0x401000 + filesystem-any: + - 0x401000 + +- name: namespace-match-intermediate-prefix + description: namespace match at an intermediate level matches rules below it + 
rules: + - name: kernel32 CreateFile + namespace: file/create/kernel32 + scopes: + static: function + features: + - api: CreateFile + - name: ntdll NtCreateFile + namespace: file/create/ntdll + scopes: + static: function + features: + - api: NtCreateFile + - name: any-file-create + scopes: + static: function + features: + - match: file/create + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: api(CreateFile) + expect: + matches: + kernel32 CreateFile: + - 0x401000 + any-file-create: + - 0x401000 + +- name: namespace-match-sibling-no-match + description: namespace match does not match sibling namespaces + rules: + - name: CreateFile API + namespace: file/create/CreateFile + scopes: + static: function + features: + - api: CreateFile + - name: WriteFile API + namespace: file/write + scopes: + static: function + features: + - api: WriteFile + - name: file-create + scopes: + static: function + features: + - match: file/create + - name: filesystem-any + scopes: + static: function + features: + - match: file + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: api(WriteFile) + expect: + matches: + WriteFile API: + - 0x401000 + filesystem-any: + - 0x401000 diff --git a/tests/fixtures/matcher/static/features.yml b/tests/fixtures/matcher/static/features.yml new file mode 100644 index 00000000..6992ed43 --- /dev/null +++ b/tests/fixtures/matcher/static/features.yml @@ -0,0 +1,361 @@ +- name: simple-number-match + description: basic number feature matches at function scope + rules: + - name: simple-number + scopes: + static: function + features: + - number: 100 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + expect: + matches: + simple-number: + - 0x401000 + +- name: not-with-and + description: not inside and still matches when a different number than the negated one is present + rules: + - name: not-with-and-present + scopes: + static: function + features: + - and: + - mnemonic: mov + - not: + - number: 
99 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + insn: 0x401000: mnemonic(mov) + expect: + matches: + not-with-and-present: + - 0x401000 + +- name: operand-number-match + description: operand number matches by index and value + rules: + - name: operand-number + scopes: + static: function + features: + - operand[0].number: 0x10 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: operand[0].number(0x10) + expect: + matches: + operand-number: + - 0x401000 + +- name: operand-number-wrong-index + description: operand number does not match when the operand index differs + rules: + - name: operand-number-idx + scopes: + static: function + features: + - operand[0].number: 0x10 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: operand[1].number(0x10) + expect: + matches: {} + +- name: operand-number-wrong-value + description: operand number does not match when the value differs + rules: + - name: operand-number-val + scopes: + static: function + features: + - operand[0].number: 0x10 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: operand[0].number(0x11) + expect: + matches: {} + +- name: operand-offset-match + description: operand offset matches by index and value + rules: + - name: operand-offset + scopes: + static: function + features: + - operand[0].offset: 0x10 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: operand[0].offset(0x10) + expect: + matches: + operand-offset: + - 0x401000 + +- name: operand-offset-wrong-index + description: operand offset does not match when the operand index differs + rules: + - name: operand-offset-idx + scopes: + static: function + features: + - operand[0].offset: 0x10 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: operand[1].offset(0x10) + expect: + matches: {} + +- name: operand-offset-wrong-value + description: operand offset does not match when the value 
differs + rules: + - name: operand-offset-val + scopes: + static: function + features: + - operand[0].offset: 0x10 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: operand[0].offset(0x11) + expect: + matches: {} + +- name: property-read-match + description: property/read matches the correct property name and access mode + rules: + - name: property-read + scopes: + static: function + features: + - property/read: System.IO.FileInfo::Length + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: property/read(System.IO.FileInfo::Length) + expect: + matches: + property-read: + - 0x401000 + +- name: property-read-wrong-access + description: property/read does not match a property/write feature + rules: + - name: property-read-access + scopes: + static: function + features: + - property/read: System.IO.FileInfo::Length + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: property/write(System.IO.FileInfo::Length) + expect: + matches: {} + +- name: property-read-wrong-value + description: property/read does not match a different property name + rules: + - name: property-read-value + scopes: + static: function + features: + - property/read: System.IO.FileInfo::Length + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: property/read(System.IO.FileInfo::Size) + expect: + matches: {} + +- name: os-any-matches-specific + description: os any matches when a specific os feature is present + rules: + - name: os-any-specific + scopes: + static: function + features: + - or: + - and: + - or: + - os: windows + - os: linux + - os: macos + - string: "Hello world" + - and: + - os: any + - string: "Goodbye world" + features: | + func: 0x401000 + func: 0x401000: string(Hello world) + bb: 0x401000: basic block + insn: 0x401000: os(windows) + expect: + matches: + os-any-specific: + - 0x401000 + +- name: os-any-matches-any + description: os any feature matches for "Goodbye world" path + rules: + 
- name: os-any-goodbye + scopes: + static: function + features: + - or: + - and: + - os: any + - string: "Goodbye world" + features: | + func: 0x401000 + func: 0x401000: string(Goodbye world) + bb: 0x401000: basic block + insn: 0x401000: os(any) + expect: + matches: + os-any-goodbye: + - 0x401000 + +- name: os-any-matches-specific-os + description: rule with os any matches when extracted feature is a specific os + rules: + - name: os-any-wildcard + scopes: + static: function + features: + - and: + - os: any + - string: "Hello world" + features: | + func: 0x401000 + func: 0x401000: string(Hello world) + bb: 0x401000: basic block + insn: 0x401000: os(windows) + expect: + matches: + os-any-wildcard: + - 0x401000 + +- name: bytes-exact-match + description: bytes feature matches when extracted bytes are identical + rules: + - name: bytes-exact + scopes: + static: function + features: + - bytes: 90 90 90 90 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: bytes(90909090) + expect: + matches: + bytes-exact: + - 0x401000 + +- name: bytes-prefix-match + description: bytes feature matches when extracted bytes start with the rule pattern + rules: + - name: bytes-prefix + scopes: + static: function + features: + - bytes: 90 90 90 90 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: bytes(9090909090909090) + expect: + matches: + bytes-prefix: + - 0x401000 + +- name: bytes-no-match + description: bytes feature does not match when extracted bytes differ + rules: + - name: bytes-diff + scopes: + static: function + features: + - bytes: 90 90 90 90 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: bytes(00000000) + expect: + matches: {} + +- name: bytes-too-short + description: bytes feature does not match when extracted bytes are shorter than the pattern + rules: + - name: bytes-short + scopes: + static: function + features: + - bytes: 90 90 90 90 + features: | + func: 0x401000 + bb: 0x401000: basic 
block + insn: 0x401000: bytes(9090) + expect: + matches: {} + +- name: negative-number-match + description: negative number matches correctly + rules: + - name: negative-num + scopes: + static: function + features: + - number: -1 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(-1) + expect: + matches: + negative-num: + - 0x401000 + +- name: number-zero-match + description: number zero matches correctly and is not confused with absence + rules: + - name: num-zero + scopes: + static: function + features: + - number: 0 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(0) + expect: + matches: + num-zero: + - 0x401000 + +- name: characteristic-match + description: characteristic feature matches correctly + rules: + - name: char-nzxor + scopes: + static: function + features: + - characteristic: nzxor + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: characteristic(nzxor) + expect: + matches: + char-nzxor: + - 0x401000 diff --git a/tests/fixtures/matcher/static/logic.yml b/tests/fixtures/matcher/static/logic.yml new file mode 100644 index 00000000..935f97e0 --- /dev/null +++ b/tests/fixtures/matcher/static/logic.yml @@ -0,0 +1,712 @@ +- name: and-both-present + description: and requires all children to match + rules: + - name: and-match + description: should match because the function contains both mov and number 0x10 + scopes: + static: function + features: + - and: + - mnemonic: mov + - number: 0x10 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(mov) + insn: 0x402000: number(0x10) + expect: + matches: + and-match: + - 0x402000 + +- name: or-one-branch-present + description: or requires at least one child to match + rules: + - name: or-match + description: should match because one branch of the or is satisfied by number 0x10 + scopes: + static: function + features: + - or: + - api: CreateFileW + - number: 0x10 + features: | + func: 
0x402000 + bb: 0x402000: basic block + insn: 0x402000: number(0x10) + expect: + matches: + or-match: + - 0x402000 + +- name: not-absent-feature + description: not succeeds when the child feature is absent + rules: + - name: not-match + description: should match because mov is present and number 0x20 is absent + scopes: + static: function + features: + - and: + - mnemonic: mov + - not: + - number: 0x20 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(mov) + expect: + matches: + not-match: + - 0x402000 + +- name: optional-absent-feature + description: optional does not prevent a match when the child feature is absent + rules: + - name: optional-match + description: should match even though the optional child is absent + scopes: + static: function + features: + - and: + - mnemonic: mov + - optional: + - number: 0x30 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(mov) + expect: + matches: + optional-match: + - 0x402000 + +- name: count-exact + description: count matches when the feature appears the exact number of times + rules: + - name: count-exact-match + description: should match because number 0x10 appears exactly twice + scopes: + static: function + features: + - count(number(0x10)): 2 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: number(0x10) + insn: 0x402002: number(0x10) + expect: + matches: + count-exact-match: + - 0x402000 + +- name: count-range + description: count matches when the feature count falls within the range + rules: + - name: count-range-match + description: should match because number 0x10 appears within the allowed range + scopes: + static: function + features: + - count(number(0x10)): (1, 2) + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: number(0x10) + insn: 0x402002: number(0x10) + expect: + matches: + count-range-match: + - 0x402000 + +- name: count-mismatch + description: count does not match when the 
feature count differs + rules: + - name: count-negative-no-match + description: should not match because number 0x10 does not appear three times + scopes: + static: function + features: + - count(number(0x10)): 3 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: number(0x10) + insn: 0x402002: number(0x10) + expect: + matches: {} + +- name: count-exact-not-enough + description: count does not match when there are fewer occurrences than required + rules: + - name: count-exact-not-enough + scopes: + static: function + features: + - count(number(100)): 2 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + expect: + matches: {} + +- name: count-exact-too-many + description: count does not match when there are more occurrences than required + rules: + - name: count-exact-too-many + scopes: + static: function + features: + - count(number(100)): 2 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + insn: 0x401002: number(100) + insn: 0x401004: number(100) + expect: + matches: {} + +- name: count-range-at-lower-bound + description: count range matches at the lower bound + rules: + - name: count-range-lower + scopes: + static: function + features: + - count(number(100)): (2, 3) + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + insn: 0x401002: number(100) + expect: + matches: + count-range-lower: + - 0x401000 + +- name: count-range-at-upper-bound + description: count range matches at the upper bound + rules: + - name: count-range-upper + scopes: + static: function + features: + - count(number(100)): (2, 3) + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + insn: 0x401002: number(100) + insn: 0x401004: number(100) + expect: + matches: + count-range-upper: + - 0x401000 + +- name: count-range-below-lower-bound + description: count range does not match when below the lower bound + rules: + - name: 
count-range-below + scopes: + static: function + features: + - count(number(100)): (2, 3) + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + expect: + matches: {} + +- name: count-range-above-upper-bound + description: count range does not match when above the upper bound + rules: + - name: count-range-above + scopes: + static: function + features: + - count(number(100)): (2, 3) + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(100) + insn: 0x401002: number(100) + insn: 0x401004: number(100) + insn: 0x401006: number(100) + expect: + matches: {} + +- name: count-zero-absent + description: count zero matches when the feature is absent + rules: + - name: count-zero-absent + scopes: + static: function + features: + - and: + - count(number(100)): 0 + - mnemonic: mov + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + expect: + matches: + count-zero-absent: + - 0x401000 + +- name: count-zero-present + description: count zero does not match when the feature is present + rules: + - name: count-zero-present + scopes: + static: function + features: + - and: + - count(number(100)): 0 + - mnemonic: mov + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + insn: 0x401000: number(100) + expect: + matches: {} + +- name: count-range-with-zero-absent + description: count range including zero matches when feature is absent + rules: + - name: count-range-zero + scopes: + static: function + features: + - and: + - count(number(100)): (0, 1) + - mnemonic: mov + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + expect: + matches: + count-range-zero: + - 0x401000 + +- name: count-range-with-zero-one-present + description: count range including zero matches when feature appears once + rules: + - name: count-range-zero-one + scopes: + static: function + features: + - and: + - count(number(100)): (0, 
1) + - mnemonic: mov + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + insn: 0x401002: number(100) + expect: + matches: + count-range-zero-one: + - 0x401000 + +- name: count-range-with-zero-too-many + description: count range including zero does not match when feature appears too many times + rules: + - name: count-range-zero-many + scopes: + static: function + features: + - and: + - count(number(100)): (0, 1) + - mnemonic: mov + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + insn: 0x401002: number(100) + insn: 0x401004: number(100) + expect: + matches: {} + +- name: two-or-more-match + description: N-or-more matches when at least two children are satisfied + rules: + - name: two-or-more + scopes: + static: function + features: + - or: + - and: + - number: 1 + - number: 2 + - or: + - number: 3 + - 2 or more: + - number: 4 + - number: 5 + - number: 6 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(5) + insn: 0x401002: number(6) + expect: + matches: + two-or-more: + - 0x401000 + +- name: two-or-more-not-enough + description: N-or-more does not match when fewer than two children are satisfied + rules: + - name: two-or-more-fail + scopes: + static: function + features: + - or: + - and: + - number: 1 + - number: 2 + - or: + - number: 3 + - 2 or more: + - number: 4 + - number: 5 + - number: 6 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(6) + expect: + matches: {} + +- name: count-string-match + description: count on string features matches the exact occurrence count + rules: + - name: count-string + scopes: + static: function + features: + - count(string(foo)): 2 + features: | + func: 0x401000 + func: 0x401000: string(foo) + bb: 0x401000: basic block + insn: 0x401002: string(foo) + expect: + matches: + count-string: + - 0x401000 + +- name: count-string-not-enough + description: count on string features does not 
match with too few occurrences + rules: + - name: count-string-few + scopes: + static: function + features: + - count(string(foo)): 2 + features: | + func: 0x401000 + func: 0x401000: string(foo) + expect: + matches: {} + +- name: count-string-too-many + description: count on string features does not match with too many occurrences + rules: + - name: count-string-many + scopes: + static: function + features: + - count(string(foo)): 2 + features: | + func: 0x401000 + func: 0x401000: string(foo) + bb: 0x401000: basic block + insn: 0x401002: string(foo) + insn: 0x401004: string(foo) + expect: + matches: {} + +- name: count-api-match + description: count on api features matches the exact occurrence count + rules: + - name: count-api + scopes: + static: function + features: + - count(api(CreateFileA)): 1 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: api(CreateFileA) + expect: + matches: + count-api: + - 0x401000 + +- name: count-api-no-match + description: count on api features does not match with zero occurrences + rules: + - name: count-api-zero + scopes: + static: function + features: + - count(api(CreateFileA)): 1 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: api(CreateFile) + expect: + matches: {} + +- name: count-offset-match + description: count on offset features matches the exact occurrence count + rules: + - name: count-offset + scopes: + static: function + features: + - or: + - count(offset(2)): 1 + - count(offset(0x100)): 2 or more + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: offset(2) + expect: + matches: + count-offset: + - 0x401000 + +- name: count-offset-or-more + description: count "2 or more" on offset features matches when threshold is met + rules: + - name: count-offset-more + scopes: + static: function + features: + - or: + - count(offset(2)): 1 + - count(offset(0x100)): 2 or more + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: 
offset(0x100) + insn: 0x401002: offset(0x100) + insn: 0x401004: offset(0x100) + expect: + matches: + count-offset-more: + - 0x401000 + +- name: and-one-missing + description: and does not match when one operand is absent + rules: + - name: and-incomplete + scopes: + static: function + features: + - and: + - mnemonic: mov + - number: 0x10 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(mov) + expect: + matches: {} + +- name: or-no-branches-present + description: or does not match when no branches are satisfied + rules: + - name: or-none + scopes: + static: function + features: + - or: + - api: CreateFileW + - number: 0x10 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(mov) + expect: + matches: {} + +- name: not-present-prevents-match + description: not blocks a match when the negated feature is present + rules: + - name: not-blocks + scopes: + static: function + features: + - and: + - mnemonic: mov + - not: + - number: 0x10 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(mov) + insn: 0x402000: number(0x10) + expect: + matches: {} + +- name: optional-present-still-matches + description: optional does not prevent a match when the child feature is present + rules: + - name: optional-present + scopes: + static: function + features: + - and: + - mnemonic: mov + - optional: + - number: 0x10 + features: | + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(mov) + insn: 0x402002: number(0x10) + expect: + matches: + optional-present: + - 0x402000 + +- name: count-inside-not + description: not negates a count that is satisfied + rules: + - name: count-not + scopes: + static: function + features: + - and: + - mnemonic: mov + - not: + - count(number(100)): 2 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + insn: 0x401002: number(100) + insn: 0x401004: number(100) + expect: + matches: {} + +- name: 
count-inside-not-unsatisfied + description: not succeeds when the count is not satisfied + rules: + - name: count-not-ok + scopes: + static: function + features: + - and: + - mnemonic: mov + - not: + - count(number(100)): 3 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + insn: 0x401002: number(100) + insn: 0x401004: number(100) + expect: + matches: + count-not-ok: + - 0x401000 + +- name: deeply-nested-logic + description: three levels of nesting evaluate correctly + rules: + - name: deep-nest + scopes: + static: function + features: + - and: + - or: + - and: + - number: 1 + - number: 2 + - number: 3 + - number: 4 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(1) + insn: 0x401002: number(2) + insn: 0x401004: number(4) + expect: + matches: + deep-nest: + - 0x401000 + +- name: deeply-nested-logic-inner-fail + description: deeply nested logic fails when inner and is not satisfied + rules: + - name: deep-nest-fail + scopes: + static: function + features: + - and: + - or: + - and: + - number: 1 + - number: 2 + - number: 3 + - number: 4 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(1) + insn: 0x401004: number(4) + expect: + matches: {} + +- name: three-or-more-at-threshold + description: N-or-more matches when exactly N children are satisfied + rules: + - name: three-or-more + scopes: + static: function + features: + - 3 or more: + - number: 1 + - number: 2 + - number: 3 + - number: 4 + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(1) + insn: 0x401002: number(2) + insn: 0x401004: number(3) + expect: + matches: + three-or-more: + - 0x401000 + +- name: three-or-more-below-threshold + description: N-or-more does not match when fewer than N children are satisfied + rules: + - name: three-or-more-fail + scopes: + static: function + features: + - 3 or more: + - number: 1 + - number: 2 + - number: 3 + - number: 4 + features: 
| + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: number(1) + insn: 0x401002: number(2) + expect: + matches: {} diff --git a/tests/fixtures/matcher/static/parsing.yml b/tests/fixtures/matcher/static/parsing.yml new file mode 100644 index 00000000..ad77545e --- /dev/null +++ b/tests/fixtures/matcher/static/parsing.yml @@ -0,0 +1,33 @@ +- name: hex-number-with-e + description: hex literals containing the letter e are parsed as integers, not floats + rules: + - name: hex-e-match + scopes: + static: function + features: + - number: 0x1e + features: | + func: 0x600000 + bb: 0x600000: basic block + insn: 0x600000: number(0x1e) + expect: + matches: + hex-e-match: + - 0x600000 + +- name: colon-in-feature-text + description: feature text containing a colon is parsed correctly + rules: + - name: colon-string-match + scopes: + static: function + features: + - string: "key: value" + features: | + func: 0x600000 + bb: 0x600000: basic block + insn: 0x600001: string(key: value) + expect: + matches: + colon-string-match: + - 0x600000 diff --git a/tests/fixtures/matcher/static/scopes.yml b/tests/fixtures/matcher/static/scopes.yml new file mode 100644 index 00000000..ee4f24db --- /dev/null +++ b/tests/fixtures/matcher/static/scopes.yml @@ -0,0 +1,141 @@ +- name: function-scope-aggregates-basic-blocks + description: function scope sees features from all basic blocks in the function + rules: + - name: function-cross-basic-block + description: should match when function scope aggregates features from different basic blocks + scopes: + static: function + features: + - and: + - mnemonic: mov + - mnemonic: add + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + bb: 0x401010: basic block + insn: 0x401010: mnemonic(add) + expect: + matches: + function-cross-basic-block: + - 0x401000 + +- name: basic-block-scope-does-not-aggregate + description: basic block scope only sees features within a single basic block + rules: + - name: 
basic-block-cross-basic-block + description: should not match because no single basic block contains both mnemonics + scopes: + static: basic block + features: + - and: + - mnemonic: mov + - mnemonic: add + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + bb: 0x401010: basic block + insn: 0x401010: mnemonic(add) + expect: + matches: {} + +- name: instruction-scope-single-instruction + description: instruction scope matches at the single instruction containing the feature + rules: + - name: instruction-single-mnemonic + description: should match once at the instruction that contains mov + scopes: + static: instruction + features: + - mnemonic: mov + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + bb: 0x401010: basic block + insn: 0x401010: mnemonic(add) + expect: + matches: + instruction-single-mnemonic: + - 0x401000 + +- name: function-scope-isolation + description: features from one function do not leak into another function's match + rules: + - name: both-features + scopes: + static: function + features: + - and: + - mnemonic: mov + - mnemonic: add + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + func: 0x402000 + bb: 0x402000: basic block + insn: 0x402000: mnemonic(add) + expect: + matches: {} + +- name: file-scope-basic + description: file scope matches features extracted at file level + rules: + - name: file-import + scopes: + static: file + features: + - import: kernel32.CreateFileW + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + file: 0x401000: import(kernel32.CreateFileW) + expect: + matches: + file-import: + - "no address" + +- name: basic-block-scope-match + description: basic block scope matches when a single BB contains all required features + rules: + - name: bb-both + scopes: + static: basic block + features: + - and: + - mnemonic: mov + - mnemonic: add + features: | + func: 
0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + insn: 0x401002: mnemonic(add) + bb: 0x401010: basic block + insn: 0x401010: mnemonic(xor) + expect: + matches: + bb-both: + - 0x401000 + +- name: instruction-scope-multiple-matches + description: instruction scope reports multiple matching instructions + rules: + - name: insn-multi + scopes: + static: instruction + features: + - mnemonic: mov + features: | + func: 0x401000 + bb: 0x401000: basic block + insn: 0x401000: mnemonic(mov) + insn: 0x401002: mnemonic(add) + insn: 0x401004: mnemonic(mov) + expect: + matches: + insn-multi: + - 0x401000 + - 0x401004 diff --git a/tests/fixtures/matcher/static/strings.yml b/tests/fixtures/matcher/static/strings.yml new file mode 100644 index 00000000..a5054292 --- /dev/null +++ b/tests/fixtures/matcher/static/strings.yml @@ -0,0 +1,274 @@ +- name: exact-string + description: string feature matches only the exact value + rules: + - name: exact-string-match + scopes: + static: function + features: + - string: hello world + features: | + func: 0x500000 + func: 0x500000: string(hello world) + expect: + matches: + exact-string-match: + - 0x500000 + +- name: substring + description: substring feature matches when the value appears within a string + rules: + - name: substring-match + scopes: + static: function + features: + - substring: abc + features: | + func: 0x500000 + func: 0x500000: string(zabczz) + expect: + matches: + substring-match: + - 0x500000 + +- name: regex-unanchored + description: regex feature matches when the pattern appears anywhere in a string + rules: + - name: regex-match + scopes: + static: function + features: + - string: /bbbb/ + features: | + func: 0x500000 + func: 0x500000: string(abbbba) + expect: + matches: + regex-match: + - 0x500000 + +- name: regex-case-insensitive + description: regex /i flag enables case-insensitive matching + rules: + - name: regex-ignorecase-match + scopes: + static: function + features: + - string: /BBBB/i + 
features: | + func: 0x500000 + func: 0x500000: string(abbbba) + expect: + matches: + regex-ignorecase-match: + - 0x500000 + +- name: regex-anchored-no-match + description: anchored regex does not match when the string does not start with the pattern + rules: + - name: regex-anchor-no-match + scopes: + static: function + features: + - string: /^bbbb/ + features: | + func: 0x500000 + func: 0x500000: string(abbbba) + expect: + matches: {} + +- name: substring-no-match + description: substring does not match when the value is not contained in any string + rules: + - name: substring-no-match + scopes: + static: function + features: + - substring: abc + features: | + func: 0x500000 + func: 0x500000: string(aaaa) + expect: + matches: {} + +- name: substring-exact + description: substring matches when the string is exactly the substring value + rules: + - name: substring-exact + scopes: + static: function + features: + - substring: abc + features: | + func: 0x500000 + func: 0x500000: string(abc) + expect: + matches: + substring-exact: + - 0x500000 + +- name: substring-prefix + description: substring matches when it appears at the start of a string + rules: + - name: substring-prefix + scopes: + static: function + features: + - substring: abc + features: | + func: 0x500000 + func: 0x500000: string(abc222) + expect: + matches: + substring-prefix: + - 0x500000 + +- name: substring-suffix + description: substring matches when it appears at the end of a string + rules: + - name: substring-suffix + scopes: + static: function + features: + - substring: abc + features: | + func: 0x500000 + func: 0x500000: string(111abc) + expect: + matches: + substring-suffix: + - 0x500000 + +- name: regex-no-match-wrong-type + description: regex does not match when only non-string features are present + rules: + - name: regex-wrong-type + scopes: + static: function + features: + - string: /.*bbbb.*/ + features: | + func: 0x500000 + bb: 0x500000: basic block + insn: 0x500000: number(100) + expect: 
+ matches: {} + +- name: regex-no-match-wrong-value + description: regex does not match when the string does not contain the pattern + rules: + - name: regex-wrong-value + scopes: + static: function + features: + - string: /.*bbbb.*/ + features: | + func: 0x500000 + func: 0x500000: string(aaaa) + expect: + matches: {} + +- name: regex-no-match-case-sensitive + description: regex without /i flag does not match different-case strings + rules: + - name: regex-case-sensitive + scopes: + static: function + features: + - string: /.*bbbb.*/ + features: | + func: 0x500000 + func: 0x500000: string(aBBBBa) + expect: + matches: {} + +- name: regex-explicit-wildcards + description: regex with explicit .* wildcards matches the same as implied wildcards + rules: + - name: regex-explicit + scopes: + static: function + features: + - string: /.*bbbb.*/ + - name: regex-implied + scopes: + static: function + features: + - string: /bbbb/ + features: | + func: 0x500000 + func: 0x500000: string(abbbba) + expect: + matches: + regex-explicit: + - 0x500000 + regex-implied: + - 0x500000 + +- name: regex-complex-backslash + description: regex with escaped backslashes and spaces matches correctly + rules: + - name: regex-backslash + scopes: + static: function + features: + - or: + - string: /.*HARDWARE\\Key\\key with spaces\\.*/i + features: | + func: 0x500000 + func: 0x500000: string(Hardware\Key\key with spaces\some value) + expect: + matches: + regex-backslash: + - 0x500000 + +- name: regex-numeric-string + description: regex matches numeric string values correctly + rules: + - name: regex-numeric + scopes: + static: function + features: + - or: + - string: /123/ + - string: /0x123/ + features: | + func: 0x500000 + func: 0x500000: string(123) + expect: + matches: + regex-numeric: + - 0x500000 + +- name: regex-hex-numeric-string + description: regex matches hex-prefixed numeric string values + rules: + - name: regex-hex-numeric + scopes: + static: function + features: + - or: + - string: 
/123/ + - string: /0x123/ + features: | + func: 0x500000 + func: 0x500000: string(0x123) + expect: + matches: + regex-hex-numeric: + - 0x500000 + +- name: regex-ignorecase-explicit-wildcards + description: regex /i flag with explicit wildcards matches case-insensitively + rules: + - name: regex-ignorecase-explicit + scopes: + static: function + features: + - string: /.*bbbb.*/i + features: | + func: 0x500000 + func: 0x500000: string(aBBBBa) + expect: + matches: + regex-ignorecase-explicit: + - 0x500000 diff --git a/tests/test_dynamic_span_of_calls_scope.py b/tests/test_dynamic_span_of_calls_scope.py index d06f80e9..bb9abf20 100644 --- a/tests/test_dynamic_span_of_calls_scope.py +++ b/tests/test_dynamic_span_of_calls_scope.py @@ -12,258 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -# tests/data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz -# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# ... -# thread: 3064 -# call 8: GetSystemTimeAsFileTime() -# call 9: GetSystemInfo() -# call 10: LdrGetDllHandle(1974337536, kernel32.dll) -# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 12: LdrGetDllHandle(1974337536, kernel32.dll) -# call 13: LdrGetProcedureAddress(2010595072, 0, RemoveVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 14: RtlAddVectoredExceptionHandler(1921490089, 0) -# call 15: GetSystemTime() -# call 16: NtAllocateVirtualMemory(no, 4, 786432, 4784128, 4294967295) -# call 17: NtAllocateVirtualMemory(no, 4, 12288, 4784128, 4294967295) -# call 18: GetSystemInfo() -# ... -# ... 
- import textwrap -from typing import Iterator -from functools import lru_cache import pytest -import fixtures import capa.rules -import capa.capabilities.dynamic -from capa.features.extractors.base_extractor import ThreadFilter, DynamicFeatureExtractor -def filter_threads(extractor: DynamicFeatureExtractor, ppid: int, pid: int, tid: int) -> DynamicFeatureExtractor: - for ph in extractor.get_processes(): - if (ph.address.ppid, ph.address.pid) != (ppid, pid): - continue - - for th in extractor.get_threads(ph): - if th.address.tid != tid: - continue - - return ThreadFilter( - extractor, - { - th.address, - }, - ) - - raise ValueError("failed to find target thread") - - -@lru_cache(maxsize=1) -def get_0000a657_thread3064(): - extractor = fixtures.get_cape_extractor( - fixtures.CD - / "data" - / "dynamic" - / "cape" - / "v2.2" - / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz" - ) - extractor = filter_threads(extractor, 2456, 3052, 3064) - return extractor - - -def get_call_ids(matches) -> Iterator[int]: - for address, _ in matches: - yield address.id - - -# sanity check: match the first call -# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# thread: 3064 -# call 8: GetSystemTimeAsFileTime() -def test_dynamic_call_scope(): - extractor = get_0000a657_thread3064() - - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: unsupported - dynamic: call - features: - - api: GetSystemTimeAsFileTime - """) - - r = capa.rules.Rule.from_yaml(rule) - ruleset = capa.rules.RuleSet([r]) - - capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True) - assert r.name in capabilities.matches - assert 8 in get_call_ids(capabilities.matches[r.name]) - - -# match the first span. 
-# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# thread: 3064 -# call 8: GetSystemTimeAsFileTime() -# call 9: GetSystemInfo() -# call 10: LdrGetDllHandle(1974337536, kernel32.dll) -# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 12: LdrGetDllHandle(1974337536, kernel32.dll) -def test_dynamic_span_scope(): - extractor = get_0000a657_thread3064() - - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - api: GetSystemTimeAsFileTime - - api: GetSystemInfo - - api: LdrGetDllHandle - - api: LdrGetProcedureAddress - - count(api(LdrGetDllHandle)): 2 - """) - - r = capa.rules.Rule.from_yaml(rule) - ruleset = capa.rules.RuleSet([r]) - - capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True) - assert r.name in capabilities.matches - assert 12 in get_call_ids(capabilities.matches[r.name]) - - -# show that when the span is only 5 calls long (for example), it doesn't match beyond that 5-tuple. 
-# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# thread: 3064 -# call 8: GetSystemTimeAsFileTime() -# call 9: GetSystemInfo() -# call 10: LdrGetDllHandle(1974337536, kernel32.dll) -# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 12: LdrGetDllHandle(1974337536, kernel32.dll) -# call 13: LdrGetProcedureAddress(2010595072, 0, RemoveVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 14: RtlAddVectoredExceptionHandler(1921490089, 0) -# call 15: GetSystemTime() -# call 16: NtAllocateVirtualMemory(no, 4, 786432, 4784128, 4294967295) -def test_dynamic_span_scope_length(): - extractor = get_0000a657_thread3064() - - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - api: GetSystemTimeAsFileTime - - api: RtlAddVectoredExceptionHandler - """) - - r = capa.rules.Rule.from_yaml(rule) - ruleset = capa.rules.RuleSet([r]) - - # patch SPAN_SIZE since we may use a much larger value in the real world. - from pytest import MonkeyPatch - - with MonkeyPatch.context() as m: - m.setattr(capa.capabilities.dynamic, "SPAN_SIZE", 5) - capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True) - - assert r.name not in capabilities.matches - - -# show that you can use a call subscope in span-of-calls rules. -# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# thread: 3064 -# ... -# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll) -# ... 
-def test_dynamic_span_call_subscope(): - extractor = get_0000a657_thread3064() - - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - call: - - and: - - api: LdrGetProcedureAddress - - string: AddVectoredExceptionHandler - """) - - r = capa.rules.Rule.from_yaml(rule) - ruleset = capa.rules.RuleSet([r]) - - capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True) - assert r.name in capabilities.matches - assert 11 in get_call_ids(capabilities.matches[r.name]) - - -# show that you can use a span subscope in span rules. -# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# thread: 3064 -# ... -# call 10: LdrGetDllHandle(1974337536, kernel32.dll) -# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 12: LdrGetDllHandle(1974337536, kernel32.dll) -# call 13: LdrGetProcedureAddress(2010595072, 0, RemoveVectoredExceptionHandler, 1974337536, kernel32.dll) -# ... 
-def test_dynamic_span_scope_span_subscope(): - extractor = get_0000a657_thread3064() - - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - span of calls: - - description: resolve add VEH # should match at 11 - - and: - - api: LdrGetDllHandle - - api: LdrGetProcedureAddress - - string: AddVectoredExceptionHandler - - span of calls: - - description: resolve remove VEH # should match at 13 - - and: - - api: LdrGetDllHandle - - api: LdrGetProcedureAddress - - string: RemoveVectoredExceptionHandler - """) - - r = capa.rules.Rule.from_yaml(rule) - ruleset = capa.rules.RuleSet([r]) - - capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True) - assert r.name in capabilities.matches - assert 13 in get_call_ids(capabilities.matches[r.name]) - - -# show that you can't use thread subscope in span rules. def test_dynamic_span_scope_thread_subscope(): rule = textwrap.dedent(""" rule: @@ -280,170 +35,3 @@ def test_dynamic_span_scope_thread_subscope(): with pytest.raises(capa.rules.InvalidRule): capa.rules.Rule.from_yaml(rule) - - -# show how you might use a span-of-calls rule: to match a small window for a collection of features. -# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# thread: 3064 -# call 10: LdrGetDllHandle(1974337536, kernel32.dll) -# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 12: ... -# call 13: ... 
-# call 14: RtlAddVectoredExceptionHandler(1921490089, 0) -def test_dynamic_span_example(): - extractor = get_0000a657_thread3064() - - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - call: - - and: - - api: LdrGetDllHandle - - string: "kernel32.dll" - - call: - - and: - - api: LdrGetProcedureAddress - - string: "AddVectoredExceptionHandler" - - api: RtlAddVectoredExceptionHandler - """) - - r = capa.rules.Rule.from_yaml(rule) - ruleset = capa.rules.RuleSet([r]) - - capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True) - assert r.name in capabilities.matches - assert 14 in get_call_ids(capabilities.matches[r.name]) - - -# show how spans that overlap a single event are handled. -# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# thread: 3064 -# ... -# call 10: ... -# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 12: ... -# call 13: ... -# call 14: ... -# call 15: ... -# ... -def test_dynamic_span_multiple_spans_overlapping_single_event(): - extractor = get_0000a657_thread3064() - - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - call: - - and: - - api: LdrGetProcedureAddress - - string: "AddVectoredExceptionHandler" - """) - - r = capa.rules.Rule.from_yaml(rule) - ruleset = capa.rules.RuleSet([r]) - - capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True) - assert r.name in capabilities.matches - # we only match the first overlapping span - assert [11] == list(get_call_ids(capabilities.matches[r.name])) - - -# show that you can use match statements in span-of-calls rules. -# -# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052) -# thread: 3064 -# ... 
-# call 10: LdrGetDllHandle(1974337536, kernel32.dll) -# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll) -# call 12: LdrGetDllHandle(1974337536, kernel32.dll) -# call 13: LdrGetProcedureAddress(2010595072, 0, RemoveVectoredExceptionHandler, 1974337536, kernel32.dll) -# ... -def test_dynamic_span_scope_match_statements(): - extractor = get_0000a657_thread3064() - - ruleset = capa.rules.RuleSet([ - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: resolve add VEH - namespace: linking/runtime-linking/veh - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - api: LdrGetDllHandle - - api: LdrGetProcedureAddress - - string: AddVectoredExceptionHandler - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: resolve remove VEH - namespace: linking/runtime-linking/veh - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - api: LdrGetDllHandle - - api: LdrGetProcedureAddress - - string: RemoveVectoredExceptionHandler - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: resolve add and remove VEH - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - match: resolve add VEH - - match: resolve remove VEH - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: has VEH runtime linking - scopes: - static: unsupported - dynamic: span of calls - features: - - and: - - match: linking/runtime-linking/veh - """) - ), - ]) - - capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True) - - # basic functionality, already known to work - assert "resolve add VEH" in capabilities.matches - assert "resolve remove VEH" in capabilities.matches - - # requires `match: ` to be working - assert "resolve add and remove VEH" in capabilities.matches - - # requires `match: ` to be working - assert "has 
VEH runtime linking" in capabilities.matches diff --git a/tests/test_engine.py b/tests/test_engine.py index 637fa572..a4ece82e 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -15,7 +15,7 @@ import pytest import capa.features.address -from capa.engine import Or, And, Not, Some, Range +from capa.engine import Or from capa.features.insn import Number from capa.features.address import ( ThreadAddress, @@ -78,136 +78,6 @@ def test_dn_token_offset_address_cross_type_lt(): assert (addr < DNTokenOffsetAddress(0x1000, 0x10)) is False -def test_number(): - assert bool(Number(1).evaluate({Number(0): {ADDR1}})) is False - assert bool(Number(1).evaluate({Number(1): {ADDR1}})) is True - assert bool(Number(1).evaluate({Number(2): {ADDR1, ADDR2}})) is False - - -def test_and(): - assert bool(And([Number(1)]).evaluate({Number(0): {ADDR1}})) is False - assert bool(And([Number(1)]).evaluate({Number(1): {ADDR1}})) is True - assert bool(And([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}})) is False - assert bool(And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}})) is False - assert bool(And([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}})) is False - assert bool(And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}})) is True - - -def test_or(): - assert bool(Or([Number(1)]).evaluate({Number(0): {ADDR1}})) is False - assert bool(Or([Number(1)]).evaluate({Number(1): {ADDR1}})) is True - assert bool(Or([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}})) is False - assert bool(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}})) is True - assert bool(Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}})) is True - assert bool(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}})) is True - - -def test_not(): - assert bool(Not(Number(1)).evaluate({Number(0): {ADDR1}})) is True - assert bool(Not(Number(1)).evaluate({Number(1): {ADDR1}})) is False - - -def test_some(): - assert bool(Some(0, 
[Number(1)]).evaluate({Number(0): {ADDR1}})) is True - assert bool(Some(1, [Number(1)]).evaluate({Number(0): {ADDR1}})) is False - - assert bool(Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}})) is False - assert bool(Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}})) is False - assert ( - bool( - Some(2, [Number(1), Number(2), Number(3)]).evaluate({ - Number(0): {ADDR1}, - Number(1): {ADDR1}, - Number(2): {ADDR1}, - }) - ) - is True - ) - assert ( - bool( - Some(2, [Number(1), Number(2), Number(3)]).evaluate({ - Number(0): {ADDR1}, - Number(1): {ADDR1}, - Number(2): {ADDR1}, - Number(3): {ADDR1}, - }) - ) - is True - ) - assert ( - bool( - Some(2, [Number(1), Number(2), Number(3)]).evaluate({ - Number(0): {ADDR1}, - Number(1): {ADDR1}, - Number(2): {ADDR1}, - Number(3): {ADDR1}, - Number(4): {ADDR1}, - }) - ) - is True - ) - - -def test_complex(): - assert True is bool( - Or([ - And([Number(1), Number(2)]), - Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])]), - ]).evaluate({ - Number(5): {ADDR1}, - Number(6): {ADDR1}, - Number(7): {ADDR1}, - Number(8): {ADDR1}, - }) - ) - - assert False is bool( - Or([ - And([Number(1), Number(2)]), - Or([Number(3), Some(2, [Number(4), Number(5)])]), - ]).evaluate({ - Number(5): {ADDR1}, - Number(6): {ADDR1}, - Number(7): {ADDR1}, - Number(8): {ADDR1}, - }) - ) - - -def test_range(): - # unbounded range, but no matching feature - # since the lower bound is zero, and there are zero matches, ok - assert bool(Range(Number(1)).evaluate({Number(2): {}})) is True # type: ignore - - # unbounded range with matching feature should always match - assert bool(Range(Number(1)).evaluate({Number(1): {}})) is True # type: ignore - assert bool(Range(Number(1)).evaluate({Number(1): {ADDR1}})) is True - - # unbounded max - assert bool(Range(Number(1), min=1).evaluate({Number(1): {ADDR1}})) is True - assert bool(Range(Number(1), min=2).evaluate({Number(1): {ADDR1}})) is 
False - assert bool(Range(Number(1), min=2).evaluate({Number(1): {ADDR1, ADDR2}})) is True - - # unbounded min - assert bool(Range(Number(1), max=0).evaluate({Number(1): {ADDR1}})) is False - assert bool(Range(Number(1), max=1).evaluate({Number(1): {ADDR1}})) is True - assert bool(Range(Number(1), max=2).evaluate({Number(1): {ADDR1}})) is True - assert bool(Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2}})) is True - assert bool(Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}})) is False - - # we can do an exact match by setting min==max - assert bool(Range(Number(1), min=1, max=1).evaluate({Number(1): {}})) is False # type: ignore - assert bool(Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1}})) is True - assert bool(Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1, ADDR2}})) is False - - # bounded range - assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {}})) is False # type: ignore - assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1}})) is True - assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2}})) is True - assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}})) is True - assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3, ADDR4}})) is False - - def test_short_circuit(): assert bool(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}})) is True diff --git a/tests/test_match.py b/tests/test_match.py index f55e54f8..a01b4d72 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -21,7 +21,6 @@ import capa.engine import capa.features.insn import capa.features.common from capa.rules import Scope -from capa.features.common import OS, OS_ANY, OS_WINDOWS, String, MatchedRule def match(rules, features, va, scope=Scope.FUNCTION): @@ -45,495 +44,6 @@ def match(rules, features, va, scope=Scope.FUNCTION): return features1, matches1 -def test_match_simple(): - rule = 
textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - namespace: testns1/testns2 - features: - - number: 100 - """) - r = capa.rules.Rule.from_yaml(rule) - - features, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) - assert "test rule" in matches - assert MatchedRule("test rule") in features - assert MatchedRule("testns1") in features - assert MatchedRule("testns1/testns2") in features - - -def test_match_range_exact(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - count(number(100)): 2 - """) - r = capa.rules.Rule.from_yaml(rule) - - # just enough matches - _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) - assert "test rule" in matches - - # not enough matches - _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0) - assert "test rule" not in matches - - # too many matches - _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0) - assert "test rule" not in matches - - -def test_match_range_range(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - count(number(100)): (2, 3) - """) - r = capa.rules.Rule.from_yaml(rule) - - # just enough matches - _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) - assert "test rule" in matches - - # enough matches - _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0) - assert "test rule" in matches - - # not enough matches - _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0) - assert "test rule" not in matches - - # too many matches - _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3, 4}}, 0x0) - assert "test rule" not in matches - - -def test_match_range_exact_zero(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process 
- features: - - and: - - count(number(100)): 0 - - # we can't have `count(foo): 0` at the top level, - # since we don't support top level NOT statements. - # so we have this additional trivial feature. - - mnemonic: mov - - """) - r = capa.rules.Rule.from_yaml(rule) - - # feature isn't indexed - good. - _, matches = match([r], {capa.features.insn.Mnemonic("mov"): {}}, 0x0) - assert "test rule" in matches - - # feature is indexed, but no matches. - # i don't think we should ever really have this case, but good to check anyways. - _, matches = match([r], {capa.features.insn.Number(100): {}, capa.features.insn.Mnemonic("mov"): {}}, 0x0) - assert "test rule" in matches - - # too many matches - _, matches = match([r], {capa.features.insn.Number(100): {1}, capa.features.insn.Mnemonic("mov"): {1}}, 0x0) - assert "test rule" not in matches - - -def test_match_range_with_zero(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - and: - - count(number(100)): (0, 1) - - # we can't have `count(foo): 0` at the top level, - # since we don't support top level NOT statements. - # so we have this additional trivial feature. 
- - mnemonic: mov - """) - r = capa.rules.Rule.from_yaml(rule) - - # ok - _, matches = match([r], {capa.features.insn.Mnemonic("mov"): {}}, 0x0) - assert "test rule" in matches - _, matches = match([r], {capa.features.insn.Number(100): {}, capa.features.insn.Mnemonic("mov"): {}}, 0x0) - assert "test rule" in matches - _, matches = match([r], {capa.features.insn.Number(100): {1}, capa.features.insn.Mnemonic("mov"): {1}}, 0x0) - assert "test rule" in matches - - # too many matches - _, matches = match([r], {capa.features.insn.Number(100): {1, 2}, capa.features.insn.Mnemonic("mov"): {1, 2}}, 0x0) - assert "test rule" not in matches - - -def test_match_adds_matched_rule_feature(): - """show that using `match` adds a feature for matched rules.""" - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - number: 100 - """) - r = capa.rules.Rule.from_yaml(rule) - features, _ = match([r], {capa.features.insn.Number(100): {1}}, 0x0) - assert capa.features.common.MatchedRule("test rule") in features - - -def test_match_matched_rules(): - """show that using `match` adds a feature for matched rules.""" - rules = [ - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: test rule1 - scopes: - static: function - dynamic: process - features: - - number: 100 - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: test rule2 - scopes: - static: function - dynamic: process - features: - - match: test rule1 - """) - ), - ] - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.insn.Number(100): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule1") in features - assert capa.features.common.MatchedRule("test rule2") in features - - # the ordering of the rules must not matter, - # the engine should match rules in an appropriate order. 
- features, _ = match( - capa.rules.topologically_order_rules(list(reversed(rules))), - {capa.features.insn.Number(100): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule1") in features - assert capa.features.common.MatchedRule("test rule2") in features - - -def test_match_namespace(): - rules = [ - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: CreateFile API - scopes: - static: function - dynamic: process - namespace: file/create/CreateFile - features: - - api: CreateFile - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: WriteFile API - scopes: - static: function - dynamic: process - namespace: file/write - features: - - api: WriteFile - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: file-create - scopes: - static: function - dynamic: process - features: - - match: file/create - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: filesystem-any - scopes: - static: function - dynamic: process - features: - - match: file - """) - ), - ] - - features, matches = match( - capa.rules.topologically_order_rules(rules), - {capa.features.insn.API("CreateFile"): {1}}, - 0x0, - ) - assert "CreateFile API" in matches - assert "file-create" in matches - assert "filesystem-any" in matches - assert capa.features.common.MatchedRule("file") in features - assert capa.features.common.MatchedRule("file/create") in features - assert capa.features.common.MatchedRule("file/create/CreateFile") in features - - features, matches = match( - capa.rules.topologically_order_rules(rules), - {capa.features.insn.API("WriteFile"): {1}}, - 0x0, - ) - assert "WriteFile API" in matches - assert "file-create" not in matches - assert "filesystem-any" in matches - - -def test_match_substring(): - rules = [ - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - 
and: - - substring: abc - """) - ), - ] - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("aaaa"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") not in features - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("abc"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") in features - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("111abc222"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") in features - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("111abc"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") in features - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("abc222"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") in features - - -def test_match_regex(): - rules = [ - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - and: - - string: /.*bbbb.*/ - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: rule with implied wildcards - scopes: - static: function - dynamic: process - features: - - and: - - string: /bbbb/ - """) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: rule with anchor - scopes: - static: function - dynamic: process - features: - - and: - - string: /^bbbb/ - """) - ), - ] - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.insn.Number(100): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") not in features - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("aaaa"): {1}}, - 0x0, - ) - assert 
capa.features.common.MatchedRule("test rule") not in features - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("aBBBBa"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") not in features - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("abbbba"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") in features - assert capa.features.common.MatchedRule("rule with implied wildcards") in features - assert capa.features.common.MatchedRule("rule with anchor") not in features - - -def test_match_regex_ignorecase(): - rules = [ - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - and: - - string: /.*bbbb.*/i - """) - ), - ] - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("aBBBBa"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") in features - - -def test_match_regex_complex(): - rules = [ - capa.rules.Rule.from_yaml( - textwrap.dedent(r""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - or: - - string: /.*HARDWARE\\Key\\key with spaces\\.*/i - """) - ), - ] - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String(r"Hardware\Key\key with spaces\some value"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") in features - - -def test_match_regex_values_always_string(): - rules = [ - capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - or: - - string: /123/ - - string: /0x123/ - """) - ), - ] - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("123"): {1}}, - 0x0, - ) - assert 
capa.features.common.MatchedRule("test rule") in features - - features, _ = match( - capa.rules.topologically_order_rules(rules), - {capa.features.common.String("0x123"): {1}}, - 0x0, - ) - assert capa.features.common.MatchedRule("test rule") in features - - @pytest.mark.parametrize( "pattern", [ @@ -567,27 +77,6 @@ def test_match_only_not(): assert "test rule" in matches -def test_match_not(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - namespace: testns1/testns2 - features: - - and: - - mnemonic: mov - - not: - - number: 99 - """) - r = capa.rules.Rule.from_yaml(rule) - - _, matches = match([r], {capa.features.insn.Number(100): {1, 2}, capa.features.insn.Mnemonic("mov"): {1, 2}}, 0x0) - assert "test rule" in matches - - @pytest.mark.xfail(reason="can't have nested NOT") def test_match_not_not(): rule = textwrap.dedent(""" @@ -609,155 +98,6 @@ def test_match_not_not(): assert "test rule" in matches -def test_match_operand_number(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - and: - - operand[0].number: 0x10 - """) - r = capa.rules.Rule.from_yaml(rule) - - assert capa.features.insn.OperandNumber(0, 0x10) in {capa.features.insn.OperandNumber(0, 0x10)} - - _, matches = match([r], {capa.features.insn.OperandNumber(0, 0x10): {1, 2}}, 0x0) - assert "test rule" in matches - - # mismatching index - _, matches = match([r], {capa.features.insn.OperandNumber(1, 0x10): {1, 2}}, 0x0) - assert "test rule" not in matches - - # mismatching value - _, matches = match([r], {capa.features.insn.OperandNumber(0, 0x11): {1, 2}}, 0x0) - assert "test rule" not in matches - - -def test_match_operand_offset(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - and: - - operand[0].offset: 0x10 - """) - r = capa.rules.Rule.from_yaml(rule) - - assert 
capa.features.insn.OperandOffset(0, 0x10) in {capa.features.insn.OperandOffset(0, 0x10)} - - _, matches = match([r], {capa.features.insn.OperandOffset(0, 0x10): {1, 2}}, 0x0) - assert "test rule" in matches - - # mismatching index - _, matches = match([r], {capa.features.insn.OperandOffset(1, 0x10): {1, 2}}, 0x0) - assert "test rule" not in matches - - # mismatching value - _, matches = match([r], {capa.features.insn.OperandOffset(0, 0x11): {1, 2}}, 0x0) - assert "test rule" not in matches - - -def test_match_property_access(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - and: - - property/read: System.IO.FileInfo::Length - """) - r = capa.rules.Rule.from_yaml(rule) - - assert capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.READ) in { - capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.READ) - } - - _, matches = match( - [r], - {capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.READ): {1, 2}}, - 0x0, - ) - assert "test rule" in matches - - # mismatching access - _, matches = match( - [r], - {capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.WRITE): {1, 2}}, - 0x0, - ) - assert "test rule" not in matches - - # mismatching value - _, matches = match( - [r], - {capa.features.insn.Property("System.IO.FileInfo::Size", capa.features.common.FeatureAccess.READ): {1, 2}}, - 0x0, - ) - assert "test rule" not in matches - - -def test_match_os_any(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - or: - - and: - - or: - - os: windows - - os: linux - - os: macos - - string: "Hello world" - - and: - - os: any - - string: "Goodbye world" - """) - r = capa.rules.Rule.from_yaml(rule) - - _, matches = match( - [r], - {OS(OS_ANY): {1}, String("Hello 
world"): {1}}, - 0x0, - ) - assert "test rule" in matches - - _, matches = match( - [r], - {OS(OS_WINDOWS): {1}, String("Hello world"): {1}}, - 0x0, - ) - assert "test rule" in matches - - _, matches = match( - [r], - {OS(OS_ANY): {1}, String("Goodbye world"): {1}}, - 0x0, - ) - assert "test rule" in matches - - _, matches = match( - [r], - {OS(OS_WINDOWS): {1}, String("Goodbye world"): {1}}, - 0x0, - ) - assert "test rule" in matches - - # this test demonstrates the behavior of unstable features that may change before the next major release. def test_index_features_and_unstable(): rule = textwrap.dedent(""" diff --git a/tests/test_match_fixtures.py b/tests/test_match_fixtures.py new file mode 100644 index 00000000..23f5c284 --- /dev/null +++ b/tests/test_match_fixtures.py @@ -0,0 +1,711 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import re +from typing import Any, Mapping, Iterable +from pathlib import Path +from dataclasses import dataclass + +import yaml +import pytest + +import capa.rules +import capa.features.file +import capa.features.insn +import capa.features.common +import capa.capabilities.common +import capa.features.basicblock +import capa.capabilities.dynamic +import capa.features.extractors.null +from capa.features.common import Feature +from capa.features.address import ( + NO_ADDRESS, + Address, + ThreadAddress, + DNTokenAddress, + ProcessAddress, + FileOffsetAddress, + DynamicCallAddress, + DNTokenOffsetAddress, + AbsoluteVirtualAddress, + RelativeVirtualAddress, +) +from capa.features.extractors.base_extractor import SampleHashes, FeatureExtractor + +DUMMY_SAMPLE_HASHES = SampleHashes.from_bytes(b"") +PROCESS_HEADER = re.compile(r"^(?P<name>.+) \((?:ppid=(?P<ppid>\d+), )?pid=(?P<pid>\d+)\)$") + + +@dataclass(frozen=True) +class MatchFixture: + path: Path + index: int + name: str + description: str + flavor: str + ruleset: capa.rules.RuleSet + extractor: FeatureExtractor + expected_matches: dict[str, list[Address]] + span_size: int | None + + +class StaticFeatureParser: + def __init__(self, base_address: Address): + self.base_address = base_address + self.global_features: list[Feature] = [] + self.file_features: list[tuple[Address, Feature]] = [] + self.functions: dict[Address, capa.features.extractors.null.FunctionFeatures] = {} + self.current_function: Address | None = None + self.current_basic_block: Address | None = None + + def parse(self, source: Any) -> capa.features.extractors.null.NullStaticFeatureExtractor: + for line in _iter_feature_lines(source): + self.consume(line) + + return capa.features.extractors.null.NullStaticFeatureExtractor( + base_address=self.base_address, + sample_hashes=DUMMY_SAMPLE_HASHES, + global_features=self.global_features, + file_features=self.file_features, + functions=self.functions, + ) + + def consume(self, line:
str) -> None: + if line.startswith("global:"): + self.consume_global(line) + elif line.startswith("file:"): + self.consume_file(line) + elif line.startswith("func:"): + self.consume_function(line) + elif line.startswith("bb:"): + self.consume_basic_block(line) + elif line.startswith("insn:"): + self.consume_instruction(line) + else: + raise ValueError(f"unsupported static feature line: {line}") + + def consume_global(self, line: str) -> None: + rest = _strip_prefix(line, "global:") + if rest.startswith("global: "): + rest = rest[len("global: ") :] + self.global_features.append(_parse_feature(rest)) + + def consume_file(self, line: str) -> None: + addr_text, feature_text, target_text = _split_feature_line(_strip_prefix(line, "file:")) + if target_text is not None: + raise ValueError("file feature lines do not support relocated addresses") + self.file_features.append((_parse_static_address(addr_text), _parse_feature(feature_text))) + + def consume_function(self, line: str) -> None: + rest = _strip_prefix(line, "func:") + if ": " not in rest: + function_address = _parse_static_address(rest) + self.ensure_function(function_address) + self.current_function = function_address + self.current_basic_block = None + return + + addr_text, feature_text, target_text = _split_feature_line(rest) + function_address = _parse_static_address(addr_text) + feature_address = _parse_static_address(target_text) if target_text is not None else function_address + self.ensure_function(function_address).features.append((feature_address, _parse_feature(feature_text))) + self.current_function = function_address + self.current_basic_block = None + + def consume_basic_block(self, line: str) -> None: + if self.current_function is None: + raise ValueError(f"basic block line without current function: {line}") + + addr_text, feature_text, target_text = _split_feature_line(_strip_prefix(line, "bb:")) + basic_block_address = _parse_static_address(addr_text) + feature_address = 
_parse_static_address(target_text) if target_text is not None else basic_block_address + self.ensure_basic_block(self.current_function, basic_block_address).features.append(( + feature_address, + _parse_feature(feature_text), + )) + self.current_basic_block = basic_block_address + + def consume_instruction(self, line: str) -> None: + if self.current_function is None or self.current_basic_block is None: + raise ValueError(f"instruction line without current basic block: {line}") + + addr_text, feature_text, target_text = _split_feature_line(_strip_prefix(line, "insn:")) + instruction_address = _parse_static_address(addr_text) + + feature_address = _parse_static_address(target_text) if target_text is not None else instruction_address + basic_block = self.ensure_basic_block(self.current_function, self.current_basic_block) + instruction = basic_block.instructions.get(instruction_address) + if instruction is None: + instruction = capa.features.extractors.null.InstructionFeatures(features=[]) + basic_block.instructions[instruction_address] = instruction + instruction.features.append((feature_address, _parse_feature(feature_text))) + + def ensure_function(self, address: Address) -> capa.features.extractors.null.FunctionFeatures: + function = self.functions.get(address) + if function is None: + function = capa.features.extractors.null.FunctionFeatures(features=[], basic_blocks={}) + self.functions[address] = function + return function + + def ensure_basic_block( + self, function_address: Address, basic_block_address: Address + ) -> capa.features.extractors.null.BasicBlockFeatures: + function = self.ensure_function(function_address) + basic_block = function.basic_blocks.get(basic_block_address) + if basic_block is None: + basic_block = capa.features.extractors.null.BasicBlockFeatures(features=[], instructions={}) + function.basic_blocks[basic_block_address] = basic_block + return basic_block + + +class DynamicFeatureParser: + def __init__(self): + self.global_features: 
list[Feature] = [] + self.file_features: list[tuple[Address, Feature]] = [] + self.processes: dict[Address, capa.features.extractors.null.ProcessFeatures] = {} + self.calls_by_id: dict[int, DynamicCallAddress] = {} + self.current_process: ProcessAddress | None = None + self.current_thread: ThreadAddress | None = None + + def parse(self, source: Any) -> capa.features.extractors.null.NullDynamicFeatureExtractor: + for line in _iter_feature_lines(source): + self.consume(line) + + return capa.features.extractors.null.NullDynamicFeatureExtractor( + base_address=NO_ADDRESS, + sample_hashes=DUMMY_SAMPLE_HASHES, + global_features=self.global_features, + file_features=self.file_features, + processes=self.processes, + ) + + def consume(self, line: str) -> None: + if line.startswith("global:"): + self.consume_global(line) + elif line.startswith("file:"): + self.consume_file(line) + elif line.startswith("proc:"): + self.consume_process(line) + elif line.startswith("thread:"): + self.consume_thread(line) + elif line.startswith("call:"): + self.consume_call(line) + else: + raise ValueError(f"unsupported dynamic feature line: {line}") + + def consume_global(self, line: str) -> None: + rest = _strip_prefix(line, "global:") + if rest.startswith("global: "): + rest = rest[len("global: ") :] + self.global_features.append(_parse_feature(rest)) + + def consume_file(self, line: str) -> None: + addr_text, feature_text, target_text = _split_feature_line(_strip_prefix(line, "file:")) + if target_text is not None: + raise ValueError("file feature lines do not support relocated addresses") + self.file_features.append((_parse_address(addr_text), _parse_feature(feature_text))) + + def consume_process(self, line: str) -> None: + rest = _strip_prefix(line, "proc:") + header = PROCESS_HEADER.fullmatch(rest) + if header is not None: + ppid = header.group("ppid") + process_address = ProcessAddress(ppid=int(ppid) if ppid is not None else 0, pid=int(header.group("pid"))) + 
self.ensure_process(process_address, header.group("name")) + self.current_process = process_address + self.current_thread = None + return + + if self.current_process is None: + raise ValueError(f"process feature line without current process: {line}") + + name, feature_text, target_text = _split_feature_line(rest) + process = self.ensure_process(self.current_process) + if process.name != name: + raise ValueError(f"process feature line does not match current process: {line}") + feature_address = _parse_address(target_text) if target_text is not None else self.current_process + process.features.append((feature_address, _parse_feature(feature_text))) + + def consume_thread(self, line: str) -> None: + if self.current_process is None: + raise ValueError(f"thread line without current process: {line}") + + rest = _strip_prefix(line, "thread:") + if ": " not in rest: + thread_address = ThreadAddress(process=self.current_process, tid=int(rest, 0)) + self.ensure_thread(thread_address) + self.current_thread = thread_address + return + + tid_text, feature_text, target_text = _split_feature_line(rest) + thread_address = ThreadAddress(process=self.current_process, tid=int(tid_text, 0)) + thread = self.ensure_thread(thread_address) + feature_address = _parse_address(target_text) if target_text is not None else thread_address + thread.features.append((feature_address, _parse_feature(feature_text))) + self.current_thread = thread_address + + def consume_call(self, line: str) -> None: + if self.current_thread is None: + raise ValueError(f"call line without current thread: {line}") + + call_id_text, feature_text, target_text = _split_feature_line(_strip_prefix(line, "call:")) + call_address = DynamicCallAddress(thread=self.current_thread, id=int(call_id_text, 0)) + call = self.ensure_call(call_address) + feature_address = _parse_address(target_text) if target_text is not None else call_address + call.features.append((feature_address, _parse_feature(feature_text))) + + def 
def ensure_call(self, address: DynamicCallAddress) -> capa.features.extractors.null.CallFeatures:
    """Return the call record stored for ``address``, creating it on first use.

    The address is remembered in ``self.calls_by_id`` under its numeric ID, and
    the owning thread is obtained through ``self.ensure_thread``.  A new, empty
    ``CallFeatures`` named ``call-<id>`` is stored in the thread's ``calls``
    mapping when the address has not been seen before.

    Raises:
        ValueError: if the same call ID was already registered for a different
            address.
    """
    call_id = address.id
    registered = self.calls_by_id.get(call_id)
    if registered is not None and registered != address:
        raise ValueError(f"dynamic fixture call IDs must be unique within a test: {address.id}")
    self.calls_by_id[call_id] = address

    calls = self.ensure_thread(address.thread).calls
    found = calls.get(address)
    if found is not None:
        return found

    created = capa.features.extractors.null.CallFeatures(name=f"call-{address.id}", features=[])
    calls[address] = created
    return created
dynamic_parser = DynamicFeatureParser() + extractor = dynamic_parser.parse(fixture_doc.get("features", "")) + expected_matches = _load_expected_matches( + fixture_doc, + flavor, + dynamic_parser=dynamic_parser, + ) + else: + raise ValueError(f"unsupported fixture flavor: {flavor}") + + ruleset = _load_ruleset(path, fixture_doc, flavor) + + fixtures.append( + MatchFixture( + path=path, + index=index, + name=str(fixture_doc.get("name", f"{path.stem}-{index}")), + description=str(fixture_doc.get("description", "")), + flavor=flavor, + ruleset=ruleset, + extractor=extractor, + expected_matches=expected_matches, + span_size=span_size, + ) + ) + + return fixtures + + +def render_matches(fixture: MatchFixture, matches: Mapping[str, Any]) -> dict[str, list[Address]]: + return { + rule_name: [address for address, _ in results] + for rule_name, results in matches.items() + if rule_name in fixture.ruleset and not fixture.ruleset[rule_name].is_subscope_rule() + } + + +def _get_fixture_docs(path: Path, doc: Any) -> list[dict[str, Any]]: + if isinstance(doc, list): + fixture_docs = doc + elif isinstance(doc, dict) and isinstance(doc.get("tests"), list): + fixture_docs = doc["tests"] + elif isinstance(doc, dict): + fixture_docs = [doc] + else: + raise ValueError(f"fixture file must contain a mapping or list: {path}") + + for fixture_doc in fixture_docs: + if not isinstance(fixture_doc, dict): + raise ValueError(f"fixture test must be a mapping: {path}") + + return fixture_docs + + +def _get_fixture_flavor(path: Path, doc: dict[str, Any]) -> str: + explicit = doc.get("flavor") + inferred = next( + (part for part in reversed(path.parts) if part in {"static", "dynamic"}), + None, + ) + + if explicit is None: + if inferred is None: + raise ValueError(f"fixture flavor could not be inferred from path: {path}") + return inferred + + if not isinstance(explicit, str): + raise ValueError("fixture flavor must be a string") + + if inferred is not None and explicit != inferred: + raise 
ValueError(f"fixture flavor {explicit!r} does not match file location {inferred!r}: {path}") + + return explicit + + +def _normalize_rule_doc(rule_doc: dict[str, Any], flavor: str) -> dict[str, Any]: + if "meta" not in rule_doc: + meta: dict[str, Any] = {} + for key in ("name", "namespace", "description", "scopes", "authors", "att&ck", "mbc", "lib"): + if key in rule_doc: + meta[key] = rule_doc.pop(key) + rule_doc["meta"] = meta + meta = rule_doc["meta"] + + if not isinstance(meta, dict): + raise ValueError("rule meta must be a mapping") + + scopes = meta.setdefault("scopes", {}) + if not isinstance(scopes, dict): + raise ValueError("rule scopes must be a mapping") + + if flavor == "static": + scopes.setdefault("dynamic", "unsupported") + elif flavor == "dynamic": + scopes.setdefault("static", "unsupported") + + return rule_doc + + +def _load_ruleset(path: Path, doc: dict[str, Any], flavor: str) -> capa.rules.RuleSet: + rules: list[capa.rules.Rule] = [] + for rule_doc in doc.get("rules", []): + if not isinstance(rule_doc, dict): + raise ValueError(f"rule must be a mapping: {path}") + wrapped = {"rule": _normalize_rule_doc(rule_doc, flavor)} + definition = yaml.safe_dump(wrapped, sort_keys=False) + rules.append(capa.rules.Rule.from_dict(wrapped, definition)) + return capa.rules.RuleSet(rules) + + +def _load_expected_matches( + doc: dict[str, Any], + flavor: str, + dynamic_parser: DynamicFeatureParser | None = None, +) -> dict[str, list[Address]]: + expect = doc.get("expect", {}) + if not isinstance(expect, dict): + raise ValueError("fixture expect must be a mapping") + + matches = expect.get("matches", {}) + if not isinstance(matches, dict): + raise ValueError("fixture expect.matches must be a mapping") + + return { + rule_name: [_parse_expected_address(spec, flavor, dynamic_parser) for spec in locations] + for rule_name, locations in matches.items() + } + + +def _parse_expected_address( + spec: Any, + flavor: str, + dynamic_parser: DynamicFeatureParser | None = 
None, +) -> Address: + if flavor == "dynamic" and dynamic_parser is not None: + if isinstance(spec, int) and spec in dynamic_parser.calls_by_id: + return dynamic_parser.calls_by_id[spec] + + if isinstance(spec, str): + call_id = re.fullmatch(r"call\((\d+)\)", spec) + if call_id is not None: + call_address = dynamic_parser.calls_by_id.get(int(call_id.group(1))) + if call_address is None: + raise ValueError(f"unknown dynamic fixture call ID: {spec}") + return call_address + + return _parse_address(spec) + + +def _load_span_size(doc: dict[str, Any]) -> int | None: + options = doc.get("options", {}) + if not isinstance(options, dict): + raise ValueError("fixture options must be a mapping") + + span_size = options.get("span size") + if span_size is None: + return None + if not isinstance(span_size, int): + raise ValueError("fixture options.span size must be an integer") + return span_size + + +def _iter_feature_lines(source: Any) -> Iterable[str]: + if isinstance(source, str): + lines = source.splitlines() + elif isinstance(source, list): + lines = source + else: + raise ValueError("fixture features must be a block string or list of strings") + + for line in lines: + if not isinstance(line, str): + raise ValueError("fixture feature lines must be strings") + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + yield stripped + + +def _split_feature_line(text: str) -> tuple[str, str, str | None]: + body, target = _split_target(text) + scope_text, separator, feature_text = body.partition(": ") + if not separator: + raise ValueError(f"expected ': ': {text}") + return scope_text, feature_text, target + + +def _split_target(text: str) -> tuple[str, str | None]: + if " -> " not in text: + return text, None + return text.rsplit(" -> ", 1) # type: ignore[return-value] # rsplit with maxsplit=1 always returns 2 elements + + +def _parse_feature(text: str) -> Feature: + text = text.strip() + if text == "basic block": + return 
capa.features.basicblock.BasicBlock() + + operand_number = re.fullmatch(r"operand\[(\d+)\]\.number\((.*)\)", text) + if operand_number: + return capa.features.insn.OperandNumber( + int(operand_number.group(1)), + _parse_number_literal(operand_number.group(2)), + ) + + operand_offset = re.fullmatch(r"operand\[(\d+)\]\.offset\((.*)\)", text) + if operand_offset: + return capa.features.insn.OperandOffset( + int(operand_offset.group(1)), + _parse_int_literal(operand_offset.group(2)), + ) + + property_ = re.fullmatch(r"property(?:/(read|write))?\((.*)\)", text) + if property_: + return capa.features.insn.Property( + _strip_quotes(property_.group(2).strip()), + access=property_.group(1), + ) + + feature = re.fullmatch(r"([a-z][a-z0-9\- ]*)\((.*)\)", text) + if feature is None: + raise ValueError(f"unsupported feature syntax: {text}") + + name = feature.group(1) + value = _strip_quotes(feature.group(2).strip()) + + if name == "api": + return capa.features.insn.API(value) + if name == "arch": + return capa.features.common.Arch(value) + if name == "bytes": + return capa.features.common.Bytes(bytes.fromhex(value.replace(" ", ""))) + if name == "characteristic": + return capa.features.common.Characteristic(value) + if name == "class": + return capa.features.common.Class(value) + if name == "export": + return capa.features.file.Export(value) + if name == "format": + return capa.features.common.Format(value) + if name in ("function-name", "function name"): + return capa.features.file.FunctionName(value) + if name == "import": + return capa.features.file.Import(value) + if name == "match": + return capa.features.common.MatchedRule(value) + if name == "mnemonic": + return capa.features.insn.Mnemonic(value) + if name == "namespace": + return capa.features.common.Namespace(value) + if name == "number": + return capa.features.insn.Number(_parse_number_literal(value)) + if name == "offset": + return capa.features.insn.Offset(_parse_int_literal(value)) + if name == "os": + return 
capa.features.common.OS(value) + if name == "section": + return capa.features.file.Section(value) + if name == "string": + return capa.features.common.String(value) + if name == "substring": + return capa.features.common.Substring(value) + + raise ValueError(f"unsupported feature type: {name}") + + +def _parse_number_literal(value: str) -> int | float: + value = value.strip() + if _looks_like_hex_literal(value): + return int(value, 0) + if any(character in value for character in ".eE"): + return float(value) + return int(value, 0) + + +def _looks_like_hex_literal(value: str) -> bool: + return value.lstrip("+-").lower().startswith("0x") + + +def _parse_int_literal(value: str) -> int: + return int(value, 0) + + +def _parse_static_address(spec: Any) -> Address: + address = _parse_address(spec) + if isinstance(address, (ProcessAddress, ThreadAddress, DynamicCallAddress)): + raise ValueError(f"expected a static address, got {spec!r}") + return address + + +def _parse_address(spec: Any) -> Address: + if spec is None: + return NO_ADDRESS + + if isinstance(spec, int): + return AbsoluteVirtualAddress(spec) + + if not isinstance(spec, str): + raise ValueError(f"unsupported address: {spec!r}") + + if spec in {"global", "no address"}: + return NO_ADDRESS + if spec.startswith("base address+"): + return RelativeVirtualAddress(_coerce_int(spec[len("base address+") :])) + if spec.startswith("file+"): + return FileOffsetAddress(_coerce_int(spec[len("file+") :])) + if token_offset := re.fullmatch(r"token\((.+)\)\+(.+)", spec): + return DNTokenOffsetAddress(_coerce_int(token_offset.group(1)), _coerce_int(token_offset.group(2))) + if token := re.fullmatch(r"token\((.+)\)", spec): + return DNTokenAddress(_coerce_int(token.group(1))) + if process := re.fullmatch(r"process\{ppid:(\d+),pid:(\d+)\}", spec): + return ProcessAddress(ppid=int(process.group(1)), pid=int(process.group(2))) + if process := re.fullmatch(r"process\{pid:(\d+)\}", spec): + return 
ProcessAddress(pid=int(process.group(1))) + if thread := re.fullmatch(r"process\{ppid:(\d+),pid:(\d+),tid:(\d+)\}", spec): + return ThreadAddress( + process=ProcessAddress(ppid=int(thread.group(1)), pid=int(thread.group(2))), + tid=int(thread.group(3)), + ) + if thread := re.fullmatch(r"process\{pid:(\d+),tid:(\d+)\}", spec): + return ThreadAddress(process=ProcessAddress(pid=int(thread.group(1))), tid=int(thread.group(2))) + if call := re.fullmatch(r"process\{ppid:(\d+),pid:(\d+),tid:(\d+),call:(\d+)\}", spec): + return DynamicCallAddress( + thread=ThreadAddress( + process=ProcessAddress(ppid=int(call.group(1)), pid=int(call.group(2))), + tid=int(call.group(3)), + ), + id=int(call.group(4)), + ) + if call := re.fullmatch(r"process\{pid:(\d+),tid:(\d+),call:(\d+)\}", spec): + return DynamicCallAddress( + thread=ThreadAddress(process=ProcessAddress(pid=int(call.group(1))), tid=int(call.group(2))), + id=int(call.group(3)), + ) + return AbsoluteVirtualAddress(_coerce_int(spec)) + + +def _coerce_int(value: Any) -> int: + if isinstance(value, int): + return value + if isinstance(value, str): + return int(value, 0) + raise ValueError(f"expected integer value: {value!r}") + + +def _strip_prefix(text: str, prefix: str) -> str: + return text[len(prefix) :].strip() + + +def _strip_quotes(value: str) -> str: + if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}: + return value[1:-1] + return value + + +# --------------------------------------------------------------------------- +# Test collection and parametrization +# --------------------------------------------------------------------------- + +FIXTURE_DIR = Path(__file__).parent / "fixtures" / "matcher" +FIXTURE_PATHS = sorted(path for path in FIXTURE_DIR.rglob("*") if path.suffix in {".json", ".yml", ".yaml"}) +FIXTURES = [fixture for path in FIXTURE_PATHS for fixture in load_fixtures(path)] +FIXTURE_IDS = [f"{fixture.path.relative_to(FIXTURE_DIR)}[{fixture.index}]::{fixture.name}" for fixture in 
FIXTURES] + + +def _enable_paranoid_matching(patch: pytest.MonkeyPatch, ruleset: capa.rules.RuleSet) -> None: + original_match = ruleset.match + + def paranoid_match(scope, features, addr, paranoid=False): + return original_match(scope, features, addr, paranoid=True) + + patch.setattr(ruleset, "match", paranoid_match) + + +@pytest.mark.parametrize("fixture", FIXTURES, ids=FIXTURE_IDS) +def test_match_fixture(fixture: MatchFixture): + with pytest.MonkeyPatch.context() as patch: + if fixture.span_size is not None: + patch.setattr(capa.capabilities.dynamic, "SPAN_SIZE", fixture.span_size) + + _enable_paranoid_matching(patch, fixture.ruleset) + + capabilities = capa.capabilities.common.find_capabilities( + fixture.ruleset, + fixture.extractor, + disable_progress=True, + ) + + assert render_matches(fixture, capabilities.matches) == fixture.expected_matches diff --git a/tests/test_rules.py b/tests/test_rules.py index 6b1be70a..6dd1aff3 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -21,9 +21,8 @@ import capa.rules import capa.engine import capa.features.common import capa.features.address -from capa.engine import Or from capa.features.file import FunctionName -from capa.features.insn import API, Number, Offset, Property +from capa.features.insn import Number, Offset from capa.features.common import ( OS, OS_LINUX, @@ -36,7 +35,6 @@ from capa.features.common import ( Format, String, Substring, - FeatureAccess, ) ADDR1 = capa.features.address.AbsoluteVirtualAddress(0x401001) @@ -45,14 +43,6 @@ ADDR3 = capa.features.address.AbsoluteVirtualAddress(0x401003) ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) -def test_rule_ctor(): - r = capa.rules.Rule( - "test rule", capa.rules.Scopes(capa.rules.Scope.FUNCTION, capa.rules.Scope.FILE), Or([Number(1)]), {} - ) - assert bool(r.evaluate({Number(0): {ADDR1}})) is False - assert bool(r.evaluate({Number(1): {ADDR2}})) is True - - def test_scopes_from_dict(): scopes = capa.rules.Scopes.from_dict({"static": 
"function", "dynamic": "process"}) assert scopes.static == capa.rules.Scope.FUNCTION @@ -66,56 +56,6 @@ def test_scopes_from_dict(): assert isinstance(sub, SubScopes) -def test_rule_yaml(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - authors: - - user@domain.com - scopes: - static: function - dynamic: process - examples: - - foo1234 - - bar5678 - features: - - and: - - number: 1 - - number: 2 - """) - r = capa.rules.Rule.from_yaml(rule) - assert bool(r.evaluate({Number(0): {ADDR1}})) is False - assert bool(r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}})) is False - assert bool(r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}})) is True - assert bool(r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}})) is True - - -def test_rule_yaml_complex(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - or: - - and: - - number: 1 - - number: 2 - - or: - - number: 3 - - 2 or more: - - number: 4 - - number: 5 - - number: 6 - """) - r = capa.rules.Rule.from_yaml(rule) - assert bool(r.evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}})) is True - assert bool(r.evaluate({Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}})) is False - - def test_rule_descriptions(): rule = textwrap.dedent(""" rule: @@ -207,78 +147,6 @@ def test_get_rules_skips_empty_yaml(tmp_path): assert len(rules) == 1 -def test_rule_yaml_not(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - and: - - number: 1 - - not: - - number: 2 - """) - r = capa.rules.Rule.from_yaml(rule) - assert bool(r.evaluate({Number(1): {ADDR1}})) is True - assert bool(r.evaluate({Number(1): {ADDR1}, Number(2): {ADDR1}})) is False - - -def test_rule_yaml_count(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: 
function - dynamic: process - features: - - count(number(100)): 1 - """) - r = capa.rules.Rule.from_yaml(rule) - assert bool(r.evaluate({Number(100): set()})) is False - assert bool(r.evaluate({Number(100): {ADDR1}})) is True - assert bool(r.evaluate({Number(100): {ADDR1, ADDR2}})) is False - - -def test_rule_yaml_count_range(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - count(number(100)): (1, 2) - """) - r = capa.rules.Rule.from_yaml(rule) - assert bool(r.evaluate({Number(100): set()})) is False - assert bool(r.evaluate({Number(100): {ADDR1}})) is True - assert bool(r.evaluate({Number(100): {ADDR1, ADDR2}})) is True - assert bool(r.evaluate({Number(100): {ADDR1, ADDR2, ADDR3}})) is False - - -def test_rule_yaml_count_string(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - count(string(foo)): 2 - """) - r = capa.rules.Rule.from_yaml(rule) - assert bool(r.evaluate({String("foo"): set()})) is False - assert bool(r.evaluate({String("foo"): {ADDR1}})) is False - assert bool(r.evaluate({String("foo"): {ADDR1, ADDR2}})) is True - assert bool(r.evaluate({String("foo"): {ADDR1, ADDR2, ADDR3}})) is False - - def test_invalid_rule_feature(): with pytest.raises(capa.rules.InvalidRule): capa.rules.Rule.from_yaml( @@ -852,50 +720,6 @@ def test_number_symbol(): assert (Number(0x100, description="symbol name") in children) is True -def test_count_number_symbol(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - or: - - count(number(2 = symbol name)): 1 - - count(number(0x100 = symbol name)): 2 or more - - count(number(0x11 = (FLAG_A | FLAG_B))): 2 or more - """) - r = capa.rules.Rule.from_yaml(rule) - assert bool(r.evaluate({Number(2): set()})) is False - assert bool(r.evaluate({Number(2): {ADDR1}})) is True - assert 
bool(r.evaluate({Number(2): {ADDR1, ADDR2}})) is False - assert bool(r.evaluate({Number(0x100, description="symbol name"): {ADDR1}})) is False - assert bool(r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}})) is True - - -def test_count_api(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: thread - features: - - or: - - count(api(kernel32.CreateFileA)): 1 - - count(api(System.Convert::FromBase64String)): 1 - """) - r = capa.rules.Rule.from_yaml(rule) - # apis including their DLL names are not extracted anymore - assert bool(r.evaluate({API("kernel32.CreateFileA"): set()})) is False - assert bool(r.evaluate({API("kernel32.CreateFile"): set()})) is False - assert bool(r.evaluate({API("CreateFile"): {ADDR1}})) is False - assert bool(r.evaluate({API("CreateFileA"): {ADDR1}})) is True - assert bool(r.evaluate({API("System.Convert::FromBase64String"): {ADDR1}})) is True - - def test_invalid_number(): with pytest.raises(capa.rules.InvalidRule): _ = capa.rules.Rule.from_yaml( @@ -965,28 +789,6 @@ def test_offset_symbol(): assert (Offset(0x100, description="symbol name") in children) is True -def test_count_offset_symbol(): - rule = textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - or: - - count(offset(2 = symbol name)): 1 - - count(offset(0x100 = symbol name)): 2 or more - - count(offset(0x11 = (FLAG_A | FLAG_B))): 2 or more - """) - r = capa.rules.Rule.from_yaml(rule) - assert bool(r.evaluate({Offset(2): set()})) is False - assert bool(r.evaluate({Offset(2): {ADDR1}})) is True - assert bool(r.evaluate({Offset(2): {ADDR1, ADDR2}})) is False - assert bool(r.evaluate({Offset(0x100, description="symbol name"): {ADDR1}})) is False - assert bool(r.evaluate({Offset(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}})) is True - - def test_invalid_offset(): with pytest.raises(capa.rules.InvalidRule): _ = 
capa.rules.Rule.from_yaml( @@ -1380,48 +1182,6 @@ def test_arch_features(): assert (Arch(ARCH_I386) not in children) is True -def test_property_access(): - r = capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - property/read: System.IO.FileInfo::Length - """) - ) - assert bool(r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ): {ADDR1}})) is True - - assert bool(r.evaluate({Property("System.IO.FileInfo::Length"): {ADDR1}})) is False - assert bool(r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.WRITE): {ADDR1}})) is False - - -def test_property_access_symbol(): - r = capa.rules.Rule.from_yaml( - textwrap.dedent(""" - rule: - meta: - name: test rule - scopes: - static: function - dynamic: process - features: - - property/read: System.IO.FileInfo::Length = some property - """) - ) - assert ( - bool( - r.evaluate({ - Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {ADDR1} - }) - ) - is True - ) - - def test_translate_com_features(): r = capa.rules.Rule.from_yaml( textwrap.dedent("""