dynamic: add sequence scope

addresses discussion in
https://github.com/mandiant/capa-rules/discussions/951

pep8

sequence: add test showing multiple sequences overlapping a single event
This commit is contained in:
Willi Ballenthin
2024-12-09 13:20:22 +00:00
committed by Willi Ballenthin
parent 8d17319128
commit b06fea130c
12 changed files with 793 additions and 409 deletions

View File

@@ -35,6 +35,10 @@ class CallCapabilities:
matches: MatchResults
# The number of calls that make up a sequence.
SEQUENCE_SIZE = 5
def find_call_capabilities(
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
) -> CallCapabilities:
@@ -64,6 +68,7 @@ def find_call_capabilities(
class ThreadCapabilities:
features: FeatureSet
thread_matches: MatchResults
sequence_matches: MatchResults
call_matches: MatchResults
@@ -81,6 +86,11 @@ def find_thread_capabilities(
# might be found at different calls, that's ok.
call_matches: MatchResults = collections.defaultdict(list)
# matches found at the sequence scope.
sequence_matches: MatchResults = collections.defaultdict(list)
sequence: collections.deque[FeatureSet] = collections.deque(maxlen=SEQUENCE_SIZE)
for ch in extractor.get_calls(ph, th):
call_capabilities = find_call_capabilities(ruleset, extractor, ph, th, ch)
for feature, vas in call_capabilities.features.items():
@@ -89,6 +99,16 @@ def find_thread_capabilities(
for rule_name, res in call_capabilities.matches.items():
call_matches[rule_name].extend(res)
sequence.append(call_capabilities.features)
sequence_features: FeatureSet = collections.defaultdict(set)
for call in sequence:
for feature, vas in call.items():
sequence_features[feature].update(vas)
_, smatches = ruleset.match(Scope.SEQUENCE, sequence_features, ch.address)
for rule_name, res in smatches.items():
sequence_matches[rule_name].extend(res)
for feature, va in itertools.chain(extractor.extract_thread_features(ph, th), extractor.extract_global_features()):
features[feature].add(va)
@@ -100,7 +120,7 @@ def find_thread_capabilities(
for va, _ in res:
capa.engine.index_rule_matches(features, rule, [va])
return ThreadCapabilities(features, matches, call_matches)
return ThreadCapabilities(features, matches, sequence_matches, call_matches)
@dataclass
@@ -125,6 +145,10 @@ def find_process_capabilities(
# might be found at different threads, that's ok.
thread_matches: MatchResults = collections.defaultdict(list)
# matches found at the sequence scope.
# might be found at different sequences, that's ok.
sequence_matches: MatchResults = collections.defaultdict(list)
# matches found at the call scope.
# might be found at different calls, that's ok.
call_matches: MatchResults = collections.defaultdict(list)
@@ -137,6 +161,9 @@ def find_process_capabilities(
for rule_name, res in thread_capabilities.thread_matches.items():
thread_matches[rule_name].extend(res)
for rule_name, res in thread_capabilities.sequence_matches.items():
sequence_matches[rule_name].extend(res)
for rule_name, res in thread_capabilities.call_matches.items():
call_matches[rule_name].extend(res)
@@ -152,6 +179,7 @@ def find_dynamic_capabilities(
) -> Capabilities:
all_process_matches: MatchResults = collections.defaultdict(list)
all_thread_matches: MatchResults = collections.defaultdict(list)
all_sequence_matches: MatchResults = collections.defaultdict(list)
all_call_matches: MatchResults = collections.defaultdict(list)
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
@@ -177,6 +205,8 @@ def find_dynamic_capabilities(
all_process_matches[rule_name].extend(res)
for rule_name, res in process_capabilities.thread_matches.items():
all_thread_matches[rule_name].extend(res)
for rule_name, res in process_capabilities.sequence_matches.items():
all_sequence_matches[rule_name].extend(res)
for rule_name, res in process_capabilities.call_matches.items():
all_call_matches[rule_name].extend(res)
@@ -186,7 +216,7 @@ def find_dynamic_capabilities(
# mapping from feature (matched rule) to set of addresses at which it matched.
process_and_lower_features: FeatureSet = collections.defaultdict(set)
for rule_name, results in itertools.chain(
all_process_matches.items(), all_thread_matches.items(), all_call_matches.items()
all_process_matches.items(), all_thread_matches.items(), all_sequence_matches.items(), all_call_matches.items()
):
locations = {p[0] for p in results}
rule = ruleset[rule_name]
@@ -200,6 +230,8 @@ def find_dynamic_capabilities(
# each rule exists in exactly one scope,
# so there won't be any overlap among these following MatchResults,
# and we can merge the dictionaries naively.
all_call_matches.items(),
all_sequence_matches.items(),
all_thread_matches.items(),
all_process_matches.items(),
all_file_capabilities.matches.items(),

View File

@@ -504,4 +504,16 @@ def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> Dynamic
return new_extractor
def ThreadFilter(extractor: DynamicFeatureExtractor, threads: set) -> DynamicFeatureExtractor:
    """
    Build an extractor that only enumerates the given threads.

    The input extractor is left untouched: a shallow copy is made and only the
    copy's `get_threads` is replaced with a filtering generator.

    Args:
        extractor: the extractor to wrap.
        threads: thread addresses to keep; a thread is yielded only when its
            `address` is a member of this set.

    Returns:
        the shallow copy with the filtering `get_threads` installed.
    """
    # capture the bound method up front so the filter keeps delegating to the
    # unfiltered implementation.
    unfiltered_get_threads = extractor.get_threads

    def get_selected_threads(self, ph: ProcessHandle):
        for thread in unfiltered_get_threads(ph):
            if thread.address in threads:
                yield thread

    filtered_extractor = copy(extractor)
    # note: the method is bound to the original extractor; `self` is not used by the filter.
    filtered_extractor.get_threads = MethodType(get_selected_threads, extractor)  # type: ignore
    return filtered_extractor
FeatureExtractor: TypeAlias = Union[StaticFeatureExtractor, DynamicFeatureExtractor]

View File

@@ -163,6 +163,8 @@ def scope_to_pb2(scope: capa.rules.Scope) -> capa_pb2.Scope.ValueType:
return capa_pb2.Scope.SCOPE_PROCESS
elif scope == capa.rules.Scope.THREAD:
return capa_pb2.Scope.SCOPE_THREAD
elif scope == capa.rules.Scope.SEQUENCE:
return capa_pb2.Scope.SCOPE_SEQUENCE
elif scope == capa.rules.Scope.CALL:
return capa_pb2.Scope.SCOPE_CALL
else:
@@ -655,6 +657,8 @@ def scope_from_pb2(scope: capa_pb2.Scope.ValueType) -> capa.rules.Scope:
return capa.rules.Scope.PROCESS
elif scope == capa_pb2.Scope.SCOPE_THREAD:
return capa.rules.Scope.THREAD
elif scope == capa_pb2.Scope.SCOPE_SEQUENCE:
return capa.rules.Scope.SEQUENCE
elif scope == capa_pb2.Scope.SCOPE_CALL:
return capa.rules.Scope.CALL
else:

View File

@@ -378,6 +378,7 @@ enum Scope {
SCOPE_PROCESS = 5;
SCOPE_THREAD = 6;
SCOPE_CALL = 7;
SCOPE_SEQUENCE = 8;
}
message Scopes {

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -126,6 +126,16 @@ def render_thread(layout: rd.DynamicLayout, addr: frz.Address) -> str:
return f"{name}{{pid:{thread.process.pid},tid:{thread.tid}}}"
def render_sequence(layout: rd.DynamicLayout, addrs: list[frz.Address]) -> str:
    """
    Render a sequence of calls as a single location string.

    The output looks like `name{pid:P,tid:T,calls:{id1,id2,...}}`.
    The process name, pid, and tid are all taken from the first call.

    Args:
        layout: the dynamic layout used to resolve the process name.
        addrs: frozen addresses of the calls in the sequence; each must convert
            to a DynamicCallAddress.

    Returns:
        the rendered location string.

    Raises:
        IndexError: when `addrs` is empty.
    """
    calls: list[capa.features.address.DynamicCallAddress] = [addr.to_capa() for addr in addrs]  # type: ignore
    for call in calls:
        assert isinstance(call, capa.features.address.DynamicCallAddress)

    # name the anchor call explicitly, rather than relying on the loop variable
    # that happens to be left over from the assertion loop above.
    first = calls[0]

    pname = _get_process_name(layout, frz.Address.from_capa(first.thread.process))
    call_ids = [str(call.id) for call in calls]
    return f"{pname}{{pid:{first.thread.process.pid},tid:{first.thread.tid},calls:{{{','.join(call_ids)}}}}}"
def render_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
call = addr.to_capa()
assert isinstance(call, capa.features.address.DynamicCallAddress)
@@ -318,7 +328,7 @@ def render_rules(console: Console, doc: rd.ResultDocument):
lines = [render_process(doc.meta.analysis.layout, loc) for loc in locations]
elif rule.meta.scopes.dynamic == capa.rules.Scope.THREAD:
lines = [render_thread(doc.meta.analysis.layout, loc) for loc in locations]
elif rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
elif rule.meta.scopes.dynamic in (capa.rules.Scope.CALL, capa.rules.Scope.SEQUENCE):
# because we're only in verbose mode, we won't show the full call details (name, args, retval)
# we'll only show the details of the thread in which the calls are found.
# so select the thread locations and render those.

View File

@@ -311,6 +311,36 @@ def render_match(
render_match(console, layout, rule, child, indent=indent + 1, mode=child_mode)
def collect_call_locations(
    match: rd.Match,
    mode=MODE_SUCCESS,
):
    """
    Recursively yield the call-typed locations found under the given match.

    Each NOT statement flips the mode for its children between MODE_SUCCESS and
    MODE_FAILURE; feature nodes reached in MODE_FAILURE contribute nothing.
    Useful to collect the calls used to match a sequence scoped rule.

    Raises:
        ValueError: when a node is neither a statement nor a feature.
    """
    node = match.node

    if isinstance(node, rd.StatementNode):
        statement = node.statement
        is_not = isinstance(statement, rd.CompoundStatement) and statement.type == rd.CompoundStatementType.NOT

        if not is_not:
            child_mode = mode
        elif mode == MODE_SUCCESS:
            child_mode = MODE_FAILURE
        else:
            child_mode = MODE_SUCCESS

        for child in match.children:
            yield from collect_call_locations(child, child_mode)
        return

    if isinstance(node, rd.FeatureNode):
        if mode == MODE_FAILURE:
            # nothing under a failing branch is reported.
            return

        yield from (location for location in match.locations if location.type == frz.AddressType.CALL)
        return

    raise ValueError("unexpected node type")
def render_rules(console: Console, doc: rd.ResultDocument):
"""
like:
@@ -450,6 +480,9 @@ def render_rules(console: Console, doc: rd.ResultDocument):
console.write(v.render_process(doc.meta.analysis.layout, location))
elif rule.meta.scopes.dynamic == capa.rules.Scope.THREAD:
console.write(v.render_thread(doc.meta.analysis.layout, location))
elif rule.meta.scopes.dynamic == capa.rules.Scope.SEQUENCE:
calls = sorted(set(collect_call_locations(match)))
console.write(hanging_indent(v.render_sequence(doc.meta.analysis.layout, calls), indent=1))
elif rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
console.write(hanging_indent(v.render_call(doc.meta.analysis.layout, location), indent=1))
else:

View File

@@ -86,6 +86,7 @@ class Scope(str, Enum):
FILE = "file"
PROCESS = "process"
THREAD = "thread"
SEQUENCE = "sequence"
CALL = "call"
FUNCTION = "function"
BASIC_BLOCK = "basic block"
@@ -114,6 +115,7 @@ DYNAMIC_SCOPES = {
Scope.GLOBAL,
Scope.PROCESS,
Scope.THREAD,
Scope.SEQUENCE,
Scope.CALL,
}
@@ -199,6 +201,7 @@ SUPPORTED_FEATURES: dict[str, set] = {
capa.features.common.MatchedRule,
},
Scope.THREAD: set(),
Scope.SEQUENCE: set(),
Scope.CALL: {
capa.features.common.MatchedRule,
capa.features.common.Regex,
@@ -253,11 +256,14 @@ SUPPORTED_FEATURES[Scope.FUNCTION].update(SUPPORTED_FEATURES[Scope.GLOBAL])
SUPPORTED_FEATURES[Scope.FILE].update(SUPPORTED_FEATURES[Scope.GLOBAL])
SUPPORTED_FEATURES[Scope.PROCESS].update(SUPPORTED_FEATURES[Scope.GLOBAL])
SUPPORTED_FEATURES[Scope.THREAD].update(SUPPORTED_FEATURES[Scope.GLOBAL])
SUPPORTED_FEATURES[Scope.SEQUENCE].update(SUPPORTED_FEATURES[Scope.GLOBAL])
SUPPORTED_FEATURES[Scope.CALL].update(SUPPORTED_FEATURES[Scope.GLOBAL])
# all call scope features are also thread features
SUPPORTED_FEATURES[Scope.THREAD].update(SUPPORTED_FEATURES[Scope.CALL])
# all call scope features are also sequence features
SUPPORTED_FEATURES[Scope.SEQUENCE].update(SUPPORTED_FEATURES[Scope.CALL])
# all sequence scope features (and therefore, call features) are also thread features
SUPPORTED_FEATURES[Scope.THREAD].update(SUPPORTED_FEATURES[Scope.SEQUENCE])
# all thread scope features are also process features
SUPPORTED_FEATURES[Scope.PROCESS].update(SUPPORTED_FEATURES[Scope.THREAD])
@@ -636,8 +642,19 @@ def build_statements(d, scopes: Scopes):
Scope.THREAD, build_statements(d[key][0], Scopes(dynamic=Scope.THREAD)), description=description
)
elif key == "sequence":
if all(s not in scopes for s in (Scope.FILE, Scope.PROCESS, Scope.THREAD)):
raise InvalidRule("sequence subscope supported only for the process and thread scopes")
if len(d[key]) != 1:
raise InvalidRule("subscope must have exactly one child statement")
return ceng.Subscope(
Scope.SEQUENCE, build_statements(d[key][0], Scopes(dynamic=Scope.SEQUENCE)), description=description
)
elif key == "call":
if all(s not in scopes for s in (Scope.FILE, Scope.PROCESS, Scope.THREAD, Scope.CALL)):
if all(s not in scopes for s in (Scope.FILE, Scope.PROCESS, Scope.THREAD, Scope.SEQUENCE, Scope.CALL)):
raise InvalidRule("call subscope supported only for the process, thread, and call scopes")
if len(d[key]) != 1:
@@ -1383,6 +1400,7 @@ class RuleSet:
scopes = (
Scope.CALL,
Scope.SEQUENCE,
Scope.THREAD,
Scope.PROCESS,
Scope.INSTRUCTION,
@@ -1413,6 +1431,10 @@ class RuleSet:
def thread_rules(self):
return self.rules_by_scope[Scope.THREAD]
@property
def sequence_rules(self):
return self.rules_by_scope[Scope.SEQUENCE]
@property
def call_rules(self):
return self.rules_by_scope[Scope.CALL]

View File

@@ -194,6 +194,7 @@ class InvalidDynamicScope(Lint):
"file",
"process",
"thread",
"sequence",
"call",
"unsupported",
)

View File

@@ -0,0 +1,256 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
# tests/data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz
#
# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052)
# ...
# thread: 3064
# call 8: GetSystemTimeAsFileTime()
# call 9: GetSystemInfo()
# call 10: LdrGetDllHandle(1974337536, kernel32.dll)
# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll)
# call 12: LdrGetDllHandle(1974337536, kernel32.dll)
# call 13: LdrGetProcedureAddress(2010595072, 0, RemoveVectoredExceptionHandler, 1974337536, kernel32.dll)
# call 14: RtlAddVectoredExceptionHandler(1921490089, 0)
# call 15: GetSystemTime()
# call 16: NtAllocateVirtualMemory(no, 4, 786432, 4784128, 4294967295)
# call 17: NtAllocateVirtualMemory(no, 4, 12288, 4784128, 4294967295)
# call 18: GetSystemInfo()
# ...
# ...
import textwrap
from functools import lru_cache
import fixtures
import capa.main
import capa.capabilities.dynamic
from capa.features.extractors.base_extractor import ThreadFilter, DynamicFeatureExtractor
def filter_threads(extractor: DynamicFeatureExtractor, ppid: int, pid: int, tid: int) -> DynamicFeatureExtractor:
    """
    Restrict the extractor to the first thread matching (ppid, pid, tid).

    Raises:
        ValueError: when no process/thread pair matches.
    """
    wanted_process = (ppid, pid)

    for ph in extractor.get_processes():
        if (ph.address.ppid, ph.address.pid) == wanted_process:
            for th in extractor.get_threads(ph):
                if th.address.tid == tid:
                    return ThreadFilter(extractor, {th.address})

    raise ValueError("failed to find target thread")
@lru_cache(maxsize=1)
def get_0000a657_thread3064():
    """Return the CAPE extractor for sample 0000a657, filtered to ppid=2456, pid=3052, tid=3064 (cached)."""
    sample_path = fixtures.get_data_path_by_name("0000a657")
    return filter_threads(fixtures.get_cape_extractor(sample_path), 2456, 3052, 3064)
def get_call_ids(matches):
    """Yield the `id` of the address in each (address, result) pair, in order."""
    yield from (address.id for address, _ in matches)
# sanity check: match the first call
#
# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052)
# thread: 3064
# call 8: GetSystemTimeAsFileTime()
def test_dynamic_call_scope():
    """Sanity check: a call-scoped rule for GetSystemTimeAsFileTime matches at call 8."""
    extractor = get_0000a657_thread3064()

    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: unsupported
                    dynamic: call
            features:
                - api: GetSystemTimeAsFileTime
        """
    )

    rule = capa.rules.Rule.from_yaml(rule_yaml)
    ruleset = capa.rules.RuleSet([rule])

    matches, _ = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True)
    assert rule.name in matches
    assert 8 in get_call_ids(matches[rule.name])
# match the first 5-tuple sequence.
#
# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052)
# thread: 3064
# call 8: GetSystemTimeAsFileTime()
# call 9: GetSystemInfo()
# call 10: LdrGetDllHandle(1974337536, kernel32.dll)
# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll)
# call 12: LdrGetDllHandle(1974337536, kernel32.dll)
def test_dynamic_sequence_scope():
    """A sequence-scoped rule covering calls 8 through 12 matches at call 12."""
    extractor = get_0000a657_thread3064()

    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: unsupported
                    dynamic: sequence
            features:
                - and:
                    - api: GetSystemTimeAsFileTime
                    - api: GetSystemInfo
                    - api: LdrGetDllHandle
                    - api: LdrGetProcedureAddress
                    - count(api(LdrGetDllHandle)): 2
        """
    )

    rule = capa.rules.Rule.from_yaml(rule_yaml)
    ruleset = capa.rules.RuleSet([rule])

    matches, _ = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True)
    assert rule.name in matches
    assert 12 in get_call_ids(matches[rule.name])
# show the sequence is only 5 calls long, and doesn't match beyond that 5-tuple.
#
# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052)
# thread: 3064
# call 8: GetSystemTimeAsFileTime()
# call 9: GetSystemInfo()
# call 10: LdrGetDllHandle(1974337536, kernel32.dll)
# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll)
# call 12: LdrGetDllHandle(1974337536, kernel32.dll)
# call 13: LdrGetProcedureAddress(2010595072, 0, RemoveVectoredExceptionHandler, 1974337536, kernel32.dll)
# call 14: RtlAddVectoredExceptionHandler(1921490089, 0)
# call 15: GetSystemTime()
# call 16: NtAllocateVirtualMemory(no, 4, 786432, 4784128, 4294967295)
def test_dynamic_sequence_scope2():
    """Features at calls 8 and 14 are too far apart to fall in one sequence, so the rule must not match."""
    extractor = get_0000a657_thread3064()

    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: unsupported
                    dynamic: sequence
            features:
                - and:
                    - api: GetSystemTimeAsFileTime
                    - api: RtlAddVectoredExceptionHandler
        """
    )

    rule = capa.rules.Rule.from_yaml(rule_yaml)
    ruleset = capa.rules.RuleSet([rule])

    matches, _ = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True)
    assert rule.name not in matches
# show how you might use a sequence rule: to match a small window for a collection of features.
#
# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052)
# thread: 3064
# call 10: LdrGetDllHandle(1974337536, kernel32.dll)
# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll)
# call 12: ...
# call 13: ...
# call 14: RtlAddVectoredExceptionHandler(1921490089, 0)
def test_dynamic_sequence_example():
    """Example usage: two call subscopes plus an API feature, all within one sequence, match at call 14."""
    extractor = get_0000a657_thread3064()

    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: unsupported
                    dynamic: sequence
            features:
                - and:
                    - call:
                        - and:
                            - api: LdrGetDllHandle
                            - string: "kernel32.dll"
                    - call:
                        - and:
                            - api: LdrGetProcedureAddress
                            - string: "AddVectoredExceptionHandler"
                    - api: RtlAddVectoredExceptionHandler
        """
    )

    rule = capa.rules.Rule.from_yaml(rule_yaml)
    ruleset = capa.rules.RuleSet([rule])

    matches, _ = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True)
    assert rule.name in matches
    assert 14 in get_call_ids(matches[rule.name])
# show how sequences that overlap a single event are handled.
# TODO(williballenthin): but I think we really just want one match for this, not copies of the same thing.
#
# proc: 0000A65749F5902C4D82.exe (ppid=2456, pid=3052)
# thread: 3064
# ...
# call 10: ...
# call 11: LdrGetProcedureAddress(2010595649, 0, AddVectoredExceptionHandler, 1974337536, kernel32.dll)
# call 12: ...
# call 13: ...
# call 14: ...
# call 15: ...
# ...
def test_dynamic_sequence_multiple_sequences_overlapping_single_event():
    """A single matching call (11) is reported once for every sequence that contains it: calls 11 through 15."""
    extractor = get_0000a657_thread3064()

    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: unsupported
                    dynamic: sequence
            features:
                - and:
                    - call:
                        - and:
                            - api: LdrGetProcedureAddress
                            - string: "AddVectoredExceptionHandler"
        """
    )

    rule = capa.rules.Rule.from_yaml(rule_yaml)
    ruleset = capa.rules.RuleSet([rule])

    matches, _ = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True)
    assert rule.name in matches
    assert [11, 12, 13, 14, 15] == list(get_call_ids(matches[rule.name]))

View File

@@ -129,6 +129,7 @@ def test_scope_to_pb2():
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.INSTRUCTION) == capa_pb2.SCOPE_INSTRUCTION
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.PROCESS) == capa_pb2.SCOPE_PROCESS
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.THREAD) == capa_pb2.SCOPE_THREAD
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.SEQUENCE) == capa_pb2.SCOPE_SEQUENCE
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.CALL) == capa_pb2.SCOPE_CALL