From a1d46bc3c0f515339270bf7b80e9b03986e4c657 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Fri, 17 Jan 2025 11:25:41 +0000 Subject: [PATCH] sequence: don't update feature locations in place pep8 --- capa/capabilities/dynamic.py | 16 +++++++------- capa/features/common.py | 4 ++-- capa/render/result_document.py | 33 +++++++++++++++++----------- capa/render/verbose.py | 1 + capa/render/vverbose.py | 4 ++-- capa/rules/__init__.py | 1 + tests/test_dynamic_sequence_scope.py | 19 ++++++++++------ 7 files changed, 46 insertions(+), 32 deletions(-) diff --git a/capa/capabilities/dynamic.py b/capa/capabilities/dynamic.py index a4e97be6..c261c1e5 100644 --- a/capa/capabilities/dynamic.py +++ b/capa/capabilities/dynamic.py @@ -19,6 +19,8 @@ import collections from dataclasses import dataclass import capa.perf +import capa.engine +import capa.helpers import capa.features.freeze as frz import capa.render.result_document as rdoc from capa.rules import Scope, RuleSet @@ -106,7 +108,7 @@ class SequenceMatcher: self.current_features: FeatureSet = collections.defaultdict(set) # the names of rules matched at the last sequence, - # so that we can deduplicate long strings of the same matche. + # so that we can deduplicate long strings of the same matches. self.last_sequence_matches: set[str] = set() def next(self, ch: CallHandle, call_features: FeatureSet): @@ -124,15 +126,14 @@ class SequenceMatcher: # like arch/os/format. continue - feature_vas = self.current_features[feature] - feature_vas.difference_update(vas) - if not feature_vas: + self.current_features[feature] -= vas + if not self.current_features[feature]: del self.current_features[feature] # update the deque and set of features with the latest call's worth of features. self.current_feature_sets.append(call_features) for feature, vas in call_features.items(): - self.current_features[feature].update(vas) + self.current_features[feature] |= vas _, matches = self.ruleset.match(Scope.SEQUENCE, self.current_features, ch.address) @@ -154,7 +155,7 @@ class SequenceMatcher: # see: https://github.com/mandiant/capa/pull/2532#issuecomment-2548508130 for new_rule in newly_encountered_rules: suppressed_rules -= set(self.ruleset.rules[new_rule].get_dependencies(self.ruleset.rules_by_namespace)) - + for rule_name, res in matches.items(): if rule_name in suppressed_rules: continue @@ -181,8 +182,7 @@ def find_thread_capabilities( sequence_matcher = SequenceMatcher(ruleset) call_count = 0 - for ch in extractor.get_calls(ph, th): - call_count += 1 + for call_count, ch in enumerate(extractor.get_calls(ph, th)): # noqa: B007 call_capabilities = find_call_capabilities(ruleset, extractor, ph, th, ch) for feature, vas in call_capabilities.features.items(): features[feature].update(vas) diff --git a/capa/features/common.py b/capa/features/common.py index 674400a4..44d42cce 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -108,7 +108,8 @@ class Result: def __str__(self): # as this object isn't user facing, this formatting is just to help with debugging - lines = [] + lines: list[str] = [] + def rec(m: "Result", indent: int): if isinstance(m.statement, capa.engine.Statement): line = (" " * indent) + str(m.statement.name) + " " + str(m.success) @@ -124,7 +125,6 @@ class Result: return "\n".join(lines) - class Feature(abc.ABC): # noqa: B024 # this is an abstract class, since we don't want anyone to instantiate it directly, # but it doesn't have any abstract methods. diff --git a/capa/render/result_document.py b/capa/render/result_document.py index b2b35c00..d1bce9ad 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -406,15 +406,18 @@ class Match(FrozenModel): # like the way a function contains a basic block. # So when we have a match within a sequence for another sequence, we need to look # for all the places it might be found. - # + # # Despite the edge cases (like API hammering), this turns out to be pretty easy: # collect the most recent match (with the given name) prior to the wanted location. - matches_in_thread = sorted([ - (a.id, m) for a, m in rule_matches.items() - if isinstance(a, DynamicCallAddress) - and a.thread == location.thread - and a.id <= location.id - ]) + matches_in_thread = sorted( + [ + (a.id, m) + for a, m in rule_matches.items() + if isinstance(a, DynamicCallAddress) + and a.thread == location.thread + and a.id <= location.id + ] + ) _, most_recent_match = matches_in_thread[-1] children.append(Match.from_capa(rules, capabilities, most_recent_match)) @@ -466,12 +469,15 @@ class Match(FrozenModel): if location in rule_matches: children.append(Match.from_capa(rules, capabilities, rule_matches[location])) else: - matches_in_thread = sorted([ - (a.id, m) for a, m in rule_matches.items() - if isinstance(a, DynamicCallAddress) - and a.thread == location.thread - and a.id <= location.id - ]) + matches_in_thread = sorted( + [ + (a.id, m) + for a, m in rule_matches.items() + if isinstance(a, DynamicCallAddress) + and a.thread == location.thread + and a.id <= location.id + ] + ) _, most_recent_match = matches_in_thread[-1] children.append(Match.from_capa(rules, capabilities, most_recent_match)) else: @@ -523,6 +529,7 @@ class Match(FrozenModel): # as this object isn't user facing, this formatting is just to help with debugging lines = [] + def rec(m: "Match", indent: int): if isinstance(m.node, StatementNode): line = (" " * indent) + str(m.node.statement.type) + " " + str(m.success) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 81bf93e0..a89db503 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -43,6 +43,7 @@ from capa.rules import RuleSet from capa.engine import MatchResults from capa.render.utils import Console + def format_address(address: frz.Address) -> str: if address.type == frz.AddressType.ABSOLUTE: assert isinstance(address.value, int) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index c7e37615..ad8ff496 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -330,7 +330,7 @@ def collect_sequence_locations( yield from collect_sequence_locations(child, child_mode) elif isinstance(match.node.statement, rd.RangeStatement): for location in match.locations: - if location.type not in (frz.AddressType.CALL, ): + if location.type not in (frz.AddressType.CALL,): continue if mode == MODE_FAILURE: continue @@ -340,7 +340,7 @@ def collect_sequence_locations( yield from collect_sequence_locations(child, mode) elif isinstance(match.node, rd.FeatureNode): for location in match.locations: - if location.type not in (frz.AddressType.CALL, ): + if location.type not in (frz.AddressType.CALL,): continue if mode == MODE_FAILURE: continue diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 2ca4e0bc..907ae9be 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -897,6 +897,7 @@ class Rule: # but, namespaces tend to use `-` while rule names use ` `. so, unlikely, but possible. if statement.value in namespaces: # matches a namespace, so take precedence and don't even check rule names. + assert isinstance(statement.value, str) deps.update(r.name for r in namespaces[statement.value]) else: # not a namespace, assume it's a rule name. diff --git a/tests/test_dynamic_sequence_scope.py b/tests/test_dynamic_sequence_scope.py index 8d5341e0..98b3eaf4 100644 --- a/tests/test_dynamic_sequence_scope.py +++ b/tests/test_dynamic_sequence_scope.py @@ -1,11 +1,16 @@ -# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. +# Copyright 2022 Google LLC +# # Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: [package root]/LICENSE.txt -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and limitations under the License. - +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # tests/data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz #