From 94fc7b4e9aeaa0b72b77a6a8df668efee9f11053 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Mon, 26 Jun 2023 01:23:01 +0100 Subject: [PATCH] FeatureExtractor alias: add type casts to either StaticFeatureExtractor or DynamicFeatureExtractor --- capa/features/extractors/base_extractor.py | 7 +++++++ capa/features/extractors/cape/extractor.py | 5 ++++- capa/main.py | 22 ++++++++++++++++++---- scripts/profile-time.py | 3 ++- scripts/show-capabilities-by-function.py | 3 ++- 5 files changed, 33 insertions(+), 7 deletions(-) diff --git a/capa/features/extractors/base_extractor.py b/capa/features/extractors/base_extractor.py index f6eddcce..3272e9c2 100644 --- a/capa/features/extractors/base_extractor.py +++ b/capa/features/extractors/base_extractor.py @@ -307,6 +307,13 @@ class DynamicFeatureExtractor: This class is not instantiated directly; it is the base class for other implementations. """ + @abc.abstractmethod + def get_base_address(self) -> Union[AbsoluteVirtualAddress, capa.features.address._NoAddress]: + """ + fetch the preferred load address at which the sample was analyzed. 
+ """ + raise NotImplementedError() + @abc.abstractmethod def get_processes(self) -> Iterator[ProcessHandle]: """ diff --git a/capa/features/extractors/cape/extractor.py b/capa/features/extractors/cape/extractor.py index 611b83e5..01a1e3c9 100644 --- a/capa/features/extractors/cape/extractor.py +++ b/capa/features/extractors/cape/extractor.py @@ -13,7 +13,7 @@ import capa.features.extractors.cape.thread import capa.features.extractors.cape.global_ import capa.features.extractors.cape.process from capa.features.common import Feature -from capa.features.address import Address +from capa.features.address import NO_ADDRESS, Address from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle, DynamicFeatureExtractor logger = logging.getLogger(__name__) @@ -27,6 +27,9 @@ class CapeExtractor(DynamicFeatureExtractor): self.global_features = capa.features.extractors.cape.global_.extract_features(self.static) + def get_base_address(self): + return NO_ADDRESS + def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: yield from self.global_features diff --git a/capa/main.py b/capa/main.py index 7147c1f8..6000c49c 100644 --- a/capa/main.py +++ b/capa/main.py @@ -20,7 +20,7 @@ import textwrap import itertools import contextlib import collections -from typing import Any, Dict, List, Tuple, Callable +from typing import Any, Dict, List, Tuple, Callable, cast import halo import tqdm @@ -231,7 +231,12 @@ def find_code_capabilities( def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, function_features: FeatureSet): file_features = collections.defaultdict(set) # type: FeatureSet - for feature, va in itertools.chain(extractor.extract_file_features(), extractor.extract_global_features()): + if isinstance(extractor, StaticFeatureExtractor): + extractor_: StaticFeatureExtractor = cast(StaticFeatureExtractor, extractor) + else: + extractor_: DynamicFeatureExtractor = cast(DynamicFeatureExtractor, extractor) + + for feature, va in 
itertools.chain(extractor_.extract_file_features(), extractor_.extract_global_features()):
         # not all file features may have virtual addresses.
         # if not, then at least ensure the feature shows up in the index.
         # the set of addresses will still be empty.
@@ -249,7 +254,7 @@ def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, functi
     return matches, len(file_features)
 
 
-def find_capabilities(
+def find_capabilities_static(
     ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
 ) -> Tuple[MatchResults, Any]:
     all_function_matches = collections.defaultdict(list)  # type: MatchResults
@@ -334,6 +339,15 @@ def find_capabilities(
     return matches, meta
 
 
+def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, **kwargs) -> Tuple[MatchResults, Any]:
+    """dispatch to the matcher for the extractor's flavor, forwarding kwargs (e.g. disable_progress)."""
+    if isinstance(extractor, StaticFeatureExtractor):
+        extractor_: StaticFeatureExtractor = cast(StaticFeatureExtractor, extractor)
+        return find_capabilities_static(ruleset, extractor_, **kwargs)
+    # only the static routine exists in this patch: fail loudly instead of implicitly returning None.
+    raise NotImplementedError(f"unsupported feature extractor: {type(extractor).__name__}")
+
+
 # TODO move all to helpers?
def has_rule_with_namespace(rules, capabilities, rule_cat): for rule_name in capabilities.keys(): @@ -1252,7 +1266,7 @@ def main(argv=None): should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) try: - extractor = get_extractor( + extractor: FeatureExtractor = get_extractor( args.sample, format_, args.os, diff --git a/scripts/profile-time.py b/scripts/profile-time.py index 09d125d8..0bd4e389 100644 --- a/scripts/profile-time.py +++ b/scripts/profile-time.py @@ -46,6 +46,7 @@ import capa.helpers import capa.features import capa.features.common import capa.features.freeze +from capa.features.extractors.base_extractor import FeatureExtractor logger = logging.getLogger("capa.profile") @@ -105,7 +106,7 @@ def main(argv=None): with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) else: - extractor = capa.main.get_extractor( + extractor: FeatureExtractor = capa.main.get_extractor( args.sample, args.format, args.os, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False ) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index b58c7568..6855db2c 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -70,6 +70,7 @@ import capa.render.result_document as rd from capa.helpers import get_file_taste from capa.features.common import FORMAT_AUTO from capa.features.freeze import Address +from capa.features.extractors.base_extractor import FeatureExtractor logger = logging.getLogger("capa.show-capabilities-by-function") @@ -166,7 +167,7 @@ def main(argv=None): should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) try: - extractor = capa.main.get_extractor( + extractor: FeatureExtractor = capa.main.get_extractor( args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace ) except capa.exceptions.UnsupportedFormatError: