diff --git a/CHANGELOG.md b/CHANGELOG.md index a1c4f0a7..ec85a032 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - dotnet: emit calls to/from MethodDef methods #1236 @mike-hunhoff - dotnet: emit namespace/class features for ldvirtftn/ldftn instructions #1241 @mike-hunhoff - dotnet: emit namespace/class features for type references #1242 @mike-hunhoff +- dotnet: extract dotnet and pe format #1187 @mr-tz ### Breaking Changes - remove SMDA backend #1062 @williballenthin diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py index 7286001b..ef6b3999 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dnfile_.py @@ -4,7 +4,18 @@ from typing import Tuple, Iterator import dnfile import pefile -from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature +from capa.features.common import ( + OS, + OS_ANY, + ARCH_ANY, + ARCH_I386, + FORMAT_PE, + ARCH_AMD64, + FORMAT_DOTNET, + Arch, + Format, + Feature, +) from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress from capa.features.extractors.base_extractor import FeatureExtractor @@ -12,6 +23,7 @@ logger = logging.getLogger(__name__) def extract_file_format(**kwargs) -> Iterator[Tuple[Feature, Address]]: + yield Format(FORMAT_PE), NO_ADDRESS yield Format(FORMAT_DOTNET), NO_ADDRESS diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 1b5aa1f3..bfe77281 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -11,6 +11,7 @@ from capa.features.common import ( OS_ANY, ARCH_ANY, ARCH_I386, + FORMAT_PE, ARCH_AMD64, FORMAT_DOTNET, Arch, @@ -37,6 +38,7 @@ logger = logging.getLogger(__name__) def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]: + yield Format(FORMAT_PE), NO_ADDRESS yield Format(FORMAT_DOTNET), NO_ADDRESS diff --git a/capa/main.py b/capa/main.py index ee356384..d330ae81 100644 --- a/capa/main.py +++ b/capa/main.py @@ -20,7 +20,7 @@ import textwrap import itertools import contextlib import collections -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Tuple, Optional import halo import tqdm @@ -647,6 +647,7 @@ def collect_metadata( sample_path: str, rules_path: List[str], extractor: capa.features.extractors.base_extractor.FeatureExtractor, + format_: Optional[str] = None, ): md5 = hashlib.md5() sha1 = hashlib.sha1() @@ -662,7 +663,8 @@ def collect_metadata( if rules_path != [RULES_PATH_DEFAULT_STRING]: rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path] - format_ = get_format(sample_path) + if format_ is None: + format_ = get_format(sample_path) arch = get_arch(sample_path) os_ = get_os(sample_path) @@ -1099,7 +1101,7 @@ def main(argv=None): log_unsupported_os_error() return E_INVALID_FILE_OS - meta = collect_metadata(argv, args.sample, args.rules, extractor) + meta = collect_metadata(argv, args.sample, args.rules, extractor, format_=format_) capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet) meta["analysis"].update(counts) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index d1773021..7f0b13b6 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -157,11 +157,11 @@ def main(argv=None): return -1 if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): - format = "freeze" + format_ = "freeze" with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) else: - format = args.format + format_ = args.format should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) try: @@ -175,7 +175,7 @@ def main(argv=None): capa.helpers.log_unsupported_runtime_error() return -1 - meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor) + meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor, format_=format_) capabilities, counts = capa.main.find_capabilities(rules, extractor) meta["analysis"].update(counts) meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) diff --git a/tests/fixtures.py b/tests/fixtures.py index 6dbc9817..f6686c4b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -716,6 +716,7 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("mixed-mode-64", "file", capa.features.common.Characteristic("mixed mode"), True), ("hello-world", "file", capa.features.common.Characteristic("mixed mode"), False), ("b9f5b", "file", OS(OS_ANY), True), + ("b9f5b", "file", Format(FORMAT_PE), True), ("b9f5b", "file", Format(FORMAT_DOTNET), True), ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::Main"), True), ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::ctor"), True),