From 5bc44aef0f6397bbadb7ab4ccb7f8ab1916a4e29 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Fri, 8 Apr 2022 10:34:02 +0000 Subject: [PATCH 01/30] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 12c64af2..11ae8d0d 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 12c64af268337d6213b603e00cb9df908c779ff9 +Subproject commit 11ae8d0d38b9703b999c988f927198c8fd132ff5 From 580a2d7e4519ea5d353650d66468020968f0f27d Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 14:55:00 -0600 Subject: [PATCH 02/30] dotnet: basic detection and feature extraction (#987) --- .github/mypy/mypy.ini | 3 + .github/workflows/tests.yml | 4 +- CHANGELOG.md | 1 + capa/exceptions.py | 14 ++ capa/features/common.py | 17 +- capa/features/extractors/common.py | 5 +- capa/features/extractors/dnfile/__init__.py | 0 capa/features/extractors/dnfile/extractor.py | 70 +++++++ capa/features/extractors/dnfile/file.py | 40 ++++ capa/features/extractors/dnfile/helpers.py | 169 +++++++++++++++ capa/features/extractors/dnfile/insn.py | 96 +++++++++ capa/features/extractors/dotnetfile.py | 129 ++++++++++++ capa/features/extractors/helpers.py | 3 + capa/features/freeze.py | 3 +- capa/features/insn.py | 5 - capa/helpers.py | 81 ++++++- capa/main.py | 209 +++++++++---------- capa/render/json.py | 4 +- capa/render/result_document.py | 1 - scripts/lint.py | 13 +- scripts/show-capabilities-by-function.py | 24 +-- scripts/show-features.py | 30 ++- setup.py | 2 + tests/fixtures.py | 97 ++++++++- tests/test_dnfile_features.py | 30 +++ tests/test_dotnetfile_features.py | 43 ++++ 26 files changed, 928 insertions(+), 165 deletions(-) create mode 100644 capa/exceptions.py create mode 100644 capa/features/extractors/dnfile/__init__.py create mode 100644 capa/features/extractors/dnfile/extractor.py create mode 100644 capa/features/extractors/dnfile/file.py create mode 100644 capa/features/extractors/dnfile/helpers.py create mode 100644 capa/features/extractors/dnfile/insn.py create mode 100644 capa/features/extractors/dotnetfile.py create mode 100644 tests/test_dnfile_features.py create mode 100644 tests/test_dotnetfile_features.py diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini index 6d177d40..3d22b05f 100644 --- a/.github/mypy/mypy.ini +++ b/.github/mypy/mypy.ini @@ -74,3 +74,6 @@ ignore_missing_imports = True [mypy-elftools.*] ignore_missing_imports = True + +[mypy-dncil.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1f3cc7f7..128c4591 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -48,7 +48,7 @@ jobs: - name: Checkout capa with submodules uses: actions/checkout@v2 with: - submodules: true + submodules: recursive - name: Set up Python 3.8 uses: actions/setup-python@v2 with: @@ -78,7 +78,7 @@ jobs: - name: Checkout capa with submodules uses: actions/checkout@v2 with: - submodules: true + submodules: recursive - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 5168b849..8bf139e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin - add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin - extract additional offset/number features in certain circumstances #320 @williballenthin + - add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin ### Breaking Changes diff --git a/capa/exceptions.py b/capa/exceptions.py new file mode 100644 index 00000000..8c939997 --- /dev/null +++ b/capa/exceptions.py @@ -0,0 +1,14 @@ +class UnsupportedRuntimeError(RuntimeError): + pass + + +class UnsupportedFormatError(ValueError): + pass + + +class UnsupportedArchError(ValueError): + pass + + +class UnsupportedOSError(ValueError): + pass diff --git a/capa/features/common.py b/capa/features/common.py index b86f86e4..ed2b288e 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -390,7 +390,9 @@ class Bytes(Feature): # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types ARCH_I386 = "i386" ARCH_AMD64 = "amd64" -VALID_ARCH = (ARCH_I386, ARCH_AMD64) +# dotnet +ARCH_ANY = "any" +VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY) class Arch(Feature): @@ -402,8 +404,10 @@ class Arch(Feature): OS_WINDOWS = "windows" OS_LINUX = "linux" OS_MACOS = "macos" +# dotnet +OS_ANY = "any" VALID_OS = {os.value for os in capa.features.extractors.elf.OS} -VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS}) +VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY}) class OS(Feature): @@ -414,7 +418,14 @@ class OS(Feature): FORMAT_PE = "pe" FORMAT_ELF = "elf" -VALID_FORMAT = (FORMAT_PE, FORMAT_ELF) +FORMAT_DOTNET = "dotnet" +VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET) +# internal only, not to be used in rules +FORMAT_AUTO = "auto" +FORMAT_SC32 = "sc32" +FORMAT_SC64 = "sc64" +FORMAT_FREEZE = "freeze" +FORMAT_UNKNOWN = "unknown" class Format(Feature): diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py index 99f0ea08..786e4faf 100644 --- a/capa/features/extractors/common.py +++ b/capa/features/extractors/common.py @@ -8,7 +8,8 @@ import pefile import capa.features import capa.features.extractors.elf import capa.features.extractors.pefile -from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format, String +from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String +from capa.features.freeze import is_freeze logger = logging.getLogger(__name__) @@ -29,6 +30,8 @@ def extract_format(buf): yield Format(FORMAT_PE), 0x0 elif buf.startswith(b"\x7fELF"): yield Format(FORMAT_ELF), 0x0 + elif is_freeze(buf): + yield Format(FORMAT_FREEZE), 0x0 else: # we likely end up here: # 1. handling a file format (e.g. macho) diff --git a/capa/features/extractors/dnfile/__init__.py b/capa/features/extractors/dnfile/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py new file mode 100644 index 00000000..f82364a2 --- /dev/null +++ b/capa/features/extractors/dnfile/extractor.py @@ -0,0 +1,70 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List, Tuple + +if TYPE_CHECKING: + from capa.features.common import Feature + +import dnfile + +import capa.features.extractors +import capa.features.extractors.dnfile.file +import capa.features.extractors.dnfile.insn +from capa.features.extractors.base_extractor import FeatureExtractor +from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies + + +class DnfileFeatureExtractor(FeatureExtractor): + def __init__(self, path: str): + super(DnfileFeatureExtractor, self).__init__() + self.pe: dnfile.dnPE = dnfile.dnPE(path) + + # pre-compute these because we'll yield them at *every* scope. + self.global_features: List[Tuple[Feature, int]] = [] + self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe)) + self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe)) + + def get_base_address(self): + return 0x0 + + def extract_global_features(self): + yield from self.global_features + + def extract_file_features(self): + yield from capa.features.extractors.dnfile.file.extract_features(self.pe) + + def get_functions(self): + # data structure shared across functions yielded here. + # useful for caching analysis relevant across a single workspace. + ctx = {} + ctx["pe"] = self.pe + + for f in get_dotnet_managed_method_bodies(self.pe): + setattr(f, "ctx", ctx) + yield f + + def extract_function_features(self, f): + # TODO + yield from [] + + def get_basic_blocks(self, f): + # each dotnet method is considered 1 basic block + yield f + + def extract_basic_block_features(self, f, bb): + # we don't support basic block features + yield from [] + + def get_instructions(self, f, bb): + yield from f.instructions + + def extract_insn_features(self, f, bb, insn): + yield from capa.features.extractors.dnfile.insn.extract_features(f, bb, insn) diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py new file mode 100644 index 00000000..99e2643c --- /dev/null +++ b/capa/features/extractors/dnfile/file.py @@ -0,0 +1,40 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING, Tuple, Iterator + +if TYPE_CHECKING: + import dnfile + from capa.features.common import Feature, Format + from capa.features.file import Import + +import capa.features.extractors + + +def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe) + + +def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe) + + +def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: + for file_handler in FILE_HANDLERS: + for (feature, token) in file_handler(pe): + yield feature, token + + +FILE_HANDLERS = ( + extract_file_import_names, + # TODO extract_file_strings, + # TODO extract_file_function_names, + extract_file_format, +) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py new file mode 100644 index 00000000..c7304462 --- /dev/null +++ b/capa/features/extractors/dnfile/helpers.py @@ -0,0 +1,169 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +import logging +from typing import Any, Tuple, Iterator, Optional + +import dnfile +from dncil.cil.body import CilMethodBody +from dncil.cil.error import MethodBodyFormatError +from dncil.clr.token import Token, StringToken, InvalidToken +from dncil.cil.body.reader import CilMethodBodyReaderBase + +logger = logging.getLogger(__name__) + +# key indexes to dotnet metadata tables +DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables} + + +class DnfileMethodBodyReader(CilMethodBodyReaderBase): + def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow): + self.pe: dnfile.dnPE = pe + self.offset: int = self.pe.get_offset_from_rva(row.Rva) + + def read(self, n: int) -> bytes: + data: bytes = self.pe.get_data(self.pe.get_rva_from_offset(self.offset), n) + self.offset += n + return data + + def tell(self) -> int: + return self.offset + + def seek(self, offset: int) -> int: + self.offset = offset + return self.offset + + +def calculate_dotnet_token_value(table: int, rid: int) -> int: + return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) + + +def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: + """map generic token to string or table row""" + if isinstance(token, StringToken): + user_string: Optional[str] = read_dotnet_user_string(pe, token) + if user_string is None: + return InvalidToken(token.value) + return user_string + + table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") + if not table_name: + # table_index is not valid + return InvalidToken(token.value) + + table: Any = getattr(pe.net.mdtables, table_name, None) + if table is None: + # table index is valid but table is not present + return InvalidToken(token.value) + + try: + return table.rows[token.rid - 1] + except IndexError: + # table index is valid but row index is not valid + return InvalidToken(token.value) + + +def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> Optional[CilMethodBody]: + """read dotnet method body""" + try: + return CilMethodBody(DnfileMethodBodyReader(pe, row)) + except MethodBodyFormatError as e: + logger.warn("failed to parse managed method body @ 0x%08x (%s)" % (row.Rva, e)) + return None + + +def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: + """read user string from #US stream""" + try: + user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) + except UnicodeDecodeError as e: + logger.warn("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e)) + return None + if user_string is None: + return None + return user_string.value + + +def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get managed imports from MemberRef table + + see https://www.ntcore.com/files/dotnetformat.htm + + 10 - MemberRef Table + Each row represents an imported method + Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables) + Name (index into String heap) + 01 - TypeRef Table + Each row represents an imported class, its namespace and the assembly which contains it + TypeName (index into String heap) + TypeNamespace (index into String heap) + """ + if not hasattr(pe.net.mdtables, "MemberRef"): + return + + for (rid, row) in enumerate(pe.net.mdtables.MemberRef): + if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): + continue + + token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) + # like System.IO.File::OpenRead + imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}" + + yield token, imp + + +def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get unmanaged imports from ImplMap table + + see https://www.ntcore.com/files/dotnetformat.htm + + 28 - ImplMap Table + ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch + MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index) + ImportName (index into the String heap) + ImportScope (index into the ModuleRef table) + """ + if not hasattr(pe.net.mdtables, "ImplMap"): + return + + for row in pe.net.mdtables.ImplMap: + dll: str = row.ImportScope.row.Name + symbol: str = row.ImportName + + # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the + # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded + # MethodDef table token to help us later record native import method calls made from CIL + token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + + # like Kernel32.dll + if dll and "." in dll: + dll = dll.split(".")[0] + + # like kernel32.CreateFileA + imp: str = f"{dll}.{symbol}" + + yield token, imp + + +def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: + """get managed methods from MethodDef table""" + if not hasattr(pe.net.mdtables, "MethodDef"): + return + + for row in pe.net.mdtables.MethodDef: + if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)): + # skip methods that do not have a method body + continue + + body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row) + if body is None: + continue + + yield body diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py new file mode 100644 index 00000000..262b9779 --- /dev/null +++ b/capa/features/extractors/dnfile/insn.py @@ -0,0 +1,96 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional +from itertools import chain + +if TYPE_CHECKING: + from dncil.cil.instruction import Instruction + from dncil.cil.body import CilMethodBody + from capa.features.common import Feature + +from dncil.clr.token import StringToken +from dncil.cil.opcode import OpCodes + +import capa.features.extractors.helpers +from capa.features.insn import API, Number +from capa.features.common import String +from capa.features.extractors.dnfile.helpers import ( + read_dotnet_user_string, + get_dotnet_managed_imports, + get_dotnet_unmanaged_imports, +) + + +def get_imports(ctx: Dict) -> Dict: + if "imports_cache" not in ctx: + ctx["imports_cache"] = { + token: imp + for (token, imp) in chain(get_dotnet_managed_imports(ctx["pe"]), get_dotnet_unmanaged_imports(ctx["pe"])) + } + return ctx["imports_cache"] + + +def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]: + """parse instruction API features""" + if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + return + + name: str = get_imports(f.ctx).get(insn.operand.value, "") + if not name: + return + + if "::" in name: + # like System.IO.File::OpenRead + yield API(name), insn.offset + else: + # like kernel32.CreateFileA + dll, _, symbol = name.rpartition(".") + for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name_variant), insn.offset + + +def extract_insn_number_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Iterator[Tuple[Number, int]]: + """parse instruction number features""" + if insn.is_ldc(): + yield Number(insn.get_ldc()), insn.offset + + +def extract_insn_string_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Iterator[Tuple[String, int]]: + """parse instruction string features""" + if not insn.is_ldstr(): + return + + if not isinstance(insn.operand, StringToken): + return + + user_string: Optional[str] = read_dotnet_user_string(f.ctx["pe"], insn.operand) + if user_string is None: + return + + yield String(user_string), insn.offset + + +def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]: + """extract instruction features""" + for inst_handler in INSTRUCTION_HANDLERS: + for (feature, offset) in inst_handler(f, bb, insn): + yield feature, offset + + +INSTRUCTION_HANDLERS = ( + extract_insn_api_features, + extract_insn_number_features, + extract_insn_string_features, +) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py new file mode 100644 index 00000000..a9a2c600 --- /dev/null +++ b/capa/features/extractors/dotnetfile.py @@ -0,0 +1,129 @@ +import logging +from typing import Tuple, Iterator +from itertools import chain + +import dnfile +import pefile + +import capa.features.extractors.helpers +from capa.features.file import Import +from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature +from capa.features.extractors.base_extractor import FeatureExtractor +from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports + +logger = logging.getLogger(__name__) + + +def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]: + yield Format(FORMAT_DOTNET), 0x0 + + +def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]: + for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)): + if "::" in imp: + # like System.IO.File::OpenRead + yield Import(imp), token + else: + # like kernel32.CreateFileA + dll, _, symbol = imp.rpartition(".") + for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield Import(symbol_variant), token + + +def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]: + yield OS(OS_ANY), 0x0 + + +def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]: + # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 + # .NET 4.5 added option: any CPU, 32-bit preferred + if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE: + yield Arch(ARCH_I386), 0x0 + elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS: + yield Arch(ARCH_AMD64), 0x0 + else: + yield Arch(ARCH_ANY), 0x0 + + +def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: + for file_handler in FILE_HANDLERS: + for feature, va in file_handler(pe=pe): # type: ignore + yield feature, va + + +FILE_HANDLERS = ( + extract_file_import_names, + # TODO extract_file_strings, + # TODO extract_file_function_names, + extract_file_format, +) + + +def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: + for handler in GLOBAL_HANDLERS: + for feature, va in handler(pe=pe): # type: ignore + yield feature, va + + +GLOBAL_HANDLERS = ( + extract_file_os, + extract_file_arch, +) + + +class DotnetFileFeatureExtractor(FeatureExtractor): + def __init__(self, path: str): + super(DotnetFileFeatureExtractor, self).__init__() + self.path: str = path + self.pe: dnfile.dnPE = dnfile.dnPE(path) + + def get_base_address(self) -> int: + return 0x0 + + def get_entry_point(self) -> int: + # self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT + # True: native EP: Token + # False: managed EP: RVA + return self.pe.net.struct.EntryPointTokenOrRva + + def extract_global_features(self): + yield from extract_global_features(self.pe) + + def extract_file_features(self): + yield from extract_file_features(self.pe) + + def is_dotnet_file(self) -> bool: + return bool(self.pe.net) + + def is_mixed_mode(self) -> bool: + return not bool(self.pe.net.Flags.CLR_ILONLY) + + def get_runtime_version(self) -> Tuple[int, int]: + return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion + + def get_meta_version_string(self) -> str: + return self.pe.net.metadata.struct.Version.rstrip(b"\x00").decode("utf-8") + + def get_functions(self): + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") + + def extract_function_features(self, f): + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") + + def get_basic_blocks(self, f): + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") + + def extract_basic_block_features(self, f, bb): + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") + + def get_instructions(self, f, bb): + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") + + def extract_insn_features(self, f, bb, insn): + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") + + def is_library_function(self, va): + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") + + def get_function_name(self, va): + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index 8593a5bb..8fe920c5 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -51,6 +51,9 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]: - CreateFileA - CreateFile """ + # normalize dll name + dll = dll.lower() + # kernel32.CreateFileA yield "%s.%s" % (dll, symbol) diff --git a/capa/features/freeze.py b/capa/features/freeze.py index bfa92460..ff465f77 100644 --- a/capa/features/freeze.py +++ b/capa/features/freeze.py @@ -53,12 +53,12 @@ import zlib import logging from typing import Dict, Type +import capa.helpers import capa.features.file import capa.features.insn import capa.features.common import capa.features.basicblock import capa.features.extractors.base_extractor -from capa.helpers import hex from capa.features.common import Feature logger = logging.getLogger(__name__) @@ -87,6 +87,7 @@ def dumps(extractor): returns: str: the serialized features. """ + hex = capa.helpers.hex ret = { "version": 1, "base address": extractor.get_base_address(), diff --git a/capa/features/insn.py b/capa/features/insn.py index 00a549f1..bb8924ee 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -13,11 +13,6 @@ from capa.features.common import Feature class API(Feature): def __init__(self, name: str, description=None): - # Downcase library name if given - if "." in name: - modname, _, impname = name.rpartition(".") - name = modname.lower() + "." + impname - super(API, self).__init__(name, description=description) diff --git a/capa/helpers.py b/capa/helpers.py index e36ca3ac..5c0bcfd6 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -5,10 +5,20 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. - import os +import logging from typing import NoReturn +from capa.exceptions import UnsupportedFormatError +from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN +from capa.features.extractors.common import extract_format + +EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") +EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") + + +logger = logging.getLogger("capa") + _hex = hex @@ -35,3 +45,72 @@ def is_runtime_ida(): def assert_never(value: NoReturn) -> NoReturn: assert False, f"Unhandled value: {value} ({type(value).__name__})" + + +def get_format_from_extension(sample: str) -> str: + if sample.endswith(EXTENSIONS_SHELLCODE_32): + return FORMAT_SC32 + elif sample.endswith(EXTENSIONS_SHELLCODE_64): + return FORMAT_SC64 + return FORMAT_UNKNOWN + + +def get_auto_format(path: str) -> str: + format_ = get_format(path) + if format_ == FORMAT_UNKNOWN: + format_ = get_format_from_extension(path) + if format_ == FORMAT_UNKNOWN: + raise UnsupportedFormatError() + return format_ + + +def get_format(sample: str) -> str: + with open(sample, "rb") as f: + buf = f.read() + + for feature, _ in extract_format(buf): + assert isinstance(feature.value, str) + return feature.value + + return FORMAT_UNKNOWN + + +def log_unsupported_format_error(): + logger.error("-" * 80) + logger.error(" Input file does not appear to be a PE or ELF file.") + logger.error(" ") + logger.error( + " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)." + ) + logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") + logger.error("-" * 80) + + +def log_unsupported_os_error(): + logger.error("-" * 80) + logger.error(" Input file does not appear to target a supported OS.") + logger.error(" ") + logger.error( + " capa currently only supports analyzing executables for some operating systems (including Windows and Linux)." + ) + logger.error("-" * 80) + + +def log_unsupported_arch_error(): + logger.error("-" * 80) + logger.error(" Input file does not appear to target a supported architecture.") + logger.error(" ") + logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).") + logger.error("-" * 80) + + +def log_unsupported_runtime_error(): + logger.error("-" * 80) + logger.error(" Unsupported runtime or Python interpreter.") + logger.error(" ") + logger.error(" capa supports running under Python 3.7 and higher.") + logger.error(" ") + logger.error( + " If you're seeing this message on the command line, please ensure you're running a supported Python version." + ) + logger.error("-" * 80) diff --git a/capa/main.py b/capa/main.py index 720d1a3d..016f3fd0 100644 --- a/capa/main.py +++ b/capa/main.py @@ -42,17 +42,34 @@ import capa.features.extractors import capa.features.extractors.common import capa.features.extractors.pefile import capa.features.extractors.elffile +import capa.features.extractors.dotnetfile from capa.rules import Rule, Scope, RuleSet from capa.engine import FeatureSet, MatchResults -from capa.helpers import get_file_taste +from capa.helpers import ( + get_format, + get_file_taste, + get_auto_format, + log_unsupported_os_error, + log_unsupported_arch_error, + log_unsupported_format_error, +) +from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError +from capa.features.common import ( + FORMAT_PE, + FORMAT_ELF, + FORMAT_AUTO, + FORMAT_SC32, + FORMAT_SC64, + FORMAT_DOTNET, + FORMAT_FREEZE, +) from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor RULES_PATH_DEFAULT_STRING = "(embedded rules)" SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)" BACKEND_VIV = "vivisect" BACKEND_SMDA = "smda" -EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") -EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") +BACKEND_DOTNET = "dotnet" E_MISSING_RULES = -10 E_MISSING_FILE = -11 @@ -287,6 +304,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro return matches, meta +# TODO move all to helpers? def has_rule_with_namespace(rules, capabilities, rule_cat): for rule_name in capabilities.keys(): if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat): @@ -334,17 +352,6 @@ def is_supported_format(sample: str) -> bool: return len(list(capa.features.extractors.common.extract_format(taste))) == 1 -def get_format(sample: str) -> str: - with open(sample, "rb") as f: - buf = f.read() - - for feature, _ in capa.features.extractors.common.extract_format(buf): - assert isinstance(feature.value, str) - return feature.value - - return "unknown" - - def is_supported_arch(sample: str) -> bool: with open(sample, "rb") as f: buf = f.read() @@ -433,19 +440,7 @@ def get_default_signatures() -> List[str]: return ret -class UnsupportedFormatError(ValueError): - pass - - -class UnsupportedArchError(ValueError): - pass - - -class UnsupportedOSError(ValueError): - pass - - -def get_workspace(path, format, sigpaths): +def get_workspace(path, format_, sigpaths): """ load the program at the given path into a vivisect workspace using the given format. also apply the given FLIRT signatures. @@ -465,21 +460,22 @@ def get_workspace(path, format, sigpaths): import viv_utils logger.debug("generating vivisect workspace for: %s", path) - if format == "auto": + # TODO should not be auto at this point, anymore + if format_ == FORMAT_AUTO: if not is_supported_format(path): raise UnsupportedFormatError() # don't analyze, so that we can add our Flirt function analyzer first. vw = viv_utils.getWorkspace(path, analyze=False, should_save=False) - elif format in {"pe", "elf"}: + elif format_ in {FORMAT_PE, FORMAT_ELF}: vw = viv_utils.getWorkspace(path, analyze=False, should_save=False) - elif format == "sc32": + elif format_ == FORMAT_SC32: # these are not analyzed nor saved. vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False) - elif format == "sc64": + elif format_ == FORMAT_SC64: vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False) else: - raise ValueError("unexpected format: " + format) + raise ValueError("unexpected format: " + format_) viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths) @@ -489,12 +485,9 @@ def get_workspace(path, format, sigpaths): return vw -class UnsupportedRuntimeError(RuntimeError): - pass - - +# TODO get_extractors -> List[FeatureExtractor]? def get_extractor( - path: str, format: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False + path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False ) -> FeatureExtractor: """ raises: @@ -502,7 +495,7 @@ def get_extractor( UnsupportedArchError UnsupportedOSError """ - if format not in ("sc32", "sc64"): + if format_ not in (FORMAT_SC32, FORMAT_SC64): if not is_supported_format(path): raise UnsupportedFormatError() @@ -512,6 +505,11 @@ def get_extractor( if not is_supported_os(path): raise UnsupportedOSError() + if format_ == FORMAT_DOTNET: + import capa.features.extractors.dnfile.extractor + + return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) + if backend == "smda": from smda.SmdaConfig import SmdaConfig from smda.Disassembler import Disassembler @@ -530,7 +528,7 @@ def get_extractor( import capa.features.extractors.viv.extractor with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): - vw = get_workspace(path, format, sigpaths) + vw = get_workspace(path, format_, sigpaths) if should_save_workspace: logger.debug("saving workspace") @@ -545,6 +543,22 @@ def get_extractor( return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path) +def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]: + file_extractors: List[FeatureExtractor] = list() + + if format_ == capa.features.extractors.common.FORMAT_PE: + file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) + + dotnetfile_extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(sample) + if dotnetfile_extractor.is_dotnet_file(): + file_extractors.append(dotnetfile_extractor) + + elif format_ == capa.features.extractors.common.FORMAT_ELF: + file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample)) + + return file_extractors + + def is_nursery_rule_path(path: str) -> bool: """ The nursery is a spot for rules that have not yet been fully polished. @@ -653,7 +667,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor): if rules_path != [RULES_PATH_DEFAULT_STRING]: rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path] - format = get_format(sample_path) + format_ = get_format(sample_path) arch = get_arch(sample_path) os_ = get_os(sample_path) @@ -668,7 +682,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor): "path": os.path.normpath(sample_path), }, "analysis": { - "format": format, + "format": format_, "arch": arch, "os": os_, "extractor": extractor.__class__.__name__, @@ -783,19 +797,20 @@ def install_common_args(parser, wanted=None): if "format" in wanted: formats = [ - ("auto", "(default) detect file type automatically"), - ("pe", "Windows PE file"), - ("elf", "Executable and Linkable Format"), - ("sc32", "32-bit shellcode"), - ("sc64", "64-bit shellcode"), - ("freeze", "features previously frozen by capa"), + (FORMAT_AUTO, "(default) detect file type automatically"), + (FORMAT_PE, "Windows PE file"), + (FORMAT_DOTNET, ".NET PE file"), + (FORMAT_ELF, "Executable and Linkable Format"), + (FORMAT_SC32, "32-bit shellcode"), + (FORMAT_SC64, "64-bit shellcode"), + (FORMAT_FREEZE, "features previously frozen by capa"), ] format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats]) parser.add_argument( "-f", "--format", choices=[f[0] for f in formats], - default="auto", + default=FORMAT_AUTO, help="select sample format, %s" % format_help, ) @@ -974,13 +989,21 @@ def main(argv=None): return ret try: - taste = get_file_taste(args.sample) + _ = get_file_taste(args.sample) except IOError as e: # per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we # handle the IOError separately and reach into the args logger.error("%s", e.args[0]) return E_MISSING_FILE + format_ = args.format + if format_ == FORMAT_AUTO: + try: + format_ = get_auto_format(args.sample) + except UnsupportedFormatError: + log_unsupported_format_error() + return E_INVALID_FILE_TYPE + try: rules = get_rules(args.rules, disable_progress=args.quiet) rules = capa.rules.RuleSet(rules) @@ -1002,26 +1025,23 @@ def main(argv=None): logger.error("%s", str(e)) return E_INVALID_RULE - file_extractor = None - if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")): - # these pefile and elffile file feature extractors are pretty light weight: they don't do any code analysis. - # so we can fairly quickly determine if the given file has "pure" file-scope rules - # that indicate a limitation (like "file is packed based on section names") - # and avoid doing a full code analysis on difficult/impossible binaries. - try: - file_extractor = capa.features.extractors.pefile.PefileFeatureExtractor(args.sample) - except PEFormatError as e: - logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e)) - return E_CORRUPT_FILE + # file feature extractors are pretty lightweight: they don't do any code analysis. + # so we can fairly quickly determine if the given file has "pure" file-scope rules + # that indicate a limitation (like "file is packed based on section names") + # and avoid doing a full code analysis on difficult/impossible binaries. + # + # this pass can inspect multiple file extractors, e.g., dotnet and pe to identify + # various limitations + try: + file_extractors = get_file_extractors(args.sample, format_) + except PEFormatError as e: + logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e)) + return E_CORRUPT_FILE + except (ELFError, OverflowError) as e: + logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e)) + return E_CORRUPT_FILE - elif args.format == "elf" or (args.format == "auto" and taste.startswith(b"\x7fELF")): - try: - file_extractor = capa.features.extractors.elffile.ElfFeatureExtractor(args.sample) - except (ELFError, OverflowError) as e: - logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e)) - return E_CORRUPT_FILE - - if file_extractor: + for file_extractor in file_extractors: try: pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {}) except PEFormatError as e: @@ -1040,58 +1060,37 @@ def main(argv=None): logger.debug("file limitation short circuit, won't analyze fully.") return E_FILE_LIMITATION - try: - if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")): - sig_paths = get_signatures(args.signatures) - else: - sig_paths = [] - logger.debug("skipping library code matching: only have PE signatures") - except (IOError) as e: - logger.error("%s", str(e)) - return E_INVALID_SIG + if isinstance(file_extractor, capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor): + format_ = FORMAT_DOTNET - if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): - format = "freeze" + if format_ == FORMAT_FREEZE: with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) else: - format = args.format - if format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_32): - format = "sc32" - elif format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_64): - format = "sc64" + try: + if format_ == FORMAT_PE: + sig_paths = get_signatures(args.signatures) + else: + sig_paths = [] + logger.debug("skipping library code matching: only have native PE signatures") + except IOError as e: + logger.error("%s", str(e)) + return E_INVALID_SIG should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) try: extractor = get_extractor( - args.sample, format, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet + args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet ) except UnsupportedFormatError: - logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE or ELF file.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)." - ) - logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") - logger.error("-" * 80) + log_unsupported_format_error() return E_INVALID_FILE_TYPE except UnsupportedArchError: - logger.error("-" * 80) - logger.error(" Input file does not appear to target a supported architecture.") - logger.error(" ") - logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).") - logger.error("-" * 80) + log_unsupported_arch_error() return E_INVALID_FILE_ARCH except UnsupportedOSError: - logger.error("-" * 80) - logger.error(" Input file does not appear to target a supported OS.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing executables for some operating systems (including Windows and Linux)." - ) - logger.error("-" * 80) + log_unsupported_os_error() return E_INVALID_FILE_OS meta = collect_metadata(argv, args.sample, args.rules, extractor) diff --git a/capa/render/json.py b/capa/render/json.py index a70f9122..9f595d4a 100644 --- a/capa/render/json.py +++ b/capa/render/json.py @@ -7,9 +7,9 @@ # See the License for the specific language governing permissions and limitations under the License. import json -import capa.render.result_document from capa.rules import RuleSet from capa.engine import MatchResults +from capa.render.result_document import convert_capabilities_to_result_document class CapaJsonObjectEncoder(json.JSONEncoder): @@ -27,7 +27,7 @@ class CapaJsonObjectEncoder(json.JSONEncoder): def render(meta, rules: RuleSet, capabilities: MatchResults) -> str: return json.dumps( - capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities), + convert_capabilities_to_result_document(meta, rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True, ) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 1a0bde69..33e083fa 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -7,7 +7,6 @@ # See the License for the specific language governing permissions and limitations under the License. import copy -import capa.rules import capa.engine import capa.render.utils import capa.features.common diff --git a/scripts/lint.py b/scripts/lint.py index 6c337cff..553e9f2e 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -41,6 +41,7 @@ import tqdm.contrib.logging import capa.main import capa.rules import capa.engine +import capa.helpers import capa.features.insn import capa.features.common from capa.rules import Rule, RuleSet @@ -286,16 +287,16 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]: logger.debug("found cached results: %s: %d capabilities", nice_path, len(ctx.capabilities_by_sample[path])) return ctx.capabilities_by_sample[path] - if nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_32): - format = "sc32" - elif nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_64): - format = "sc64" + if nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_32): + format_ = "sc32" + elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64): + format_ = "sc64" else: - format = "auto" + format_ = "auto" logger.debug("analyzing sample: %s", nice_path) extractor = capa.main.get_extractor( - nice_path, format, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True + nice_path, format_, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True ) capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 4f5761b6..b2af9446 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -59,7 +59,9 @@ import colorama import capa.main import capa.rules import capa.engine +import capa.helpers import capa.features +import capa.exceptions import capa.render.utils as rutils import capa.features.freeze import capa.render.result_document @@ -162,25 +164,11 @@ def main(argv=None): extractor = capa.main.get_extractor( args.sample, args.format, args.backend, sig_paths, should_save_workspace ) - except capa.main.UnsupportedFormatError: - logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE file.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)." - ) - logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") - logger.error("-" * 80) + except capa.exceptions.UnsupportedFormatError: + capa.helpers.log_unsupported_format_error() return -1 - except capa.main.UnsupportedRuntimeError: - logger.error("-" * 80) - logger.error(" Unsupported runtime or Python interpreter.") - logger.error(" ") - logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.") - logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") - logger.error(" ") - logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.") - logger.error("-" * 80) + except capa.exceptions.UnsupportedRuntimeError: + capa.helpers.log_unsupported_runtime_error() return -1 meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor) diff --git a/scripts/show-features.py b/scripts/show-features.py index a4f7f3b2..134d1df6 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -75,8 +75,10 @@ import capa.rules import capa.engine import capa.helpers import capa.features +import capa.exceptions import capa.features.common import capa.features.freeze +from capa.helpers import log_unsupported_runtime_error logger = logging.getLogger("capa.show-features") @@ -113,27 +115,19 @@ def main(argv=None): extractor = capa.main.get_extractor( args.sample, args.format, args.backend, sig_paths, should_save_workspace ) - except capa.main.UnsupportedFormatError: - logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE file.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)." - ) - logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") - logger.error("-" * 80) + except capa.exceptions.UnsupportedFormatError: + capa.helpers.log_unsupported_format_error() return -1 - except capa.main.UnsupportedRuntimeError: - logger.error("-" * 80) - logger.error(" Unsupported runtime or Python interpreter.") - logger.error(" ") - logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.") - logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") - logger.error(" ") - logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.") - logger.error("-" * 80) + except capa.exceptions.UnsupportedRuntimeError: + log_unsupported_runtime_error() return -1 + for feature, va in extractor.extract_global_features(): + if va: + print("global: 0x%08x: %s" % (va, feature)) + else: + print("global: 0x00000000: %s" % (feature)) + if not args.function: for feature, va in extractor.extract_file_features(): if va: diff --git a/setup.py b/setup.py index 92234b9f..9dea5043 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,8 @@ requirements = [ "smda==1.7.1", "pefile==2021.9.3", "pyelftools==0.28", + "dnfile==0.10.0", + "dncil==1.0.0", ] # this sets __version__ diff --git a/tests/fixtures.py b/tests/fixtures.py index a23f0c57..7f5abc73 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -22,9 +22,23 @@ import capa.features.file import capa.features.insn import capa.features.common import capa.features.basicblock -from capa.features.common import OS, OS_LINUX, ARCH_I386, FORMAT_PE, ARCH_AMD64, FORMAT_ELF, OS_WINDOWS, Arch, Format +from capa.features.common import ( + OS, + OS_ANY, + OS_LINUX, + ARCH_I386, + FORMAT_PE, + ARCH_AMD64, + FORMAT_ELF, + OS_WINDOWS, + FORMAT_DOTNET, + Arch, + Format, +) CD = os.path.dirname(__file__) +DOTNET_DIR = os.path.join(CD, "data", "dotnet") +DNFILE_TESTFILES = os.path.join(DOTNET_DIR, "dnfile-testfiles") @contextlib.contextmanager @@ -122,6 +136,19 @@ def get_pefile_extractor(path): return capa.features.extractors.pefile.PefileFeatureExtractor(path) +def get_dotnetfile_extractor(path): + import capa.features.extractors.dotnetfile + + return capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path) + + +@lru_cache(maxsize=1) +def get_dnfile_extractor(path): + import capa.features.extractors.dnfile.extractor + + return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) + + def extract_global_features(extractor): features = collections.defaultdict(set) for feature, va in extractor.extract_global_features(): @@ -220,6 +247,14 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "79abd17391adc6251ecdc58d13d76baf.dll_") elif name.startswith("946a9"): return os.path.join(CD, "data", "946a99f36a46d335dec080d9a4371940.dll_") + elif name.startswith("b9f5b"): + return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_") + elif name.startswith("mixed-mode-64"): + return os.path.join(DNFILE_TESTFILES, "mixed-mode", "ModuleCode", "bin", "ModuleCode_amd64.exe") + elif name.startswith("hello-world"): + return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe") + elif name.startswith("_1c444"): + return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_") else: raise ValueError("unexpected sample fixture: %s" % name) @@ -272,7 +307,9 @@ def get_sample_md5_by_name(name): elif name.startswith("79abd"): return "79abd17391adc6251ecdc58d13d76baf" elif name.startswith("946a9"): - return "946a99f36a46d335dec080d9a4371940.dll_" + return "946a99f36a46d335dec080d9a4371940" + elif name.startswith("b9f5b"): + return "b9f5bd514485fb06da39beff051b9fdc" else: raise ValueError("unexpected sample fixture: %s" % name) @@ -626,6 +663,39 @@ FEATURE_PRESENCE_TESTS = sorted( key=lambda t: (t[0], t[1]), ) +FEATURE_PRESENCE_TESTS_DOTNET = sorted( + [ + ("b9f5b", "file", Arch(ARCH_I386), True), + ("b9f5b", "file", Arch(ARCH_AMD64), False), + ("mixed-mode-64", "file", Arch(ARCH_AMD64), True), + ("mixed-mode-64", "file", Arch(ARCH_I386), False), + ("b9f5b", "file", OS(OS_ANY), True), + ("b9f5b", "file", Format(FORMAT_DOTNET), True), + ("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True), + ("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True), + ("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True), + ("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True), + ("_1c444", "file", capa.features.file.Import("gdi32.CreateCompatibleBitmap"), True), + ("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True), + ("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False), + ("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True), + ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), + ( + "_1c444", + "function=0x1F68, bb=0x1F68, insn=0x1FF9", + capa.features.insn.API("System.Drawing.Image::FromHbitmap"), + True, + ), + ("_1c444", "function=0x1F68, bb=0x1F68, insn=0x1FF9", capa.features.insn.API("FromHbitmap"), False), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. + key=lambda t: (t[0], t[1]), +) + FEATURE_PRESENCE_TESTS_IDA = [ # file/imports # IDA can recover more names of APIs imported by ordinal @@ -641,6 +711,9 @@ FEATURE_COUNT_TESTS = [ ] +FEATURE_COUNT_TESTS_DOTNET = [] # type: ignore + + def do_test_feature_presence(get_extractor, sample, scope, feature, expected): extractor = get_extractor(sample) features = scope(extractor) @@ -738,3 +811,23 @@ def al_khaser_x86_extractor(): @pytest.fixture def pingtaest_extractor(): return get_extractor(get_data_path_by_name("pingtaest")) + + +@pytest.fixture +def b9f5b_dotnetfile_extractor(): + return get_dotnetfile_extractor(get_data_path_by_name("b9f5b")) + + +@pytest.fixture +def mixed_mode_64_dotnetfile_extractor(): + return get_dotnetfile_extractor(get_data_path_by_name("mixed-mode-64")) + + +@pytest.fixture +def hello_world_dnfile_extractor(): + return get_dnfile_extractor(get_data_path_by_name("hello-world")) + + +@pytest.fixture +def _1c444_dnfile_extractor(): + return get_dnfile_extractor(get_data_path_by_name("1c444...")) diff --git a/tests/test_dnfile_features.py b/tests/test_dnfile_features.py new file mode 100644 index 00000000..76540468 --- /dev/null +++ b/tests/test_dnfile_features.py @@ -0,0 +1,30 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import pytest +import fixtures +from fixtures import * +from fixtures import parametrize + + +@parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_PRESENCE_TESTS_DOTNET, + indirect=["sample", "scope"], +) +def test_dnfile_features(sample, scope, feature, expected): + fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected) + + +@parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_COUNT_TESTS_DOTNET, + indirect=["sample", "scope"], +) +def test_dnfile_feature_counts(sample, scope, feature, expected): + fixtures.do_test_feature_count(fixtures.get_dnfile_extractor, sample, scope, feature, expected) diff --git a/tests/test_dotnetfile_features.py b/tests/test_dotnetfile_features.py new file mode 100644 index 00000000..28d63cdb --- /dev/null +++ b/tests/test_dotnetfile_features.py @@ -0,0 +1,43 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import pytest +import fixtures +from fixtures import * +from fixtures import parametrize + + +@parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_PRESENCE_TESTS_DOTNET, + indirect=["sample", "scope"], +) +def test_dotnetfile_features(sample, scope, feature, expected): + if scope.__name__ != "file": + pytest.xfail("dotnetfile only extracts file scope features") + + if isinstance(feature, capa.features.file.FunctionName): + pytest.xfail("dotnetfile doesn't extract function names") + + fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected) + + +@parametrize( + "extractor,function,expected", + [ + ("b9f5b_dotnetfile_extractor", "is_dotnet_file", True), + ("b9f5b_dotnetfile_extractor", "is_mixed_mode", False), + ("mixed_mode_64_dotnetfile_extractor", "is_mixed_mode", True), + ("b9f5b_dotnetfile_extractor", "get_entry_point", 0x6000007), + ("b9f5b_dotnetfile_extractor", "get_runtime_version", (2, 5)), + ("b9f5b_dotnetfile_extractor", "get_meta_version_string", "v2.0.50727"), + ], +) +def test_dotnetfile_extractor(request, extractor, function, expected): + extractor_function = getattr(request.getfixturevalue(extractor), function) + assert extractor_function() == expected From 45c22a24a62ea349a250d923c9bb89f982bd375b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Apr 2022 14:12:38 +0000 Subject: [PATCH 03/30] build(deps-dev): bump types-requests from 2.27.16 to 2.27.19 Bumps [types-requests](https://github.com/python/typeshed) from 2.27.16 to 2.27.19. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-requests dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9dea5043..147899e9 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,7 @@ setuptools.setup( "types-tabulate==0.8.6", "types-termcolor==1.1.3", "types-psutil==5.8.20", - "types_requests==2.27.16", + "types_requests==2.27.19", ], }, zip_safe=False, From fd1785fe653c10378b8c2a139a4d9f8dcee20344 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Apr 2022 14:12:43 +0000 Subject: [PATCH 04/30] build(deps-dev): bump types-pyyaml from 6.0.5 to 6.0.6 Bumps [types-pyyaml](https://github.com/python/typeshed) from 6.0.5 to 6.0.6. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-pyyaml dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9dea5043..48768969 100644 --- a/setup.py +++ b/setup.py @@ -82,7 +82,7 @@ setuptools.setup( # type stubs for mypy "types-backports==0.1.3", "types-colorama==0.4.10", - "types-PyYAML==6.0.5", + "types-PyYAML==6.0.6", "types-tabulate==0.8.6", "types-termcolor==1.1.3", "types-psutil==5.8.20", From 65cf8509f91959b13d266163cca228cce938f83b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Apr 2022 15:48:15 +0000 Subject: [PATCH 05/30] build(deps-dev): bump types-colorama from 0.4.10 to 0.4.12 Bumps [types-colorama](https://github.com/python/typeshed) from 0.4.10 to 0.4.12. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-colorama dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 123c6d14..c7833c07 100644 --- a/setup.py +++ b/setup.py @@ -81,7 +81,7 @@ setuptools.setup( "requests==2.27.1", # type stubs for mypy "types-backports==0.1.3", - "types-colorama==0.4.10", + "types-colorama==0.4.12", "types-PyYAML==6.0.6", "types-tabulate==0.8.6", "types-termcolor==1.1.3", From 2226bf0faa0e2acb1cac3b429dd8663c143a4ed9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Apr 2022 15:48:17 +0000 Subject: [PATCH 06/30] build(deps-dev): bump types-psutil from 5.8.20 to 5.8.22 Bumps [types-psutil](https://github.com/python/typeshed) from 5.8.20 to 5.8.22. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-psutil dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 123c6d14..e1cafb1c 100644 --- a/setup.py +++ b/setup.py @@ -85,7 +85,7 @@ setuptools.setup( "types-PyYAML==6.0.6", "types-tabulate==0.8.6", "types-termcolor==1.1.3", - "types-psutil==5.8.20", + "types-psutil==5.8.22", "types_requests==2.27.19", ], }, From 308a47a7846a425d2804a22bb3f71861e4112379 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Apr 2022 15:48:18 +0000 Subject: [PATCH 07/30] build(deps-dev): bump types-tabulate from 0.8.6 to 0.8.7 Bumps [types-tabulate](https://github.com/python/typeshed) from 0.8.6 to 0.8.7. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-tabulate dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 123c6d14..0d3d6357 100644 --- a/setup.py +++ b/setup.py @@ -83,7 +83,7 @@ setuptools.setup( "types-backports==0.1.3", "types-colorama==0.4.10", "types-PyYAML==6.0.6", - "types-tabulate==0.8.6", + "types-tabulate==0.8.7", "types-termcolor==1.1.3", "types-psutil==5.8.20", "types_requests==2.27.19", From 054bcc9cb8d63a8d8e5576adb7a36cab39c48128 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Apr 2022 14:13:26 +0000 Subject: [PATCH 08/30] build(deps-dev): bump pytest from 7.1.1 to 7.1.2 Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.1.1 to 7.1.2. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.1.1...7.1.2) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c4be8605..f62fef88 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ setuptools.setup( install_requires=requirements, extras_require={ "dev": [ - "pytest==7.1.1", + "pytest==7.1.2", "pytest-sugar==0.9.4", "pytest-instafail==0.4.2", "pytest-cov==3.0.0", From 45738773ca8ac78640502d604190282d737e0c1f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Apr 2022 14:13:28 +0000 Subject: [PATCH 09/30] build(deps-dev): bump types-pyyaml from 6.0.6 to 6.0.7 Bumps [types-pyyaml](https://github.com/python/typeshed) from 6.0.6 to 6.0.7. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-pyyaml dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c4be8605..d889a81d 100644 --- a/setup.py +++ b/setup.py @@ -82,7 +82,7 @@ setuptools.setup( # type stubs for mypy "types-backports==0.1.3", "types-colorama==0.4.12", - "types-PyYAML==6.0.6", + "types-PyYAML==6.0.7", "types-tabulate==0.8.7", "types-termcolor==1.1.3", "types-psutil==5.8.22", From e950932e434e5e2d2b6389ab0f36e3ef691e56b9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Apr 2022 14:13:31 +0000 Subject: [PATCH 10/30] build(deps-dev): bump types-requests from 2.27.19 to 2.27.20 Bumps [types-requests](https://github.com/python/typeshed) from 2.27.19 to 2.27.20. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-requests dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c4be8605..de4ec6e4 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,7 @@ setuptools.setup( "types-tabulate==0.8.7", "types-termcolor==1.1.3", "types-psutil==5.8.22", - "types_requests==2.27.19", + "types_requests==2.27.20", ], }, zip_safe=False, From 0e18cea11ac334eac68acc6e0555da22448dcf8c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Apr 2022 14:13:34 +0000 Subject: [PATCH 11/30] build(deps): bump viv-utils[flirt] from 0.6.11 to 0.7.1 Bumps [viv-utils[flirt]](https://github.com/williballenthin/viv-utils) from 0.6.11 to 0.7.1. - [Release notes](https://github.com/williballenthin/viv-utils/releases) - [Commits](https://github.com/williballenthin/viv-utils/compare/v0.6.11...v0.7.1) --- updated-dependencies: - dependency-name: viv-utils[flirt] dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c4be8605..a67c3709 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ requirements = [ "termcolor==1.1.0", "wcwidth==0.2.5", "ida-settings==2.1.0", - "viv-utils[flirt]==0.6.11", + "viv-utils[flirt]==0.7.1", "halo==0.0.31", "networkx==2.5.1", "ruamel.yaml==0.17.21", From 10852a5d96c7fcf86096450289461139fef42c1f Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 27 Apr 2022 11:36:08 +0000 Subject: [PATCH 12/30] Sync capa rules submodule --- CHANGELOG.md | 3 ++- rules | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bf139e6..b4f2bf86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,13 +17,14 @@ - remove /x32 and /x64 flavors of number and operand features #932 @williballenthin - the tool now accepts multiple paths to rules, and JSON doc updated accordingly @williballenthin -### New Rules (5) +### New Rules (6) - data-manipulation/encryption/aes/manually-build-aes-constants huynh.t.nhan@gmail.com - nursery/get-process-image-filename michael.hunhoff@mandiant.com - compiler/v/compiled-with-v jakub.jozwiak@mandiant.com - compiler/zig/compiled-with-zig jakub.jozwiak@mandiant.com - anti-analysis/packer/huan/packed-with-huan jakub.jozwiak@mandiant.com +- internal/limitation/file/internal-dotnet-file-limitation william.ballenthin@mandiant.com - ### Bug Fixes diff --git a/rules b/rules index f8a03a30..3b72e490 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit f8a03a3014c9e7fa97cfd5b681cfe089d6083de0 +Subproject commit 3b72e490b3d8dcec25fc2fe1cff32e82bc002dbe From 9f12f069ee5fe1ba622f1a040366a3caed6292f9 Mon Sep 17 00:00:00 2001 From: Moritz Date: Wed, 27 Apr 2022 15:09:58 +0200 Subject: [PATCH 13/30] ci: fix build (#980) * ci: fix build * fix: newest PyInstaller version * fix: logo path * fix: logo path 2 * fix: logo path 3 * fix: icon another way * fix: remove icon for now * ci: only build after tests succeed * ci: add workflow_run check --- .github/pyinstaller/pyinstaller.spec | 2 +- .github/workflows/build.yml | 18 +++++++++++------- setup.py | 3 +++ 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/pyinstaller/pyinstaller.spec b/.github/pyinstaller/pyinstaller.spec index 6d0854a9..f1c4fb1b 100644 --- a/.github/pyinstaller/pyinstaller.spec +++ b/.github/pyinstaller/pyinstaller.spec @@ -95,7 +95,7 @@ exe = EXE(pyz, a.datas, exclude_binaries=False, name='capa', - icon='logo.ico', + # TODO not working anymore for unknown reason icon='logo.ico', debug=False, strip=None, upx=True, diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9ef58012..0d85fdce 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,16 +1,22 @@ name: build on: - push: - branches: [master] + workflow_run: + workflows: [CI] + types: + - completed release: types: [edited, published] jobs: build: + # only build on release or if tests pass + if: ${{ github.event_name == 'release' }} || ${{ github.event.workflow_run.conclusion == 'success' }} name: PyInstaller for ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: + # set to false for debugging + fail-fast: true matrix: include: - os: ubuntu-18.04 @@ -37,12 +43,10 @@ jobs: run: sudo apt-get install -y libyaml-dev - name: Upgrade pip, setuptools run: pip install --upgrade pip setuptools - - name: Install PyInstaller - run: pip install 'pyinstaller==4.10' - - name: Install capa - run: pip install -e . + - name: Install capa with build requirements + run: pip install -e .[build] - name: Build standalone executable - run: pyinstaller .github/pyinstaller/pyinstaller.spec + run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec - name: Does it run (PE)? run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_" - name: Does it run (Shellcode)? diff --git a/setup.py b/setup.py index db9f5381..29e3acf1 100644 --- a/setup.py +++ b/setup.py @@ -88,6 +88,9 @@ setuptools.setup( "types-psutil==5.8.22", "types_requests==2.27.20", ], + "build": [ + "pyinstaller==5.0", + ], }, zip_safe=False, keywords="capa malware analysis capability detection FLARE", From 49b1296d6eb494d4a98ed1dc4df93cad86c53b9c Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 27 Apr 2022 17:18:08 +0000 Subject: [PATCH 14/30] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 3b72e490..6728fb0d 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 3b72e490b3d8dcec25fc2fe1cff32e82bc002dbe +Subproject commit 6728fb0d5ad4d452959f7d4da49032fa48b3e49f From daf483309eb74c1b9bf04c10efdd0869de6ae000 Mon Sep 17 00:00:00 2001 From: Moritz Date: Thu, 5 May 2022 20:19:35 +0200 Subject: [PATCH 15/30] fix: temporarily accept x32/x64 flavors but ignore (#1014) --- capa/rules.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/capa/rules.py b/capa/rules.py index f42b56a4..abc06b19 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -259,6 +259,13 @@ def parse_feature(key: str): return capa.features.insn.Number elif key == "offset": return capa.features.insn.Offset + # TODO remove x32/x64 flavor keys once fixed master/rules + elif key.startswith("number/"): + logger.warning("x32/x64 flavor currently not supported and deprecated") + return capa.features.insn.Number + elif key.startswith("offset/"): + logger.warning("x32/x64 flavor currently not supported and deprecated") + return capa.features.insn.Offset elif key == "mnemonic": return capa.features.insn.Mnemonic elif key == "basic blocks": From 0066b3f33ab17eb96c654c29a45eb2b3b6215775 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 13:28:48 -0600 Subject: [PATCH 16/30] build(deps): bump dnfile from 0.10.0 to 0.11.0 (#1004) --- capa/features/extractors/dnfile/helpers.py | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index c7304462..4457afe9 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -105,7 +105,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: TypeName (index into String heap) TypeNamespace (index into String heap) """ - if not hasattr(pe.net.mdtables, "MemberRef"): + if not hasattr(pe.net.mdtables, "MemberRef") or pe.net.mdtables.MemberRef is None: return for (rid, row) in enumerate(pe.net.mdtables.MemberRef): @@ -130,7 +130,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: ImportName (index into the String heap) ImportScope (index into the ModuleRef table) """ - if not hasattr(pe.net.mdtables, "ImplMap"): + if not hasattr(pe.net.mdtables, "ImplMap") or pe.net.mdtables.ImplMap is None: return for row in pe.net.mdtables.ImplMap: @@ -154,7 +154,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: """get managed methods from MethodDef table""" - if not hasattr(pe.net.mdtables, "MethodDef"): + if not hasattr(pe.net.mdtables, "MethodDef") or pe.net.mdtables.MethodDef is None: return for row in pe.net.mdtables.MethodDef: diff --git a/setup.py b/setup.py index 29e3acf1..b9cabb91 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ requirements = [ "smda==1.7.1", "pefile==2021.9.3", "pyelftools==0.28", - "dnfile==0.10.0", + "dnfile==0.11.0", "dncil==1.0.0", ] From 24c4215820a005dcd03353c3d5494989f947356e Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 May 2022 13:39:29 -0600 Subject: [PATCH 17/30] dotnet: add file string parsing (#1012) --- CHANGELOG.md | 1 + capa/features/extractors/dnfile/file.py | 8 ++++++-- capa/features/extractors/dotnetfile.py | 19 +++++++++++++++++-- tests/fixtures.py | 3 +++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4f2bf86..15d56672 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin - extract additional offset/number features in certain circumstances #320 @williballenthin - add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin + - add file string extraction for dotnet files #1012 @mike-hunhoff ### Breaking Changes diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 99e2643c..f0af0085 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Tuple, Iterator if TYPE_CHECKING: import dnfile - from capa.features.common import Feature, Format + from capa.features.common import Feature, Format, String from capa.features.file import Import import capa.features.extractors @@ -26,6 +26,10 @@ def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe) +def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe) + + def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: for file_handler in FILE_HANDLERS: for (feature, token) in file_handler(pe): @@ -34,7 +38,7 @@ def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: FILE_HANDLERS = ( extract_file_import_names, - # TODO extract_file_strings, + extract_file_strings, # TODO extract_file_function_names, extract_file_format, ) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index a9a2c600..07a88147 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -7,7 +7,18 @@ import pefile import capa.features.extractors.helpers from capa.features.file import Import -from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature +from capa.features.common import ( + OS, + OS_ANY, + ARCH_ANY, + ARCH_I386, + ARCH_AMD64, + FORMAT_DOTNET, + Arch, + Format, + String, + Feature, +) from capa.features.extractors.base_extractor import FeatureExtractor from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports @@ -45,6 +56,10 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]: yield Arch(ARCH_ANY), 0x0 +def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, int]]: + yield from capa.features.extractors.common.extract_file_strings(pe.__data__) + + def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: for file_handler in FILE_HANDLERS: for feature, va in file_handler(pe=pe): # type: ignore @@ -53,7 +68,7 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: FILE_HANDLERS = ( extract_file_import_names, - # TODO extract_file_strings, + extract_file_strings, # TODO extract_file_function_names, extract_file_format, ) diff --git a/tests/fixtures.py b/tests/fixtures.py index 7f5abc73..af56cb40 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -671,10 +671,13 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("mixed-mode-64", "file", Arch(ARCH_I386), False), ("b9f5b", "file", OS(OS_ANY), True), ("b9f5b", "file", Format(FORMAT_DOTNET), True), + ("hello-world", "file", capa.features.common.String("Hello World!"), True), ("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True), ("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True), ("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True), ("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True), + ("_1c444", "file", capa.features.common.String(r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall"), True), + ("_1c444", "file", capa.features.common.String("get_IsAlive"), True), ("_1c444", "file", capa.features.file.Import("gdi32.CreateCompatibleBitmap"), True), ("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True), ("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False), From 20d80c1a2e1ddfb3fe1848257b81584659e78e48 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 14:19:21 -0600 Subject: [PATCH 18/30] build(deps-dev): bump types-colorama from 0.4.12 to 0.4.13 (#1010) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b9cabb91..006bbeba 100644 --- a/setup.py +++ b/setup.py @@ -81,7 +81,7 @@ setuptools.setup( "requests==2.27.1", # type stubs for mypy "types-backports==0.1.3", - "types-colorama==0.4.12", + "types-colorama==0.4.13", "types-PyYAML==6.0.7", "types-tabulate==0.8.7", "types-termcolor==1.1.3", From 6d218aaf0dd739ae4de9447d980a2068c55eebaf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 14:19:35 -0600 Subject: [PATCH 19/30] build(deps-dev): bump types-requests from 2.27.20 to 2.27.25 (#1007) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 006bbeba..17519585 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,7 @@ setuptools.setup( "types-tabulate==0.8.7", "types-termcolor==1.1.3", "types-psutil==5.8.22", - "types_requests==2.27.20", + "types_requests==2.27.25", ], "build": [ "pyinstaller==5.0", From dcf43b6feedf44e7800a2f0ffa29ee449e12b8fe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 20:19:53 +0000 Subject: [PATCH 20/30] build(deps): bump vivisect from 1.0.7 to 1.0.8 Bumps [vivisect](https://github.com/vivisect/vivisect) from 1.0.7 to 1.0.8. - [Release notes](https://github.com/vivisect/vivisect/releases) - [Changelog](https://github.com/vivisect/vivisect/blob/master/CHANGELOG.rst) - [Commits](https://github.com/vivisect/vivisect/compare/v1.0.7...v1.0.8) --- updated-dependencies: - dependency-name: vivisect dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index b9cabb91..bf1205aa 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ requirements = [ "halo==0.0.31", "networkx==2.5.1", "ruamel.yaml==0.17.21", - "vivisect==1.0.7", + "vivisect==1.0.8", "smda==1.7.1", "pefile==2021.9.3", "pyelftools==0.28", @@ -81,12 +81,12 @@ setuptools.setup( "requests==2.27.1", # type stubs for mypy "types-backports==0.1.3", - "types-colorama==0.4.12", + "types-colorama==0.4.13", "types-PyYAML==6.0.7", "types-tabulate==0.8.7", "types-termcolor==1.1.3", "types-psutil==5.8.22", - "types_requests==2.27.20", + "types_requests==2.27.25", ], "build": [ "pyinstaller==5.0", From eefc0a9632fb81638384a2a9d5eab011a9e06e2f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 14:19:56 -0600 Subject: [PATCH 21/30] build(deps-dev): bump pyinstaller from 5.0 to 5.0.1 (#1008) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 17519585..65e5a091 100644 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ setuptools.setup( "types_requests==2.27.25", ], "build": [ - "pyinstaller==5.0", + "pyinstaller==5.0.1", ], }, zip_safe=False, From bcd00004b8f5bfd4f44134cbb1895f14e7822c56 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 14:20:16 -0600 Subject: [PATCH 22/30] build(deps-dev): bump types-tabulate from 0.8.7 to 0.8.8 (#1009) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 65e5a091..67c2da6c 100644 --- a/setup.py +++ b/setup.py @@ -83,7 +83,7 @@ setuptools.setup( "types-backports==0.1.3", "types-colorama==0.4.13", "types-PyYAML==6.0.7", - "types-tabulate==0.8.7", + "types-tabulate==0.8.8", "types-termcolor==1.1.3", "types-psutil==5.8.22", "types_requests==2.27.25", From aca4f27ee88be2f6fa6b1aac96e4b65969b0bf3a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 20:20:18 +0000 Subject: [PATCH 23/30] build(deps-dev): bump mypy from 0.942 to 0.950 Bumps [mypy](https://github.com/python/mypy) from 0.942 to 0.950. - [Release notes](https://github.com/python/mypy/releases) - [Commits](https://github.com/python/mypy/compare/v0.942...v0.950) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 65e5a091..dd43d968 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,7 @@ setuptools.setup( "pycodestyle==2.8.0", "black==22.3.0", "isort==5.10.1", - "mypy==0.942", + "mypy==0.950", "psutil==5.9.0", "stix2==3.0.1", "requests==2.27.1", From d0a1313f33bcd942e38dc49f08c529f626e7806a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 20:21:15 +0000 Subject: [PATCH 24/30] build(deps-dev): bump types-termcolor from 1.1.3 to 1.1.4 Bumps [types-termcolor](https://github.com/python/typeshed) from 1.1.3 to 1.1.4. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-termcolor dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 67c2da6c..75f82b26 100644 --- a/setup.py +++ b/setup.py @@ -84,7 +84,7 @@ setuptools.setup( "types-colorama==0.4.13", "types-PyYAML==6.0.7", "types-tabulate==0.8.8", - "types-termcolor==1.1.3", + "types-termcolor==1.1.4", "types-psutil==5.8.22", "types_requests==2.27.25", ], From 5573794a1f4fe688d31d583254b2e9695a5efe91 Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Fri, 6 May 2022 15:49:04 +0200 Subject: [PATCH 25/30] dep: bump viv-utils --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bf1205aa..7f699f06 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ requirements = [ "termcolor==1.1.0", "wcwidth==0.2.5", "ida-settings==2.1.0", - "viv-utils[flirt]==0.7.1", + "viv-utils[flirt]==0.7.4", "halo==0.0.31", "networkx==2.5.1", "ruamel.yaml==0.17.21", From 80e4161b40ddb1e63ffe5d265b97bdd5810eee30 Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Fri, 6 May 2022 16:29:54 +0200 Subject: [PATCH 26/30] ci: build on PR --- .github/workflows/build.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0d85fdce..022fc0ee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,17 +1,13 @@ name: build on: - workflow_run: - workflows: [CI] - types: - - completed + pull_request: + branches: [ master ] release: types: [edited, published] jobs: build: - # only build on release or if tests pass - if: ${{ github.event_name == 'release' }} || ${{ github.event.workflow_run.conclusion == 'success' }} name: PyInstaller for ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: From a9c9b3cea8e6b3d436fc3294d50d6f41e3aaf70e Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 6 May 2022 08:34:50 -0600 Subject: [PATCH 27/30] dotnet: extract file function names (#1015) --- CHANGELOG.md | 1 + capa/features/extractors/dnfile/file.py | 10 ++-- capa/features/extractors/dnfile/helpers.py | 57 ++++++++++++++++++---- capa/features/extractors/dnfile/insn.py | 26 ++++++++-- capa/features/extractors/dotnetfile.py | 35 ++++++++----- tests/fixtures.py | 3 ++ 6 files changed, 102 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d56672..aba76951 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - extract additional offset/number features in certain circumstances #320 @williballenthin - add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin - add file string extraction for dotnet files #1012 @mike-hunhoff + - add file function-name extraction for dotnet files #1015 @mike-hunhoff ### Breaking Changes diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index f0af0085..4168249a 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -13,19 +13,23 @@ from typing import TYPE_CHECKING, Tuple, Iterator if TYPE_CHECKING: import dnfile from capa.features.common import Feature, Format, String - from capa.features.file import Import + from capa.features.file import Import, FunctionName import capa.features.extractors def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: - yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe) + yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe) def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe) +def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe) + + def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, int]]: yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe) @@ -38,7 +42,7 @@ def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: FILE_HANDLERS = ( extract_file_import_names, + extract_file_function_names, extract_file_strings, - # TODO extract_file_function_names, extract_file_format, ) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 4457afe9..ddca5e61 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -105,18 +105,24 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: TypeName (index into String heap) TypeNamespace (index into String heap) """ - if not hasattr(pe.net.mdtables, "MemberRef") or pe.net.mdtables.MemberRef is None: + if not is_dotnet_table_valid(pe, "MemberRef"): return for (rid, row) in enumerate(pe.net.mdtables.MemberRef): if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): continue - token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) - # like System.IO.File::OpenRead - imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}" + # like File::OpenRead + name = f"{row.Class.row.TypeName}::{row.Name}" - yield token, imp + # ECMA II.22.38: TypeNamespace can be null or non-null + if row.Class.row.TypeNamespace: + # like System.IO.File::OpenRead + name = f"{row.Class.row.TypeNamespace}.{name}" + + token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1) + + yield token, name def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: @@ -130,7 +136,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: ImportName (index into the String heap) ImportScope (index into the ModuleRef table) """ - if not hasattr(pe.net.mdtables, "ImplMap") or pe.net.mdtables.ImplMap is None: + if not is_dotnet_table_valid(pe, "ImplMap"): return for row in pe.net.mdtables.ImplMap: @@ -147,14 +153,14 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: dll = dll.split(".")[0] # like kernel32.CreateFileA - imp: str = f"{dll}.{symbol}" + name: str = f"{dll}.{symbol}" - yield token, imp + yield token, name def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: """get managed methods from MethodDef table""" - if not hasattr(pe.net.mdtables, "MethodDef") or pe.net.mdtables.MethodDef is None: + if not is_dotnet_table_valid(pe, "MethodDef"): return for row in pe.net.mdtables.MethodDef: @@ -167,3 +173,36 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody] continue yield body + + +def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: + return bool(getattr(pe.net.mdtables, table_name, None)) + + +def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get managed method names from TypeDef table + + see https://www.ntcore.com/files/dotnetformat.htm + + 02 - TypeDef Table + Each row represents a class in the current assembly. + TypeName (index into String heap) + TypeNamespace (index into String heap) + MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type) + """ + if not is_dotnet_table_valid(pe, "TypeDef"): + return + + for row in pe.net.mdtables.TypeDef: + for index in row.MethodList: + # like File::OpenRead + name = f"{row.TypeName}::{index.row.Name}" + + # ECMA II.22.37: TypeNamespace can be null or non-null + if row.TypeNamespace: + # like System.IO.File::OpenRead + name = f"{row.TypeNamespace}.{name}" + + token = calculate_dotnet_token_value(index.table.number, index.row_index) + + yield token, name diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index 262b9779..15a75ae0 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -9,7 +9,6 @@ from __future__ import annotations from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional -from itertools import chain if TYPE_CHECKING: from dncil.cil.instruction import Instruction @@ -26,24 +25,41 @@ from capa.features.extractors.dnfile.helpers import ( read_dotnet_user_string, get_dotnet_managed_imports, get_dotnet_unmanaged_imports, + get_dotnet_managed_method_names, ) def get_imports(ctx: Dict) -> Dict: if "imports_cache" not in ctx: - ctx["imports_cache"] = { - token: imp - for (token, imp) in chain(get_dotnet_managed_imports(ctx["pe"]), get_dotnet_unmanaged_imports(ctx["pe"])) - } + ctx["imports_cache"] = {} + + for (token, name) in get_dotnet_managed_imports(ctx["pe"]): + ctx["imports_cache"][token] = name + for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]): + ctx["imports_cache"][token] = name + return ctx["imports_cache"] +def get_methods(ctx: Dict) -> Dict: + if "methods_cache" not in ctx: + ctx["methods_cache"] = {} + + for (token, name) in get_dotnet_managed_method_names(ctx["pe"]): + ctx["methods_cache"][token] = name + + return ctx["methods_cache"] + + def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]: """parse instruction API features""" if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return name: str = get_imports(f.ctx).get(insn.operand.value, "") + if not name: + name = get_methods(f.ctx).get(insn.operand.value, "") + if not name: return diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 07a88147..6c6adb0d 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -1,12 +1,11 @@ import logging from typing import Tuple, Iterator -from itertools import chain import dnfile import pefile import capa.features.extractors.helpers -from capa.features.file import Import +from capa.features.file import Import, FunctionName from capa.features.common import ( OS, OS_ANY, @@ -20,7 +19,12 @@ from capa.features.common import ( Feature, ) from capa.features.extractors.base_extractor import FeatureExtractor -from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports +from capa.features.extractors.dnfile.helpers import ( + get_dotnet_managed_imports, + calculate_dotnet_token_value, + get_dotnet_unmanaged_imports, + get_dotnet_managed_method_names, +) logger = logging.getLogger(__name__) @@ -30,15 +34,20 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]: def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]: - for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)): - if "::" in imp: - # like System.IO.File::OpenRead - yield Import(imp), token - else: - # like kernel32.CreateFileA - dll, _, symbol = imp.rpartition(".") - for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): - yield Import(symbol_variant), token + for (token, name) in get_dotnet_managed_imports(pe): + # like System.IO.File::OpenRead + yield Import(name), token + + for (token, name) in get_dotnet_unmanaged_imports(pe): + # like kernel32.CreateFileA + dll, _, symbol = name.rpartition(".") + for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield Import(name_variant), token + + +def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, int]]: + for (token, name) in get_dotnet_managed_method_names(pe): + yield FunctionName(name), token def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]: @@ -68,8 +77,8 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: FILE_HANDLERS = ( extract_file_import_names, + extract_file_function_names, extract_file_strings, - # TODO extract_file_function_names, extract_file_format, ) diff --git a/tests/fixtures.py b/tests/fixtures.py index af56cb40..f4a91580 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -671,6 +671,9 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("mixed-mode-64", "file", Arch(ARCH_I386), False), ("b9f5b", "file", OS(OS_ANY), True), ("b9f5b", "file", Format(FORMAT_DOTNET), True), + ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::Main"), True), + ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.ctor"), True), + ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.cctor"), False), ("hello-world", "file", capa.features.common.String("Hello World!"), True), ("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True), ("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True), From 6fb9dd961a7917e2bb5b334d59fadaf7ac56a502 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 6 May 2022 13:05:48 -0600 Subject: [PATCH 28/30] dotnet: emit unmanaged call characteristic (#1023) --- CHANGELOG.md | 1 + capa/features/extractors/dnfile/insn.py | 64 ++++++++++++++++++------- capa/rules.py | 1 + tests/fixtures.py | 7 +++ 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aba76951..6b38aa34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin - add file string extraction for dotnet files #1012 @mike-hunhoff - add file function-name extraction for dotnet files #1015 @mike-hunhoff + - add unmanaged call characteristic for dotnet files #1023 @mike-hunhoff ### Breaking Changes diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index 15a75ae0..5974cde1 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -8,20 +8,22 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional +from typing import TYPE_CHECKING, Any, Dict, Tuple, Iterator, Optional if TYPE_CHECKING: from dncil.cil.instruction import Instruction from dncil.cil.body import CilMethodBody from capa.features.common import Feature -from dncil.clr.token import StringToken +import dnfile +from dncil.clr.token import StringToken, InvalidToken from dncil.cil.opcode import OpCodes import capa.features.extractors.helpers from capa.features.insn import API, Number -from capa.features.common import String +from capa.features.common import String, Characteristic from capa.features.extractors.dnfile.helpers import ( + resolve_dotnet_token, read_dotnet_user_string, get_dotnet_managed_imports, get_dotnet_unmanaged_imports, @@ -29,37 +31,48 @@ from capa.features.extractors.dnfile.helpers import ( ) -def get_imports(ctx: Dict) -> Dict: - if "imports_cache" not in ctx: - ctx["imports_cache"] = {} - +def get_managed_imports(ctx: Dict) -> Dict: + if "managed_imports_cache" not in ctx: + ctx["managed_imports_cache"] = {} for (token, name) in get_dotnet_managed_imports(ctx["pe"]): - ctx["imports_cache"][token] = name - for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]): - ctx["imports_cache"][token] = name + ctx["managed_imports_cache"][token] = name + return ctx["managed_imports_cache"] - return ctx["imports_cache"] + +def get_unmanaged_imports(ctx: Dict) -> Dict: + if "unmanaged_imports_cache" not in ctx: + ctx["unmanaged_imports_cache"] = {} + for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]): + ctx["unmanaged_imports_cache"][token] = name + return ctx["unmanaged_imports_cache"] def get_methods(ctx: Dict) -> Dict: if "methods_cache" not in ctx: ctx["methods_cache"] = {} - for (token, name) in get_dotnet_managed_method_names(ctx["pe"]): ctx["methods_cache"][token] = name - return ctx["methods_cache"] +def get_callee_name(ctx: Dict, token: int) -> str: + """map dotnet token to method name""" + name: str = get_managed_imports(ctx).get(token, "") + if not name: + # we must check unmanaged imports before managed methods because we map forwarded managed methods + # to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis + name = get_unmanaged_imports(ctx).get(token, "") + if not name: + name = get_methods(ctx).get(token, "") + return name + + def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]: """parse instruction API features""" if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return - name: str = get_imports(f.ctx).get(insn.operand.value, "") - if not name: - name = get_methods(f.ctx).get(insn.operand.value, "") - + name: str = get_callee_name(f.ctx, insn.operand.value) if not name: return @@ -98,6 +111,22 @@ def extract_insn_string_features( yield String(user_string), insn.offset +def extract_unmanaged_call_characteristic_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Iterator[Tuple[Characteristic, int]]: + if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + return + + token: Any = resolve_dotnet_token(f.ctx["pe"], insn.operand) + if isinstance(token, InvalidToken): + return + if not isinstance(token, dnfile.mdtable.MethodDefRow): + return + + if any((token.Flags.mdPinvokeImpl, token.ImplFlags.miUnmanaged, token.ImplFlags.miNative)): + yield Characteristic("unmanaged call"), insn.offset + + def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: @@ -109,4 +138,5 @@ INSTRUCTION_HANDLERS = ( extract_insn_api_features, extract_insn_number_features, extract_insn_string_features, + extract_unmanaged_call_characteristic_features, ) diff --git a/capa/rules.py b/capa/rules.py index abc06b19..59d7238f 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -134,6 +134,7 @@ SUPPORTED_FEATURES: Dict[str, Set] = { capa.features.common.Characteristic("indirect call"), capa.features.common.Characteristic("call $+5"), capa.features.common.Characteristic("cross section flow"), + capa.features.common.Characteristic("unmanaged call"), }, } diff --git a/tests/fixtures.py b/tests/fixtures.py index f4a91580..0b4feea5 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -689,6 +689,13 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), + ( + "_1c444", + "function=0x1F59, bb=0x1F59, insn=0x1F5B", + capa.features.common.Characteristic("unmanaged call"), + True, + ), + ("_1c444", "function=0x2544", capa.features.common.Characteristic("unmanaged call"), False), ( "_1c444", "function=0x1F68, bb=0x1F68, insn=0x1FF9", From 0d849142bab8639727efe4842397a9efa881005c Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 6 May 2022 14:32:06 -0600 Subject: [PATCH 29/30] dotnet: emit mixed mode characteristic (#1024) --- CHANGELOG.md | 1 + capa/features/extractors/dnfile/file.py | 7 ++++++- capa/features/extractors/dnfile/helpers.py | 4 ++++ capa/features/extractors/dotnetfile.py | 10 +++++++++- capa/rules.py | 1 + tests/fixtures.py | 2 ++ 6 files changed, 23 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b38aa34..b52eb948 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - add file string extraction for dotnet files #1012 @mike-hunhoff - add file function-name extraction for dotnet files #1015 @mike-hunhoff - add unmanaged call characteristic for dotnet files #1023 @mike-hunhoff + - add mixed mode characteristic feature extraction for dotnet files #1024 @mike-hunhoff ### Breaking Changes diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 4168249a..248d8108 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Tuple, Iterator if TYPE_CHECKING: import dnfile - from capa.features.common import Feature, Format, String + from capa.features.common import Feature, Format, String, Characteristic from capa.features.file import Import, FunctionName import capa.features.extractors @@ -34,6 +34,10 @@ def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, int]]: yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe) +def extract_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, int]]: + yield from capa.features.extractors.dotnetfile.extract_mixed_mode_characteristic_features(pe=pe) + + def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: for file_handler in FILE_HANDLERS: for (feature, token) in file_handler(pe): @@ -45,4 +49,5 @@ FILE_HANDLERS = ( extract_file_function_names, extract_file_strings, extract_file_format, + extract_mixed_mode_characteristic_features, ) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index ddca5e61..7c8adcbd 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -206,3 +206,7 @@ def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str] token = calculate_dotnet_token_value(index.table.number, index.row_index) yield token, name + + +def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: + return not bool(pe.net.Flags.CLR_ILONLY) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 6c6adb0d..965c4ea4 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -17,9 +17,11 @@ from capa.features.common import ( Format, String, Feature, + Characteristic, ) from capa.features.extractors.base_extractor import FeatureExtractor from capa.features.extractors.dnfile.helpers import ( + is_dotnet_mixed_mode, get_dotnet_managed_imports, calculate_dotnet_token_value, get_dotnet_unmanaged_imports, @@ -69,6 +71,11 @@ def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, in yield from capa.features.extractors.common.extract_file_strings(pe.__data__) +def extract_mixed_mode_characteristic_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Characteristic, int]]: + if is_dotnet_mixed_mode(pe): + yield Characteristic("mixed mode"), 0x0 + + def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: for file_handler in FILE_HANDLERS: for feature, va in file_handler(pe=pe): # type: ignore @@ -80,6 +87,7 @@ FILE_HANDLERS = ( extract_file_function_names, extract_file_strings, extract_file_format, + extract_mixed_mode_characteristic_features, ) @@ -120,7 +128,7 @@ class DotnetFileFeatureExtractor(FeatureExtractor): return bool(self.pe.net) def is_mixed_mode(self) -> bool: - return not bool(self.pe.net.Flags.CLR_ILONLY) + return is_dotnet_mixed_mode(self.pe) def get_runtime_version(self) -> Tuple[int, int]: return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion diff --git a/capa/rules.py b/capa/rules.py index 59d7238f..1421c70a 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -101,6 +101,7 @@ SUPPORTED_FEATURES: Dict[str, Set] = { capa.features.common.Characteristic("embedded pe"), capa.features.common.String, capa.features.common.Format, + capa.features.common.Characteristic("mixed mode"), }, FUNCTION_SCOPE: { capa.features.common.MatchedRule, diff --git a/tests/fixtures.py b/tests/fixtures.py index 0b4feea5..9fbc57a0 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -669,6 +669,8 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("b9f5b", "file", Arch(ARCH_AMD64), False), ("mixed-mode-64", "file", Arch(ARCH_AMD64), True), ("mixed-mode-64", "file", Arch(ARCH_I386), False), + ("mixed-mode-64", "file", capa.features.common.Characteristic("mixed mode"), True), + ("hello-world", "file", capa.features.common.Characteristic("mixed mode"), False), ("b9f5b", "file", OS(OS_ANY), True), ("b9f5b", "file", Format(FORMAT_DOTNET), True), ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::Main"), True), From 141da27715efbe92408bd2f90af516a073a71034 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 11 May 2022 16:34:46 +0000 Subject: [PATCH 30/30] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 6728fb0d..52ff654c 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 6728fb0d5ad4d452959f7d4da49032fa48b3e49f +Subproject commit 52ff654ca0a73235df7d2e9bfbd52961f957cbc8