diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index d76fc748..c8444ed3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -41,7 +41,7 @@ // "forwardPorts": [], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev]", + "postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev] && pre-commit install", // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. "remoteUser": "vscode", diff --git a/CHANGELOG.md b/CHANGELOG.md index e60575c7..ed8cdb24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## master (unreleased) ### New Features +- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04 - Add a dynamic feature extractor for the CAPE sandbox @yelhamer [#1535](https://github.com/mandiant/capa/issues/1535) - Add unit tests for the new CAPE extractor #1563 @yelhamer - Add a CAPE file format and CAPE-based dynamic feature extraction to scripts/show-features.py #1566 @yelhamer @@ -15,14 +16,17 @@ ### Breaking Changes - -### New Rules (1) - +### New Rules (4) - executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com +- host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com +- host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com - ### Bug Fixes +- Fix binja backend stack string detection. #1473 @xusheng6 +- linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin + ### capa explorer IDA Pro plugin ### Development diff --git a/README.md b/README.md index 1c08af30..1bf6b159 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-824-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-826-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/capa/features/extractors/binja/basicblock.py b/capa/features/extractors/binja/basicblock.py index 30af516d..568ecc7a 100644 --- a/capa/features/extractors/binja/basicblock.py +++ b/capa/features/extractors/binja/basicblock.py @@ -75,10 +75,11 @@ def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int: return 0 dest = il.params[0] - if dest.operation != MediumLevelILOperation.MLIL_ADDRESS_OF: + if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]: + var = dest.src + else: return 0 - var = dest.src if var.source_type != VariableSourceType.StackVariableSourceType: return 0 diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 7e2249e0..c72dc43d 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -11,9 +11,10 @@ from typing import Tuple, Iterator from pathlib import Path from elftools.elf.elffile import ELFFile, SymbolTableSection +from elftools.elf.relocation import RelocationSection import capa.features.extractors.common -from capa.features.file import Import, Section +from capa.features.file import Export, Import, Section from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor @@ -21,11 +22,8 @@ from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureE logger = logging.getLogger(__name__) -def extract_file_import_names(elf, **kwargs): - # see https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/scripts/readelf.py#L372 - symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections()) if isinstance(s, SymbolTableSection)] - - for _, section in symbol_tables: +def extract_file_export_names(elf: ELFFile, **kwargs): + for section in elf.iter_sections(): if not isinstance(section, SymbolTableSection): continue @@ -35,14 +33,64 @@ def extract_file_import_names(elf, **kwargs): logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols()) + for symbol in section.iter_symbols(): + # The following conditions are based on the following article + # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html + if not symbol.name: + continue + if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: + continue + if symbol.entry.st_value == 0: + continue + if symbol.entry.st_shndx == "SHN_UNDEF": + continue + + yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value) + + +def extract_file_import_names(elf: ELFFile, **kwargs): + # Create a dictionary to store symbol names by their index + symbol_names = {} + + # Extract symbol names and store them in the dictionary + for section in elf.iter_sections(): + if not isinstance(section, SymbolTableSection): + continue + for _, symbol in enumerate(section.iter_symbols()): - if symbol.name and symbol.entry.st_info.type == "STT_FUNC": - # TODO(williballenthin): extract symbol address - # https://github.com/mandiant/capa/issues/1608 - yield Import(symbol.name), FileOffsetAddress(0x0) + # The following conditions are based on the following article + # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html + if not symbol.name: + continue + if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: + continue + if symbol.entry.st_value != 0: + continue + if symbol.entry.st_shndx != "SHN_UNDEF": + continue + if symbol.entry.st_name == 0: + continue + + symbol_names[_] = symbol.name + + for section in elf.iter_sections(): + if not isinstance(section, RelocationSection): + continue + + if section["sh_entsize"] == 0: + logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name) + continue + + logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations()) + + for relocation in section.iter_relocations(): + # Extract the symbol name from the symbol table using the symbol index in the relocation + if relocation["r_info_sym"] not in symbol_names: + continue + yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"]) -def extract_file_section_names(elf, **kwargs): +def extract_file_section_names(elf: ELFFile, **kwargs): for section in elf.iter_sections(): if section.name: yield Section(section.name), AbsoluteVirtualAddress(section.header.sh_addr) @@ -54,7 +102,7 @@ def extract_file_strings(buf, **kwargs): yield from capa.features.extractors.common.extract_file_strings(buf) -def extract_file_os(elf, buf, **kwargs): +def extract_file_os(elf: ELFFile, buf, **kwargs): # our current approach does not always get an OS value, e.g. for packed samples # for file limitation purposes, we're more lax here try: @@ -68,7 +116,7 @@ def extract_file_format(**kwargs): yield Format(FORMAT_ELF), NO_ADDRESS -def extract_file_arch(elf, **kwargs): +def extract_file_arch(elf: ELFFile, **kwargs): arch = elf.get_machine_arch() if arch == "x86": yield Arch("i386"), NO_ADDRESS @@ -85,8 +133,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i FILE_HANDLERS = ( - # TODO(williballenthin): implement extract_file_export_names - # https://github.com/mandiant/capa/issues/1607 + extract_file_export_names, extract_file_import_names, extract_file_section_names, extract_file_strings, diff --git a/pyproject.toml b/pyproject.toml index a28e244c..c5911126 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ classifiers = [ ] dependencies = [ "tqdm==4.65.0", - "pyyaml==6.0", + "pyyaml==6.0.1", "tabulate==0.9.0", "colorama==0.4.6", "termcolor==2.3.0", @@ -77,7 +77,7 @@ dev = [ "flake8-simplify==0.20.0", "flake8-use-pathlib==0.3.0", "flake8-copyright==0.2.4", - "ruff==0.0.278", + "ruff==0.0.280", "black==23.7.0", "isort==5.11.4", "mypy==1.4.1", @@ -89,11 +89,11 @@ dev = [ "types-backports==0.1.3", "types-colorama==0.4.15.11", "types-PyYAML==6.0.8", - "types-tabulate==0.9.0.1", + "types-tabulate==0.9.0.3", "types-termcolor==1.1.4", "types-psutil==5.8.23", - "types_requests==2.31.0.1", - "types-protobuf==4.23.0.1", + "types_requests==2.31.0.2", + "types-protobuf==4.23.0.2", ] build = [ "pyinstaller==5.10.1", diff --git a/rules b/rules index a49c174f..7685a232 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit a49c174fee5058ca3617a23e782bdcadacb12406 +Subproject commit 7685a232d94acbe7e69addb8bd89d752c9fa27a2 diff --git a/scripts/lint.py b/scripts/lint.py index 7c3c4acd..9256109c 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -569,6 +569,10 @@ class FeatureNtdllNtoskrnlApi(Lint): "ZwCreateProcess", "ZwCreateUserProcess", "RtlCreateUserProcess", + "NtProtectVirtualMemory", + "NtEnumerateSystemEnvironmentValuesEx", + "NtQuerySystemEnvironmentValueEx", + "NtQuerySystemEnvironmentValue", ): # ntoskrnl.exe does not export these routines continue @@ -579,6 +583,7 @@ class FeatureNtdllNtoskrnlApi(Lint): "KeStackAttachProcess", "ObfDereferenceObject", "KeUnstackDetachProcess", + "ExGetFirmwareEnvironmentVariable", ): # ntdll.dll does not export these routines continue diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index f0f13778..e9133ad8 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -40,9 +40,6 @@ except ImportError: indirect=["sample", "scope"], ) def test_binja_features(sample, scope, feature, expected): - if feature == capa.features.common.Characteristic("stack string"): - pytest.xfail("skip failing Binja stack string detection temporarily, see #1473") - if isinstance(feature, capa.features.file.Export) and "." in str(feature.value): pytest.xfail("skip Binja unsupported forwarded export feature, see #1646") diff --git a/tests/test_elffile_features.py b/tests/test_elffile_features.py new file mode 100644 index 00000000..7c10bc48 --- /dev/null +++ b/tests/test_elffile_features.py @@ -0,0 +1,71 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. +import io +from pathlib import Path + +from elftools.elf.elffile import ELFFile + +from capa.features.extractors.elffile import extract_file_export_names, extract_file_import_names + +CD = Path(__file__).resolve().parent +SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_" + + +def test_elffile_import_features(): + expected_imports = [ + "memfrob", + "puts", + "__libc_start_main", + "malloc", + "__cxa_finalize", + ] + path = Path(SAMPLE_PATH) + elf = ELFFile(io.BytesIO(path.read_bytes())) + # Extract imports + imports = list(extract_file_import_names(elf)) + + # Verify that at least one import was found + assert len(imports) > 0, "No imports were found." + + # Extract the symbol names from the extracted imports + extracted_symbol_names = [imported[0].value for imported in imports] + + # Check if all expected symbol names are found + for symbol_name in expected_imports: + assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports." + + +def test_elffile_export_features(): + expected_exports = [ + "deregister_tm_clones", + "register_tm_clones", + "__do_global_dtors_aux", + "completed.8060", + "__do_global_dtors_aux_fini_array_entry", + "frame_dummy", + "_init", + "__libc_csu_fini", + "_fini", + "__dso_handle", + "_IO_stdin_used", + "__libc_csu_init", + ] + path = Path(SAMPLE_PATH) + elf = ELFFile(io.BytesIO(path.read_bytes())) + # Extract imports + exports = list(extract_file_export_names(elf)) + + # Verify that at least one export was found + assert len(exports) > 0, "No exports were found." + + # Extract the symbol names from the extracted imports + extracted_symbol_names = [exported[0].value for exported in exports] + + # Check if all expected symbol names are found + for symbol_name in expected_exports: + assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."