This commit is contained in:
Willi Ballenthin
2023-08-07 08:40:03 +00:00
10 changed files with 156 additions and 31 deletions

View File

@@ -41,7 +41,7 @@
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev]",
"postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev] && pre-commit install",
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode",

View File

@@ -3,6 +3,7 @@
## master (unreleased)
### New Features
- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04
- Add a dynamic feature extractor for the CAPE sandbox @yelhamer [#1535](https://github.com/mandiant/capa/issues/1535)
- Add unit tests for the new CAPE extractor #1563 @yelhamer
- Add a CAPE file format and CAPE-based dynamic feature extraction to scripts/show-features.py #1566 @yelhamer
@@ -15,14 +16,17 @@
### Breaking Changes
### New Rules (1)
### New Rules (3)
- executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com
- host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com
- host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com
-
### Bug Fixes
- Fix binja backend stack string detection. #1473 @xusheng6
- linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin
### capa explorer IDA Pro plugin
### Development

View File

@@ -2,7 +2,7 @@
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-824-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-826-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

View File

@@ -75,10 +75,11 @@ def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
return 0
dest = il.params[0]
if dest.operation != MediumLevelILOperation.MLIL_ADDRESS_OF:
if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
var = dest.src
else:
return 0
var = dest.src
if var.source_type != VariableSourceType.StackVariableSourceType:
return 0

View File

@@ -11,9 +11,10 @@ from typing import Tuple, Iterator
from pathlib import Path
from elftools.elf.elffile import ELFFile, SymbolTableSection
from elftools.elf.relocation import RelocationSection
import capa.features.extractors.common
from capa.features.file import Import, Section
from capa.features.file import Export, Import, Section
from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
@@ -21,11 +22,8 @@ from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureE
logger = logging.getLogger(__name__)
def extract_file_import_names(elf, **kwargs):
# see https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/scripts/readelf.py#L372
symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections()) if isinstance(s, SymbolTableSection)]
for _, section in symbol_tables:
def extract_file_export_names(elf: ELFFile, **kwargs):
for section in elf.iter_sections():
if not isinstance(section, SymbolTableSection):
continue
@@ -35,14 +33,64 @@ def extract_file_import_names(elf, **kwargs):
logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols())
for symbol in section.iter_symbols():
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
continue
if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
continue
if symbol.entry.st_value == 0:
continue
if symbol.entry.st_shndx == "SHN_UNDEF":
continue
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
def extract_file_import_names(elf: ELFFile, **kwargs):
    """
    Yield an Import feature for each relocation that references an undefined symbol.

    Undefined (imported) symbols are first collected from every symbol table.
    Then each relocation section is walked and its entries are resolved against
    the symbol table that the relocation section links to via `sh_link`.

    Args:
      elf: the parsed ELF file to inspect.
      **kwargs: ignored; accepted so that all file handlers share one calling convention.

    Yields:
      Tuple of (Import(symbol name), FileOffsetAddress(relocation r_offset)).
    """
    # Symbol names keyed by (symbol table section index, symbol index).
    # A relocation's symbol index is only meaningful relative to the symbol table
    # named by the relocation section's `sh_link`. Keying by the bare symbol index
    # would let entries from .symtab and .dynsym overwrite one another.
    symbol_names = {}

    for section_index, section in enumerate(elf.iter_sections()):
        if not isinstance(section, SymbolTableSection):
            continue

        for symbol_index, symbol in enumerate(section.iter_symbols()):
            # The following conditions are based on the following article
            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
            if not symbol.name:
                continue
            if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
                continue
            # imports are undefined here: no value and no defining section
            if symbol.entry.st_value != 0:
                continue
            if symbol.entry.st_shndx != "SHN_UNDEF":
                continue
            if symbol.entry.st_name == 0:
                continue

            symbol_names[(section_index, symbol_index)] = symbol.name

    for section in elf.iter_sections():
        if not isinstance(section, RelocationSection):
            continue

        if section["sh_entsize"] == 0:
            logger.debug("Relocation section '%s' has a sh_entsize of zero!", section.name)
            continue

        logger.debug("Relocation section '%s' contains %s entries:", section.name, section.num_relocations())

        # index of the symbol table section that this relocation section refers to
        symtab_index = section["sh_link"]

        for relocation in section.iter_relocations():
            # Look up the symbol name using the relocation's symbol index within the linked symbol table
            key = (symtab_index, relocation["r_info_sym"])
            if key not in symbol_names:
                continue

            # NOTE(review): r_offset is reported as a file offset here, matching prior behavior;
            # for executables and shared objects the ELF spec defines it as a virtual address - confirm.
            yield Import(symbol_names[key]), FileOffsetAddress(relocation["r_offset"])
def extract_file_section_names(elf, **kwargs):
def extract_file_section_names(elf: ELFFile, **kwargs):
for section in elf.iter_sections():
if section.name:
yield Section(section.name), AbsoluteVirtualAddress(section.header.sh_addr)
@@ -54,7 +102,7 @@ def extract_file_strings(buf, **kwargs):
yield from capa.features.extractors.common.extract_file_strings(buf)
def extract_file_os(elf, buf, **kwargs):
def extract_file_os(elf: ELFFile, buf, **kwargs):
# our current approach does not always get an OS value, e.g. for packed samples
# for file limitation purposes, we're more lax here
try:
@@ -68,7 +116,7 @@ def extract_file_format(**kwargs):
yield Format(FORMAT_ELF), NO_ADDRESS
def extract_file_arch(elf, **kwargs):
def extract_file_arch(elf: ELFFile, **kwargs):
arch = elf.get_machine_arch()
if arch == "x86":
yield Arch("i386"), NO_ADDRESS
@@ -85,8 +133,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i
FILE_HANDLERS = (
# TODO(williballenthin): implement extract_file_export_names
# https://github.com/mandiant/capa/issues/1607
extract_file_export_names,
extract_file_import_names,
extract_file_section_names,
extract_file_strings,

View File

@@ -33,7 +33,7 @@ classifiers = [
]
dependencies = [
"tqdm==4.65.0",
"pyyaml==6.0",
"pyyaml==6.0.1",
"tabulate==0.9.0",
"colorama==0.4.6",
"termcolor==2.3.0",
@@ -77,7 +77,7 @@ dev = [
"flake8-simplify==0.20.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.0.278",
"ruff==0.0.280",
"black==23.7.0",
"isort==5.11.4",
"mypy==1.4.1",
@@ -89,11 +89,11 @@ dev = [
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
"types-PyYAML==6.0.8",
"types-tabulate==0.9.0.1",
"types-tabulate==0.9.0.3",
"types-termcolor==1.1.4",
"types-psutil==5.8.23",
"types_requests==2.31.0.1",
"types-protobuf==4.23.0.1",
"types_requests==2.31.0.2",
"types-protobuf==4.23.0.2",
]
build = [
"pyinstaller==5.10.1",

2
rules

Submodule rules updated: a49c174fee...7685a232d9

View File

@@ -569,6 +569,10 @@ class FeatureNtdllNtoskrnlApi(Lint):
"ZwCreateProcess",
"ZwCreateUserProcess",
"RtlCreateUserProcess",
"NtProtectVirtualMemory",
"NtEnumerateSystemEnvironmentValuesEx",
"NtQuerySystemEnvironmentValueEx",
"NtQuerySystemEnvironmentValue",
):
# ntoskrnl.exe does not export these routines
continue
@@ -579,6 +583,7 @@ class FeatureNtdllNtoskrnlApi(Lint):
"KeStackAttachProcess",
"ObfDereferenceObject",
"KeUnstackDetachProcess",
"ExGetFirmwareEnvironmentVariable",
):
# ntdll.dll does not export these routines
continue

View File

@@ -40,9 +40,6 @@ except ImportError:
indirect=["sample", "scope"],
)
def test_binja_features(sample, scope, feature, expected):
if feature == capa.features.common.Characteristic("stack string"):
pytest.xfail("skip failing Binja stack string detection temporarily, see #1473")
if isinstance(feature, capa.features.file.Export) and "." in str(feature.value):
pytest.xfail("skip Binja unsupported forwarded export feature, see #1646")

View File

@@ -0,0 +1,71 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
from pathlib import Path
from elftools.elf.elffile import ELFFile
from capa.features.extractors.elffile import extract_file_export_names, extract_file_import_names
CD = Path(__file__).resolve().parent
SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
def test_elffile_import_features():
    """The ELF import extractor must report every known import of the sample."""
    wanted_names = (
        "memfrob",
        "puts",
        "__libc_start_main",
        "malloc",
        "__cxa_finalize",
    )

    # parse the sample from an in-memory copy of its bytes
    raw = Path(SAMPLE_PATH).read_bytes()
    elf = ELFFile(io.BytesIO(raw))

    # materialize the (feature, address) pairs produced by the extractor
    found = list(extract_file_import_names(elf))

    # an empty result means that extraction failed outright
    assert len(found) > 0, "No imports were found."

    # only the feature value (the symbol name) matters here, not the address
    found_names = {pair[0].value for pair in found}

    # every expected name must be present; extra imports are allowed
    for symbol_name in wanted_names:
        assert symbol_name in found_names, f"Symbol '{symbol_name}' not found in imports."
def test_elffile_export_features():
    """The ELF export extractor must report every known export of the sample."""
    wanted_names = (
        "deregister_tm_clones",
        "register_tm_clones",
        "__do_global_dtors_aux",
        "completed.8060",
        "__do_global_dtors_aux_fini_array_entry",
        "frame_dummy",
        "_init",
        "__libc_csu_fini",
        "_fini",
        "__dso_handle",
        "_IO_stdin_used",
        "__libc_csu_init",
    )

    # parse the sample from an in-memory copy of its bytes
    raw = Path(SAMPLE_PATH).read_bytes()
    elf = ELFFile(io.BytesIO(raw))

    # materialize the (feature, address) pairs produced by the export extractor
    found = list(extract_file_export_names(elf))

    # an empty result means that extraction failed outright
    assert len(found) > 0, "No exports were found."

    # only the feature value (the symbol name) matters here, not the address
    found_names = {pair[0].value for pair in found}

    # every expected name must be present; extra exports are allowed
    for symbol_name in wanted_names:
        assert symbol_name in found_names, f"Symbol '{symbol_name}' not found in exports."