Compare commits

...

4 Commits

Author SHA1 Message Date
Mike Hunhoff
08cf8f8d03 merge upstream 2024-06-18 10:57:57 -06:00
Yannis Gasparis
ac6924552b add test case 2024-06-18 07:35:38 -07:00
ygasparis
33b2e78dbc Update CHANGELOG.md
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
2024-06-11 13:37:02 -07:00
Ioannis Gasparis
d74b7823a3 closes #2096 2024-06-10 16:30:46 -07:00
3 changed files with 85 additions and 40 deletions

View File

@@ -83,6 +83,7 @@ Special thanks to our repeat and new contributors:
- binja: fix and simplify stack string detection code after binja 4.0 @xusheng6
- binja: add support for forwarded export #1646 @xusheng6
- cape: support more report formats #2035 @mr-tz
- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
### capa explorer IDA Pro plugin
- replace deprecated IDA API find_binary with bin_search #1606 @s-ff

View File

@@ -10,8 +10,7 @@ import logging
from typing import Tuple, Iterator
from pathlib import Path
from elftools.elf.elffile import ELFFile, SymbolTableSection
from elftools.elf.relocation import RelocationSection
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
import capa.features.extractors.common
from capa.features.file import Export, Import, Section
@@ -47,17 +46,37 @@ def extract_file_export_names(elf: ELFFile, **kwargs):
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue
logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
for symbol in segment.iter_symbols():
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
continue
if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
continue
if symbol.entry.st_value == 0:
continue
if symbol.entry.st_shndx == "SHN_UNDEF":
continue
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
def extract_file_import_names(elf: ELFFile, **kwargs):
# Create a dictionary to store symbol names by their index
symbol_names = {}
# Extract symbol names and store them in the dictionary
for section in elf.iter_sections():
if not isinstance(section, SymbolTableSection):
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue
for _, symbol in enumerate(section.iter_symbols()):
for _, symbol in enumerate(segment.iter_symbols()):
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
@@ -73,21 +92,19 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
symbol_names[_] = symbol.name
for section in elf.iter_sections():
if not isinstance(section, RelocationSection):
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue
if section["sh_entsize"] == 0:
logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
continue
relocation_tables = segment.get_relocation_tables()
logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))
logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations())
for relocation in section.iter_relocations():
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
for relocation_table in relocation_tables.values():
for relocation in relocation_table.iter_relocations():
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
def extract_file_section_names(elf: ELFFile, **kwargs):

View File

@@ -14,17 +14,11 @@ from capa.features.extractors.elffile import extract_file_export_names, extract_
CD = Path(__file__).resolve().parent
SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
STRIPPED_SAMPLE_PATH = CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_"
def test_elffile_import_features():
expected_imports = [
"memfrob",
"puts",
"__libc_start_main",
"malloc",
"__cxa_finalize",
]
path = Path(SAMPLE_PATH)
def check_import_features(sample_path, expected_imports):
path = Path(sample_path)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
imports = list(extract_file_import_names(elf))
@@ -40,6 +34,52 @@ def test_elffile_import_features():
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports."
def check_export_features(sample_path, expected_exports):
    """Assert that every expected export name is extracted from an ELF sample.

    Args:
        sample_path: location of the ELF sample on disk.
        expected_exports: export symbol names that must all appear among
            the names yielded by ``extract_file_export_names``.
    """
    # Parse the sample from an in-memory copy of its bytes.
    sample_bytes = Path(sample_path).read_bytes()
    elf = ELFFile(io.BytesIO(sample_bytes))

    # Drain the extractor so the results can be counted and searched.
    exports = list(extract_file_export_names(elf))

    # An empty result means extraction failed outright.
    assert len(exports) > 0, "No exports were found."

    # The first element of every extracted item carries the symbol name in `.value`.
    extracted_symbol_names = []
    for extracted in exports:
        extracted_symbol_names.append(extracted[0].value)

    # Every expected name must be present; extra extracted names are allowed.
    for symbol_name in expected_exports:
        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
def test_stripped_elffile_import_features():
    """Check that the known imports are extracted from the sample at STRIPPED_SAMPLE_PATH."""
    check_import_features(
        STRIPPED_SAMPLE_PATH,
        [
            "__cxa_atexit",
            "__cxa_finalize",
            "__stack_chk_fail",
            "fclose",
            "fopen",
            "__android_log_print",
        ],
    )
def test_stripped_elffile_export_features():
    """Check that the known exports are extracted from the sample at STRIPPED_SAMPLE_PATH."""
    check_export_features(
        STRIPPED_SAMPLE_PATH,
        [
            "_ZN7_JNIEnv14GetArrayLengthEP7_jarray",
            "Java_o_ac_a",
            "Java_o_ac_b",
            "_Z6existsPKc",
            "_ZN7_JNIEnv17GetStringUTFCharsEP8_jstringPh",
            "_ZN7_JNIEnv21GetObjectArrayElementEP13_jobjectArrayi",
            "_ZN7_JNIEnv21ReleaseStringUTFCharsEP8_jstringPKc",
        ],
    )
def test_elffile_import_features():
    """Check that the known imports are extracted from the sample at SAMPLE_PATH."""
    check_import_features(
        SAMPLE_PATH,
        [
            "memfrob",
            "puts",
            "__libc_start_main",
            "malloc",
            "__cxa_finalize",
        ],
    )
def test_elffile_export_features():
expected_exports = [
"deregister_tm_clones",
@@ -55,17 +95,4 @@ def test_elffile_export_features():
"_IO_stdin_used",
"__libc_csu_init",
]
path = Path(SAMPLE_PATH)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
exports = list(extract_file_export_names(elf))
# Verify that at least one export was found
assert len(exports) > 0, "No exports were found."
# Extract the symbol names from the extracted imports
extracted_symbol_names = [exported[0].value for exported in exports]
# Check if all expected symbol names are found
for symbol_name in expected_exports:
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
check_export_features(SAMPLE_PATH, expected_exports)