Compare commits

...

4 Commits

Author SHA1 Message Date
Mike Hunhoff
08cf8f8d03 merge upstream 2024-06-18 10:57:57 -06:00
Yannis Gasparis
ac6924552b add test case 2024-06-18 07:35:38 -07:00
ygasparis
33b2e78dbc Update CHANGELOG.md
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
2024-06-11 13:37:02 -07:00
Ioannis Gasparis
d74b7823a3 closes #2096 2024-06-10 16:30:46 -07:00
3 changed files with 85 additions and 40 deletions

View File

@@ -83,6 +83,7 @@ Special thanks to our repeat and new contributors:
- binja: fix and simplify stack string detection code after binja 4.0 @xusheng6 - binja: fix and simplify stack string detection code after binja 4.0 @xusheng6
- binja: add support for forwarded export #1646 @xusheng6 - binja: add support for forwarded export #1646 @xusheng6
- cape: support more report formats #2035 @mr-tz - cape: support more report formats #2035 @mr-tz
- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
### capa explorer IDA Pro plugin ### capa explorer IDA Pro plugin
- replace deprecated IDA API find_binary with bin_search #1606 @s-ff - replace deprecated IDA API find_binary with bin_search #1606 @s-ff

View File

@@ -10,8 +10,7 @@ import logging
from typing import Tuple, Iterator from typing import Tuple, Iterator
from pathlib import Path from pathlib import Path
from elftools.elf.elffile import ELFFile, SymbolTableSection from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
from elftools.elf.relocation import RelocationSection
import capa.features.extractors.common import capa.features.extractors.common
from capa.features.file import Export, Import, Section from capa.features.file import Export, Import, Section
@@ -47,17 +46,37 @@ def extract_file_export_names(elf: ELFFile, **kwargs):
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value) yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue
logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
for symbol in segment.iter_symbols():
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
continue
if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
continue
if symbol.entry.st_value == 0:
continue
if symbol.entry.st_shndx == "SHN_UNDEF":
continue
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
def extract_file_import_names(elf: ELFFile, **kwargs): def extract_file_import_names(elf: ELFFile, **kwargs):
# Create a dictionary to store symbol names by their index # Create a dictionary to store symbol names by their index
symbol_names = {} symbol_names = {}
# Extract symbol names and store them in the dictionary # Extract symbol names and store them in the dictionary
for section in elf.iter_sections(): for segment in elf.iter_segments():
if not isinstance(section, SymbolTableSection): if not isinstance(segment, DynamicSegment):
continue continue
for _, symbol in enumerate(section.iter_symbols()): for _, symbol in enumerate(segment.iter_symbols()):
# The following conditions are based on the following article # The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name: if not symbol.name:
@@ -73,21 +92,19 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
symbol_names[_] = symbol.name symbol_names[_] = symbol.name
for section in elf.iter_sections(): for segment in elf.iter_segments():
if not isinstance(section, RelocationSection): if not isinstance(segment, DynamicSegment):
continue continue
if section["sh_entsize"] == 0: relocation_tables = segment.get_relocation_tables()
logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name) logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))
continue
logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations()) for relocation_table in relocation_tables.values():
for relocation in relocation_table.iter_relocations():
for relocation in section.iter_relocations(): # Extract the symbol name from the symbol table using the symbol index in the relocation
# Extract the symbol name from the symbol table using the symbol index in the relocation if relocation["r_info_sym"] not in symbol_names:
if relocation["r_info_sym"] not in symbol_names: continue
continue yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
def extract_file_section_names(elf: ELFFile, **kwargs): def extract_file_section_names(elf: ELFFile, **kwargs):

View File

@@ -14,17 +14,11 @@ from capa.features.extractors.elffile import extract_file_export_names, extract_
CD = Path(__file__).resolve().parent CD = Path(__file__).resolve().parent
SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_" SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
STRIPPED_SAMPLE_PATH = CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_"
def test_elffile_import_features(): def check_import_features(sample_path, expected_imports):
expected_imports = [ path = Path(sample_path)
"memfrob",
"puts",
"__libc_start_main",
"malloc",
"__cxa_finalize",
]
path = Path(SAMPLE_PATH)
elf = ELFFile(io.BytesIO(path.read_bytes())) elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports # Extract imports
imports = list(extract_file_import_names(elf)) imports = list(extract_file_import_names(elf))
@@ -40,6 +34,52 @@ def test_elffile_import_features():
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports." assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports."
def check_export_features(sample_path, expected_exports):
    """Assert that every name in ``expected_exports`` is extracted as an export.

    The file at ``sample_path`` is read fully into memory, parsed with
    ``ELFFile``, and passed to ``extract_file_export_names``. The check fails
    when nothing is extracted or when any expected name is absent.
    """
    raw_bytes = Path(sample_path).read_bytes()
    elf = ELFFile(io.BytesIO(raw_bytes))

    # Materialize the extractor's results so they can be checked for emptiness
    # and then scanned for names.
    exports = list(extract_file_export_names(elf))
    assert exports, "No exports were found."

    # The first element of each extracted item carries the export name in `.value`.
    extracted_symbol_names = {entry[0].value for entry in exports}

    # Every expected export must appear among the extracted names.
    for symbol_name in expected_exports:
        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
def test_stripped_elffile_import_features():
    """Check the expected import names for the sample at ``STRIPPED_SAMPLE_PATH``."""
    check_import_features(
        STRIPPED_SAMPLE_PATH,
        [
            "__cxa_atexit",
            "__cxa_finalize",
            "__stack_chk_fail",
            "fclose",
            "fopen",
            "__android_log_print",
        ],
    )
def test_stripped_elffile_export_features():
    """Check the expected export names for the sample at ``STRIPPED_SAMPLE_PATH``."""
    check_export_features(
        STRIPPED_SAMPLE_PATH,
        [
            "_ZN7_JNIEnv14GetArrayLengthEP7_jarray",
            "Java_o_ac_a",
            "Java_o_ac_b",
            "_Z6existsPKc",
            "_ZN7_JNIEnv17GetStringUTFCharsEP8_jstringPh",
            "_ZN7_JNIEnv21GetObjectArrayElementEP13_jobjectArrayi",
            "_ZN7_JNIEnv21ReleaseStringUTFCharsEP8_jstringPKc",
        ],
    )
def test_elffile_import_features():
    """Check the expected import names for the sample at ``SAMPLE_PATH``."""
    check_import_features(
        SAMPLE_PATH,
        ["memfrob", "puts", "__libc_start_main", "malloc", "__cxa_finalize"],
    )
def test_elffile_export_features(): def test_elffile_export_features():
expected_exports = [ expected_exports = [
"deregister_tm_clones", "deregister_tm_clones",
@@ -55,17 +95,4 @@ def test_elffile_export_features():
"_IO_stdin_used", "_IO_stdin_used",
"__libc_csu_init", "__libc_csu_init",
] ]
path = Path(SAMPLE_PATH) check_export_features(SAMPLE_PATH, expected_exports)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
exports = list(extract_file_export_names(elf))
# Verify that at least one export was found
assert len(exports) > 0, "No exports were found."
# Extract the symbol names from the extracted imports
extracted_symbol_names = [exported[0].value for exported in exports]
# Check if all expected symbol names are found
for symbol_name in expected_exports:
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."