ELF: better handle corrupt files (#2227)

such as when there's a missing symbol table and invalid relocation table.
and then handle when Viv fails to load a workspace.

closes #2226
This commit is contained in:
Willi Ballenthin
2024-07-24 07:22:30 +00:00
committed by GitHub
parent fd686ac591
commit e2e84f7f50
4 changed files with 50 additions and 16 deletions

View File

@@ -13,6 +13,7 @@
### Bug Fixes
- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
- elf: fix handling of symbols in corrupt ELF files #2226 @williballenthin
### capa explorer IDA Pro plugin

View File

@@ -50,7 +50,12 @@ def extract_file_export_names(elf: ELFFile, **kwargs):
if not isinstance(segment, DynamicSegment):
continue
logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
tab_ptr, tab_offset = segment.get_table_offset("DT_SYMTAB")
if tab_ptr is None or tab_offset is None:
logger.debug("Dynamic segment doesn't contain DT_SYMTAB")
continue
logger.debug("Dynamic segment contains %s symbols: ", segment.num_symbols())
for symbol in segment.iter_symbols():
# The following conditions are based on the following article
@@ -76,6 +81,11 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
if not isinstance(segment, DynamicSegment):
continue
tab_ptr, tab_offset = segment.get_table_offset("DT_SYMTAB")
if tab_ptr is None or tab_offset is None:
logger.debug("Dynamic segment doesn't contain DT_SYMTAB")
continue
for _, symbol in enumerate(segment.iter_symbols()):
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
@@ -100,7 +110,16 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))
for relocation_table in relocation_tables.values():
for relocation in relocation_table.iter_relocations():
relocations = []
for i in range(relocation_table.num_relocations()):
try:
relocations.append(relocation_table.get_relocation(i))
except TypeError:
# ELF is corrupt and the relocation table is invalid,
# so stop processing it.
break
for relocation in relocations:
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue

View File

@@ -64,6 +64,10 @@ BACKEND_CAPE = "cape"
BACKEND_FREEZE = "freeze"
class CorruptFile(ValueError):
pass
def is_supported_format(sample: Path) -> bool:
"""
Return if this is a supported file based on magic header values
@@ -137,21 +141,28 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
import viv_utils.flirt
logger.debug("generating vivisect workspace for: %s", path)
if input_format == FORMAT_AUTO:
if not is_supported_format(path):
raise UnsupportedFormatError()
# don't analyze, so that we can add our Flirt function analyzer first.
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif input_format in {FORMAT_PE, FORMAT_ELF}:
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif input_format == FORMAT_SC32:
# these are not analyzed nor saved.
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
elif input_format == FORMAT_SC64:
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
else:
raise ValueError("unexpected format: " + input_format)
try:
if input_format == FORMAT_AUTO:
if not is_supported_format(path):
raise UnsupportedFormatError()
# don't analyze, so that we can add our Flirt function analyzer first.
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif input_format in {FORMAT_PE, FORMAT_ELF}:
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif input_format == FORMAT_SC32:
# these are not analyzed nor saved.
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
elif input_format == FORMAT_SC64:
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
else:
raise ValueError("unexpected format: " + input_format)
except Exception as e:
# vivisect raises raw Exception instances, and we don't want
# to do a subclass check via isinstance.
if type(e) is Exception and "Couldn't convert rva" in e.args[0]:
raise CorruptFile(e.args[0]) from e
viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])

View File

@@ -753,6 +753,9 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr
except UnsupportedOSError as e:
log_unsupported_os_error()
raise ShouldExitError(E_INVALID_FILE_OS) from e
except capa.loader.CorruptFile as e:
logger.error("Input file '%s' is not a valid file: %s", args.input_file, str(e))
raise ShouldExitError(E_CORRUPT_FILE) from e
def main(argv: Optional[List[str]] = None):