mirror of
https://github.com/mandiant/capa.git
synced 2026-02-04 11:07:53 -08:00
ELF: better handle corrupt files (#2227)
Handle corrupt ELF files, such as those with a missing symbol table or an invalid relocation table. Also handle the case where Viv (vivisect) fails to load a workspace. Closes #2226.
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
### Bug Fixes
|
||||
|
||||
- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
|
||||
- elf: fix handling of symbols in corrupt ELF files #2226 @williballenthin
|
||||
|
||||
### capa explorer IDA Pro plugin
|
||||
|
||||
|
||||
@@ -50,7 +50,12 @@ def extract_file_export_names(elf: ELFFile, **kwargs):
|
||||
if not isinstance(segment, DynamicSegment):
|
||||
continue
|
||||
|
||||
logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
|
||||
tab_ptr, tab_offset = segment.get_table_offset("DT_SYMTAB")
|
||||
if tab_ptr is None or tab_offset is None:
|
||||
logger.debug("Dynamic segment doesn't contain DT_SYMTAB")
|
||||
continue
|
||||
|
||||
logger.debug("Dynamic segment contains %s symbols: ", segment.num_symbols())
|
||||
|
||||
for symbol in segment.iter_symbols():
|
||||
# The following conditions are based on the following article
|
||||
@@ -76,6 +81,11 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
|
||||
if not isinstance(segment, DynamicSegment):
|
||||
continue
|
||||
|
||||
tab_ptr, tab_offset = segment.get_table_offset("DT_SYMTAB")
|
||||
if tab_ptr is None or tab_offset is None:
|
||||
logger.debug("Dynamic segment doesn't contain DT_SYMTAB")
|
||||
continue
|
||||
|
||||
for _, symbol in enumerate(segment.iter_symbols()):
|
||||
# The following conditions are based on the following article
|
||||
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
|
||||
@@ -100,7 +110,16 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
|
||||
logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))
|
||||
|
||||
for relocation_table in relocation_tables.values():
|
||||
for relocation in relocation_table.iter_relocations():
|
||||
relocations = []
|
||||
for i in range(relocation_table.num_relocations()):
|
||||
try:
|
||||
relocations.append(relocation_table.get_relocation(i))
|
||||
except TypeError:
|
||||
# ELF is corrupt and the relocation table is invalid,
|
||||
# so stop processing it.
|
||||
break
|
||||
|
||||
for relocation in relocations:
|
||||
# Extract the symbol name from the symbol table using the symbol index in the relocation
|
||||
if relocation["r_info_sym"] not in symbol_names:
|
||||
continue
|
||||
|
||||
@@ -64,6 +64,10 @@ BACKEND_CAPE = "cape"
|
||||
BACKEND_FREEZE = "freeze"
|
||||
|
||||
|
||||
class CorruptFile(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def is_supported_format(sample: Path) -> bool:
|
||||
"""
|
||||
Return if this is a supported file based on magic header values
|
||||
@@ -137,21 +141,28 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
|
||||
import viv_utils.flirt
|
||||
|
||||
logger.debug("generating vivisect workspace for: %s", path)
|
||||
if input_format == FORMAT_AUTO:
|
||||
if not is_supported_format(path):
|
||||
raise UnsupportedFormatError()
|
||||
|
||||
# don't analyze, so that we can add our Flirt function analyzer first.
|
||||
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
|
||||
elif input_format in {FORMAT_PE, FORMAT_ELF}:
|
||||
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
|
||||
elif input_format == FORMAT_SC32:
|
||||
# these are not analyzed nor saved.
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
|
||||
elif input_format == FORMAT_SC64:
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
|
||||
else:
|
||||
raise ValueError("unexpected format: " + input_format)
|
||||
try:
|
||||
if input_format == FORMAT_AUTO:
|
||||
if not is_supported_format(path):
|
||||
raise UnsupportedFormatError()
|
||||
|
||||
# don't analyze, so that we can add our Flirt function analyzer first.
|
||||
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
|
||||
elif input_format in {FORMAT_PE, FORMAT_ELF}:
|
||||
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
|
||||
elif input_format == FORMAT_SC32:
|
||||
# these are not analyzed nor saved.
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
|
||||
elif input_format == FORMAT_SC64:
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
|
||||
else:
|
||||
raise ValueError("unexpected format: " + input_format)
|
||||
except Exception as e:
|
||||
# vivisect raises raw Exception instances, and we don't want
|
||||
# to do a subclass check via isinstance.
|
||||
if type(e) is Exception and "Couldn't convert rva" in e.args[0]:
|
||||
raise CorruptFile(e.args[0]) from e
|
||||
|
||||
viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
|
||||
|
||||
|
||||
@@ -753,6 +753,9 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr
|
||||
except UnsupportedOSError as e:
|
||||
log_unsupported_os_error()
|
||||
raise ShouldExitError(E_INVALID_FILE_OS) from e
|
||||
except capa.loader.CorruptFile as e:
|
||||
logger.error("Input file '%s' is not a valid file: %s", args.input_file, str(e))
|
||||
raise ShouldExitError(E_CORRUPT_FILE) from e
|
||||
|
||||
|
||||
def main(argv: Optional[List[str]] = None):
|
||||
|
||||
Reference in New Issue
Block a user