diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e2a1e3b..030b537e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ ### Bug Fixes - elf: extract import / export symbols from stripped binaries #2096 @ygasparis +- elf: fix handling of symbols in corrupt ELF files #2226 @williballenthin ### capa explorer IDA Pro plugin diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index c499ac4f..b75c938e 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -50,7 +50,12 @@ def extract_file_export_names(elf: ELFFile, **kwargs): if not isinstance(segment, DynamicSegment): continue - logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols()) + tab_ptr, tab_offset = segment.get_table_offset("DT_SYMTAB") + if tab_ptr is None or tab_offset is None: + logger.debug("Dynamic segment doesn't contain DT_SYMTAB") + continue + + logger.debug("Dynamic segment contains %s symbols: ", segment.num_symbols()) for symbol in segment.iter_symbols(): # The following conditions are based on the following article @@ -76,6 +81,11 @@ def extract_file_import_names(elf: ELFFile, **kwargs): if not isinstance(segment, DynamicSegment): continue + tab_ptr, tab_offset = segment.get_table_offset("DT_SYMTAB") + if tab_ptr is None or tab_offset is None: + logger.debug("Dynamic segment doesn't contain DT_SYMTAB") + continue + for _, symbol in enumerate(segment.iter_symbols()): # The following conditions are based on the following article # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html @@ -100,7 +110,16 @@ def extract_file_import_names(elf: ELFFile, **kwargs): logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables)) for relocation_table in relocation_tables.values(): - for relocation in relocation_table.iter_relocations(): + relocations = [] + for i in range(relocation_table.num_relocations()): + try: + 
relocations.append(relocation_table.get_relocation(i)) + except TypeError: + # ELF is corrupt and the relocation table is invalid, + # so stop processing it. + break + + for relocation in relocations: # Extract the symbol name from the symbol table using the symbol index in the relocation if relocation["r_info_sym"] not in symbol_names: continue diff --git a/capa/loader.py b/capa/loader.py index e741175e..4de39f0c 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -64,6 +64,10 @@ BACKEND_CAPE = "cape" BACKEND_FREEZE = "freeze" +class CorruptFile(ValueError): + pass + + def is_supported_format(sample: Path) -> bool: """ Return if this is a supported file based on magic header values @@ -137,21 +141,28 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]): import viv_utils.flirt logger.debug("generating vivisect workspace for: %s", path) - if input_format == FORMAT_AUTO: - if not is_supported_format(path): - raise UnsupportedFormatError() - # don't analyze, so that we can add our Flirt function analyzer first. - vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False) - elif input_format in {FORMAT_PE, FORMAT_ELF}: - vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False) - elif input_format == FORMAT_SC32: - # these are not analyzed nor saved. - vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False) - elif input_format == FORMAT_SC64: - vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False) - else: - raise ValueError("unexpected format: " + input_format) + try: + if input_format == FORMAT_AUTO: + if not is_supported_format(path): + raise UnsupportedFormatError() + + # don't analyze, so that we can add our Flirt function analyzer first. 
+            vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
+        elif input_format in {FORMAT_PE, FORMAT_ELF}:
+            vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
+        elif input_format == FORMAT_SC32:
+            # these are not analyzed nor saved.
+            vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
+        elif input_format == FORMAT_SC64:
+            vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
+        else:
+            raise ValueError("unexpected format: " + input_format)
+    except Exception as e:
+        # vivisect raises raw Exception instances, so match the exact type instead of using isinstance.
+        if type(e) is Exception and e.args and "Couldn't convert rva" in str(e.args[0]):
+            raise CorruptFile(str(e.args[0])) from e
+        raise  # anything else must propagate; swallowing it would leave `vw` unbound below
 
     viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
 
diff --git a/capa/main.py b/capa/main.py
index eb43769d..5e818330 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -753,6 +753,9 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr
     except UnsupportedOSError as e:
         log_unsupported_os_error()
         raise ShouldExitError(E_INVALID_FILE_OS) from e
+    except capa.loader.CorruptFile as e:
+        logger.error("Input file '%s' is not a valid file: %s", args.input_file, str(e))
+        raise ShouldExitError(E_CORRUPT_FILE) from e
 
 
 def main(argv: Optional[List[str]] = None):