diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini index e6e0e942..936b8e45 100644 --- a/.github/mypy/mypy.ini +++ b/.github/mypy/mypy.ini @@ -63,6 +63,9 @@ ignore_missing_imports = True [mypy-ida_loader.*] ignore_missing_imports = True +[mypy-ida_segment.*] +ignore_missing_imports = True + [mypy-PyQt5.*] ignore_missing_imports = True diff --git a/CHANGELOG.md b/CHANGELOG.md index ae1e0dc0..934636e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ - fix: display instruction items #1154 @mr-tz - fix: accept only plaintext pasted content #1194 @williballenthin - fix: UnboundLocalError #1217 @williballenthin +- extractor: add support for COFF files and extern functions #1223 @mike-hunhoff ### Development diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index eefef531..31c7fb09 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -115,6 +115,9 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): yield Import(name), addr + for (ea, info) in capa.features.extractors.ida.helpers.get_file_externs().items(): + yield Import(info[1]), AbsoluteVirtualAddress(ea) + def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]: """extract section names @@ -165,7 +168,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: def extract_file_format() -> Iterator[Tuple[Feature, Address]]: file_info = idaapi.get_inf_structure() - if file_info.filetype == idaapi.f_PE: + if file_info.filetype in (idaapi.f_PE, idaapi.f_COFF): yield Format(FORMAT_PE), NO_ADDRESS elif file_info.filetype == idaapi.f_ELF: yield Format(FORMAT_ELF), NO_ADDRESS @@ -173,7 +176,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]: # no file type to return when processing a binary file, but we want to continue processing return else: - raise NotImplementedError("file 
format: %d" % file_info.filetype) + raise NotImplementedError("unexpected file format: %d" % file_info.filetype) def extract_features() -> Iterator[Tuple[Feature, Address]]: diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 186723d2..a333f064 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -11,6 +11,7 @@ import idc import idaapi import idautils import ida_bytes +import ida_segment from capa.features.address import AbsoluteVirtualAddress from capa.features.extractors.base_extractor import FunctionHandle @@ -109,6 +110,19 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]: return imports +def get_file_externs() -> Dict[int, Tuple[str, str, int]]: + externs = {} + + for seg in get_segments(skip_header_segments=True): + if not (seg.type == ida_segment.SEG_XTRN): + continue + + for ea in idautils.Functions(seg.start_ea, seg.end_ea): + externs[ea] = ("", idaapi.get_func_name(ea), -1) + + return externs + + def get_instructions_in_range(start: int, end: int) -> Iterator[idaapi.insn_t]: """yield instructions in range diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 75ad987c..da9e1387 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -23,13 +23,19 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi SECURITY_COOKIE_BYTES_DELTA = 0x40 -def get_imports(ctx: Dict[str, Any]) -> Dict[str, Any]: +def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]: if "imports_cache" not in ctx: ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports() return ctx["imports_cache"] -def check_for_api_call(ctx: Dict[str, Any], insn: idaapi.insn_t) -> Iterator[str]: +def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]: + if "externs_cache" not in ctx: + ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs() + return 
ctx["externs_cache"] + + +def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[Any]: """check instruction for API call""" info = () ref = insn.ea @@ -46,7 +52,7 @@ def check_for_api_call(ctx: Dict[str, Any], insn: idaapi.insn_t) -> Iterator[str except IndexError: break - info = get_imports(ctx).get(ref, ()) + info = funcs.get(ref, ()) if info: break @@ -55,7 +61,7 @@ def check_for_api_call(ctx: Dict[str, Any], insn: idaapi.insn_t) -> Iterator[str break if info: - yield "%s.%s" % (info[0], info[1]) + yield info def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: @@ -70,11 +76,17 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) if not insn.get_canon_mnem() in ("call", "jmp"): return - for api in check_for_api_call(fh.ctx, insn): - dll, _, symbol = api.rpartition(".") - for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + # check calls to imported functions + for api in check_for_api_call(insn, get_imports(fh.ctx)): + # tuple (module, function, ordinal) + for name in capa.features.extractors.helpers.generate_symbols(api[0], api[1]): yield API(name), ih.address + # check calls to extern functions + for api in check_for_api_call(insn, get_externs(fh.ctx)): + # tuple (module, function, ordinal) + yield API(api[1]), ih.address + # extract IDA/FLIRT recognized API functions targets = tuple(idautils.CodeRefsFrom(insn.ea, False)) if not targets: diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index eb3151d9..d1ef3093 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -27,6 +27,7 @@ SUPPORTED_FILE_TYPES = ( idaapi.f_PE, idaapi.f_ELF, idaapi.f_BIN, + idaapi.f_COFF, # idaapi.f_MACHO, )