From dde76e301df8fe3a01b26504e890bf38f3c334dd Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 12:15:05 +0100 Subject: [PATCH] add a method to construct SymTab objects from Elf objects --- NOTE(review): (1) the added SymTab.from_Elf still calls Shdr.from_viv, which the first elf.py hunk of this same patch deletes; confirm from_viv is kept or re-added, otherwise the call cannot resolve. (2) from_Elf picks the symbol table section with "section.sh_info & SHT_SYMTAB" (carried over unchanged from the removed insn.py code); presumably the intent is to compare the section type field against SHT_SYMTAB; verify. (3) sh_symtab and sh_strtab are only assigned inside that "if", so they appear to be unbound at the final "return cls(...)" when no section matches; confirm how callers should handle binaries without a symbol table. capa/features/extractors/elf.py | 32 ++++++++++++---------------- capa/features/extractors/viv/insn.py | 14 +----------- 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index a32b5761..7818f507 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -91,24 +91,6 @@ class Shdr: entsize: int buf: bytes - @classmethod - def from_viv(cls, section, buf: bytes): - """ - construct a Shdr object from vivisect's representation of - section headers (Elf.Elf32Section or Elf.Elf64Section) - """ - return cls( - section.sh_name, - section.sh_type, - section.sh_flags, - section.sh_addr, - section.sh_offset, - section.sh_size, - section.sh_link, - section.sh_entsize, - buf, - ) - class ELF: def __init__(self, f: BinaryIO): @@ -713,6 +695,20 @@ class SymTab: for symbol in self.symbols: yield symbol + @classmethod + def from_Elf(cls, ElfBinary) -> "SymTab": + endian = "<" if ElfBinary.getEndian() == 0 else ">" + bitness = ElfBinary.bits + + SHT_SYMTAB = 0x2 + for section in ElfBinary.sections: + if section.sh_info & SHT_SYMTAB: + strtab_section = ElfBinary.sections[section.vsGetField("sh_link")] + sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size)) + sh_strtab = Shdr.from_viv(strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) + + return cls(endian, bitness, sh_symtab, sh_strtab) + def guess_os_from_osabi(elf: ELF) -> Optional[OS]: return elf.ei_osabi diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index cfb2fcc0..73bc4548 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -115,19 +115,7 @@ def 
extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if "symtab" not in fh.ctx["cache"]: # the symbol table gets stored as a function's attribute in order to avoid running # this code everytime the call is made, thus preventing the computational overhead. - elf = f.vw.parsedbin - endian = "<" if elf.getEndian() == 0 else ">" - bitness = elf.bits - - SHT_SYMTAB = 0x2 - for section in elf.sections: - if section.sh_info & SHT_SYMTAB: - strtab_section = elf.sections[section.vsGetField("sh_link")] - sh_symtab = Shdr.from_viv(section, elf.readAtOffset(section.sh_offset, section.sh_size)) - sh_strtab = Shdr.from_viv(strtab_section, elf.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) - - symtab = SymTab(endian, bitness, sh_symtab, sh_strtab) - fh.ctx["cache"]["symtab"] = symtab + fh.ctx["cache"]["symtab"] = SymTab.from_Elf(f.vw.parsedbin) symtab = fh.ctx["cache"]["symtab"] for symbol in symtab.get_symbols():