From fa8b4a4203bb76f47e7bd54161cf11afc720dcda Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Wed, 11 Aug 2021 14:43:13 -0600 Subject: [PATCH] extractors: add common routine to extract OS from ELF --- capa/features/extractors/elf.py | 224 ++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 capa/features/extractors/elf.py diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py new file mode 100644 index 00000000..45388003 --- /dev/null +++ b/capa/features/extractors/elf.py @@ -0,0 +1,224 @@ +import struct +import logging +from enum import Enum +from typing import BinaryIO + +logger = logging.getLogger(__name__) + + +def align(v, alignment): + remainder = v % alignment + if remainder == 0: + return v + else: + return v + remainder + + +class CorruptElfFile(ValueError): + pass + + +class OS(str, Enum): + HPUX = "HPUX" + NETBSD = "NETBSD" + LINUX = "LINUX" + HURD = "HURD" + _86OPEN = "86OPEN" + SOLARIS = "SOLARIS" + AIX = "AIX" + IRIX = "IRIX" + FREEBSD = "FREEBSD" + TRU64 = "TRU64" + MODESTO = "MODESTO" + OPENBSD = "OPENBSD" + OPENVMS = "OPENVMS" + NSK = "NSK" + AROS = "AROS" + FENIXOS = "FENIXOS" + CLOUD = "CLOUD" + SORTFIX = "SORTFIX" + ARM_AEABI = "ARM_AEABI" + SYLLABLE = "SYLLABLE" + NACL = "NACL" + + +def detect_elf_os(f: BinaryIO) -> str: + f.seek(0x0) + file_header = f.read(0x40) + + # we'll set this to the detected OS + # prefer the first heuristics, + # but rather than short circuiting, + # we'll still parse out the remainder, for debugging. + ret = None + + if not file_header.startswith(b"\x7fELF"): + raise CorruptElfFile("missing magic header") + + ei_class, ei_data = struct.unpack_from("BB", file_header, 4) + logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data) + if ei_class == 1: + bitness = 32 + elif ei_class == 2: + bitness = 64 + else: + raise CorruptElfFile("invalid ei_class: 0x%02x" % ei_class) + + if ei_data == 1: + endian = "<" + elif ei_data == 2: + endian = ">" + else: + raise CorruptElfFile("not an ELF file: invalid ei_data: 0x%02x" % ei_data) + + if bitness == 32: + (e_phoff,) = struct.unpack_from(endian + "I", file_header, 0x1C) + e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, 0x2A) + elif bitness == 64: + (e_phoff,) = struct.unpack_from(endian + "Q", file_header, 0x20) + e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, 0x36) + else: + raise NotImplemented + + logger.debug("e_phoff: 0x%02x e_phentsize: 0x%02x e_phnum: %d", e_phoff, e_phentsize, e_phnum) + + (ei_osabi,) = struct.unpack_from(endian + "B", file_header, 7) + OSABI = { + # via pyelftools: https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/elftools/elf/enums.py#L35-L58 + # 0: "SYSV", + 1: OS.HPUX, + 2: OS.NETBSD, + 3: OS.LINUX, + 4: OS.HURD, + 5: OS._86OPEN, + 6: OS.SOLARIS, + 7: OS.AIX, + 8: OS.IRIX, + 9: OS.FREEBSD, + 10: OS.TRU64, + 11: OS.MODESTO, + 12: OS.OPENBSD, + 13: OS.OPENVMS, + 14: OS.NSK, + 15: OS.AROS, + 16: OS.FENIXOS, + 17: OS.CLOUD, + # 53: "SORTFIX", + # 64: "ARM_AEABI", + # 97: "ARM", + # 255: "STANDALONE", + } + logger.debug("ei_osabi: 0x%02x (%s)", ei_osabi, OSABI.get(ei_osabi, "unknown")) + + if ei_osabi in OSABI and ei_osabi != 0x0: + # update only if not set + # so we can get the debugging output of subsequent strategies + ret = OSABI[ei_osabi] if not ret else ret + + f.seek(e_phoff) + program_header_size = e_phnum * e_phentsize + program_headers = f.read(program_header_size) + if len(program_headers) != program_header_size: + logger.warning("failed to read program headers") + e_phnum = 0 + + for i in range(e_phnum): + offset = i * e_phentsize + phent = program_headers[offset : offset + e_phentsize] + + PT_NOTE = 0x4 + + (p_type,) = struct.unpack_from(endian + "I", phent, 0x0) + logger.debug("p_type: 0x%04x", p_type) + if p_type != PT_NOTE: + continue + + if bitness == 32: + p_offset, _, _, p_filesz = struct.unpack_from(endian + "IIII", phent, 0x4) + elif bitness == 64: + p_offset, _, _, p_filesz = struct.unpack_from(endian + "QQQQ", phent, 0x8) + else: + raise NotImplemented + + logger.debug("p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz) + + f.seek(p_offset) + note = f.read(p_filesz) + if len(note) != p_filesz: + logger.warning("failed to read note content") + continue + + namesz, descsz, type_ = struct.unpack_from(endian + "III", note, 0x0) + name_offset = 0xC + desc_offset = name_offset + align(namesz, 0x4) + + logger.debug("namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) + + name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") + logger.debug("name: %s", name) + + if type_ != 1: + continue + + if name == "GNU": + if descsz < 16: + continue + + desc = note[desc_offset : desc_offset + descsz] + abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0) + # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658 + # and here: https://github.com/bminor/binutils-gdb/blob/34c54daa337da9fadf87d2706d6a590ae1f88f4d/include/elf/common.h#L933-L939 + GNU_ABI_TAG = { + 0: OS.LINUX, + 1: OS.HURD, + 2: OS.SOLARIS, + 3: OS.FREEBSD, + 4: OS.NETBSD, + 5: OS.SYLLABLE, + 6: OS.NACL, + } + logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) + + if abi_tag in GNU_ABI_TAG: + # update only if not set + # so we can get the debugging output of subsequent strategies + ret = GNU_ABI_TAG[abi_tag] if not ret else ret + logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", ret, kmajor, kminor, kpatch) + elif name == "OpenBSD": + logger.debug("note owner: %s", "OPENBSD") + ret = OS.OPENBSD if not ret else ret + elif name == "NetBSD": + logger.debug("note owner: %s", "NETBSD") + ret = OS.NETBSD if not ret else ret + + for i in range(e_phnum): + offset = i * e_phentsize + phent = program_headers[offset : offset + e_phentsize] + + PT_INTERP = 0x3 + + (p_type,) = struct.unpack_from(endian + "I", phent, 0x0) + if p_type != PT_INTERP: + continue + + if bitness == 32: + p_offset, _, _, p_filesz = struct.unpack_from(endian + "IIII", phent, 0x4) + elif bitness == 64: + p_offset, _, _, p_filesz = struct.unpack_from(endian + "QQQQ", phent, 0x8) + else: + raise NotImplemented + + f.seek(p_offset) + interp = f.read(p_filesz) + if len(interp) != p_filesz: + logger.warning("failed to read interp content") + continue + + linker = interp.partition(b"\x00")[0].decode("ascii") + logger.debug("linker: %s", linker) + if "ld-linux" in linker: + # update only if not set + # so we can get the debugging output of subsequent strategies + ret = OS.LINUX if ret is None else ret + + return ret.value if ret is not None else "unknown" \ No newline at end of file