elf: detect linux via GCC .ident directives (#1928)

* elf: detect linux via GCC .ident directives

* changelog

* pep8
This commit is contained in:
Willi Ballenthin
2024-01-11 16:15:26 +01:00
committed by GitHub
parent efc26be196
commit 1dc72a3183
2 changed files with 72 additions and 4 deletions

View File

@@ -70,6 +70,7 @@
- binja: improve function call site detection @xusheng6
- binja: use `binaryninja.load` to open files @xusheng6
- binja: bump binja version to 3.5 #1789 @xusheng6
- elf: better detect ELF OS via GCC .ident directives #1928 @williballenthin
### capa explorer IDA Pro plugin

View File

@@ -108,6 +108,9 @@ class Shdr:
buf,
)
def get_name(self, elf: "ELF") -> str:
return elf.shstrtab.buf[self.name :].partition(b"\x00")[0].decode("ascii")
class ELF:
def __init__(self, f: BinaryIO):
@@ -120,6 +123,7 @@ class ELF:
self.e_phnum: int
self.e_shentsize: int
self.e_shnum: int
self.e_shstrndx: int
self.phbuf: bytes
self.shbuf: bytes
@@ -151,11 +155,15 @@ class ELF:
if self.bitness == 32:
e_phoff, e_shoff = struct.unpack_from(self.endian + "II", self.file_header, 0x1C)
self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x2A)
self.e_shentsize, self.e_shnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x2E)
self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack_from(
self.endian + "HHH", self.file_header, 0x2E
)
elif self.bitness == 64:
e_phoff, e_shoff = struct.unpack_from(self.endian + "QQ", self.file_header, 0x20)
self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x36)
self.e_shentsize, self.e_shnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x3A)
self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack_from(
self.endian + "HHH", self.file_header, 0x3A
)
else:
raise NotImplementedError()
@@ -365,6 +373,10 @@ class ELF:
except ValueError:
continue
@property
def shstrtab(self) -> Shdr:
return self.parse_section_header(self.e_shstrndx)
@property
def linker(self):
PT_INTERP = 0x3
@@ -816,6 +828,48 @@ def guess_os_from_sh_notes(elf: ELF) -> Optional[OS]:
return None
def guess_os_from_ident_directive(elf: ELF) -> Optional[OS]:
# GCC inserts the GNU version via an .ident directive
# that gets stored in a section named ".comment".
# look at the version and recognize common OSes.
#
# assume the GCC version matches the target OS version,
# which I guess could be wrong during cross-compilation?
# therefore, don't rely on this if possible.
#
# https://stackoverflow.com/q/6263425
# https://gcc.gnu.org/onlinedocs/cpp/Other-Directives.html
SHT_PROGBITS = 0x1
for shdr in elf.section_headers:
if shdr.type != SHT_PROGBITS:
continue
if shdr.get_name(elf) != ".comment":
continue
try:
comment = shdr.buf.decode("utf-8")
except ValueError:
continue
if "GCC:" not in comment:
continue
logger.debug(".ident: %s", comment)
# these values come from our testfiles, like:
# rg -a "GCC: " tests/data/
if "Debian" in comment:
return OS.LINUX
elif "Ubuntu" in comment:
return OS.LINUX
elif "Red Hat" in comment:
return OS.LINUX
return None
def guess_os_from_linker(elf: ELF) -> Optional[OS]:
# search for recognizable dynamic linkers (interpreters)
# for example, on linux, we see file paths like: /lib64/ld-linux-x86-64.so.2
@@ -851,8 +905,10 @@ def guess_os_from_abi_versions_needed(elf: ELF) -> Optional[OS]:
return OS.HURD
else:
# we don't have any good guesses based on versions needed
pass
# in practice, Hurd isn't a common/viable OS,
# so this is almost certain to be Linux,
# so lets just make that guess.
return OS.LINUX
return None
@@ -927,6 +983,13 @@ def detect_elf_os(f) -> str:
logger.warning("Error guessing OS from section header notes: %s", e)
sh_notes_guess = None
try:
ident_guess = guess_os_from_ident_directive(elf)
logger.debug("guess: .ident: %s", ident_guess)
except Exception as e:
logger.warning("Error guessing OS from .ident directive: %s", e)
ident_guess = None
try:
linker_guess = guess_os_from_linker(elf)
logger.debug("guess: linker: %s", linker_guess)
@@ -960,6 +1023,10 @@ def detect_elf_os(f) -> str:
if osabi_guess:
ret = osabi_guess
elif ident_guess:
# we don't trust this too much due to non-cross-compilation assumptions
ret = ident_guess
elif ph_notes_guess:
ret = ph_notes_guess