From 38dc92d2fae1b7dc16b47f65be8ebe70f3bd4226 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 3 Nov 2025 12:24:04 +0000 Subject: [PATCH] bn: use FileOffsetAddress for embedded PE closes binary ninja: embedded pe: offsets are virtual addresses rather than file offsets Fixes #2748 --- capa/features/extractors/binja/file.py | 7 ++++-- capa/features/extractors/binja/helpers.py | 27 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index 64d67cf6..27fee628 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -33,6 +33,7 @@ from capa.features.common import ( ) from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name +from capa.features.extractors.binja.helpers import va_to_file_offset def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]: @@ -46,7 +47,8 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature buf = bv.read(seg.start, seg.length) for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start): - yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset) + file_off = va_to_file_offset(bv, seg.start + offset) + yield Characteristic("embedded pe"), FileOffsetAddress(file_off) def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: @@ -122,7 +124,8 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract ASCII and UTF-16 LE strings""" for s in bv.strings: - yield String(s.value), FileOffsetAddress(s.start) + file_off = va_to_file_offset(bv, s.start) + yield String(s.value), FileOffsetAddress(file_off) def extract_file_function_names(bv: 
BinaryView) -> Iterator[tuple[Feature, Address]]: diff --git a/capa/features/extractors/binja/helpers.py b/capa/features/extractors/binja/helpers.py index b473d76d..79e906a9 100644 --- a/capa/features/extractors/binja/helpers.py +++ b/capa/features/extractors/binja/helpers.py @@ -84,3 +84,30 @@ def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInst if arch.get_instruction_low_level_il(buffer, addr, llil) == 0: return None return llil[0] + + +def va_to_file_offset(bv: BinaryView, va: int) -> int: + """Map a BinaryView virtual address to a file offset using segment/section data offsets. + + Assumes a modern Binary Ninja API where Segment and Section objects expose + a `data_offset` attribute which is the file offset of the start of the + segment/section. The file offset is computed as: + + file_offset = segment.data_offset + (va - segment.start) + + If no containing segment/section is found, raise RuntimeError instead of + silently returning the virtual address; callers must handle that case. + """ + # prefer segments (they map ranges of the file view) + for seg in bv.segments: + if seg.start <= va < seg.start + seg.length: + return int(seg.data_offset + (va - seg.start)) + + # otherwise check sections + for _, sec in bv.sections.items(): + if sec.start <= va < sec.start + sec.length: + return int(sec.data_offset + (va - sec.start)) + + # If we cannot map the VA to a file offset via segments or sections, raise. + # This enforces strict mapping so callers must handle missing mappings explicitly. + raise RuntimeError(f"unable to map virtual address to file offset: 0x{va:x}")