bn: use FileOffsetAddress for embedded PE

closes binary ninja: embedded pe: offsets are virtual addresses rather than file offsets
Fixes #2748
This commit is contained in:
Willi Ballenthin
2025-11-03 12:24:04 +00:00
parent 92e8e49532
commit 38dc92d2fa
2 changed files with 32 additions and 2 deletions

View File

@@ -33,6 +33,7 @@ from capa.features.common import (
)
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
from capa.features.extractors.binja.helpers import va_to_file_offset
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
@@ -46,7 +47,8 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature
buf = bv.read(seg.start, seg.length)
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
file_off = va_to_file_offset(bv, seg.start + offset)
yield Characteristic("embedded pe"), FileOffsetAddress(file_off)
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
@@ -122,7 +124,8 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
"""extract ASCII and UTF-16 LE strings"""
for s in bv.strings:
yield String(s.value), FileOffsetAddress(s.start)
file_off = va_to_file_offset(bv, s.start)
yield String(s.value), FileOffsetAddress(file_off)
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:

View File

@@ -84,3 +84,30 @@ def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInst
if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
return None
return llil[0]
def va_to_file_offset(bv: BinaryView, va: int) -> int:
    """Map a BinaryView virtual address to a file offset.

    The offset is derived from the first segment (or, failing that, the first
    section) whose ``[start, start + length)`` range contains ``va``:

        file_offset = region.data_offset + (va - region.start)

    Args:
        bv: the view whose ``segments`` and ``sections`` are searched.
        va: the virtual address to translate.

    Returns:
        The file offset corresponding to ``va``.

    Raises:
        RuntimeError: if no segment, and no section exposing a
            ``data_offset``, contains ``va``. There is no fallback to
            returning ``va`` itself; callers must handle the failure.
    """
    # prefer segments (they map ranges of the file view)
    for seg in bv.segments:
        if seg.start <= va < seg.start + seg.length:
            # NOTE(review): this does not check whether `va` lies within the
            # file-backed part of the segment; confirm whether addresses past
            # the segment's on-disk data should be rejected here.
            return int(seg.data_offset + (va - seg.start))

    # otherwise check sections; only the section objects are needed, not names
    for sec in bv.sections.values():
        if not (sec.start <= va < sec.start + sec.length):
            continue
        # NOTE(review): presumably not every Section object exposes
        # `data_offset` - verify against the Binary Ninja API. A section
        # without it cannot be mapped, so skip it and let the strict
        # RuntimeError below report the failure instead of an AttributeError.
        data_offset = getattr(sec, "data_offset", None)
        if data_offset is None:
            continue
        return int(data_offset + (va - sec.start))

    # Strict mapping: an address that cannot be translated is an error, so
    # callers must handle missing mappings explicitly.
    raise RuntimeError(f"unable to map virtual address to file offset: 0x{va:x}")