mirror of
https://github.com/mandiant/capa.git
synced 2026-02-04 11:07:53 -08:00
bn: use FileOffsetAddress for embedded PE
Closes the issue "binary ninja: embedded pe: offsets are virtual addresses rather than file offsets". Fixes #2748.
This commit is contained in:
@@ -33,6 +33,7 @@ from capa.features.common import (
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
||||
from capa.features.extractors.binja.helpers import va_to_file_offset
|
||||
|
||||
|
||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -46,7 +47,8 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature
|
||||
buf = bv.read(seg.start, seg.length)
|
||||
|
||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
||||
file_off = va_to_file_offset(bv, seg.start + offset)
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(file_off)
|
||||
|
||||
|
||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -122,7 +124,8 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres
|
||||
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract ASCII and UTF-16 LE strings"""
|
||||
for s in bv.strings:
|
||||
yield String(s.value), FileOffsetAddress(s.start)
|
||||
file_off = va_to_file_offset(bv, s.start)
|
||||
yield String(s.value), FileOffsetAddress(file_off)
|
||||
|
||||
|
||||
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
@@ -84,3 +84,30 @@ def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInst
|
||||
if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
|
||||
return None
|
||||
return llil[0]
|
||||
|
||||
|
||||
def va_to_file_offset(bv: BinaryView, va: int) -> int:
    """Map a BinaryView virtual address to a file offset.

    The lookup walks `bv.segments` first and then `bv.sections`; the first
    range that contains `va` (start inclusive, end exclusive) wins. The file
    offset is computed as:

        file_offset = range.data_offset + (va - range.start)

    Assumes a Binary Ninja API where Segment and Section objects expose a
    `data_offset` attribute which is the file offset of the start of the
    segment/section.
    NOTE(review): confirm that Section objects really provide `data_offset`
    in the supported Binary Ninja versions; otherwise the section fallback
    fails with AttributeError instead of the RuntimeError documented below.

    Args:
        bv: the view whose segments and sections describe the mapping.
        va: the virtual address to translate.

    Returns:
        the file offset that corresponds to `va`.

    Raises:
        RuntimeError: if no segment or section contains `va`. The address is
            never returned unchanged, so callers cannot mistake a virtual
            address for a file offset.
    """
    # prefer segments (they map ranges of the file view)
    for seg in bv.segments:
        if seg.start <= va < seg.start + seg.length:
            return int(seg.data_offset + (va - seg.start))

    # otherwise check sections; only the Section objects matter, not their names
    for sec in bv.sections.values():
        if sec.start <= va < sec.start + sec.length:
            return int(sec.data_offset + (va - sec.start))

    # If we cannot map the VA to a file offset via segments or sections, raise.
    # This enforces strict mapping so callers must handle missing mappings explicitly.
    raise RuntimeError(f"unable to map virtual address to file offset: 0x{va:x}")
|
||||
|
||||
Reference in New Issue
Block a user