mirror of
https://github.com/mandiant/capa.git
synced 2026-06-12 11:01:31 -07:00
fix: use AbsoluteVirtualAddress for string addresses in Ghidra and IDA file extractors
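block.getStart().getOffset() and seg.start_ea both return virtual addresses, not file offsets. Wrapping them in FileOffsetAddress was semantically wrong for PE/ELF binaries where VA != file offset. Switch to AbsoluteVirtualAddress to match what the value actually represents. The same correction is applied to the Binary Ninja file extractor (seg.start and s.start) and to the addresses reported for the "embedded pe" characteristic; in the Ghidra embedded-PE path, FileOffsetAddress is still used when get_file_offset resolves a file offset (a result other than -1), and AbsoluteVirtualAddress is used otherwise.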
This commit is contained in:
committed by
Willi Ballenthin
parent
b348867e55
commit
52e8fdfc92
+2
-1
@@ -48,12 +48,13 @@
|
||||
- fix: remove unreachable backports.functools_lru_cache fallback and dead dependency @williballenthin
|
||||
- fix: Scopes.from_dict uses cls instead of self so subclasses return the correct type @williballenthin
|
||||
- fix: correct wrong dict key in VMRay _compute_monitor_threads assertion (used thread_id instead of process_id) @williballenthin
|
||||
- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin
|
||||
- fix: use AbsoluteVirtualAddress instead of FileOffsetAddress for string addresses in Ghidra and IDA file extractors @williballenthin (SURF-48)
|
||||
- fix: use dest.value.value and indirect_src.value.value for LLIL_CONST call destinations in binja insn.py @williballenthin (SURF-47)
|
||||
- fix: remove duplicate getPrevLocation call and dead loc variable in get_previous_instructions @williballenthin (SURF-46)
|
||||
- fix: unpack getByteDef offset and slice buffer so ENDBRANCH check applies to target address, not segment start @williballenthin (SURF-45)
|
||||
- fix: correct inverted loop structure in extract_function_loop so each block edge is recorded as (src, dest) @williballenthin (SURF-44)
|
||||
- fix: initialize addr to None in Ghidra import extractors to prevent UnboundLocalError when external functions have no data references @williballenthin (SURF-43)
|
||||
- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin
|
||||
- fix: assign ConfigDict to model_config in ConciseModel so extra="ignore" is actually applied @williballenthin (SURF-42)
|
||||
- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin (SURF-41)
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ from capa.features.common import (
|
||||
Feature,
|
||||
Characteristic,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature
|
||||
buf = bv.read(seg.start, seg.length)
|
||||
|
||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
||||
yield Characteristic("embedded pe"), AbsoluteVirtualAddress(seg.start + offset)
|
||||
|
||||
|
||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -122,7 +122,7 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres
|
||||
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract ASCII and UTF-16 LE strings"""
|
||||
for s in bv.strings:
|
||||
yield String(s.value), FileOffsetAddress(s.start)
|
||||
yield String(s.value), AbsoluteVirtualAddress(s.start)
|
||||
|
||||
|
||||
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
@@ -85,14 +85,13 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||
continue
|
||||
|
||||
for off, _ in find_embedded_pe(capa.features.extractors.ghidra.helpers.get_block_bytes(block), mz_xor):
|
||||
# add offset back to block start
|
||||
ea_addr = block.getStart().add(off)
|
||||
ea = ea_addr.getOffset()
|
||||
f_offset = capa.features.extractors.ghidra.helpers.get_file_offset(ea_addr)
|
||||
if f_offset != -1:
|
||||
ea = f_offset
|
||||
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(f_offset)
|
||||
else:
|
||||
yield Characteristic("embedded pe"), AbsoluteVirtualAddress(ea)
|
||||
|
||||
|
||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -187,11 +186,11 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
|
||||
offset = block.getStart().getOffset() + s.offset
|
||||
yield String(s.s), FileOffsetAddress(offset)
|
||||
yield String(s.s), AbsoluteVirtualAddress(offset)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(p_bytes):
|
||||
offset = block.getStart().getOffset() + s.offset
|
||||
yield String(s.s), FileOffsetAddress(offset)
|
||||
yield String(s.s), AbsoluteVirtualAddress(offset)
|
||||
|
||||
|
||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
@@ -28,7 +28,7 @@ import capa.features.extractors.strings
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.file import Export, Import, Section, FunctionName
|
||||
from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
|
||||
|
||||
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
||||
|
||||
@@ -87,7 +87,7 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
|
||||
for ea, _ in check_segment_for_pe(seg):
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||
yield Characteristic("embedded pe"), AbsoluteVirtualAddress(ea)
|
||||
|
||||
|
||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -161,10 +161,10 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
# differing to common string extractor factor in segment offset here
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(seg_buff):
|
||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
||||
yield String(s.s), AbsoluteVirtualAddress(seg.start_ea + s.offset)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(seg_buff):
|
||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
||||
yield String(s.s), AbsoluteVirtualAddress(seg.start_ea + s.offset)
|
||||
|
||||
|
||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
Reference in New Issue
Block a user