mirror of
https://github.com/mandiant/capa.git
synced 2025-12-19 02:32:30 -08:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
34d37c9129 | ||
|
|
92b6916030 | ||
|
|
14996956ea | ||
|
|
2ce7c6a388 | ||
|
|
5b48ae009a | ||
|
|
abdd18d897 | ||
|
|
9f94375391 | ||
|
|
8f9678af4f | ||
|
|
38dc92d2fa | ||
|
|
92e8e49532 | ||
|
|
6a727fa8c0 |
@@ -34,6 +34,7 @@
|
|||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
||||||
- binja: fix a crash during feature extraction when the MLIL is unavailable @xusheng6 #2714
|
- binja: fix a crash during feature extraction when the MLIL is unavailable @xusheng6 #2714
|
||||||
|
- embedded pe: use FileOffset rather than AbsoluteVirtualAddress for IDA, Ghidra, and Binary Ninja @williballenthin #2745
|
||||||
|
|
||||||
### capa Explorer Web
|
### capa Explorer Web
|
||||||
|
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ from capa.features.common import (
|
|||||||
Characteristic,
|
Characteristic,
|
||||||
)
|
)
|
||||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name, va_to_file_offset
|
||||||
|
|
||||||
|
|
||||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
|
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
|
||||||
@@ -46,7 +46,8 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature
|
|||||||
buf = bv.read(seg.start, seg.length)
|
buf = bv.read(seg.start, seg.length)
|
||||||
|
|
||||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
|
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
|
||||||
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
file_off = va_to_file_offset(bv, seg.start + offset)
|
||||||
|
yield Characteristic("embedded pe"), FileOffsetAddress(file_off)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
@@ -122,7 +123,8 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres
|
|||||||
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract ASCII and UTF-16 LE strings"""
|
"""extract ASCII and UTF-16 LE strings"""
|
||||||
for s in bv.strings:
|
for s in bv.strings:
|
||||||
yield String(s.value), FileOffsetAddress(s.start)
|
file_off = va_to_file_offset(bv, s.start)
|
||||||
|
yield String(s.value), FileOffsetAddress(file_off)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
|
|||||||
@@ -84,3 +84,29 @@ def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInst
|
|||||||
if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
|
if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
|
||||||
return None
|
return None
|
||||||
return llil[0]
|
return llil[0]
|
||||||
|
|
||||||
|
|
||||||
|
def va_to_file_offset(bv: BinaryView, va: int) -> int:
    """Map a BinaryView virtual address to an offset within the underlying file.

    Only segments are consulted: in the Binary Ninja API, file-backing
    information lives on `Segment` (`data_offset` / `data_length`), while
    `Section` objects expose no file offset.

    For a segment whose file-backed range contains `va`, the offset is:

        file_offset = segment.data_offset + (va - segment.start)

    A segment may be larger in memory than on disk (`length > data_length`).
    Addresses in that tail have no bytes in the file, so they are not mapped
    (mapping them would point at file bytes that belong to something else).

    args:
      bv: the view in which to resolve the address.
      va: the virtual address to translate.

    returns:
      the file offset of `va`, or `va` itself when no file-backed segment
      contains it.
    """
    for seg in bv.segments:
        delta = va - seg.start
        if not 0 <= delta < seg.length:
            continue

        # only the first `data_length` bytes of a segment come from the file
        if delta < seg.data_length:
            return int(seg.data_offset + delta)

    # fallback: no file-backed mapping is known, so keep the caller's address
    return int(va)
|
||||||
|
|||||||
@@ -85,10 +85,11 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
for off, _ in find_embedded_pe(capa.features.extractors.ghidra.helpers.get_block_bytes(block), mz_xor):
|
for off, _ in find_embedded_pe(capa.features.extractors.ghidra.helpers.get_block_bytes(block), mz_xor):
|
||||||
# add offset back to block start
|
# add offset back to block start (Address)
|
||||||
ea: int = block.getStart().add(off).getOffset()
|
addr = block.getStart().add(off)
|
||||||
|
off_file = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
|
||||||
|
|
||||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
yield Characteristic("embedded pe"), FileOffsetAddress(int(off_file))
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
@@ -140,12 +141,14 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
|||||||
p_bytes = capa.features.extractors.ghidra.helpers.get_block_bytes(block)
|
p_bytes = capa.features.extractors.ghidra.helpers.get_block_bytes(block)
|
||||||
|
|
||||||
for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
|
for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
|
||||||
offset = block.getStart().getOffset() + s.offset
|
addr = block.getStart().add(s.offset)
|
||||||
yield String(s.s), FileOffsetAddress(offset)
|
offset = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
|
||||||
|
yield String(s.s), FileOffsetAddress(int(offset))
|
||||||
|
|
||||||
for s in capa.features.extractors.strings.extract_unicode_strings(p_bytes):
|
for s in capa.features.extractors.strings.extract_unicode_strings(p_bytes):
|
||||||
offset = block.getStart().getOffset() + s.offset
|
addr = block.getStart().add(s.offset)
|
||||||
yield String(s.s), FileOffsetAddress(offset)
|
offset = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
|
||||||
|
yield String(s.s), FileOffsetAddress(int(offset))
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
|
|||||||
@@ -306,3 +306,31 @@ def find_data_references_from_insn(insn, max_depth: int = 10):
|
|||||||
break
|
break
|
||||||
|
|
||||||
yield to_addr
|
yield to_addr
|
||||||
|
|
||||||
|
|
||||||
|
def addr_to_file_offset(addr: ghidra.program.model.address.Address) -> int:
    """Map a Ghidra Address to an offset within the originally imported file.

    Ghidra tracks which imported file bytes back each memory address. That
    record is queried through `Memory.getAddressSourceInfo()`, and the file
    offset is read from `AddressSourceInfo.getFileOffset()`. Passing the
    Address itself (rather than comparing raw offsets against each block)
    keeps the lookup address-space aware.

    args:
      addr: an address within the current program.

    returns:
      the file offset of the byte backing `addr`. When Ghidra has no source
      information for the address (not in memory, or not backed by file
      bytes), the offset of `addr` relative to the program image base.
    """
    prog = currentProgram()  # type: ignore[name-defined] # noqa: F821

    # None when the address is not part of program memory
    info = prog.getMemory().getAddressSourceInfo(addr)
    if info is not None:
        file_off = info.getFileOffset()
        # -1 signals that no imported file bytes back this address
        if file_off >= 0:
            return int(file_off)

    # fallback: no file backing is known, so use the image-base-relative offset
    return int(addr.getOffset() - prog.getImageBase().getOffset())
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import idc
|
|||||||
import idaapi
|
import idaapi
|
||||||
import idautils
|
import idautils
|
||||||
import ida_entry
|
import ida_entry
|
||||||
|
import ida_loader
|
||||||
|
|
||||||
import capa.ida.helpers
|
import capa.ida.helpers
|
||||||
import capa.features.extractors.common
|
import capa.features.extractors.common
|
||||||
@@ -87,7 +88,8 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
|||||||
"""
|
"""
|
||||||
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
|
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
|
||||||
for ea, _ in check_segment_for_pe(seg):
|
for ea, _ in check_segment_for_pe(seg):
|
||||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
off = ida_loader.get_fileregion_offset(ea)
|
||||||
|
yield Characteristic("embedded pe"), FileOffsetAddress(off)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
@@ -161,10 +163,12 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
|||||||
|
|
||||||
# differing to common string extractor factor in segment offset here
|
# differing to common string extractor factor in segment offset here
|
||||||
for s in capa.features.extractors.strings.extract_ascii_strings(seg_buff):
|
for s in capa.features.extractors.strings.extract_ascii_strings(seg_buff):
|
||||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
off = ida_loader.get_fileregion_offset(seg.start_ea + s.offset)
|
||||||
|
yield String(s.s), FileOffsetAddress(off)
|
||||||
|
|
||||||
for s in capa.features.extractors.strings.extract_unicode_strings(seg_buff):
|
for s in capa.features.extractors.strings.extract_unicode_strings(seg_buff):
|
||||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
off = ida_loader.get_fileregion_offset(seg.start_ea + s.offset)
|
||||||
|
yield String(s.s), FileOffsetAddress(off)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
|
|||||||
Reference in New Issue
Block a user