diff --git a/CHANGELOG.md b/CHANGELOG.md index 446287fa..7ca290a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,12 +48,13 @@ - fix: remove unreachable backports.functools_lru_cache fallback and dead dependency @williballenthin - fix: Scopes.from_dict uses cls instead of self so subclasses return the correct type @williballenthin - fix: correct wrong dict key in VMRay _compute_monitor_threads assertion (used thread_id instead of process_id) @williballenthin -- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin +- fix: use AbsoluteVirtualAddress instead of FileOffsetAddress when reporting virtual addresses of strings and embedded PEs in binja, Ghidra, and IDA file extractors @williballenthin (SURF-48) - fix: use dest.value.value and indirect_src.value.value for LLIL_CONST call destinations in binja insn.py @williballenthin (SURF-47) - fix: remove duplicate getPrevLocation call and dead loc variable in get_previous_instructions @williballenthin (SURF-46) - fix: unpack getByteDef offset and slice buffer so ENDBRANCH check applies to target address, not segment start @williballenthin (SURF-45) - fix: correct inverted loop structure in extract_function_loop so each block edge is recorded as (src, dest) @williballenthin (SURF-44) - fix: initialize addr to None in Ghidra import extractors to prevent UnboundLocalError when external functions have no data references @williballenthin (SURF-43) +- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin - fix: assign ConfigDict to model_config in ConciseModel so extra="ignore" is actually applied @williballenthin (SURF-42) - fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin (SURF-41) diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index 64d67cf6..181142d5 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -31,7 +31,7 @@ from 
capa.features.common import ( Feature, Characteristic, ) -from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress +from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name @@ -46,7 +46,7 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature buf = bv.read(seg.start, seg.length) for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start): - yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset) + yield Characteristic("embedded pe"), AbsoluteVirtualAddress(seg.start + offset) def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: @@ -122,7 +122,7 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract ASCII and UTF-16 LE strings""" for s in bv.strings: - yield String(s.value), FileOffsetAddress(s.start) + yield String(s.value), AbsoluteVirtualAddress(s.start) def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py index 0bc908d5..70b49e9a 100644 --- a/capa/features/extractors/ghidra/file.py +++ b/capa/features/extractors/ghidra/file.py @@ -85,14 +85,13 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]: continue for off, _ in find_embedded_pe(capa.features.extractors.ghidra.helpers.get_block_bytes(block), mz_xor): - # add offset back to block start ea_addr = block.getStart().add(off) ea = ea_addr.getOffset() f_offset = capa.features.extractors.ghidra.helpers.get_file_offset(ea_addr) if f_offset != -1: - ea = f_offset - - yield Characteristic("embedded pe"), FileOffsetAddress(ea) + yield Characteristic("embedded pe"), FileOffsetAddress(f_offset) + else: + yield 
Characteristic("embedded pe"), AbsoluteVirtualAddress(ea) def extract_file_export_names() -> Iterator[tuple[Feature, Address]]: @@ -187,11 +186,11 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]: for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes): offset = block.getStart().getOffset() + s.offset - yield String(s.s), FileOffsetAddress(offset) + yield String(s.s), AbsoluteVirtualAddress(offset) for s in capa.features.extractors.strings.extract_unicode_strings(p_bytes): offset = block.getStart().getOffset() + s.offset - yield String(s.s), FileOffsetAddress(offset) + yield String(s.s), AbsoluteVirtualAddress(offset) def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index a47f1524..f3744ebe 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -28,7 +28,7 @@ import capa.features.extractors.strings import capa.features.extractors.ida.helpers from capa.features.file import Export, Import, Section, FunctionName from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic -from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress +from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress MAX_OFFSET_PE_AFTER_MZ = 0x200 @@ -87,7 +87,7 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]: """ for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True): for ea, _ in check_segment_for_pe(seg): - yield Characteristic("embedded pe"), FileOffsetAddress(ea) + yield Characteristic("embedded pe"), AbsoluteVirtualAddress(ea) def extract_file_export_names() -> Iterator[tuple[Feature, Address]]: @@ -161,10 +161,10 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]: # differing to common string extractor factor in segment offset here for s in 
capa.features.extractors.strings.extract_ascii_strings(seg_buff): - yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset) + yield String(s.s), AbsoluteVirtualAddress(seg.start_ea + s.offset) for s in capa.features.extractors.strings.extract_unicode_strings(seg_buff): - yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset) + yield String(s.s), AbsoluteVirtualAddress(seg.start_ea + s.offset) def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: