Report file-level string features at file offsets instead of virtual addresses.

binja:  for each string, walk the segments returned by bv.get_segments_at(s.start);
        the first segment where (s.start - seg.start) < seg.data_length yields the
        string at seg.data_offset + (s.start - seg.start), then the search stops.
ghidra: each ASCII / UTF-16 string address is passed to helpers.get_file_offset();
        the string is yielded only when the result is not -1.

NOTE(review): in both extractors a string with no resolvable file offset is now
dropped silently rather than reported at its virtual address - confirm this is intended.
NOTE(review): line breaks and indentation were restored from a collapsed copy of this
patch; hunk line counts match the headers, but verify indentation against the tree.

diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py
index 69aafd23..3d794be8 100644
--- a/capa/features/extractors/binja/file.py
+++ b/capa/features/extractors/binja/file.py
@@ -123,7 +123,10 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres
 def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract ASCII and UTF-16 LE strings"""
     for s in bv.strings:
-        yield String(s.value), AbsoluteVirtualAddress(s.start)
+        for seg in bv.get_segments_at(s.start):
+            if s.start - seg.start < seg.data_length:
+                yield String(s.value), FileOffsetAddress(seg.data_offset + (s.start - seg.start))
+                break
 
 
 def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py
index a9d67b8e..5f87a663 100644
--- a/capa/features/extractors/ghidra/file.py
+++ b/capa/features/extractors/ghidra/file.py
@@ -182,12 +182,16 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
         p_bytes = capa.features.extractors.ghidra.helpers.get_block_bytes(block)
 
         for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
-            offset = block.getStart().getOffset() + s.offset
-            yield String(s.s), AbsoluteVirtualAddress(offset)
+            ea_addr = block.getStart().add(s.offset)
+            f_offset = capa.features.extractors.ghidra.helpers.get_file_offset(ea_addr)
+            if f_offset != -1:
+                yield String(s.s), FileOffsetAddress(f_offset)
 
         for s in capa.features.extractors.strings.extract_unicode_strings(p_bytes):
-            offset = block.getStart().getOffset() + s.offset
-            yield String(s.s), AbsoluteVirtualAddress(offset)
+            ea_addr = block.getStart().add(s.offset)
+            f_offset = capa.features.extractors.ghidra.helpers.get_file_offset(ea_addr)
+            if f_offset != -1:
+                yield String(s.s), FileOffsetAddress(f_offset)
 
 
 def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: