fix: ghidra and binja file strings yield FileOffsetAddress

Both extractors were yielding AbsoluteVirtualAddress for file-scope strings,
which is inconsistent with every other backend (common, pefile, elffile, viv,
ida); those yield FileOffsetAddress. Convert the virtual addresses to file
offsets using each backend's respective API. Strings whose address cannot be
mapped to a file offset are no longer yielded.
This commit is contained in:
Willi Ballenthin
2026-05-07 12:34:22 +02:00
committed by Willi Ballenthin
parent 57e730fad2
commit 309231f261
2 changed files with 12 additions and 5 deletions
+4 -1
View File
@@ -123,7 +123,10 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    """extract ASCII and UTF-16 LE strings

    Each string reported by ``bv.strings`` is yielded once, located by its
    offset within the file rather than by its virtual address.

    Args:
        bv: the Binary Ninja view to read strings and segments from.

    Yields:
        ``(String, FileOffsetAddress)`` pairs. A string is skipped when no
        segment returned for its start address covers it within that
        segment's ``data_length``.
    """
    for s in bv.strings:
        for seg in bv.get_segments_at(s.start):
            # distance of the string from the start of this segment
            delta = s.start - seg.start
            # NOTE(review): data_length is presumably the file-backed portion of
            # the segment; anything past it has no file offset - confirm.
            if delta < seg.data_length:
                yield String(s.value), FileOffsetAddress(seg.data_offset + delta)
                # first matching segment wins, so each string is yielded at most once
                break
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
+8 -4
View File
@@ -182,12 +182,16 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
p_bytes = capa.features.extractors.ghidra.helpers.get_block_bytes(block)
for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
offset = block.getStart().getOffset() + s.offset
yield String(s.s), AbsoluteVirtualAddress(offset)
ea_addr = block.getStart().add(s.offset)
f_offset = capa.features.extractors.ghidra.helpers.get_file_offset(ea_addr)
if f_offset != -1:
yield String(s.s), FileOffsetAddress(f_offset)
for s in capa.features.extractors.strings.extract_unicode_strings(p_bytes):
offset = block.getStart().getOffset() + s.offset
yield String(s.s), AbsoluteVirtualAddress(offset)
ea_addr = block.getStart().add(s.offset)
f_offset = capa.features.extractors.ghidra.helpers.get_file_offset(ea_addr)
if f_offset != -1:
yield String(s.s), FileOffsetAddress(f_offset)
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: