fix: correct inverted loop structure in extract_function_loop

The outer while loop over dests and inner for loop over s_addrs were
swapped, causing s_addrs to be exhausted after the first iteration and
dests.next() to be called multiple times per destination. The fix uses the
block's first start address as a fixed source and iterates dests in the
inner while loop, matching the IDA and Binja extractor pattern.
This commit is contained in:
Willi Ballenthin
2026-04-22 19:25:44 +03:00
committed by Willi Ballenthin
parent 885b2c88b8
commit 4b99c506fa
2 changed files with 18 additions and 9 deletions
+1
View File
@@ -49,6 +49,7 @@
- fix: Scopes.from_dict uses cls instead of self so subclasses return the correct type @williballenthin
- fix: correct wrong dict key in VMRay _compute_monitor_threads assertion (used thread_id instead of process_id) @williballenthin
- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin
- fix: correct inverted loop structure in extract_function_loop so each block edge is recorded as (src, dest) @williballenthin (SURF-44)
- fix: initialize addr to None in Ghidra import extractors to prevent UnboundLocalError when external functions have no data references @williballenthin (SURF-43)
- fix: assign ConfigDict to model_config in ConciseModel so extra="ignore" is actually applied @williballenthin (SURF-42)
- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin (SURF-41)
+17 -9
View File
@@ -29,27 +29,32 @@ def extract_function_calls_to(fh: FunctionHandle):
f: ghidra.program.database.function.FunctionDB = fh.inner
for ref in f.getSymbol().getReferences():
if ref.getReferenceType().isCall():
yield Characteristic("calls to"), AbsoluteVirtualAddress(ref.getFromAddress().getOffset())
yield (
Characteristic("calls to"),
AbsoluteVirtualAddress(ref.getFromAddress().getOffset()),
)
def extract_function_loop(fh: FunctionHandle):
    """Yield a "loop" characteristic if the function's block edges contain a loop.

    Walks the basic blocks of the function body, records one
    (source offset, destination offset) edge for every destination of each
    block, and passes the collected edges to `loops.has_loop`.

    Args:
        fh: function handle whose `inner` attribute is the Ghidra function object.

    Yields:
        A single (Characteristic("loop"), AbsoluteVirtualAddress) pair located at
        the function's entry point when `loops.has_loop` reports a loop;
        nothing otherwise.
    """
    f: ghidra.program.database.function.FunctionDB = fh.inner

    edges = []
    # fetch the monitor once and share it between the block iterator and the
    # per-block destination lookups
    monitor = capa.features.extractors.ghidra.helpers.get_monitor()
    for block in SimpleBlockIterator(
        BasicBlockModel(capa.features.extractors.ghidra.helpers.get_current_program()),
        f.getBody(),
        monitor,
    ):
        # use the block's first start address as the single, fixed source so
        # that each destination reference is consumed exactly once and
        # recorded as (src, dest)
        src = block.getFirstStartAddress().getOffset()
        dests = block.getDestinations(monitor)
        while dests.hasNext():
            edges.append((src, dests.next().getDestinationAddress().getOffset()))

    if loops.has_loop(edges):
        yield (
            Characteristic("loop"),
            AbsoluteVirtualAddress(f.getEntryPoint().getOffset()),
        )
def extract_recursive_call(fh: FunctionHandle):
@@ -57,7 +62,10 @@ def extract_recursive_call(fh: FunctionHandle):
for func in f.getCalledFunctions(capa.features.extractors.ghidra.helpers.get_monitor()):
if func.getEntryPoint().getOffset() == f.getEntryPoint().getOffset():
yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
yield (
Characteristic("recursive call"),
AbsoluteVirtualAddress(f.getEntryPoint().getOffset()),
)
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: