From 4b99c506faf080ee9275ff161dbbfeff6df446c7 Mon Sep 17 00:00:00 2001
From: Willi Ballenthin
Date: Wed, 22 Apr 2026 19:25:44 +0300
Subject: [PATCH] fix: correct inverted loop structure in extract_function_loop

The outer while loop over dests and inner for loop over s_addrs were
swapped, causing s_addrs to be exhausted after the first iteration and
dests.next() to be called multiple times per destination. Fix uses the
block's first start address as a fixed source and iterates dests in the
inner while loop, matching the IDA and Binja extractor pattern.
---
 CHANGELOG.md                                |  1 +
 capa/features/extractors/ghidra/function.py | 26 ++++++++++++++-------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index aff155b6..3ad5e4c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -49,6 +49,7 @@
 - fix: Scopes.from_dict uses cls instead of self so subclasses return the correct type @williballenthin
 - fix: correct wrong dict key in VMRay _compute_monitor_threads assertion (used thread_id instead of process_id) @williballenthin
 - fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin
+- fix: correct inverted loop structure in extract_function_loop so each block edge is recorded as (src, dest) @williballenthin (SURF-44)
 - fix: initialize addr to None in Ghidra import extractors to prevent UnboundLocalError when external functions have no data references @williballenthin (SURF-43)
 - fix: assign ConfigDict to model_config in ConciseModel so extra="ignore" is actually applied @williballenthin (SURF-42)
 - fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin (SURF-41)
diff --git a/capa/features/extractors/ghidra/function.py b/capa/features/extractors/ghidra/function.py
index a46d8b15..ccd19945 100644
--- a/capa/features/extractors/ghidra/function.py
+++ b/capa/features/extractors/ghidra/function.py
@@ -29,27 +29,32 @@ def extract_function_calls_to(fh: FunctionHandle):
     f: ghidra.program.database.function.FunctionDB = fh.inner
     for ref in f.getSymbol().getReferences():
         if ref.getReferenceType().isCall():
-            yield Characteristic("calls to"), AbsoluteVirtualAddress(ref.getFromAddress().getOffset())
+            yield (
+                Characteristic("calls to"),
+                AbsoluteVirtualAddress(ref.getFromAddress().getOffset()),
+            )
 
 
 def extract_function_loop(fh: FunctionHandle):
     f: ghidra.program.database.function.FunctionDB = fh.inner
 
     edges = []
+    monitor = capa.features.extractors.ghidra.helpers.get_monitor()
     for block in SimpleBlockIterator(
         BasicBlockModel(capa.features.extractors.ghidra.helpers.get_current_program()),
         f.getBody(),
-        capa.features.extractors.ghidra.helpers.get_monitor(),
+        monitor,
     ):
-        dests = block.getDestinations(capa.features.extractors.ghidra.helpers.get_monitor())
-        s_addrs = block.getStartAddresses()
-
+        src = block.getFirstStartAddress().getOffset()
+        dests = block.getDestinations(monitor)
         while dests.hasNext():
-            for addr in s_addrs:
-                edges.append((addr.getOffset(), dests.next().getDestinationAddress().getOffset()))
+            edges.append((src, dests.next().getDestinationAddress().getOffset()))
 
     if loops.has_loop(edges):
-        yield Characteristic("loop"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
+        yield (
+            Characteristic("loop"),
+            AbsoluteVirtualAddress(f.getEntryPoint().getOffset()),
+        )
 
 
 def extract_recursive_call(fh: FunctionHandle):
@@ -57,7 +62,10 @@ def extract_recursive_call(fh: FunctionHandle):
 
     for func in f.getCalledFunctions(capa.features.extractors.ghidra.helpers.get_monitor()):
         if func.getEntryPoint().getOffset() == f.getEntryPoint().getOffset():
-            yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
+            yield (
+                Characteristic("recursive call"),
+                AbsoluteVirtualAddress(f.getEntryPoint().getOffset()),
+            )
 
 
 def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: