diff --git a/capa/analysis/libraries.py b/capa/analysis/libraries.py index 51f6ba88..0f6b57f7 100644 --- a/capa/analysis/libraries.py +++ b/capa/analysis/libraries.py @@ -55,10 +55,15 @@ class Method(str, Enum): class FunctionClassification(BaseModel): va: int classification: Classification + # name per the disassembler/analysis tool + # may be combined with the recovered/suspected name TODO below + name: str # if is library, this must be provided method: Optional[Method] + # TODO if is library, recovered/suspected name? + # if is library, these can optionally be provided. library_name: Optional[str] = None library_version: Optional[str] = None @@ -137,6 +142,7 @@ def main(argv=None): function_classifications.append( FunctionClassification( va=flirt_match.va, + name=flirt_match.name, classification=Classification.LIBRARY, method=Method.FLIRT, # note: we cannot currently include which signature matched per function via the IDA API @@ -149,6 +155,7 @@ def main(argv=None): function_classifications.append( FunctionClassification( va=fva, + name=idaapi.get_func_name(fva), classification=Classification.THUNK, method=None, ) @@ -159,6 +166,7 @@ def main(argv=None): function_classifications.append( FunctionClassification( va=string_match.va, + name=idaapi.get_func_name(string_match.va), classification=Classification.LIBRARY, method=Method.STRINGS, library_name=string_match.metadata.library_name, @@ -166,21 +174,22 @@ def main(argv=None): ) ) - if args.json: - doc = FunctionIdResults(function_classifications=[]) - classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va") - for va in idautils.Functions(): - if classifications := classifications_by_va.get(va): - doc.function_classifications.extend(classifications) - else: - doc.function_classifications.append( - FunctionClassification( - va=va, - classification=Classification.UNKNOWN, - method=None, - ) + doc = FunctionIdResults(function_classifications=[]) + classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va") + for va in idautils.Functions(): + if classifications := classifications_by_va.get(va): + doc.function_classifications.extend(classifications) + else: + doc.function_classifications.append( + FunctionClassification( + va=va, + name=idaapi.get_func_name(va), + classification=Classification.UNKNOWN, + method=None, ) + ) + if args.json: print(doc.model_dump_json()) # noqa: T201 print found else: @@ -191,13 +200,13 @@ def main(argv=None): table.add_column("FNAME") table.add_column("EXTRA INFO") - classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va") - for va in idautils.Functions(start=0, end=None): - name = idaapi.get_func_name(va) - if name.startswith("sub_"): + classifications_by_va = capa.analysis.strings.create_index(doc.function_classifications, "va", sorted_=True) + for va, classifications in classifications_by_va.items(): + name = ", ".join({c.name for c in classifications}) + if "sub_" in name: name = Text(name, style="grey37") - if classifications := classifications_by_va.get(va): + if classifications: classification = {c.classification for c in classifications} method = {c.method for c in classifications if c.method} extra = {f"{c.library_name}@{c.library_version}" for c in classifications if c.library_name} diff --git a/capa/analysis/strings/__init__.py b/capa/analysis/strings/__init__.py index 869c984f..0d00f928 100644 --- a/capa/analysis/strings/__init__.py +++ b/capa/analysis/strings/__init__.py @@ -218,8 +218,11 @@ class LibraryStringClassification: metadata: LibraryString -def create_index(s: list, k: str) -> Mapping[Any, list]: - """create an index of the elements in `s` using the key `k`""" +def create_index(s: list, k: str, sorted_: bool = False) -> Mapping[Any, list]: + """create an index of the elements in `s` using the key `k`, optionally sorted by `k`""" + if sorted_: + s = sorted(s, key=lambda x: getattr(x, k)) + s_by_k = collections.defaultdict(list) for v in s: p = getattr(v, k)