From ffe089d4448603d0ee67b6b9af5bf6fc82742e24 Mon Sep 17 00:00:00 2001 From: colton-gabertan Date: Fri, 19 May 2023 19:10:39 -0700 Subject: [PATCH] colton: GhidraFeatureExtractor constructor pulls OS & Arch --- capa/features/extractors/ghidra/extractor.py | 1 + capa/features/extractors/ghidra/global_.py | 30 +++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/ghidra/extractor.py b/capa/features/extractors/ghidra/extractor.py index 65a5f13f..2f5a593a 100644 --- a/capa/features/extractors/ghidra/extractor.py +++ b/capa/features/extractors/ghidra/extractor.py @@ -12,3 +12,4 @@ class GhidraFeatureExtractor(FeatureExtractor): super().__init__() self.global_features: List[Tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os()) + self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch()) diff --git a/capa/features/extractors/ghidra/global_.py b/capa/features/extractors/ghidra/global_.py index 645fd587..b0fad5cf 100644 --- a/capa/features/extractors/ghidra/global_.py +++ b/capa/features/extractors/ghidra/global_.py @@ -3,6 +3,10 @@ import contextlib from io import BytesIO from typing import Tuple, Iterator +# imports for clarity +# note: currentProgram is a static variable accessible in +# the specific ghidra runtime environment +import ghidra.program.database.mem import ghidra.program.flatapi as flatapi ghidraapi = flatapi.FlatProgramAPI(currentProgram) # Ghidrathon hacks :) @@ -20,12 +24,13 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]: yield OS(OS_WINDOWS), NO_ADDRESS elif "ELF" in format_name: - program_memory = current_program.getMemory() + program_memory = current_program.getMemory() # ghidra.program.database.mem.MemoryMapDB fbytes_list = program_memory.getAllFileBytes() # java.util.List fbytes = fbytes_list[0] # ghidra.program.database.mem.FileBytes # Java likes to return signed ints, so we must convert them # back into unsigned bytes manually and write to BytesIO + # note: May be deprecated if Jep has implements better support for Java Lists pb_arr = b'' for i in range(fbytes.getSize()): pb_arr = pb_arr + (fbytes.getOriginalByte(i) & 0xff).to_bytes(1, 'little') @@ -51,3 +56,26 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]: return +def extract_arch() -> Iterator[Tuple[Feature, Address]]: + current_program = ghidraapi.getCurrentProgram() + lang_id = current_program.getMetadata().get('Language ID') + + if 'x86' in lang_id and '64' in lang_id: + yield Arch(ARCH_AMD64), NO_ADDRESS + + elif 'x86' in lang_id and '32' in lang_id: + yield Arch(ARCH_I386), NO_ADDRESS + + elif 'x86' not in lang_id: + logger.debug("unsupported architecture: non-32-bit nor non-64-bit intel") + return + + else: + # we likely end up here: + # 1. handling a new architecture (e.g. aarch64) + # + # for (1), this logic will need to be updated as the format is implemented. + logger.debug("unsupported architecture: %s", lang_id) + return + +