diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d51fc9bd..8a35d7a1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -196,9 +196,9 @@ jobs: unzip .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip -d .github/ghidra/ - name: Install pyyaml run: sudo apt-get install -y libyaml-dev - - name: Install capa + - name: Install capa with Ghidra extra run: | - pip install -e .[dev] + pip install -e .[dev,ghidra] - name: Run tests env: GHIDRA_INSTALL_DIR: ${{ github.workspace }}/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC diff --git a/README.md b/README.md index 830b6739..ca21a0c3 100644 --- a/README.md +++ b/README.md @@ -290,17 +290,19 @@ It also uses your local changes to the .idb to extract better features, such as ![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png) -# Ghidra integration +# Ghidra support ![capa + Ghidra integration](https://github.com/mandiant/capa/blob/master/doc/img/ghidra_backend_logo.png) -If you use Ghidra, then you can instruct capa to analyze your samples using Ghidra. capa creates a temporary Ghidra project and uses PyGhidra to import the sample, analyze it, and extract features. The temporary project is deleted after analysis. +capa supports using Ghidra (via [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra)) as a feature extraction backend. This allows you to run capa against binaries using Ghidra's analysis engine. -## Prerequisites +See [here](https://github.com/mandiant/capa/blob/master/capa/ghidra/README.md) for more details. -- Ghidra >= 12.0 must be installed and available to PyGhidra +## prerequisites -## Usage +- [Ghidra](https://github.com/NationalSecurityAgency/ghidra) >= 12.0 must be installed and available via the `GHIDRA_INSTALL_DIR` environment variable. + +## usage ```bash $ capa -b ghidra /path/to/sample diff --git a/capa/features/extractors/ghidra/context.py b/capa/features/extractors/ghidra/context.py index a1b47ba0..e8f92792 100644 --- a/capa/features/extractors/ghidra/context.py +++ b/capa/features/extractors/ghidra/context.py @@ -16,6 +16,14 @@ from typing import Optional class GhidraContext: + """ + State holder for the Ghidra backend to avoid passing state to every function. + + PyGhidra uses a context manager to set up the Ghidra environment (program, transaction, etc.). + We store the relevant objects here to allow easy access throughout the extractor + without needing to pass them as arguments to every feature extraction method. + """ + def __init__(self, program, flat_api, monitor): self.program = program self.flat_api = flat_api diff --git a/capa/features/extractors/ghidra/function.py b/capa/features/extractors/ghidra/function.py index 160ca104..8fa8cc71 100644 --- a/capa/features/extractors/ghidra/function.py +++ b/capa/features/extractors/ghidra/function.py @@ -44,7 +44,7 @@ def extract_function_loop(fh: FunctionHandle): dests = block.getDestinations(capa.features.extractors.ghidra.helpers.get_monitor()) s_addrs = block.getStartAddresses() - while dests.hasNext(): # For loop throws Python TypeError + while dests.hasNext(): for addr in s_addrs: edges.append((addr.getOffset(), dests.next().getDestinationAddress().getOffset())) @@ -61,7 +61,6 @@ def extract_recursive_call(fh: FunctionHandle): def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: - """extract function features""" for function_handler in FUNCTION_HANDLERS: for feature, addr in function_handler(fh): yield feature, addr diff --git a/capa/ghidra/README.md b/capa/ghidra/README.md new file mode 100644 index 00000000..beb467d0 --- /dev/null +++ b/capa/ghidra/README.md @@ -0,0 +1,39 @@ +# Ghidra support + +capa supports using Ghidra (via [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra)) as a feature extraction backend. This allows you to run capa against binaries using Ghidra's analysis engine. + +## prerequisites + +- [Ghidra](https://github.com/NationalSecurityAgency/ghidra) >= 12.0 must be installed and available via the `GHIDRA_INSTALL_DIR` environment variable. + +## installation + +### standalone binary (recommended) + +The standalone binary is the preferred way to run capa with the Ghidra backend. +Although the binary does not bundle the Java environment or Ghidra itself, it will dynamically load them at runtime. + +### python package + +To use the Ghidra backend, install `flare-capa` with the `ghidra` extra. This ensures PyGhidra and other necessary dependencies are installed. + +```bash +pip install "flare-capa[ghidra]" +``` + +## usage + +To use the Ghidra backend, specify it with the `-b` or `--backend` flag: + +```bash +capa -b ghidra /path/to/sample +``` + +capa will: +1. Initialize a headless Ghidra instance. +2. Create a temporary project. +3. Import and analyze the sample. +4. Extract features and match rules. +5. Clean up the temporary project. + +**Note:** The first time you run this, it may take a few moments to initialize the Ghidra environment. diff --git a/pyproject.toml b/pyproject.toml index aea40108..de3d9255 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,6 @@ dependencies = [ "ruamel.yaml>=0.18", "pefile>=2023.2.7", "pyelftools>=0.31", - "pyghidra>=3.0.0", "pydantic>=2", "rich>=13", "humanize>=4", @@ -172,6 +171,9 @@ scripts = [ "sarif_om==1.0.4", "requests>=2.32.4", ] +ghidra = [ + "pyghidra>=3.0.0", +] [tool.deptry] extend_exclude = [