Fix Binary Ninja MLIL None handling in extract_stackstring

Co-authored-by: williballenthin <156560+williballenthin@users.noreply.github.com>
Initial plan
2026-03-20 00:38:53 -07:00 · 2025-09-03 09:44:38 +00:00 · 2025-09-03 09:38:07 +00:00
123 changed files with 2167 additions and 2542 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,27 +0,0 @@
-[tool.bumpversion]
-current_version = "9.3.1"
-
-[[tool.bumpversion.files]]
-filename = "capa/version.py"
-search = '__version__ = "{current_version}"'
-replace = '__version__ = "{new_version}"'
-
-[[tool.bumpversion.files]]
-filename = "capa/ida/plugin/ida-plugin.json"
-search = '"version": "{current_version}"'
-replace = '"version": "{new_version}"'
-
-[[tool.bumpversion.files]]
-filename = "capa/ida/plugin/ida-plugin.json"
-search = '"flare-capa=={current_version}"'
-replace = '"flare-capa=={new_version}"'
-
-[[tool.bumpversion.files]]
-filename = "CHANGELOG.md"
-search = "v{current_version}...master"
-replace = "v{current_version}...{new_version}"
-
-[[tool.bumpversion.files]]
-filename = "CHANGELOG.md"
-search = "master (unreleased)"
-replace = "v{new_version}"
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -4,13 +4,6 @@ updates:
    directory: "/"
    schedule:
      interval: "weekly"
-    groups:
-      vivisect:
-        patterns:
-          - "vivisect"
-          - "pyasn1"
-          - "pyasn1-modules"
-          - "msgpack"
    ignore:
      - dependency-name: "*"
        update-types: ["version-update:semver-patch"]
--- a/.github/flake8.ini
+++ b/.github/flake8.ini
@@ -33,6 +33,8 @@ per-file-ignores =
    scripts/*: T201
    # capa.exe is meant to print output
    capa/main.py: T201
+    # IDA tests emit results to the output window, so they need to print
+    tests/test_ida_features.py: T201
    # utility used to find the Binary Ninja API via invoking python.exe
    capa/features/extractors/binja/find_binja_api.py: T201

--- a/.github/mypy/mypy.ini
+++ b/.github/mypy/mypy.ini
@@ -63,9 +63,6 @@ ignore_missing_imports = True
 [mypy-PyQt5.*]
 ignore_missing_imports = True

-[mypy-binaryninja]
-ignore_missing_imports = True
-
 [mypy-binaryninja.*]
 ignore_missing_imports = True

--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -20,5 +20,3 @@ closes #issue_number
 - [ ] No new tests needed
 <!-- Please help us keeping capa documentation up-to-date -->
 - [ ] No documentation update needed
-<!-- Please indicate if and how you have used AI to generate (parts of) your code submission. Include your prompt, model, tool, etc. -->
- [ ] This submission includes AI-generated code and I have provided details in the description.
--- a/.github/pyinstaller/pyinstaller.spec
+++ b/.github/pyinstaller/pyinstaller.spec
@@ -17,8 +17,6 @@ import sys

 import capa.rules.cache

-from PyInstaller.utils.hooks import collect_submodules
-
 from pathlib import Path

 # SPECPATH is a global variable which points to .spec file path
@@ -36,7 +34,6 @@ a = Analysis(
    ["../../capa/main.py"],
    pathex=["capa"],
    binaries=None,
-    hiddenimports=collect_submodules('rich'),
    datas=[
        # when invoking pyinstaller from the project root,
        # this gets invoked from the directory of the spec file,
@@ -77,7 +74,6 @@ a = Analysis(
        # only be installed locally.
        "binaryninja",
        "ida",
-        "ghidra",
        # remove once https://github.com/mandiant/capa/issues/2681 has
        # been addressed by PyInstaller
        "pkg_resources",
--- a/.github/workflows/black-format.yml
+++ b/.github/workflows/black-format.yml
@@ -1,62 +0,0 @@
-name: black auto-format
-
-on:
-  pull_request:
-    branches: [ master ]
-    paths-ignore:
-      - 'web/**'
-      - 'doc/**'
-      - '**.md'
-  workflow_dispatch: # allow manual trigger
-
-permissions:
-  contents: write
-
-jobs:
-  black-format:
-    # only run on dependabot PRs or manual trigger
-    if: github.actor == 'dependabot[bot]' || github.event_name == 'workflow_dispatch'
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
-        with:
-          ref: ${{ github.head_ref }}
-          # need a token with write access to push the commit
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Python 3.13
-        uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
-        with:
-          python-version: "3.13"
-
-      - name: Install dependencies
-        run: |
-          pip install -r requirements.txt
-          pip install -e .[dev,scripts]
-
-      - name: Run isort
-        run: pre-commit run isort --all-files
-
-      - name: Run black/continue
-        # black returns non-zero error code after formatting, which is what we expect
-        continue-on-error: true
-        run: pre-commit run black --all-files
-
-      - name: Check for changes
-        id: changes
-        run: |
-          if git diff --quiet; then
-            echo "has_changes=false" >> "$GITHUB_OUTPUT"
-          else
-            echo "has_changes=true" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Commit and push formatting changes
-        if: steps.changes.outputs.has_changes == 'true'
-        run: |
-          git config user.name "${GITHUB_ACTOR}"
-          git config user.email "${GITHUB_ACTOR_ID}+${GITHUB_ACTOR}@users.noreply.github.com"
-          git add -A
-          git commit -m "style: auto-format with black and isort"
-          git push
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -28,11 +28,6 @@ jobs:
            artifact_name: capa
            asset_name: linux
            python_version: '3.10'
-            # for Ghidra
-            java-version: '21'
-            ghidra-version: '12.0'
-            public-version: 'PUBLIC_20251205'
-            ghidra-sha256: 'af43e8cfb2fa4490cf6020c3a2bde25c159d83f45236a0542688a024e8fc1941'
          - os: ubuntu-22.04-arm
            artifact_name: capa
            asset_name: linux-arm64
@@ -51,8 +46,8 @@ jobs:
          #  artifact_name: capa.exe
          #  asset_name: windows-arm64
          #  python_version: '3.12'
-          - os: macos-15-intel
-            # macos-15-intel is the lowest native intel build
+          - os: macos-13
+            # use an older macOS runner, on the assumption that it gives better portability
            artifact_name: capa
            asset_name: macos
            python_version: '3.10'
@@ -111,24 +106,6 @@ jobs:
        run: |
          7z e "tests/data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"
          dist/capa -d "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json"
-      - name: Set up Java ${{ matrix.java-version }}
-        if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10'
-        uses: actions/setup-java@387ac29b308b003ca37ba93a6cab5eb57c8f5f93 # v4.0.0
-        with:
-          distribution: 'temurin'
-          java-version: ${{ matrix.java-version }}
-      - name: Install Ghidra ${{ matrix.ghidra-version }}
-        if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10'
-        run: |
-          mkdir ./.github/ghidra
-          wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${{ matrix.ghidra-version }}_build/ghidra_${{ matrix.ghidra-version }}_${{ matrix.public-version }}.zip" -O ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip
-          echo "${{ matrix.ghidra-sha256 }} ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip" | sha256sum -c -
-          unzip .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip -d .github/ghidra/
-      - name: Does it run (Ghidra)?
-        if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10'
-        env:
-          GHIDRA_INSTALL_DIR: ${{ github.workspace }}/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
-        run: dist/capa -b ghidra -d "tests/data/Practical Malware Analysis Lab 01-01.dll_"
      - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: ${{ matrix.asset_name }}
@@ -167,7 +144,7 @@ jobs:
      - name: Set zip name
        run: echo "zip_name=capa-${GITHUB_REF#refs/tags/}-${{ matrix.asset_name }}.zip" >> $GITHUB_ENV
      - name: Zip ${{ matrix.artifact_name }} into ${{ env.zip_name }}
-        run: zip ${ZIP_NAME} ${{ matrix.artifact_name }}
+        run: zip ${{ env.zip_name }} ${{ matrix.artifact_name }}
      - name: Upload ${{ env.zip_name }} to GH Release
        uses: svenstaro/upload-release-action@2728235f7dc9ff598bd86ce3c274b74f802d2208 # v2
        with:
--- a/.github/workflows/pip-audit.yml
+++ b/.github/workflows/pip-audit.yml
@@ -14,8 +14,8 @@ jobs:

    steps:
      - name: Check out repository code
-        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+        uses: actions/checkout@v4

-      - uses: pypa/gh-action-pip-audit@1220774d901786e6f652ae159f7b6bc8fea6d266 # v1.1.0
+      - uses: pypa/gh-action-pip-audit@v1.0.8
        with:
          inputs: .
--- a/.github/workflows/tag.yml
+++ b/.github/workflows/tag.yml
@@ -21,10 +21,8 @@ jobs:
        # user information is needed to create annotated tags (with a message)
        git config user.email 'capa-dev@mandiant.com'
        git config user.name 'Capa Bot'
-        name=${GITHUB_EVENT_RELEASE_TAG_NAME}
+        name=${{ github.event.release.tag_name }}
        git tag $name -m "https://github.com/mandiant/capa/releases/$name"
-      env:
-        GITHUB_EVENT_RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
        # TODO update branch name-major=${name%%.*}
    - name: Push tag to capa-rules
      uses: ad-m/github-push-action@d91a481090679876dfc4178fef17f286781251df # v0.8.0
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -42,10 +42,10 @@ jobs:
    - name: Checkout capa
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
    # use latest available python to take advantage of best performance
-    - name: Set up Python 3.13
+    - name: Set up Python 3.12
      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
-        python-version: "3.13"
+        python-version: "3.12"
    - name: Install dependencies
      run: |
        pip install -r requirements.txt
@@ -70,10 +70,10 @@ jobs:
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      with:
        submodules: recursive
-    - name: Set up Python 3.13
+    - name: Set up Python 3.12
      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
-        python-version: "3.13"
+        python-version: "3.12"
    - name: Install capa
      run: |
        pip install -r requirements.txt
@@ -88,11 +88,13 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2022, macos-15-intel, macos-14]
+        os: [ubuntu-22.04, windows-2022, macos-13]
        # across all operating systems
-        python-version: ["3.10", "3.13"]
+        python-version: ["3.10", "3.11"]
        include:
          # on Ubuntu run these as well
+          - os: ubuntu-22.04
+            python-version: "3.10"
          - os: ubuntu-22.04
            python-version: "3.11"
          - os: ubuntu-22.04
@@ -113,11 +115,6 @@ jobs:
      run: |
        pip install -r requirements.txt
        pip install -e .[dev,scripts]
-    - name: Cache vivisect workspaces
-      uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
-      with:
-        path: tests/data/**/*.viv
-        key: viv-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }}
    - name: Run tests (fast)
      # this set of tests runs about 80% of the cases in 20% of the time,
      # and should catch most errors quickly.
@@ -134,7 +131,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        python-version: ["3.10", "3.13"]
+        python-version: ["3.10", "3.11"]
    steps:
    - name: Checkout capa with submodules
      # do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
@@ -160,7 +157,7 @@ jobs:
      run: |
        mkdir ./.github/binja
        curl "https://raw.githubusercontent.com/Vector35/binaryninja-api/6812c97/scripts/download_headless.py" -o ./.github/binja/download_headless.py
-        python ./.github/binja/download_headless.py --serial ${BN_SERIAL} --output .github/binja/BinaryNinja-headless.zip
+        python ./.github/binja/download_headless.py --serial ${{ env.BN_SERIAL }} --output .github/binja/BinaryNinja-headless.zip
        unzip .github/binja/BinaryNinja-headless.zip -d .github/binja/
        python .github/binja/binaryninja/scripts/install_api.py --install-on-root --silent
    - name: Run tests
@@ -176,11 +173,11 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        python-version: ["3.10", "3.13"]
-        java-version: ["21"]
-        ghidra-version: ["12.0"]
-        public-version: ["PUBLIC_20251205"] # for ghidra releases
-        ghidra-sha256: ['af43e8cfb2fa4490cf6020c3a2bde25c159d83f45236a0542688a024e8fc1941']
+        python-version: ["3.10", "3.11"]
+        java-version: ["17"]
+        ghidra-version: ["11.0.1"]
+        public-version: ["PUBLIC_20240130"] # for ghidra releases
+        ghidrathon-version: ["4.0.0"] 
    steps:
    - name: Checkout capa with submodules
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -199,66 +196,26 @@ jobs:
      run: |
        mkdir ./.github/ghidra
        wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${{ matrix.ghidra-version }}_build/ghidra_${{ matrix.ghidra-version }}_${{ matrix.public-version }}.zip" -O ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip
-        echo "${{ matrix.ghidra-sha256 }} ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip" | sha256sum -c -
        unzip .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip -d .github/ghidra/
+    - name: Install Ghidrathon
+      run : |
+        mkdir ./.github/ghidrathon
+        wget "https://github.com/mandiant/Ghidrathon/releases/download/v${{ matrix.ghidrathon-version }}/Ghidrathon-v${{ matrix.ghidrathon-version}}.zip" -O ./.github/ghidrathon/ghidrathon-v${{ matrix.ghidrathon-version }}.zip
+        unzip .github/ghidrathon/ghidrathon-v${{ matrix.ghidrathon-version }}.zip -d .github/ghidrathon/
+        python -m pip install -r .github/ghidrathon/requirements.txt
+        python .github/ghidrathon/ghidrathon_configure.py $(pwd)/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
+        unzip .github/ghidrathon/Ghidrathon-v${{ matrix.ghidrathon-version }}.zip -d .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/Ghidra/Extensions
    - name: Install pyyaml
      run: sudo apt-get install -y libyaml-dev
-    - name: Install capa with Ghidra extra
-      run: |
-        pip install -e .[dev,ghidra]
-    - name: Run tests
-      env:
-        GHIDRA_INSTALL_DIR: ${{ github.workspace }}/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
-      run: pytest -v tests/test_ghidra_features.py
- 
-  idalib-tests:
-    name: IDA ${{ matrix.ida.version }} tests for ${{ matrix.python-version }}
-    runs-on: ubuntu-22.04
-    needs: [tests]
-    env:
-      IDA_LICENSE_ID: ${{ secrets.IDA_LICENSE_ID }}
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.10", "3.13"]
-        ida:
-          - version: 9.0
-            slug: "release/9.0/ida-essential/ida-essential_90_x64linux.run"
-          - version: 9.1
-            slug: "release/9.1/ida-essential/ida-essential_91_x64linux.run"
-          - version: 9.2
-            slug: "release/9.2/ida-essential/ida-essential_92_x64linux.run"
-    steps:
-    - name: Checkout capa with submodules
-      # do only run if IDA_LICENSE_ID is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
-      if: ${{ env.IDA_LICENSE_ID != 0 }}
-      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
-      with:
-        submodules: recursive
-    - name: Set up Python ${{ matrix.python-version }}
-      if: ${{ env.IDA_LICENSE_ID != 0 }}
-      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Setup uv
-      if: ${{ env.IDA_LICENSE_ID != 0 }}
-      uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0
-    - name: Install dependencies
-      if: ${{ env.IDA_LICENSE_ID != 0 }}
-      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
-      if: ${{ env.IDA_LICENSE_ID != 0 }}
      run: |
        pip install -r requirements.txt
        pip install -e .[dev,scripts]
-        pip install idapro
-    - name: Install IDA ${{ matrix.ida.version }}
-      if: ${{ env.IDA_LICENSE_ID != 0 }}
-      run: |
-        uv run hcli --disable-updates ida install --download-id ${{ matrix.ida.slug }} --license-id ${{ secrets.IDA_LICENSE_ID }} --set-default --yes
-      env:
-        HCLI_API_KEY: ${{ secrets.HCLI_API_KEY }}
-        IDA_LICENSE_ID: ${{ secrets.IDA_LICENSE_ID }}
    - name: Run tests
-      if: ${{ env.IDA_LICENSE_ID != 0 }}
-      run: pytest -v tests/test_idalib_features.py  # explicitly refer to the idalib tests for performance. other tests run above.
+      run: | 
+        mkdir ./.github/ghidra/project
+        .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/support/analyzeHeadless .github/ghidra/project ghidra_test -Import ./tests/data/mimikatz.exe_ -ScriptPath ./tests/ -PostScript test_ghidra_features.py > ../output.log
+        cat ../output.log
+        exit_code=$(cat ../output.log | grep exit | awk '{print $NF}')
+        exit $exit_code
+ 
--- a/.github/workflows/web-release.yml
+++ b/.github/workflows/web-release.yml
@@ -18,18 +18,14 @@ jobs:
    - uses: actions/checkout@v4

    - name: Set release name
-      run: echo "RELEASE_NAME=capa-explorer-web-v${GITHUB_EVENT_INPUTS_VERSION}-${GITHUB_SHA::7}" >> $GITHUB_ENV
-      env:
-        GITHUB_EVENT_INPUTS_VERSION: ${{ github.event.inputs.version }}
+      run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV

    - name: Check if release already exists
      run: |
-        if ls web/explorer/releases/capa-explorer-web-v${GITHUB_EVENT_INPUTS_VERSION}-* 1> /dev/null 2>&1; then
-          echo "::error:: A release with version ${GITHUB_EVENT_INPUTS_VERSION} already exists"
+        if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
+          echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
          exit 1
        fi
-      env:
-        GITHUB_EVENT_INPUTS_VERSION: ${{ github.event.inputs.version }}

    - name: Set up Node.js
      uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
@@ -47,24 +43,24 @@ jobs:
      working-directory: web/explorer

    - name: Compress bundle
-      run: zip -r ${RELEASE_NAME}.zip capa-explorer-web
+      run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
      working-directory: web/explorer

    - name: Create releases directory
      run: mkdir -vp web/explorer/releases

    - name: Move release to releases folder
-      run: mv web/explorer/${RELEASE_NAME}.zip web/explorer/releases
+      run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases

    - name: Compute release SHA256 hash
      run: |
-        echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${RELEASE_NAME}.zip | awk '{print $1}')" >> $GITHUB_ENV
+        echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV

    - name: Update CHANGELOG.md
      run: |
-        echo "## ${RELEASE_NAME}" >> web/explorer/releases/CHANGELOG.md
+        echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
        echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
-        echo "- SHA256: ${RELEASE_SHA256}" >> web/explorer/releases/CHANGELOG.md
+        echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
        echo "" >> web/explorer/releases/CHANGELOG.md
        cat web/explorer/releases/CHANGELOG.md

@@ -77,7 +73,7 @@ jobs:
      run: |
        git config --local user.email "capa-dev@mandiant.com"
        git config --local user.name "Capa Bot"
-        git add -f web/explorer/releases/${RELEASE_NAME}.zip web/explorer/releases/CHANGELOG.md
+        git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
        git add -u web/explorer/releases/

    - name: Create Pull Request
--- a/.gitignore
+++ b/.gitignore
@@ -122,7 +122,6 @@ scripts/perf/*.zip
 */.DS_Store
 Pipfile
 Pipfile.lock
-uv.lock
 /cache/
 .github/binja/binaryninja
 .github/binja/download_headless.py
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -136,8 +136,8 @@ repos:
        -   "tests/"
        -   "--ignore=tests/test_binja_features.py"
        -   "--ignore=tests/test_ghidra_features.py"
+        -   "--ignore=tests/test_ida_features.py"
        -   "--ignore=tests/test_viv_features.py"
-        -   "--ignore=tests/test_idalib_features.py"
        -   "--ignore=tests/test_main.py"
        -   "--ignore=tests/test_scripts.py"
        always_run: true
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,109 +3,11 @@
 ## master (unreleased)

 ### New Features
-
- ghidra: support PyGhidra @mike-hunhoff #2788
- vmray: extract number features from whitelisted void_ptr parameters (hKey, hKeyRoot) @adeboyedn #2835
-
-### Breaking Changes
-
-### New Rules (23)
-
- nursery/run-as-nodejs-native-module mehunhoff@google.com
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_io still@teamt5.org
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_timer still@teamt5.org
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_work still@teamt5.org
- data-manipulation/encryption/hc-256/encrypt-data-using-hc-256 wballenthin@hex-rays.com
- anti-analysis/anti-llm/terminate-anthropic-session-via-magic-strings wballenthin@hex-rays.com
- nursery/access-aws-credentials maximemorin@google.com
- nursery/access-cloudflare-credentials maximemorin@google.com
- nursery/access-docker-credentials maximemorin@google.com
- nursery/access-gcp-credentials maximemorin@google.com
- nursery/access-kubernetes-credentials maximemorin@google.com
- nursery/enumerate-aws-cloudformation maximemorin@google.com
- nursery/enumerate-aws-cloudtrail maximemorin@google.com
- nursery/enumerate-aws-direct-connect maximemorin@google.com
- nursery/enumerate-aws-ec2 maximemorin@google.com
- nursery/enumerate-aws-iam maximemorin@google.com
- nursery/enumerate-aws-s3 maximemorin@google.com
- nursery/enumerate-aws-support-cases maximemorin@google.com
- persistence/registry/persist-via-shellserviceobjectdelayload-registry-key xpzhxhm@gmail.com
- nursery/get-http-response-date @cosmoworker
- host-interaction/process/create/create-process-in-dotnet moritz.raabe@mandiant.com social.tarang@gmail.com
- nursery/read-file-in-dotnet moritz.raabe@mandiant.com anushka.virgaonkar@mandiant.com
- nursery/write-file-in-dotnet william.ballenthin@mandiant.com anushka.virgaonkar@mandiant.com
-
-
-### Bug Fixes
- main: suggest --os flag in unsupported OS error message to help users override ELF OS detection @devs6186 #2577
- render: escape sample-controlled strings before passing to Rich to prevent MarkupError @devs6186 #2699
- rules: handle empty or invalid YAML documents gracefully in `Rule.from_yaml` and `get_rules` @devs6186 #2900
- Fixed insecure deserialization vulnerability in YAML loading @0x1622 (#2770)
- loader: gracefully handle ELF files with unsupported architectures kamranulhaq2002@gmail.com #2800
- loader: handle SegmentationViolation for malformed ELF files @kami922 #2799
- lint: disable rule caching during linting @Maijin #2817
- vmray: skip processes with invalid PID or missing filename @EclipseAditya #2807
- features: fix Regex.get_value_str() returning escaped pattern instead of raw regex @EclipseAditya #1909
- render: use default styling for dynamic -vv API/call details so they are easier to see @devs6186 #1865
- loader: handle struct.error from dnfile and show clear CorruptFile message @devs6186 #2442
- address: fix TypeError when sorting locations containing mixed address types @devs6186 #2195
- loader: skip PE files with unrealistically large section virtual sizes to prevent resource exhaustion @devs6186 #1989
- engine/render: fix unbounded range sentinel precedence so `count(...): N or more` uses explicit `((1 << 64) - 1)` @blenbot #2936
-
-### capa Explorer Web
- webui: fix 404 for "View rule in capa-rules" by using encodeURIComponent for rule name in URL @devs6186 #2482
- webui: show error when JSON does not follow expected result document schema; suggest reanalyzing for VT URLs @devs6186 #2363
- webui: fix global search to match feature types (match, regex, api, …) @devs6186 #2349
-
-### capa Explorer IDA Pro plugin
-
-### Performance
-
- perf: eliminate O(n²) tuple growth and reduce per-match overhead @devs6186 #2890
-
-### Development
-
- doc: document that default output shows top-level matches only; -v/-vv show nested matches @devs6186 #1410
- doc: fix typo in usage.md, add documentation links to README @devs6186 #2274
- doc: add table comparing ways to consume capa output (CLI, IDA, Ghidra, dynamic sandbox, web) @devs6186 #2273
- binja: add mypy config for top-level binaryninja module to fix mypy issues @devs6186 #2399
- ci: deprecate macos-13 runner and use Python v3.13 for testing @mike-hunhoff #2777
- ci: pin pip-audit action SHAs and update to v1.1.0 @kami922 #1131
-
-### Raw diffs
- [capa v9.3.1...master](https://github.com/mandiant/capa/compare/v9.3.1...master)
- [capa-rules v9.3.1...master](https://github.com/mandiant/capa-rules/compare/v9.3.1...master)
-
-## v9.3.1
-
-This patch release fixes a missing import for the capa explorer plugin for IDA Pro.
-
-### Bug Fixes
-
- add missing ida-netnode dependency to project.toml @mike-hunhoff #2765
-
-### Development
-
- ci: bump binja min version @mike-hunhoff #2763
-
-### Raw diffs
- [capa v9.3.0...master](https://github.com/mandiant/capa/compare/v9.3.0...master)
- [capa-rules v9.3.0...master](https://github.com/mandiant/capa-rules/compare/v9.3.0...master)
-
-## v9.3.0
-
-capa v9.3.0 comes with over 20 new and/or impoved rules.
-For IDA users the capa explorer plugin is now available via the IDA Pro plugin repository and contains Qt compatibility layer for PyQt5 and PySide6 support.
-Additionally a Binary Ninja bug has been fixed. Released binaries now include ARM64 binaries (Linux and macOS).
-
-### New Features
-
 - ci: add support for arm64 binary releases
- tests: run tests against IDA via idalib @williballenthin #2742

 ### Breaking Changes

-### New Rules (24)
+### New Rules (14)

 - anti-analysis/anti-vm/vm-detection/detect-mouse-movement-via-activity-checks-on-windows tevajdr@gmail.com
 - nursery/create-executable-heap moritz.raabe@mandiant.com
@@ -120,37 +22,21 @@ Additionally a Binary Ninja bug has been fixed. Released binaries now include AR
 - linking/static/grpc/linked-against-grpc jakubjozwiak@google.com
 - linking/static/hp-socket/linked-against-hp-socket jakubjozwiak@google.com
 - load-code/execute-jscript-via-vsaengine-in-dotnet jakubjozwiak@google.com
- linking/static/funchook/linked-against-funchook jakubjozwiak@google.com
- linking/static/plthook/linked-against-plthook jakubjozwiak@google.com
- host-interaction/network/enumerate-tcp-connections-via-wmi-com-api jakubjozwiak@google.com
- host-interaction/network/routing-table/create-routing-table-entry jakubjozwiak@google.com
- host-interaction/network/routing-table/get-routing-table michael.hunhoff@mandiant.com
- host-interaction/file-system/use-io_uring-io-interface-on-linux jakubjozwiak@google.com
- collection/keylog/log-keystrokes-via-direct-input zeze-zeze
- nursery/compiled-from-fsharp mehunhoff@google.com
- nursery/decrypt-data-using-aes-via-dotnet mehunhoff@google.com
- nursery/get-dotnet-assembly-entry-point mehunhoff@google.com
+-

 ### Bug Fixes

- binja: fix a crash during feature extraction when the MLIL is unavailable @xusheng6 #2714 
-
 ### capa Explorer Web

 ### capa Explorer IDA Pro plugin

- add `ida-plugin.json` for inclusion in the IDA Pro plugin repository @williballenthin
- ida plugin: add Qt compatibility layer for PyQt5 and PySide6 support @williballenthin #2707
- delay import to not load Qt* when running under idalib @mr-tz #2752
-
 ### Development

 - ci: remove redundant "test_run" action from build workflow @mike-hunhoff #2692
- dev: add bumpmyversion to bump and sync versions across the project @mr-tz

 ### Raw diffs
- [capa v9.2.1...9.3.0](https://github.com/mandiant/capa/compare/v9.2.1...9.3.0)
- [capa-rules v9.2.1...9.3.0](https://github.com/mandiant/capa-rules/compare/v9.2.1...9.3.0)
+- [capa v9.2.1...master](https://github.com/mandiant/capa/compare/v9.2.1...master)
+- [capa-rules v9.2.1...master](https://github.com/mandiant/capa-rules/compare/v9.2.1...master)

 ## v9.2.1

--- a/README.md
+++ b/README.md
@@ -87,8 +87,6 @@ Download stable releases of the standalone capa binaries [here](https://github.c

 To use capa as a library or integrate with another tool, see [doc/installation.md](https://github.com/mandiant/capa/blob/master/doc/installation.md) for further setup instructions.

-**Documentation:** [Usage and tips](doc/usage.md) · [Installation](doc/installation.md) · [Limitations](doc/limitations.md) · [FAQ](doc/faq.md)
-
 # capa Explorer Web
 The [capa Explorer Web](https://mandiant.github.io/capa/explorer/) enables you to interactively explore capa results in your web browser. Besides the online version you can download a standalone HTML file for local offline usage.

@@ -293,17 +291,11 @@ It also uses your local changes to the .idb to extract better features, such as
 ![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png)

 # Ghidra integration
-
-capa supports using Ghidra (via [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra)) as a feature extraction backend. This allows you to run capa against binaries using Ghidra's analysis engine.
-
-You can run and view capa results in the Ghidra UI using [capa explorer for Ghidra](https://github.com/mandiant/capa/tree/master/capa/ghidra/plugin).
+If you use Ghidra, then you can use the [capa + Ghidra integration](/capa/ghidra/) to run capa's analysis directly on your Ghidra database and render the results in Ghidra's user interface.

 <img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>

-You can also run capa from the command line using the [Ghidra backend](https://github.com/mandiant/capa/tree/master/capa/ghidra).
-
 # blog posts
- [Riding Dragons: capa Harnesses Ghidra](https://www.mandiant.com/resources/blog/capa-harnesses-ghidra)
 - [Dynamic capa: Exploring Executable Run-Time Behavior with the CAPE Sandbox](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox)
 - [capa v4: casting a wider .NET](https://www.mandiant.com/resources/blog/capa-v4-casting-wider-net) (.NET support)
 - [ELFant in the Room – capa v3](https://www.mandiant.com/resources/elfant-in-the-room-capa-v3) (ELF support)
@@ -323,6 +315,3 @@ You can also run capa from the command line using the [Ghidra backend](https://g

 ## capa testfiles
 The [capa-testfiles repository](https://github.com/mandiant/capa-testfiles) contains the data we use to test capa's code and rules
-
-## mailing list
-Subscribe to the FLARE mailing list for community announcements! Email "subscribe" to [flare-external@google.com](mailto:flare-external@google.com?subject=subscribe).
--- a/capa/capabilities/dynamic.py
+++ b/capa/capabilities/dynamic.py
@@ -277,9 +277,7 @@ def find_dynamic_capabilities(
    all_span_matches: MatchResults = collections.defaultdict(list)
    all_call_matches: MatchResults = collections.defaultdict(list)

-    # Accumulate into a list to avoid O(n²) tuple concatenation.
-    # Tuples are immutable, so `t += (x,)` copies the entire tuple each time.
-    process_feature_counts: list[rdoc.ProcessFeatureCount] = []
+    feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())

    assert isinstance(extractor, DynamicFeatureExtractor)
    processes: list[ProcessHandle] = list(extractor.get_processes())
@@ -291,10 +289,10 @@ def find_dynamic_capabilities(
        task = pbar.add_task("matching", total=n_processes, unit="processes")
        for p in processes:
            process_capabilities = find_process_capabilities(ruleset, extractor, p)
-            process_feature_counts.append(
+            feature_counts.processes += (
                rdoc.ProcessFeatureCount(
                    address=frz.Address.from_capa(p.address), count=process_capabilities.feature_count
-                )
+                ),
            )

            for rule_name, res in process_capabilities.process_matches.items():
@@ -319,11 +317,7 @@ def find_dynamic_capabilities(
        capa.engine.index_rule_matches(process_and_lower_features, rule, locations)

    all_file_capabilities = find_file_capabilities(ruleset, extractor, process_and_lower_features)
-
-    feature_counts = rdoc.DynamicFeatureCounts(
-        file=all_file_capabilities.feature_count,
-        processes=tuple(process_feature_counts),
-    )
+    feature_counts.file = all_file_capabilities.feature_count

    matches = dict(
        itertools.chain(
--- a/capa/capabilities/static.py
+++ b/capa/capabilities/static.py
@@ -156,11 +156,8 @@ def find_static_capabilities(
    all_bb_matches: MatchResults = collections.defaultdict(list)
    all_insn_matches: MatchResults = collections.defaultdict(list)

-    # Accumulate into lists to avoid O(n²) tuple concatenation.
-    # Tuples are immutable, so `t += (x,)` copies the entire tuple each time.
-    # For binaries with thousands of functions this becomes quadratic in memory work.
-    function_feature_counts: list[rdoc.FunctionFeatureCount] = []
-    library_functions_list: list[rdoc.LibraryFunction] = []
+    feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
+    library_functions: tuple[rdoc.LibraryFunction, ...] = ()

    assert isinstance(extractor, StaticFeatureExtractor)
    functions: list[FunctionHandle] = list(extractor.get_functions())
@@ -179,20 +176,20 @@ def find_static_capabilities(
            if extractor.is_library_function(f.address):
                function_name = extractor.get_function_name(f.address)
                logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
-                library_functions_list.append(
-                    rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name)
+                library_functions += (
+                    rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
                )
-                n_libs = len(library_functions_list)
+                n_libs = len(library_functions)
                percentage = round(100 * (n_libs / n_funcs))
                pbar.update(task, postfix=f"skipped {n_libs} library functions, {percentage}%")
                pbar.advance(task)
                continue

            code_capabilities = find_code_capabilities(ruleset, extractor, f)
-            function_feature_counts.append(
+            feature_counts.functions += (
                rdoc.FunctionFeatureCount(
                    address=frz.Address.from_capa(f.address), count=code_capabilities.feature_count
-                )
+                ),
            )
            t1 = time.time()

@@ -233,11 +230,7 @@ def find_static_capabilities(
        capa.engine.index_rule_matches(function_and_lower_features, rule, locations)

    all_file_capabilities = find_file_capabilities(ruleset, extractor, function_and_lower_features)
-
-    feature_counts = rdoc.StaticFeatureCounts(
-        file=all_file_capabilities.feature_count,
-        functions=tuple(function_feature_counts),
-    )
+    feature_counts.file = all_file_capabilities.feature_count

    matches: MatchResults = dict(
        itertools.chain(
@@ -251,4 +244,4 @@ def find_static_capabilities(
        )
    )

-    return Capabilities(matches, feature_counts, tuple(library_functions_list))
+    return Capabilities(matches, feature_counts, library_functions)
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -227,7 +227,7 @@ class Range(Statement):
        super().__init__(description=description)
        self.child = child
        self.min = min if min is not None else 0
-        self.max = max if max is not None else ((1 << 64) - 1)
+        self.max = max if max is not None else ((1 << 64) - 1)

    def evaluate(self, features: FeatureSet, short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
@@ -240,7 +240,7 @@ class Range(Statement):
        return Result(self.min <= count <= self.max, self, [], locations=features.get(self.child))

    def __str__(self):
-        if self.max == ((1 << 64) - 1):
+        if self.max == ((1 << 64) - 1):
            return f"range({str(self.child)}, min={self.min}, max=infinity)"
        else:
            return f"range({str(self.child)}, min={self.min}, max={self.max})"
--- a/capa/features/address.py
+++ b/capa/features/address.py
@@ -189,11 +189,6 @@ class _NoAddress(Address):
    def __lt__(self, other):
        return False

-    def __gt__(self, other):
-        # Mixed-type comparison: (real_address < NO_ADDRESS) invokes this so sort works.
-        # NoAddress sorts last.
-        return other is not self
-
    def __hash__(self):
        return hash(0)

--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -369,12 +369,6 @@ class Regex(String):
        else:
            return Result(False, _MatchedRegex(self, {}), [])

-    def get_value_str(self) -> str:
-        # return the raw regex pattern, not the escaped version from String.get_value_str().
-        # see #1909.
-        assert isinstance(self.value, str)
-        return self.value
-
    def __str__(self):
        assert isinstance(self.value, str)
        return f"regex(string =~ {self.value})"
--- a/capa/features/extractors/binexport2/init.py
+++ b/capa/features/extractors/binexport2/init.py
@@ -20,7 +20,6 @@ Proto files generated via protobuf v24.4:
 from BinExport2 at 6916731d5f6693c4a4f0a052501fd3bd92cfd08b
 https://github.com/google/binexport/blob/6916731/binexport2.proto
 """
-
 import io
 import hashlib
 import logging
--- a/capa/features/extractors/binexport2/arch/arm/insn.py
+++ b/capa/features/extractors/binexport2/arch/arm/insn.py
@@ -84,14 +84,16 @@ def extract_insn_number_features(
                yield OperandOffset(i, value), ih.address


-OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack),  #int]                                 ; capture #int
    ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack),  #int]!                                ; capture #int
    ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack)],        #int                           ; capture #int
    ldp|ldpd|stp|stpd                                             reg, reg,                     [reg(not-stack), #int]         ; capture #int
    ldp|ldpd|stp|stpd                                             reg, reg,                     [reg(not-stack), #int]!        ; capture #int
    ldp|ldpd|stp|stpd                                             reg, reg,                     [reg(not-stack)],       #int   ; capture #int
-    """)
+    """
+)


 def extract_insn_offset_features(
@@ -115,10 +117,12 @@ def extract_insn_offset_features(
        yield OperandOffset(match.operand_index, value), ih.address


-NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    eor reg, reg, reg
    eor reg, reg, #int
-    """)
+    """
+)


 def extract_insn_nzxor_characteristic_features(
@@ -140,9 +144,11 @@ def extract_insn_nzxor_characteristic_features(
        yield Characteristic("nzxor"), ih.address


-INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    blx|bx|blr reg
-    """)
+    """
+)


 def extract_function_indirect_call_characteristic_features(
--- a/capa/features/extractors/binexport2/arch/intel/insn.py
+++ b/capa/features/extractors/binexport2/arch/intel/insn.py
@@ -34,14 +34,17 @@ from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOK
 logger = logging.getLogger(__name__)


-IGNORE_NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+IGNORE_NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    ret  #int
    retn #int
    add  reg(stack), #int
    sub  reg(stack), #int
-    """)
+    """
+)

-NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    push #int0            ; capture #int0

    # its a little tedious to enumerate all the address forms
@@ -61,7 +64,8 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
    # imagine reg is zero'd out, then this is like `mov reg, #int`
    # which is not uncommon.
    lea reg, [reg + #int]  ; capture #int
-    """)
+    """
+)


 def extract_insn_number_features(
@@ -96,7 +100,8 @@ def extract_insn_number_features(
            yield OperandOffset(match.operand_index, value), ih.address


-OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    mov|movzx|movsb|cmp [reg            +  reg * #int + #int0], #int  ; capture #int0
    mov|movzx|movsb|cmp [reg            * #int + #int0],        #int  ; capture #int0
    mov|movzx|movsb|cmp [reg            +  reg + #int0],        #int  ; capture #int0
@@ -109,15 +114,18 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
    mov|movzx|movsb|cmp|lea reg, [reg            * #int + #int0]         ; capture #int0
    mov|movzx|movsb|cmp|lea reg, [reg            +  reg + #int0]         ; capture #int0
    mov|movzx|movsb|cmp|lea reg, [reg(not-stack) + #int0]                ; capture #int0
-    """)
+    """
+)

 # these are patterns that access offset 0 from some pointer
 # (pointer is not the stack pointer).
-OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    mov|movzx|movsb [reg(not-stack)], reg
    mov|movzx|movsb [reg(not-stack)], #int
    lea             reg,              [reg(not-stack)]
-    """)
+    """
+)


 def extract_insn_offset_features(
@@ -181,10 +189,12 @@ def is_security_cookie(
    return False


-NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    xor|xorpd|xorps|pxor reg, reg
    xor|xorpd|xorps|pxor reg, #int
-    """)
+    """
+)


 def extract_insn_nzxor_characteristic_features(
@@ -218,7 +228,8 @@ def extract_insn_nzxor_characteristic_features(
    yield Characteristic("nzxor"), ih.address


-INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
+INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
+    """
    call|jmp reg0
    call|jmp [reg + reg * #int + #int]
    call|jmp [reg + reg * #int]
@@ -226,7 +237,8 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
    call|jmp [reg + reg + #int]
    call|jmp [reg + #int]
    call|jmp [reg]
-    """)
+    """
+)


 def extract_function_indirect_call_characteristic_features(
--- a/capa/features/extractors/binja/function.py
+++ b/capa/features/extractors/binja/function.py
@@ -19,6 +19,7 @@ from binaryninja import (
    Function,
    BinaryView,
    SymbolType,
+    ILException,
    RegisterValueType,
    VariableSourceType,
    LowLevelILOperation,
@@ -191,7 +192,11 @@ def extract_stackstring(fh: FunctionHandle):
    if bv is None:
        return

-    mlil = func.mlil
+    try:
+        mlil = func.mlil
+    except ILException:
+        return
+
    if mlil is None:
        return

--- a/capa/features/extractors/cape/extractor.py
+++ b/capa/features/extractors/cape/extractor.py
@@ -35,7 +35,7 @@ from capa.features.extractors.base_extractor import (

 logger = logging.getLogger(__name__)

-TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE", "2.5-CAPE"}
+TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE"}


 class CapeExtractor(DynamicFeatureExtractor):
--- a/capa/features/extractors/dnfile/extractor.py
+++ b/capa/features/extractors/dnfile/extractor.py
@@ -27,12 +27,7 @@ import capa.features.extractors.dnfile.file
 import capa.features.extractors.dnfile.insn
 import capa.features.extractors.dnfile.function
 from capa.features.common import Feature
-from capa.features.address import (
-    NO_ADDRESS,
-    Address,
-    DNTokenAddress,
-    DNTokenOffsetAddress,
-)
+from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress
 from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
 from capa.features.extractors.base_extractor import (
    BBHandle,
@@ -44,7 +39,6 @@ from capa.features.extractors.base_extractor import (
 from capa.features.extractors.dnfile.helpers import (
    get_dotnet_types,
    get_dotnet_fields,
-    load_dotnet_image,
    get_dotnet_managed_imports,
    get_dotnet_managed_methods,
    get_dotnet_unmanaged_imports,
@@ -89,7 +83,7 @@ class DnFileFeatureExtractorCache:

 class DnfileFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, path: Path):
-        self.pe = load_dotnet_image(path)
+        self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
        super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))

        # pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
@@ -118,12 +112,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
            fh: FunctionHandle = FunctionHandle(
                address=DNTokenAddress(token),
                inner=method,
-                ctx={
-                    "pe": self.pe,
-                    "calls_from": set(),
-                    "calls_to": set(),
-                    "cache": self.token_cache,
-                },
+                ctx={"pe": self.pe, "calls_from": set(), "calls_to": set(), "cache": self.token_cache},
            )

            # method tokens should be unique
--- a/capa/features/extractors/dnfile/helpers.py
+++ b/capa/features/extractors/dnfile/helpers.py
@@ -15,10 +15,8 @@

 from __future__ import annotations

-import struct
 import logging
 from typing import Union, Iterator, Optional
-from pathlib import Path

 import dnfile
 from dncil.cil.body import CilMethodBody
@@ -32,16 +30,6 @@ from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
 logger = logging.getLogger(__name__)


-def load_dotnet_image(path: Path) -> dnfile.dnPE:
-    """load a .NET PE file, raising CorruptFile on struct.error with the original error message."""
-    try:
-        return dnfile.dnPE(str(path))
-    except struct.error as e:
-        from capa.loader import CorruptFile
-
-        raise CorruptFile(f"Invalid or truncated .NET metadata: {e}") from e
-
-
 class DnfileMethodBodyReader(CilMethodBodyReaderBase):
    def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow):
        self.pe: dnfile.dnPE = pe
@@ -163,9 +151,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
        )


-def get_dotnet_methoddef_property_accessors(
-    pe: dnfile.dnPE,
-) -> Iterator[tuple[int, str]]:
+def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
    """get MethodDef methods used to access properties

    see https://www.ntcore.com/files/dotnetformat.htm
@@ -240,13 +226,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:

            typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)

-            yield DnType(
-                token,
-                typedefname,
-                namespace=typedefnamespace,
-                member=method_name,
-                access=access,
-            )
+            yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access)


 def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
@@ -279,9 +259,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
            yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)


-def get_dotnet_managed_method_bodies(
-    pe: dnfile.dnPE,
-) -> Iterator[tuple[int, CilMethodBody]]:
+def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
    """get managed methods from MethodDef table"""
    for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
        assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
@@ -360,10 +338,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O


 def resolve_nested_typedef_name(
-    nested_class_table: dict,
-    index: int,
-    typedef: dnfile.mdtable.TypeDefRow,
-    pe: dnfile.dnPE,
+    nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
 ) -> tuple[str, tuple[str, ...]]:
    """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""

--- a/capa/features/extractors/dotnetfile.py
+++ b/capa/features/extractors/dotnetfile.py
@@ -42,7 +42,6 @@ from capa.features.extractors.dnfile.types import DnType
 from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
 from capa.features.extractors.dnfile.helpers import (
    iter_dotnet_table,
-    load_dotnet_image,
    is_dotnet_mixed_mode,
    get_dotnet_managed_imports,
    get_dotnet_managed_methods,
@@ -185,8 +184,8 @@ GLOBAL_HANDLERS = (
 class DotnetFileFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, path: Path):
        super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
-        self.path = path
-        self.pe = load_dotnet_image(path)
+        self.path: Path = path
+        self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

    def get_base_address(self):
        return NO_ADDRESS
@@ -218,10 +217,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
        assert self.pe.net.struct.MajorRuntimeVersion is not None
        assert self.pe.net.struct.MinorRuntimeVersion is not None

-        return (
-            self.pe.net.struct.MajorRuntimeVersion,
-            self.pe.net.struct.MinorRuntimeVersion,
-        )
+        return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion

    def get_meta_version_string(self) -> str:
        assert self.pe.net is not None
--- a/capa/features/extractors/ghidra/basicblock.py
+++ b/capa/features/extractors/ghidra/basicblock.py
@@ -83,7 +83,7 @@ def bb_contains_stackstring(bb: ghidra.program.model.block.CodeBlock) -> bool:
    true if basic block contains enough moves of constant bytes to the stack
    """
    count = 0
-    for insn in capa.features.extractors.ghidra.helpers.get_current_program().getListing().getInstructions(bb, True):
+    for insn in currentProgram().getListing().getInstructions(bb, True):  # type: ignore [name-defined] # noqa: F821
        if is_mov_imm_to_stack(insn):
            count += get_printable_len(insn.getScalar(1))
        if count > MIN_STACKSTRING_LEN:
@@ -96,9 +96,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
    parse tight loops, true if last instruction in basic block branches to bb start
    """
    # Reverse Ordered, first InstructionDB
-    last_insn = (
-        capa.features.extractors.ghidra.helpers.get_current_program().getListing().getInstructions(bb, False).next()
-    )
+    last_insn = currentProgram().getListing().getInstructions(bb, False).next()  # type: ignore [name-defined] # noqa: F821

    if last_insn.getFlowType().isJump():
        return last_insn.getAddress(0) == bb.getMinAddress()
@@ -142,3 +140,20 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur
    for bb_handler in BASIC_BLOCK_HANDLERS:
        for feature, addr in bb_handler(fh, bbh):
            yield feature, addr
+
+
+def main():
+    """pretty-print the basic block features of every function yielded by GhidraFeatureExtractor"""
+    import pprint
+
+    from capa.features.extractors.ghidra.extractor import GhidraFeatureExtractor
+
+    collected = []
+    for fh in GhidraFeatureExtractor().get_functions():
+        for bbh in capa.features.extractors.ghidra.helpers.get_function_blocks(fh):
+            collected.extend(extract_features(fh, bbh))
+    pprint.pprint(collected)  # noqa: T203
+
+
+if __name__ == "__main__":
+    main()
--- a/capa/features/extractors/ghidra/context.py
+++ b/capa/features/extractors/ghidra/context.py
@@ -1,44 +0,0 @@
-# Copyright 2023 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Optional
-
-
-class GhidraContext:
-    """
-    State holder for the Ghidra backend to avoid passing state to every function.
-
-    PyGhidra uses a context manager to set up the Ghidra environment (program, transaction, etc.).
-    We store the relevant objects here to allow easy access throughout the extractor
-    without needing to pass them as arguments to every feature extraction method.
-    """
-
-    def __init__(self, program, flat_api, monitor):
-        self.program = program
-        self.flat_api = flat_api
-        self.monitor = monitor
-
-
-_context: Optional[GhidraContext] = None
-
-
-def set_context(program, flat_api, monitor):
-    global _context
-    _context = GhidraContext(program, flat_api, monitor)
-
-
-def get_context() -> GhidraContext:
-    if _context is None:
-        raise RuntimeError("GhidraContext not initialized")
-    return _context
--- a/capa/features/extractors/ghidra/extractor.py
+++ b/capa/features/extractors/ghidra/extractor.py
@@ -12,14 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import weakref
-import contextlib
 from typing import Iterator

 import capa.features.extractors.ghidra.file
 import capa.features.extractors.ghidra.insn
 import capa.features.extractors.ghidra.global_
-import capa.features.extractors.ghidra.helpers as ghidra_helpers
 import capa.features.extractors.ghidra.function
 import capa.features.extractors.ghidra.basicblock
 from capa.features.common import Feature
@@ -34,20 +31,19 @@ from capa.features.extractors.base_extractor import (


 class GhidraFeatureExtractor(StaticFeatureExtractor):
-    def __init__(self, ctx_manager=None, tmpdir=None):
-        self.ctx_manager = ctx_manager
-        self.tmpdir = tmpdir
+    def __init__(self):
+        import capa.features.extractors.ghidra.helpers as ghidra_helpers

        super().__init__(
            SampleHashes(
-                md5=ghidra_helpers.get_current_program().getExecutableMD5(),
+                md5=capa.ghidra.helpers.get_file_md5(),
                # ghidra doesn't expose this hash.
                # https://ghidra.re/ghidra_docs/api/ghidra/program/model/listing/Program.html
                #
                # the hashes are stored in the database, not computed on the fly,
                # so it's probably not trivial to add SHA1.
                sha1="",
-                sha256=ghidra_helpers.get_current_program().getExecutableSHA256(),
+                sha256=capa.ghidra.helpers.get_file_sha256(),
            )
        )

@@ -59,14 +55,8 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
        self.externs = ghidra_helpers.get_file_externs()
        self.fakes = ghidra_helpers.map_fake_import_addrs()

-        # Register cleanup to run when the extractor is garbage collected or when the program exits.
-        # We use weakref.finalize instead of __del__ to avoid issues with reference cycles and
-        # to ensure deterministic cleanup on interpreter shutdown.
-        if self.ctx_manager or self.tmpdir:
-            weakref.finalize(self, cleanup, self.ctx_manager, self.tmpdir)
-
    def get_base_address(self):
-        return AbsoluteVirtualAddress(ghidra_helpers.get_current_program().getImageBase().getOffset())
+        return AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset())  # type: ignore [name-defined] # noqa: F821

    def extract_global_features(self):
        yield from self.global_features
@@ -75,6 +65,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
        yield from capa.features.extractors.ghidra.file.extract_features()

    def get_functions(self) -> Iterator[FunctionHandle]:
+        import capa.features.extractors.ghidra.helpers as ghidra_helpers

        for fhandle in ghidra_helpers.get_function_symbols():
            fh: FunctionHandle = FunctionHandle(
@@ -86,14 +77,14 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):

    @staticmethod
    def get_function(addr: int) -> FunctionHandle:
-
-        func = ghidra_helpers.get_flat_api().getFunctionContaining(ghidra_helpers.get_flat_api().toAddr(addr))
+        func = getFunctionContaining(toAddr(addr))  # type: ignore [name-defined] # noqa: F821
        return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)

    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.ghidra.function.extract_features(fh)

    def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
+        import capa.features.extractors.ghidra.helpers as ghidra_helpers

        yield from ghidra_helpers.get_function_blocks(fh)

@@ -101,17 +92,9 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
        yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)

    def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
+        import capa.features.extractors.ghidra.helpers as ghidra_helpers

        yield from ghidra_helpers.get_insn_in_range(bbh)

    def extract_insn_features(self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle):
        yield from capa.features.extractors.ghidra.insn.extract_features(fh, bbh, ih)
-
-
-def cleanup(ctx_manager, tmpdir):
-    if ctx_manager:
-        with contextlib.suppress(Exception):
-            ctx_manager.__exit__(None, None, None)
-    if tmpdir:
-        with contextlib.suppress(Exception):
-            tmpdir.cleanup()
--- a/capa/features/extractors/ghidra/file.py
+++ b/capa/features/extractors/ghidra/file.py
@@ -80,54 +80,22 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
        for i in range(256)
    ]

-    for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
+    for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
        if not all((block.isLoaded(), block.isInitialized(), "Headers" not in block.getName())):
            continue

        for off, _ in find_embedded_pe(capa.features.extractors.ghidra.helpers.get_block_bytes(block), mz_xor):
            # add offset back to block start
-            ea_addr = block.getStart().add(off)
-            ea = ea_addr.getOffset()
-            f_offset = capa.features.extractors.ghidra.helpers.get_file_offset(ea_addr)
-            if f_offset != -1:
-                ea = f_offset
+            ea: int = block.getStart().add(off).getOffset()

            yield Characteristic("embedded pe"), FileOffsetAddress(ea)


 def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
    """extract function exports"""
-    program = capa.features.extractors.ghidra.helpers.get_current_program()
-    st = program.getSymbolTable()
-
+    st = currentProgram().getSymbolTable()  # type: ignore [name-defined] # noqa: F821
    for addr in st.getExternalEntryPointIterator():
-        sym = st.getPrimarySymbol(addr)
-        name = sym.getName()
-
-        # Check for forwarded export
-        is_forwarded = False
-        refs = program.getReferenceManager().getReferencesFrom(addr)
-        for ref in refs:
-            if ref.getToAddress().isExternalAddress():
-                ext_sym = st.getPrimarySymbol(ref.getToAddress())
-                if ext_sym:
-                    ext_loc = program.getExternalManager().getExternalLocation(ext_sym)
-                    if ext_loc:
-                        # It is a forwarded export
-                        libname = ext_loc.getLibraryName()
-                        if libname.lower().endswith(".dll"):
-                            libname = libname[:-4]
-
-                        forwarded_name = f"{libname}.{ext_loc.getLabel()}"
-                        forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
-
-                        yield Export(forwarded_name), AbsoluteVirtualAddress(addr.getOffset())
-                        yield Characteristic("forwarded export"), AbsoluteVirtualAddress(addr.getOffset())
-                        is_forwarded = True
-                        break
-
-        if not is_forwarded:
-            yield Export(name), AbsoluteVirtualAddress(addr.getOffset())
+        yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())


 def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
@@ -142,7 +110,7 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
     - importname
    """

-    for f in capa.features.extractors.ghidra.helpers.get_current_program().getFunctionManager().getExternalFunctions():
+    for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
        for r in f.getSymbol().getReferences():
            if r.getReferenceType().isData():
                addr = r.getFromAddress().getOffset()  # gets pointer to fake external addr
@@ -158,14 +126,14 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
 def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
    """extract section names"""

-    for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
+    for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
        yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())


 def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
    """extract ASCII and UTF-16 LE strings"""

-    for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
+    for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
        if not block.isInitialized():
            continue

@@ -185,8 +153,7 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
    extract the names of statically-linked library functions.
    """

-    for sym in capa.features.extractors.ghidra.helpers.get_current_program().getSymbolTable().getAllSymbols(True):
-
+    for sym in currentProgram().getSymbolTable().getAllSymbols(True):  # type: ignore [name-defined] # noqa: F821
        # .isExternal() misses more than this config for the function symbols
        if sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal():
            name = sym.getName()  # starts to resolve names based on Ghidra's FidDB
@@ -203,7 +170,7 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:


 def extract_file_format() -> Iterator[tuple[Feature, Address]]:
-    ef = capa.features.extractors.ghidra.helpers.get_current_program().getExecutableFormat()
+    ef = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821
    if "PE" in ef:
        yield Format(FORMAT_PE), NO_ADDRESS
    elif "ELF" in ef:
@@ -231,3 +198,14 @@ FILE_HANDLERS = (
    extract_file_function_names,
    extract_file_format,
 )
+
+
+def main():
+    """ """
+    import pprint
+
+    pprint.pprint(list(extract_features()))  # noqa: T203
+
+
+if __name__ == "__main__":
+    main()
--- a/capa/features/extractors/ghidra/function.py
+++ b/capa/features/extractors/ghidra/function.py
@@ -26,25 +26,21 @@ from capa.features.extractors.base_extractor import FunctionHandle

 def extract_function_calls_to(fh: FunctionHandle):
    """extract callers to a function"""
-    f: "ghidra.program.database.function.FunctionDB" = fh.inner
+    f: ghidra.program.database.function.FunctionDB = fh.inner
    for ref in f.getSymbol().getReferences():
        if ref.getReferenceType().isCall():
            yield Characteristic("calls to"), AbsoluteVirtualAddress(ref.getFromAddress().getOffset())


 def extract_function_loop(fh: FunctionHandle):
-    f: "ghidra.program.database.function.FunctionDB" = fh.inner
+    f: ghidra.program.database.function.FunctionDB = fh.inner

    edges = []
-    for block in SimpleBlockIterator(
-        BasicBlockModel(capa.features.extractors.ghidra.helpers.get_current_program()),
-        f.getBody(),
-        capa.features.extractors.ghidra.helpers.get_monitor(),
-    ):
-        dests = block.getDestinations(capa.features.extractors.ghidra.helpers.get_monitor())
+    for block in SimpleBlockIterator(BasicBlockModel(currentProgram()), f.getBody(), monitor()):  # type: ignore [name-defined] # noqa: F821
+        dests = block.getDestinations(monitor())  # type: ignore [name-defined] # noqa: F821
        s_addrs = block.getStartAddresses()

-        while dests.hasNext():
+        while dests.hasNext():  # For loop throws Python TypeError
            for addr in s_addrs:
                edges.append((addr.getOffset(), dests.next().getDestinationAddress().getOffset()))

@@ -53,17 +49,32 @@ def extract_function_loop(fh: FunctionHandle):


 def extract_recursive_call(fh: FunctionHandle):
-    f: "ghidra.program.database.function.FunctionDB" = fh.inner
+    f: ghidra.program.database.function.FunctionDB = fh.inner

-    for func in f.getCalledFunctions(capa.features.extractors.ghidra.helpers.get_monitor()):
+    for func in f.getCalledFunctions(monitor()):  # type: ignore [name-defined] # noqa: F821
        if func.getEntryPoint().getOffset() == f.getEntryPoint().getOffset():
            yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())


 def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
-    for function_handler in FUNCTION_HANDLERS:
-        for feature, addr in function_handler(fh):
+    for func_handler in FUNCTION_HANDLERS:
+        for feature, addr in func_handler(fh):
            yield feature, addr


 FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
+
+
+def main():
+    """ """
+    features = []
+    for fhandle in capa.features.extractors.ghidra.helpers.get_function_symbols():
+        features.extend(list(extract_features(fhandle)))
+
+    import pprint
+
+    pprint.pprint(features)  # noqa: T203
+
+
+if __name__ == "__main__":
+    main()
--- a/capa/features/extractors/ghidra/global_.py
+++ b/capa/features/extractors/ghidra/global_.py
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)


 def extract_os() -> Iterator[tuple[Feature, Address]]:
-    format_name: str = capa.features.extractors.ghidra.helpers.get_current_program().getExecutableFormat()
+    format_name: str = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821

    if "PE" in format_name:
        yield OS(OS_WINDOWS), NO_ADDRESS
@@ -53,7 +53,7 @@ def extract_os() -> Iterator[tuple[Feature, Address]]:


 def extract_arch() -> Iterator[tuple[Feature, Address]]:
-    lang_id = capa.features.extractors.ghidra.helpers.get_current_program().getMetadata().get("Language ID")
+    lang_id = currentProgram().getMetadata().get("Language ID")  # type: ignore [name-defined] # noqa: F821

    if "x86" in lang_id and "64" in lang_id:
        yield Arch(ARCH_AMD64), NO_ADDRESS
--- a/capa/features/extractors/ghidra/helpers.py
+++ b/capa/features/extractors/ghidra/helpers.py
@@ -22,22 +22,9 @@ from ghidra.program.model.symbol import SourceType, SymbolType
 from ghidra.program.model.address import AddressSpace

 import capa.features.extractors.helpers
-import capa.features.extractors.ghidra.context as ghidra_context
 from capa.features.common import THUNK_CHAIN_DEPTH_DELTA
 from capa.features.address import AbsoluteVirtualAddress
-from capa.features.extractors.base_extractor import BBHandle, InsnHandle
-
-
-def get_current_program():
-    return ghidra_context.get_context().program
-
-
-def get_monitor():
-    return ghidra_context.get_context().monitor
-
-
-def get_flat_api():
-    return ghidra_context.get_context().flat_api
+from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle


 def ints_to_bytes(bytez: list[int]) -> bytes:
@@ -49,7 +36,7 @@ def ints_to_bytes(bytez: list[int]) -> bytes:
    return bytes([b & 0xFF for b in bytez])


-def find_byte_sequence(addr: "ghidra.program.model.address.Address", seq: bytes) -> Iterator[int]:
+def find_byte_sequence(addr: ghidra.program.model.address.Address, seq: bytes) -> Iterator[int]:
    """yield all ea of a given byte sequence

    args:
@@ -57,25 +44,12 @@ def find_byte_sequence(addr: "ghidra.program.model.address.Address", seq: bytes)
        seq: bytes to search e.g. b"\x01\x03"
    """
    seqstr = "".join([f"\\x{b:02x}" for b in seq])
-    eas = get_flat_api().findBytes(addr, seqstr, java.lang.Integer.MAX_VALUE, 1)
+    eas = findBytes(addr, seqstr, java.lang.Integer.MAX_VALUE, 1)  # type: ignore [name-defined] # noqa: F821

    yield from eas


-def get_file_offset(addr: "ghidra.program.model.address.Address") -> int:
-    """get file offset for an address"""
-    block = get_current_program().getMemory().getBlock(addr)
-    if not block:
-        return -1
-
-    for info in block.getSourceInfos():
-        if info.contains(addr):
-            return info.getFileBytesOffset(addr)
-
-    return -1
-
-
-def get_bytes(addr: "ghidra.program.model.address.Address", length: int) -> bytes:
+def get_bytes(addr: ghidra.program.model.address.Address, length: int) -> bytes:
    """yield length bytes at addr

    args:
@@ -83,12 +57,12 @@ def get_bytes(addr: "ghidra.program.model.address.Address", length: int) -> byte
        length: length of bytes to pull
    """
    try:
-        return ints_to_bytes(get_flat_api().getBytes(addr, int(length)))
-    except Exception:
+        return ints_to_bytes(getBytes(addr, length))  # type: ignore [name-defined] # noqa: F821
+    except RuntimeError:
        return b""


-def get_block_bytes(block: "ghidra.program.model.mem.MemoryBlock") -> bytes:
+def get_block_bytes(block: ghidra.program.model.mem.MemoryBlock) -> bytes:
    """yield all bytes in a given block

    args:
@@ -99,21 +73,20 @@ def get_block_bytes(block: "ghidra.program.model.mem.MemoryBlock") -> bytes:

 def get_function_symbols():
    """yield all non-external function symbols"""
-    yield from get_current_program().getFunctionManager().getFunctionsNoStubs(True)
+    yield from currentProgram().getFunctionManager().getFunctionsNoStubs(True)  # type: ignore [name-defined] # noqa: F821


-def get_function_blocks(fh: "capa.features.extractors.base_extractor.FunctionHandle") -> Iterator[BBHandle]:
-    """
-    yield the basic blocks of the function
-    """
+def get_function_blocks(fh: FunctionHandle) -> Iterator[BBHandle]:
+    """yield BBHandle for each bb in a given function"""

-    for block in SimpleBlockIterator(BasicBlockModel(get_current_program()), fh.inner.getBody(), get_monitor()):
-        yield BBHandle(address=AbsoluteVirtualAddress(block.getMinAddress().getOffset()), inner=block)
+    func: ghidra.program.database.function.FunctionDB = fh.inner
+    for bb in SimpleBlockIterator(BasicBlockModel(currentProgram()), func.getBody(), monitor()):  # type: ignore [name-defined] # noqa: F821
+        yield BBHandle(address=AbsoluteVirtualAddress(bb.getMinAddress().getOffset()), inner=bb)


 def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
    """yield InshHandle for each insn in a given basicblock"""
-    for insn in get_current_program().getListing().getInstructions(bbh.inner, True):
+    for insn in currentProgram().getListing().getInstructions(bbh.inner, True):  # type: ignore [name-defined] # noqa: F821
        yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)


@@ -122,7 +95,7 @@ def get_file_imports() -> dict[int, list[str]]:

    import_dict: dict[int, list[str]] = {}

-    for f in get_current_program().getFunctionManager().getExternalFunctions():
+    for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
        for r in f.getSymbol().getReferences():
            if r.getReferenceType().isData():
                addr = r.getFromAddress().getOffset()  # gets pointer to fake external addr
@@ -160,7 +133,7 @@ def get_file_externs() -> dict[int, list[str]]:

    extern_dict: dict[int, list[str]] = {}

-    for sym in get_current_program().getSymbolTable().getAllSymbols(True):
+    for sym in currentProgram().getSymbolTable().getAllSymbols(True):  # type: ignore [name-defined] # noqa: F821
        # .isExternal() misses more than this config for the function symbols
        if sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal():
            name = sym.getName()  # starts to resolve names based on Ghidra's FidDB
@@ -198,7 +171,7 @@ def map_fake_import_addrs() -> dict[int, list[int]]:
    """
    fake_dict: dict[int, list[int]] = {}

-    for f in get_current_program().getFunctionManager().getExternalFunctions():
+    for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
        for r in f.getSymbol().getReferences():
            if r.getReferenceType().isData():
                fake_dict.setdefault(f.getEntryPoint().getOffset(), []).append(r.getFromAddress().getOffset())
@@ -207,7 +180,7 @@ def map_fake_import_addrs() -> dict[int, list[int]]:


 def check_addr_for_api(
-    addr: "ghidra.program.model.address.Address",
+    addr: ghidra.program.model.address.Address,
    fakes: dict[int, list[int]],
    imports: dict[int, list[str]],
    externs: dict[int, list[str]],
@@ -229,18 +202,18 @@ def check_addr_for_api(
    return False


-def is_call_or_jmp(insn: "ghidra.program.database.code.InstructionDB") -> bool:
+def is_call_or_jmp(insn: ghidra.program.database.code.InstructionDB) -> bool:
    return any(mnem in insn.getMnemonicString() for mnem in ["CALL", "J"])  # JMP, JNE, JNZ, etc


-def is_sp_modified(insn: "ghidra.program.database.code.InstructionDB") -> bool:
+def is_sp_modified(insn: ghidra.program.database.code.InstructionDB) -> bool:
    for i in range(insn.getNumOperands()):
        if insn.getOperandType(i) == OperandType.REGISTER:
            return "SP" in insn.getRegister(i).getName() and insn.getOperandRefType(i).isWrite()
    return False


-def is_stack_referenced(insn: "ghidra.program.database.code.InstructionDB") -> bool:
+def is_stack_referenced(insn: ghidra.program.database.code.InstructionDB) -> bool:
    """generic catch-all for stack references"""
    for i in range(insn.getNumOperands()):
        if insn.getOperandType(i) == OperandType.REGISTER:
@@ -252,7 +225,7 @@ def is_stack_referenced(insn: "ghidra.program.database.code.InstructionDB") -> b
    return any(ref.isStackReference() for ref in insn.getReferencesFrom())


-def is_zxor(insn: "ghidra.program.database.code.InstructionDB") -> bool:
+def is_zxor(insn: ghidra.program.database.code.InstructionDB) -> bool:
    # assume XOR insn
    # XOR's against the same operand zero out
    ops = []
@@ -268,29 +241,29 @@ def is_zxor(insn: "ghidra.program.database.code.InstructionDB") -> bool:
    return all(n == operands[0] for n in operands)


-def handle_thunk(addr: "ghidra.program.model.address.Address"):
+def handle_thunk(addr: ghidra.program.model.address.Address):
    """Follow thunk chains down to a reasonable depth"""
    ref = addr
    for _ in range(THUNK_CHAIN_DEPTH_DELTA):
-        thunk_jmp = get_flat_api().getInstructionAt(ref)
+        thunk_jmp = getInstructionAt(ref)  # type: ignore [name-defined] # noqa: F821
        if thunk_jmp and is_call_or_jmp(thunk_jmp):
            if OperandType.isAddress(thunk_jmp.getOperandType(0)):
                ref = thunk_jmp.getAddress(0)
        else:
-            thunk_dat = get_flat_api().getDataContaining(ref)
+            thunk_dat = getDataContaining(ref)  # type: ignore [name-defined] # noqa: F821
            if thunk_dat and thunk_dat.isDefined() and thunk_dat.isPointer():
                ref = thunk_dat.getValue()
                break  # end of thunk chain reached
    return ref


-def dereference_ptr(insn: "ghidra.program.database.code.InstructionDB"):
+def dereference_ptr(insn: ghidra.program.database.code.InstructionDB):
    addr_code = OperandType.ADDRESS | OperandType.CODE
    to_deref = insn.getAddress(0)
-    dat = get_flat_api().getDataContaining(to_deref)
+    dat = getDataContaining(to_deref)  # type: ignore [name-defined] # noqa: F821

    if insn.getOperandType(0) == addr_code:
-        thfunc = get_flat_api().getFunctionContaining(to_deref)
+        thfunc = getFunctionContaining(to_deref)  # type: ignore [name-defined] # noqa: F821
        if thfunc and thfunc.isThunk():
            return handle_thunk(to_deref)
        else:
@@ -321,7 +294,7 @@ def find_data_references_from_insn(insn, max_depth: int = 10):
        to_addr = reference.getToAddress()

        for _ in range(max_depth - 1):
-            data = get_flat_api().getDataAt(to_addr)
+            data = getDataAt(to_addr)  # type: ignore [name-defined] # noqa: F821
            if data and data.isPointer():
                ptr_value = data.getValue()

--- a/capa/features/extractors/ghidra/insn.py
+++ b/capa/features/extractors/ghidra/insn.py
@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    for addr in capa.features.extractors.ghidra.helpers.find_data_references_from_insn(ih.inner):
-        data = capa.features.extractors.ghidra.helpers.get_flat_api().getDataAt(addr)
+        data = getDataAt(addr)  # type: ignore [name-defined] # noqa: F821
        if data and not data.hasStringValue():
            extracted_bytes = capa.features.extractors.ghidra.helpers.get_bytes(addr, MAX_BYTES_FEATURE_SIZE)
            if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
@@ -249,9 +249,9 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
        push offset aAcr     ; "ACR  > "
    """
    for addr in capa.features.extractors.ghidra.helpers.find_data_references_from_insn(ih.inner):
-        data = capa.features.extractors.ghidra.helpers.get_flat_api().getDataAt(addr)
+        data = getDataAt(addr)  # type: ignore [name-defined] # noqa: F821
        if data and data.hasStringValue():
-            yield String(str(data.getValue())), ih.address
+            yield String(data.getValue()), ih.address


 def extract_insn_mnemonic_features(
@@ -361,8 +361,8 @@ def extract_insn_cross_section_cflow(
        if capa.features.extractors.ghidra.helpers.check_addr_for_api(ref, fakes, imports, externs):
            return

-    this_mem_block = capa.features.extractors.ghidra.helpers.get_flat_api().getMemoryBlock(insn.getAddress())
-    ref_block = capa.features.extractors.ghidra.helpers.get_flat_api().getMemoryBlock(ref)
+    this_mem_block = getMemoryBlock(insn.getAddress())  # type: ignore [name-defined] # noqa: F821
+    ref_block = getMemoryBlock(ref)  # type: ignore [name-defined] # noqa: F821
    if ref_block != this_mem_block:
        yield Characteristic("cross section flow"), ih.address

@@ -425,19 +425,19 @@ def check_nzxor_security_cookie_delta(
    Check if insn within last addr of last bb - delta
    """

-    model = SimpleBlockModel(capa.features.extractors.ghidra.helpers.get_current_program())
+    model = SimpleBlockModel(currentProgram())  # type: ignore [name-defined] # noqa: F821
    insn_addr = insn.getAddress()
    func_asv = fh.getBody()

    first_addr = func_asv.getMinAddress()
    if insn_addr < first_addr.add(SECURITY_COOKIE_BYTES_DELTA):
-        first_bb = model.getFirstCodeBlockContaining(first_addr, capa.features.extractors.ghidra.helpers.get_monitor())
+        first_bb = model.getFirstCodeBlockContaining(first_addr, monitor())  # type: ignore [name-defined] # noqa: F821
        if first_bb.contains(insn_addr):
            return True

    last_addr = func_asv.getMaxAddress()
    if insn_addr > last_addr.add(SECURITY_COOKIE_BYTES_DELTA * -1):
-        last_bb = model.getFirstCodeBlockContaining(last_addr, capa.features.extractors.ghidra.helpers.get_monitor())
+        last_bb = model.getFirstCodeBlockContaining(last_addr, monitor())  # type: ignore [name-defined] # noqa: F821
        if last_bb.contains(insn_addr):
            return True

@@ -488,3 +488,22 @@ INSTRUCTION_HANDLERS = (
    extract_function_calls_from,
    extract_function_indirect_call_characteristic_features,
 )
+
+
+def main():
+    """ """
+    features = []
+    from capa.features.extractors.ghidra.extractor import GhidraFeatureExtractor
+
+    for fh in GhidraFeatureExtractor().get_functions():
+        for bb in capa.features.extractors.ghidra.helpers.get_function_blocks(fh):
+            for insn in capa.features.extractors.ghidra.helpers.get_insn_in_range(bb):
+                features.extend(list(extract_features(fh, bb, insn)))
+
+    import pprint
+
+    pprint.pprint(features)  # noqa: T203
+
+
+if __name__ == "__main__":
+    main()
--- a/capa/features/extractors/ida/function.py
+++ b/capa/features/extractors/ida/function.py
@@ -18,7 +18,6 @@ import idaapi
 import idautils

 import capa.features.extractors.ida.helpers
-from capa.features.file import FunctionName
 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
 from capa.features.extractors import loops
@@ -51,39 +50,10 @@ def extract_recursive_call(fh: FunctionHandle):
        yield Characteristic("recursive call"), fh.address


-def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
-    ea = fh.inner.start_ea
-    name = idaapi.get_name(ea)
-    if name.startswith("sub_"):
-        # skip default names, like "sub_401000"
-        return
-
-    yield FunctionName(name), fh.address
-    if name.startswith("_"):
-        # some linkers may prefix linked routines with a `_` to avoid name collisions.
-        # extract features for both the mangled and un-mangled representations.
-        # e.g. `_fwrite` -> `fwrite`
-        # see: https://stackoverflow.com/a/2628384/87207
-        yield FunctionName(name[1:]), fh.address
-
-
-def extract_function_alternative_names(fh: FunctionHandle):
-    """Get all alternative names for an address."""
-
-    for aname in capa.features.extractors.ida.helpers.get_function_alternative_names(fh.inner.start_ea):
-        yield FunctionName(aname), fh.address
-
-
 def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    for func_handler in FUNCTION_HANDLERS:
        for feature, addr in func_handler(fh):
            yield feature, addr


-FUNCTION_HANDLERS = (
-    extract_function_calls_to,
-    extract_function_loop,
-    extract_recursive_call,
-    extract_function_name,
-    extract_function_alternative_names,
-)
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
--- a/capa/features/extractors/ida/helpers.py
+++ b/capa/features/extractors/ida/helpers.py
@@ -20,7 +20,6 @@ import idaapi
 import ida_nalt
 import idautils
 import ida_bytes
-import ida_funcs
 import ida_segment

 from capa.features.address import AbsoluteVirtualAddress
@@ -437,16 +436,3 @@ def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
 def has_sib(oper: idaapi.op_t) -> bool:
    # via: https://reverseengineering.stackexchange.com/a/14300
    return oper.specflag1 == 1
-
-
-def find_alternative_names(cmt: str):
-    for line in cmt.split("\n"):
-        if line.startswith("Alternative name is '") and line.endswith("'"):
-            name = line[len("Alternative name is '") : -1]  # Extract name between quotes
-            yield name
-
-
-def get_function_alternative_names(fva: int):
-    """Get all alternative names for an address."""
-    yield from find_alternative_names(ida_bytes.get_cmt(fva, False) or "")
-    yield from find_alternative_names(ida_funcs.get_func_cmt(idaapi.get_func(fva), False) or "")
--- a/capa/features/extractors/ida/insn.py
+++ b/capa/features/extractors/ida/insn.py
@@ -22,7 +22,6 @@ import idautils

 import capa.features.extractors.helpers
 import capa.features.extractors.ida.helpers
-from capa.features.file import FunctionName
 from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
 from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
@@ -130,8 +129,8 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
        # not a function (start)
        return

-    name = idaapi.get_name(target_func.start_ea)
-    if target_func.flags & idaapi.FUNC_LIB or not name.startswith("sub_"):
+    if target_func.flags & idaapi.FUNC_LIB:
+        name = idaapi.get_name(target_func.start_ea)
        yield API(name), ih.address
        if name.startswith("_"):
            # some linkers may prefix linked routines with a `_` to avoid name collisions.
@@ -140,10 +139,6 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
            # see: https://stackoverflow.com/a/2628384/87207
            yield API(name[1:]), ih.address

-        for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(target_func.start_ea):
-            yield FunctionName(altname), ih.address
-            yield API(altname), ih.address
-

 def extract_insn_number_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
--- a/capa/features/extractors/viv/indirect_calls.py
+++ b/capa/features/extractors/viv/indirect_calls.py
@@ -56,7 +56,7 @@ def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]:
        if ploc is not None:
            # from vivisect.const:
            # location: (L_VA, L_SIZE, L_LTYPE, L_TINFO)
-            pva, _, ptype, pinfo = ploc
+            (pva, _, ptype, pinfo) = ploc

            if ptype == LOC_OP and not (pinfo & IF_NOFALL):
                ret.append(pva)
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -176,7 +176,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
        try:
-            _, target = resolve_indirect_call(f.vw, insn.va, insn=insn)
+            (_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
        except NotFoundError:
            # not able to resolve the indirect call, sorry
            return
--- a/capa/features/extractors/vmray/call.py
+++ b/capa/features/extractors/vmray/call.py
@@ -26,16 +26,6 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
 logger = logging.getLogger(__name__)


-VOID_PTR_NUMBER_PARAMS = frozenset(
-    {
-        "hKey",
-        "hKeyRoot",
-        "hkResult",
-        "samDesired",
-    }
-)
-
-
 def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
    if param.deref is not None:
        # pointer types contain a special "deref" member that stores the deref'd value
@@ -49,31 +39,10 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feat
                    # parsing the data up to here results in double-escaped backslashes, remove those here
                    yield String(param.deref.value.replace("\\\\", "\\")), ch.address
            else:
-                if param.name in VOID_PTR_NUMBER_PARAMS:
-                    try:
-                        yield Number(hexint(param.deref.value)), ch.address
-                    except (ValueError, TypeError) as e:
-                        logger.debug(
-                            "failed to parse whitelisted void_ptr param %s value %s: %s",
-                            param.name,
-                            param.deref.value,
-                            e,
-                        )
-                else:
-                    logger.debug("skipping deref param type %s", param.deref.type_)
+                logger.debug("skipping deref param type %s", param.deref.type_)
    elif param.value is not None:
        if param.type_ in PARAM_TYPE_INT:
            yield Number(hexint(param.value)), ch.address
-        elif param.type_ == "void_ptr" and param.name in VOID_PTR_NUMBER_PARAMS:
-            try:
-                yield Number(hexint(param.value)), ch.address
-            except (ValueError, TypeError) as e:
-                logger.debug(
-                    "failed to parse whitelisted void_ptr param %s value %s: %s",
-                    param.name,
-                    param.value,
-                    e,
-                )


 def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
--- a/capa/features/extractors/vmray/extractor.py
+++ b/capa/features/extractors/vmray/extractor.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
+
 from typing import Iterator
 from pathlib import Path

@@ -39,8 +39,6 @@ from capa.features.extractors.base_extractor import (
    DynamicFeatureExtractor,
 )

-logger = logging.getLogger(__name__)
-

 def get_formatted_params(params: ParamList) -> list[str]:
    params_list: list[str] = []
@@ -89,16 +87,6 @@ class VMRayExtractor(DynamicFeatureExtractor):

    def get_processes(self) -> Iterator[ProcessHandle]:
        for monitor_process in self.analysis.monitor_processes.values():
-            # skip invalid/incomplete monitor process entries, see #2807
-            if monitor_process.pid == 0 or not monitor_process.filename:
-                logger.debug(
-                    "skipping incomplete process entry: pid=%d, filename=%s, monitor_id=%d",
-                    monitor_process.pid,
-                    monitor_process.filename,
-                    monitor_process.monitor_id,
-                )
-                continue
-
            address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
            yield ProcessHandle(address, inner=monitor_process)

--- a/capa/features/freeze/init.py
+++ b/capa/features/freeze/init.py
@@ -490,10 +490,11 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
            taddr = Address.from_capa(t.address)
            tfeatures = [
                ThreadFeature(
-                    thread=taddr,
+                    basic_block=taddr,
                    address=Address.from_capa(addr),
                    feature=feature_from_capa(feature),
-                )
+                )  # type: ignore
+                # Mypy is unable to recognise `basic_block` as an argument due to alias
                for feature, addr in extractor.extract_thread_features(p, t)
            ]

@@ -543,7 +544,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
    # Mypy is unable to recognise `global_` as an argument due to alias

    # workaround around mypy issue: https://github.com/python/mypy/issues/1424
-    get_base_addr = getattr(extractor, "get_base_address", None)
+    get_base_addr = getattr(extractor, "get_base_addr", None)
    base_addr = get_base_addr() if get_base_addr else capa.features.address.NO_ADDRESS

    freeze = Freeze(
--- a/capa/ghidra/README.md
+++ b/capa/ghidra/README.md
@@ -1,75 +1,107 @@
-# capa analysis using Ghidra
+<div align="center">
+    <img src="../../doc/img/ghidra_backend_logo.png" width=240 height=125>
+</div>

-capa supports using Ghidra (via [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra)) as a feature extraction backend. This enables you to run capa against binaries using Ghidra's analysis engine.
+# capa + Ghidra

+[capa](https://github.com/mandiant/capa) is the FLARE team’s open-source tool that detects capabilities in executable files. [Ghidra](https://github.com/NationalSecurityAgency/ghidra) is an open-source software reverse engineering framework created and maintained by the National Security Agency Research Directorate. capa + Ghidra brings capa’s detection capabilities directly to Ghidra’s user interface, helping speed up your reverse engineering tasks by identifying what parts of a program suggest interesting behavior, such as setting a registry value. You can execute the included Python 3 scripts [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) or [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) to run capa’s analysis and view the results in Ghidra. You may be asking yourself, “Python 3 scripts in Ghidra?” You read that correctly. This integration is written entirely in Python 3 and relies on [Ghidrathon](https://github.com/mandiant/ghidrathon), an open-source Ghidra extension that adds Python 3 scripting to Ghidra.
+
+Check out our capa + Ghidra blog posts:
+* [Riding Dragons: capa Harnesses Ghidra](https://www.mandiant.com/resources/blog/capa-harnesses-ghidra)
+
+## UI Integration
+[capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) renders capa results in Ghidra's UI to help you quickly navigate them. This includes adding matched functions to Ghidra’s Symbol Tree and Bookmarks windows and adding comments to functions that indicate matched capabilities and features. You can execute this script using Ghidra’s Script Manager window.
+
+### Symbol Tree Window
+Matched functions are added to Ghidra's Symbol Tree window under a custom namespace that maps to the capabilities' [capa namespace](https://github.com/mandiant/capa-rules/blob/master/doc/format.md#rule-namespace).
+<div align="center">
+    <img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>
+</div>
+
+### Comments
+
+Comments are added at the beginning of matched functions indicating matched capabilities and inline comments are added to functions indicating matched features. You can view these comments in Ghidra’s Disassembly Listing and Decompile windows.
+<div align="center">
+    <img src="https://github.com/mandiant/capa/assets/66766340/bb2b4170-7fd4-45fc-8c7b-ff8f2e2f101b" width=1000>
+</div>
+
+### Bookmarks
+
+Bookmarks are added to functions that matched a capability that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.
+<div align="center">
+    <img src="https://github.com/mandiant/capa/assets/66766340/7f9a66a9-7be7-4223-91c6-4b8fc4651336" width=825>
+</div>
+
+## Text-based Integration
+
+[capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) outputs text-based capa results that mirror the output of capa’s standalone tool. You can execute this script using Ghidra’s Script Manager and view its output in Ghidra’s Console window.
+
+<div align="center">
+  <img src="../../doc/img/ghidra_script_mngr_output.png" width=700>
+</div>
+
+You can also execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Headless Analyzer to view its output in a terminal window.
+
+<div align="center">
+  <img src="../../doc/img/ghidra_headless_analyzer.png">
+</div>
+
+# Getting Started
+
+## Requirements
+
+| Tool | Version | Source |
+|------------|---------|--------|
+| capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases |
+| Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases |
+| Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases |
+| Python | `>= 3.10.0` | https://www.python.org/downloads |
+
+## Installation
+
+**Note**: capa + Ghidra relies on [Ghidrathon](https://github.com/mandiant/ghidrathon) to execute Python 3 code in Ghidra. You must first install and configure Ghidrathon using the [steps outlined in its README](https://github.com/mandiant/ghidrathon?tab=readme-ov-file#installing-ghidrathon). Then, you must use the Python 3 interpreter that you configured with Ghidrathon to complete the following steps:
+
+1. Install capa and its dependencies from PyPI using the following command:
 ```bash
-$ capa -b ghidra Practical\ Malware\ Analysis\ Lab\ 01-01.exe_
-┌──────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────┐
-│ md5      │ bb7425b82141a1c0f7d60e5106676bb1                                                                     │
-│ sha1     │                                                                                                      │
-│ sha256   │ 58898bd42c5bd3bf9b1389f0eee5b39cd59180e8370eb9ea838a0b327bd6fe47                                     │
-│ analysis │ static                                                                                               │
-│ os       │ windows                                                                                              │
-│ format   │ pe                                                                                                   │
-│ arch     │ i386                                                                                                 │
-│ path     │ ~/Documents/capa/tests/data/Practical Malware Analysis Lab 01-01.exe_                                │
-└──────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────┘
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ ATT&CK Tactic                      ┃ ATT&CK Technique                                            ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-│ DISCOVERY                          │ File and Directory Discovery [T1083]                        │
-└────────────────────────────────────┴─────────────────────────────────────────────────────────────┘
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ MBC Objective                      ┃ MBC Behavior                                                ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-│ DISCOVERY                          │ File and Directory Discovery [E1083]                        │
-│ FILE SYSTEM                        │ Copy File [C0045]                                           │
-│                                    │ Read File [C0051]                                           │
-│ PROCESS                            │ Terminate Process [C0018]                                   │
-└────────────────────────────────────┴─────────────────────────────────────────────────────────────┘
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ Capability                                     ┃ Namespace                                       ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-│ copy file                                      │ host-interaction/file-system/copy               │
-│ enumerate files recursively                    │ host-interaction/file-system/files/list         │
-│ read file via mapping (2 matches)              │ host-interaction/file-system/read               │
-│ terminate process (2 matches)                  │ host-interaction/process/terminate              │
-│ resolve function by parsing PE exports         │ load-code/pe                                    │
-└────────────────────────────────────────────────┴─────────────────────────────────────────────────┘
+$ pip install flare-capa
 ```

-## getting started
-
-### requirements
-
- [Ghidra](https://github.com/NationalSecurityAgency/ghidra) >= 12.0 must be installed and available via the `GHIDRA_INSTALL_DIR` environment variable.
-
-#### standalone binary (recommended)
-
-The capa [standalone binary](https://github.com/mandiant/capa/releases) is the preferred way to run capa with the Ghidra backend.
-Although the binary does not bundle the Java environment or Ghidra itself, it will dynamically load them at runtime.
-
-#### python package
-
-You can also use the Ghidra backend with the capa Python package by installing `flare-capa` with the `ghidra` extra.
-
+2. Download and extract the [official capa rules](https://github.com/mandiant/capa-rules/releases) that match the capa version you have installed. You can use the following command to view the version of capa you have installed:
 ```bash
-$ pip install "flare-capa[ghidra]"
+$ pip show flare-capa
+# OR
+$ capa --version
 ```

-### usage
+3. Copy [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) to your `ghidra_scripts` directory or manually add the parent directory of each script using Ghidra’s Script Manager.

-To use the Ghidra backend, specify it with the `-b` or `--backend` flag:
+## Usage

+You can execute [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra’s Script Manager. [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) can also be executed using Ghidra's Headless Analyzer.
+
+### Execution using Ghidra’s Script Manager
+
+You can execute [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Script Manager as follows:
+1. Navigate to `Window > Script Manager`
+2. Expand the `Python 3 > capa` category
+3. Double-click a script to execute it
+
+Both scripts ask you to provide the path of your capa rules directory (see installation step 2). [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) also has you choose one of `default`, `verbose`, and `vverbose` output formats which mirror the output formats of capa’s standalone tool.
+
+### Execution using Ghidra’s Headless Analyzer
+
+You can execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra’s Headless Analyzer by invoking the `analyzeHeadless` script included with Ghidra in its `support` directory. The following arguments must be provided:
+
+| Argument | Description |
+|----|----|
+|`<project_path>`| Path to Ghidra project|
+| `<project_name>`| Name of Ghidra project|
+| `-Process <sample_name>` OR `-Import <sample_path>`| Name of sample `<sample_name>` already imported into `<project_name>` OR absolute path of sample `<sample_path>` to import into `<project_name>`|
+| `-ScriptPath <script_path>`| OPTIONAL parent directory `<script_path>` of [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py)|
+| `-PostScript capa_ghidra.py`| Execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) after Ghidra analysis|
+| `"<script_args>"`| Quoted string `"<script_args>"` containing script arguments passed to [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) that must specify a capa rules path and optionally the output format (`--verbose`, `--vverbose`, `--json`) – you can specify `"help"` to view the script’s help message |
+
+The following is an example of combining these arguments into a single `analyzeHeadless` script command:
 ```bash
-$ capa -b ghidra /path/to/sample
+$ analyzeHeadless /home/wumbo/demo demo -Import /home/wumbo/capa/tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ -PostScript capa_ghidra.py "/home/wumbo/capa/rules --verbose"
 ```
-
-capa will:
-1.  Initialize a headless Ghidra instance.
-2.  Create a temporary project.
-3.  Import and analyze the sample.
-4.  Extract features and match rules.
-5.  Clean up the temporary project.
-
-**Note:** The first time you run this, it may take a few moments to initialize the Ghidra environment.
--- a/capa/ghidra/plugin/capa_explorer.py
+++ b/capa/ghidra/plugin/capa_explorer.py
@@ -1,3 +1,7 @@
+# Run capa against loaded Ghidra database and render results in Ghidra UI
+# @author Colton Gabertan (gabertan.colton@gmail.com)
+# @category Python 3.capa
+
 # Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,63 +16,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# Run capa against loaded Ghidra database and render results in Ghidra UI
-
-# @author Colton Gabertan (gabertan.colton@gmail.com)
-# @category capa
-# @runtime PyGhidra
-
+import sys
 import json
 import logging
 import pathlib
 from typing import Any

-from java.util import ArrayList
-from ghidra.util import Msg
 from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd
-from ghidra.util.exception import CancelledException
-from ghidra.program.flatapi import FlatProgramAPI
 from ghidra.program.model.symbol import Namespace, SourceType, SymbolType

 import capa
 import capa.main
 import capa.rules
-import capa.version
 import capa.render.json
 import capa.ghidra.helpers
 import capa.capabilities.common
-import capa.features.extractors.ghidra.context
 import capa.features.extractors.ghidra.extractor

 logger = logging.getLogger("capa_explorer")


-def show_monitor_message(msg):
-    capa.ghidra.helpers.get_monitor().checkCanceled()
-    capa.ghidra.helpers.get_monitor().setMessage(msg)
-
-
-def show_error(msg):
-    Msg.showError(None, None, "capa explorer", msg)
-
-
-def show_warn(msg):
-    Msg.showWarn(None, None, "capa explorer", msg)
-
-
-def show_info(msg):
-    Msg.showInfo(None, None, "capa explorer", msg)
-
-
 def add_bookmark(addr, txt, category="CapaExplorer"):
    """create bookmark at addr"""
-    capa.ghidra.helpers.get_current_program().getBookmarkManager().setBookmark(addr, "Info", category, txt)
+    currentProgram().getBookmarkManager().setBookmark(addr, "Info", category, txt)  # type: ignore [name-defined] # noqa: F821


 def create_namespace(namespace_str):
    """create new Ghidra namespace for each capa namespace"""
+
    cmd = CreateNamespacesCmd(namespace_str, SourceType.USER_DEFINED)
-    cmd.applyTo(capa.ghidra.helpers.get_current_program())
+    cmd.applyTo(currentProgram())  # type: ignore [name-defined] # noqa: F821
    return cmd.getNamespace()


@@ -76,7 +53,7 @@ def create_label(ghidra_addr, name, capa_namespace):
    """custom label cmd to overlay symbols under capa-generated namespaces"""

    # prevent duplicate labels under the same capa-generated namespace
-    symbol_table = capa.ghidra.helpers.get_current_program().getSymbolTable()
+    symbol_table = currentProgram().getSymbolTable()  # type: ignore [name-defined] # noqa: F821
    for sym in symbol_table.getSymbols(ghidra_addr):
        if sym.getName(True) == capa_namespace.getName(True) + Namespace.DELIMITER + name:
            return
@@ -84,7 +61,7 @@ def create_label(ghidra_addr, name, capa_namespace):
    # create SymbolType.LABEL at addr
    # prioritize capa-generated namespace (duplicate match @ new addr), else put under global Ghidra one (new match)
    cmd = AddLabelCmd(ghidra_addr, name, True, SourceType.USER_DEFINED)
-    cmd.applyTo(capa.ghidra.helpers.get_current_program())
+    cmd.applyTo(currentProgram())  # type: ignore [name-defined] # noqa: F821

    # assign new match overlay label to capa-generated namespace
    cmd.getSymbol().setNamespace(capa_namespace)
@@ -115,8 +92,8 @@ class CapaMatchData:
            return

        for key in self.matches.keys():
-            addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(key))
-            func = capa.ghidra.helpers.get_flat_api().getFunctionContaining(addr)
+            addr = toAddr(hex(key))  # type: ignore [name-defined] # noqa: F821
+            func = getFunctionContaining(addr)  # type: ignore [name-defined] # noqa: F821

            # bookmark & tag MITRE ATT&CK tactics & MBC @ function scope
            if func is not None:
@@ -140,160 +117,140 @@ class CapaMatchData:

    def set_plate_comment(self, ghidra_addr):
        """set plate comments at matched functions"""
-        comment = capa.ghidra.helpers.get_flat_api().getPlateComment(ghidra_addr)
+        comment = getPlateComment(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
        rule_path = self.namespace.replace(Namespace.DELIMITER, "/")
        # 2 calls to avoid duplicate comments via subsequent script runs
        if comment is None:
            # first comment @ function
            comment = rule_path + "\n"
-            capa.ghidra.helpers.get_flat_api().setPlateComment(ghidra_addr, comment)
+            setPlateComment(ghidra_addr, comment)  # type: ignore [name-defined] # noqa: F821
        elif rule_path not in comment:
            comment = comment + rule_path + "\n"
-            capa.ghidra.helpers.get_flat_api().setPlateComment(ghidra_addr, comment)
+            setPlateComment(ghidra_addr, comment)  # type: ignore [name-defined] # noqa: F821
        else:
            return

    def set_pre_comment(self, ghidra_addr, sub_type, description):
        """set pre comments at subscoped matches of main rules"""
-        comment = capa.ghidra.helpers.get_flat_api().getPreComment(ghidra_addr)
+        comment = getPreComment(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
        if comment is None:
            comment = "capa: " + sub_type + "(" + description + ")" + ' matched in "' + self.capability + '"\n'
-            capa.ghidra.helpers.get_flat_api().setPreComment(ghidra_addr, comment)
+            setPreComment(ghidra_addr, comment)  # type: ignore [name-defined] # noqa: F821
        elif self.capability not in comment:
            comment = (
                comment + "capa: " + sub_type + "(" + description + ")" + ' matched in "' + self.capability + '"\n'
            )
-            capa.ghidra.helpers.get_flat_api().setPreComment(ghidra_addr, comment)
+            setPreComment(ghidra_addr, comment)  # type: ignore [name-defined] # noqa: F821
        else:
            return

-    def label_matches(self, do_namespaces, do_comments):
+    def label_matches(self):
        """label findings at function scopes and comment on subscope matches"""
-        capa_namespace = None
-        if do_namespaces:
-            capa_namespace = create_namespace(self.namespace)
-
-        symbol_table = capa.ghidra.helpers.get_current_program().getSymbolTable()
+        capa_namespace = create_namespace(self.namespace)
+        symbol_table = currentProgram().getSymbolTable()  # type: ignore [name-defined] # noqa: F821

        # handle function main scope of matched rule
        # these will typically contain further matches within
        if self.scope == "function":
            for addr in self.matches.keys():
-                ghidra_addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(addr))
+                ghidra_addr = toAddr(hex(addr))  # type: ignore [name-defined] # noqa: F821

                # classify new function label under capa-generated namespace
-                if do_namespaces:
-                    sym = symbol_table.getPrimarySymbol(ghidra_addr)
-                    if sym is not None:
-                        if sym.getSymbolType() == SymbolType.FUNCTION:
-                            create_label(ghidra_addr, sym.getName(), capa_namespace)
+                sym = symbol_table.getPrimarySymbol(ghidra_addr)
+                if sym is not None:
+                    if sym.getSymbolType() == SymbolType.FUNCTION:
+                        create_label(ghidra_addr, sym.getName(), capa_namespace)
+                        self.set_plate_comment(ghidra_addr)

-                if do_comments:
-                    self.set_plate_comment(ghidra_addr)
+                    # parse the corresponding nodes, and pre-comment subscope matched features
+                    # under the encompassing function(s)
+                    for sub_match in self.matches.get(addr):
+                        for loc, node in sub_match.items():
+                            sub_ghidra_addr = toAddr(hex(loc))  # type: ignore [name-defined] # noqa: F821
+                            if sub_ghidra_addr == ghidra_addr:
+                                # skip duplicates
+                                continue

-                # parse the corresponding nodes, and pre-comment subscope matched features
-                # under the encompassing function(s)
-                for sub_match in self.matches.get(addr):
-                    for loc, node in sub_match.items():
-                        sub_ghidra_addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(loc))
-                        if sub_ghidra_addr == ghidra_addr:
-                            # skip duplicates
-                            continue
-
-                        # precomment subscope matches under the function
-                        if node != {} and do_comments:
-                            for sub_type, description in parse_node(node):
-                                self.set_pre_comment(sub_ghidra_addr, sub_type, description)
+                            # precomment subscope matches under the function
+                            if node != {}:
+                                for sub_type, description in parse_node(node):
+                                    self.set_pre_comment(sub_ghidra_addr, sub_type, description)
        else:
            # resolve the encompassing function for the capa namespace
            # of non-function scoped main matches
            for addr in self.matches.keys():
-                ghidra_addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(addr))
+                ghidra_addr = toAddr(hex(addr))  # type: ignore [name-defined] # noqa: F821

                # basic block / insn scoped main matches
                # Ex. See "Create Process on Windows" Rule
-                func = capa.ghidra.helpers.get_flat_api().getFunctionContaining(ghidra_addr)
+                func = getFunctionContaining(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
                if func is not None:
                    func_addr = func.getEntryPoint()
-                    if do_namespaces:
-                        create_label(func_addr, func.getName(), capa_namespace)
-                    if do_comments:
-                        self.set_plate_comment(func_addr)
+                    create_label(func_addr, func.getName(), capa_namespace)
+                    self.set_plate_comment(func_addr)

                # create subscope match precomments
                for sub_match in self.matches.get(addr):
                    for loc, node in sub_match.items():
-                        sub_ghidra_addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(loc))
+                        sub_ghidra_addr = toAddr(hex(loc))  # type: ignore [name-defined] # noqa: F821

                        if node != {}:
                            if func is not None:
                                # basic block/ insn scope under resolved function
-                                if do_comments:
-                                    for sub_type, description in parse_node(node):
-                                        self.set_pre_comment(sub_ghidra_addr, sub_type, description)
+                                for sub_type, description in parse_node(node):
+                                    self.set_pre_comment(sub_ghidra_addr, sub_type, description)
                            else:
                                # this would be a global/file scoped main match
                                # try to resolve the encompassing function via the subscope match, instead
                                # Ex. "run as service" rule
-                                sub_func = capa.ghidra.helpers.get_flat_api().getFunctionContaining(sub_ghidra_addr)
+                                sub_func = getFunctionContaining(sub_ghidra_addr)  # type: ignore [name-defined] # noqa: F821
                                if sub_func is not None:
                                    sub_func_addr = sub_func.getEntryPoint()
                                    # place function in capa namespace & create the subscope match label in Ghidra's global namespace
-                                    if do_namespaces:
-                                        create_label(sub_func_addr, sub_func.getName(), capa_namespace)
-                                    if do_comments:
-                                        self.set_plate_comment(sub_func_addr)
-
-                                    if do_comments:
-                                        for sub_type, description in parse_node(node):
-                                            self.set_pre_comment(sub_ghidra_addr, sub_type, description)
+                                    create_label(sub_func_addr, sub_func.getName(), capa_namespace)
+                                    self.set_plate_comment(sub_func_addr)
+                                    for sub_type, description in parse_node(node):
+                                        self.set_pre_comment(sub_ghidra_addr, sub_type, description)
                                else:
                                    # addr is in some other file section like .data
                                    # represent this location with a label symbol under the capa namespace
                                    # Ex. See "Reference Base64 String" rule
-                                    if do_namespaces:
-                                        for _sub_type, _description in parse_node(node):
-                                            # in many cases, these will be ghidra-labeled data, so just add the existing
-                                            # label symbol to the capa namespace
-                                            for sym in symbol_table.getSymbols(sub_ghidra_addr):
-                                                if sym.getSymbolType() == SymbolType.LABEL:
-                                                    sym.setNamespace(capa_namespace)
-                                    if do_comments:
-                                        for sub_type, description in parse_node(node):
-                                            self.set_pre_comment(sub_ghidra_addr, sub_type, description)
+                                    for sub_type, description in parse_node(node):
+                                        # in many cases, these will be ghidra-labeled data, so just add the existing
+                                        # label symbol to the capa namespace
+                                        for sym in symbol_table.getSymbols(sub_ghidra_addr):
+                                            if sym.getSymbolType() == SymbolType.LABEL:
+                                                sym.setNamespace(capa_namespace)
+                                        self.set_pre_comment(sub_ghidra_addr, sub_type, description)


 def get_capabilities():
-    rules_dir = ""
-
-    show_monitor_message(f"requesting capa {capa.version.__version__} rules directory")
-    selected_dir = askDirectory(f"choose capa {capa.version.__version__} rules directory", "Ok")  # type: ignore [name-defined] # noqa: F821
-
-    if selected_dir:
-        rules_dir = selected_dir.getPath()
+    rules_dir: str = ""
+    try:
+        selected_dir = askDirectory("Choose capa rules directory", "Ok")  # type: ignore [name-defined] # noqa: F821
+        if selected_dir:
+            rules_dir = selected_dir.getPath()
+    except RuntimeError:
+        # RuntimeError thrown when user selects "Cancel"
+        pass

    if not rules_dir:
-        raise CancelledException
+        logger.info("You must choose a capa rules directory before running capa.")
+        return ""  # return empty str to avoid handling both int and str types

    rules_path: pathlib.Path = pathlib.Path(rules_dir)
+    logger.info("running capa using rules from %s", str(rules_path))

-    show_monitor_message(f"loading rules from {rules_path}")
    rules = capa.rules.get_rules([rules_path])
-
-    show_monitor_message("collecting binary metadata")
    meta = capa.ghidra.helpers.collect_metadata([rules_path])
-
-    show_monitor_message("running capa analysis")
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
+
    capabilities = capa.capabilities.common.find_capabilities(rules, extractor, True)

-    show_monitor_message("checking for static limitations")
    if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=False):
-        show_warn(
-            "capa explorer encountered warnings during analysis. Please check the console output for more information.",
-        )
+        popup("capa explorer encountered warnings during analysis. Please check the console output for more information.")  # type: ignore [name-defined] # noqa: F821
+        logger.info("capa encountered warnings during analysis")

-    show_monitor_message("rendering results")
    return capa.render.json.render(meta, rules, capabilities.matches)


@@ -371,12 +328,12 @@ def parse_json(capa_data):
            # this requires the correct delimiter used by Ghidra
            # Ex. 'communication/named-pipe/create/create pipe' -> capa::communication::named-pipe::create::create-pipe
            namespace_str = Namespace.DELIMITER.join(meta["namespace"].split("/"))
-            namespace = "capa_explorer" + Namespace.DELIMITER + namespace_str + fmt_rule
+            namespace = "capa" + Namespace.DELIMITER + namespace_str + fmt_rule
        else:
            # lib rules via the official rules repo will not contain data
            # for the "namespaces" key, so format using rule itself
            # Ex. 'contain loop' -> capa::lib::contain-loop
-            namespace = "capa_explorer" + Namespace.DELIMITER + "lib" + fmt_rule
+            namespace = "capa" + Namespace.DELIMITER + "lib" + fmt_rule

        yield CapaMatchData(namespace, scope, rule, rule_matches, attack, mbc)

@@ -385,79 +342,44 @@ def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

-    choices = ["namespaces", "bookmarks", "comments"]
-    # use ArrayList to resolve ambiguous askChoices overloads (List vs List, List) in PyGhidra
-    choices_java = ArrayList()
-    for c in choices:
-        choices_java.add(c)
+    if isRunningHeadless():  # type: ignore [name-defined] # noqa: F821
+        logger.error("unsupported Ghidra execution mode")
+        return capa.main.E_UNSUPPORTED_GHIDRA_EXECUTION_MODE

-    choice_labels = [
-        'add "capa_explorer" namespace for matched functions',
-        "add bookmarks for matched functions",
-        "add comments to matched functions",
-    ]
-    # use ArrayList to resolve ambiguous askChoices overloads (List vs List, List) in PyGhidra
-    choice_labels_java = ArrayList()
-    for c in choice_labels:
-        choice_labels_java.add(c)
-
-    selected = list(askChoices("capa explorer", "select actions:", choices_java, choice_labels_java))  # type: ignore [name-defined] # noqa: F821
-
-    do_namespaces = "namespaces" in selected
-    do_comments = "comments" in selected
-    do_bookmarks = "bookmarks" in selected
-
-    if not any((do_namespaces, do_comments, do_bookmarks)):
-        raise CancelledException("no actions selected")
-
-    # initialize the context for the extractor/helpers
-    capa.features.extractors.ghidra.context.set_context(
-        currentProgram,  # type: ignore [name-defined] # noqa: F821
-        FlatProgramAPI(currentProgram),  # type: ignore [name-defined] # noqa: F821
-        monitor,  # type: ignore [name-defined] # noqa: F821
-    )
-
-    show_monitor_message("checking supported Ghidra version")
    if not capa.ghidra.helpers.is_supported_ghidra_version():
-        show_error("unsupported Ghidra version")
+        logger.error("unsupported Ghidra version")
        return capa.main.E_UNSUPPORTED_GHIDRA_VERSION

-    show_monitor_message("checking supported file type")
    if not capa.ghidra.helpers.is_supported_file_type():
-        show_error("unsupported file type")
+        logger.error("unsupported file type")
        return capa.main.E_INVALID_FILE_TYPE

-    show_monitor_message("checking supported file architecture")
    if not capa.ghidra.helpers.is_supported_arch_type():
-        show_error("unsupported file architecture")
+        logger.error("unsupported file architecture")
        return capa.main.E_INVALID_FILE_ARCH

    # capa_data will always contain {'meta':..., 'rules':...}
    # if the 'rules' key contains no values, then there were no matches
    capa_data = json.loads(get_capabilities())
    if capa_data.get("rules") is None:
-        show_info("capa explorer found no matches.")
+        logger.info("capa explorer found no matches")
+        popup("capa explorer found no matches.")  # type: ignore [name-defined] # noqa: F821
        return capa.main.E_EMPTY_REPORT

-    show_monitor_message("processing matches")
    for item in parse_json(capa_data):
-        if do_bookmarks:
-            show_monitor_message("adding bookmarks")
-            item.bookmark_functions()
-        if do_namespaces or do_comments:
-            show_monitor_message("adding labels")
-            item.label_matches(do_namespaces, do_comments)
-
-    show_info("capa explorer analysis complete.")
-
+        item.bookmark_functions()
+        item.label_matches()
+    logger.info("capa explorer analysis complete")
+    popup("capa explorer analysis complete.\nPlease see results in the Bookmarks Window and Namespaces section of the Symbol Tree Window.")  # type: ignore [name-defined] # noqa: F821
    return 0


 if __name__ == "__main__":
-    try:
-        if main() != 0:
-            show_error(
-                "capa explorer encountered errors during analysis. Please check the console output for more information.",
-            )
-    except CancelledException:
-        show_info("capa explorer analysis cancelled.")
+    if sys.version_info < (3, 10):
+        from capa.exceptions import UnsupportedRuntimeError
+
+        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
+    exit_code = main()
+    if exit_code != 0:
+        popup("capa explorer encountered errors during analysis. Please check the console output for more information.")  # type: ignore [name-defined] # noqa: F821
+    sys.exit(exit_code)
--- a/capa/ghidra/capa_ghidra.py
+++ b/capa/ghidra/capa_ghidra.py
@@ -0,0 +1,174 @@
+# Run capa against loaded Ghidra database and render results in Ghidra Console window
+# @author Mike Hunhoff (mehunhoff@google.com)
+# @category Python 3.capa
+
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import logging
+import pathlib
+import argparse
+
+import capa
+import capa.main
+import capa.rules
+import capa.ghidra.helpers
+import capa.render.default
+import capa.capabilities.common
+import capa.features.extractors.ghidra.extractor
+
+logger = logging.getLogger("capa_ghidra")
+
+
+def run_headless():
+    """Run capa on the current program in Ghidra headless mode and print the report.
+
+    Script arguments may arrive as one quoted string (e.g. "/path/to/rules --json") or as
+    separate tokens; the bare word "help" is accepted as an alias for "--help".
+    Returns 0 after the report has been printed.
+    """
+    parser = argparse.ArgumentParser(description="The FLARE team's open-source tool to integrate capa with Ghidra.")
+
+    parser.add_argument(
+        "rules",
+        type=str,
+        help="path to rule file or directory",
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)"
+    )
+    parser.add_argument(
+        "-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)"
+    )
+    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
+    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
+    parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
+
+    script_args = list(getScriptArgs())  # type: ignore [name-defined] # noqa: F821
+    if len(script_args) == 1:
+        # a lone argument carries the whole command line in one string: tokenize it on whitespace
+        script_args = script_args[0].split()
+    # several arguments are used as given; dropping them made argparse report `rules` as missing
+    script_args = ["--help" if arg.lower() == "help" else arg for arg in script_args]
+
+    args = parser.parse_args(args=script_args)
+
+    # --quiet takes precedence over --debug; basicConfig() does nothing when the root logger
+    # already has handlers, so the level is also set explicitly
+    level = logging.WARNING if args.quiet else logging.DEBUG if args.debug else logging.INFO
+    logging.basicConfig(level=level)
+    logging.getLogger().setLevel(level)
+
+    logger.debug("running in Ghidra headless mode")
+
+    rules_path = pathlib.Path(args.rules)
+
+    logger.debug("rule path: %s", rules_path)
+    rules = capa.rules.get_rules([rules_path])
+
+    meta = capa.ghidra.helpers.collect_metadata([rules_path])
+    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
+
+    capabilities = capa.capabilities.common.find_capabilities(rules, extractor, False)
+
+    meta.analysis.feature_counts = capabilities.feature_counts
+    meta.analysis.library_functions = capabilities.library_functions
+    meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
+
+    if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=True):
+        logger.info("capa encountered warnings during analysis")
+
+    if args.json:
+        print(capa.render.json.render(meta, rules, capabilities.matches))  # noqa: T201
+    elif args.vverbose:
+        print(capa.render.vverbose.render(meta, rules, capabilities.matches))  # noqa: T201
+    elif args.verbose:
+        print(capa.render.verbose.render(meta, rules, capabilities.matches))  # noqa: T201
+    else:
+        print(capa.render.default.render(meta, rules, capabilities.matches))  # noqa: T201
+
+    return 0
+
+
+def run_ui():
+    """Prompt for rules and verbosity, run capa on the current program, print the report; returns an exit code."""
+    logging.basicConfig(level=logging.INFO)
+    logging.getLogger().setLevel(logging.INFO)
+
+    rules_dir: str = ""
+    try:
+        chosen = askDirectory("Choose capa rules directory", "Ok")  # type: ignore [name-defined] # noqa: F821
+        rules_dir = chosen.getPath() if chosen else rules_dir
+    except RuntimeError:
+        # RuntimeError thrown when user selects "Cancel"
+        pass
+    if not rules_dir:
+        # nothing usable was selected: report it and hand back the missing-rules error code
+        logger.info("You must choose a capa rules directory before running capa.")
+        return capa.main.E_MISSING_RULES
+
+    verbosity_options = ["default", "verbose", "vverbose"]
+    verbose = askChoice("capa output verbosity", "Choose capa output verbosity", verbosity_options, "default")  # type: ignore [name-defined] # noqa: F821
+
+    rules_path: pathlib.Path = pathlib.Path(rules_dir)
+    logger.info("running capa using rules from %s", str(rules_path))
+
+    rules = capa.rules.get_rules([rules_path])
+    meta = capa.ghidra.helpers.collect_metadata([rules_path])
+    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
+    capabilities = capa.capabilities.common.find_capabilities(rules, extractor, True)
+    matches = capabilities.matches
+
+    # copy the analysis results into the metadata before rendering
+    analysis = meta.analysis
+    analysis.feature_counts = capabilities.feature_counts
+    analysis.library_functions = capabilities.library_functions
+    analysis.layout = capa.loader.compute_layout(rules, extractor, matches)
+
+    if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=False):
+        logger.info("capa encountered warnings during analysis")
+
+    renderer = (
+        capa.render.vverbose if verbose == "vverbose"
+        else capa.render.verbose if verbose == "verbose"
+        else capa.render.default
+    )
+    print(renderer.render(meta, rules, matches))  # noqa: T201
+
+    return 0
+
+
+def main():
+    """Return the first failed environment check's error code, else the headless/UI runner's result."""
+    helpers = capa.ghidra.helpers
+    preflight = (
+        (helpers.is_supported_ghidra_version, capa.main.E_UNSUPPORTED_GHIDRA_VERSION),
+        (helpers.is_supported_file_type, capa.main.E_INVALID_FILE_TYPE),
+        (helpers.is_supported_arch_type, capa.main.E_INVALID_FILE_ARCH),
+    )
+    for is_supported, exit_code in preflight:
+        if not is_supported():
+            return exit_code
+
+    runner = run_headless if isRunningHeadless() else run_ui  # type: ignore [name-defined] # noqa: F821
+    return runner()
+
+
+if __name__ == "__main__":
+    if sys.version_info < (3, 10):
+        # only this failure path uses the exception class, so it is imported locally
+        from capa.exceptions import UnsupportedRuntimeError
+        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
+    sys.exit(main())  # main() returns the exit code
--- a/capa/ghidra/helpers.py
+++ b/capa/ghidra/helpers.py
@@ -22,7 +22,6 @@ import capa.version
 import capa.features.common
 import capa.features.freeze
 import capa.render.result_document as rdoc
-import capa.features.extractors.ghidra.context as ghidra_context
 import capa.features.extractors.ghidra.helpers
 from capa.features.address import AbsoluteVirtualAddress

@@ -32,18 +31,6 @@ logger = logging.getLogger("capa")
 SUPPORTED_FILE_TYPES = ("Executable and Linking Format (ELF)", "Portable Executable (PE)", "Raw Binary")


-def get_current_program():
-    return ghidra_context.get_context().program
-
-
-def get_flat_api():
-    return ghidra_context.get_context().flat_api
-
-
-def get_monitor():
-    return ghidra_context.get_context().monitor
-
-
 class GHIDRAIO:
    """
    An object that acts as a file-like object,
@@ -61,12 +48,7 @@ class GHIDRAIO:
        self.offset = offset

    def read(self, size):
-        logger.debug(
-            "reading 0x%x bytes at 0x%x (ea: 0x%x)",
-            size,
-            self.offset,
-            get_current_program().getImageBase().add(self.offset).getOffset(),
-        )
+        logger.debug("reading 0x%x bytes at 0x%x (ea: 0x%x)", size, self.offset, currentProgram().getImageBase().add(self.offset).getOffset())  # type: ignore [name-defined] # noqa: F821

        if size > len(self.bytes_) - self.offset:
            logger.debug("cannot read 0x%x bytes at 0x%x (ea: BADADDR)", size, self.offset)
@@ -78,7 +60,7 @@ class GHIDRAIO:
        return

    def get_bytes(self):
-        file_bytes = get_current_program().getMemory().getAllFileBytes()[0]
+        file_bytes = currentProgram().getMemory().getAllFileBytes()[0]  # type: ignore [name-defined] # noqa: F821

        # getOriginalByte() allows for raw file parsing on the Ghidra side
        # other functions will fail as Ghidra will think that it's reading uninitialized memory
@@ -88,32 +70,21 @@ class GHIDRAIO:


 def is_supported_ghidra_version():
-    import ghidra.framework
-
-    version = ghidra.framework.Application.getApplicationVersion()
-    try:
-        # version format example: "11.1.2" or "11.4"
-        major, minor = map(int, version.split(".")[:2])
-        if major < 12:
-            logger.error("-" * 80)
-            logger.error(" Ghidra version %s is not supported.", version)
-            logger.error(" ")
-            logger.error(" capa requires Ghidra 12.0 or higher.")
-            logger.error("-" * 80)
-            return False
-    except ValueError:
-        logger.warning("could not parse Ghidra version: %s", version)
+    version = str(getGhidraVersion())  # type: ignore [name-defined] # noqa: F821
+    # supported means (major, minor) >= (10, 2); compared as ints because float(version[:4]) raises on "9.2.4" and reads "10.10" as 10.1
+    if tuple(int(part) for part in (version.split(".") + ["0"])[:2]) < (10, 2):
+        logger.warning("capa does not support this Ghidra version")
+        logger.warning("Your Ghidra version is: %s. Supported versions are: Ghidra >= 10.2", version)
         return False
-
     return True


 def is_running_headless():
-    return True  # PyGhidra is always headless in this context
+    return isRunningHeadless()  # type: ignore [name-defined] # noqa: F821


 def is_supported_file_type():
-    file_info = get_current_program().getExecutableFormat()
+    file_info = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821
    if file_info not in SUPPORTED_FILE_TYPES:
        logger.error("-" * 80)
        logger.error(" Input file does not appear to be a supported file type.")
@@ -128,7 +99,7 @@ def is_supported_file_type():


 def is_supported_arch_type():
-    lang_id = str(get_current_program().getLanguageID()).lower()
+    lang_id = str(currentProgram().getLanguageID()).lower()  # type: ignore [name-defined] # noqa: F821

    if not all((lang_id.startswith("x86"), any(arch in lang_id for arch in ("32", "64")))):
        logger.error("-" * 80)
@@ -141,18 +112,18 @@ def is_supported_arch_type():


 def get_file_md5():
-    return get_current_program().getExecutableMD5()
+    return currentProgram().getExecutableMD5()  # type: ignore [name-defined] # noqa: F821


 def get_file_sha256():
-    return get_current_program().getExecutableSHA256()
+    return currentProgram().getExecutableSHA256()  # type: ignore [name-defined] # noqa: F821


 def collect_metadata(rules: list[Path]):
    md5 = get_file_md5()
    sha256 = get_file_sha256()

-    info = get_current_program().getLanguageID().toString()
+    info = currentProgram().getLanguageID().toString()  # type: ignore [name-defined] # noqa: F821
    if "x86" in info and "64" in info:
        arch = "x86_64"
    elif "x86" in info and "32" in info:
@@ -160,11 +131,11 @@ def collect_metadata(rules: list[Path]):
    else:
        arch = "unknown arch"

-    format_name: str = get_current_program().getExecutableFormat()
+    format_name: str = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821
    if "PE" in format_name:
        os = "windows"
    elif "ELF" in format_name:
-        with contextlib.closing(GHIDRAIO()) as f:
+        with contextlib.closing(capa.ghidra.helpers.GHIDRAIO()) as f:
            os = capa.features.extractors.elf.detect_elf_os(f)
    else:
        os = "unknown os"
@@ -177,18 +148,16 @@ def collect_metadata(rules: list[Path]):
            md5=md5,
            sha1="",
            sha256=sha256,
-            path=get_current_program().getExecutablePath(),
+            path=currentProgram().getExecutablePath(),  # type: ignore [name-defined] # noqa: F821
        ),
        flavor=rdoc.Flavor.STATIC,
        analysis=rdoc.StaticAnalysis(
-            format=get_current_program().getExecutableFormat(),
+            format=currentProgram().getExecutableFormat(),  # type: ignore [name-defined] # noqa: F821
            arch=arch,
            os=os,
            extractor="ghidra",
            rules=tuple(r.resolve().absolute().as_posix() for r in rules),
-            base_address=capa.features.freeze.Address.from_capa(
-                AbsoluteVirtualAddress(get_current_program().getImageBase().getOffset())
-            ),
+            base_address=capa.features.freeze.Address.from_capa(AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset())),  # type: ignore [name-defined] # noqa: F821
            layout=rdoc.StaticLayout(
                functions=(),
            ),
--- a/capa/ghidra/plugin/README.md
+++ b/capa/ghidra/plugin/README.md
@@ -1,54 +0,0 @@
-<div align="center">
-    <img src="https://github.com/mandiant/capa/blob/master/doc/img/ghidra_backend_logo.png" width=240 height=125>
-</div>
-
-# capa explorer for Ghidra
-
-capa explorer for Ghidra brings capa’s detection capabilities directly to Ghidra’s user interface helping speed up your reverse engineering tasks by identifying what parts of a program suggest interesting behavior, such as setting a registry value. You can execute (via [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra)) the script [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/plugin/capa_explorer.py) using Ghidra’s Script Manager window to run capa’s analysis and view the results in Ghidra.
-
-## ui integration
-[capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) renders capa results in Ghidra's UI to help you quickly navigate them. This includes adding matched functions to Ghidra’s Symbol Tree and Bookmarks windows and adding comments to functions that indicate matched capabilities and features. You can execute this script using Ghidra’s Script Manager window.
-
-### symbol tree window
-Matched functions are added to Ghidra's Symbol Tree window under a custom namespace that maps to the capabilities' [capa namespace](https://github.com/mandiant/capa-rules/blob/master/doc/format.md#rule-namespace).
-<div align="center">
-    <img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>
-</div>
-
-### comments
-
-Comments are added at the beginning of matched functions indicating matched capabilities and inline comments are added to functions indicating matched features. You can view these comments in Ghidra’s Disassembly Listing and Decompile windows.
-<div align="center">
-    <img src="https://github.com/mandiant/capa/assets/66766340/bb2b4170-7fd4-45fc-8c7b-ff8f2e2f101b" width=1000>
-</div>
-
-### bookmarks
-
-Bookmarks are added to functions that matched a capability that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.
-<div align="center">
-    <img src="https://github.com/mandiant/capa/assets/66766340/7f9a66a9-7be7-4223-91c6-4b8fc4651336" width=825>
-</div>
-
-# getting started
-
-## requirements
-
- [Ghidra](https://github.com/NationalSecurityAgency/ghidra) >= 12.0 must be installed.
- [flare-capa](https://pypi.org/project/flare-capa/) >= 10.0 must be installed (virtual environment recommended) with the `ghidra` extra (e.g., `pip install "flare-capa[ghidra]"`).
- [capa rules](https://github.com/mandiant/capa-rules) must be downloaded for the version of capa you are using.
-
-## execution
-
-### 1. run Ghidra with PyGhidra
-You must start Ghidra using the `pyghidraRun` script provided in the support directory of your Ghidra installation to ensure the Python environment is correctly loaded. You should execute `pyghidraRun` from within the Python environment that you used to install capa.
-
-```bash
-<ghidra_install>/support/pyghidraRun
-```
-
-### 2. run capa_explorer.py
-1. Open your Ghidra project and CodeBrowser.
-2. Open the Script Manager.
-3. Add [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/plugin/capa_explorer.py) to the script directories.
-4. Filter for capa and run the script.
-5. When prompted, select the directory containing the downloaded capa rules.
--- a/capa/helpers.py
+++ b/capa/helpers.py
@@ -96,7 +96,11 @@ def is_runtime_ida():


 def is_runtime_ghidra():
-    return importlib.util.find_spec("ghidra") is not None
+    try:
+        currentProgram  # type: ignore [name-defined] # noqa: F821
+    except NameError:
+        return False
+    return True


 def assert_never(value) -> NoReturn:
@@ -327,9 +331,6 @@ def log_unsupported_os_error():
    logger.error(" ")
    logger.error(" capa currently only analyzes executables for some operating systems")
    logger.error(" (including Windows, Linux, and Android).")
-    logger.error(" ")
-    logger.error(" If you know the target OS, you can specify it explicitly, for example:")
-    logger.error("   capa --os linux <sample>")
    logger.error("-" * 80)


--- a/capa/ida/plugin/init.py
+++ b/capa/ida/plugin/init.py
@@ -17,6 +17,7 @@ import logging
 import idaapi
 import ida_kernwin

+from capa.ida.plugin.form import CapaExplorerForm
 from capa.ida.plugin.icon import ICON

 logger = logging.getLogger(__name__)
@@ -73,9 +74,6 @@ class CapaExplorerPlugin(idaapi.plugin_t):
          arg (int): bitflag. Setting LSB enables automatic analysis upon
          loading. The other bits are currently undefined. See `form.Options`.
        """
-        # delay import to not trigger load of Qt components when not running in idaq, i.e., in idalib
-        from capa.ida.plugin.form import CapaExplorerForm
-
        if not self.form:
            self.form = CapaExplorerForm(self.PLUGIN_NAME, arg)
        else:
--- a/capa/ida/plugin/extractor.py
+++ b/capa/ida/plugin/extractor.py
@@ -14,9 +14,9 @@


 import ida_kernwin
+from PyQt5 import QtCore

 from capa.ida.plugin.error import UserCancelledError
-from capa.ida.plugin.qt_compat import QtCore, Signal
 from capa.features.extractors.ida.extractor import IdaFeatureExtractor
 from capa.features.extractors.base_extractor import FunctionHandle

@@ -24,7 +24,7 @@ from capa.features.extractors.base_extractor import FunctionHandle
 class CapaExplorerProgressIndicator(QtCore.QObject):
    """implement progress signal, used during feature extraction"""

-    progress = Signal(str)
+    progress = QtCore.pyqtSignal(str)

    def update(self, text):
        """emit progress update
--- a/capa/ida/plugin/form.py
+++ b/capa/ida/plugin/form.py
@@ -23,6 +23,7 @@ from pathlib import Path
 import idaapi
 import ida_kernwin
 import ida_settings
+from PyQt5 import QtGui, QtCore, QtWidgets

 import capa.main
 import capa.rules
@@ -50,7 +51,6 @@ from capa.ida.plugin.hooks import CapaExplorerIdaHooks
 from capa.ida.plugin.model import CapaExplorerDataModel
 from capa.ida.plugin.proxy import CapaExplorerRangeProxyModel, CapaExplorerSearchProxyModel
 from capa.ida.plugin.extractor import CapaExplorerFeatureExtractor
-from capa.ida.plugin.qt_compat import QtGui, QtCore, QtWidgets
 from capa.features.extractors.base_extractor import FunctionHandle

 logger = logging.getLogger(__name__)
@@ -1358,7 +1358,7 @@ class CapaExplorerForm(idaapi.PluginForm):

        @param state: checked state
        """
-        if state:
+        if state == QtCore.Qt.Checked:
            self.limit_results_to_function(idaapi.get_func(idaapi.get_screen_ea()))
        else:
            self.range_model_proxy.reset_address_range_filter()
@@ -1367,7 +1367,7 @@ class CapaExplorerForm(idaapi.PluginForm):

    def slot_checkbox_limit_features_by_ea(self, state):
        """ """
-        if state:
+        if state == QtCore.Qt.Checked:
            self.view_rulegen_features.filter_items_by_ea(idaapi.get_screen_ea())
        else:
            self.view_rulegen_features.show_all_items()
--- a/capa/ida/plugin/ida-plugin.json
+++ b/capa/ida/plugin/ida-plugin.json
@@ -1,38 +0,0 @@
-{
-  "IDAMetadataDescriptorVersion": 1,
-  "plugin": {
-    "name": "capa",
-    "entryPoint": "capa_explorer.py",
-    "version": "9.3.1",
-    "idaVersions": ">=7.4",
-    "description": "Identify capabilities in executable files using FLARE's capa framework",
-    "license": "Apache-2.0",
-    "categories": [
-      "malware-analysis",
-      "api-scripting-and-automation",
-      "ui-ux-and-visualization"
-    ],
-    "pythonDependencies": ["flare-capa==9.3.1"],
-    "urls": {
-      "repository": "https://github.com/mandiant/capa"
-    },
-    "authors": [
-      {"name": "Willi Ballenthin", "email": "wballenthin@hex-rays.com"},
-      {"name": "Moritz Raabe", "email": "moritzraabe@google.com"},
-      {"name": "Mike Hunhoff", "email": "mike.hunhoff@gmail.com"},
-      {"name": "Yacine Elhamer", "email": "elhamer.yacine@gmail.com"}
-    ],
-    "keywords": [
-      "capability-detection",
-      "malware-analysis",
-      "behavior-analysis",
-      "reverse-engineering",
-      "att&ck",
-      "rule-engine",
-      "feature-extraction",
-      "yara-like-rules",
-      "static-analysis",
-      "dynamic-analysis"
-    ]
-  }
-}
--- a/capa/ida/plugin/item.py
+++ b/capa/ida/plugin/item.py
@@ -18,10 +18,10 @@ from typing import Iterator, Optional

 import idc
 import idaapi
+from PyQt5 import QtCore

 import capa.ida.helpers
 from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress
-from capa.ida.plugin.qt_compat import QtCore, qt_get_item_flag_tristate


 def info_to_name(display):
@@ -55,7 +55,7 @@ class CapaExplorerDataItem:
        self.flags = QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable

        if self._can_check:
-            self.flags = self.flags | QtCore.Qt.ItemIsUserCheckable | qt_get_item_flag_tristate()
+            self.flags = self.flags | QtCore.Qt.ItemIsUserCheckable | QtCore.Qt.ItemIsTristate

        if self.pred:
            self.pred.appendChild(self)
--- a/capa/ida/plugin/model.py
+++ b/capa/ida/plugin/model.py
@@ -18,6 +18,7 @@ from collections import deque

 import idc
 import idaapi
+from PyQt5 import QtGui, QtCore

 import capa.rules
 import capa.ida.helpers
@@ -41,7 +42,6 @@ from capa.ida.plugin.item import (
    CapaExplorerInstructionViewItem,
 )
 from capa.features.address import Address, AbsoluteVirtualAddress
-from capa.ida.plugin.qt_compat import QtGui, QtCore

 # default highlight color used in IDA window
 DEFAULT_HIGHLIGHT = 0xE6C700
@@ -269,7 +269,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                visited.add(child_index)

                for idx in range(self.rowCount(child_index)):
-                    stack.append(self.index(idx, 0, child_index))
+                    stack.append(child_index.child(idx, 0))

    def reset_ida_highlighting(self, item, checked):
        """reset IDA highlight for item
@@ -403,7 +403,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                display += f"{statement.min}"
            elif statement.min == 0:
                display += f"{statement.max} or fewer"
-            elif statement.max == ((1 << 64) - 1):
+            elif statement.max == (1 << 64 - 1):
                display += f"{statement.min} or more"
            else:
                display += f"between {statement.min} and {statement.max}"
--- a/capa/ida/plugin/proxy.py
+++ b/capa/ida/plugin/proxy.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from PyQt5 import QtCore
+from PyQt5.QtCore import Qt
+
 from capa.ida.plugin.model import CapaExplorerDataModel
-from capa.ida.plugin.qt_compat import Qt, QtCore


 class CapaExplorerRangeProxyModel(QtCore.QSortFilterProxyModel):
--- a/capa/ida/plugin/qt_compat.py
+++ b/capa/ida/plugin/qt_compat.py
@@ -1,79 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Qt compatibility layer for capa IDA Pro plugin.
-
-Handles PyQt5 (IDA < 9.2) vs PySide6 (IDA >= 9.2) differences.
-This module provides a unified import interface for Qt modules and handles
-API changes between Qt5 and Qt6.
-"""
-
-try:
-    # IDA 9.2+ uses PySide6
-    from PySide6 import QtGui, QtCore, QtWidgets
-    from PySide6.QtGui import QAction
-
-    QT_LIBRARY = "PySide6"
-    Signal = QtCore.Signal
-except ImportError:
-    # Older IDA versions use PyQt5
-    try:
-        from PyQt5 import QtGui, QtCore, QtWidgets
-        from PyQt5.QtWidgets import QAction
-
-        QT_LIBRARY = "PyQt5"
-        Signal = QtCore.pyqtSignal
-    except ImportError:
-        raise ImportError("Neither PySide6 nor PyQt5 is available. Cannot initialize capa IDA plugin.")
-
-Qt = QtCore.Qt
-
-
-def qt_get_item_flag_tristate():
-    """
-    Get the tristate item flag compatible with Qt5 and Qt6.
-
-    Qt5 (PyQt5): Uses Qt.ItemIsTristate
-    Qt6 (PySide6): Qt.ItemIsTristate was removed, uses Qt.ItemIsAutoTristate
-
-    ItemIsAutoTristate automatically manages tristate based on child checkboxes,
-    matching the original ItemIsTristate behavior where parent checkboxes reflect
-    the check state of their children.
-
-    Returns:
-        int: The appropriate flag value for the Qt version
-
-    Raises:
-        AttributeError: If the tristate flag cannot be found in the Qt library
-    """
-    if QT_LIBRARY == "PySide6":
-        # Qt6: ItemIsTristate was removed, replaced with ItemIsAutoTristate
-        # Try different possible locations (API varies slightly across PySide6 versions)
-        if hasattr(Qt, "ItemIsAutoTristate"):
-            return Qt.ItemIsAutoTristate
-        elif hasattr(Qt, "ItemFlag") and hasattr(Qt.ItemFlag, "ItemIsAutoTristate"):
-            return Qt.ItemFlag.ItemIsAutoTristate
-        else:
-            raise AttributeError(
-                "Cannot find ItemIsAutoTristate in PySide6. "
-                + "Your PySide6 version may be incompatible with capa. "
-                + f"Available Qt attributes: {[attr for attr in dir(Qt) if 'Item' in attr]}"
-            )
-    else:
-        # Qt5: Use the original ItemIsTristate flag
-        return Qt.ItemIsTristate
-
-
-__all__ = ["qt_get_item_flag_tristate", "Signal", "QAction", "QtGui", "QtCore", "QtWidgets"]
--- a/capa/ida/plugin/view.py
+++ b/capa/ida/plugin/view.py
@@ -18,6 +18,7 @@ from collections import Counter

 import idc
 import idaapi
+from PyQt5 import QtGui, QtCore, QtWidgets

 import capa.rules
 import capa.engine
@@ -27,7 +28,6 @@ import capa.features.basicblock
 from capa.ida.plugin.item import CapaExplorerFunctionItem
 from capa.features.address import AbsoluteVirtualAddress, _NoAddress
 from capa.ida.plugin.model import CapaExplorerDataModel
-from capa.ida.plugin.qt_compat import QtGui, QtCore, Signal, QAction, QtWidgets

 MAX_SECTION_SIZE = 750

@@ -147,7 +147,7 @@ def calc_item_depth(o):

 def build_action(o, display, data, slot):
    """ """
-    action = QAction(display, o)
+    action = QtWidgets.QAction(display, o)

    action.setData(data)
    action.triggered.connect(lambda checked: slot(action))
@@ -312,7 +312,7 @@ class CapaExplorerRulegenPreview(QtWidgets.QTextEdit):


 class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget):
-    updated = Signal()
+    updated = QtCore.pyqtSignal()

    def __init__(self, preview, parent=None):
        """ """
--- a/capa/loader.py
+++ b/capa/loader.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import io
 import os
 import logging
 import datetime
@@ -22,13 +23,24 @@ from pathlib import Path
 from rich.console import Console
 from typing_extensions import assert_never

+import capa.perf
 import capa.rules
+import capa.engine
+import capa.helpers
 import capa.version
+import capa.render.json
+import capa.rules.cache
+import capa.render.default
+import capa.render.verbose
 import capa.features.common
 import capa.features.freeze as frz
+import capa.render.vverbose
 import capa.features.extractors
+import capa.render.result_document
 import capa.render.result_document as rdoc
 import capa.features.extractors.common
+import capa.features.extractors.base_extractor
+import capa.features.extractors.cape.extractor
 from capa.rules import RuleSet
 from capa.engine import MatchResults
 from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError
@@ -67,7 +79,6 @@ BACKEND_VMRAY = "vmray"
 BACKEND_FREEZE = "freeze"
 BACKEND_BINEXPORT2 = "binexport2"
 BACKEND_IDA = "ida"
-BACKEND_GHIDRA = "ghidra"


 class CorruptFile(ValueError):
@@ -126,57 +137,6 @@ def get_meta_str(vw):
    return f"{', '.join(meta)}, number of functions: {len(vw.getFunctions())}"


-def _is_probably_corrupt_pe(path: Path) -> bool:
-    """
-    Heuristic check for obviously malformed PE samples that provoke
-    pathological behavior in vivisect (see GH-1989).
-
-    We treat a PE as "probably corrupt" when any section declares an
-    unrealistically large virtual size compared to the file size, e.g.
-    hundreds of megabytes in a tiny file. Such cases lead vivisect to
-    try to map enormous regions and can exhaust CPU/memory.
-    """
-    try:
-        import pefile
-    except Exception:
-        # If pefile is unavailable, fall back to existing behavior.
-        return False
-
-    try:
-        pe = pefile.PE(str(path), fast_load=True)
-    except pefile.PEFormatError:
-        # Not a PE file (or badly formed); let existing checks handle it.
-        return False
-    except Exception:
-        return False
-
-    try:
-        file_size = path.stat().st_size
-    except OSError:
-        return False
-
-    if file_size <= 0:
-        return False
-
-    # Flag sections whose declared virtual size is wildly disproportionate
-    # to the file size (e.g. 900MB section in a ~400KB sample).
-    _VSIZE_FILE_RATIO = 128
-    _MAX_REASONABLE_VSIZE = 512 * 1024 * 1024  # 512 MB
-    max_reasonable = max(file_size * _VSIZE_FILE_RATIO, _MAX_REASONABLE_VSIZE)
-
-    for section in getattr(pe, "sections", []):
-        vsize = getattr(section, "Misc_VirtualSize", 0) or 0
-        if vsize > max_reasonable:
-            logger.debug(
-                "detected unrealistic PE section virtual size: 0x%x (file size: 0x%x), treating as corrupt",
-                vsize,
-                file_size,
-            )
-            return True
-
-    return False
-
-
 def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
    """
    load the program at the given path into a vivisect workspace using the given format.
@@ -194,18 +154,11 @@ def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
    """

    # lazy import enables us to not require viv if user wants another backend.
-    import envi.exc
    import viv_utils
    import viv_utils.flirt

    logger.debug("generating vivisect workspace for: %s", path)

-    if input_format in (FORMAT_PE, FORMAT_AUTO) and _is_probably_corrupt_pe(path):
-        raise CorruptFile(
-            "PE file appears to contain unrealistically large sections and is likely corrupt"
-            + " - skipping analysis to avoid excessive resource usage."
-        )
-
    try:
        if input_format == FORMAT_AUTO:
            if not is_supported_format(path):
@@ -222,20 +175,11 @@ def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
            vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
        else:
            raise ValueError("unexpected format: " + input_format)
-    except envi.exc.SegmentationViolation as e:
-        raise CorruptFile(f"Invalid memory access during binary parsing: {e}") from e
    except Exception as e:
        # vivisect raises raw Exception instances, and we don't want
        # to do a subclass check via isinstance.
-        if type(e) is Exception and e.args:
-            error_msg = str(e.args[0])
-
-            if "Couldn't convert rva" in error_msg:
-                raise CorruptFile(error_msg) from e
-            elif "Unsupported Architecture" in error_msg:
-                # Extract architecture number if available
-                arch_info = e.args[1] if len(e.args) > 1 else "unknown"
-                raise CorruptFile(f"Unsupported architecture: {arch_info}") from e
+        if type(e) is Exception and "Couldn't convert rva" in e.args[0]:
+            raise CorruptFile(e.args[0]) from e
        raise

    viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
@@ -394,24 +338,12 @@ def get_extractor(
        import capa.features.extractors.ida.extractor

        logger.debug("idalib: opening database...")
-        idapro.enable_console_messages(False)
-        with console.status("analyzing program...", spinner="dots"):
-            # we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina,
-            # which sometimes provides bad names, including overwriting names from debug info.
-            #
-            # use -R to load resources, which can help us embedded PE files.
-            #
-            # return values from open_database:
-            #   0 - Success
-            #   2 - User cancelled or 32-64 bit conversion failed
-            #   4 - Database initialization failed
-            #   -1 - Generic errors (database already open, auto-analysis failed, etc.)
-            #   -2 - User cancelled operation
-            ret = idapro.open_database(
-                str(input_path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R"
-            )
-            if ret != 0:
-                raise RuntimeError("failed to analyze input file")
+        # idalib writes to stdout (ugh), so we have to capture that
+        # so as not to screw up structured output.
+        with capa.helpers.stdout_redirector(io.BytesIO()):
+            with console.status("analyzing program...", spinner="dots"):
+                if idapro.open_database(str(input_path), run_auto_analysis=True):
+                    raise RuntimeError("failed to analyze input file")

            logger.debug("idalib: waiting for analysis...")
            ida_auto.auto_wait()
@@ -419,69 +351,6 @@ def get_extractor(

        return capa.features.extractors.ida.extractor.IdaFeatureExtractor()

-    elif backend == BACKEND_GHIDRA:
-        import pyghidra
-
-        with console.status("analyzing program...", spinner="dots"):
-            if not pyghidra.started():
-                pyghidra.start()
-
-            import capa.ghidra.helpers
-
-            if not capa.ghidra.helpers.is_supported_ghidra_version():
-                raise RuntimeError("unsupported Ghidra version")
-
-            import tempfile
-
-            tmpdir = tempfile.TemporaryDirectory()
-
-            project_cm = pyghidra.open_project(tmpdir.name, "CapaProject", create=True)
-            project = project_cm.__enter__()
-            try:
-                from ghidra.util.task import TaskMonitor
-
-                monitor = TaskMonitor.DUMMY
-
-                # Import file
-                loader = pyghidra.program_loader().project(project).source(str(input_path)).name(input_path.name)
-                with loader.load() as load_results:
-                    load_results.save(monitor)
-
-                # Open program
-                program, consumer = pyghidra.consume_program(project, "/" + input_path.name)
-
-                # Analyze
-                pyghidra.analyze(program, monitor)
-
-                from ghidra.program.flatapi import FlatProgramAPI
-
-                flat_api = FlatProgramAPI(program)
-
-                import capa.features.extractors.ghidra.context as ghidra_context
-
-                ghidra_context.set_context(program, flat_api, monitor)
-
-                # Wrapper to handle cleanup of program (consumer) and project
-                class GhidraContextWrapper:
-                    def __init__(self, project_cm, program, consumer):
-                        self.project_cm = project_cm
-                        self.program = program
-                        self.consumer = consumer
-
-                    def __exit__(self, exc_type, exc_val, exc_tb):
-                        self.program.release(self.consumer)
-                        self.project_cm.__exit__(exc_type, exc_val, exc_tb)
-
-                cm = GhidraContextWrapper(project_cm, program, consumer)
-
-            except Exception:
-                project_cm.__exit__(None, None, None)
-                tmpdir.cleanup()
-                raise
-
-        import capa.features.extractors.ghidra.extractor
-
-        return capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor(ctx_manager=cm, tmpdir=tmpdir)
    else:
        raise ValueError("unexpected backend: " + backend)

--- a/capa/main.py
+++ b/capa/main.py
@@ -55,7 +55,6 @@ from capa.loader import (
    BACKEND_VMRAY,
    BACKEND_DOTNET,
    BACKEND_FREEZE,
-    BACKEND_GHIDRA,
    BACKEND_PEFILE,
    BACKEND_DRAKVUF,
    BACKEND_BINEXPORT2,
@@ -299,7 +298,6 @@ def install_common_args(parser, wanted=None):
            (BACKEND_BINJA, "Binary Ninja"),
            (BACKEND_DOTNET, ".NET"),
            (BACKEND_BINEXPORT2, "BinExport2"),
-            (BACKEND_GHIDRA, "Ghidra"),
            (BACKEND_FREEZE, "capa freeze"),
            (BACKEND_CAPE, "CAPE"),
            (BACKEND_DRAKVUF, "DRAKVUF"),
@@ -394,7 +392,6 @@ class ShouldExitError(Exception):
    """raised when a main-related routine indicates the program should exit."""

    def __init__(self, status_code: int):
-        super().__init__(status_code)
        self.status_code = status_code


@@ -661,9 +658,7 @@ def get_rules_from_cli(args) -> RuleSet:
    raises:
      ShouldExitError: if the program is invoked incorrectly and should exit.
    """
-    enable_cache: bool = getattr(args, "enable_cache", True)
-    # this allows calling functions to easily disable rule caching, e.g., used by the rule linter to avoid
-
+    enable_cache: bool = True
    try:
        if capa.helpers.is_running_standalone() and args.is_default_rules:
            cache_dir = get_default_root() / "cache"
@@ -945,7 +940,8 @@ def main(argv: Optional[list[str]] = None):
        argv = sys.argv[1:]

    desc = "The FLARE team's open-source tool to identify capabilities in executable files."
-    epilog = textwrap.dedent("""
+    epilog = textwrap.dedent(
+        """
        By default, capa uses a default set of embedded rules.
        You can see the rule set here:
          https://github.com/mandiant/capa-rules
@@ -972,7 +968,8 @@ def main(argv: Optional[list[str]] = None):

          filter rules by meta fields, e.g. rule name or namespace
            capa -t "create TCP socket" suspicious.exe
-         """)
+         """
+    )

    parser = argparse.ArgumentParser(
        description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
@@ -1107,26 +1104,14 @@ def ida_main():


 def ghidra_main():
-    from ghidra.program.flatapi import FlatProgramAPI
-
    import capa.rules
    import capa.ghidra.helpers
    import capa.render.default
-    import capa.features.extractors.ghidra.context
    import capa.features.extractors.ghidra.extractor

    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

-    # These are provided by the Ghidra scripting environment
-    # but are not available when running standard python
-    # so we have to ignore the linting errors
-    program = currentProgram  # type: ignore [name-defined] # noqa: F821
-    monitor_ = monitor  # type: ignore [name-defined] # noqa: F821
-    flat_api = FlatProgramAPI(program)
-
-    capa.features.extractors.ghidra.context.set_context(program, flat_api, monitor_)
-
    logger.debug("-" * 80)
    logger.debug(" Using default embedded rules.")
    logger.debug(" ")
--- a/capa/render/proto/init.py
+++ b/capa/render/proto/init.py
@@ -31,7 +31,6 @@ $ protoc.exe --python_out=. --mypy_out=. <path_to_proto> (e.g. capa/render/proto

 Alternatively, --pyi_out=. can be used to generate a Python Interface file that supports development
 """
-
 import datetime
 from typing import Any, Union

--- a/capa/render/utils.py
+++ b/capa/render/utils.py
@@ -17,7 +17,6 @@ import io
 from typing import Union, Iterator, Optional

 import rich.console
-from rich.markup import escape
 from rich.progress import Text

 import capa.render.result_document as rd
@@ -25,21 +24,21 @@ import capa.render.result_document as rd

 def bold(s: str) -> Text:
    """draw attention to the given string"""
-    return Text.from_markup(f"[cyan]{escape(s)}")
+    return Text.from_markup(f"[cyan]{s}")


 def bold2(s: str) -> Text:
    """draw attention to the given string, within a `bold` section"""
-    return Text.from_markup(f"[green]{escape(s)}")
+    return Text.from_markup(f"[green]{s}")


 def mute(s: str) -> Text:
    """draw attention away from the given string"""
-    return Text.from_markup(f"[dim]{escape(s)}")
+    return Text.from_markup(f"[dim]{s}")


 def warn(s: str) -> Text:
-    return Text.from_markup(f"[yellow]{escape(s)}")
+    return Text.from_markup(f"[yellow]{s}")


 def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]):
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -159,8 +159,9 @@ def render_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
    s.append(f"){rest}")

    newline = "\n"
-    # Use default (non-dim) styling for API details so they remain readable in -vv output
-    return f"{pname}{{pid:{call.thread.process.pid},tid:{call.thread.tid},call:{call.id}}}\n{newline.join(s)}"
+    return (
+        f"{pname}{{pid:{call.thread.process.pid},tid:{call.thread.tid},call:{call.id}}}\n{rutils.mute(newline.join(s))}"
+    )


 def render_short_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
@@ -179,8 +180,7 @@ def render_short_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
    s.append(f"){rest}")

    newline = "\n"
-    # Use default (non-dim) styling for API details so they remain readable in -vv output
-    return f"call:{call.id}\n{newline.join(s)}"
+    return f"call:{call.id}\n{rutils.mute(newline.join(s))}"


 def render_static_meta(console: Console, meta: rd.StaticMetadata):
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -172,7 +172,7 @@ def render_statement(console: Console, layout: rd.Layout, match: rd.Match, state
            console.write(f"{statement.min}")
        elif statement.min == 0:
            console.write(f"{statement.max} or fewer")
-        elif statement.max == ((1 << 64) - 1):
+        elif statement.max == (1 << 64 - 1):
            console.write(f"{statement.min} or more")
        else:
            console.write(f"between {statement.min} and {statement.max}")
--- a/capa/rules/init.py
+++ b/capa/rules/init.py
@@ -274,8 +274,12 @@ SUPPORTED_FEATURES[Scope.FUNCTION].update(SUPPORTED_FEATURES[Scope.BASIC_BLOCK])


 class InvalidRule(ValueError):
+    def __init__(self, msg):
+        super().__init__()
+        self.msg = msg
+
    def __str__(self):
-        return f"invalid rule: {super().__str__()}"
+        return f"invalid rule: {self.msg}"

    def __repr__(self):
        return str(self)
@@ -285,15 +289,20 @@ class InvalidRuleWithPath(InvalidRule):
    def __init__(self, path, msg):
        super().__init__(msg)
        self.path = path
+        self.msg = msg
        self.__cause__ = None

    def __str__(self):
-        return f"invalid rule: {self.path}: {super(InvalidRule, self).__str__()}"
+        return f"invalid rule: {self.path}: {self.msg}"


 class InvalidRuleSet(ValueError):
+    def __init__(self, msg):
+        super().__init__()
+        self.msg = msg
+
    def __str__(self):
-        return f"invalid rule set: {super().__str__()}"
+        return f"invalid rule set: {self.msg}"

    def __repr__(self):
        return str(self)
@@ -1093,15 +1102,15 @@ class Rule:
    @lru_cache()
    def _get_yaml_loader():
        try:
-            # prefer to use CLoader to be fast, see #306 / CSafeLoader is the same as CLoader but with safe loading
+            # prefer to use CLoader to be fast, see #306
            # on Linux, make sure you install libyaml-dev or similar
            # on Windows, get WHLs from pyyaml.org/pypi
-            logger.debug("using libyaml CSafeLoader.")
-            return yaml.CSafeLoader
+            logger.debug("using libyaml CLoader.")
+            return yaml.CLoader
        except Exception:
-            logger.debug("unable to import libyaml CSafeLoader, falling back to Python yaml parser.")
+            logger.debug("unable to import libyaml CLoader, falling back to Python yaml parser.")
            logger.debug("this will be slower to load rules.")
-            return yaml.SafeLoader
+            return yaml.Loader

    @staticmethod
    def _get_ruamel_yaml_parser():
@@ -1143,8 +1152,6 @@ class Rule:
        else:
            # use pyyaml because it can be much faster than ruamel (pure python)
            doc = yaml.load(s, Loader=cls._get_yaml_loader())
-        if doc is None or not isinstance(doc, dict) or "rule" not in doc:
-            raise InvalidRule("empty or invalid YAML document")
        return cls.from_dict(doc, s)

    @classmethod
@@ -1449,13 +1456,6 @@ class RuleSet:
            scope: self._index_rules_by_feature(scope, self.rules_by_scope[scope], scores_by_rule) for scope in scopes
        }

-        # Pre-compute the topological index mapping for each scope.
-        # This avoids rebuilding the dict on every call to _match (which runs once per
-        # instruction/basic-block/function/file scope, i.e. potentially millions of times).
-        self._rule_index_by_scope: dict[Scope, dict[str, int]] = {
-            scope: {rule.name: i for i, rule in enumerate(self.rules_by_scope[scope])} for scope in scopes
-        }
-
    @property
    def file_rules(self):
        return self.rules_by_scope[Scope.FILE]
@@ -1885,13 +1885,11 @@ class RuleSet:
        """
        done = []

-        # use a list as a stack: append new items and pop() from the end, both O(1).
-        # order doesn't matter here since every rule in the queue is processed eventually.
-        rules_stack = list(rules)
-        while rules_stack:
-            rule = rules_stack.pop()
+        # use a queue of rules, because we'll be modifying the list (appending new items) as we go.
+        while rules:
+            rule = rules.pop(0)
            for subscope_rule in rule.extract_subscope_rules():
-                rules_stack.append(subscope_rule)
+                rules.append(subscope_rule)
            done.append(rule)

        return done
@@ -1940,11 +1938,11 @@ class RuleSet:
        """

        feature_index: RuleSet._RuleFeatureIndex = self._feature_indexes_by_scopes[scope]
+        rules: list[Rule] = self.rules_by_scope[scope]
        # Topologic location of rule given its name.
        # That is, rules with a lower index should be evaluated first, since their dependencies
        # will be evaluated later.
-        # Pre-computed in __init__ to avoid rebuilding on every _match call.
-        rule_index_by_rule_name = self._rule_index_by_scope[scope]
+        rule_index_by_rule_name = {rule.name: i for i, rule in enumerate(rules)}

        # This algorithm is optimized to evaluate as few rules as possible,
        # because the less work we do, the faster capa can run.
@@ -2040,9 +2038,7 @@ class RuleSet:
        candidate_rules = [self.rules[name] for name in candidate_rule_names]

        # Order rules topologically, so that rules with dependencies work correctly.
-        # Sort descending so pop() from the end yields the topologically-first rule in O(1).
        RuleSet._sort_rules_by_index(rule_index_by_rule_name, candidate_rules)
-        candidate_rules.reverse()

        #
        # The following is derived from ceng.match
@@ -2057,7 +2053,7 @@ class RuleSet:
        augmented_features = features

        while candidate_rules:
-            rule = candidate_rules.pop()
+            rule = candidate_rules.pop(0)
            res = rule.evaluate(augmented_features, short_circuit=True)
            if res:
                # we first matched the rule with short circuiting enabled.
@@ -2096,7 +2092,6 @@ class RuleSet:
                        candidate_rule_names.update(new_candidates)
                        candidate_rules.extend([self.rules[rule_name] for rule_name in new_candidates])
                        RuleSet._sort_rules_by_index(rule_index_by_rule_name, candidate_rules)
-                        candidate_rules.reverse()

        return (augmented_features, results)

@@ -2233,10 +2228,7 @@ def get_rules(

        try:
            rule = Rule.from_yaml(content.decode("utf-8"))
-        except InvalidRule as e:
-            if e.args and e.args[0] == "empty or invalid YAML document":
-                logger.warning("skipping %s: %s", path, e)
-                continue
+        except InvalidRule:
            raise
        else:
            rule.meta["capa/path"] = path.as_posix()
--- a/capa/version.py
+++ b/capa/version.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "9.3.1"
+__version__ = "9.2.1"


 def get_major_version():
--- a/doc/img/ghidra_headless_analyzer.png
+++ b/doc/img/ghidra_headless_analyzer.png
--- a/doc/img/ghidra_script_mngr_output.png
+++ b/doc/img/ghidra_script_mngr_output.png
--- a/doc/img/ghidra_script_mngr_rules.png
+++ b/doc/img/ghidra_script_mngr_rules.png
--- a/doc/img/ghidra_script_mngr_verbosity.png
+++ b/doc/img/ghidra_script_mngr_verbosity.png
--- a/doc/release.md
+++ b/doc/release.md
@@ -7,7 +7,6 @@
 - [ ] Review changes
  - capa https://github.com/mandiant/capa/compare/\<last-release\>...master
  - capa-rules https://github.com/mandiant/capa-rules/compare/\<last-release>\...master
-- [ ] Run `$ bump-my-version bump {patch/minor/major} [--allow-dirty]` to update [capa/version.py](https://github.com/mandiant/capa/blob/master/capa/version.py) and other version files
 - [ ] Update [CHANGELOG.md](https://github.com/mandiant/capa/blob/master/CHANGELOG.md)
  - Do not forget to add a nice introduction thanking contributors
  - Remember that we need a major release if we introduce breaking changes
@@ -37,6 +36,7 @@
    - [capa <release>...master](https://github.com/mandiant/capa/compare/<release>...master)
    - [capa-rules <release>...master](https://github.com/mandiant/capa-rules/compare/<release>...master)
    ```
+- [ ] Update [capa/version.py](https://github.com/mandiant/capa/blob/master/capa/version.py)
 - [ ] Create a PR with the updated [CHANGELOG.md](https://github.com/mandiant/capa/blob/master/CHANGELOG.md) and [capa/version.py](https://github.com/mandiant/capa/blob/master/capa/version.py). Copy this checklist in the PR description.
 - [ ] Update the [homepage](https://github.com/mandiant/capa/blob/master/web/public/index.html) (i.e. What's New section)
 - [ ] After PR review, merge the PR and [create the release in GH](https://github.com/mandiant/capa/releases/new) using text from the [CHANGELOG.md](https://github.com/mandiant/capa/blob/master/CHANGELOG.md).
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -2,21 +2,6 @@

 See `capa -h` for all supported arguments and usage examples.

-## Ways to consume capa output
-
-| Method | Output / interface | Typical use |
-|--------|--------------------|-------------|
-| **CLI** | Text (default, `-v`, `-vv`), JSON (`-j`), or other formats | Scripting, CI, one-off analysis |
-| [**IDA Pro**](https://github.com/mandiant/capa/tree/master/capa/ida/plugin) | capa Explorer plugin inside IDA | Interactive analysis with jump-to-address |
-| [**Ghidra**](https://github.com/mandiant/capa/tree/master/capa/ghidra/plugin) | capa Explorer plugin inside Ghidra | Interactive analysis with Ghidra integration |
-| [**Binary Ninja**](https://github.com/mandiant/capa/tree/master/capa/features/extractors/binja) | capa run using Binary Ninja as the analysis backend | Interactive analysis with Binary Ninja integration |
-| [**Dynamic (Sandbox)**](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox) | capa run on dynamic sandbox report (CAPE, VMRay, etc.) | Dynamic analysis of sandbox output |
-| [**Web (capa Explorer)**](https://mandiant.github.io/capa/explorer/) | Web UI (upload JSON or load from URL) | Sharing results, viewing from VirusTotal or similar |
-
-## Default vs verbose output
-
-By default, capa shows only *top-level* rule matches: capabilities that are not already implied by another displayed rule. For example, if a rule "persist via Run registry key" matches and it *contains* a match for "set registry value", the default output lists only "persist via Run registry key". This keeps the default output short while still reflecting all detected capabilities at the top level. Use **`-v`** to see all rule matches, including nested ones. Use **`-vv`** for an even more detailed view that shows how each rule matched.
-
 ## tips and tricks

 ### only run selected rules
@@ -26,7 +11,7 @@ For example, `capa -t william.ballenthin@mandiant.com` runs rules that reference

 ### only analyze selected functions
 Use the `--restrict-to-functions` option to extract capabilities from only a selected set of functions. This is useful for analyzing 
-large functions and figuring out their capabilities and their address of occurrence; for example: PEB access, RC4 encryption, etc.
+large functions and figuring out their capabilities and their address of occurrence; for example: PEB access, RC4 encryption, etc.

 To use this, you can copy the virtual addresses from your favorite disassembler and pass them to capa as follows:
 `capa sample.exe --restrict-to-functions 0x4019C0,0x401CD0`. If you add the `-v` option then capa will extract the interesting parts of a function for you.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,8 +74,7 @@ dependencies = [
    # comments and context.
    "pyyaml>=6",
    "colorama>=0.4",
-    "ida-netnode>=3.0",
-    "ida-settings>=3.1.0",
+    "ida-settings>=2",
    "ruamel.yaml>=0.18",
    "pefile>=2023.2.7",
    "pyelftools>=0.31",
@@ -105,17 +104,10 @@ dependencies = [

    "networkx>=3",

-    "dnfile>=0.17.0",
+    "dnfile>=0.15.0",
 ]
 dynamic = ["version"]

-[tool.pytest.ini_options]
-filterwarnings = [
-    "ignore:builtin type SwigPyPacked has no __module__ attribute:DeprecationWarning",
-    "ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning",
-    "ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning",
-]
-
 [tool.setuptools.dynamic]
 version = {attr = "capa.version.__version__"}

@@ -129,57 +121,51 @@ dev = [
    # we want all developer environments to be consistent.
    # These dependencies are not used in production environments
    # and should not conflict with other libraries/tooling.
-    "pre-commit==4.5.0",
-    "pytest==9.0.2",
-    "pytest-sugar==1.1.1",
+    "pre-commit==4.2.0",
+    "pytest==8.0.0",
+    "pytest-sugar==1.0.0",
    "pytest-instafail==0.5.0",
    "flake8==7.3.0",
-    "flake8-bugbear==25.11.29",
+    "flake8-bugbear==24.12.12",
    "flake8-encodings==0.5.1",
-    "flake8-comprehensions==3.17.0",
+    "flake8-comprehensions==3.16.0",
    "flake8-logging-format==0.9.0",
    "flake8-no-implicit-concat==0.3.5",
    "flake8-print==5.0.0",
    "flake8-todos==0.3.1",
-    "flake8-simplify==0.30.0",
+    "flake8-simplify==0.22.0",
    "flake8-use-pathlib==0.3.0",
    "flake8-copyright==0.2.4",
-    "ruff==0.15.0",
-    "black==26.3.0",
-    "isort==8.0.0",
-    "mypy==1.19.1",
-    "mypy-protobuf==5.0.0",
-    "PyGithub==2.8.1",
-    "bump-my-version==1.2.4",
+    "ruff==0.12.0",
+    "black==25.1.0",
+    "isort==6.0.0",
+    "mypy==1.17.1",
+    "mypy-protobuf==3.6.0",
+    "PyGithub==2.6.0",
    # type stubs for mypy
    "types-backports==0.1.3",
    "types-colorama==0.4.15.11",
    "types-PyYAML==6.0.8",
-    "types-psutil==7.2.0.20251228",
+    "types-psutil==7.0.0.20250218",
    "types_requests==2.32.0.20240712",
-    "types-protobuf==6.32.1.20250918",
-    "deptry==0.24.0"
+    "types-protobuf==6.30.2.20250516",
+    "deptry==0.23.0"
 ]
 build = [
    # Dev and build dependencies are not relaxed because
    # we want all developer environments to be consistent.
    # These dependencies are not used in production environments
    # and should not conflict with other libraries/tooling.
-    "pyinstaller==6.19.0",
-    "setuptools==80.10.1",
-    "build==1.4.0"
+    "pyinstaller==6.14.1",
+    "setuptools==80.9.0",
+    "build==1.2.2"
 ]
 scripts = [
-    # can (optionally) be more lenient on dependencies here
-    # see comment on dependencies for more context
    "jschema_to_python==1.2.3",
-    "psutil==7.2.1",
+    "psutil==7.0.0",
    "stix2==3.0.1",
    "sarif_om==1.0.4",
-    "requests>=2.32.4",
-]
-ghidra = [
-    "pyghidra>=3.0.0",
+    "requests==2.32.3",
 ]

 [tool.deptry]
@@ -211,8 +197,7 @@ known_first_party = [
    "idc",
    "java",
    "netnode",
-    "PyQt5",
-    "PySide6"
+    "PyQt5"
 ]

 [tool.deptry.per_rule_ignores]
@@ -220,7 +205,6 @@ known_first_party = [
 DEP002 = [
    "black",
    "build",
-    "bump-my-version",
    "deptry",
    "flake8",
    "flake8-bugbear",
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,40 +10,38 @@ annotated-types==0.7.0
 colorama==0.4.6
 cxxfilt==0.3.0
 dncil==1.0.2
-dnfile==0.18.0
+dnfile==0.16.4
 funcy==2.0
-humanize==4.15.0
+humanize==4.12.0
 ida-netnode==3.0
-ida-settings==3.2.2
-intervaltree==3.2.1
-markdown-it-py==4.0.0
+ida-settings==2.1.0
+intervaltree==3.1.0
+markdown-it-py==3.0.0
 mdurl==0.1.2
 msgpack==1.0.8
 networkx==3.4.2
 pefile==2024.8.26
-pip==26.0
-protobuf==7.34.0
+pip==25.1.1
+protobuf==6.31.1
 pyasn1==0.5.1
 pyasn1-modules==0.3.0
-pycparser==3.0
-pydantic==2.12.4
+pycparser==2.22
+pydantic==2.11.4
 # pydantic pins pydantic-core, 
 # but dependabot updates these separately (which is broken) and is annoying,
 # so we rely on pydantic to pull in the right version of pydantic-core.
 # pydantic-core==2.23.4
-xmltodict==1.0.2
+xmltodict==0.14.2
 pyelftools==0.32
 pygments==2.19.1
-pyghidra==3.0.0
 python-flirt==0.9.2
 pyyaml==6.0.2
-rich==14.3.2
-ruamel-yaml==0.19.1
-ruamel-yaml-clib==0.2.14
-setuptools==80.10.1
+rich==14.0.0
+ruamel-yaml==0.18.6
+ruamel-yaml-clib==0.2.8
+setuptools==80.9.0
 six==1.17.0
 sortedcontainers==2.4.0
 viv-utils==0.8.0
-vivisect==1.3.0
-msgspec==0.20.0
-bump-my-version==1.2.4
+vivisect==1.2.1
+msgspec==0.19.0
--- a/2
+++ b/2
--- a/scripts/bulk-process.py
+++ b/scripts/bulk-process.py
@@ -61,7 +61,6 @@ usage:
                            parallelism factor
      --no-mp               disable subprocesses
 """
-
 import sys
 import json
 import logging
--- a/scripts/capa2sarif.py
+++ b/scripts/capa2sarif.py
@@ -28,7 +28,6 @@ Requires:
    - sarif_om 1.0.4
    - jschema_to_python 1.2.3
 """
-
 import sys
 import json
 import logging
--- a/scripts/detect-binexport2-capabilities.py
+++ b/scripts/detect-binexport2-capabilities.py
@@ -32,7 +32,6 @@ Example:
    │00000070│ 39 31 37 36 61 64 36 38 ┊ 32 66 66 64 64 36 35 66 │9176ad68┊2ffdd65f│
    │00000080│ 30 61 36 36 39 12 28 61 ┊ 34 62 33 35 64 65 37 31 │0a669•(a┊4b35de71│
 """
-
 import sys
 import logging
 import argparse
--- a/scripts/detect-elf-os.py
+++ b/scripts/detect-elf-os.py
@@ -18,7 +18,6 @@ detect-elf-os

 Attempt to detect the underlying OS that the given ELF file targets.
 """
-
 import sys
 import logging
 import argparse
--- a/scripts/import-to-bn.py
+++ b/scripts/import-to-bn.py
@@ -36,7 +36,6 @@ Check the log window for any errors, and/or the summary of changes.

 Derived from: https://github.com/mandiant/capa/blob/master/scripts/import-to-ida.py
 """
-
 import os
 import json
 from pathlib import Path
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -1229,7 +1229,6 @@ def main(argv=None):

    time0 = time.time()

-    args.enable_cache = False
    try:
        rules = capa.main.get_rules_from_cli(args)
    except capa.main.ShouldExitError as e:
--- a/scripts/match-function-id.py
+++ b/scripts/match-function-id.py
@@ -54,7 +54,6 @@ Example::
    0x44cb60: ?
    0x44cba0: __guard_icall_checks_enforced
 """
-
 import sys
 import logging
 import argparse
--- a/scripts/minimize_vmray_results.py
+++ b/scripts/minimize_vmray_results.py
@@ -16,7 +16,6 @@
 """
 Extract files relevant to capa analysis from VMRay Analysis Archive and create a new ZIP file.
 """
-
 import sys
 import logging
 import zipfile
--- a/scripts/profile-time.py
+++ b/scripts/profile-time.py
@@ -43,7 +43,6 @@ example:

      ^^^ --label or git hash
 """
-
 import sys
 import timeit
 import logging
--- a/scripts/proto-from-results.py
+++ b/scripts/proto-from-results.py
@@ -34,7 +34,6 @@ Example:
    │00000080│ 30 61 36 36 39 12 28 61 ┊ 34 62 33 35 64 65 37 31 │0a669•(a┊4b35de71│

 """
-
 import sys
 import logging
 import argparse
--- a/scripts/proto-to-results.py
+++ b/scripts/proto-to-results.py
@@ -37,7 +37,6 @@ Example:
    ────┴────────────────────────────────────────────────────

 """
-
 import sys
 import logging
 import argparse
--- a/scripts/setup-linter-dependencies.py
+++ b/scripts/setup-linter-dependencies.py
@@ -46,7 +46,6 @@ Example:
    2022-01-24 22:35:39,839 [INFO] Starting extraction...
    2022-01-24 22:35:42,632 [INFO] Writing results to linter-data.json
 """
-
 import json
 import logging
 import argparse
--- a/scripts/show-capabilities-by-function.py
+++ b/scripts/show-capabilities-by-function.py
@@ -54,7 +54,6 @@ Example::
      - connect TCP socket
    ...
 """
-
 import sys
 import logging
 import argparse
--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -70,7 +70,6 @@ Example::
    insn: 0x10001027: mnemonic(shl)
    ...
 """
-
 import sys
 import logging
 import argparse
--- a/tests/data
+++ b/tests/data
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
+
 import contextlib
 import collections
 from pathlib import Path
@@ -20,7 +20,7 @@ from functools import lru_cache

 import pytest

-import capa.loader
+import capa.main
 import capa.features.file
 import capa.features.insn
 import capa.features.common
@@ -53,7 +53,6 @@ from capa.features.extractors.base_extractor import (
 )
 from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor

-logger = logging.getLogger(__name__)
 CD = Path(__file__).resolve().parent
 DOTNET_DIR = CD / "data" / "dotnet"
 DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
@@ -201,73 +200,6 @@ def get_binja_extractor(path: Path):
    return extractor


-# we can't easily cache this because the extractor relies on global state (the opened database)
-# which also has to be closed elsewhere. so, the idalib tests will just take a little bit to run.
-def get_idalib_extractor(path: Path):
-    import capa.features.extractors.ida.idalib as idalib
-
-    if not idalib.has_idalib():
-        raise RuntimeError("cannot find IDA idalib module.")
-
-    if not idalib.load_idalib():
-        raise RuntimeError("failed to load IDA idalib module.")
-
-    import idapro
-    import ida_auto
-
-    import capa.features.extractors.ida.extractor
-
-    logger.debug("idalib: opening database...")
-
-    idapro.enable_console_messages(False)
-
-    # we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina,
-    # which sometimes provides bad names, including overwriting names from debug info.
-    #
-    # use -R to load resources, which can help us embedded PE files.
-    #
-    # return values from open_database:
-    #   0 - Success
-    #   2 - User cancelled or 32-64 bit conversion failed
-    #   4 - Database initialization failed
-    #   -1 - Generic errors (database already open, auto-analysis failed, etc.)
-    #   -2 - User cancelled operation
-    ret = idapro.open_database(
-        str(path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R"
-    )
-    if ret != 0:
-        raise RuntimeError("failed to analyze input file")
-
-    logger.debug("idalib: waiting for analysis...")
-    ida_auto.auto_wait()
-    logger.debug("idalib: opened database.")
-
-    extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
-    fixup_idalib(path, extractor)
-    return extractor
-
-
-def fixup_idalib(path: Path, extractor):
-    """
-    IDA fixups to overcome differences between backends
-    """
-    import idaapi
-    import ida_funcs
-
-    def remove_library_id_flag(fva):
-        f = idaapi.get_func(fva)
-        f.flags &= ~ida_funcs.FUNC_LIB
-        ida_funcs.update_func(f)
-
-    if "kernel32-64" in path.name:
-        # remove (correct) library function id, so we can test x64 thunk
-        remove_library_id_flag(0x1800202B0)
-
-    if "al-khaser_x64" in path.name:
-        # remove (correct) library function id, so we can test x64 nested thunk
-        remove_library_id_flag(0x14004B4F0)
-
-
@lru_cache(maxsize=1)
 def get_cape_extractor(path):
    from capa.helpers import load_json_from_path
@@ -295,33 +227,13 @@ def get_vmray_extractor(path):
    return VMRayExtractor.from_zipfile(path)


-GHIDRA_CACHE: dict[Path, tuple] = {}
-
-
+@lru_cache(maxsize=1)
 def get_ghidra_extractor(path: Path):
-    # we need to start PyGhidra before importing the extractor
-    # because the extractor imports Ghidra modules that are only available after PyGhidra is started
-    import pyghidra
-
-    if not pyghidra.started():
-        pyghidra.start()
-
-    import capa.features.extractors.ghidra.context
    import capa.features.extractors.ghidra.extractor

-    if path in GHIDRA_CACHE:
-        extractor, program, flat_api, monitor = GHIDRA_CACHE[path]
-        capa.features.extractors.ghidra.context.set_context(program, flat_api, monitor)
-        return extractor
+    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
+    setattr(extractor, "path", path.as_posix())

-    # We use a larger cache size to avoid re-opening the same file multiple times
-    # which is very slow with Ghidra.
-    extractor = capa.loader.get_extractor(
-        path, FORMAT_AUTO, OS_AUTO, capa.loader.BACKEND_GHIDRA, [], disable_progress=True
-    )
-
-    ctx = capa.features.extractors.ghidra.context.get_context()
-    GHIDRA_CACHE[path] = (extractor, ctx.program, ctx.flat_api, ctx.monitor)
    return extractor


@@ -982,8 +894,20 @@ FEATURE_PRESENCE_TESTS = sorted(
        ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
        ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
        # insn/api: x64
+        (
+            "kernel32-64",
+            "function=0x180001010",
+            capa.features.insn.API("RtlVirtualUnwind"),
+            True,
+        ),
        ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
        # insn/api: x64 thunk
+        (
+            "kernel32-64",
+            "function=0x1800202B0",
+            capa.features.insn.API("RtlCaptureContext"),
+            True,
+        ),
        ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
        # insn/api: x64 nested thunk
        ("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
@@ -1071,20 +995,20 @@ FEATURE_PRESENCE_TESTS = sorted(
        ("pma16-01", "file", OS(OS_WINDOWS), True),
        ("pma16-01", "file", OS(OS_LINUX), False),
        ("mimikatz", "file", OS(OS_WINDOWS), True),
-        ("pma16-01", "function=0x401100", OS(OS_WINDOWS), True),
-        ("pma16-01", "function=0x401100,bb=0x401130", OS(OS_WINDOWS), True),
+        ("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
+        ("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
        ("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
        ("pma16-01", "file", Arch(ARCH_I386), True),
        ("pma16-01", "file", Arch(ARCH_AMD64), False),
        ("mimikatz", "file", Arch(ARCH_I386), True),
-        ("pma16-01", "function=0x401100", Arch(ARCH_I386), True),
-        ("pma16-01", "function=0x401100,bb=0x401130", Arch(ARCH_I386), True),
+        ("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
+        ("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
        ("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
        ("pma16-01", "file", Format(FORMAT_PE), True),
        ("pma16-01", "file", Format(FORMAT_ELF), False),
        ("mimikatz", "file", Format(FORMAT_PE), True),
        # format is also a global feature
-        ("pma16-01", "function=0x401100", Format(FORMAT_PE), True),
+        ("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
        ("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
        # elf support
        ("7351f.elf", "file", OS(OS_LINUX), True),
--- a/tests/test_binexport_accessors.py
+++ b/tests/test_binexport_accessors.py
@@ -458,7 +458,9 @@ def test_pattern_parsing():
        capture="#int",
    )

-    assert BinExport2InstructionPatternMatcher.from_str("""
+    assert (
+        BinExport2InstructionPatternMatcher.from_str(
+            """
            # comment
            br      reg
            br      reg(not-stack)
@@ -479,7 +481,10 @@ def test_pattern_parsing():
            call    [reg * #int + #int]
            call    [reg + reg + #int]
            call    [reg + #int]
-            """).queries is not None
+            """
+        ).queries
+        is not None
+    )


 def match_address(extractor: BinExport2FeatureExtractor, queries: BinExport2InstructionPatternMatcher, address: int):
@@ -502,7 +507,8 @@ def match_address_with_be2(


 def test_pattern_matching():
-    queries = BinExport2InstructionPatternMatcher.from_str("""
+    queries = BinExport2InstructionPatternMatcher.from_str(
+        """
        br      reg(stack)                     ; capture reg
        br      reg(not-stack)                 ; capture reg
        mov     reg0, reg1                     ; capture reg0
@@ -516,7 +522,8 @@ def test_pattern_matching():
        ldp|stp reg, reg, [reg, #int]!         ; capture #int
        ldp|stp reg, reg, [reg], #int          ; capture #int
        ldrb    reg0, [reg1(not-stack), reg2]  ; capture reg2
-        """)
+        """
+    )

    # 0x210184: ldrb      w2, [x0,                x1]
    # query:    ldrb    reg0, [reg1(not-stack), reg2]      ; capture reg2"
@@ -543,9 +550,11 @@ BE2_EXTRACTOR_687 = fixtures.get_binexport_extractor(


 def test_pattern_matching_exclamation():
-    queries = BinExport2InstructionPatternMatcher.from_str("""
+    queries = BinExport2InstructionPatternMatcher.from_str(
+        """
        stp  reg, reg, [reg, #int]!  ; capture #int
-        """)
+        """
+    )

    # note this captures the sp
    # 0x107918:  stp  x20, x19, [sp,0xFFFFFFFFFFFFFFE0]!
@@ -555,9 +564,11 @@ def test_pattern_matching_exclamation():


 def test_pattern_matching_stack():
-    queries = BinExport2InstructionPatternMatcher.from_str("""
+    queries = BinExport2InstructionPatternMatcher.from_str(
+        """
        stp  reg, reg, [reg(stack), #int]!  ; capture #int
-        """)
+        """
+    )

    # note this does capture the sp
    # compare this with the test above (exclamation)
@@ -568,9 +579,11 @@ def test_pattern_matching_stack():


 def test_pattern_matching_not_stack():
-    queries = BinExport2InstructionPatternMatcher.from_str("""
+    queries = BinExport2InstructionPatternMatcher.from_str(
+        """
        stp  reg, reg, [reg(not-stack), #int]!  ; capture #int
-        """)
+        """
+    )

    # note this does not capture the sp
    # compare this with the test above (exclamation)
@@ -584,9 +597,11 @@ BE2_EXTRACTOR_MIMI = fixtures.get_binexport_extractor(CD / "data" / "binexport2"


 def test_pattern_matching_x86():
-    queries = BinExport2InstructionPatternMatcher.from_str("""
+    queries = BinExport2InstructionPatternMatcher.from_str(
+        """
        cmp|lea reg, [reg(not-stack) + #int0]  ; capture #int0
-        """)
+        """
+    )

    # 0x4018c0:  LEA         ECX, [EBX+0x2]
    # query:     cmp|lea     reg, [reg(not-stack) + #int0]  ; capture #int0
--- a/tests/test_binja_features.py
+++ b/tests/test_binja_features.py
@@ -70,4 +70,4 @@ def test_standalone_binja_backend():
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
 def test_binja_version():
    version = binaryninja.core_version_info()
-    assert version.major == 5 and version.minor == 2
+    assert version.major == 5 and version.minor == 1
--- a/tests/test_capabilities.py
+++ b/tests/test_capabilities.py
@@ -23,7 +23,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a function (0x4073F0)
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: install service
@@ -37,9 +39,13 @@ def test_match_across_scopes_file_function(z9324d_extractor):
                                - api: advapi32.OpenSCManagerA
                                - api: advapi32.CreateServiceA
                                - api: advapi32.StartServiceA
-                    """)),
+                    """
+                )
+            ),
            # this rule should match on a file feature
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: .text section
@@ -50,11 +56,15 @@ def test_match_across_scopes_file_function(z9324d_extractor):
                              - 9324d1a8ae37a36ae560c37448c9705a
                        features:
                            - section: .text
-                    """)),
+                    """
+                )
+            ),
            # this rule should match on earlier rule matches:
            #  - install service, with function scope
            #  - .text section, with file scope
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: .text section and install service
@@ -67,7 +77,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
                            - and:
                              - match: install service
                              - match: .text section
-                    """)),
+                    """
+                )
+            ),
        ]
    )
    capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
@@ -80,7 +92,9 @@ def test_match_across_scopes(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a basic block (including at least 0x403685)
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: tight loop
@@ -91,10 +105,14 @@ def test_match_across_scopes(z9324d_extractor):
                              - 9324d1a8ae37a36ae560c37448c9705a:0x403685
                        features:
                          - characteristic: tight loop
-                    """)),
+                    """
+                )
+            ),
            # this rule should match on a function (0x403660)
            # based on API, as well as prior basic block rule match
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: kill thread loop
@@ -108,9 +126,13 @@ def test_match_across_scopes(z9324d_extractor):
                            - api: kernel32.TerminateThread
                            - api: kernel32.CloseHandle
                            - match: tight loop
-                    """)),
+                    """
+                )
+            ),
            # this rule should match on a file feature and a prior function rule match
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: kill thread program
@@ -123,7 +145,9 @@ def test_match_across_scopes(z9324d_extractor):
                          - and:
                            - section: .text
                            - match: kill thread loop
-                    """)),
+                    """
+                )
+            ),
        ]
    )
    capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
@@ -133,7 +157,11 @@ def test_match_across_scopes(z9324d_extractor):


 def test_subscope_bb_rules(z9324d_extractor):
-    rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
+    rules = capa.rules.RuleSet(
+        [
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: test rule
@@ -144,14 +172,22 @@ def test_subscope_bb_rules(z9324d_extractor):
                            - and:
                                - basic block:
                                    - characteristic: tight loop
-                    """))])
+                    """
+                )
+            )
+        ]
+    )
    # tight loop at 0x403685
    capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "test rule" in capabilities.matches


 def test_match_specific_functions(z9324d_extractor):
-    rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
+    rules = capa.rules.RuleSet(
+        [
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: receive data
@@ -163,7 +199,11 @@ def test_match_specific_functions(z9324d_extractor):
                        features:
                            - or:
                                - api: recv
-                    """))])
+                    """
+                )
+            )
+        ]
+    )
    extractor = FunctionFilter(z9324d_extractor, {0x4019C0})
    capabilities = capa.capabilities.common.find_capabilities(rules, extractor)
    matches = capabilities.matches["receive data"]
@@ -174,7 +214,11 @@ def test_match_specific_functions(z9324d_extractor):


 def test_byte_matching(z9324d_extractor):
-    rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
+    rules = capa.rules.RuleSet(
+        [
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: byte match test
@@ -184,13 +228,21 @@ def test_byte_matching(z9324d_extractor):
                        features:
                            - and:
                                - bytes: ED 24 9E F4 52 A9 07 47 55 8E E1 AB 30 8E 23 61
-                    """))])
+                    """
+                )
+            )
+        ]
+    )
    capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "byte match test" in capabilities.matches


 def test_com_feature_matching(z395eb_extractor):
-    rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
+    rules = capa.rules.RuleSet(
+        [
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                        meta:
                            name: initialize IWebBrowser2
@@ -202,13 +254,21 @@ def test_com_feature_matching(z395eb_extractor):
                                - api: ole32.CoCreateInstance
                                - com/class: InternetExplorer #bytes: 01 DF 02 00 00 00 00 00 C0 00 00 00 00 00 00 46 = CLSID_InternetExplorer
                                - com/interface: IWebBrowser2 #bytes: 61 16 0C D3 AF CD D0 11 8A 3E 00 C0 4F C9 E2 6E = IID_IWebBrowser2
-                    """))])
+                    """
+                )
+            )
+        ]
+    )
    capabilities = capa.main.find_capabilities(rules, z395eb_extractor)
    assert "initialize IWebBrowser2" in capabilities.matches


 def test_count_bb(z9324d_extractor):
-    rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
+    rules = capa.rules.RuleSet(
+        [
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                      meta:
                        name: count bb
@@ -219,14 +279,22 @@ def test_count_bb(z9324d_extractor):
                      features:
                        - and:
                          - count(basic blocks): 1 or more
-                    """))])
+                    """
+                )
+            )
+        ]
+    )
    capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "count bb" in capabilities.matches


 def test_instruction_scope(z9324d_extractor):
    # .text:004071A4 68 E8 03 00 00          push    3E8h
-    rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
+    rules = capa.rules.RuleSet(
+        [
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                      meta:
                        name: push 1000
@@ -238,7 +306,11 @@ def test_instruction_scope(z9324d_extractor):
                        - and:
                          - mnemonic: push
                          - number: 1000
-                    """))])
+                    """
+                )
+            )
+        ]
+    )
    capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "push 1000" in capabilities.matches
    assert 0x4071A4 in {result[0] for result in capabilities.matches["push 1000"]}
@@ -248,7 +320,11 @@ def test_instruction_subscope(z9324d_extractor):
    # .text:00406F60                         sub_406F60 proc near
    # [...]
    # .text:004071A4 68 E8 03 00 00          push    3E8h
-    rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
+    rules = capa.rules.RuleSet(
+        [
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                    rule:
                      meta:
                        name: push 1000 on i386
@@ -262,7 +338,11 @@ def test_instruction_subscope(z9324d_extractor):
                          - instruction:
                            - mnemonic: push
                            - number: 1000
-                    """))])
+                    """
+                )
+            )
+        ]
+    )
    capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "push 1000 on i386" in capabilities.matches
    assert 0x406F60 in {result[0] for result in capabilities.matches["push 1000 on i386"]}
--- a/tests/test_cape_model.py
+++ b/tests/test_cape_model.py
@@ -81,7 +81,8 @@ def test_cape_extractor(version: str, filename: str, exception: Type[BaseExcepti


 def test_cape_model_argument():
-    call = Call.model_validate_json("""
+    call = Call.model_validate_json(
+        """
        {
            "timestamp": "2023-10-20 12:30:14,015",
            "thread_id": "2380",
@@ -104,6 +105,7 @@ def test_cape_model_argument():
            "repeated": 19,
            "id": 0
        }
-        """)
+        """
+    )
    assert call.arguments[0].value == 30
    assert call.arguments[1].value == 0x30
--- a/tests/test_drakvuf_models.py
+++ b/tests/test_drakvuf_models.py
@@ -18,7 +18,8 @@ from capa.features.extractors.drakvuf.models import SystemCall


 def test_syscall_argument_construction():
-    call_dictionary = json.loads(r"""
+    call_dictionary = json.loads(
+        r"""
        {
            "Plugin": "syscall",
            "TimeStamp": "1716999134.581449",
@@ -42,7 +43,8 @@ def test_syscall_argument_construction():
            "Timeout": "0xfffff506a02846d8",
            "Alertable": "0x0"
        }
-        """)
+        """
+    )
    call = SystemCall(**call_dictionary)
    assert len(call.arguments) == call.nargs
    assert call.arguments["IoCompletionHandle"] == "0xffffffff80001ac0"
--- a/tests/test_dynamic_span_of_calls_scope.py
+++ b/tests/test_dynamic_span_of_calls_scope.py
@@ -83,7 +83,8 @@ def get_call_ids(matches) -> Iterator[int]:
 def test_dynamic_call_scope():
    extractor = get_0000a657_thread3064()

-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
            meta:
                name: test rule
@@ -92,7 +93,8 @@ def test_dynamic_call_scope():
                    dynamic: call
            features:
                - api: GetSystemTimeAsFileTime
-        """)
+        """
+    )

    r = capa.rules.Rule.from_yaml(rule)
    ruleset = capa.rules.RuleSet([r])
@@ -114,7 +116,8 @@ def test_dynamic_call_scope():
 def test_dynamic_span_scope():
    extractor = get_0000a657_thread3064()

-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
            meta:
                name: test rule
@@ -128,7 +131,8 @@ def test_dynamic_span_scope():
                    - api: LdrGetDllHandle
                    - api: LdrGetProcedureAddress
                    - count(api(LdrGetDllHandle)): 2
-        """)
+        """
+    )

    r = capa.rules.Rule.from_yaml(rule)
    ruleset = capa.rules.RuleSet([r])
@@ -154,7 +158,8 @@ def test_dynamic_span_scope():
 def test_dynamic_span_scope_length():
    extractor = get_0000a657_thread3064()

-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
            meta:
                name: test rule
@@ -165,7 +170,8 @@ def test_dynamic_span_scope_length():
                - and:
                    - api: GetSystemTimeAsFileTime
                    - api: RtlAddVectoredExceptionHandler
-        """)
+        """
+    )

    r = capa.rules.Rule.from_yaml(rule)
    ruleset = capa.rules.RuleSet([r])
@@ -190,7 +196,8 @@ def test_dynamic_span_scope_length():
 def test_dynamic_span_call_subscope():
    extractor = get_0000a657_thread3064()

-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
            meta:
                name: test rule
@@ -203,7 +210,8 @@ def test_dynamic_span_call_subscope():
                        - and:
                            - api: LdrGetProcedureAddress
                            - string: AddVectoredExceptionHandler
-        """)
+        """
+    )

    r = capa.rules.Rule.from_yaml(rule)
    ruleset = capa.rules.RuleSet([r])
@@ -226,7 +234,8 @@ def test_dynamic_span_call_subscope():
 def test_dynamic_span_scope_span_subscope():
    extractor = get_0000a657_thread3064()

-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
            meta:
                name: test rule
@@ -247,7 +256,8 @@ def test_dynamic_span_scope_span_subscope():
                            - api: LdrGetDllHandle
                            - api: LdrGetProcedureAddress
                            - string: RemoveVectoredExceptionHandler
-        """)
+        """
+    )

    r = capa.rules.Rule.from_yaml(rule)
    ruleset = capa.rules.RuleSet([r])
@@ -259,7 +269,8 @@ def test_dynamic_span_scope_span_subscope():

 # show that you can't use thread subscope in span rules.
 def test_dynamic_span_scope_thread_subscope():
-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
            meta:
                name: test rule
@@ -270,7 +281,8 @@ def test_dynamic_span_scope_thread_subscope():
                - and:
                    - thread:
                        - string: "foo"
-        """)
+        """
+    )

    with pytest.raises(capa.rules.InvalidRule):
        capa.rules.Rule.from_yaml(rule)
@@ -288,7 +300,8 @@ def test_dynamic_span_scope_thread_subscope():
 def test_dynamic_span_example():
    extractor = get_0000a657_thread3064()

-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
            meta:
                name: test rule
@@ -306,7 +319,8 @@ def test_dynamic_span_example():
                            - api: LdrGetProcedureAddress
                            - string: "AddVectoredExceptionHandler"
                    - api: RtlAddVectoredExceptionHandler
-        """)
+        """
+    )

    r = capa.rules.Rule.from_yaml(rule)
    ruleset = capa.rules.RuleSet([r])
@@ -331,7 +345,8 @@ def test_dynamic_span_example():
 def test_dynamic_span_multiple_spans_overlapping_single_event():
    extractor = get_0000a657_thread3064()

-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
            meta:
                name: test rule
@@ -344,7 +359,8 @@ def test_dynamic_span_multiple_spans_overlapping_single_event():
                        - and:
                            - api: LdrGetProcedureAddress
                            - string: "AddVectoredExceptionHandler"
-        """)
+        """
+    )

    r = capa.rules.Rule.from_yaml(rule)
    ruleset = capa.rules.RuleSet([r])
@@ -370,7 +386,9 @@ def test_dynamic_span_scope_match_statements():

    ruleset = capa.rules.RuleSet(
        [
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                rule:
                    meta:
                        name: resolve add VEH
@@ -383,8 +401,12 @@ def test_dynamic_span_scope_match_statements():
                            - api: LdrGetDllHandle
                            - api: LdrGetProcedureAddress
                            - string: AddVectoredExceptionHandler
-                """)),
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+                """
+                )
+            ),
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                rule:
                    meta:
                        name: resolve remove VEH
@@ -397,8 +419,12 @@ def test_dynamic_span_scope_match_statements():
                            - api: LdrGetDllHandle
                            - api: LdrGetProcedureAddress
                            - string: RemoveVectoredExceptionHandler
-                """)),
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+                """
+                )
+            ),
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                rule:
                    meta:
                        name: resolve add and remove VEH
@@ -409,8 +435,12 @@ def test_dynamic_span_scope_match_statements():
                        - and:
                            - match: resolve add VEH
                            - match: resolve remove VEH
-                """)),
-            capa.rules.Rule.from_yaml(textwrap.dedent("""
+                """
+                )
+            ),
+            capa.rules.Rule.from_yaml(
+                textwrap.dedent(
+                    """
                rule:
                    meta:
                        name: has VEH runtime linking
@@ -420,7 +450,9 @@ def test_dynamic_span_scope_match_statements():
                    features:
                        - and:
                            - match: linking/runtime-linking/veh
-                """)),
+                """
+                )
+            ),
        ]
    )

--- a/tests/test_fmt.py
+++ b/tests/test_fmt.py
@@ -17,7 +17,8 @@ import textwrap

 import capa.rules

-EXPECTED = textwrap.dedent("""\
+EXPECTED = textwrap.dedent(
+    """\
    rule:
      meta:
        name: test rule
@@ -33,11 +34,13 @@ EXPECTED = textwrap.dedent("""\
        - and:
          - number: 1
          - number: 2
-    """)
+    """
+)


 def test_rule_reformat_top_level_elements():
-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
          features:
            - and:
@@ -53,13 +56,15 @@ def test_rule_reformat_top_level_elements():
            examples:
              - foo1234
              - bar5678
-        """)
+        """
+    )

    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED


 def test_rule_reformat_indentation():
-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
         meta:
            name: test rule
@@ -75,13 +80,15 @@ def test_rule_reformat_indentation():
                - and:
                             - number: 1
                             - number: 2
-        """)
+        """
+    )

    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED


 def test_rule_reformat_order():
-    rule = textwrap.dedent("""
+    rule = textwrap.dedent(
+        """
        rule:
          meta:
            authors:
@@ -97,7 +104,8 @@ def test_rule_reformat_order():
            - and:
              - number: 1
              - number: 2
-        """)
+        """
+    )

    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED

@@ -105,7 +113,8 @@ def test_rule_reformat_order():
 def test_rule_reformat_meta_update():
    # test updating the rule content after parsing

-    src = textwrap.dedent("""
+    src = textwrap.dedent(
+        """
        rule:
          meta:
            authors:
@@ -121,7 +130,8 @@ def test_rule_reformat_meta_update():
            - and:
              - number: 1
              - number: 2
-        """)
+        """
+    )

    rule = capa.rules.Rule.from_yaml(src)
    rule.name = "test rule"
@@ -131,7 +141,8 @@ def test_rule_reformat_meta_update():
 def test_rule_reformat_string_description():
    # the `description` should be aligned with the preceding feature name.
    # see #263
-    src = textwrap.dedent("""
+    src = textwrap.dedent(
+        """
        rule:
          meta:
            name: test rule
@@ -144,7 +155,8 @@ def test_rule_reformat_string_description():
            - and:
              - string: foo
                description: bar
-        """).lstrip()
+        """
+    ).lstrip()

    rule = capa.rules.Rule.from_yaml(src)
    assert rule.to_yaml() == src
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
copilot-swe-agent[bot]	eb6c0bd719	Fix Binary Ninja MLIL None handling in extract_stackstring Co-authored-by: williballenthin <156560+williballenthin@users.noreply.github.com>	2025-09-03 09:44:38 +00:00
copilot-swe-agent[bot]	aa02a46f33	Initial plan	2025-09-03 09:38:07 +00:00