wip

codecut: torch loader
codecut: import to torch
2026-03-16 14:59:04 -07:00 · 2025-01-15 12:09:17 +00:00 · 2025-01-15 12:09:17 +00:00 · 2025-01-15 12:09:17 +00:00 · 2025-01-15 12:09:17 +00:00 · 2025-01-15 12:09:17 +00:00
176 changed files with 3565 additions and 1337 deletions
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,6 +1,6 @@
 # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile

-# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
+# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3-bullseye, 3.10-bullseye, 3-buster, 3.10-buster, etc.
 ARG VARIANT="3.10-bullseye"
 FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}

--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -6,7 +6,7 @@
 		"dockerfile": "Dockerfile",
 		"context": "..",
 		"args": { 
-			// Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6
+			// Update 'VARIANT' to pick a Python version: 3, 3.10, etc.
 			// Append -bullseye or -buster to pin to an OS version.
 			// Use -bullseye variants on local on arm64/Apple Silicon.
 			"VARIANT": "3.10",
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -21,26 +21,25 @@ jobs:
      # set to false for debugging
      fail-fast: true
      matrix:
-        # using Python 3.8 to support running across multiple operating systems including Windows 7
        include:
          - os: ubuntu-20.04
            # use old linux so that the shared library versioning is more portable
            artifact_name: capa
            asset_name: linux
-            python_version: 3.8
+            python_version: '3.10'
          - os: ubuntu-20.04
            artifact_name: capa
            asset_name: linux-py312
-            python_version: 3.12
+            python_version: '3.12'
          - os: windows-2019
            artifact_name: capa.exe
            asset_name: windows
-            python_version: 3.8
-          - os: macos-12
+            python_version: '3.10'
+          - os: macos-13
            # use older macOS for assumed better portability
            artifact_name: capa
            asset_name: macos
-            python_version: 3.8
+            python_version: '3.10'
    steps:
      - name: Checkout capa
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -107,7 +106,7 @@ jobs:
    # upload zipped binaries to Release page
    if: github.event_name == 'release'
    name: zip and upload ${{ matrix.asset_name }}
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
    needs: [build]
    strategy:
      matrix:
--- a/.github/workflows/changelog.yml
+++ b/.github/workflows/changelog.yml
@@ -13,8 +13,11 @@ permissions:
 jobs:
  check_changelog:
    # no need to check for dependency updates via dependabot
-    if: github.actor != 'dependabot[bot]' && github.actor != 'dependabot-preview[bot]'
-    runs-on: ubuntu-20.04
+    # github.event.pull_request.user.login refers to PR author
+    if: |
+      github.event.pull_request.user.login != 'dependabot[bot]' &&
+      github.event.pull_request.user.login != 'dependabot-preview[bot]'
+    runs-on: ubuntu-latest
    env:
      NO_CHANGELOG: '[x] No CHANGELOG update needed'
    steps:
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -21,7 +21,7 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
        with:
-          python-version: '3.8'
+          python-version: '3.10'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
--- a/.github/workflows/tag.yml
+++ b/.github/workflows/tag.yml
@@ -9,7 +9,7 @@ permissions: read-all
 jobs:
  tag:
    name: Tag capa rules
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
    steps:
    - name: Checkout capa-rules
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -26,7 +26,7 @@ env:

 jobs:
  changelog_format:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
    steps:
    - name: Checkout capa
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -37,15 +37,15 @@ jobs:
        if [ $number != 1 ]; then exit 1; fi

  code_style:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
    steps:
    - name: Checkout capa
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
    # use latest available python to take advantage of best performance
-    - name: Set up Python 3.11
+    - name: Set up Python 3.12
      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
-        python-version: "3.11"
+        python-version: "3.12"
    - name: Install dependencies
      run: |
        pip install -r requirements.txt
@@ -64,16 +64,16 @@ jobs:
      run: pre-commit run deptry --hook-stage manual

  rule_linter:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
    steps:
    - name: Checkout capa with submodules
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      with:
        submodules: recursive
-    - name: Set up Python 3.11
+    - name: Set up Python 3.12
      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
-        python-version: "3.11"
+        python-version: "3.12"
    - name: Install capa
      run: |
        pip install -r requirements.txt
@@ -88,17 +88,17 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-20.04, windows-2019, macos-12]
+        os: [ubuntu-20.04, windows-2019, macos-13]
        # across all operating systems
-        python-version: ["3.8", "3.11"]
+        python-version: ["3.10", "3.11"]
        include:
          # on Ubuntu run these as well
-          - os: ubuntu-20.04
-            python-version: "3.8"
-          - os: ubuntu-20.04
-            python-version: "3.9"
          - os: ubuntu-20.04
            python-version: "3.10"
+          - os: ubuntu-20.04
+            python-version: "3.11"
+          - os: ubuntu-20.04
+            python-version: "3.12"
    steps:
    - name: Checkout capa with submodules
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -131,7 +131,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        python-version: ["3.9", "3.11"]
+        python-version: ["3.10", "3.11"]
    steps:
    - name: Checkout capa with submodules
      # do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
@@ -173,7 +173,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        python-version: ["3.8", "3.11"]
+        python-version: ["3.10", "3.11"]
        java-version: ["17"]
        ghidra-version: ["11.0.1"]
        public-version: ["PUBLIC_20240130"] # for ghidra releases
--- a/.github/workflows/web-release.yml
+++ b/.github/workflows/web-release.yml
@@ -0,0 +1,103 @@
+name: create web release
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version number for the release (x.x.x)'
+        required: true
+        type: string
+
+jobs:
+  run-tests:
+    uses: ./.github/workflows/web-tests.yml
+
+  build-and-release:
+    needs: run-tests
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set release name
+      run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV
+
+    - name: Check if release already exists
+      run: |
+        if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
+          echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
+          exit 1
+        fi
+
+    - name: Set up Node.js
+      uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
+      with:
+        node-version: 20
+        cache: 'npm'
+        cache-dependency-path: 'web/explorer/package-lock.json'
+
+    - name: Install dependencies
+      run: npm ci
+      working-directory: web/explorer
+
+    - name: Build offline bundle
+      run: npm run build:bundle
+      working-directory: web/explorer
+
+    - name: Compress bundle
+      run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
+      working-directory: web/explorer
+
+    - name: Create releases directory
+      run: mkdir -vp web/explorer/releases
+
+    - name: Move release to releases folder
+      run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases
+
+    - name: Compute release SHA256 hash
+      run: |
+        echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV
+
+    - name: Update CHANGELOG.md
+      run: |
+        echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
+        echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
+        echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
+        echo "" >> web/explorer/releases/CHANGELOG.md
+        cat web/explorer/releases/CHANGELOG.md
+
+    - name: Remove older releases
+      # keep only the latest 3 releases
+      run: ls -t capa-explorer-web-v*.zip | tail -n +4 | xargs -r rm --
+      working-directory: web/explorer/releases
+
+    - name: Stage release files
+      run: |
+        git config --local user.email "capa-dev@mandiant.com"
+        git config --local user.name "Capa Bot"
+        git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
+        git add -u web/explorer/releases/
+
+    - name: Create Pull Request
+      uses: peter-evans/create-pull-request@5e914681df9dc83aa4e4905692ca88beb2f9e91f # v7.0.5
+      with:
+        token: ${{ secrets.GITHUB_TOKEN }}
+        title: "explorer web: add release v${{ github.event.inputs.version }}"
+        body: |
+          This PR adds a new capa Explorer Web release v${{ github.event.inputs.version }}.
+
+          Release details:
+          - Name: ${{ env.RELEASE_NAME }}
+          - SHA256: ${{ env.RELEASE_SHA256 }}
+
+          This release is generated by the [web release](https://github.com/mandiant/capa/actions/workflows/web-release.yml) workflow.
+
+          - [x] No CHANGELOG update needed
+          - [x] No new tests needed
+          - [x] No documentation update needed
+        commit-message: ":robot: explorer web: add release ${{ env.RELEASE_NAME }}"
+        branch: release/web-v${{ github.event.inputs.version }}
+        add-paths: web/explorer/releases/${{ env.RELEASE_NAME }}.zip
+        base: master
+        labels: webui
+        delete-branch: true
+        committer: Capa Bot <capa-dev@mandiant.com>
+        author: Capa Bot <capa-dev@mandiant.com>
--- a/.github/workflows/web-tests.yml
+++ b/.github/workflows/web-tests.yml
@@ -1,10 +1,11 @@
-name: Capa Explorer Web tests 
+name: capa Explorer Web tests 

 on:
  pull_request:
    branches: [ master ]
    paths:
      - 'web/explorer/**'
+  workflow_call:  # this allows the workflow to be called by other workflows

 jobs:
  test:
@@ -23,20 +24,20 @@ jobs:
      with:
        node-version: 20
        cache: 'npm'
-        cache-dependency-path: './web/explorer/package-lock.json'
+        cache-dependency-path: 'web/explorer/package-lock.json'

    - name: Install dependencies
      run: npm ci
-      working-directory: ./web/explorer
+      working-directory: web/explorer

    - name: Lint
      run: npm run lint
-      working-directory: ./web/explorer
+      working-directory: web/explorer

    - name: Format
      run: npm run format:check
-      working-directory: ./web/explorer
+      working-directory: web/explorer

    - name: Run unit tests
      run: npm run test
-      working-directory: ./web/explorer
+      working-directory: web/explorer
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
    hooks:
    -   id: isort
        name: isort
-        stages: [commit, push, manual]
+        stages: [pre-commit, pre-push, manual]
        language: system
        entry: isort
        args: 
@@ -46,7 +46,7 @@ repos:
    hooks:
    -   id: black
        name: black
-        stages: [commit, push, manual]
+        stages: [pre-commit, pre-push, manual]
        language: system
        entry: black
        args: 
@@ -64,7 +64,7 @@ repos:
    hooks:
    -   id: ruff
        name: ruff
-        stages: [commit, push, manual]
+        stages: [pre-commit, pre-push, manual]
        language: system
        entry: ruff
        args: 
@@ -82,7 +82,7 @@ repos:
    hooks:
    -   id: flake8
        name: flake8
-        stages: [push, manual]
+        stages: [pre-push, manual]
        language: system
        entry: flake8
        args: 
@@ -101,13 +101,14 @@ repos:
    hooks:
    -   id: mypy
        name: mypy
-        stages: [push, manual]
+        stages: [pre-push, manual]
        language: system
        entry: mypy
        args: 
        -   "--check-untyped-defs"
        -   "--ignore-missing-imports"
        -   "--config-file=.github/mypy/mypy.ini"
+        -   "--enable-incomplete-feature=NewGenericSyntax"
        -   "capa/"
        -   "scripts/"
        -   "tests/"
@@ -119,7 +120,7 @@ repos:
    hooks:
    -   id: deptry
        name: deptry
-        stages: [push, manual]
+        stages: [pre-push, manual]
        language: system
        entry: deptry .
        always_run: true
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,8 +12,9 @@

 ### Bug Fixes

- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
+- vmray: load more analysis archives @mr-tz
+- dynamic: only check file limitations for static file formats @mr-tz
+- vmray: skip non-printable strings @mike-hunhoff

 ### capa Explorer Web

@@ -22,8 +23,127 @@
 ### Development

 ### Raw diffs
- [capa v7.4.0...master](https://github.com/mandiant/capa/compare/v7.4.0...master)
- [capa-rules v7.4.0...master](https://github.com/mandiant/capa-rules/compare/v7.4.0...master)
+- [capa v8.0.1...master](https://github.com/mandiant/capa/compare/v8.0.1...master)
+- [capa-rules v8.0.1...master](https://github.com/mandiant/capa-rules/compare/v8.0.1...master)
+
+## v8.0.1
+
+This point release fixes an issue with the IDAPython API to now handle IDA Pro 8.3, 8.4, and 9.0 correctly.
+
+### Bug Fixes
+
+- handle IDA 8.3/8.4 vs. 9.0 API change @mr-tz
+
+### Raw diffs
+- [capa v8.0.0...v8.0.1](https://github.com/mandiant/capa/compare/v8.0.0...v8.0.1)
+- [capa-rules v8.0.0...v8.0.1](https://github.com/mandiant/capa-rules/compare/v8.0.0...v8.0.1)
+
+## v8.0.0
+
+capa version 8 adds support for IDA Pro 9.0 (and idalib). The release comes with various improvements and bug fixes for the Binary Ninja backend (including support for loading Binary Ninja database files) -- thanks to @xusheng6.
+
+Additional bug fixes improve the dynamic and BinExport backends.
+
+capa version 8 now requires Python 3.10 or newer.
+
+Special thanks to @Tamir-K, @harshit-wadhwani, @jorik-utwente for their great contributions.
+
+### New Features
+
+- allow call as valid subscope for call scoped rules @mr-tz
+- support loading and analyzing a Binary Ninja database #2496 @xusheng6
+- vmray: record process command line details @mr-tz
+
+### Breaking Changes
+
+- remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz
+
+### New Rules (54)
+
+- nursery/get-shadow-password-file-entry-on-linux jonathanlepore@google.com
+- nursery/set-shadow-password-file-entry-on-linux jonathanlepore@google.com
+- collection/browser/get-chrome-cookiemonster still@teamt5.org
+- collection/browser/get-elevation-service-for-chromium-based-browsers still@teamt5.org
+- collection/get-steam-token still@teamt5.org
+- nursery/persist-via-application-shimming j.j.vannielen@utwente.nl
+- nursery/persist-via-bits-job j.j.vannielen@utwente.nl
+- nursery/persist-via-print-processors-registry-key j.j.vannielen@utwente.nl
+- linking/static/touchsocket/linked-against-touchsocket still@teamt5.org
+- runtime/dotnet/compiled-with-dotnet-aot still@teamt5.org
+- nursery/persist-via-errorhandler-script j.j.vannielen@utwente.nl
+- nursery/persist-via-get-variable-hijack j.j.vannielen@utwente.nl
+- nursery/persist-via-iphlpapi-dll-hijack j.j.vannielen@utwente.nl
+- nursery/persist-via-lnk-shortcut j.j.vannielen@utwente.nl
+- nursery/persist-via-powershell-profile j.j.vannielen@utwente.nl
+- nursery/persist-via-windows-accessibility-tools j.j.vannielen@utwente.nl
+- nursery/persist-via-windows-terminal-profile j.j.vannielen@utwente.nl
+- nursery/write-to-browser-extension-directory j.j.vannielen@utwente.nl
+- nursery/persist-via-aedebug-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-amsi-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-app-paths-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-appcertdlls-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-appx-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-autodialdll-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-autoplayhandlers-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-bootverificationprogram-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-code-signing-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-com-hijack j.j.vannielen@utwente.nl
+- nursery/persist-via-command-processor-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-contextmenuhandlers-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-cor_profiler_path-registry-value j.j.vannielen@utwente.nl
+- nursery/persist-via-default-file-association-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-disk-cleanup-handler-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-dotnet-dbgmanageddebugger-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-dotnet_startup_hooks-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-explorer-tools-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-filter-handlers-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-group-policy-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-hhctrl-com-hijack j.j.vannielen@utwente.nl
+- nursery/persist-via-htmlhelp-author-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-image-file-execution-options-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-lsa-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-natural-language-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-netsh-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-network-provider-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-path-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-print-monitors-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-rdp-startup-programs-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-silentprocessexit-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-telemetrycontroller-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-timeproviders-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-ts-initialprogram-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-userinitmprlogonscript-registry-value j.j.vannielen@utwente.nl
+- nursery/persist-via-windows-error-reporting-registry-key j.j.vannielen@utwente.nl
+
+### Bug Fixes
+
+- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
+- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
+- ghidra: fix saving of base address @mr-tz
+- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
+- binja: fix crash when the IL of certain functions is not available. #2249 @xusheng6
+- binja: major performance improvement on the binja extractor. #1414 @xusheng6
+- cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz
+- binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6
+- binja: move the stack string detection to function level #2516 @xusheng6
+- BinExport2: fix handling of incorrect thunk functions #2524 @williballenthin
+- BinExport2: more precise pruning of expressions @williballenthin
+- BinExport2: better handle weird expression trees from Ghidra #2528 #2530 @williballenthin
+
+### capa Explorer Web
+
+### capa Explorer IDA Pro plugin
+
+- fix bug preventing saving of capa results via Save button @mr-tz
+- fix saving of base address @mr-tz
+
+### Development
+- CI: use macos-13 since macos-12 is deprecated and will be removed on December 3rd, 2024 #2173 @mr-tz
+- CI: update Binary Ninja version to 4.2 #2499 @xusheng6
+
+### Raw diffs
+- [capa v7.4.0...v8.0.0](https://github.com/mandiant/capa/compare/v7.4.0...v8.0.0)
+- [capa-rules v7.4.0...v8.0.0](https://github.com/mandiant/capa-rules/compare/v7.4.0...v8.0.0)

 ## v7.4.0

@@ -179,6 +299,8 @@ Special thanks to our repeat and new contributors:
 - CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
 - CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
 - CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
+- CI: add web releases workflow #2455 @s-ff
+- CI: skip changelog.yml for dependabot PRs #2471

 ### Raw diffs

--- a/README.md
+++ b/README.md
@@ -38,49 +38,47 @@ Below you find a list of [our capa blog posts with more details.](#blog-posts)
 ```
 $ capa.exe suspicious.exe

-+------------------------+--------------------------------------------------------------------------------+
-| ATT&CK Tactic          | ATT&CK Technique                                                               |
-|------------------------+--------------------------------------------------------------------------------|
-| DEFENSE EVASION        | Obfuscated Files or Information [T1027]                                        |
-| DISCOVERY              | Query Registry [T1012]                                                         |
-|                        | System Information Discovery [T1082]                                           |
-| EXECUTION              | Command and Scripting Interpreter::Windows Command Shell [T1059.003]           |
-|                        | Shared Modules [T1129]                                                         |
-| EXFILTRATION           | Exfiltration Over C2 Channel [T1041]                                           |
-| PERSISTENCE            | Create or Modify System Process::Windows Service [T1543.003]                   |
-+------------------------+--------------------------------------------------------------------------------+
+--------------------+------------------------------------------------------------------------+
+| ATT&CK Tactic      | ATT&CK Technique                                                       |
+|--------------------+------------------------------------------------------------------------|
+| DEFENSE EVASION    | Obfuscated Files or Information [T1027]                                |
+| DISCOVERY          | Query Registry [T1012]                                                 |
+|                    | System Information Discovery [T1082]                                   |
+| EXECUTION          | Command and Scripting Interpreter::Windows Command Shell [T1059.003]   |
+|                    | Shared Modules [T1129]                                                 |
+| EXFILTRATION       | Exfiltration Over C2 Channel [T1041]                                   |
+| PERSISTENCE        | Create or Modify System Process::Windows Service [T1543.003]           |
+--------------------+------------------------------------------------------------------------+

-+-------------------------------------------------------+-------------------------------------------------+
-| CAPABILITY                                            | NAMESPACE                                       |
-|-------------------------------------------------------+-------------------------------------------------|
-| check for OutputDebugString error                     | anti-analysis/anti-debugging/debugger-detection |
-| read and send data from client to server              | c2/file-transfer                                |
-| execute shell command and capture output              | c2/shell                                        |
-| receive data (2 matches)                              | communication                                   |
-| send data (6 matches)                                 | communication                                   |
-| connect to HTTP server (3 matches)                    | communication/http/client                       |
-| send HTTP request (3 matches)                         | communication/http/client                       |
-| create pipe                                           | communication/named-pipe/create                 |
-| get socket status (2 matches)                         | communication/socket                            |
-| receive data on socket (2 matches)                    | communication/socket/receive                    |
-| send data on socket (3 matches)                       | communication/socket/send                       |
-| connect TCP socket                                    | communication/socket/tcp                        |
-| encode data using Base64                              | data-manipulation/encoding/base64               |
-| encode data using XOR (6 matches)                     | data-manipulation/encoding/xor                  |
-| run as a service                                      | executable/pe                                   |
-| get common file path (3 matches)                      | host-interaction/file-system                    |
-| read file                                             | host-interaction/file-system/read               |
-| write file (2 matches)                                | host-interaction/file-system/write              |
-| print debug messages (2 matches)                      | host-interaction/log/debug/write-event          |
-| resolve DNS                                           | host-interaction/network/dns/resolve            |
-| get hostname                                          | host-interaction/os/hostname                    |
-| create a process with modified I/O handles and window | host-interaction/process/create                 |
-| create process                                        | host-interaction/process/create                 |
-| create registry key                                   | host-interaction/registry/create                |
-| create service                                        | host-interaction/service/create                 |
-| create thread                                         | host-interaction/thread/create                  |
-| persist via Windows service                           | persistence/service                             |
-+-------------------------------------------------------+-------------------------------------------------+
+-------------------------------------------+-------------------------------------------------+
+| CAPABILITY                                | NAMESPACE                                       |
+|-------------------------------------------+-------------------------------------------------|
+| read and send data from client to server  | c2/file-transfer                                |
+| execute shell command and capture output  | c2/shell                                        |
+| receive data (2 matches)                  | communication                                   |
+| send data (6 matches)                     | communication                                   |
+| connect to HTTP server (3 matches)        | communication/http/client                       |
+| send HTTP request (3 matches)             | communication/http/client                       |
+| create pipe                               | communication/named-pipe/create                 |
+| get socket status (2 matches)             | communication/socket                            |
+| receive data on socket (2 matches)        | communication/socket/receive                    |
+| send data on socket (3 matches)           | communication/socket/send                       |
+| connect TCP socket                        | communication/socket/tcp                        |
+| encode data using Base64                  | data-manipulation/encoding/base64               |
+| encode data using XOR (6 matches)         | data-manipulation/encoding/xor                  |
+| run as a service                          | executable/pe                                   |
+| get common file path (3 matches)          | host-interaction/file-system                    |
+| read file                                 | host-interaction/file-system/read               |
+| write file (2 matches)                    | host-interaction/file-system/write              |
+| print debug messages (2 matches)          | host-interaction/log/debug/write-event          |
+| resolve DNS                               | host-interaction/network/dns/resolve            |
+| get hostname                              | host-interaction/os/hostname                    |
+| create process                            | host-interaction/process/create                 |
+| create registry key                       | host-interaction/registry/create                |
+| create service                            | host-interaction/service/create                 |
+| create thread                             | host-interaction/thread/create                  |
+| persist via Windows service               | persistence/service                             |
+-------------------------------------------+-------------------------------------------------+
 ```

 # download and usage
--- a/capa/analysis/init.py
+++ b/capa/analysis/init.py
--- a/capa/analysis/flirt.py
+++ b/capa/analysis/flirt.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+from pydantic import BaseModel
+
+import capa.features.extractors.ida.idalib as idalib
+
+if not idalib.has_idalib():
+    raise RuntimeError("cannot find IDA idalib module.")
+
+if not idalib.load_idalib():
+    raise RuntimeError("failed to load IDA idalib module.")
+
+import idaapi
+import idautils
+
+
+class FunctionId(BaseModel):  # pydantic record for one function, as yielded by get_flirt_matches()
+    va: int  # function address taken from idautils.Functions()
+    is_library: bool  # True when the function's flags have idaapi.FUNC_LIB set
+    name: str  # name returned by idaapi.get_func_name() for this address
+
+
+def get_flirt_matches(lib_only=True):  # generator: yields a FunctionId per function in the open IDA database
+    for fva in idautils.Functions():
+        f = idaapi.get_func(fva)
+        is_lib = bool(f.flags & idaapi.FUNC_LIB)  # test the FUNC_LIB bit of the function flags
+        fname = idaapi.get_func_name(fva)
+
+        if lib_only and not is_lib:  # by default, skip functions that are not flagged FUNC_LIB
+            continue
+
+        yield FunctionId(va=fva, is_library=is_lib, name=fname)
--- a/capa/analysis/libraries.py
+++ b/capa/analysis/libraries.py
@@ -0,0 +1,242 @@
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import io
+import sys
+import logging
+import argparse
+import tempfile
+import contextlib
+from enum import Enum
+from typing import List, Optional
+from pathlib import Path
+
+import rich
+from pydantic import BaseModel
+from rich.text import Text
+from rich.console import Console
+
+import capa.main
+import capa.helpers
+import capa.analysis.flirt
+import capa.analysis.strings
+import capa.features.extractors.ida.idalib as idalib
+
+if not idalib.has_idalib():
+    raise RuntimeError("cannot find IDA idalib module.")
+
+if not idalib.load_idalib():
+    raise RuntimeError("failed to load IDA idalib module.")
+
+import idaapi
+import idapro
+import ida_auto
+import idautils
+
+logger = logging.getLogger(__name__)
+
+
+class Classification(str, Enum):  # verdict for a function; members are also plain strings
+    USER = "user"  # used by main() for recognized entry points
+    LIBRARY = "library"  # used by main() for FLIRT, thunk, and string matches
+    UNKNOWN = "unknown"  # used by main() for functions that no strategy labeled
+
+
+class Method(str, Enum):  # strategy that produced a classification
+    FLIRT = "flirt"  # reported by capa.analysis.flirt.get_flirt_matches()
+    STRINGS = "strings"  # reported by capa.analysis.strings.get_string_matches()
+    THUNK = "thunk"  # is_thunk_function() returned True
+    ENTRYPOINT = "entrypoint"  # function name is a recognized entry point
+
+
+class FunctionClassification(BaseModel):  # one verdict for one function; a function may receive several
+    va: int  # address of the function
+    classification: Classification
+    # name per the disassembler/analysis tool
+    # may be combined with the recovered/suspected name TODO below
+    name: str
+
+    # if is library, this must be provided.
+    # there is no default: main() passes it explicitly, using None for unknown functions.
+    method: Optional[Method]
+
+    # TODO if is library, recovered/suspected name?
+    # if is library, these can optionally be provided.
+    library_name: Optional[str] = None
+    library_version: Optional[str] = None
+
+
+class FunctionIdResults(BaseModel):  # top-level result document; main() prints it via model_dump_json() for --json
+    function_classifications: List[FunctionClassification]
+
+
+@contextlib.contextmanager
+def ida_session(input_path: Path, use_temp_dir=True):
+    """open `input_path` with idalib for the duration of the `with` block.
+
+    when `use_temp_dir` is set, a copy is analyzed inside a scratch directory that is removed on exit;
+    otherwise the file is opened in place. raises RuntimeError when IDA fails to open the file.
+    """
+    import shutil
+
+    t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name if use_temp_dir else input_path
+    logger.debug("using %s", str(t))
+    # stderr=True is used here to redirect the spinner banner to stderr,
+    # so that users can redirect capa's output.
+    console = Console(stderr=True, quiet=False)
+    try:
+        if use_temp_dir:
+            t.write_bytes(input_path.read_bytes())
+
+        # idalib writes to stdout (ugh), so we have to capture that
+        # so as not to screw up structured output.
+        with capa.helpers.stdout_redirector(io.BytesIO()):
+            idapro.enable_console_messages(False)
+            with capa.main.timing("analyze program"), console.status("analyzing program...", spinner="dots"):
+                if idapro.open_database(str(t.absolute()), run_auto_analysis=True):
+                    raise RuntimeError("failed to analyze input file")
+            logger.debug("idalib: waiting for analysis...")
+            ida_auto.auto_wait()
+            logger.debug("idalib: opened database.")
+        yield
+    finally:
+        idapro.close_database()
+        if use_temp_dir:
+            # remove the whole scratch directory, not only the copy, and tolerate a copy that was never written.
+            shutil.rmtree(t.parent, ignore_errors=True)
+
+
+def is_thunk_function(fva):
+    """return True when IDA's flags for the function at `fva` include FUNC_THUNK."""
+    return bool(idaapi.get_func(fva).flags & idaapi.FUNC_THUNK)
+
+
+def main(argv=None):
+    """identify library functions in the input file and print the result; returns the process exit status."""
+    if argv is None:
+        argv = sys.argv[1:]
+
+    parser = argparse.ArgumentParser(description="Identify library functions using various strategies.")
+    capa.main.install_common_args(parser, wanted={"input_file"})
+    parser.add_argument("--store-idb", action="store_true", default=False, help="store IDA database file")
+    parser.add_argument("--min-string-length", type=int, default=8, help="minimum string length")
+    parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
+    args = parser.parse_args(args=argv)
+
+    try:
+        capa.main.handle_common_args(args)
+    except capa.main.ShouldExitError as e:
+        return e.status_code
+
+    dbs = capa.analysis.strings.get_default_databases()
+    capa.analysis.strings.prune_databases(dbs, n=args.min_string_length)
+
+    function_classifications: List[FunctionClassification] = []
+    with ida_session(args.input_file, use_temp_dir=not args.store_idb):
+        with capa.main.timing("FLIRT-based library identification"):
+            # TODO: add more signature (files)
+            # TODO: apply more signatures
+            for flirt_match in capa.analysis.flirt.get_flirt_matches():
+                function_classifications.append(
+                    FunctionClassification(
+                        va=flirt_match.va,
+                        name=flirt_match.name,
+                        classification=Classification.LIBRARY,
+                        method=Method.FLIRT,
+                        # note: we cannot currently include which signature matched per function via the IDA API
+                    )
+                )
+
+        # thunks
+        for fva in idautils.Functions():
+            if is_thunk_function(fva):
+                function_classifications.append(
+                    FunctionClassification(
+                        va=fva,
+                        name=idaapi.get_func_name(fva),
+                        classification=Classification.LIBRARY,
+                        method=Method.THUNK,
+                    )
+                )
+
+        with capa.main.timing("string-based library identification"):
+            for string_match in capa.analysis.strings.get_string_matches(dbs):
+                function_classifications.append(
+                    FunctionClassification(
+                        va=string_match.va,
+                        name=idaapi.get_func_name(string_match.va),
+                        classification=Classification.LIBRARY,
+                        method=Method.STRINGS,
+                        library_name=string_match.metadata.library_name,
+                        library_version=string_match.metadata.library_version,
+                    )
+                )
+
+        # well-known entry points are user code
+        for va in idautils.Functions():
+            name = idaapi.get_func_name(va)
+            if name not in {"WinMain"}:
+                continue
+
+            function_classifications.append(
+                FunctionClassification(
+                    va=va,
+                    name=name,
+                    classification=Classification.USER,
+                    method=Method.ENTRYPOINT,
+                )
+            )
+
+        doc = FunctionIdResults(function_classifications=[])
+        classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
+        for va in idautils.Functions():
+            if classifications := classifications_by_va.get(va):
+                doc.function_classifications.extend(classifications)
+            else:
+                doc.function_classifications.append(
+                    FunctionClassification(
+                        va=va,
+                        name=idaapi.get_func_name(va),
+                        classification=Classification.UNKNOWN,
+                        method=None,
+                    )
+                )
+
+        if args.json:
+            print(doc.model_dump_json())  # noqa: T201 print found
+        else:
+            table = rich.table.Table()
+            table.add_column("FVA")
+            table.add_column("CLASSIFICATION")
+            table.add_column("METHOD")
+            table.add_column("FNAME")
+            table.add_column("EXTRA INFO")
+
+            classifications_by_va = capa.analysis.strings.create_index(doc.function_classifications, "va", sorted_=True)
+            for va, classifications in classifications_by_va.items():
+                # sets are sorted before joining so that the rendered cells are stable from run to run.
+                name = ", ".join(sorted({c.name for c in classifications}))
+                if "sub_" in name:
+                    name = Text(name, style="grey53")
+
+                classification = {c.classification for c in classifications}
+                method = {c.method for c in classifications if c.method}
+                extra = {f"{c.library_name}@{c.library_version}" for c in classifications if c.library_name}
+                table.add_row(
+                    hex(va),
+                    ", ".join(sorted(classification)) if classification != {"unknown"} else Text("unknown", style="grey53"),
+                    ", ".join(sorted(method)),
+                    name,
+                    ", ".join(sorted(extra)),
+                )
+
+            rich.print(table)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/capa/analysis/requirements.txt
+++ b/capa/analysis/requirements.txt
@@ -0,0 +1,2 @@
+# temporary extra file to track dependencies of the analysis directory
+nltk==3.9.1
--- a/capa/analysis/strings/init.py
+++ b/capa/analysis/strings/init.py
@@ -0,0 +1,269 @@
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+"""
+further requirements:
+  - nltk
+"""
+import gzip
+import logging
+import collections
+from typing import Any, Dict, Mapping
+from pathlib import Path
+from dataclasses import dataclass
+
+import msgspec
+
+import capa.features.extractors.strings
+
+logger = logging.getLogger(__name__)
+
+
+class LibraryString(msgspec.Struct):  # one record of a string database, decoded from one JSON line
+    string: str  # the string itself; LibraryStringDatabase uses it as the lookup key
+    library_name: str
+    library_version: str
+    file_path: str | None = None  # optional; presumably where the string was found in the library build
+    function_name: str | None = None  # optional
+    line_number: int | None = None  # optional
+
+
+@dataclass
+class LibraryStringDatabase:
+    """records of one string database, indexed by the string itself."""
+
+    metadata_by_string: Dict[str, LibraryString]
+
+    def __len__(self) -> int:
+        return len(self.metadata_by_string)
+
+    @classmethod
+    def from_file(cls, path: Path) -> "LibraryStringDatabase":
+        """load a gzip-compressed JSONL file; when a string repeats, the last record wins."""
+        decoder = msgspec.json.Decoder(type=LibraryString)
+        raw_lines = gzip.decompress(path.read_bytes()).split(b"\n")
+        # empty lines (such as the one after a trailing newline) are skipped.
+        records = (decoder.decode(raw) for raw in raw_lines if raw)
+
+        return cls(metadata_by_string={record.string: record for record in records})
+
+
+DEFAULT_FILENAMES = (  # databases loaded from the data/oss directory next to this module
+    "brotli.jsonl.gz",
+    "bzip2.jsonl.gz",
+    "cryptopp.jsonl.gz",
+    "curl.jsonl.gz",
+    "detours.jsonl.gz",
+    "jemalloc.jsonl.gz",
+    "jsoncpp.jsonl.gz",
+    "kcp.jsonl.gz",
+    "liblzma.jsonl.gz",
+    "libsodium.jsonl.gz",
+    "libpcap.jsonl.gz",
+    "mbedtls.jsonl.gz",
+    "openssl.jsonl.gz",
+    "sqlite3.jsonl.gz",
+    "tomcrypt.jsonl.gz",
+    "wolfssl.jsonl.gz",
+    "zlib.jsonl.gz",
+)
+
+DEFAULT_PATHS = tuple(Path(__file__).parent / "data" / "oss" / filename for filename in DEFAULT_FILENAMES) + (
+    Path(__file__).parent / "data" / "crt" / "msvc_v143.jsonl.gz",  # one extra database from data/crt
+)
+
+
+def get_default_databases() -> list[LibraryStringDatabase]:  # one database per DEFAULT_PATHS entry, in that order
+    return [LibraryStringDatabase.from_file(path) for path in DEFAULT_PATHS]
+
+
+@dataclass
+class WindowsApiStringDatabase:
+    """names of Windows DLLs and APIs, loaded from gzip-compressed text files."""
+
+    dll_names: set[str]
+    api_names: set[str]
+
+    def __len__(self) -> int:
+        return len(self.dll_names) + len(self.api_names)
+
+    @classmethod
+    def from_dir(cls, path: Path) -> "WindowsApiStringDatabase":
+        """load `dlls.txt.gz` and `apis.txt.gz` (gzip'd UTF-8 text, one name per line) from `path`."""
+
+        def read_names(filename: str) -> set[str]:
+            text = gzip.decompress((path / filename).read_bytes()).decode("utf-8")
+            # blank lines carry no name.
+            return {name for name in text.splitlines() if name}
+
+        # DLL names are read first, then API names.
+        dll_names = read_names("dlls.txt.gz")
+        api_names = read_names("apis.txt.gz")
+        return cls(dll_names=dll_names, api_names=api_names)
+
+    @classmethod
+    def from_defaults(cls) -> "WindowsApiStringDatabase":
+        """load the database stored under `data/winapi` next to this module."""
+        return cls.from_dir(Path(__file__).parent / "data" / "winapi")
+
+
+def extract_strings(buf, n=4):  # yields ASCII strings of `buf` first, then unicode strings; `n` is forwarded to both extractors
+    yield from capa.features.extractors.strings.extract_ascii_strings(buf, n=n)
+    yield from capa.features.extractors.strings.extract_unicode_strings(buf, n=n)
+
+
+def prune_databases(dbs: list[LibraryStringDatabase], n=8):
+    """remove less trustworthy database entries.
+
+    such as:
+      - those found in multiple databases
+      - those that are English words
+      - those that are too short
+      - Windows API and DLL names
+
+    args:
+      dbs: the databases to prune; they are modified in place.
+      n: minimum length; strings shorter than this are removed.
+    """
+
+    # TODO: consider applying these filters directly to the persisted databases, not at load time.
+
+    winapi = WindowsApiStringDatabase.from_defaults()
+
+    try:
+        from nltk.corpus import words as nltk_words
+
+        nltk_words.words()
+    except (ImportError, LookupError):
+        # one-time download of dataset.
+        # this probably doesn't work well for embedded use.
+        import nltk
+
+        nltk.download("words")
+        from nltk.corpus import words as nltk_words
+    # materialized as a set so that the membership tests below are cheap.
+    words = set(nltk_words.words())
+
+    def is_untrustworthy(candidate: str) -> bool:
+        """an English word, too short, or merely the name of a Windows API or DLL."""
+        return (
+            candidate in words
+            or len(candidate) < n
+            or candidate in winapi.api_names
+            or candidate in winapi.dll_names
+        )
+
+    # first pass: tally how many databases contain each string,
+    # and note the strings that fail the per-string checks.
+    occurrences: collections.Counter[str] = collections.Counter()
+    to_remove = set()
+    for db in dbs:
+        for candidate in db.metadata_by_string:
+            occurrences[candidate] += 1
+            if is_untrustworthy(candidate):
+                to_remove.add(candidate)
+
+    # remove strings that are seen in more than one database
+    for string, count in occurrences.items():
+        if count > 1:
+            to_remove.add(string)
+
+    # second pass: drop every flagged string from every database that holds it.
+    for db in dbs:
+        for string in to_remove:
+            db.metadata_by_string.pop(string, None)
+
+
+def get_function_strings():
+    """collect the strings referenced by each function of the open IDA database.
+
+    returns a dict from function address to a set of strings; functions without strings are absent.
+    """
+    import idaapi
+    import idautils
+
+    import capa.features.extractors.ida.helpers as ida_helpers
+
+    # ignore library functions and thunk functions as identified by IDA
+    skipped = idaapi.FUNC_THUNK | idaapi.FUNC_LIB
+
+    strings_by_function = collections.defaultdict(set)
+    for fva in idautils.Functions():
+        func = idaapi.get_func(fva)
+        if func.flags & skipped:
+            continue
+
+        for block in ida_helpers.get_function_blocks(func):
+            for insn in ida_helpers.get_instructions_in_range(block.start_ea, block.end_ea):
+                target = ida_helpers.find_data_reference_from_insn(insn)
+                if target == insn.ea:
+                    # NOTE(review): presumably the helper returns the instruction's own address when there is no data reference - confirm
+                    continue
+                if found := ida_helpers.find_string_at(target):
+                    strings_by_function[fva].add(found)
+
+    return strings_by_function
+
+
+@dataclass
+class LibraryStringClassification:  # one (function, string) hit against a library string database
+    va: int  # address of the function that references the string
+    string: str  # the matched string
+    library_name: str  # same value as metadata.library_name; lets create_index() group matches by library
+    metadata: LibraryString  # the database record that matched
+
+
+def create_index(s: list, k: str, sorted_: bool = False) -> Mapping[Any, list]:
+    """group the elements of `s` by their attribute `k`.
+
+    with `sorted_`, the elements are first ordered by that attribute, so keys are inserted in ascending order.
+    """
+    elements = sorted(s, key=lambda element: getattr(element, k)) if sorted_ else s
+    index = collections.defaultdict(list)
+    for element in elements:
+        index[getattr(element, k)].append(element)
+    return index
+
+
+def get_string_matches(dbs: list[LibraryStringDatabase]) -> list[LibraryStringClassification]:
+    """label functions of the open IDA database as library code based on the strings they reference.
+
+    a library is only reported when it has more than five matches,
+    and a function is only labeled when all of its matches agree on one library.
+    """
+    matches: list[LibraryStringClassification] = []
+
+    for function, strings in sorted(get_function_strings().items()):
+        for string in strings:
+            for db in dbs:
+                if metadata := db.metadata_by_string.get(string):
+                    matches.append(
+                        LibraryStringClassification(
+                            va=function,
+                            string=string,
+                            library_name=metadata.library_name,
+                            metadata=metadata,
+                        )
+                    )
+
+    # if there are five or fewer matches for a library, ignore that library
+    weak_libraries = set()
+    for library_name, library_matches in create_index(matches, "library_name").items():
+        if len(library_matches) <= 5:
+            logger.info("pruning library %s: only %d matched string", library_name, len(library_matches))
+            weak_libraries.add(library_name)
+    matches = [m for m in matches if m.library_name not in weak_libraries]
+
+    # if there are conflicts within a single function, don't label it
+    conflicted = set()
+    for va, function_matches in create_index(matches, "va").items():
+        library_names = {m.library_name for m in function_matches}
+        if len(library_names) > 1:
+            logger.info("conflicting matches: 0x%x: %s", va, sorted(library_names))
+            conflicted.add(va)
+    return [m for m in matches if m.va not in conflicted]
--- a/capa/analysis/strings/main.py
+++ b/capa/analysis/strings/main.py
@@ -0,0 +1,130 @@
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import sys
+import logging
+import collections
+from pathlib import Path
+
+import rich
+from rich.text import Text
+
+import capa.analysis.strings
+import capa.features.extractors.strings
+import capa.features.extractors.ida.helpers as ida_helpers
+
+logger = logging.getLogger(__name__)
+
+
+def open_ida(input_path: Path):
+    """open a scratch copy of `input_path` with idalib and wait for auto-analysis; raise RuntimeError on failure."""
+    import tempfile
+
+    import idapro
+    import ida_auto
+
+    t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
+    t.write_bytes(input_path.read_bytes())
+    # resource leak: we should delete this upon exit
+    idapro.enable_console_messages(False)
+    # open_database() returns a non-zero status on failure.
+    if idapro.open_database(str(t.absolute()), run_auto_analysis=True):
+        raise RuntimeError("failed to analyze input file")
+    ida_auto.auto_wait()
+
+
+def main():  # reads the input file path from sys.argv[1]
+    logging.basicConfig(level=logging.DEBUG)
+
+    # use n=8 to ignore common words
+    N = 8
+
+    input_path = Path(sys.argv[1])
+
+    dbs = capa.analysis.strings.get_default_databases()
+    capa.analysis.strings.prune_databases(dbs, n=N)
+
+    strings_by_library = collections.defaultdict(set)  # library name -> database strings found in the raw file bytes
+    for string in capa.analysis.strings.extract_strings(input_path.read_bytes(), n=N):
+        for db in dbs:
+            if metadata := db.metadata_by_string.get(string.s):
+                strings_by_library[metadata.library_name].add(string.s)
+
+    console = rich.get_console()
+    console.print("found libraries:", style="bold")
+    for library, strings in sorted(strings_by_library.items(), key=lambda p: len(p[1]), reverse=True):
+        console.print(f"  - [b]{library}[/] ({len(strings)} strings)")
+
+        for string in sorted(strings)[:10]:  # show at most ten example strings per library
+            console.print(f"    - {string}", markup=False, style="grey37")
+
+        if len(strings) > 10:
+            console.print("    ...", style="grey37")
+
+    if not strings_by_library:
+        console.print("  (none)", style="grey37")
+        # since we're not going to find any strings
+        # return early and don't do IDA analysis
+        return
+
+    open_ida(input_path)
+
+    import idaapi
+    import idautils
+    import ida_funcs
+
+    strings_by_function = collections.defaultdict(set)  # function address -> referenced strings present in some database
+    for ea in idautils.Functions():
+        f = idaapi.get_func(ea)
+
+        # ignore library functions and thunk functions as identified by IDA
+        if f.flags & idaapi.FUNC_THUNK:
+            continue
+        if f.flags & idaapi.FUNC_LIB:
+            continue
+
+        for bb in ida_helpers.get_function_blocks(f):
+            for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
+                ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
+                if ref == insn.ea:
+                    continue
+
+                string = capa.features.extractors.ida.helpers.find_string_at(ref)
+                if not string:
+                    continue
+
+                for db in dbs:
+                    if metadata := db.metadata_by_string.get(string):
+                        strings_by_function[ea].add(string)
+
+    # TODO: ensure there are at least XXX functions renamed, or ignore those entries
+
+    console.print("functions:", style="bold")
+    for function, strings in sorted(strings_by_function.items()):
+        if strings:
+            name = ida_funcs.get_func_name(function)
+
+            console.print(f"  [b]{name}[/]@{function:08x}:")
+
+            for string in strings:
+                for db in dbs:  # print one line per database record that matches the string
+                    if metadata := db.metadata_by_string.get(string):
+                        location = Text(
+                            f"{metadata.library_name}@{metadata.library_version}::{metadata.function_name}",
+                            style="grey37",
+                        )
+                        console.print("    - ", location, ": ", string.rstrip())
+
+    console.print()
+
+    console.print(
+        f"found {len(strings_by_function)} library functions across {len(list(idautils.Functions()))} functions"
+    )
+
+
+if __name__ == "__main__":
+    main()
--- a/capa/analysis/strings/data/crt/msvc_v143.jsonl.gz
+++ b/capa/analysis/strings/data/crt/msvc_v143.jsonl.gz
--- a/capa/analysis/strings/data/oss/.gitignore
+++ b/capa/analysis/strings/data/oss/.gitignore
@@ -0,0 +1,3 @@
+*.csv
+*.jsonl
+*.jsonl.gz
--- a/capa/analysis/strings/data/oss/brotli.jsonl.gz
+++ b/capa/analysis/strings/data/oss/brotli.jsonl.gz
--- a/capa/analysis/strings/data/oss/bzip2.jsonl.gz
+++ b/capa/analysis/strings/data/oss/bzip2.jsonl.gz
--- a/capa/analysis/strings/data/oss/cryptopp.jsonl.gz
+++ b/capa/analysis/strings/data/oss/cryptopp.jsonl.gz
--- a/capa/analysis/strings/data/oss/curl.jsonl.gz
+++ b/capa/analysis/strings/data/oss/curl.jsonl.gz
--- a/capa/analysis/strings/data/oss/detours.jsonl.gz
+++ b/capa/analysis/strings/data/oss/detours.jsonl.gz
--- a/capa/analysis/strings/data/oss/jemalloc.jsonl.gz
+++ b/capa/analysis/strings/data/oss/jemalloc.jsonl.gz
--- a/capa/analysis/strings/data/oss/jsoncpp.jsonl.gz
+++ b/capa/analysis/strings/data/oss/jsoncpp.jsonl.gz
--- a/capa/analysis/strings/data/oss/kcp.jsonl.gz
+++ b/capa/analysis/strings/data/oss/kcp.jsonl.gz
--- a/capa/analysis/strings/data/oss/liblzma.jsonl.gz
+++ b/capa/analysis/strings/data/oss/liblzma.jsonl.gz
--- a/capa/analysis/strings/data/oss/libpcap.jsonl.gz
+++ b/capa/analysis/strings/data/oss/libpcap.jsonl.gz
--- a/capa/analysis/strings/data/oss/libsodium.jsonl.gz
+++ b/capa/analysis/strings/data/oss/libsodium.jsonl.gz
--- a/capa/analysis/strings/data/oss/mbedtls.jsonl.gz
+++ b/capa/analysis/strings/data/oss/mbedtls.jsonl.gz
--- a/capa/analysis/strings/data/oss/openssl.jsonl.gz
+++ b/capa/analysis/strings/data/oss/openssl.jsonl.gz
--- a/capa/analysis/strings/data/oss/readme.md
+++ b/capa/analysis/strings/data/oss/readme.md
@@ -0,0 +1,99 @@
+# Strings from Open Source libraries
+
+This directory contains databases of strings extracted from open source software. 
+capa uses these databases to ignore functions that are likely library code.
+
+There is one file for each database. Each database is a gzip-compressed, JSONL (one JSON document per line) file.
+The JSON document looks like this:
+
+    string: "1.0.8, 13-Jul-2019"
+    library_name: "bzip2"
+    library_version: "1.0.8#3"
+    file_path: "CMakeFiles/bz2.dir/bzlib.c.obj"
+    function_name: "BZ2_bzlibVersion"
+    line_number: null
+
+The following databases were extracted via the vcpkg & jh technique:
+
+  - brotli 1.0.9#5
+  - bzip2 1.0.8#3
+  - cryptopp 8.7.0
+  - curl 7.86.0#1
+  - detours 4.0.1#7
+  - jemalloc 5.3.0#1
+  - jsoncpp 1.9.5
+  - kcp 1.7
+  - liblzma 5.2.5#6
+  - libsodium 1.0.18#8
+  - libpcap 1.10.1#3
+  - mbedtls 2.28.1
+  - openssl 3.0.7#1
+  - sqlite3 3.40.0#1
+  - tomcrypt 1.18.2#2
+  - wolfssl 5.5.0
+  - zlib 1.2.13
+
+This code was originally developed in FLOSS and imported into capa.
+
+## The vcpkg & jh technique
+
+Major steps:
+
+  1. build static libraries via vcpkg
+  2. extract features via jh
+  3. convert to JSONL format with `jh_to_qs.py`
+  4. compress with gzip
+
+### Build static libraries via vcpkg
+
+[vcpkg](https://vcpkg.io/en/) is a free C/C++ package manager for acquiring and managing libraries.
+We use it to easily build common open source libraries, like zlib.
+Use the triplet `x64-windows-static` to build static archives (.lib files that are AR archives containing COFF object files):
+
+```console
+PS > C:\vcpkg\vcpkg.exe install --triplet x64-windows-static zlib
+```
+
+### Extract features via jh
+
+[jh](https://github.com/williballenthin/lancelot/blob/master/bin/src/bin/jh.rs)
+is a lancelot-based utility that parses AR archives containing COFF object files,
+reconstructs their control flow, finds functions, and extracts features. 
+jh extracts numbers, API calls, and strings; we are only interested in the string features.
+
+For each feature, jh emits a CSV line with the fields 
+  - target triplet
+  - compiler 
+  - library
+  - version
+  - build profile
+  - path
+  - function
+  - feature type
+  - feature value
+
+For example:
+
+```csv
+x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0x00000100
+```
+
+For example, to invoke jh:
+
+```console
+$ ~/lancelot/target/release/jh x64-windows-static msvc143 zlib 1.2.13 release /mnt/c/vcpkg/installed/x64-windows-static/lib/zlib.lib > ~/flare-floss/floss/qs/db/data/oss/zlib.csv
+```
+
+### Convert to OSS database format
+
+We use the script `jh_to_qs.py` to convert these CSV lines into a JSONL file prepared for FLOSS:
+
+```console
+$ python3 jh_to_qs.py zlib.csv > zlib.jsonl
+```
+
+These files are then gzip'd:
+
+```console
+$  gzip -c zlib.jsonl > zlib.jsonl.gz
+```
--- a/capa/analysis/strings/data/oss/sqlite3.jsonl.gz
+++ b/capa/analysis/strings/data/oss/sqlite3.jsonl.gz
--- a/capa/analysis/strings/data/oss/tomcrypt.jsonl.gz
+++ b/capa/analysis/strings/data/oss/tomcrypt.jsonl.gz
--- a/capa/analysis/strings/data/oss/wolfssl.jsonl.gz
+++ b/capa/analysis/strings/data/oss/wolfssl.jsonl.gz
--- a/capa/analysis/strings/data/oss/zlib.jsonl.gz
+++ b/capa/analysis/strings/data/oss/zlib.jsonl.gz
--- a/capa/analysis/strings/data/winapi/apis.txt.gz
+++ b/capa/analysis/strings/data/winapi/apis.txt.gz
--- a/capa/analysis/strings/data/winapi/dlls.txt.gz
+++ b/capa/analysis/strings/data/winapi/dlls.txt.gz
--- a/capa/capabilities/common.py
+++ b/capa/capabilities/common.py
@@ -9,7 +9,7 @@
 import logging
 import itertools
 import collections
-from typing import Any, Tuple
+from typing import Any

 from capa.rules import Scope, RuleSet
 from capa.engine import FeatureSet, MatchResults
@@ -64,7 +64,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon

 def find_capabilities(
    ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
-) -> Tuple[MatchResults, Any]:
+) -> tuple[MatchResults, Any]:
    from capa.capabilities.static import find_static_capabilities
    from capa.capabilities.dynamic import find_dynamic_capabilities

--- a/capa/capabilities/dynamic.py
+++ b/capa/capabilities/dynamic.py
@@ -9,7 +9,7 @@
 import logging
 import itertools
 import collections
-from typing import Any, List, Tuple
+from typing import Any

 import capa.perf
 import capa.features.freeze as frz
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)

 def find_call_capabilities(
    ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-) -> Tuple[FeatureSet, MatchResults]:
+) -> tuple[FeatureSet, MatchResults]:
    """
    find matches for the given rules for the given call.

@@ -51,7 +51,7 @@ def find_call_capabilities(

 def find_thread_capabilities(
    ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
-) -> Tuple[FeatureSet, MatchResults, MatchResults]:
+) -> tuple[FeatureSet, MatchResults, MatchResults]:
    """
    find matches for the given rules within the given thread.

@@ -89,7 +89,7 @@ def find_thread_capabilities(

 def find_process_capabilities(
    ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
-) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
+) -> tuple[MatchResults, MatchResults, MatchResults, int]:
    """
    find matches for the given rules within the given process.

@@ -127,7 +127,7 @@ def find_process_capabilities(

 def find_dynamic_capabilities(
    ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
-) -> Tuple[MatchResults, Any]:
+) -> tuple[MatchResults, Any]:
    all_process_matches: MatchResults = collections.defaultdict(list)
    all_thread_matches: MatchResults = collections.defaultdict(list)
    all_call_matches: MatchResults = collections.defaultdict(list)
@@ -135,7 +135,7 @@ def find_dynamic_capabilities(
    feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())

    assert isinstance(extractor, DynamicFeatureExtractor)
-    processes: List[ProcessHandle] = list(extractor.get_processes())
+    processes: list[ProcessHandle] = list(extractor.get_processes())
    n_processes: int = len(processes)

    with capa.helpers.CapaProgressBar(
--- a/capa/capabilities/static.py
+++ b/capa/capabilities/static.py
@@ -10,7 +10,7 @@ import time
 import logging
 import itertools
 import collections
-from typing import Any, List, Tuple
+from typing import Any

 import capa.perf
 import capa.helpers
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)

 def find_instruction_capabilities(
    ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
-) -> Tuple[FeatureSet, MatchResults]:
+) -> tuple[FeatureSet, MatchResults]:
    """
    find matches for the given rules for the given instruction.

@@ -53,7 +53,7 @@ def find_instruction_capabilities(

 def find_basic_block_capabilities(
    ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
-) -> Tuple[FeatureSet, MatchResults, MatchResults]:
+) -> tuple[FeatureSet, MatchResults, MatchResults]:
    """
    find matches for the given rules within the given basic block.

@@ -93,7 +93,7 @@ def find_basic_block_capabilities(

 def find_code_capabilities(
    ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
-) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
+) -> tuple[MatchResults, MatchResults, MatchResults, int]:
    """
    find matches for the given rules within the given function.

@@ -131,16 +131,16 @@ def find_code_capabilities(

 def find_static_capabilities(
    ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
-) -> Tuple[MatchResults, Any]:
+) -> tuple[MatchResults, Any]:
    all_function_matches: MatchResults = collections.defaultdict(list)
    all_bb_matches: MatchResults = collections.defaultdict(list)
    all_insn_matches: MatchResults = collections.defaultdict(list)

    feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
-    library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
+    library_functions: tuple[rdoc.LibraryFunction, ...] = ()

    assert isinstance(extractor, StaticFeatureExtractor)
-    functions: List[FunctionHandle] = list(extractor.get_functions())
+    functions: list[FunctionHandle] = list(extractor.get_functions())
    n_funcs: int = len(functions)
    n_libs: int = 0
    percentage: float = 0
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -8,7 +8,7 @@

 import copy
 import collections
-from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator
+from typing import TYPE_CHECKING, Union, Mapping, Iterable, Iterator

 import capa.perf
 import capa.features.common
@@ -27,7 +27,7 @@ if TYPE_CHECKING:
 # to collect the locations of a feature, do: `features[Number(0x10)]`
 #
 # aliased here so that the type can be documented and xref'd.
-FeatureSet = Dict[Feature, Set[Address]]
+FeatureSet = dict[Feature, set[Address]]


 class Statement:
@@ -94,7 +94,7 @@ class And(Statement):
    match if all of the children evaluate to True.

    the order of evaluation is dictated by the property
-    `And.children` (type: List[Statement|Feature]).
+    `And.children` (type: list[Statement|Feature]).
    a query optimizer may safely manipulate the order of these children.
    """

@@ -127,7 +127,7 @@ class Or(Statement):
    match if any of the children evaluate to True.

    the order of evaluation is dictated by the property
-    `Or.children` (type: List[Statement|Feature]).
+    `Or.children` (type: list[Statement|Feature]).
    a query optimizer may safely manipulate the order of these children.
    """

@@ -176,7 +176,7 @@ class Some(Statement):
    match if at least N of the children evaluate to True.

    the order of evaluation is dictated by the property
-    `Some.children` (type: List[Statement|Feature]).
+    `Some.children` (type: list[Statement|Feature]).
    a query optimizer may safely manipulate the order of these children.
    """

@@ -267,7 +267,7 @@ class Subscope(Statement):
 #         inspect(match_details)
 #
 # aliased here so that the type can be documented and xref'd.
-MatchResults = Mapping[str, List[Tuple[Address, Result]]]
+MatchResults = Mapping[str, list[tuple[Address, Result]]]


 def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
@@ -292,7 +292,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
        features[capa.features.common.MatchedRule(namespace)].update(locations)


-def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
+def match(rules: list["capa.rules.Rule"], features: FeatureSet, addr: Address) -> tuple[FeatureSet, MatchResults]:
    """
    match the given rules against the given features,
    returning an updated set of features and the matches.
--- a/capa/features/com/init.py
+++ b/capa/features/com/init.py
@@ -6,7 +6,6 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 from enum import Enum
-from typing import Dict, List

 from capa.helpers import assert_never

@@ -22,7 +21,7 @@ COM_PREFIXES = {
 }


-def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
+def load_com_database(com_type: ComType) -> dict[str, list[str]]:
    # lazy load these python files since they are so large.
    # that is, don't load them unless a COM feature is being handled.
    import capa.features.com.classes
--- a/capa/features/com/classes.py
+++ b/capa/features/com/classes.py
@@ -5,9 +5,8 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Dict, List

-COM_CLASSES: Dict[str, List[str]] = {
+COM_CLASSES: dict[str, list[str]] = {
    "ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
    "ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
    "ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],
--- a/capa/features/com/interfaces.py
+++ b/capa/features/com/interfaces.py
@@ -5,9 +5,8 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Dict, List

-COM_INTERFACES: Dict[str, List[str]] = {
+COM_INTERFACES: dict[str, list[str]] = {
    "IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
    "IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
    "IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
@@ -16334,7 +16333,7 @@ COM_INTERFACES: Dict[str, List[str]] = {
    "IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
    "IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
    "IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
-    "IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
+    "IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
    "IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
    "IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
    "IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -9,10 +9,9 @@
 import re
 import abc
 import codecs
-import typing
 import logging
 import collections
-from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional
+from typing import TYPE_CHECKING, Union, Optional

 if TYPE_CHECKING:
    # circular import, otherwise
@@ -79,8 +78,8 @@ class Result:
        self,
        success: bool,
        statement: Union["capa.engine.Statement", "Feature"],
-        children: List["Result"],
-        locations: Optional[Set[Address]] = None,
+        children: list["Result"],
+        locations: Optional[set[Address]] = None,
    ):
        super().__init__()
        self.success = success
@@ -213,7 +212,7 @@ class Substring(String):

        # mapping from string value to list of locations.
        # will unique the locations later on.
-        matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
+        matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)

        assert isinstance(self.value, str)
        for feature, locations in features.items():
@@ -261,7 +260,7 @@ class _MatchedSubstring(Substring):
    note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
    """

-    def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]):
+    def __init__(self, substring: Substring, matches: dict[str, set[Address]]):
        """
        args:
          substring: the substring feature that matches.
@@ -305,7 +304,7 @@ class Regex(String):

        # mapping from string value to list of locations.
        # will unique the locations later on.
-        matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
+        matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)

        for feature, locations in features.items():
            if not isinstance(feature, (String,)):
@@ -353,7 +352,7 @@ class _MatchedRegex(Regex):
    note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
    """

-    def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]):
+    def __init__(self, regex: Regex, matches: dict[str, set[Address]]):
        """
        args:
          regex: the regex feature that matches.
@@ -467,6 +466,7 @@ FORMAT_VMRAY = "vmray"
 FORMAT_BINEXPORT2 = "binexport2"
 FORMAT_FREEZE = "freeze"
 FORMAT_RESULT = "result"
+FORMAT_BINJA_DB = "binja_database"
 STATIC_FORMATS = {
    FORMAT_SC32,
    FORMAT_SC64,
@@ -476,6 +476,7 @@ STATIC_FORMATS = {
    FORMAT_FREEZE,
    FORMAT_RESULT,
    FORMAT_BINEXPORT2,
+    FORMAT_BINJA_DB,
 }
 DYNAMIC_FORMATS = {
    FORMAT_CAPE,
--- a/capa/features/extractors/base_extractor.py
+++ b/capa/features/extractors/base_extractor.py
@@ -11,13 +11,9 @@ import hashlib
 import dataclasses
 from copy import copy
 from types import MethodType
-from typing import Any, Set, Dict, Tuple, Union, Iterator
+from typing import Any, Union, Iterator, TypeAlias
 from dataclasses import dataclass

-# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
-# https://github.com/mandiant/capa/issues/1699
-from typing_extensions import TypeAlias
-
 import capa.features.address
 from capa.features.common import Feature
 from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
@@ -59,7 +55,7 @@ class FunctionHandle:

    address: Address
    inner: Any
-    ctx: Dict[str, Any] = dataclasses.field(default_factory=dict)
+    ctx: dict[str, Any] = dataclasses.field(default_factory=dict)


@dataclass
@@ -135,7 +131,7 @@ class StaticFeatureExtractor:
        return self._sample_hashes

    @abc.abstractmethod
-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
        """
        extract features found at every scope ("global").

@@ -146,12 +142,12 @@ class StaticFeatureExtractor:
                print('0x%x: %s', va, feature)

        yields:
-          Tuple[Feature, Address]: feature and its location
+          tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()

    @abc.abstractmethod
-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
        """
        extract file-scope features.

@@ -162,7 +158,7 @@ class StaticFeatureExtractor:
                print('0x%x: %s', va, feature)

        yields:
-          Tuple[Feature, Address]: feature and its location
+          tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()

@@ -211,7 +207,7 @@ class StaticFeatureExtractor:
        raise KeyError(addr)

    @abc.abstractmethod
-    def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, f: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
        """
        extract function-scope features.
        the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -227,7 +223,7 @@ class StaticFeatureExtractor:
          f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.

        yields:
-          Tuple[Feature, Address]: feature and its location
+          tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()

@@ -240,7 +236,7 @@ class StaticFeatureExtractor:
        raise NotImplementedError()

    @abc.abstractmethod
-    def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
        """
        extract basic block-scope features.
        the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -258,7 +254,7 @@ class StaticFeatureExtractor:
          bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.

        yields:
-          Tuple[Feature, Address]: feature and its location
+          tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()

@@ -273,7 +269,7 @@ class StaticFeatureExtractor:
    @abc.abstractmethod
    def extract_insn_features(
        self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
        """
        extract instruction-scope features.
        the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -293,12 +289,12 @@ class StaticFeatureExtractor:
          insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.

        yields:
-          Tuple[Feature, Address]: feature and its location
+          tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()


-def FunctionFilter(extractor: StaticFeatureExtractor, functions: Set) -> StaticFeatureExtractor:
+def FunctionFilter(extractor: StaticFeatureExtractor, functions: set) -> StaticFeatureExtractor:
    original_get_functions = extractor.get_functions

    def filtered_get_functions(self):
@@ -387,7 +383,7 @@ class DynamicFeatureExtractor:
        return self._sample_hashes

    @abc.abstractmethod
-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
        """
        extract features found at every scope ("global").

@@ -398,12 +394,12 @@ class DynamicFeatureExtractor:
                print(addr, feature)

        yields:
-          Tuple[Feature, Address]: feature and its location
+          tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()

    @abc.abstractmethod
-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
        """
        extract file-scope features.

@@ -414,7 +410,7 @@ class DynamicFeatureExtractor:
                print(addr, feature)

        yields:
-          Tuple[Feature, Address]: feature and its location
+          tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()

@@ -426,7 +422,7 @@ class DynamicFeatureExtractor:
        raise NotImplementedError()

    @abc.abstractmethod
-    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
        """
        Yields all the features of a process. These include:
        - file features of the process' image
@@ -449,7 +445,7 @@ class DynamicFeatureExtractor:
        raise NotImplementedError()

    @abc.abstractmethod
-    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
        """
        Yields all the features of a thread. These include:
        - sequenced api traces
@@ -466,7 +462,7 @@ class DynamicFeatureExtractor:
    @abc.abstractmethod
    def extract_call_features(
        self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
        """
        Yields all features of a call. These include:
        - api name
@@ -485,7 +481,7 @@ class DynamicFeatureExtractor:
        raise NotImplementedError()


-def ProcessFilter(extractor: DynamicFeatureExtractor, processes: Set) -> DynamicFeatureExtractor:
+def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> DynamicFeatureExtractor:
    original_get_processes = extractor.get_processes

    def filtered_get_processes(self):
--- a/capa/features/extractors/binexport2/init.py
+++ b/capa/features/extractors/binexport2/init.py
@@ -17,7 +17,7 @@ import io
 import hashlib
 import logging
 import contextlib
-from typing import Set, Dict, List, Tuple, Iterator
+from typing import Iterator
 from pathlib import Path
 from collections import defaultdict
 from dataclasses import dataclass
@@ -51,13 +51,13 @@ def compute_common_prefix_length(m: str, n: str) -> int:
    return len(m)


-def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: List[Path]) -> Path:
+def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: list[Path]) -> Path:
    """attempt to find the sample file, given a BinExport2 file.

    searches in the same directory as the BinExport2 file, and then in search_paths.
    """

-    def filename_similarity_key(p: Path) -> Tuple[int, str]:
+    def filename_similarity_key(p: Path) -> tuple[int, str]:
        # note closure over input_file.
        # sort first by length of common prefix, then by name (for stability)
        return (compute_common_prefix_length(p.name, input_file.name), p.name)
@@ -65,7 +65,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
    wanted_sha256: str = be2.meta_information.executable_id.lower()

    input_directory: Path = input_file.parent
-    siblings: List[Path] = [p for p in input_directory.iterdir() if p.is_file()]
+    siblings: list[Path] = [p for p in input_directory.iterdir() if p.is_file()]
    siblings.sort(key=filename_similarity_key, reverse=True)
    for sibling in siblings:
        # e.g. with open IDA files in the same directory on Windows
@@ -74,7 +74,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
                return sibling

    for search_path in search_paths:
-        candidates: List[Path] = [p for p in search_path.iterdir() if p.is_file()]
+        candidates: list[Path] = [p for p in search_path.iterdir() if p.is_file()]
        candidates.sort(key=filename_similarity_key, reverse=True)
        for candidate in candidates:
            with contextlib.suppress(PermissionError):
@@ -88,27 +88,27 @@ class BinExport2Index:
    def __init__(self, be2: BinExport2):
        self.be2: BinExport2 = be2

-        self.callers_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
-        self.callees_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
+        self.callers_by_vertex_index: dict[int, list[int]] = defaultdict(list)
+        self.callees_by_vertex_index: dict[int, list[int]] = defaultdict(list)

        # note: flow graph != call graph (vertex)
-        self.flow_graph_index_by_address: Dict[int, int] = {}
-        self.flow_graph_address_by_index: Dict[int, int] = {}
+        self.flow_graph_index_by_address: dict[int, int] = {}
+        self.flow_graph_address_by_index: dict[int, int] = {}

        # edges that come from the given basic block
-        self.source_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
+        self.source_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
        # edges that end up at the given basic block
-        self.target_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
+        self.target_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)

-        self.vertex_index_by_address: Dict[int, int] = {}
+        self.vertex_index_by_address: dict[int, int] = {}

-        self.data_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
-        self.data_reference_index_by_target_address: Dict[int, List[int]] = defaultdict(list)
-        self.string_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
+        self.data_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
+        self.data_reference_index_by_target_address: dict[int, list[int]] = defaultdict(list)
+        self.string_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)

-        self.insn_address_by_index: Dict[int, int] = {}
-        self.insn_index_by_address: Dict[int, int] = {}
-        self.insn_by_address: Dict[int, BinExport2.Instruction] = {}
+        self.insn_address_by_index: dict[int, int] = {}
+        self.insn_index_by_address: dict[int, int] = {}
+        self.insn_by_address: dict[int, BinExport2.Instruction] = {}

        # must index instructions first
        self._index_insn_addresses()
@@ -208,7 +208,7 @@ class BinExport2Index:

    def basic_block_instructions(
        self, basic_block: BinExport2.BasicBlock
-    ) -> Iterator[Tuple[int, BinExport2.Instruction, int]]:
+    ) -> Iterator[tuple[int, BinExport2.Instruction, int]]:
        """
        For a given basic block, enumerate the instruction indices,
        the instruction instances, and their addresses.
@@ -253,7 +253,7 @@ class BinExport2Analysis:
        self.idx: BinExport2Index = idx
        self.buf: bytes = buf
        self.base_address: int = 0
-        self.thunks: Dict[int, int] = {}
+        self.thunks: dict[int, int] = {}

        self._find_base_address()
        self._compute_thunks()
@@ -279,12 +279,14 @@ class BinExport2Analysis:

            curr_idx: int = idx
            for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
-                thunk_callees: List[int] = self.idx.callees_by_vertex_index[curr_idx]
-                # if this doesn't hold, then it doesn't seem like this is a thunk,
+                thunk_callees: list[int] = self.idx.callees_by_vertex_index[curr_idx]
+                # If this doesn't hold, then it doesn't seem like this is a thunk,
                # because either, len is:
-                #    0 and the thunk doesn't point to anything, or
+                #    0 and the thunk doesn't point to anything or is indirect, like `call eax`, or
                #   >1 and the thunk may end up at many functions.
-                assert len(thunk_callees) == 1, f"thunk @ {hex(addr)} failed"
+                # In any case, this doesn't appear to be the sort of thunk we're looking for.
+                if len(thunk_callees) != 1:
+                    break

                thunked_idx: int = thunk_callees[0]
                thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
@@ -324,7 +326,7 @@ class AddressNotMappedError(ReadMemoryError): ...
@dataclass
 class AddressSpace:
    base_address: int
-    memory_regions: Tuple[MemoryRegion, ...]
+    memory_regions: tuple[MemoryRegion, ...]

    def read_memory(self, address: int, length: int) -> bytes:
        rva: int = address - self.base_address
@@ -337,7 +339,7 @@ class AddressSpace:

    @classmethod
    def from_pe(cls, pe: PE, base_address: int):
-        regions: List[MemoryRegion] = []
+        regions: list[MemoryRegion] = []
        for section in pe.sections:
            address: int = section.VirtualAddress
            size: int = section.Misc_VirtualSize
@@ -355,7 +357,7 @@ class AddressSpace:

    @classmethod
    def from_elf(cls, elf: ELFFile, base_address: int):
-        regions: List[MemoryRegion] = []
+        regions: list[MemoryRegion] = []

        # ELF segments are for runtime data,
        # ELF sections are for link-time data.
@@ -401,9 +403,9 @@ class AnalysisContext:
 class FunctionContext:
    ctx: AnalysisContext
    flow_graph_index: int
-    format: Set[str]
-    os: Set[str]
-    arch: Set[str]
+    format: set[str]
+    os: set[str]
+    arch: set[str]


@dataclass
--- a/capa/features/extractors/binexport2/arch/arm/insn.py
+++ b/capa/features/extractors/binexport2/arch/arm/insn.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator, Optional
+from typing import Iterator, Optional

 import capa.features.extractors.binexport2.helpers
 from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)

 def extract_insn_number_features(
    fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner

@@ -91,7 +91,7 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_offset_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner

@@ -120,7 +120,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_nzxor_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner
    be2: BinExport2 = fhi.ctx.be2
@@ -131,7 +131,7 @@ def extract_insn_nzxor_characteristic_features(
    instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
    # guaranteed to be simple int/reg operands
    # so we don't have to realize the tree/list.
-    operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
+    operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]

    if operands[1] != operands[2]:
        yield Characteristic("nzxor"), ih.address
@@ -146,7 +146,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_function_indirect_call_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner
    be2: BinExport2 = fhi.ctx.be2
--- a/capa/features/extractors/binexport2/arch/intel/helpers.py
+++ b/capa/features/extractors/binexport2/arch/intel/helpers.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Optional
+from typing import Optional
 from dataclasses import dataclass

 from capa.features.extractors.binexport2.helpers import get_operand_expressions
@@ -32,7 +32,7 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt
    # Base: Any general purpose register
    # Displacement: An integral offset

-    expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand)
+    expressions: list[BinExport2.Expression] = get_operand_expressions(be2, operand)

    # skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller
    # has checked for BinExport2.Expression.DEREFERENCE
--- a/capa/features/extractors/binexport2/arch/intel/insn.py
+++ b/capa/features/extractors/binexport2/arch/intel/insn.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.strings
 import capa.features.extractors.binexport2.helpers
@@ -63,7 +63,7 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_number_features(
    fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner

@@ -123,7 +123,7 @@ OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_offset_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner

@@ -161,7 +161,7 @@ def is_security_cookie(

    # security cookie check should use SP or BP
    op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]]
-    op1_exprs: List[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
+    op1_exprs: list[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
    if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs):
        return False

@@ -192,7 +192,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_nzxor_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """
    parse non-zeroing XOR instruction from the given instruction.
    ignore expected non-zeroing XORs, e.g. security cookies.
@@ -209,7 +209,7 @@ def extract_insn_nzxor_characteristic_features(
    instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
    # guaranteed to be simple int/reg operands
    # so we don't have to realize the tree/list.
-    operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
+    operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]

    if operands[0] == operands[1]:
        return
@@ -236,7 +236,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_function_indirect_call_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner
    be2: BinExport2 = fhi.ctx.be2
--- a/capa/features/extractors/binexport2/basicblock.py
+++ b/capa/features/extractors/binexport2/basicblock.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
@@ -16,20 +16,20 @@ from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
 from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2


-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    bbi: BasicBlockContext = bbh.inner

    idx = fhi.ctx.idx

    basic_block_index: int = bbi.basic_block_index
-    target_edges: List[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
+    target_edges: list[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
    if basic_block_index in (e.source_basic_block_index for e in target_edges):
        basic_block_address: int = idx.get_basic_block_address(basic_block_index)
        yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)


-def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """extract basic block features"""
    for bb_handler in BASIC_BLOCK_HANDLERS:
        for feature, addr in bb_handler(fh, bbh):
--- a/capa/features/extractors/binexport2/extractor.py
+++ b/capa/features/extractors/binexport2/extractor.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Set, List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.elf
 import capa.features.extractors.common
@@ -48,14 +48,14 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
        address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address)
        self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space)

-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
        self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf)))
        self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf)))
        self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf)))

-        self.format: Set[str] = set()
-        self.os: Set[str] = set()
-        self.arch: Set[str] = set()
+        self.format: set[str] = set()
+        self.os: set[str] = set()
+        self.arch: set[str] = set()

        for feature, _ in self.global_features:
            assert isinstance(feature.value, str)
@@ -72,10 +72,10 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
    def get_base_address(self) -> AbsoluteVirtualAddress:
        return AbsoluteVirtualAddress(self.analysis.base_address)

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
        yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf)

    def get_functions(self) -> Iterator[FunctionHandle]:
@@ -97,7 +97,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
                inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch),
            )

-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.binexport2.function.extract_features(fh)

    def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
@@ -112,7 +112,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
                inner=BasicBlockContext(basic_block_index),
            )

-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh)

    def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
@@ -126,5 +126,5 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):

    def extract_insn_features(
        self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih)
--- a/capa/features/extractors/binexport2/file.py
+++ b/capa/features/extractors/binexport2/file.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import io
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import pefile
 from elftools.elf.elffile import ELFFile
@@ -23,7 +23,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
 logger = logging.getLogger(__name__)


-def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
    if buf.startswith(capa.features.extractors.common.MATCH_PE):
        pe: pefile.PE = pefile.PE(data=buf)
        yield from capa.features.extractors.pefile.extract_file_export_names(pe)
@@ -34,7 +34,7 @@ def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
        logger.warning("unsupported format")


-def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
    if buf.startswith(capa.features.extractors.common.MATCH_PE):
        pe: pefile.PE = pefile.PE(data=buf)
        yield from capa.features.extractors.pefile.extract_file_import_names(pe)
@@ -45,7 +45,7 @@ def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
        logger.warning("unsupported format")


-def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
    if buf.startswith(capa.features.extractors.common.MATCH_PE):
        pe: pefile.PE = pefile.PE(data=buf)
        yield from capa.features.extractors.pefile.extract_file_section_names(pe)
@@ -56,15 +56,15 @@ def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[F
        logger.warning("unsupported format")


-def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
    yield from capa.features.extractors.common.extract_file_strings(buf)


-def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
    yield from capa.features.extractors.common.extract_format(buf)


-def extract_features(be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
    """extract file features"""
    for file_handler in FILE_HANDLERS:
        for feature, addr in file_handler(be2, buf):
--- a/capa/features/extractors/binexport2/function.py
+++ b/capa/features/extractors/binexport2/function.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.file import FunctionName
 from capa.features.common import Feature, Characteristic
@@ -16,7 +16,7 @@ from capa.features.extractors.base_extractor import FunctionHandle
 from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2


-def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner

    be2: BinExport2 = fhi.ctx.be2
@@ -32,7 +32,7 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Add
        yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)


-def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner

    be2: BinExport2 = fhi.ctx.be2
@@ -40,7 +40,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
    flow_graph_index: int = fhi.flow_graph_index
    flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]

-    edges: List[Tuple[int, int]] = []
+    edges: list[tuple[int, int]] = []
    for edge in flow_graph.edge:
        edges.append((edge.source_basic_block_index, edge.target_basic_block_index))

@@ -48,7 +48,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
        yield Characteristic("loop"), fh.address


-def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner

    be2: BinExport2 = fhi.ctx.be2
@@ -63,7 +63,7 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
        yield FunctionName(vertex.mangled_name), fh.address


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    for func_handler in FUNCTION_HANDLERS:
        for feature, addr in func_handler(fh):
            yield feature, addr
--- a/capa/features/extractors/binexport2/helpers.py
+++ b/capa/features/extractors/binexport2/helpers.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import re
-from typing import Set, Dict, List, Tuple, Union, Iterator, Optional
+from typing import Union, Iterator, Optional
 from collections import defaultdict
 from dataclasses import dataclass

@@ -22,7 +22,7 @@ HAS_ARCH_INTEL = {ARCH_I386, ARCH_AMD64}
 HAS_ARCH_ARM = {ARCH_AARCH64}


-def mask_immediate(arch: Set[str], immediate: int) -> int:
+def mask_immediate(arch: set[str], immediate: int) -> int:
    if arch & HAS_ARCH64:
        immediate &= 0xFFFFFFFFFFFFFFFF
    elif arch & HAS_ARCH32:
@@ -30,7 +30,7 @@ def mask_immediate(arch: Set[str], immediate: int) -> int:
    return immediate


-def twos_complement(arch: Set[str], immediate: int, default: Optional[int] = None) -> int:
+def twos_complement(arch: set[str], immediate: int, default: Optional[int] = None) -> int:
    if default is not None:
        return capa.features.extractors.helpers.twos_complement(immediate, default)
    elif arch & HAS_ARCH64:
@@ -50,17 +50,36 @@ def is_vertex_type(vertex: BinExport2.CallGraph.Vertex, type_: BinExport2.CallGr
    return vertex.HasField("type") and vertex.type == type_


+# internal to `build_expression_tree`
+# this is unstable: it is subject to change, so don't rely on it!
+def _prune_expression_tree_references_to_tree_index(
+    expression_tree: list[list[int]],
+    tree_index: int,
+):
+    # `i` is the index of each tree node whose children we search for `tree_index`.
+    # if removing `tree_index` leaves that node with no children,
+    # then we also need to prune references to `i`.
+    for i, tree_node in enumerate(expression_tree):
+        if tree_index in tree_node:
+            tree_node.remove(tree_index)
+
+            if len(tree_node) == 0:
+                # if the parent node is now empty,
+                # remove references to that parent node.
+                _prune_expression_tree_references_to_tree_index(expression_tree, i)
+
+
 # internal to `build_expression_tree`
 # this is unstable: it is subject to change, so don't rely on it!
 def _prune_expression_tree_empty_shifts(
    be2: BinExport2,
    operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
    tree_index: int,
 ):
    expression_index = operand.expression_index[tree_index]
    expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

    if expression.type == BinExport2.Expression.OPERATOR:
        if len(children_tree_indexes) == 0 and expression.symbol in ("lsl", "lsr"):
@@ -70,9 +89,7 @@ def _prune_expression_tree_empty_shifts(
            #
            # Which seems to be as if the shift wasn't there (shift of #0)
            # so we want to remove references to this node from any parent nodes.
-            for tree_node in expression_tree:
-                if tree_index in tree_node:
-                    tree_node.remove(tree_index)
+            _prune_expression_tree_references_to_tree_index(expression_tree, tree_index)

            return

@@ -82,38 +99,37 @@ def _prune_expression_tree_empty_shifts(

 # internal to `build_expression_tree`
 # this is unstable: it is subject to change, so don't rely on it!
-def _prune_expression_tree_empty_commas(
+def _fixup_expression_tree_references_to_tree_index(
+    expression_tree: list[list[int]],
+    existing_index: int,
+    new_index: int,
+):
+    for tree_node in expression_tree:
+        for i, index in enumerate(tree_node):
+            if index == existing_index:
+                tree_node[i] = new_index
+
+
+# internal to `build_expression_tree`
+# this is unstable: it is subject to change, so don't rely on it!
+def _fixup_expression_tree_lonely_commas(
    be2: BinExport2,
    operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
    tree_index: int,
 ):
    expression_index = operand.expression_index[tree_index]
    expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

    if expression.type == BinExport2.Expression.OPERATOR:
        if len(children_tree_indexes) == 1 and expression.symbol == ",":
-            # Due to the above pruning of empty LSL or LSR expressions,
-            # the parents might need to be fixed up.
-            #
-            # Specifically, if the pruned node was part of a comma list with two children,
-            # now there's only a single child, which renders as an extra comma,
-            # so we replace references to the comma node with the immediate child.
-            #
-            # A more correct way of doing this might be to walk up the parents and do fixups,
-            # but I'm not quite sure how to do this yet. Just do two passes right now.
-            child = children_tree_indexes[0]
-
-            for tree_node in expression_tree:
-                tree_node.index
-                if tree_index in tree_node:
-                    tree_node[tree_node.index(tree_index)] = child
-
-            return
+            existing_index = tree_index
+            new_index = children_tree_indexes[0]
+            _fixup_expression_tree_references_to_tree_index(expression_tree, existing_index, new_index)

    for child_tree_index in children_tree_indexes:
-        _prune_expression_tree_empty_commas(be2, operand, expression_tree, child_tree_index)
+        _fixup_expression_tree_lonely_commas(be2, operand, expression_tree, child_tree_index)


 # internal to `build_expression_tree`
@@ -121,17 +137,17 @@ def _prune_expression_tree_empty_commas(
 def _prune_expression_tree(
    be2: BinExport2,
    operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
 ):
    _prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0)
-    _prune_expression_tree_empty_commas(be2, operand, expression_tree, 0)
+    _fixup_expression_tree_lonely_commas(be2, operand, expression_tree, 0)


 # this is unstable: it is subject to change, so don't rely on it!
 def _build_expression_tree(
    be2: BinExport2,
    operand: BinExport2.Operand,
-) -> List[List[int]]:
+) -> list[list[int]]:
    # The reconstructed expression tree layout, linking parent nodes to their children.
    #
    # There is one list of integers for each expression in the operand.
@@ -159,7 +175,7 @@ def _build_expression_tree(
        # exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817)
        return []

-    tree: List[List[int]] = []
+    tree: list[list[int]] = []
    for i, expression_index in enumerate(operand.expression_index):
        children = []

@@ -173,7 +189,6 @@ def _build_expression_tree(
        tree.append(children)

    _prune_expression_tree(be2, operand, tree)
-    _prune_expression_tree(be2, operand, tree)

    return tree

@@ -181,21 +196,34 @@ def _build_expression_tree(
 def _fill_operand_expression_list(
    be2: BinExport2,
    operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
    tree_index: int,
-    expression_list: List[BinExport2.Expression],
+    expression_list: list[BinExport2.Expression],
 ):
    """
    Walk the given expression tree and collect the expression nodes in-order.
    """
    expression_index = operand.expression_index[tree_index]
    expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

    if expression.type == BinExport2.Expression.REGISTER:
-        assert len(children_tree_indexes) == 0
+        assert len(children_tree_indexes) <= 1
        expression_list.append(expression)
-        return
+
+        if len(children_tree_indexes) == 0:
+            return
+        elif len(children_tree_indexes) == 1:
+            # seen on aarch64 vector instructions, where the child indicates the vector data size:
+            #
+            #     FADD V0.4S, V1.4S, V2.4S
+            #
+            # see: https://github.com/mandiant/capa/issues/2528
+            child_index = children_tree_indexes[0]
+            _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
+            return
+        else:
+            raise NotImplementedError(len(children_tree_indexes))

    elif expression.type == BinExport2.Expression.SYMBOL:
        assert len(children_tree_indexes) <= 1
@@ -218,9 +246,23 @@ def _fill_operand_expression_list(
            raise NotImplementedError(len(children_tree_indexes))

    elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
-        assert len(children_tree_indexes) == 0
+        assert len(children_tree_indexes) <= 1
        expression_list.append(expression)
-        return
+
+        if len(children_tree_indexes) == 0:
+            return
+        elif len(children_tree_indexes) == 1:
+            # the ghidra exporter can produce some weird expressions,
+            # particularly for MSRs, like for:
+            #
+            #     sreg(3, 0, c.0, c.4, 4)
+            #
+            # see: https://github.com/mandiant/capa/issues/2530
+            child_index = children_tree_indexes[0]
+            _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
+            return
+        else:
+            raise NotImplementedError(len(children_tree_indexes))

    elif expression.type == BinExport2.Expression.SIZE_PREFIX:
        # like: b4
@@ -282,10 +324,10 @@ def _fill_operand_expression_list(
        raise NotImplementedError(expression.type)


-def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> List[BinExport2.Expression]:
+def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> list[BinExport2.Expression]:
    tree = _build_expression_tree(be2, op)

-    expressions: List[BinExport2.Expression] = []
+    expressions: list[BinExport2.Expression] = []
    _fill_operand_expression_list(be2, op, tree, 0, expressions)

    return expressions
@@ -331,11 +373,11 @@ def get_instruction_mnemonic(be2: BinExport2, instruction: BinExport2.Instructio
    return be2.mnemonic[instruction.mnemonic_index].name.lower()


-def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> List[BinExport2.Operand]:
+def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> list[BinExport2.Operand]:
    return [be2.operand[operand_index] for operand_index in instruction.operand_index]


-def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
+def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]:
    """
    Splits a string by any of the provided delimiter characters,
    including the delimiters in the results.
@@ -355,7 +397,7 @@ def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
        yield s[start:]


-BinExport2OperandPattern = Union[str, Tuple[str, ...]]
+BinExport2OperandPattern = Union[str, tuple[str, ...]]


@dataclass
@@ -382,8 +424,8 @@ class BinExport2InstructionPattern:
    This matcher uses the BinExport2 data layout under the hood.
    """

-    mnemonics: Tuple[str, ...]
-    operands: Tuple[Union[str, BinExport2OperandPattern], ...]
+    mnemonics: tuple[str, ...]
+    operands: tuple[Union[str, BinExport2OperandPattern], ...]
    capture: Optional[str]

    @classmethod
@@ -438,7 +480,7 @@ class BinExport2InstructionPattern:
        mnemonic, _, rest = pattern.partition(" ")
        mnemonics = mnemonic.split("|")

-        operands: List[Union[str, Tuple[str, ...]]] = []
+        operands: list[Union[str, tuple[str, ...]]] = []
        while rest:
            rest = rest.strip()
            if not rest.startswith("["):
@@ -509,7 +551,7 @@ class BinExport2InstructionPattern:
        expression: BinExport2.Expression

    def match(
-        self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
+        self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
    ) -> Optional["BinExport2InstructionPattern.MatchResult"]:
        """
        Match the given BinExport2 data against this pattern.
@@ -602,10 +644,10 @@ class BinExport2InstructionPattern:
 class BinExport2InstructionPatternMatcher:
    """Index and match a collection of instruction patterns."""

-    def __init__(self, queries: List[BinExport2InstructionPattern]):
+    def __init__(self, queries: list[BinExport2InstructionPattern]):
        self.queries = queries
        # shard the patterns by (mnemonic, #operands)
-        self._index: Dict[Tuple[str, int], List[BinExport2InstructionPattern]] = defaultdict(list)
+        self._index: dict[tuple[str, int], list[BinExport2InstructionPattern]] = defaultdict(list)

        for query in queries:
            for mnemonic in query.mnemonics:
@@ -623,7 +665,7 @@ class BinExport2InstructionPatternMatcher:
        )

    def match(
-        self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
+        self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
    ) -> Optional[BinExport2InstructionPattern.MatchResult]:
        queries = self._index.get((mnemonic.lower(), len(operand_expressions)), [])
        for query in queries:
--- a/capa/features/extractors/binexport2/insn.py
+++ b/capa/features/extractors/binexport2/insn.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 import capa.features.extractors.strings
@@ -32,7 +32,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
 logger = logging.getLogger(__name__)


-def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner

@@ -68,7 +68,7 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle

 def extract_insn_number_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner

    if fhi.arch & HAS_ARCH_INTEL:
@@ -77,7 +77,7 @@ def extract_insn_number_features(
        yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih)


-def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner

@@ -92,7 +92,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
        # disassembler already identified string reference from instruction
        return

-    reference_addresses: List[int] = []
+    reference_addresses: list[int] = []

    if instruction_index in idx.data_reference_index_by_source_instruction_index:
        for data_reference_index in idx.data_reference_index_by_source_instruction_index[instruction_index]:
@@ -142,7 +142,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_insn_string_features(
    fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner

@@ -161,7 +161,7 @@ def extract_insn_string_features(

 def extract_insn_offset_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner

    if fhi.arch & HAS_ARCH_INTEL:
@@ -172,7 +172,7 @@ def extract_insn_offset_features(

 def extract_insn_nzxor_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner

    if fhi.arch & HAS_ARCH_INTEL:
@@ -187,7 +187,7 @@ def extract_insn_nzxor_characteristic_features(

 def extract_insn_mnemonic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner
    ii: InstructionContext = ih.inner

@@ -199,7 +199,7 @@ def extract_insn_mnemonic_features(
    yield Mnemonic(mnemonic_name), ih.address


-def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """extract functions calls from features

    most relevant at the function scope;
@@ -221,7 +221,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_function_indirect_call_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    fhi: FunctionContext = fh.inner

    if fhi.arch & HAS_ARCH_INTEL:
@@ -234,7 +234,7 @@ def extract_function_indirect_call_characteristic_features(
        )


-def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """extract instruction features"""
    for inst_handler in INSTRUCTION_HANDLERS:
        for feature, ea in inst_handler(f, bbh, insn):
--- a/capa/features/extractors/binja/basicblock.py
+++ b/capa/features/extractors/binja/basicblock.py
@@ -5,115 +5,25 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+from typing import Iterator

-import string
-from typing import Tuple, Iterator
-
-from binaryninja import Function
 from binaryninja import BasicBlock as BinjaBasicBlock
-from binaryninja import (
-    BinaryView,
-    SymbolType,
-    RegisterValueType,
-    VariableSourceType,
-    MediumLevelILOperation,
-    MediumLevelILBasicBlock,
-    MediumLevelILInstruction,
-)

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address
 from capa.features.basicblock import BasicBlock
-from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
 from capa.features.extractors.base_extractor import BBHandle, FunctionHandle


-def get_printable_len_ascii(s: bytes) -> int:
-    """Return string length if all operand bytes are ascii or utf16-le printable"""
-    count = 0
-    for c in s:
-        if c == 0:
-            return count
-        if c < 127 and chr(c) in string.printable:
-            count += 1
-    return count
-
-
-def get_printable_len_wide(s: bytes) -> int:
-    """Return string length if all operand bytes are ascii or utf16-le printable"""
-    if all(c == 0x00 for c in s[1::2]):
-        return get_printable_len_ascii(s[::2])
-    return 0
-
-
-def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
-    bv: BinaryView = f.view
-
-    if il.operation != MediumLevelILOperation.MLIL_CALL:
-        return 0
-
-    target = il.dest
-    if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
-        return 0
-
-    addr = target.value.value
-    sym = bv.get_symbol_at(addr)
-    if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
-        return 0
-
-    if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
-        return 0
-
-    if len(il.params) < 2:
-        return 0
-
-    dest = il.params[0]
-    if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
-        var = dest.src
-    else:
-        return 0
-
-    if var.source_type != VariableSourceType.StackVariableSourceType:
-        return 0
-
-    src = il.params[1]
-    if src.value.type != RegisterValueType.ConstantDataAggregateValue:
-        return 0
-
-    s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
-    return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
-
-
-def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
-    """check basic block for stackstring indicators
-
-    true if basic block contains enough moves of constant bytes to the stack
-    """
-    count = 0
-    for il in bb:
-        count += get_stack_string_len(f, il)
-        if count > MIN_STACKSTRING_LEN:
-            return True
-
-    return False
-
-
-def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
-    """extract stackstring indicators from basic block"""
-    bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
-    if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]):
-        yield Characteristic("stack string"), bbh.address
-
-
-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """extract tight loop indicators from a basic block"""
-    bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
-    for edge in bb[0].outgoing_edges:
-        if edge.target.start == bb[0].start:
+    bb: BinjaBasicBlock = bbh.inner
+    for edge in bb.outgoing_edges:
+        if edge.target.start == bb.start:
            yield Characteristic("tight loop"), bbh.address


-def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """extract basic block features"""
    for bb_handler in BASIC_BLOCK_HANDLERS:
        for feature, addr in bb_handler(fh, bbh):
@@ -121,7 +31,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
    yield BasicBlock(), bbh.address


-BASIC_BLOCK_HANDLERS = (
-    extract_bb_tight_loop,
-    extract_bb_stackstring,
-)
+BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)
--- a/capa/features/extractors/binja/extractor.py
+++ b/capa/features/extractors/binja/extractor.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import binaryninja as binja

@@ -30,7 +30,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, bv: binja.BinaryView):
        super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
        self.bv = bv
-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
        self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
        self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
@@ -48,31 +48,24 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
        for f in self.bv.functions:
            yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)

-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.binja.function.extract_features(fh)

    def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
        f: binja.Function = fh.inner
-        # Set up a MLIL basic block dict look up to associate the disassembly basic block with its MLIL basic block
-        mlil_lookup = {}
-        for mlil_bb in f.mlil.basic_blocks:
-            mlil_lookup[mlil_bb.source_block.start] = mlil_bb
-
        for bb in f.basic_blocks:
-            mlil_bb = mlil_lookup.get(bb.start)
+            yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)

-            yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))
-
-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)

    def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
        import capa.features.extractors.binja.helpers as binja_helpers

-        bb: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
-        addr = bb[0].start
+        bb: binja.BasicBlock = bbh.inner
+        addr = bb.start

-        for text, length in bb[0]:
+        for text, length in bb:
            insn = binja_helpers.DisassemblyInstruction(addr, length, text)
            yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
            addr += length
--- a/capa/features/extractors/binja/file.py
+++ b/capa/features/extractors/binja/file.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+from typing import Iterator

 from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding

@@ -13,12 +13,22 @@ import capa.features.extractors.common
 import capa.features.extractors.helpers
 import capa.features.extractors.strings
 from capa.features.file import Export, Import, Section, FunctionName
-from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
+from capa.features.common import (
+    FORMAT_PE,
+    FORMAT_ELF,
+    FORMAT_SC32,
+    FORMAT_SC64,
+    FORMAT_BINJA_DB,
+    Format,
+    String,
+    Feature,
+    Characteristic,
+)
 from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
 from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name


-def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
+def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
    """check segment for embedded PE"""
    start = 0
    if bv.view_type == "PE" and seg.start == bv.start:
@@ -32,13 +42,13 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature
        yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)


-def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    """extract embedded PE features"""
    for seg in bv.segments:
        yield from check_segment_for_pe(bv, seg)


-def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    """extract function exports"""
    for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
        if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
@@ -72,7 +82,7 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
        yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)


-def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    """extract function imports

    1. imports by ordinal:
@@ -96,19 +106,19 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
                yield Import(name), addr


-def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    """extract section names"""
    for name, section in bv.sections.items():
        yield Section(name), AbsoluteVirtualAddress(section.start)


-def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    """extract ASCII and UTF-16 LE strings"""
    for s in bv.strings:
        yield String(s.value), FileOffsetAddress(s.start)


-def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    """
    extract the names of statically-linked library functions.
    """
@@ -127,12 +137,22 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
                yield FunctionName(name[1:]), sym.address


-def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
+    if bv.file.database is not None:
+        yield Format(FORMAT_BINJA_DB), NO_ADDRESS
+
    view_type = bv.view_type
    if view_type in ["PE", "COFF"]:
        yield Format(FORMAT_PE), NO_ADDRESS
    elif view_type == "ELF":
        yield Format(FORMAT_ELF), NO_ADDRESS
+    elif view_type == "Mapped":
+        if bv.arch.name == "x86":
+            yield Format(FORMAT_SC32), NO_ADDRESS
+        elif bv.arch.name == "x86_64":
+            yield Format(FORMAT_SC64), NO_ADDRESS
+        else:
+            raise NotImplementedError(f"unexpected raw file with arch: {bv.arch}")
    elif view_type == "Raw":
        # no file type to return when processing a binary file, but we want to continue processing
        return
@@ -140,7 +160,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
        raise NotImplementedError(f"unexpected file format: {view_type}")


-def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    """extract file features"""
    for file_handler in FILE_HANDLERS:
        for feature, addr in file_handler(bv):
--- a/capa/features/extractors/binja/find_binja_api.py
+++ b/capa/features/extractors/binja/find_binja_api.py
@@ -105,13 +105,13 @@ def find_binaryninja() -> Optional[Path]:
            logger.debug("detected OS: linux")
        elif sys.platform == "darwin":
            logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-            return False
+            return None
        elif sys.platform == "win32":
            logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-            return False
+            return None
        else:
            logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-            return False
+            return None

        desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
        if not desktop_entry:
--- a/capa/features/extractors/binja/function.py
+++ b/capa/features/extractors/binja/function.py
@@ -5,14 +5,28 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+import string
+from typing import Iterator

-from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation
+from binaryninja import (
+    Function,
+    BinaryView,
+    SymbolType,
+    ILException,
+    RegisterValueType,
+    VariableSourceType,
+    LowLevelILOperation,
+    MediumLevelILOperation,
+    MediumLevelILBasicBlock,
+    MediumLevelILInstruction,
+)

 from capa.features.file import FunctionName
 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
 from capa.features.extractors import loops
+from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
+from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
 from capa.features.extractors.base_extractor import FunctionHandle


@@ -24,7 +38,7 @@ def extract_function_calls_to(fh: FunctionHandle):
        # Everything that is a code reference to the current function is considered a caller, which actually includes
        # many other references that are NOT a caller. For example, an instruction `push function_start` will also be
        # considered a caller to the function
-        llil = caller.llil
+        llil = get_llil_instr_at_addr(func.view, caller.address)
        if (llil is None) or llil.operation not in [
            LowLevelILOperation.LLIL_CALL,
            LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
@@ -33,14 +47,13 @@ def extract_function_calls_to(fh: FunctionHandle):
        ]:
            continue

-        if llil.dest.value.type not in [
-            RegisterValueType.ImportedAddressValue,
-            RegisterValueType.ConstantValue,
-            RegisterValueType.ConstantPointerValue,
+        if llil.dest.operation not in [
+            LowLevelILOperation.LLIL_CONST,
+            LowLevelILOperation.LLIL_CONST_PTR,
        ]:
            continue

-        address = llil.dest.value.value
+        address = llil.dest.constant
        if address != func.start:
            continue

@@ -95,10 +108,103 @@ def extract_function_name(fh: FunctionHandle):
            yield FunctionName(name[1:]), sym.address


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def get_printable_len_ascii(s: bytes) -> int:
+    """Return the count of string.printable ASCII bytes in s up to the first NUL; other bytes are skipped"""
+    count = 0
+    for c in s:
+        if c == 0:
+            return count
+        if c < 127 and chr(c) in string.printable:
+            count += 1
+    return count
+
+
+def get_printable_len_wide(s: bytes) -> int:
+    """Return the printable length of s read as UTF-16LE (every odd-indexed byte must be 0x00), else 0"""
+    if all(c == 0x00 for c in s[1::2]):
+        return get_printable_len_ascii(s[::2])
+    return 0
+
+
+def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
+    bv: BinaryView = f.view
+
+    if il.operation != MediumLevelILOperation.MLIL_CALL:
+        return 0
+
+    target = il.dest
+    if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
+        return 0
+
+    addr = target.value.value
+    sym = bv.get_symbol_at(addr)
+    if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
+        return 0
+
+    if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
+        return 0
+
+    if len(il.params) < 2:
+        return 0
+
+    dest = il.params[0]
+    if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
+        var = dest.src
+    else:
+        return 0
+
+    if var.source_type != VariableSourceType.StackVariableSourceType:
+        return 0
+
+    src = il.params[1]
+    if src.value.type != RegisterValueType.ConstantDataAggregateValue:
+        return 0
+
+    s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
+    return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
+
+
+def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
+    """check basic block for stackstring indicators
+
+    true if basic block contains enough moves of constant bytes to the stack
+    """
+    count = 0
+    for il in bb:
+        count += get_stack_string_len(f, il)
+        if count > MIN_STACKSTRING_LEN:
+            return True
+
+    return False
+
+
+def extract_stackstring(fh: FunctionHandle):
+    """extract stackstring indicators"""
+    func: Function = fh.inner
+    bv: BinaryView = func.view
+    if bv is None:
+        return
+
+    try:
+        mlil = func.mlil
+    except ILException:
+        return
+
+    for block in mlil.basic_blocks:
+        if bb_contains_stackstring(func, block):
+            yield Characteristic("stack string"), block.source_block.start
+
+
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    for func_handler in FUNCTION_HANDLERS:
        for feature, addr in func_handler(fh):
            yield feature, addr


-FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
+FUNCTION_HANDLERS = (
+    extract_function_calls_to,
+    extract_function_loop,
+    extract_recursive_call,
+    extract_function_name,
+    extract_stackstring,
+)
--- a/capa/features/extractors/binja/global_.py
+++ b/capa/features/extractors/binja/global_.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from binaryninja import BinaryView

@@ -16,7 +16,7 @@ from capa.features.address import NO_ADDRESS, Address
 logger = logging.getLogger(__name__)


-def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    name = bv.platform.name
    if "-" in name:
        name = name.split("-")[0]
@@ -45,7 +45,7 @@ def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
        return


-def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
    arch = bv.arch.name
    if arch == "x86_64":
        yield Arch(ARCH_AMD64), NO_ADDRESS
--- a/capa/features/extractors/binja/helpers.py
+++ b/capa/features/extractors/binja/helpers.py
@@ -6,10 +6,10 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import re
-from typing import List, Callable
+from typing import Callable, Optional
 from dataclasses import dataclass

-from binaryninja import BinaryView, LowLevelILInstruction
+from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction
 from binaryninja.architecture import InstructionTextToken


@@ -17,7 +17,7 @@ from binaryninja.architecture import InstructionTextToken
 class DisassemblyInstruction:
    address: int
    length: int
-    text: List[InstructionTextToken]
+    text: list[InstructionTextToken]


 LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
@@ -54,7 +54,7 @@ def unmangle_c_name(name: str) -> str:


 def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
-    s: List[str] = []
+    s: list[str] = []
    while len(s) < max_len:
        try:
            c = bv.read(offset + len(s), 1)[0]
@@ -67,3 +67,13 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
        s.append(chr(c))

    return "".join(s)
+
+
+def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]:
+    arch = bv.arch
+    buffer = bv.read(addr, arch.max_instr_length)
+    llil = LowLevelILFunction(arch=arch)
+    llil.current_address = addr
+    if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
+        return None
+    return llil[0]
--- a/capa/features/extractors/binja/insn.py
+++ b/capa/features/extractors/binja/insn.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Any, List, Tuple, Iterator, Optional
+from typing import Any, Iterator, Optional

 from binaryninja import Function
 from binaryninja import BasicBlock as BinjaBasicBlock
@@ -23,7 +23,7 @@ import capa.features.extractors.helpers
 from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
 from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
-from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
+from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle

 # security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -36,35 +36,27 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
 # 2. The function must only make one call/jump to another address
 # If the function being checked is a stub function, returns the target address. Otherwise, return None.
 def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
-    funcs = bv.get_functions_at(addr)
-    for func in funcs:
-        if len(func.basic_blocks) != 1:
-            continue
+    llil = get_llil_instr_at_addr(bv, addr)
+    if llil is None or llil.operation not in [
+        LowLevelILOperation.LLIL_CALL,
+        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
+        LowLevelILOperation.LLIL_JUMP,
+        LowLevelILOperation.LLIL_TAILCALL,
+    ]:
+        return None

-        call_count = 0
-        call_target = None
-        for il in func.llil.instructions:
-            if il.operation in [
-                LowLevelILOperation.LLIL_CALL,
-                LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
-                LowLevelILOperation.LLIL_JUMP,
-                LowLevelILOperation.LLIL_TAILCALL,
-            ]:
-                call_count += 1
-                if il.dest.value.type in [
-                    RegisterValueType.ImportedAddressValue,
-                    RegisterValueType.ConstantValue,
-                    RegisterValueType.ConstantPointerValue,
-                ]:
-                    call_target = il.dest.value.value
+    # The LLIL instruction retrieved by `get_llil_instr_at_addr` did not go through a full analysis, so we cannot check
+    # `llil.dest.value.type` here
+    if llil.dest.operation not in [
+        LowLevelILOperation.LLIL_CONST,
+        LowLevelILOperation.LLIL_CONST_PTR,
+    ]:
+        return None

-        if call_count == 1 and call_target is not None:
-            return call_target
-
-    return None
+    return llil.dest.constant


-def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """
    parse instruction API features

@@ -123,7 +115,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)

 def extract_insn_number_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """
    parse instruction number features
    example:
@@ -131,7 +123,7 @@ def extract_insn_number_features(
    """
    func: Function = fh.inner

-    results: List[Tuple[Any[Number, OperandNumber], Address]] = []
+    results: list[tuple[Any[Number, OperandNumber], Address]] = []

    def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
        if il.operation == LowLevelILOperation.LLIL_LOAD:
@@ -162,7 +154,7 @@ def extract_insn_number_features(
    yield from results


-def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """
    parse referenced byte sequences
    example:
@@ -209,7 +201,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_insn_string_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """
    parse instruction string features

@@ -266,7 +258,7 @@ def extract_insn_string_features(

 def extract_insn_offset_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """
    parse instruction structure offset features

@@ -275,7 +267,7 @@ def extract_insn_offset_features(
    """
    func: Function = fh.inner

-    results: List[Tuple[Any[Offset, OperandOffset], Address]] = []
+    results: list[tuple[Any[Offset, OperandOffset], Address]] = []
    address_size = func.view.arch.address_size * 8

    def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
@@ -353,7 +345,7 @@ def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInst

 def extract_insn_nzxor_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """
    parse instruction non-zeroing XOR instruction
    ignore expected non-zeroing XORs, e.g. security cookies
@@ -367,7 +359,7 @@ def extract_insn_nzxor_characteristic_features(
        # e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
        if il.operation == LowLevelILOperation.LLIL_XOR:
            # Exclude cases related to the stack cookie
-            if is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il):
+            if is_nzxor_stack_cookie(fh.inner, bbh.inner, il):
                return False
            results.append((Characteristic("nzxor"), ih.address))
            return False
@@ -382,7 +374,7 @@ def extract_insn_nzxor_characteristic_features(

 def extract_insn_mnemonic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """parse instruction mnemonic features"""
    insn: DisassemblyInstruction = ih.inner
    yield Mnemonic(insn.text[0].text), ih.address
@@ -390,7 +382,7 @@ def extract_insn_mnemonic_features(

 def extract_insn_obfs_call_plus_5_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """
    parse call $+5 instruction from the given instruction.
    """
@@ -401,7 +393,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(

 def extract_insn_peb_access_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """parse instruction peb access

    fs:[0x30] on x86, gs:[0x60] on x64
@@ -444,7 +436,7 @@ def extract_insn_peb_access_characteristic_features(

 def extract_insn_segment_access_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """parse instruction fs or gs access"""
    func: Function = fh.inner

@@ -471,7 +463,7 @@ def extract_insn_segment_access_features(

 def extract_insn_cross_section_cflow(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """inspect the instruction for a CALL or JMP that crosses section boundaries"""
    func: Function = fh.inner
    bv: BinaryView = func.view
@@ -491,7 +483,7 @@ def extract_insn_cross_section_cflow(
            yield Characteristic("cross section flow"), ih.address


-def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """extract functions calls from features

    most relevant at the function scope, however, its most efficient to extract at the instruction scope
@@ -534,7 +526,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_function_indirect_call_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
    does not include calls like => call ds:dword_ABD4974

@@ -562,7 +554,7 @@ def extract_function_indirect_call_characteristic_features(
    yield Characteristic("indirect call"), ih.address


-def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """extract instruction features"""
    for inst_handler in INSTRUCTION_HANDLERS:
        for feature, ea in inst_handler(f, bbh, insn):
--- a/capa/features/extractors/cape/call.py
+++ b/capa/features/extractors/cape/call.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 from capa.helpers import assert_never
@@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
 logger = logging.getLogger(__name__)


-def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
    """
    this method extracts the given call's features (such as API name and arguments),
    and returns them as API, Number, and String features.
@@ -55,7 +55,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
        yield API(name), ch.address


-def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
    for handler in CALL_HANDLERS:
        for feature, addr in handler(ph, th, ch):
            yield feature, addr
--- a/capa/features/extractors/cape/extractor.py
+++ b/capa/features/extractors/cape/extractor.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, Tuple, Union, Iterator
+from typing import Union, Iterator

 import capa.features.extractors.cape.call
 import capa.features.extractors.cape.file
@@ -50,16 +50,16 @@ class CapeExtractor(DynamicFeatureExtractor):
        assert self.report.static is not None and self.report.static.pe is not None
        return AbsoluteVirtualAddress(self.report.static.pe.imagebase)

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
        yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.cape.file.extract_features(self.report)

    def get_processes(self) -> Iterator[ProcessHandle]:
        yield from capa.features.extractors.cape.file.get_processes(self.report)

-    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.cape.process.extract_features(ph)

    def get_process_name(self, ph) -> str:
@@ -69,7 +69,7 @@ class CapeExtractor(DynamicFeatureExtractor):
    def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
        yield from capa.features.extractors.cape.process.get_threads(ph)

-    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
        if False:
            # force this routine to be a generator,
            # but we don't actually have any elements to generate.
@@ -81,7 +81,7 @@ class CapeExtractor(DynamicFeatureExtractor):

    def extract_call_features(
        self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)

    def get_call_name(self, ph, th, ch) -> str:
@@ -122,7 +122,7 @@ class CapeExtractor(DynamicFeatureExtractor):
        return "".join(parts)

    @classmethod
-    def from_report(cls, report: Dict) -> "CapeExtractor":
+    def from_report(cls, report: dict) -> "CapeExtractor":
        cr = CapeReport.model_validate(report)

        if cr.info.version not in TESTED_VERSIONS:
--- a/capa/features/extractors/cape/file.py
+++ b/capa/features/extractors/cape/file.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.file import Export, Import, Section
 from capa.features.common import String, Feature
@@ -41,7 +41,7 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]:
            seen_processes[addr].append(process)


-def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    """
    extract imported function names
    """
@@ -62,57 +62,57 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]
                yield Import(name), AbsoluteVirtualAddress(function.address)


-def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    assert report.static is not None and report.static.pe is not None
    for function in report.static.pe.exports:
        yield Export(function.name), AbsoluteVirtualAddress(function.address)


-def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    assert report.static is not None and report.static.pe is not None
    for section in report.static.pe.sections:
        yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)


-def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    if report.strings is not None:
        for string in report.strings:
            yield String(string), NO_ADDRESS


-def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    for regkey in report.behavior.summary.keys:
        yield String(regkey), NO_ADDRESS


-def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_files(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    for file in report.behavior.summary.files:
        yield String(file), NO_ADDRESS


-def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    for mutex in report.behavior.summary.mutexes:
        yield String(mutex), NO_ADDRESS


-def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    for cmd in report.behavior.summary.executed_commands:
        yield String(cmd), NO_ADDRESS


-def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    for symbol in report.behavior.summary.resolved_apis:
        yield String(symbol), NO_ADDRESS


-def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    for svc in report.behavior.summary.created_services:
        yield String(svc), NO_ADDRESS
    for svc in report.behavior.summary.started_services:
        yield String(svc), NO_ADDRESS


-def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    for handler in FILE_HANDLERS:
        for feature, addr in handler(report):
            yield feature, addr
--- a/capa/features/extractors/cape/global_.py
+++ b/capa/features/extractors/cape/global_.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import (
    OS,
@@ -28,7 +28,7 @@ from capa.features.extractors.cape.models import CapeReport
 logger = logging.getLogger(__name__)


-def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    if "Intel 80386" in report.target.file.type:
        yield Arch(ARCH_I386), NO_ADDRESS
    elif "x86-64" in report.target.file.type:
@@ -40,7 +40,7 @@ def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
        )


-def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    if "PE" in report.target.file.type:
        yield Format(FORMAT_PE), NO_ADDRESS
    elif "ELF" in report.target.file.type:
@@ -52,7 +52,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
        )


-def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    # this variable contains the output of the file command
    file_output = report.target.file.type

@@ -80,7 +80,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
        yield OS(OS_ANY), NO_ADDRESS


-def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    for global_handler in GLOBAL_HANDLER:
        for feature, addr in global_handler(report):
            yield feature, addr
--- a/capa/features/extractors/cape/helpers.py
+++ b/capa/features/extractors/cape/helpers.py
@@ -6,12 +6,12 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import Any, Dict, List
+from typing import Any

 from capa.features.extractors.base_extractor import ProcessHandle


-def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]:
+def find_process(processes: list[dict[str, Any]], ph: ProcessHandle) -> dict[str, Any]:
    """
    find a specific process identified by a process handler.

--- a/capa/features/extractors/cape/models.py
+++ b/capa/features/extractors/cape/models.py
@@ -6,10 +6,9 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import binascii
-from typing import Any, Dict, List, Union, Literal, Optional
+from typing import Any, Union, Literal, Optional, Annotated, TypeAlias

 from pydantic import Field, BaseModel, ConfigDict
-from typing_extensions import Annotated, TypeAlias
 from pydantic.functional_validators import BeforeValidator


@@ -59,11 +58,11 @@ Skip: TypeAlias = Optional[Any]
 # in a field with this type.
 # then we can update the model with the discovered shape.
 TODO: TypeAlias = None
-ListTODO: TypeAlias = List[None]
+ListTODO: TypeAlias = list[None]
 DictTODO: TypeAlias = ExactModel

-EmptyDict: TypeAlias = BaseModel
-EmptyList: TypeAlias = List[Any]
+EmptyDict: TypeAlias = BaseModel
+EmptyList: TypeAlias = list[Any]


 class Info(FlexibleModel):
@@ -77,7 +76,7 @@ class ImportedSymbol(ExactModel):

 class ImportedDll(ExactModel):
    dll: str
-    imports: List[ImportedSymbol]
+    imports: list[ImportedSymbol]


 class DirectoryEntry(ExactModel):
@@ -149,7 +148,7 @@ class Signer(ExactModel):
    aux_valid: Optional[bool] = None
    aux_error: Optional[bool] = None
    aux_error_desc: Optional[str] = None
-    aux_signers: Optional[List[AuxSigner]] = None
+    aux_signers: Optional[list[AuxSigner]] = None


 class Overlay(ExactModel):
@@ -178,22 +177,22 @@ class PE(ExactModel):
    pdbpath: Optional[str] = None
    timestamp: str

-    # List[ImportedDll], or Dict[basename(dll), ImportedDll]
-    imports: Union[List[ImportedDll], Dict[str, ImportedDll]]
+    # list[ImportedDll], or dict[basename(dll), ImportedDll]
+    imports: Union[list[ImportedDll], dict[str, ImportedDll]]
    imported_dll_count: Optional[int] = None
    imphash: str

    exported_dll_name: Optional[str] = None
-    exports: List[ExportedSymbol]
+    exports: list[ExportedSymbol]

-    dirents: List[DirectoryEntry]
-    sections: List[Section]
+    dirents: list[DirectoryEntry]
+    sections: list[Section]

    ep_bytes: Optional[HexBytes] = None

    overlay: Optional[Overlay] = None
-    resources: List[Resource]
-    versioninfo: List[KV]
+    resources: list[Resource]
+    versioninfo: list[KV]

    # base64 encoded data
    icon: Optional[str] = None
@@ -204,7 +203,7 @@ class PE(ExactModel):
    # short hex string
    icon_dhash: Optional[str] = None

-    digital_signers: List[DigitalSigner]
+    digital_signers: list[DigitalSigner]
    guest_signers: Signer


@@ -217,9 +216,9 @@ class File(FlexibleModel):
    cape_type: Optional[str] = None

    pid: Optional[Union[int, Literal[""]]] = None
-    name: Union[List[str], str]
+    name: Union[list[str], str]
    path: str
-    guest_paths: Union[List[str], str, None]
+    guest_paths: Union[list[str], str, None]
    timestamp: Optional[str] = None

    #
@@ -244,7 +243,7 @@ class File(FlexibleModel):
    ep_bytes: Optional[HexBytes] = None
    entrypoint: Optional[int] = None
    data: Optional[str] = None
-    strings: Optional[List[str]] = None
+    strings: Optional[list[str]] = None

    #
    # detections (skip)
@@ -283,7 +282,7 @@ class Call(ExactModel):

    api: str

-    arguments: List[Argument]
+    arguments: list[Argument]
    status: bool
    return_: HexInt = Field(alias="return")
    pretty_return: Optional[str] = None
@@ -298,15 +297,18 @@ class Call(ExactModel):
    id: int


-class Process(ExactModel):
+# FlexibleModel to account for extended fields
+# refs: https://github.com/mandiant/capa/issues/2466
+# https://github.com/kevoreilly/CAPEv2/pull/2199
+class Process(FlexibleModel):
    process_id: int
    process_name: str
    parent_id: int
    module_path: str
    first_seen: str
-    calls: List[Call]
-    threads: List[int]
-    environ: Dict[str, str]
+    calls: list[Call]
+    threads: list[int]
+    environ: dict[str, str]


 class ProcessTree(ExactModel):
@@ -314,25 +316,25 @@ class ProcessTree(ExactModel):
    pid: int
    parent_id: int
    module_path: str
-    threads: List[int]
-    environ: Dict[str, str]
-    children: List["ProcessTree"]
+    threads: list[int]
+    environ: dict[str, str]
+    children: list["ProcessTree"]


 class Summary(ExactModel):
-    files: List[str]
-    read_files: List[str]
-    write_files: List[str]
-    delete_files: List[str]
-    keys: List[str]
-    read_keys: List[str]
-    write_keys: List[str]
-    delete_keys: List[str]
-    executed_commands: List[str]
-    resolved_apis: List[str]
-    mutexes: List[str]
-    created_services: List[str]
-    started_services: List[str]
+    files: list[str]
+    read_files: list[str]
+    write_files: list[str]
+    delete_files: list[str]
+    keys: list[str]
+    read_keys: list[str]
+    write_keys: list[str]
+    delete_keys: list[str]
+    executed_commands: list[str]
+    resolved_apis: list[str]
+    mutexes: list[str]
+    created_services: list[str]
+    started_services: list[str]


 class EncryptedBuffer(ExactModel):
@@ -349,12 +351,12 @@ class Behavior(ExactModel):
    summary: Summary

    # list of processes, of threads, of calls
-    processes: List[Process]
+    processes: list[Process]
    # tree of processes
-    processtree: List[ProcessTree]
+    processtree: list[ProcessTree]

-    anomaly: List[str]
-    encryptedbuffers: List[EncryptedBuffer]
+    anomaly: list[str]
+    encryptedbuffers: list[EncryptedBuffer]
    # these are small objects that describe atomic events,
    # like file move, registry access.
    # we'll detect the same with our API call analysis.
@@ -373,7 +375,7 @@ class Static(ExactModel):


 class Cape(ExactModel):
-    payloads: List[ProcessFile]
+    payloads: list[ProcessFile]
    configs: Skip = None


@@ -389,7 +391,7 @@ class CapeReport(FlexibleModel):
    # static analysis results
    #
    static: Optional[Static] = None
-    strings: Optional[List[str]] = None
+    strings: Optional[list[str]] = None

    #
    # dynamic analysis results
@@ -398,10 +400,10 @@ class CapeReport(FlexibleModel):
    behavior: Behavior

    # post-processed results: payloads and extracted configs
-    CAPE: Optional[Union[Cape, List]] = None
-    dropped: Optional[List[File]] = None
-    procdump: Optional[List[ProcessFile]] = None
-    procmemory: ListTODO
+    CAPE: Optional[Union[Cape, list]] = None
+    dropped: Optional[list[File]] = None
+    procdump: Optional[list[ProcessFile]] = None
+    procmemory: Optional[ListTODO] = None

    # =========================================================================
    # information we won't use in capa
@@ -437,7 +439,7 @@ class CapeReport(FlexibleModel):
    malfamily_tag: Optional[str] = None
    malscore: float
    detections: Skip = None
-    detections2pid: Optional[Dict[int, List[str]]] = None
+    detections2pid: Optional[dict[int, list[str]]] = None
    # AV detections for the sample.
    virustotal: Skip = None

--- a/capa/features/extractors/cape/process.py
+++ b/capa/features/extractors/cape/process.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import String, Feature
 from capa.features.address import Address, ThreadAddress
@@ -22,14 +22,14 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]:
    get the threads associated with a given process
    """
    process: Process = ph.inner
-    threads: List[int] = process.threads
+    threads: list[int] = process.threads

    for thread in threads:
        address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
        yield ThreadHandle(address=address, inner={})


-def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
    """
    extract strings from a process' provided environment variables.
    """
@@ -39,7 +39,7 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Addres
        yield String(value), ph.address


-def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
    for handler in PROCESS_HANDLERS:
        for feature, addr in handler(ph):
            yield feature, addr
--- a/capa/features/extractors/common.py
+++ b/capa/features/extractors/common.py
@@ -10,7 +10,7 @@ import re
 import logging
 import binascii
 import contextlib
-from typing import Tuple, Iterator
+from typing import Iterator

 import pefile

@@ -45,7 +45,7 @@ MATCH_RESULT = b'{"meta":'
 MATCH_JSON_OBJECT = b'{"'


-def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address]]:
    """
    extract ASCII and UTF-16 LE strings from file
    """
@@ -56,7 +56,7 @@ def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address
        yield String(s.s), FileOffsetAddress(s.offset)


-def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]:
    if buf.startswith(MATCH_PE):
        yield Format(FORMAT_PE), NO_ADDRESS
    elif buf.startswith(MATCH_ELF):
@@ -79,7 +79,7 @@ def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
        return


-def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
    if buf.startswith(MATCH_PE):
        yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))

@@ -111,7 +111,7 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
        return


-def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
    if os != OS_AUTO:
        yield OS(os), NO_ADDRESS

--- a/capa/features/extractors/dnfile/extractor.py
+++ b/capa/features/extractors/dnfile/extractor.py
@@ -8,7 +8,7 @@

 from __future__ import annotations

-from typing import Dict, List, Tuple, Union, Iterator, Optional
+from typing import Union, Iterator, Optional
 from pathlib import Path

 import dnfile
@@ -41,11 +41,11 @@ from capa.features.extractors.dnfile.helpers import (

 class DnFileFeatureExtractorCache:
    def __init__(self, pe: dnfile.dnPE):
-        self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.native_imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.methods: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.fields: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.types: dict[int, Union[DnType, DnUnmanagedMethod]] = {}

        for import_ in get_dotnet_managed_imports(pe):
            self.imports[import_.token] = import_
@@ -84,7 +84,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
        self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe)

        # pre-compute these because we'll yield them at *every* scope.
-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
        self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
        self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
@@ -100,7 +100,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):

    def get_functions(self) -> Iterator[FunctionHandle]:
        # create a method lookup table
-        methods: Dict[Address, FunctionHandle] = {}
+        methods: dict[Address, FunctionHandle] = {}
        for token, method in get_dotnet_managed_method_bodies(self.pe):
            fh: FunctionHandle = FunctionHandle(
                address=DNTokenAddress(token),
@@ -136,7 +136,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):

        yield from methods.values()

-    def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.dnfile.function.extract_features(fh)

    def get_basic_blocks(self, f) -> Iterator[BBHandle]:
@@ -157,5 +157,5 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
                inner=insn,
            )

-    def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]:
+    def extract_insn_features(self, fh, bbh, ih) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)
--- a/capa/features/extractors/dnfile/file.py
+++ b/capa/features/extractors/dnfile/file.py
@@ -8,7 +8,7 @@

 from __future__ import annotations

-from typing import Tuple, Iterator
+from typing import Iterator

 import dnfile

@@ -18,35 +18,35 @@ from capa.features.common import Class, Format, String, Feature, Namespace, Char
 from capa.features.address import Address


-def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]:
+def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[tuple[Import, Address]]:
    yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)


-def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]:
+def extract_file_format(pe: dnfile.dnPE) -> Iterator[tuple[Format, Address]]:
    yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)


-def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]:
+def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[tuple[FunctionName, Address]]:
    yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)


-def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(pe: dnfile.dnPE) -> Iterator[tuple[String, Address]]:
    yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)


-def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[tuple[Characteristic, Address]]:
    yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)


-def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]:
+def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[tuple[Namespace, Address]]:
    yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)


-def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]:
+def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[tuple[Class, Address]]:
    yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)


-def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
    for file_handler in FILE_HANDLERS:
        for feature, address in file_handler(pe):
            yield feature, address
--- a/capa/features/extractors/dnfile/function.py
+++ b/capa/features/extractors/dnfile/function.py
@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address
@@ -18,30 +18,30 @@ from capa.features.extractors.base_extractor import FunctionHandle
 logger = logging.getLogger(__name__)


-def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
    """extract callers to a function"""
    for dest in fh.ctx["calls_to"]:
        yield Characteristic("calls to"), dest


-def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_calls_from(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
    """extract callers from a function"""
    for src in fh.ctx["calls_from"]:
        yield Characteristic("calls from"), src


-def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_recursive_call(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
    """extract recursive function call"""
    if fh.address in fh.ctx["calls_to"]:
        yield Characteristic("recursive call"), fh.address


-def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
    """extract loop indicators from a function"""
    raise NotImplementedError()


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    for func_handler in FUNCTION_HANDLERS:
        for feature, addr in func_handler(fh):
            yield feature, addr
--- a/capa/features/extractors/dnfile/helpers.py
+++ b/capa/features/extractors/dnfile/helpers.py
@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import Dict, Tuple, Union, Iterator, Optional
+from typing import Union, Iterator, Optional

 import dnfile
 from dncil.cil.body import CilMethodBody
@@ -144,7 +144,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
        )


-def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
+def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
    """get MethodDef methods used to access properties

    see https://www.ntcore.com/files/dotnetformat.htm
@@ -194,7 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
    """
    nested_class_table = get_dotnet_nested_class_table_index(pe)

-    accessor_map: Dict[int, str] = {}
+    accessor_map: dict[int, str] = {}
    for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
        accessor_map[methoddef] = methoddef_access

@@ -252,7 +252,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
            yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)


-def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
+def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
    """get managed methods from MethodDef table"""
    for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
        assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
@@ -332,7 +332,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O

 def resolve_nested_typedef_name(
    nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
-) -> Tuple[str, Tuple[str, ...]]:
+) -> tuple[str, tuple[str, ...]]:
    """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""

    if index in nested_class_table:
@@ -368,7 +368,7 @@ def resolve_nested_typedef_name(

 def resolve_nested_typeref_name(
    index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
-) -> Tuple[str, Tuple[str, ...]]:
+) -> tuple[str, tuple[str, ...]]:
    """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
    # If the ResolutionScope decodes to a typeRef type then it is nested
    if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
@@ -398,7 +398,7 @@ def resolve_nested_typeref_name(
        return str(typeref.TypeNamespace), (str(typeref.TypeName),)


-def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
+def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict[int, int]:
    """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table"""
    nested_class_table = {}

@@ -442,7 +442,7 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
    return not bool(pe.net.Flags.CLR_ILONLY)


-def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
+def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[tuple[int, dnfile.base.MDTableRow]]:
    assert pe.net is not None
    assert pe.net.mdtables is not None

--- a/capa/features/extractors/dnfile/insn.py
+++ b/capa/features/extractors/dnfile/insn.py
@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import TYPE_CHECKING, Tuple, Union, Iterator, Optional
+from typing import TYPE_CHECKING, Union, Iterator, Optional

 if TYPE_CHECKING:
    from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
@@ -61,7 +61,7 @@ def get_callee(
    return callee


-def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """parse instruction API features"""
    if ih.inner.opcode not in (
        OpCodes.Call,
@@ -83,7 +83,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato
            yield API(name), ih.address


-def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """parse instruction property features"""
    name: Optional[str] = None
    access: Optional[str] = None
@@ -118,7 +118,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It

 def extract_insn_namespace_class_features(
    fh: FunctionHandle, bh, ih: InsnHandle
-) -> Iterator[Tuple[Union[Namespace, Class], Address]]:
+) -> Iterator[tuple[Union[Namespace, Class], Address]]:
    """parse instruction namespace and class features"""
    type_: Optional[Union[DnType, DnUnmanagedMethod]] = None

@@ -173,13 +173,13 @@ def extract_insn_namespace_class_features(
            yield Namespace(type_.namespace), ih.address


-def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """parse instruction number features"""
    if ih.inner.is_ldc():
        yield Number(ih.inner.get_ldc()), ih.address


-def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """parse instruction string features"""
    if not ih.inner.is_ldstr():
        return
@@ -197,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter

 def extract_unmanaged_call_characteristic_features(
    fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Characteristic, Address]]:
+) -> Iterator[tuple[Characteristic, Address]]:
    if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp):
        return

@@ -209,7 +209,7 @@ def extract_unmanaged_call_characteristic_features(
        yield Characteristic("unmanaged call"), ih.address


-def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """extract instruction features"""
    for inst_handler in INSTRUCTION_HANDLERS:
        for feature, addr in inst_handler(fh, bbh, ih):
--- a/capa/features/extractors/dnfile/types.py
+++ b/capa/features/extractors/dnfile/types.py
@@ -6,17 +6,17 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import Tuple, Optional
+from typing import Optional


 class DnType:
    def __init__(
-        self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
+        self, token: int, class_: tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
    ):
        self.token: int = token
        self.access: Optional[str] = access
        self.namespace: str = namespace
-        self.class_: Tuple[str, ...] = class_
+        self.class_: tuple[str, ...] = class_

        if member == ".ctor":
            member = "ctor"
@@ -44,7 +44,7 @@ class DnType:
        return str(self)

    @staticmethod
-    def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""):
+    def format_name(class_: tuple[str, ...], namespace: str = "", member: str = ""):
        if len(class_) > 1:
            class_str = "/".join(class_)  # Concat items in tuple, separated by a "/"
        else:
--- a/capa/features/extractors/dotnetfile.py
+++ b/capa/features/extractors/dotnetfile.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator
 from pathlib import Path

 import dnfile
@@ -48,12 +48,12 @@ from capa.features.extractors.dnfile.helpers import (
 logger = logging.getLogger(__name__)


-def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
+def extract_file_format(**kwargs) -> Iterator[tuple[Format, Address]]:
    yield Format(FORMAT_DOTNET), NO_ADDRESS
    yield Format(FORMAT_PE), NO_ADDRESS


-def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
+def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Import, Address]]:
    for method in get_dotnet_managed_imports(pe):
        # like System.IO.File::OpenRead
        yield Import(str(method)), DNTokenAddress(method.token)
@@ -64,12 +64,12 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor
            yield Import(name), DNTokenAddress(imp.token)


-def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]:
+def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[FunctionName, Address]]:
    for method in get_dotnet_managed_methods(pe):
        yield FunctionName(str(method)), DNTokenAddress(method.token)


-def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]:
+def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Namespace, Address]]:
    """emit namespace features from TypeRef and TypeDef tables"""

    # namespaces may be referenced multiple times, so we need to filter
@@ -93,7 +93,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
        yield Namespace(namespace), NO_ADDRESS


-def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
+def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Class, Address]]:
    """emit class features from TypeRef and TypeDef tables"""
    nested_class_table = get_dotnet_nested_class_table_index(pe)

@@ -116,11 +116,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla
        yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)


-def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
+def extract_file_os(**kwargs) -> Iterator[tuple[OS, Address]]:
    yield OS(OS_ANY), NO_ADDRESS


-def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
+def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address]]:
    # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
    # .NET 4.5 added option: any CPU, 32-bit preferred
    assert pe.net is not None
@@ -134,18 +134,18 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address
        yield Arch(ARCH_ANY), NO_ADDRESS


-def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[String, Address]]:
    yield from capa.features.extractors.common.extract_file_strings(pe.__data__)


 def extract_file_mixed_mode_characteristic_features(
    pe: dnfile.dnPE, **kwargs
-) -> Iterator[Tuple[Characteristic, Address]]:
+) -> Iterator[tuple[Characteristic, Address]]:
    if is_dotnet_mixed_mode(pe):
        yield Characteristic("mixed mode"), NO_ADDRESS


-def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
    for file_handler in FILE_HANDLERS:
        for feature, addr in file_handler(pe=pe):  # type: ignore
            yield feature, addr
@@ -162,7 +162,7 @@ FILE_HANDLERS = (
 )


-def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_global_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
    for handler in GLOBAL_HANDLERS:
        for feature, va in handler(pe=pe):  # type: ignore
            yield feature, va
@@ -204,7 +204,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
    def is_mixed_mode(self) -> bool:
        return is_dotnet_mixed_mode(self.pe)

-    def get_runtime_version(self) -> Tuple[int, int]:
+    def get_runtime_version(self) -> tuple[int, int]:
        assert self.pe.net is not None
        assert self.pe.net.struct is not None
        assert self.pe.net.struct.MajorRuntimeVersion is not None
--- a/capa/features/extractors/drakvuf/call.py
+++ b/capa/features/extractors/drakvuf/call.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 from capa.features.insn import API, Number
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call
 logger = logging.getLogger(__name__)


-def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
    """
    This method extracts the given call's features (such as API name and arguments),
    and returns them as API, Number, and String features.
@@ -49,7 +49,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
        yield API(name), ch.address


-def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
    for handler in CALL_HANDLERS:
        for feature, addr in handler(ph, th, ch):
            yield feature, addr
--- a/capa/features/extractors/drakvuf/extractor.py
+++ b/capa/features/extractors/drakvuf/extractor.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Tuple, Union, Iterator
+from typing import Union, Iterator

 import capa.features.extractors.drakvuf.call
 import capa.features.extractors.drakvuf.file
@@ -39,7 +39,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
        self.report: DrakvufReport = report

        # sort the api calls to prevent going through the entire list each time
-        self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report)
+        self.sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = index_calls(report)

        # pre-compute these because we'll yield them at *every* scope.
        self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report))
@@ -48,16 +48,16 @@ class DrakvufExtractor(DynamicFeatureExtractor):
        # DRAKVUF currently does not yield information about the PE's address
        return NO_ADDRESS

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
        yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.drakvuf.file.extract_features(self.report)

    def get_processes(self) -> Iterator[ProcessHandle]:
        yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls)

-    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.drakvuf.process.extract_features(ph)

    def get_process_name(self, ph: ProcessHandle) -> str:
@@ -66,7 +66,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
    def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
        yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph)

-    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
        if False:
            # force this routine to be a generator,
            # but we don't actually have any elements to generate.
@@ -87,10 +87,10 @@ class DrakvufExtractor(DynamicFeatureExtractor):

    def extract_call_features(
        self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch)

    @classmethod
-    def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor":
+    def from_report(cls, report: Iterator[dict]) -> "DrakvufExtractor":
        dr = DrakvufReport.from_raw_report(report)
        return DrakvufExtractor(report=dr)
--- a/capa/features/extractors/drakvuf/file.py
+++ b/capa/features/extractors/drakvuf/file.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Tuple, Iterator
+from typing import Iterator

 from capa.features.file import Import
 from capa.features.common import Feature
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call, DrakvufReport
 logger = logging.getLogger(__name__)


-def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]:
+def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]]) -> Iterator[ProcessHandle]:
    """
    Get all the created processes for a sample.
    """
@@ -28,7 +28,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]])
        yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name})


-def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
    """
    Extract imported function names.
    """
@@ -43,7 +43,7 @@ def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Addre
                yield Import(name), AbsoluteVirtualAddress(function_address)


-def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
    for handler in FILE_HANDLERS:
        for feature, addr in handler(report):
            yield feature, addr
--- a/capa/features/extractors/drakvuf/global_.py
+++ b/capa/features/extractors/drakvuf/global_.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
 from capa.features.address import NO_ADDRESS, Address
@@ -16,22 +16,22 @@ from capa.features.extractors.drakvuf.models import DrakvufReport
 logger = logging.getLogger(__name__)


-def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
    # DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
    yield Format(FORMAT_PE), NO_ADDRESS


-def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
    # DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
    yield OS(OS_WINDOWS), NO_ADDRESS


-def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
    # DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
    yield Arch(ARCH_AMD64), NO_ADDRESS


-def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
    for global_handler in GLOBAL_HANDLER:
        for feature, addr in global_handler(report):
            yield feature, addr
--- a/capa/features/extractors/drakvuf/helpers.py
+++ b/capa/features/extractors/drakvuf/helpers.py
@@ -7,16 +7,15 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import itertools
-from typing import Dict, List

 from capa.features.address import ThreadAddress, ProcessAddress
 from capa.features.extractors.drakvuf.models import Call, DrakvufReport


-def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]:
+def index_calls(report: DrakvufReport) -> dict[ProcessAddress, dict[ThreadAddress, list[Call]]]:
    # this method organizes calls into processes and threads, and then sorts them based on
    # timestamp so that we can address individual calls per index (CallAddress requires call index)
-    result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {}
+    result: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = {}
    for call in itertools.chain(report.syscalls, report.apicalls):
        if call.pid == 0:
            # DRAKVUF captures api/native calls from all processes running on the system.
--- a/capa/features/extractors/drakvuf/models.py
+++ b/capa/features/extractors/drakvuf/models.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Any, Dict, List, Iterator
+from typing import Any, Iterator

 from pydantic import Field, BaseModel, ConfigDict, model_validator

@@ -47,7 +47,7 @@ class LoadedDLL(ConciseModel):
    plugin_name: str = Field(alias="Plugin")
    event: str = Field(alias="Event")
    name: str = Field(alias="DllName")
-    imports: Dict[str, int] = Field(alias="Rva")
+    imports: dict[str, int] = Field(alias="Rva")


 class Call(ConciseModel):
@@ -58,18 +58,18 @@ class Call(ConciseModel):
    pid: int = Field(alias="PID")
    tid: int = Field(alias="TID")
    name: str = Field(alias="Method")
-    arguments: Dict[str, str]
+    arguments: dict[str, str]


 class WinApiCall(Call):
    # This class models Windows API calls captured by DRAKVUF (DLLs, etc.).
-    arguments: Dict[str, str] = Field(alias="Arguments")
+    arguments: dict[str, str] = Field(alias="Arguments")
    event: str = Field(alias="Event")
    return_value: str = Field(alias="ReturnValue")

    @model_validator(mode="before")
    @classmethod
-    def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]:
        args = values["Arguments"]
        values["Arguments"] = dict(arg.split("=", 1) for arg in args)
        return values
@@ -100,7 +100,7 @@ class SystemCall(Call):

    @model_validator(mode="before")
    @classmethod
-    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:
        # DRAKVUF stores argument names and values as entries in the syscall's entry.
        # This model validator collects those arguments into a list in the model.
        values["arguments"] = {
@@ -110,13 +110,13 @@ class SystemCall(Call):


 class DrakvufReport(ConciseModel):
-    syscalls: List[SystemCall] = []
-    apicalls: List[WinApiCall] = []
-    discovered_dlls: List[DiscoveredDLL] = []
-    loaded_dlls: List[LoadedDLL] = []
+    syscalls: list[SystemCall] = []
+    apicalls: list[WinApiCall] = []
+    discovered_dlls: list[DiscoveredDLL] = []
+    loaded_dlls: list[LoadedDLL] = []

    @classmethod
-    def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport":
+    def from_raw_report(cls, entries: Iterator[dict]) -> "DrakvufReport":
        report = cls()

        for entry in entries:
--- a/capa/features/extractors/drakvuf/process.py
+++ b/capa/features/extractors/drakvuf/process.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import String, Feature
 from capa.features.address import Address, ThreadAddress, ProcessAddress
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)


 def get_threads(
-    calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle
+    calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle
 ) -> Iterator[ThreadHandle]:
    """
    Get the threads associated with a given process.
@@ -27,11 +27,11 @@ def get_threads(
        yield ThreadHandle(address=thread_addr, inner={})


-def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_process_name(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
    yield String(ph.inner["process_name"]), ph.address


-def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
    for handler in PROCESS_HANDLERS:
        for feature, addr in handler(ph):
            yield feature, addr
--- a/capa/features/extractors/drakvuf/thread.py
+++ b/capa/features/extractors/drakvuf/thread.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Iterator
+from typing import Iterator

 from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress
 from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)


 def get_calls(
-    sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle
+    sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle, th: ThreadHandle
 ) -> Iterator[CallHandle]:
    for i, call in enumerate(sorted_calls[ph.address][th.address]):
        call_addr = DynamicCallAddress(thread=th.address, id=i)
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -10,7 +10,7 @@ import logging
 import itertools
 import collections
 from enum import Enum
-from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
+from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional
 from dataclasses import dataclass

 if TYPE_CHECKING:
@@ -394,7 +394,7 @@ class ELF:
            return read_cstr(phdr.buf, 0)

    @property
-    def versions_needed(self) -> Dict[str, Set[str]]:
+    def versions_needed(self) -> dict[str, set[str]]:
        # symbol version requirements are stored in the .gnu.version_r section,
        # which has type SHT_GNU_verneed (0x6ffffffe).
        #
@@ -452,7 +452,7 @@ class ELF:
        return {}

    @property
-    def dynamic_entries(self) -> Iterator[Tuple[int, int]]:
+    def dynamic_entries(self) -> Iterator[tuple[int, int]]:
        """
        read the entries from the dynamic section,
        yielding the tag and value for each entry.
@@ -547,7 +547,7 @@ class ELF:
                logger.warning("failed to read DT_NEEDED entry: %s", str(e))

    @property
-    def symtab(self) -> Optional[Tuple[Shdr, Shdr]]:
+    def symtab(self) -> Optional[tuple[Shdr, Shdr]]:
        """
        fetch the Shdr for the symtab and the associated strtab.
        """
@@ -682,7 +682,7 @@ class SymTab:
        symtab: Shdr,
        strtab: Shdr,
    ) -> None:
-        self.symbols: List[Symbol] = []
+        self.symbols: list[Symbol] = []

        self.symtab = symtab
        self.strtab = strtab
--- a/capa/features/extractors/elffile.py
+++ b/capa/features/extractors/elffile.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import io
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator
 from pathlib import Path

 from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
@@ -166,7 +166,7 @@ def extract_file_arch(elf: ELFFile, **kwargs):
        logger.warning("unsupported architecture: %s", arch)


-def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
+def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
    for file_handler in FILE_HANDLERS:
        for feature, addr in file_handler(elf=elf, buf=buf):  # type: ignore
            yield feature, addr
@@ -182,7 +182,7 @@ FILE_HANDLERS = (
 )


-def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
+def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
    for global_handler in GLOBAL_HANDLERS:
        for feature, addr in global_handler(elf=elf, buf=buf):  # type: ignore
            yield feature, addr
--- a/capa/features/extractors/ghidra/basicblock.py
+++ b/capa/features/extractors/ghidra/basicblock.py
@@ -8,7 +8,7 @@

 import string
 import struct
-from typing import Tuple, Iterator
+from typing import Iterator

 import ghidra
 from ghidra.program.model.lang import OperandType
@@ -97,7 +97,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
    return False


-def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """extract stackstring indicators from basic block"""
    bb: ghidra.program.model.block.CodeBlock = bbh.inner

@@ -105,7 +105,7 @@ def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[
        yield Characteristic("stack string"), bbh.address


-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """check basic block for tight loop indicators"""
    bb: ghidra.program.model.block.CodeBlock = bbh.inner

@@ -119,7 +119,7 @@ BASIC_BLOCK_HANDLERS = (
 )


-def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """
    extract features from the given basic block.

@@ -127,7 +127,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
        bb: the basic block to process.

    yields:
-      Tuple[Feature, int]: the features and their location found in this basic block.
+      tuple[Feature, Address]: the features and their location found in this basic block.
    """
    yield BasicBlock(), bbh.address
    for bb_handler in BASIC_BLOCK_HANDLERS:
--- a/capa/features/extractors/ghidra/extractor.py
+++ b/capa/features/extractors/ghidra/extractor.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.ghidra.file
 import capa.features.extractors.ghidra.insn
@@ -40,7 +40,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
            )
        )

-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
        self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
        self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())
@@ -73,7 +73,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
        func = getFunctionContaining(toAddr(addr))  # type: ignore [name-defined] # noqa: F821
        return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)

-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.ghidra.function.extract_features(fh)

    def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
@@ -81,7 +81,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):

        yield from ghidra_helpers.get_function_blocks(fh)

-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
        yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)

    def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
--- a/capa/features/extractors/ghidra/file.py
+++ b/capa/features/extractors/ghidra/file.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import re
 import struct
-from typing import List, Tuple, Iterator
+from typing import Iterator

 from ghidra.program.model.symbol import SourceType, SymbolType

@@ -22,7 +22,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
 MAX_OFFSET_PE_AFTER_MZ = 0x200


-def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) -> Iterator[Tuple[int, int]]:
+def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]]) -> Iterator[tuple[int, int]]:
    """check segment for embedded PE

    adapted for Ghidra from:
@@ -60,11 +60,11 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]])
            yield off, i


-def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
    """extract embedded PE features"""

    # pre-compute XOR pairs
-    mz_xor: List[Tuple[bytes, bytes, int]] = [
+    mz_xor: list[tuple[bytes, bytes, int]] = [
        (
            capa.features.extractors.helpers.xor_static(b"MZ", i),
            capa.features.extractors.helpers.xor_static(b"PE", i),
@@ -84,14 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
            yield Characteristic("embedded pe"), FileOffsetAddress(ea)


-def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
    """extract function exports"""
    st = currentProgram().getSymbolTable()  # type: ignore [name-defined] # noqa: F821
    for addr in st.getExternalEntryPointIterator():
        yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())


-def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
    """extract function imports

    1. imports by ordinal:
@@ -116,14 +116,14 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
            yield Import(name), AbsoluteVirtualAddress(addr)


-def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
    """extract section names"""

    for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
        yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())


-def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
    """extract ASCII and UTF-16 LE strings"""

    for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
@@ -141,7 +141,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
            yield String(s.s), FileOffsetAddress(offset)


-def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
    """
    extract the names of statically-linked library functions.
    """
@@ -162,7 +162,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
                yield FunctionName(name[1:]), addr


-def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format() -> Iterator[tuple[Feature, Address]]:
    ef = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821
    if "PE" in ef:
        yield Format(FORMAT_PE), NO_ADDRESS
@@ -175,7 +175,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
        raise NotImplementedError(f"unexpected file format: {ef}")


-def extract_features() -> Iterator[Tuple[Feature, Address]]:
+def extract_features() -> Iterator[tuple[Feature, Address]]:
    """extract file features"""
    for file_handler in FILE_HANDLERS:
        for feature, addr in file_handler():
--- a/capa/features/extractors/ghidra/function.py
+++ b/capa/features/extractors/ghidra/function.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+from typing import Iterator

 import ghidra
 from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator
@@ -49,7 +49,7 @@ def extract_recursive_call(fh: FunctionHandle):
            yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    for func_handler in FUNCTION_HANDLERS:
        for feature, addr in func_handler(fh):
            yield feature, addr
--- a/capa/features/extractors/ghidra/global_.py
+++ b/capa/features/extractors/ghidra/global_.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
 import contextlib
-from typing import Tuple, Iterator
+from typing import Iterator

 import capa.ghidra.helpers
 import capa.features.extractors.elf
@@ -18,7 +18,7 @@ from capa.features.address import NO_ADDRESS, Address
 logger = logging.getLogger(__name__)


-def extract_os() -> Iterator[Tuple[Feature, Address]]:
+def extract_os() -> Iterator[tuple[Feature, Address]]:
    format_name: str = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821

    if "PE" in format_name:
@@ -45,7 +45,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]:
        return


-def extract_arch() -> Iterator[Tuple[Feature, Address]]:
+def extract_arch() -> Iterator[tuple[Feature, Address]]:
    lang_id = currentProgram().getMetadata().get("Language ID")  # type: ignore [name-defined] # noqa: F821

    if "x86" in lang_id and "64" in lang_id:
--- a/capa/features/extractors/ghidra/helpers.py
+++ b/capa/features/extractors/ghidra/helpers.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Dict, List, Iterator
+from typing import Iterator

 import ghidra
 import java.lang
@@ -20,7 +20,7 @@ from capa.features.address import AbsoluteVirtualAddress
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle


-def ints_to_bytes(bytez: List[int]) -> bytes:
+def ints_to_bytes(bytez: list[int]) -> bytes:
    """convert Java signed ints to Python bytes

    args:
@@ -83,10 +83,10 @@ def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
        yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)


-def get_file_imports() -> Dict[int, List[str]]:
+def get_file_imports() -> dict[int, list[str]]:
    """get all import names & addrs"""

-    import_dict: Dict[int, List[str]] = {}
+    import_dict: dict[int, list[str]] = {}

    for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
        for r in f.getSymbol().getReferences():
@@ -110,7 +110,7 @@ def get_file_imports() -> Dict[int, List[str]]:
    return import_dict


-def get_file_externs() -> Dict[int, List[str]]:
+def get_file_externs() -> dict[int, list[str]]:
    """
    Gets function names & addresses of statically-linked library functions

@@ -124,7 +124,7 @@ def get_file_externs() -> Dict[int, List[str]]:
    - Note: See Symbol Table labels
    """

-    extern_dict: Dict[int, List[str]] = {}
+    extern_dict: dict[int, list[str]] = {}

    for sym in currentProgram().getSymbolTable().getAllSymbols(True):  # type: ignore [name-defined] # noqa: F821
        # .isExternal() misses more than this config for the function symbols
@@ -143,7 +143,7 @@ def get_file_externs() -> Dict[int, List[str]]:
    return extern_dict


-def map_fake_import_addrs() -> Dict[int, List[int]]:
+def map_fake_import_addrs() -> dict[int, list[int]]:
    """
    Map ghidra's fake import entrypoints to their
    real addresses
@@ -162,7 +162,7 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:
    - 0x473090 -> PTR_CreateServiceW_00473090
    - 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress)
    """
-    fake_dict: Dict[int, List[int]] = {}
+    fake_dict: dict[int, list[int]] = {}

    for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
        for r in f.getSymbol().getReferences():
@@ -174,9 +174,9 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:

 def check_addr_for_api(
    addr: ghidra.program.model.address.Address,
-    fakes: Dict[int, List[int]],
-    imports: Dict[int, List[str]],
-    externs: Dict[int, List[str]],
+    fakes: dict[int, list[int]],
+    imports: dict[int, list[str]],
+    externs: dict[int, list[str]],
 ) -> bool:
    offset = addr.getOffset()

--- a/capa/features/extractors/ghidra/insn.py
+++ b/capa/features/extractors/ghidra/insn.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Any, Dict, Tuple, Iterator
+from typing import Any, Iterator

 import ghidra
 from ghidra.program.model.lang import OperandType
@@ -26,21 +26,21 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
 OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS


-def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
    """Populate the import cache for this context"""
    if "imports_cache" not in ctx:
        ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports()
    return ctx["imports_cache"]


-def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
    """Populate the externs cache for this context"""
    if "externs_cache" not in ctx:
        ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs()
    return ctx["externs_cache"]


-def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]:
    """Populate the fake import addrs cache for this context"""
    if "fakes_cache" not in ctx:
        ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs()
@@ -48,7 +48,7 @@ def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:


 def check_for_api_call(
-    insn, externs: Dict[int, Any], fakes: Dict[int, Any], imports: Dict[int, Any], imp_or_ex: bool
+    insn, externs: dict[int, Any], fakes: dict[int, Any], imports: dict[int, Any], imp_or_ex: bool
 ) -> Iterator[Any]:
    """check instruction for API call

@@ -110,7 +110,7 @@ def check_for_api_call(
            yield info


-def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    insn: ghidra.program.database.code.InstructionDB = ih.inner

    if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
@@ -131,7 +131,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle)
            yield API(ext), ih.address


-def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """
    parse instruction number features
    example:
@@ -186,7 +186,7 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
                yield OperandOffset(i, const), addr


-def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """
    parse instruction structure offset features

@@ -219,7 +219,7 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
            yield OperandOffset(i, op_off), ih.address


-def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """
    parse referenced byte sequences

@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
                yield Bytes(extracted_bytes), ih.address


-def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """
    parse instruction string features

@@ -249,7 +249,7 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl

 def extract_insn_mnemonic_features(
    fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """parse instruction mnemonic features"""
    insn: ghidra.program.database.code.InstructionDB = ih.inner

@@ -258,7 +258,7 @@ def extract_insn_mnemonic_features(

 def extract_insn_obfs_call_plus_5_characteristic_features(
    fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """
    parse call $+5 instruction from the given instruction.
    """
@@ -279,7 +279,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(

 def extract_insn_segment_access_features(
    fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """parse instruction fs or gs access"""
    insn: ghidra.program.database.code.InstructionDB = ih.inner

@@ -294,7 +294,7 @@ def extract_insn_segment_access_features(

 def extract_insn_peb_access_characteristic_features(
    fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """parse instruction peb access

    fs:[0x30] on x86, gs:[0x60] on x64
@@ -310,7 +310,7 @@ def extract_insn_peb_access_characteristic_features(

 def extract_insn_cross_section_cflow(
    fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """inspect the instruction for a CALL or JMP that crosses section boundaries"""
    insn: ghidra.program.database.code.InstructionDB = ih.inner

@@ -364,7 +364,7 @@ def extract_function_calls_from(
    fh: FunctionHandle,
    bb: BBHandle,
    ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """extract functions calls from features

    most relevant at the function scope, however, its most efficient to extract at the instruction scope
@@ -393,7 +393,7 @@ def extract_function_indirect_call_characteristic_features(
    fh: FunctionHandle,
    bb: BBHandle,
    ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
    does not include calls like => call ds:dword_ABD4974

@@ -442,7 +442,7 @@ def extract_insn_nzxor_characteristic_features(
    fh: FunctionHandle,
    bb: BBHandle,
    ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    f: ghidra.program.database.function.FunctionDB = fh.inner
    insn: ghidra.program.database.code.InstructionDB = ih.inner

@@ -461,7 +461,7 @@ def extract_features(
    fh: FunctionHandle,
    bb: BBHandle,
    insn: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
    for insn_handler in INSTRUCTION_HANDLERS:
        for feature, addr in insn_handler(fh, bb, insn):
            yield feature, addr
--- a/capa/features/extractors/helpers.py
+++ b/capa/features/extractors/helpers.py
@@ -8,7 +8,7 @@

 import struct
 import builtins
-from typing import Tuple, Iterator
+from typing import Iterator

 MIN_STACKSTRING_LEN = 8

@@ -119,7 +119,7 @@ def twos_complement(val: int, bits: int) -> int:
        return val


-def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]:
+def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
    """
    Generate (offset, key) tuples of embedded PEs

--- a/Show More
+++ b/Show More