merge upstream

add test case
Sync capa-testfiles submodule
2026-03-17 23:39:00 -07:00 · 2024-06-18 10:57:57 -06:00 · 2024-06-18 07:35:38 -07:00 · 2024-06-18 11:00:26 +00:00 · 2024-06-18 06:36:10 +02:00 · 2024-06-18 06:35:50 +02:00
155 changed files with 37230 additions and 2490 deletions
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -57,7 +57,7 @@ When we make a significant decision in how we maintain the project and what we c
 we will document it in the [capa issues tracker](https://github.com/mandiant/capa/issues).
 This is the best place review our discussions about what/how/why we do things in the project.
 If you have a question, check to see if it is documented there.
-If it is *not* documented there, or you can't find an answer, please open a issue.
+If it is *not* documented there, or you can't find an answer, please open an issue.
 We'll link to existing issues when appropriate to keep discussions in one place.

 ## How Can I Contribute?
--- a/.github/flake8.ini
+++ b/.github/flake8.ini
@@ -10,6 +10,8 @@ extend-ignore =
    F811,
    # E501 line too long  (prefer black)
    E501,
+    # E701 multiple statements on one line (colon)  (prefer black, see https://github.com/psf/black/issues/4173)
+    E701,
    # B010 Do not call setattr with a constant attribute value
    B010,
    # G200 Logging statement uses exception in arguments
@@ -38,4 +40,4 @@ per-file-ignores =

 copyright-check = True
 copyright-min-file-size = 1 
-copyright-regexp = Copyright \(C\) 2023 Mandiant, Inc. All Rights Reserved.
+copyright-regexp = Copyright \(C\) \d{4} Mandiant, Inc. All Rights Reserved.
--- a/.github/mypy/mypy.ini
+++ b/.github/mypy/mypy.ini
@@ -1,8 +1,5 @@
 [mypy]

-[mypy-halo.*]
-ignore_missing_imports = True
-
 [mypy-tqdm.*]
 ignore_missing_imports = True

--- a/.github/pyinstaller/hooks/hook-vivisect.py
+++ b/.github/pyinstaller/hooks/hook-vivisect.py
@@ -24,7 +24,7 @@ excludedimports = [
    "pyqtwebengine",
    # the above are imported by these viv modules.
    # so really, we'd want to exclude these submodules of viv.
-    # but i dont think this works.
+    # but i don't think this works.
    "vqt",
    "vdb.qt",
    "envi.qt",
--- a/.github/pyinstaller/pyinstaller.spec
+++ b/.github/pyinstaller/pyinstaller.spec
@@ -1,10 +1,19 @@
 # -*- mode: python -*-
 # Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
-import os.path
-import subprocess
+import sys

 import wcwidth
+import capa.rules.cache

+from pathlib import Path
+
+# SPECPATH is a global variable which points to .spec file path
+capa_dir = Path(SPECPATH).parent.parent
+rules_dir = capa_dir / 'rules'
+cache_dir = capa_dir / 'cache'
+
+if not capa.rules.cache.generate_rule_cache(rules_dir, cache_dir):
+    sys.exit(-1)

 a = Analysis(
    # when invoking pyinstaller from the project root,
@@ -17,7 +26,6 @@ a = Analysis(
        # when invoking pyinstaller from the project root,
        # this gets invoked from the directory of the spec file,
        # i.e. ./.github/pyinstaller
-        ("../../assets", "assets"),
        ("../../rules", "rules"),
        ("../../sigs", "sigs"),
        ("../../cache", "cache"),
@@ -27,7 +35,7 @@ a = Analysis(
        # so we manually embed the wcwidth resources here.
        #
        # ref: https://stackoverflow.com/a/62278462/87207
-        (os.path.dirname(wcwidth.__file__), "wcwidth"),
+        (Path(wcwidth.__file__).parent, "wcwidth"),
    ],
    # when invoking pyinstaller from the project root,
    # this gets run from the project root.
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -39,11 +39,11 @@ jobs:
            python_version: 3.8
    steps:
      - name: Checkout capa
-        uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          submodules: true
      - name: Set up Python ${{ matrix.python_version }}
-        uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
+        uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
        with:
          python-version: ${{ matrix.python_version }}
      - if: matrix.os == 'ubuntu-20.04'
@@ -51,22 +51,22 @@ jobs:
      - name: Upgrade pip, setuptools
        run: python -m pip install --upgrade pip setuptools
      - name: Install capa with build requirements
-        run: pip install -e .[build]
-      - name: Cache the rule set
-        run: python ./scripts/cache-ruleset.py ./rules/ ./cache/
+        run: |
+          pip install -r requirements.txt
+          pip install -e .[build]
      - name: Build standalone executable
        run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec
      - name: Does it run (PE)?
-        run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_"
+        run: dist/capa -d "tests/data/Practical Malware Analysis Lab 01-01.dll_"
      - name: Does it run (Shellcode)?
-        run: dist/capa "tests/data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32"
+        run: dist/capa -d "tests/data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32"
      - name: Does it run (ELF)?
-        run: dist/capa "tests/data/7351f8a40c5450557b24622417fc478d.elf_"
+        run: dist/capa -d "tests/data/7351f8a40c5450557b24622417fc478d.elf_"
      - name: Does it run (CAPE)?
        run: |
          7z e "tests/data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"
-          dist/capa "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json"
-      - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
+          dist/capa -d "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json"
+      - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: ${{ matrix.asset_name }}
          path: dist/${{ matrix.artifact_name }}
@@ -90,7 +90,7 @@ jobs:
            asset_name: windows
    steps:
      - name: Download ${{ matrix.asset_name }}
-        uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2
+        uses: actions/download-artifact@eaceaf801fd36c7dee90939fad912460b18a1ffe # v4.1.2
        with:
          name: ${{ matrix.asset_name }}
      - name: Set executable flag
@@ -118,7 +118,7 @@ jobs:
            artifact_name: capa
    steps:
      - name: Download ${{ matrix.asset_name }}
-        uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2
+        uses: actions/download-artifact@eaceaf801fd36c7dee90939fad912460b18a1ffe # v4.1.2
        with:
          name: ${{ matrix.asset_name }}
      - name: Set executable flag
--- a/.github/workflows/changelog.yml
+++ b/.github/workflows/changelog.yml
@@ -7,7 +7,8 @@ on:
  pull_request_target:
    types: [opened, edited, synchronize]

-permissions: read-all
+permissions:
+  pull-requests: write

 jobs:
  check_changelog:
@@ -19,7 +20,7 @@ jobs:
    steps:
    - name: Get changed files
      id: files
-      uses: Ana06/get-changed-files@e0c398b7065a8d84700c471b6afc4116d1ba4e96 # v2.2.0
+      uses: Ana06/get-changed-files@25f79e676e7ea1868813e21465014798211fad8c # v2.3.0
    - name: check changelog updated
      id: changelog_updated
      env:
@@ -29,14 +30,14 @@ jobs:
        echo $FILES | grep -qF 'CHANGELOG.md' || echo $PR_BODY | grep -qiF "$NO_CHANGELOG"
    - name: Reject pull request if no CHANGELOG update
      if: ${{ always() && steps.changelog_updated.outcome == 'failure' }}
-      uses: Ana06/automatic-pull-request-review@0cf4e8a17ba79344ed3fdd7fed6dd0311d08a9d4 # v0.1.0
+      uses: Ana06/automatic-pull-request-review@76aaf9b15b116a54e1da7a28a46f91fe089600bf # v0.2.0
      with:
        repo-token: ${{ secrets.GITHUB_TOKEN }}
        event: REQUEST_CHANGES
        body: "Please add bug fixes, new features, breaking changes and anything else you think is worthwhile mentioning to the `master (unreleased)` section of CHANGELOG.md. If no CHANGELOG update is needed add the following to the PR description: `${{ env.NO_CHANGELOG }}`"
        allow_duplicate: false
    - name: Dismiss previous review if CHANGELOG update
-      uses: Ana06/automatic-pull-request-review@0cf4e8a17ba79344ed3fdd7fed6dd0311d08a9d4 # v0.1.0
+      uses: Ana06/automatic-pull-request-review@76aaf9b15b116a54e1da7a28a46f91fe089600bf # v0.2.0
      with:
        repo-token: ${{ secrets.GITHUB_TOKEN }}
        event: DISMISS
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -17,20 +17,21 @@ jobs:
    permissions:
      id-token: write
    steps:
-      - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      - name: Set up Python
-        uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
+        uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
        with:
          python-version: '3.8'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
+          pip install -r requirements.txt
          pip install -e .[build]
      - name: build package
        run: |
          python -m build
      - name: upload package artifacts
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          path: dist/*
      - name: publish package
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -32,12 +32,12 @@ jobs:

    steps:
      - name: "Checkout code"
-        uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1
        with:
          persist-credentials: false

      - name: "Run analysis"
-        uses: ossf/scorecard-action@99c53751e09b9529366343771cc321ec74e9bd3d # v2.0.6
+        uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
        with:
          results_file: results.sarif
          results_format: sarif
@@ -59,7 +59,7 @@ jobs:
      # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
      # format to the repository Actions tab.
      - name: "Upload artifact"
-        uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # v3.1.0
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: SARIF file
          path: results.sarif
@@ -67,6 +67,6 @@ jobs:

      # Upload the results to GitHub's code scanning dashboard.
      - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@807578363a7869ca324a79039e6db9c843e0e100 # v2.1.27
+        uses: github/codeql-action/upload-sarif@8a470fddafa5cbb6266ee11b37ef4d8aae19c571  # v3.24.6
        with:
          sarif_file: results.sarif
--- a/.github/workflows/tag.yml
+++ b/.github/workflows/tag.yml
@@ -12,7 +12,7 @@ jobs:
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout capa-rules
-      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      with:
        repository: mandiant/capa-rules
        token: ${{ secrets.CAPA_TOKEN }}
@@ -25,7 +25,7 @@ jobs:
        git tag $name -m "https://github.com/mandiant/capa/releases/$name"
        # TODO update branch name-major=${name%%.*}
    - name: Push tag to capa-rules
-      uses: ad-m/github-push-action@0fafdd62b84042d49ec0cb92d9cac7f7ce4ec79e # master
+      uses: ad-m/github-push-action@d91a481090679876dfc4178fef17f286781251df # v0.8.0
      with:
        repository: mandiant/capa-rules
        github_token: ${{ secrets.CAPA_TOKEN }}
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -17,7 +17,7 @@ jobs:
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout capa
-      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
    # The sync GH action in capa-rules relies on a single '- *$' in the CHANGELOG file
    - name: Ensure CHANGELOG has '- *$'
      run: |
@@ -28,14 +28,16 @@ jobs:
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout capa
-      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
    # use latest available python to take advantage of best performance
    - name: Set up Python 3.11
-      uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
+      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
        python-version: "3.11"
    - name: Install dependencies
-      run: pip install -e .[dev]
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: Lint with ruff
      run: pre-commit run ruff
    - name: Lint with isort
@@ -45,21 +47,25 @@ jobs:
    - name: Lint with flake8
      run: pre-commit run flake8 --hook-stage manual
    - name: Check types with mypy
-      run:  pre-commit run mypy --hook-stage manual
+      run: pre-commit run mypy --hook-stage manual
+    - name: Check imports against dependencies
+      run: pre-commit run deptry --hook-stage manual

  rule_linter:
    runs-on: ubuntu-20.04
    steps:
    - name: Checkout capa with submodules
-      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      with:
        submodules: recursive
    - name: Set up Python 3.11
-      uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
+      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
        python-version: "3.11"
    - name: Install capa
-      run: pip install -e .[dev]
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: Run rule linter
      run: python scripts/lint.py rules/

@@ -83,18 +89,20 @@ jobs:
            python-version: "3.10"
    steps:
    - name: Checkout capa with submodules
-      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      with:
        submodules: recursive
    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
+      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install pyyaml
      if: matrix.os == 'ubuntu-20.04'
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
-      run: pip install -e .[dev]
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: Run tests (fast)
      # this set of tests runs about 80% of the cases in 20% of the time,
      # and should catch most errors quickly.
@@ -106,7 +114,7 @@ jobs:
    name: Binary Ninja tests for ${{ matrix.python-version }}
    env:
      BN_SERIAL: ${{ secrets.BN_SERIAL }}
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
    needs: [tests]
    strategy:
      fail-fast: false
@@ -116,12 +124,12 @@ jobs:
    - name: Checkout capa with submodules
      # do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
      if: ${{ env.BN_SERIAL != 0 }}
-      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      with:
        submodules: recursive
    - name: Set up Python ${{ matrix.python-version }}
      if: ${{ env.BN_SERIAL != 0 }}
-      uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
+      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install pyyaml
@@ -129,7 +137,9 @@ jobs:
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
      if: ${{ env.BN_SERIAL != 0 }}
-      run: pip install -e .[dev]
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: install Binary Ninja
      if: ${{ env.BN_SERIAL != 0 }}
      run: |
@@ -153,31 +163,23 @@ jobs:
      matrix:
        python-version: ["3.8", "3.11"]
        java-version: ["17"]
-        gradle-version: ["7.3"]
-        ghidra-version: ["10.3"]
-        public-version: ["PUBLIC_20230510"] # for ghidra releases
-        jep-version: ["4.1.1"]
-        ghidrathon-version: ["3.0.0"]
+        ghidra-version: ["11.0.1"]
+        public-version: ["PUBLIC_20240130"] # for ghidra releases
+        ghidrathon-version: ["4.0.0"] 
    steps:
    - name: Checkout capa with submodules
-      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      with:
        submodules: true
    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
+      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
      with:
        python-version: ${{ matrix.python-version }}
    - name: Set up Java ${{ matrix.java-version }}
-      uses: actions/setup-java@5ffc13f4174014e2d4d4572b3d74c3fa61aeb2c2 # v3
+      uses: actions/setup-java@387ac29b308b003ca37ba93a6cab5eb57c8f5f93 # v4.0.0
      with:
        distribution: 'temurin'
        java-version: ${{ matrix.java-version }}
-    - name: Set up Gradle ${{ matrix.gradle-version }} 
-      uses: gradle/gradle-build-action@40b6781dcdec2762ad36556682ac74e31030cfe2 # v2.5.1
-      with:
-        gradle-version: ${{ matrix.gradle-version }}
-    - name: Install Jep ${{ matrix.jep-version }} 
-      run : pip install jep==${{ matrix.jep-version }}
    - name: Install Ghidra ${{ matrix.ghidra-version }} 
      run: |
        mkdir ./.github/ghidra
@@ -186,14 +188,17 @@ jobs:
    - name: Install Ghidrathon
      run : |
        mkdir ./.github/ghidrathon
-        curl -o ./.github/ghidrathon/ghidrathon-${{ matrix.ghidrathon-version }}.zip "https://codeload.github.com/mandiant/Ghidrathon/zip/refs/tags/v${{ matrix.ghidrathon-version }}"
-        unzip .github/ghidrathon/ghidrathon-${{ matrix.ghidrathon-version }}.zip -d .github/ghidrathon/
-        gradle -p ./.github/ghidrathon/Ghidrathon-${{ matrix.ghidrathon-version }}/ -PGHIDRA_INSTALL_DIR=$(pwd)/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
-        unzip .github/ghidrathon/Ghidrathon-${{ matrix.ghidrathon-version }}/dist/*.zip -d .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/Ghidra/Extensions
+        wget "https://github.com/mandiant/Ghidrathon/releases/download/v${{ matrix.ghidrathon-version }}/Ghidrathon-v${{ matrix.ghidrathon-version}}.zip" -O ./.github/ghidrathon/ghidrathon-v${{ matrix.ghidrathon-version }}.zip
+        unzip .github/ghidrathon/ghidrathon-v${{ matrix.ghidrathon-version }}.zip -d .github/ghidrathon/
+        python -m pip install -r .github/ghidrathon/requirements.txt
+        python .github/ghidrathon/ghidrathon_configure.py $(pwd)/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
+        unzip .github/ghidrathon/Ghidrathon-v${{ matrix.ghidrathon-version }}.zip -d .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/Ghidra/Extensions
    - name: Install pyyaml
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
-      run: pip install -e .[dev] 
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: Run tests
      run: | 
        mkdir ./.github/ghidra/project
@@ -201,4 +206,4 @@ jobs:
        cat ../output.log
        exit_code=$(cat ../output.log | grep exit | awk '{print $NF}')
        exit $exit_code
- 
+ 
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,8 +1,6 @@
 [submodule "rules"]
 	path = rules
 	url = ../capa-rules.git
-	branch = dynamic-syntax
 [submodule "tests/data"]
 	path = tests/data
 	url = ../capa-testfiles.git
-	branch = dynamic-feature-extractor
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -110,6 +110,16 @@ repos:
        always_run: true
        pass_filenames: false

+-   repo: local
+    hooks:
+    -   id: deptry
+        name: deptry
+        stages: [push, manual]
+        language: system
+        entry: deptry .
+        always_run: true
+        pass_filenames: false
+
 -   repo: local
    hooks:
    -   id: pytest-fast
@@ -127,3 +137,4 @@ repos:
        -   "--ignore=tests/test_scripts.py"
        always_run: true
        pass_filenames: false
+
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,127 @@
 ## master (unreleased)

 ### New Features
+
+### Breaking Changes
+
+### New Rules (0)
+
+-
+
+### Bug Fixes
+
+### capa explorer IDA Pro plugin
+
+### Development
+
+### Raw diffs
+- [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master)
+- [capa-rules v7.1.0...master](https://github.com/mandiant/capa-rules/compare/v7.1.0...master)
+
+## v7.1.0
+The v7.1.0 release brings large performance improvements to capa's rule matching engine.
+Additionally, we've fixed various bugs and added new features for people using and developing capa.
+
+Special thanks to our repeat and new contributors:
+* @sjha2048 made their first contribution in https://github.com/mandiant/capa/pull/2000
+* @Rohit1123 made their first contribution in https://github.com/mandiant/capa/pull/1990
+* @psahithireddy made their first contribution in https://github.com/mandiant/capa/pull/2020
+* @Atlas-64 made their first contribution in https://github.com/mandiant/capa/pull/2018
+* @s-ff made their first contribution in https://github.com/mandiant/capa/pull/2011
+* @samadpls made their first contribution in https://github.com/mandiant/capa/pull/2024
+* @acelynnzhang made their first contribution in https://github.com/mandiant/capa/pull/2044
+* @RainRat made their first contribution in https://github.com/mandiant/capa/pull/2058
+* @ReversingWithMe made their first contribution in https://github.com/mandiant/capa/pull/2093
+* @malwarefrank made their first contribution in https://github.com/mandiant/capa/pull/2037
+
+### New Features
+- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls
+- ELF: detect OS from statically-linked Go binaries #1978 @williballenthin
+- add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
+- document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
+- Add json to sarif conversion script @reversingwithme
+- render maec/* fields #843 @s-ff
+- replace Halo spinner with Rich #2086 @s-ff
+- optimize rule matching #2080 @williballenthin
+- add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin
+- relax dependency version requirements for the capa library #2053 @williballenthin
+- add scripts dependency group and update documentation #2145 @mr-tz
+
+### New Rules (25)
+
+- impact/wipe-disk/delete-drive-layout-via-ioctl william.ballenthin@mandiant.com
+- host-interaction/driver/interact-with-driver-via-ioctl moritz.raabe@mandiant.com
+- host-interaction/driver/unload-driver moritz.raabe@mandiant.com
+- nursery/get-disk-information-via-ioctl william.ballenthin@mandiant.com
+- nursery/get-volume-information-via-ioctl william.ballenthin@mandiant.com
+- nursery/unmount-volume-via-ioctl william.ballenthin@mandiant.com
+- data-manipulation/encryption/rc4/encrypt-data-using-rc4-via-systemfunction033 daniel.stepanic@elastic.co
+- anti-analysis/anti-forensic/self-deletion/self-delete-using-alternate-data-streams daniel.stepanic@elastic.co
+- nursery/change-memory-permission-on-linux mehunhoff@google.com
+- nursery/check-file-permission-on-linux mehunhoff@google.com
+- nursery/check-if-process-is-running-under-android-emulator-on-android mehunhoff@google.com
+- nursery/map-or-unmap-memory-on-linux mehunhoff@google.com
+- persistence/act-as-share-provider-dll jakub.jozwiak@mandiant.com
+- persistence/act-as-windbg-extension jakub.jozwiak@mandiant.com
+- persistence/act-as-time-provider-dll jakub.jozwiak@mandiant.com
+- host-interaction/gui/window/hide/hide-graphical-window-from-taskbar jakub.jozwiak@mandiant.com
+- compiler/dart/compiled-with-dart jakub.jozwiak@mandiant.com
+- nursery/bypass-hidden-api-restrictions-via-jni-on-android mehunhoff@google.com
+- nursery/get-current-process-filesystem-mounts-on-linux mehunhoff@google.com
+- nursery/get-current-process-memory-mapping-on-linux mehunhoff@google.com
+- nursery/get-system-property-on-android mehunhoff@google.com
+- nursery/hook-routines-via-lsplant mehunhoff@google.com
+- nursery/load-packed-dex-via-jiagu-on-android mehunhoff@google.com
+- nursery/modify-api-blacklist-or-denylist-via-jni-on-android mehunhoff@google.com
+- nursery/truncate-file-on-linux mehunhoff@google.com
+
+### Bug Fixes
+
+- do some imports closer to where they are used #1810 @williballenthin
+- binja: fix and simplify stack string detection code after binja 4.0 @xusheng6
+- binja: add support for forwarded export #1646 @xusheng6
+- cape: support more report formats #2035 @mr-tz
+- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
+
+### capa explorer IDA Pro plugin
+- replace deprecated IDA API find_binary with bin_search #1606 @s-ff
+
+### Development
+
+- ci: Fix PR review in the changelog check GH action #2004 @Ana06
+- ci: use rules number badge stored in our bot gist and generated using `schneegans/dynamic-badges-action` #2001 capa-rules#882 @Ana06
+- ci: update github workflows to use latest version of actions that were using a deprecated version of node #1967 #2003 capa-rules#883 @sjha2048 @Ana06
+- ci: update binja version to stable 4.0 #2016 @xusheng6
+- ci: update github workflows to reflect the latest ghidrathon installation and bump the jep and ghidra versions #2020 @psahithireddy
+- ci: include rule caching in PyInstaller build process #2097 @s-ff
+- add deptry support #1497 @s-ff
+
+### Raw diffs
+- [capa v7.0.1...v7.1.0](https://github.com/mandiant/capa/compare/v7.0.1...v7.1.0)
+- [capa-rules v7.0.1...v7.1.0](https://github.com/mandiant/capa-rules/compare/v7.0.1...v7.1.0)
+
+## v7.0.1
+
+This release fixes a circular import error when using capa as a library.
+
+### Bug Fixes
+
+- fix potentially circular import errors #1969 @williballenthin
+
+### Raw diffs
+- [capa v7.0.0...v7.0.1](https://github.com/mandiant/capa/compare/v7.0.0...v7.0.1)
+- [capa-rules v7.0.0...v7.0.1](https://github.com/mandiant/capa-rules/compare/v7.0.0...v7.0.1)
+
+## v7.0.0
+This is the v7.0.0 release of capa which was mainly worked on during the Google Summer of Code (GSoC) 2023. A huge
+shoutout to our GSoC contributors @colton-gabertan and @yelhamer for their amazing work.
+
+Also, a big thanks to the other contributors: @aaronatp, @Aayush-Goel-04, @bkojusner, @doomedraven, @ruppde, @larchchen, @JCoonradt, and @xusheng6.
+
+### New Features
+
 - add Ghidra backend #1770 #1767 @colton-gabertan @mike-hunhoff
+- add Ghidra UI integration #1734 @colton-gabertan @mike-hunhoff
 - add dynamic analysis via CAPE sandbox reports #48 #1535 @yelhamer
  - add call scope #771 @yelhamer
  - add thread scope #1517 @yelhamer
@@ -13,6 +133,7 @@
 - binja: add support for forwarded exports #1646 @xusheng6
 - binja: add support for symtab names #1504 @xusheng6
 - add com class/interface features #322 @Aayush-goel-04
+- dotnet: emit enclosing class information for nested classes #1780 #1913 @bkojusner @mike-hunhoff

 ### Breaking Changes

@@ -21,8 +142,11 @@
 - protobuf: deprecate `Metadata.analysis` in favor of `Metadata.analysis2` that is dynamic analysis aware @williballenthin
 - update freeze format to v3, adding support for dynamic analysis @williballenthin
 - extractor: ignore DLL name for api features #1815 @mr-tz
+- main: introduce wrapping routines within main for working with CLI args #1813 @williballenthin
+- move functions from `capa.main` to new `capa.loader` namespace #1821 @williballenthin
+- proto: add `package` declaration #1960 @larchchen

-### New Rules (34)
+### New Rules (41)

 - nursery/get-ntoskrnl-base-address @mr-tz
 - host-interaction/network/connectivity/set-tcp-connection-state @johnk3r
@@ -57,21 +181,53 @@
 - data-manipulation/compression/create-cabinet-on-windows michael.hunhoff@mandiant.com jakub.jozwiak@mandiant.com
 - data-manipulation/compression/extract-cabinet-on-windows jakub.jozwiak@mandiant.com
 - lib/create-file-decompression-interface-context-on-windows jakub.jozwiak@mandiant.com
-
+- nursery/enumerate-files-in-dotnet moritz.raabe@mandiant.com anushka.virgaonkar@mandiant.com
+- nursery/get-mac-address-in-dotnet moritz.raabe@mandiant.com michael.hunhoff@mandiant.com echernofsky@google.com
+- nursery/get-current-process-command-line william.ballenthin@mandiant.com
+- nursery/get-current-process-file-path william.ballenthin@mandiant.com
+- nursery/hook-routines-via-dlsym-rtld_next william.ballenthin@mandiant.com
+- nursery/linked-against-hp-socket still@teamt5.org
+- host-interaction/process/inject/process-ghostly-hollowing sara.rincon@mandiant.com

 ### Bug Fixes
 - ghidra: fix `ints_to_bytes` performance #1761 @mike-hunhoff
 - binja: improve function call site detection @xusheng6
 - binja: use `binaryninja.load` to open files @xusheng6
 - binja: bump binja version to 3.5 #1789 @xusheng6
+- elf: better detect ELF OS via GCC .ident directives #1928 @williballenthin
+- elf: better detect ELF OS via Android dependencies #1947 @williballenthin
+- fix setuptools package discovery #1886 @gmacon @mr-tz
+- remove unnecessary scripts/vivisect-py2-vs-py3.sh file #1949 @JCoonradt

 ### capa explorer IDA Pro plugin
+- various integration updates and minor bug fixes

 ### Development
+- update ATT&CK/MBC data for linting #1932 @mr-tz
+
+#### Developer Notes
+With this new release, many classes and concepts have been split up into static (mostly identical to the
+prior implementations) and dynamic ones. For example, the legacy FeatureExtractor class has been renamed to
+StaticFeatureExtractor and the DynamicFeatureExtractor has been added.
+
+Starting from version 7.0, we have moved the component responsible for feature extraction from main to a new
+capabilities module. Now, users wishing to utilize capa’s feature extraction abilities should use that module instead
+of importing the relevant logic from the main file.
+
+For sandbox-based feature extractors, we are using Pydantic models. Contributions of more models for other sandboxes
+are very welcome!
+
+With this release we've reorganized the logic found in `main()` to localize logic and to ease readability, changes,
+and integrations. The new "main routines" are expected to be used only within main functions, either capa main or
+related scripts. These functions should not be invoked from library code.
+
+Beyond copying code around, we've refined the handling of the input file/format/backend. The logic for picking the
+format and backend is more consistent. We've documented that the input file is not necessarily the sample itself:
+cape/freeze/etc. inputs are not actually the sample.

 ### Raw diffs
- [capa v6.1.0...master](https://github.com/mandiant/capa/compare/v6.1.0...master)
- [capa-rules v6.1.0...master](https://github.com/mandiant/capa-rules/compare/v6.1.0...master)
+- [capa v6.1.0...v7.0.0](https://github.com/mandiant/capa/compare/v6.1.0...v7.0.0)
+- [capa-rules v6.1.0...v7.0.0](https://github.com/mandiant/capa-rules/compare/v6.1.0...v7.0.0)

 ## v6.1.0

@@ -194,7 +350,7 @@ For those that use capa as a library, we've introduced some limited breaking cha
 - [capa-rules v5.1.0...v6.0.0](https://github.com/mandiant/capa-rules/compare/v5.1.0...v6.0.0)

 ## v5.1.0
-capa version 5.1.0 adds a Protocol Buffers (protobuf) format for result documents. Additionally, the [Vector35](https://vector35.com/) team contributed a new feature extractor using Binary Ninja. Other new features are a new CLI flag to override the detected operating system, functionality to read and render existing result documents, and a output color format that's easier to read.
+capa version 5.1.0 adds a Protocol Buffers (protobuf) format for result documents. Additionally, the [Vector35](https://vector35.com/) team contributed a new feature extractor using Binary Ninja. Other new features are a new CLI flag to override the detected operating system, functionality to read and render existing result documents, and an output color format that's easier to read.

 Over 25 capa rules have been added and improved.

@@ -1393,7 +1549,7 @@ The IDA Pro integration is now distributed as a real plugin, instead of a script
  - updates distributed PyPI/`pip install --upgrade` without touching your `%IDADIR%`
  - generally doing thing the "right way"

-How to get this new version? Its easy: download [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory and update your capa installation (incidentally, this is a good opportunity to migrate to `pip install flare-capa` instead of git checkouts). Now you should see the plugin listed in the `Edit > Plugins > FLARE capa explorer` menu in IDA. 
+How to get this new version? It's easy: download [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory and update your capa installation (incidentally, this is a good opportunity to migrate to `pip install flare-capa` instead of git checkouts). Now you should see the plugin listed in the `Edit > Plugins > FLARE capa explorer` menu in IDA. 

 Please refer to the plugin [readme](https://github.com/mandiant/capa/blob/master/capa/ida/plugin/README.md) for additional information on installing and using the IDA Pro plugin.

@@ -1626,4 +1782,4 @@ Download a standalone binary below and checkout the readme [here on GitHub](http
 ### Raw diffs

  - [capa v1.0.0...v1.1.0](https://github.com/mandiant/capa/compare/v1.0.0...v1.1.0)
-  - [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0)
+  - [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0)
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -0,0 +1,8 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+  - name: "The FLARE Team"
+title: "capa, a tool to identify capabilities in programs and sandbox traces."
+date-released: 2020-07-16
+url: "https://github.com/mandiant/capa"
+
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -187,7 +187,7 @@
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

-   Copyright (C) 2023 Mandiant, Inc.
+   Copyright (C) 2020 Mandiant, Inc.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
 [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
-[![Number of rules](https://img.shields.io/badge/rules-859-blue.svg)](https://github.com/mandiant/capa-rules)
+[![Number of rules](https://gist.githubusercontent.com/capa-bot/6d7960e911f48b3b74916df8988cf0f3/raw/rules_badge.svg)](https://github.com/mandiant/capa-rules)
 [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
 [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
 [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)
@@ -11,11 +11,12 @@ capa detects capabilities in executable files.
 You run it against a PE, ELF, .NET module, shellcode file, or a sandbox report and it tells you what it thinks the program can do.
 For example, it might suggest that the file is a backdoor, is capable of installing services, or relies on HTTP to communicate.

-Check out:
- the overview in our first [capa blog post](https://www.mandiant.com/resources/capa-automatically-identify-malware-capabilities)
- the major version 2.0 updates described in our [second blog post](https://www.mandiant.com/resources/capa-2-better-stronger-faster)
- the major version 3.0 (ELF support) described in the [third blog post](https://www.mandiant.com/resources/elfant-in-the-room-capa-v3)
- the major version 4.0 (.NET support) described in the [fourth blog post](https://www.mandiant.com/resources/blog/capa-v4-casting-wider-net)
+Check out our capa blog posts:
+- [Dynamic capa: Exploring Executable Run-Time Behavior with the CAPE Sandbox](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox)
+- [capa v4: casting a wider .NET](https://www.mandiant.com/resources/blog/capa-v4-casting-wider-net) (.NET support)
+- [ELFant in the Room – capa v3](https://www.mandiant.com/resources/elfant-in-the-room-capa-v3) (ELF support)
+- [capa 2.0: Better, Stronger, Faster](https://www.mandiant.com/resources/capa-2-better-stronger-faster)
+- [capa: Automatically Identify Malware Capabilities](https://www.mandiant.com/resources/capa-automatically-identify-malware-capabilities)

 ```
 $ capa.exe suspicious.exe
@@ -125,7 +126,7 @@ function @ 0x4011C0
 ...
 ```

-Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capabilty extraction.
+Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capability extraction.
 In order to use this, you first submit your sample to CAPE for analysis, and then run capa against the generated report (JSON).

 Here's an example of running capa against a packed binary, and then running capa against the CAPE report of that binary:
@@ -259,7 +260,9 @@ capa explorer helps you identify interesting areas of a program and build new ca

 ![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png)

-If you use Ghidra, you can use the Python 3 [Ghidra feature extractor](/capa/ghidra/). This integration enables capa to extract features directly from your Ghidra database, which can help you identify capabilities in programs that you analyze using Ghidra.
+If you use Ghidra, then you can use the [capa + Ghidra integration](/capa/ghidra/) to run capa's analysis directly on your Ghidra database and render the results in Ghidra's user interface.
+
+<img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>

 # further information
 ## capa
--- a/assets/classes.json.gz
+++ b/assets/classes.json.gz
--- a/assets/interfaces.json.gz
+++ b/assets/interfaces.json.gz
--- a/capa/capabilities/dynamic.py
+++ b/capa/capabilities/dynamic.py
@@ -6,6 +6,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import sys
 import logging
 import itertools
 import collections
@@ -65,7 +66,7 @@ def find_thread_capabilities(
    features: FeatureSet = collections.defaultdict(set)

    # matches found at the call scope.
-    # might be found at different calls, thats ok.
+    # might be found at different calls, that's ok.
    call_matches: MatchResults = collections.defaultdict(list)

    for ch in extractor.get_calls(ph, th):
@@ -103,11 +104,11 @@ def find_process_capabilities(
    process_features: FeatureSet = collections.defaultdict(set)

    # matches found at the basic threads.
-    # might be found at different threads, thats ok.
+    # might be found at different threads, that's ok.
    thread_matches: MatchResults = collections.defaultdict(list)

    # matches found at the call scope.
-    # might be found at different calls, thats ok.
+    # might be found at different calls, that's ok.
    call_matches: MatchResults = collections.defaultdict(list)

    for th in extractor.get_threads(ph):
@@ -147,6 +148,11 @@ def find_dynamic_capabilities(
                def pbar(s, *args, **kwargs):
                    return s

+            elif not sys.stderr.isatty():
+                # don't display progress bar when stderr is redirected to a file
+                def pbar(s, *args, **kwargs):
+                    return s
+
            processes = list(extractor.get_processes())

            pb = pbar(processes, desc="matching", unit=" processes", leave=False)
--- a/capa/capabilities/static.py
+++ b/capa/capabilities/static.py
@@ -6,6 +6,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import sys
 import time
 import logging
 import itertools
@@ -66,7 +67,7 @@ def find_basic_block_capabilities(
    features: FeatureSet = collections.defaultdict(set)

    # matches found at the instruction scope.
-    # might be found at different instructions, thats ok.
+    # might be found at different instructions, that's ok.
    insn_matches: MatchResults = collections.defaultdict(list)

    for insn in extractor.get_instructions(f, bb):
@@ -106,11 +107,11 @@ def find_code_capabilities(
    function_features: FeatureSet = collections.defaultdict(set)

    # matches found at the basic block scope.
-    # might be found at different basic blocks, thats ok.
+    # might be found at different basic blocks, that's ok.
    bb_matches: MatchResults = collections.defaultdict(list)

    # matches found at the instruction scope.
-    # might be found at different instructions, thats ok.
+    # might be found at different instructions, that's ok.
    insn_matches: MatchResults = collections.defaultdict(list)

    for bb in extractor.get_basic_blocks(fh):
@@ -156,6 +157,11 @@ def find_static_capabilities(
                def pbar(s, *args, **kwargs):
                    return s

+            elif not sys.stderr.isatty():
+                # don't display progress bar when stderr is redirected to a file
+                def pbar(s, *args, **kwargs):
+                    return s
+
            functions = list(extractor.get_functions())
            n_funcs = len(functions)

@@ -182,9 +188,16 @@ def find_static_capabilities(
                )
                t1 = time.time()

-                match_count = sum(len(res) for res in function_matches.values())
-                match_count += sum(len(res) for res in bb_matches.values())
-                match_count += sum(len(res) for res in insn_matches.values())
+                match_count = 0
+                for name, matches_ in itertools.chain(
+                    function_matches.items(), bb_matches.items(), insn_matches.items()
+                ):
+                    # in practice, most matches are derived rules,
+                    # like "check OS version/5bf4c7f39fd4492cbed0f6dc7d596d49"
+                    # but when we log to the human, they really care about "real" rules.
+                    if not ruleset.rules[name].is_subscope_rule():
+                        match_count += len(matches_)
+
                logger.debug(
                    "analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
                    f.address,
@@ -213,7 +226,7 @@ def find_static_capabilities(
    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
    feature_counts.file = feature_count

-    matches = dict(
+    matches: MatchResults = dict(
        itertools.chain(
            # each rule exists in exactly one scope,
            # so there won't be any overlap among these following MatchResults,
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -102,14 +102,14 @@ class And(Statement):
        super().__init__(description=description)
        self.children = children

-    def evaluate(self, ctx, short_circuit=True):
+    def evaluate(self, features: FeatureSet, short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature.and"] += 1

        if short_circuit:
            results = []
            for child in self.children:
-                result = child.evaluate(ctx, short_circuit=short_circuit)
+                result = child.evaluate(features, short_circuit=short_circuit)
                results.append(result)
                if not result:
                    # short circuit
@@ -117,7 +117,7 @@ class And(Statement):

            return Result(True, self, results)
        else:
-            results = [child.evaluate(ctx, short_circuit=short_circuit) for child in self.children]
+            results = [child.evaluate(features, short_circuit=short_circuit) for child in self.children]
            success = all(results)
            return Result(success, self, results)

@@ -135,14 +135,14 @@ class Or(Statement):
        super().__init__(description=description)
        self.children = children

-    def evaluate(self, ctx, short_circuit=True):
+    def evaluate(self, features: FeatureSet, short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature.or"] += 1

        if short_circuit:
            results = []
            for child in self.children:
-                result = child.evaluate(ctx, short_circuit=short_circuit)
+                result = child.evaluate(features, short_circuit=short_circuit)
                results.append(result)
                if result:
                    # short circuit as soon as we hit one match
@@ -150,7 +150,7 @@ class Or(Statement):

            return Result(False, self, results)
        else:
-            results = [child.evaluate(ctx, short_circuit=short_circuit) for child in self.children]
+            results = [child.evaluate(features, short_circuit=short_circuit) for child in self.children]
            success = any(results)
            return Result(success, self, results)

@@ -162,11 +162,11 @@ class Not(Statement):
        super().__init__(description=description)
        self.child = child

-    def evaluate(self, ctx, short_circuit=True):
+    def evaluate(self, features: FeatureSet, short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature.not"] += 1

-        results = [self.child.evaluate(ctx, short_circuit=short_circuit)]
+        results = [self.child.evaluate(features, short_circuit=short_circuit)]
        success = not results[0]
        return Result(success, self, results)

@@ -185,7 +185,7 @@ class Some(Statement):
        self.count = count
        self.children = children

-    def evaluate(self, ctx, short_circuit=True):
+    def evaluate(self, features: FeatureSet, short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature.some"] += 1

@@ -193,7 +193,7 @@ class Some(Statement):
            results = []
            satisfied_children_count = 0
            for child in self.children:
-                result = child.evaluate(ctx, short_circuit=short_circuit)
+                result = child.evaluate(features, short_circuit=short_circuit)
                results.append(result)
                if result:
                    satisfied_children_count += 1
@@ -204,7 +204,7 @@ class Some(Statement):

            return Result(False, self, results)
        else:
-            results = [child.evaluate(ctx, short_circuit=short_circuit) for child in self.children]
+            results = [child.evaluate(features, short_circuit=short_circuit) for child in self.children]
            # note that here we cast the child result as a bool
            # because we've overridden `__bool__` above.
            #
@@ -214,7 +214,7 @@ class Some(Statement):


 class Range(Statement):
-    """match if the child is contained in the ctx set with a count in the given range."""
+    """match if the child is contained in the feature set with a count in the given range."""

    def __init__(self, child, min=None, max=None, description=None):
        super().__init__(description=description)
@@ -222,15 +222,15 @@ class Range(Statement):
        self.min = min if min is not None else 0
        self.max = max if max is not None else (1 << 64 - 1)

-    def evaluate(self, ctx, **kwargs):
+    def evaluate(self, features: FeatureSet, short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature.range"] += 1

-        count = len(ctx.get(self.child, []))
+        count = len(features.get(self.child, []))
        if self.min == 0 and count == 0:
            return Result(True, self, [])

-        return Result(self.min <= count <= self.max, self, [], locations=ctx.get(self.child))
+        return Result(self.min <= count <= self.max, self, [], locations=features.get(self.child))

    def __str__(self):
        if self.max == (1 << 64 - 1):
@@ -250,7 +250,7 @@ class Subscope(Statement):
        self.scope = scope
        self.child = child

-    def evaluate(self, ctx, **kwargs):
+    def evaluate(self, features: FeatureSet, short_circuit=True):
        raise ValueError("cannot evaluate a subscope directly!")


@@ -270,6 +270,14 @@ class Subscope(Statement):
 MatchResults = Mapping[str, List[Tuple[Address, Result]]]


+def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
+    namespace = rule.meta.get("namespace")
+    if namespace:
+        while namespace:
+            yield namespace
+            namespace, _, _ = namespace.rpartition("/")
+
+
 def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: Iterable[Address]):
    """
    record into the given featureset that the given rule matched at the given locations.
@@ -280,11 +288,8 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
    updates `features` in-place. doesn't modify the remaining arguments.
    """
    features[capa.features.common.MatchedRule(rule.name)].update(locations)
-    namespace = rule.meta.get("namespace")
-    if namespace:
-        while namespace:
-            features[capa.features.common.MatchedRule(namespace)].update(locations)
-            namespace, _, _ = namespace.rpartition("/")
+    for namespace in get_rule_namespaces(rule):
+        features[capa.features.common.MatchedRule(namespace)].update(locations)


 def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
--- a/capa/exceptions.py
+++ b/capa/exceptions.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/address.py
+++ b/capa/features/address.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -10,8 +10,7 @@ import abc

 class Address(abc.ABC):
    @abc.abstractmethod
-    def __eq__(self, other):
-        ...
+    def __eq__(self, other): ...

    @abc.abstractmethod
    def __lt__(self, other):
@@ -94,7 +93,7 @@ class ThreadAddress(Address):


 class DynamicCallAddress(Address):
-    """addesses a call in a dynamic execution trace"""
+    """addresses a call in a dynamic execution trace"""

    def __init__(self, thread: ThreadAddress, id: int):
        assert id >= 0
--- a/capa/features/basicblock.py
+++ b/capa/features/basicblock.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/com/init.py
+++ b/capa/features/com/init.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+from enum import Enum
+from typing import Dict, List
+
+from capa.helpers import assert_never
+
+
+class ComType(Enum):
+    CLASS = "class"
+    INTERFACE = "interface"
+
+
+COM_PREFIXES = {
+    ComType.CLASS: "CLSID_",
+    ComType.INTERFACE: "IID_",
+}
+
+
+def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
+    # lazy load these python files since they are so large.
+    # that is, don't load them unless a COM feature is being handled.
+    import capa.features.com.classes
+    import capa.features.com.interfaces
+
+    if com_type == ComType.CLASS:
+        return capa.features.com.classes.COM_CLASSES
+    elif com_type == ComType.INTERFACE:
+        return capa.features.com.interfaces.COM_INTERFACES
+    else:
+        assert_never(com_type)
--- a/capa/features/com/classes.py
+++ b/capa/features/com/classes.py
--- a/capa/features/com/interfaces.py
+++ b/capa/features/com/interfaces.py
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -128,7 +128,7 @@ class Feature(abc.ABC):  # noqa: B024

    def __lt__(self, other):
        # implementing sorting by serializing to JSON is a huge hack.
-        # its slow, inelegant, and probably doesn't work intuitively;
+        # it's slow, inelegant, and probably doesn't work intuitively;
        # however, we only use it for deterministic output, so it's good enough for now.

        # circular import
@@ -166,10 +166,10 @@ class Feature(abc.ABC):  # noqa: B024
    def __repr__(self):
        return str(self)

-    def evaluate(self, ctx: Dict["Feature", Set[Address]], **kwargs) -> Result:
+    def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True) -> Result:
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature." + self.name] += 1
-        return Result(self in ctx, self, [], locations=ctx.get(self, set()))
+        return Result(self in features, self, [], locations=features.get(self, set()))


 class MatchedRule(Feature):
@@ -207,7 +207,7 @@ class Substring(String):
        super().__init__(value, description=description)
        self.value = value

-    def evaluate(self, ctx, short_circuit=True):
+    def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature.substring"] += 1

@@ -216,7 +216,7 @@ class Substring(String):
        matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)

        assert isinstance(self.value, str)
-        for feature, locations in ctx.items():
+        for feature, locations in features.items():
            if not isinstance(feature, (String,)):
                continue

@@ -227,7 +227,7 @@ class Substring(String):
            if self.value in feature.value:
                matches[feature.value].update(locations)
                if short_circuit:
-                    # we found one matching string, thats sufficient to match.
+                    # we found one matching string, that's sufficient to match.
                    # don't collect other matching strings in this mode.
                    break

@@ -299,7 +299,7 @@ class Regex(String):
                f"invalid regular expression: {value} it should use Python syntax, try it at https://pythex.org"
            ) from exc

-    def evaluate(self, ctx, short_circuit=True):
+    def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature.regex"] += 1

@@ -307,7 +307,7 @@ class Regex(String):
        # will unique the locations later on.
        matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)

-        for feature, locations in ctx.items():
+        for feature, locations in features.items():
            if not isinstance(feature, (String,)):
                continue

@@ -322,7 +322,7 @@ class Regex(String):
            if self.re.search(feature.value):
                matches[feature.value].update(locations)
                if short_circuit:
-                    # we found one matching string, thats sufficient to match.
+                    # we found one matching string, that's sufficient to match.
                    # don't collect other matching strings in this mode.
                    break

@@ -384,12 +384,14 @@ class Bytes(Feature):
        super().__init__(value, description=description)
        self.value = value

-    def evaluate(self, ctx, **kwargs):
+    def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
+        assert isinstance(self.value, bytes)
+
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature.bytes"] += 1
+        capa.perf.counters["evaluate.feature.bytes." + str(len(self.value))] += 1

-        assert isinstance(self.value, bytes)
-        for feature, locations in ctx.items():
+        for feature, locations in features.items():
            if not isinstance(feature, (Bytes,)):
                continue

@@ -407,9 +409,10 @@ class Bytes(Feature):
 # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
 ARCH_I386 = "i386"
 ARCH_AMD64 = "amd64"
+ARCH_AARCH64 = "aarch64"
 # dotnet
 ARCH_ANY = "any"
-VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
+VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_AARCH64, ARCH_ANY)


 class Arch(Feature):
@@ -434,11 +437,11 @@ class OS(Feature):
        super().__init__(value, description=description)
        self.name = "os"

-    def evaluate(self, ctx, **kwargs):
+    def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
        capa.perf.counters["evaluate.feature"] += 1
        capa.perf.counters["evaluate.feature." + self.name] += 1

-        for feature, locations in ctx.items():
+        for feature, locations in features.items():
            if not isinstance(feature, (OS,)):
                continue

@@ -458,18 +461,22 @@ FORMAT_AUTO = "auto"
 FORMAT_SC32 = "sc32"
 FORMAT_SC64 = "sc64"
 FORMAT_CAPE = "cape"
+FORMAT_FREEZE = "freeze"
+FORMAT_RESULT = "result"
 STATIC_FORMATS = {
    FORMAT_SC32,
    FORMAT_SC64,
    FORMAT_PE,
    FORMAT_ELF,
    FORMAT_DOTNET,
+    FORMAT_FREEZE,
+    FORMAT_RESULT,
 }
 DYNAMIC_FORMATS = {
    FORMAT_CAPE,
+    FORMAT_FREEZE,
+    FORMAT_RESULT,
 }
-FORMAT_FREEZE = "freeze"
-FORMAT_RESULT = "result"
 FORMAT_UNKNOWN = "unknown"


@@ -482,6 +489,6 @@ class Format(Feature):
 def is_global_feature(feature):
    """
    is this a feature that is extracted at every scope?
-    today, these are OS and arch features.
+    today, these are OS, arch, and format features.
    """
-    return isinstance(feature, (OS, Arch))
+    return isinstance(feature, (OS, Arch, Format))
--- a/capa/features/extractors/base_extractor.py
+++ b/capa/features/extractors/base_extractor.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -75,7 +75,7 @@ class BBHandle:

@dataclass
 class InsnHandle:
-    """reference to a instruction recognized by a feature extractor.
+    """reference to an instruction recognized by a feature extractor.

    Attributes:
        address: the address of the instruction address.
--- a/capa/features/extractors/binja/basicblock.py
+++ b/capa/features/extractors/binja/basicblock.py
@@ -7,17 +7,15 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import string
-import struct
 from typing import Tuple, Iterator

-from binaryninja import Function, Settings
+from binaryninja import Function
 from binaryninja import BasicBlock as BinjaBasicBlock
 from binaryninja import (
    BinaryView,
    SymbolType,
    RegisterValueType,
    VariableSourceType,
-    MediumLevelILSetVar,
    MediumLevelILOperation,
    MediumLevelILBasicBlock,
    MediumLevelILInstruction,
@@ -29,11 +27,6 @@ from capa.features.basicblock import BasicBlock
 from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
 from capa.features.extractors.base_extractor import BBHandle, FunctionHandle

-use_const_outline: bool = False
-settings: Settings = Settings()
-if settings.contains("analysis.outlining.builtins") and settings.get_bool("analysis.outlining.builtins"):
-    use_const_outline = True
-

 def get_printable_len_ascii(s: bytes) -> int:
    """Return string length if all operand bytes are ascii or utf16-le printable"""
@@ -65,7 +58,7 @@ def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:

    addr = target.value.value
    sym = bv.get_symbol_at(addr)
-    if not sym or sym.type != SymbolType.LibraryFunctionSymbol:
+    if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
        return 0

    if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
@@ -91,52 +84,6 @@ def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
    return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))


-def get_printable_len(il: MediumLevelILSetVar) -> int:
-    """Return string length if all operand bytes are ascii or utf16-le printable"""
-    width = il.dest.type.width
-    value = il.src.value.value
-
-    if width == 1:
-        chars = struct.pack("<B", value & 0xFF)
-    elif width == 2:
-        chars = struct.pack("<H", value & 0xFFFF)
-    elif width == 4:
-        chars = struct.pack("<I", value & 0xFFFFFFFF)
-    elif width == 8:
-        chars = struct.pack("<Q", value & 0xFFFFFFFFFFFFFFFF)
-    else:
-        return 0
-
-    def is_printable_ascii(chars_: bytes):
-        return all(c < 127 and chr(c) in string.printable for c in chars_)
-
-    def is_printable_utf16le(chars_: bytes):
-        if all(c == 0x00 for c in chars_[1::2]):
-            return is_printable_ascii(chars_[::2])
-
-    if is_printable_ascii(chars):
-        return width
-
-    if is_printable_utf16le(chars):
-        return width // 2
-
-    return 0
-
-
-def is_mov_imm_to_stack(il: MediumLevelILInstruction) -> bool:
-    """verify instruction moves immediate onto stack"""
-    if il.operation != MediumLevelILOperation.MLIL_SET_VAR:
-        return False
-
-    if il.src.operation != MediumLevelILOperation.MLIL_CONST:
-        return False
-
-    if il.dest.source_type != VariableSourceType.StackVariableSourceType:
-        return False
-
-    return True
-
-
 def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
    """check basic block for stackstring indicators

@@ -144,14 +91,10 @@ def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
    """
    count = 0
    for il in bb:
-        if use_const_outline:
-            count += get_stack_string_len(f, il)
-        else:
-            if is_mov_imm_to_stack(il):
-                count += get_printable_len(il)
+        count += get_stack_string_len(f, il)
+        if count > MIN_STACKSTRING_LEN:
+            return True

-    if count > MIN_STACKSTRING_LEN:
-        return True
    return False


--- a/capa/features/extractors/binja/file.py
+++ b/capa/features/extractors/binja/file.py
@@ -74,13 +74,18 @@ def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]

 def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """extract function exports"""
-    for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol):
+    for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
        if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
            name = sym.short_name
-            yield Export(name), AbsoluteVirtualAddress(sym.address)
-            unmangled_name = unmangle_c_name(name)
-            if name != unmangled_name:
-                yield Export(unmangled_name), AbsoluteVirtualAddress(sym.address)
+            if name.startswith("__forwarder_name(") and name.endswith(")"):
+                yield Export(name[17:-1]), AbsoluteVirtualAddress(sym.address)
+                yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)
+            else:
+                yield Export(name), AbsoluteVirtualAddress(sym.address)
+
+                unmangled_name = unmangle_c_name(name)
+                if name != unmangled_name:
+                    yield Export(unmangled_name), AbsoluteVirtualAddress(sym.address)

    for sym in bv.get_symbols_of_type(SymbolType.DataSymbol):
        if sym.binding not in [SymbolBinding.GlobalBinding]:
--- a/capa/features/extractors/binja/find_binja_api.py
+++ b/capa/features/extractors/binja/find_binja_api.py
@@ -11,7 +11,7 @@ from pathlib import Path
 # When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
 # we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
 # to find out the path of the binaryninja module that has been installed.
-# Note, including the binaryninja module in the `pyintaller.spec` would not work, since the binaryninja module tries to
+# Note, including the binaryninja module in the `pyinstaller.spec` would not work, since the binaryninja module tries to
 # find the binaryninja core e.g., `libbinaryninjacore.dylib`, using a relative path. And this does not work when the
 # binaryninja module is extracted by the PyInstaller.
 code = r"""
--- a/capa/features/extractors/cape/extractor.py
+++ b/capa/features/extractors/cape/extractor.py
@@ -128,6 +128,14 @@ class CapeExtractor(DynamicFeatureExtractor):
        if cr.info.version not in TESTED_VERSIONS:
            logger.warning("CAPE version '%s' not tested/supported yet", cr.info.version)

+        # TODO(mr-tz): support more file types
+        # https://github.com/mandiant/capa/issues/1933
+        if "PE" not in cr.target.file.type:
+            logger.error(
+                "capa currently only supports PE target files, this target file's type is: '%s'.\nPlease report this at: https://github.com/mandiant/capa/issues/1933",
+                cr.target.file.type,
+            )
+
        # observed in 2.4-CAPE reports from capesandbox.com
        if cr.static is None and cr.target.file.pe is not None:
            cr.static = Static()
--- a/capa/features/extractors/cape/models.py
+++ b/capa/features/extractors/cape/models.py
@@ -46,7 +46,7 @@ class FlexibleModel(BaseModel):


 # use this type to indicate that we won't model this data.
-# because its not relevant to our use in capa.
+# because it's not relevant to our use in capa.
 #
 # while its nice to have full coverage of the data shape,
 # it can easily change and break our parsing.
@@ -230,7 +230,7 @@ class File(FlexibleModel):
    sha1: str
    sha256: str
    sha512: str
-    sha3_384: str
+    sha3_384: Optional[str] = None
    ssdeep: str
    # unsure why this would ever be "False"
    tlsh: Optional[Union[str, bool]] = None
@@ -356,8 +356,8 @@ class Behavior(ExactModel):
    anomaly: List[str]
    encryptedbuffers: List[EncryptedBuffer]
    # these are small objects that describe atomic events,
-    # like file move, registery access.
-    # we'll detect the same with our API call analyis.
+    # like file move, registry access.
+    # we'll detect the same with our API call analysis.
    enhanced: Skip = None


@@ -398,7 +398,7 @@ class CapeReport(FlexibleModel):
    behavior: Behavior

    # post-processed results: payloads and extracted configs
-    CAPE: Optional[Cape] = None
+    CAPE: Optional[Union[Cape, List]] = None
    dropped: Optional[List[File]] = None
    procdump: Optional[List[ProcessFile]] = None
    procmemory: ListTODO
--- a/capa/features/extractors/common.py
+++ b/capa/features/extractors/common.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -45,7 +45,7 @@ MATCH_RESULT = b'{"meta":'
 MATCH_JSON_OBJECT = b'{"'


-def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]:
    """
    extract ASCII and UTF-16 LE strings from file
    """
@@ -56,7 +56,7 @@ def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:
        yield String(s.s), FileOffsetAddress(s.offset)


-def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
    if buf.startswith(MATCH_PE):
        yield Format(FORMAT_PE), NO_ADDRESS
    elif buf.startswith(MATCH_ELF):
--- a/capa/features/extractors/dnfile/extractor.py
+++ b/capa/features/extractors/dnfile/extractor.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/dnfile/file.py
+++ b/capa/features/extractors/dnfile/file.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/dnfile/function.py
+++ b/capa/features/extractors/dnfile/function.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/dnfile/helpers.py
+++ b/capa/features/extractors/dnfile/helpers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -83,7 +83,7 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str
        return None

    try:
-        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
+        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get(token.rid)
    except UnicodeDecodeError as e:
        logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
        return None
@@ -119,22 +119,26 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
        access: Optional[str]

        # assume .NET imports starting with get_/set_ are used to access a property
-        if member_ref.Name.startswith("get_"):
+        member_ref_name: str = str(member_ref.Name)
+        if member_ref_name.startswith("get_"):
            access = FeatureAccess.READ
-        elif member_ref.Name.startswith("set_"):
+        elif member_ref_name.startswith("set_"):
            access = FeatureAccess.WRITE
        else:
            access = None

-        member_ref_name: str = member_ref.Name
        if member_ref_name.startswith(("get_", "set_")):
            # remove get_/set_ from MemberRef name
            member_ref_name = member_ref_name[4:]

+        typerefnamespace, typerefname = resolve_nested_typeref_name(
+            member_ref.Class.row_index, member_ref.Class.row, pe
+        )
+
        yield DnType(
            token,
-            member_ref.Class.row.TypeName,
-            namespace=member_ref.Class.row.TypeNamespace,
+            typerefname,
+            namespace=typerefnamespace,
            member=member_ref_name,
            access=access,
        )
@@ -188,6 +192,8 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
            TypeNamespace (index into String heap)
            MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)
    """
+    nested_class_table = get_dotnet_nested_class_table_index(pe)
+
    accessor_map: Dict[int, str] = {}
    for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
        accessor_map[methoddef] = methoddef_access
@@ -206,12 +212,14 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
            token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
            access: Optional[str] = accessor_map.get(token)

-            method_name: str = method.row.Name
+            method_name: str = str(method.row.Name)
            if method_name.startswith(("get_", "set_")):
                # remove get_/set_
                method_name = method_name[4:]

-            yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access)
+            typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)
+
+            yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access)


 def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
@@ -225,6 +233,8 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
            TypeNamespace (index into String heap)
            FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type)
    """
+    nested_class_table = get_dotnet_nested_class_table_index(pe)
+
    for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)

@@ -235,8 +245,11 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
            if field.row is None:
                logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx)
                continue
+
+            typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)
+
            token: int = calculate_dotnet_token_value(field.table.number, field.row_index)
-            yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name)
+            yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)


 def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
@@ -276,8 +289,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
            logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
            module = ""
        else:
-            module = impl_map.ImportScope.row.Name
-        method: str = impl_map.ImportName
+            module = str(impl_map.ImportScope.row.Name)
+        method: str = str(impl_map.ImportName)

        member_forward_table: int
        if impl_map.MemberForwarded.table is None:
@@ -300,19 +313,122 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
        yield DnUnmanagedMethod(token, module, method)


+def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> Optional[dnfile.base.MDTableRow]:
+    """fetch the row at the 1-based `row_index` of metadata table `table_index`, or None when it is unavailable"""
+    assert pe.net is not None
+    assert pe.net.mdtables is not None
+
+    # row indexes are 1-based (the row is read from table[row_index - 1]), so 1 is the first valid row
+    if row_index < 1:
+        return None
+
+    table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(table_index)
+    try:
+        # a missing table and an out-of-range row index are both reported as None
+        return None if table is None else table[row_index - 1]
+    except IndexError:
+        return None
+
+
+def resolve_nested_typedef_name(
+    nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
+) -> Tuple[str, Tuple[str, ...]]:
+    """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeDef name as a tuple
+
+    args:
+      nested_class_table: maps the row index of a nested class to the row index of its enclosing class
+      index: row index of `typedef` in the TypeDef table
+      typedef: the TypeDef row whose name is resolved
+      pe: the parsed .NET PE from which enclosing TypeDef rows are read
+
+    The name tuple is ordered from the outermost enclosing class to `typedef` itself, and the namespace is
+    that of the outermost enclosing class. When an enclosing row cannot be read, or the nesting is cyclic,
+    the names collected so far are returned together with the namespace of `typedef`.
+    """
+    if index not in nested_class_table:
+        return str(typedef.TypeNamespace), (str(typedef.TypeName),)
+
+    # names are collected innermost first and reversed on return
+    typedef_name = [str(typedef.TypeName)]
+    visited = {index}
+
+    while True:
+        index = nested_class_table[index]
+        table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, index)
+        # malformed metadata may reference unreadable rows or nest classes in a cycle; stop instead of looping forever
+        if table_row is None or index in visited:
+            return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
+
+        visited.add(index)
+        typedef_name.append(str(table_row.TypeName))
+        if index not in nested_class_table:
+            # reached the root enclosing class
+            return str(table_row.TypeNamespace), tuple(typedef_name[::-1])
+
+
+def resolve_nested_typeref_name(
+    index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
+) -> Tuple[str, Tuple[str, ...]]:
+    """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
+    # A TypeRef whose ResolutionScope points into the TypeRef table is nested inside that TypeRef
+    if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
+        typeref_name = []
+        name = str(typeref.TypeName)
+        # `name` is held back and only appended inside the loop below, to avoid a potential duplicate
+        # NOTE(review): the value held in `name` when the loop ends (or never runs) is not added to the result - confirm
+        # Validate index: `index` is looked up as a row index in the TypeRef table; on failure fall back to the flat name
+        table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index)
+        if table_row is None:
+            return str(typeref.TypeNamespace), (str(typeref.TypeName),)
+
+        while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef):
+            # Walk outwards through the enclosing TypeRefs; an unreadable row ends the walk with the names seen so far
+            typeref_name.append(name)
+            name = str(table_row.TypeName)
+            table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index)
+            if table_row is None:
+                return str(typeref.TypeNamespace), tuple(typeref_name[::-1])
+
+        # Document the root enclosing details: its name is appended last and its namespace is the one returned
+        typeref_name.append(str(table_row.TypeName))
+
+        return str(table_row.TypeNamespace), tuple(typeref_name[::-1])
+
+    else:
+        return str(typeref.TypeNamespace), (str(typeref.TypeName),)
+
+
+def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
+    """map the row index of every nested class to the row index of the class that encloses it
+
+    built from the NestedClass table and used to find nested classes among the TypeDef entries.
+    """
+    enclosing_by_nested: Dict[int, int] = {}
+    for _, row in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number):
+        assert isinstance(row, dnfile.mdtable.NestedClassRow)
+        enclosing_by_nested[row.NestedClass.row_index] = row.EnclosingClass.row_index
+    return enclosing_by_nested
+
+
 def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]:
    """get .NET types from TypeDef and TypeRef tables"""
+    nested_class_table = get_dotnet_nested_class_table_index(pe)
+
    for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)

+        typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)
+
        typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid)
-        yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace)
+        yield DnType(typedef_token, typedefname, namespace=typedefnamespace)

    for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
        assert isinstance(typeref, dnfile.mdtable.TypeRefRow)

+        typerefnamespace, typerefname = resolve_nested_typeref_name(typeref.ResolutionScope.row_index, typeref, pe)
+
        typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid)
-        yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace)
+        yield DnType(typeref_token, typerefname, namespace=typerefnamespace)


 def calculate_dotnet_token_value(table: int, rid: int) -> int:
--- a/capa/features/extractors/dnfile/insn.py
+++ b/capa/features/extractors/dnfile/insn.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/dnfile/types.py
+++ b/capa/features/extractors/dnfile/types.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -6,15 +6,17 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import Optional
+from typing import Tuple, Optional


 class DnType:
-    def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None):
+    def __init__(
+        self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
+    ):
        self.token: int = token
        self.access: Optional[str] = access
        self.namespace: str = namespace
-        self.class_: str = class_
+        self.class_: Tuple[str, ...] = class_

        if member == ".ctor":
            member = "ctor"
@@ -42,9 +44,13 @@ class DnType:
        return str(self)

    @staticmethod
-    def format_name(class_: str, namespace: str = "", member: str = ""):
+    def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""):
+        if len(class_) > 1:
+            class_str = "/".join(class_)  # Concat items in tuple, separated by a "/"
+        else:
+            class_str = "".join(class_)  # Convert tuple to str
        # like File::OpenRead
-        name: str = f"{class_}::{member}" if member else class_
+        name: str = f"{class_str}::{member}" if member else class_str
        if namespace:
            # like System.IO.File::OpenRead
            name = f"{namespace}.{name}"
--- a/capa/features/extractors/dotnetfile.py
+++ b/capa/features/extractors/dotnetfile.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -38,16 +38,19 @@ from capa.features.extractors.dnfile.helpers import (
    is_dotnet_mixed_mode,
    get_dotnet_managed_imports,
    get_dotnet_managed_methods,
+    resolve_nested_typedef_name,
+    resolve_nested_typeref_name,
    calculate_dotnet_token_value,
    get_dotnet_unmanaged_imports,
+    get_dotnet_nested_class_table_index,
 )

 logger = logging.getLogger(__name__)


 def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
-    yield Format(FORMAT_PE), NO_ADDRESS
    yield Format(FORMAT_DOTNET), NO_ADDRESS
+    yield Format(FORMAT_PE), NO_ADDRESS


 def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
@@ -75,12 +78,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
    for _, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        # emit internal .NET namespaces
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
-        namespaces.add(typedef.TypeNamespace)
+        namespaces.add(str(typedef.TypeNamespace))

    for _, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
        # emit external .NET namespaces
        assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
-        namespaces.add(typeref.TypeNamespace)
+        namespaces.add(str(typeref.TypeNamespace))

    # namespaces may be empty, discard
    namespaces.discard("")
@@ -92,19 +95,25 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple

 def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
    """emit class features from TypeRef and TypeDef tables"""
+    nested_class_table = get_dotnet_nested_class_table_index(pe)
+
    for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        # emit internal .NET classes
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)

+        typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)
+
        token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid)
-        yield Class(DnType.format_name(typedef.TypeName, namespace=typedef.TypeNamespace)), DNTokenAddress(token)
+        yield Class(DnType.format_name(typedefname, namespace=typedefnamespace)), DNTokenAddress(token)

    for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
        # emit external .NET classes
        assert isinstance(typeref, dnfile.mdtable.TypeRefRow)

+        typerefnamespace, typerefname = resolve_nested_typeref_name(typeref.ResolutionScope.row_index, typeref, pe)
+
        token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid)
-        yield Class(DnType.format_name(typeref.TypeName, namespace=typeref.TypeNamespace)), DNTokenAddress(token)
+        yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)


 def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -10,10 +10,11 @@ import logging
 import itertools
 import collections
 from enum import Enum
-from typing import Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
+from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
 from dataclasses import dataclass

-import Elf  # from vivisect
+if TYPE_CHECKING:
+    import Elf  # from vivisect

 logger = logging.getLogger(__name__)

@@ -57,6 +58,10 @@ class OS(str, Enum):
    SYLLABLE = "syllable"
    NACL = "nacl"
    ANDROID = "android"
+    DRAGONFLYBSD = "dragonfly BSD"
+    ILLUMOS = "illumos"
+    ZOS = "z/os"
+    UNIX = "unix"


 # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
@@ -80,6 +85,8 @@ class Phdr:
    paddr: int
    filesz: int
    buf: bytes
+    flags: int
+    memsz: int


@dataclass
@@ -108,6 +115,9 @@ class Shdr:
            buf,
        )

+    def get_name(self, elf: "ELF") -> str:  # name of this section, read from `elf.shstrtab` starting at offset `self.name`
+        return elf.shstrtab.buf[self.name :].split(b"\x00", 1)[0].decode("ascii")
+

 class ELF:
    def __init__(self, f: BinaryIO):
@@ -120,6 +130,7 @@ class ELF:
        self.e_phnum: int
        self.e_shentsize: int
        self.e_shnum: int
+        self.e_shstrndx: int
        self.phbuf: bytes
        self.shbuf: bytes

@@ -151,11 +162,15 @@ class ELF:
        if self.bitness == 32:
            e_phoff, e_shoff = struct.unpack_from(self.endian + "II", self.file_header, 0x1C)
            self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x2A)
-            self.e_shentsize, self.e_shnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x2E)
+            self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack_from(
+                self.endian + "HHH", self.file_header, 0x2E
+            )
        elif self.bitness == 64:
            e_phoff, e_shoff = struct.unpack_from(self.endian + "QQ", self.file_header, 0x20)
            self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x36)
-            self.e_shentsize, self.e_shnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x3A)
+            self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack_from(
+                self.endian + "HHH", self.file_header, 0x3A
+            )
        else:
            raise NotImplementedError()

@@ -197,7 +212,7 @@ class ELF:
        15: OS.AROS,
        16: OS.FENIXOS,
        17: OS.CLOUD,
-        # 53: "SORTFIX",      # i can't find any reference to this OS, i dont think it exists
+        # 53: "SORTFIX",      # i can't find any reference to this OS, i don't think it exists
        # 64: "ARM_AEABI",    # not an OS
        # 97: "ARM",          # not an OS
        # 255: "STANDALONE",  # not an OS
@@ -306,24 +321,23 @@ class ELF:
        phent_offset = i * self.e_phentsize
        phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]

-        (p_type,) = struct.unpack_from(self.endian + "I", phent, 0x0)
-        logger.debug("ph:p_type: 0x%04x", p_type)
-
        if self.bitness == 32:
-            p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "IIII", phent, 0x4)
+            p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
+                self.endian + "IIIIIII", phent, 0x0
+            )
        elif self.bitness == 64:
-            p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "QQQQ", phent, 0x8)
+            p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
+                self.endian + "IIQQQQQ", phent, 0x0
+            )
        else:
            raise NotImplementedError()

-        logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
-
        self.f.seek(p_offset)
        buf = self.f.read(p_filesz)
        if len(buf) != p_filesz:
            raise ValueError("failed to read program header content")

-        return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf)
+        return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf, p_flags, p_memsz)

    @property
    def program_headers(self):
@@ -348,8 +362,6 @@ class ELF:
        else:
            raise NotImplementedError()

-        logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
-
        self.f.seek(sh_offset)
        buf = self.f.read(sh_size)
        if len(buf) != sh_size:
@@ -365,6 +377,10 @@ class ELF:
            except ValueError:
                continue

+    @property
+    def shstrtab(self) -> Shdr:  # section header selected by e_shstrndx; Shdr.get_name reads section names from its buf
+        return self.parse_section_header(self.e_shstrndx)  # re-parsed on each access; may raise if the header cannot be read (TODO confirm)
+
    @property
    def linker(self):
        PT_INTERP = 0x3
@@ -712,7 +728,7 @@ class SymTab:
        yield from self.symbols

    @classmethod
-    def from_viv(cls, elf: Elf.Elf) -> Optional["SymTab"]:
+    def from_viv(cls, elf: "Elf.Elf") -> Optional["SymTab"]:
        endian = "<" if elf.getEndian() == 0 else ">"
        bitness = elf.bits

@@ -816,6 +832,52 @@ def guess_os_from_sh_notes(elf: ELF) -> Optional[OS]:
    return None


+def guess_os_from_ident_directive(elf: ELF) -> Optional[OS]:
+    """
+    Guess the OS from the compiler identification string in the `.comment` section.
+
+    GCC inserts the GNU version via an .ident directive
+    that gets stored in a section named ".comment".
+    Look at the version and recognize common OSes.
+
+    This assumes the GCC version matches the target OS version,
+    which could be wrong during cross-compilation,
+    therefore, don't rely on this if possible.
+
+    references:
+      - https://stackoverflow.com/q/6263425
+      - https://gcc.gnu.org/onlinedocs/cpp/Other-Directives.html
+
+    Returns the guessed OS, or None when no recognized vendor string is found.
+    """
+    SHT_PROGBITS = 0x1
+
+    # ordered (vendor marker, OS) pairs; the first marker found wins.
+    # these values come from our testfiles, like:
+    # rg -a "GCC: " tests/data/
+    vendor_markers = (
+        ("Debian", OS.LINUX),
+        ("Ubuntu", OS.LINUX),
+        ("Red Hat", OS.LINUX),
+        ("Alpine", OS.LINUX),
+        ("Android", OS.ANDROID),
+    )
+
+    for section in elf.section_headers:
+        if section.type != SHT_PROGBITS or section.get_name(elf) != ".comment":
+            continue
+
+        try:
+            text = section.buf.decode("utf-8")
+        except ValueError:
+            # not valid UTF-8, so this isn't a compiler identification string we can read
+            continue
+
+        if "GCC:" not in text:
+            continue
+
+        logger.debug(".ident: %s", text)
+
+        for marker, guess in vendor_markers:
+            if marker in text:
+                return guess
+
+    return None
+
+
 def guess_os_from_linker(elf: ELF) -> Optional[OS]:
    # search for recognizable dynamic linkers (interpreters)
    # for example, on linux, we see file paths like: /lib64/ld-linux-x86-64.so.2
@@ -851,8 +913,10 @@ def guess_os_from_abi_versions_needed(elf: ELF) -> Optional[OS]:
                return OS.HURD

            else:
-                # we don't have any good guesses based on versions needed
-                pass
+                # in practice, Hurd isn't a common/viable OS,
+                # so this is almost certain to be Linux,
+                # so let's just make that guess.
+                return OS.LINUX

    return None

@@ -865,6 +929,8 @@ def guess_os_from_needed_dependencies(elf: ELF) -> Optional[OS]:
            return OS.HURD
        if needed.startswith("libandroid.so"):
            return OS.ANDROID
+        if needed.startswith("liblog.so"):
+            return OS.ANDROID

    return None

@@ -891,11 +957,506 @@ def guess_os_from_symtab(elf: ELF) -> Optional[OS]:

        for os, hints in keywords.items():
            if any(hint in sym_name for hint in hints):
+                logger.debug("symtab: %s looks like %s", sym_name, os)
                return os

    return None


+def is_go_binary(elf: ELF) -> bool:
+    """
+    Return True when the ELF has a section whose name marks it as built by Go.
+
+    The section names are checked one after another, each with its own pass
+    over the section headers: first `.note.go.buildid`, then `.go.buildinfo`.
+    """
+    section_checks = (
+        (".note.go.buildid", "go buildinfo: found section .note.go.buildid"),
+        # The `go version` command enumerates sections for the name `.go.buildinfo`
+        # (in addition to looking for the BUILDINFO_MAGIC) to check if an executable is go or not.
+        # See references to the `errNotGoExe` error here:
+        # https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L41
+        (".go.buildinfo", "go buildinfo: found section .go.buildinfo"),
+    )
+
+    for wanted_name, message in section_checks:
+        if any(shdr.get_name(elf) == wanted_name for shdr in elf.section_headers):
+            logger.debug(message)
+            return True
+
+    # other strategy used by FLOSS: search for known runtime strings.
+    # https://github.com/mandiant/flare-floss/blob/b2ca8adfc5edf278861dd6bff67d73da39683b46/floss/language/identify.py#L88
+    return False
+
+
+def get_go_buildinfo_data(elf: ELF) -> Optional[bytes]:
+    """
+    Locate the bytes that may hold the Go buildinfo blob.
+
+    Prefer the content of the `.go.buildinfo` section. When there is no such
+    section, fall back to the content of the first PT_LOAD segment that is
+    writable but not executable. Returns None when neither is found.
+    """
+    for section in elf.section_headers:
+        if section.get_name(elf) != ".go.buildinfo":
+            continue
+
+        logger.debug("go buildinfo: found section .go.buildinfo")
+        return section.buf
+
+    PT_LOAD = 0x1
+    PF_X = 1
+    PF_W = 2
+    for segment in elf.program_headers:
+        # a data segment: loadable, writable, and not executable
+        if segment.type == PT_LOAD and (segment.flags & PF_W) and not (segment.flags & PF_X):
+            logger.debug("go buildinfo: found data segment")
+            return segment.buf
+
+    return None
+
+
+def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
+    """
+    Read up to `size` bytes found at virtual address `rva`.
+
+    ELF segments are for runtime data,
+    ELF sections are for link-time data.
+    So we want to read Program Headers/Segments.
+
+    The first segment whose memory range [vaddr, vaddr + memsz) contains `rva` is used.
+    Bytes past the file-backed content of that segment, but still within its
+    memory size, are returned as NULLs. The result is shorter than `size`
+    when the request runs past the end of the segment.
+
+    Returns None when no segment contains `rva`,
+    and empty bytes when `size` is not positive.
+    """
+    for phdr in elf.program_headers:
+        if not (phdr.vaddr <= rva < phdr.vaddr + phdr.memsz):
+            continue
+
+        if size <= 0:
+            return b""
+
+        segment_offset = rva - phdr.vaddr
+
+        # the readable extent is the larger of the file-backed content and the memory size.
+        # assume page alignment is already handled.
+        segment_end = max(len(phdr.buf), phdr.memsz)
+        read_end = min(segment_offset + size, segment_end)
+
+        data = phdr.buf[segment_offset:read_end]
+
+        # pad with NULLs, but only for the requested range:
+        # memsz comes from the (untrusted) file, so never allocate the whole segment.
+        padding = (read_end - segment_offset) - len(data)
+        if padding > 0:
+            data += b"\x00" * padding
+
+        return data
+
+    return None
+
+
+def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
+    """
+    Follow the (address, length) header stored at virtual address `rva` and return the bytes it refers to.
+
+    The header is two pointer-sized unsigned integers in the ELF's endianness:
+    the address of the data, then its length in bytes.
+
+    Returns None when the header, or the data it points to, is not mapped by any segment.
+
+    Raises:
+      ValueError: when the ELF bitness is neither 32 nor 64.
+    """
+    if elf.bitness == 32:
+        struct_size = 8
+        struct_format = elf.endian + "II"
+    elif elf.bitness == 64:
+        struct_size = 16
+        struct_format = elf.endian + "QQ"
+    else:
+        raise ValueError("invalid psize")
+
+    struct_buf = read_data(elf, rva, struct_size)
+    # a header that straddles the end of a segment comes back truncated;
+    # treat that like an unmapped header rather than failing to unpack it.
+    if not struct_buf or len(struct_buf) < struct_size:
+        return None
+
+    addr, length = struct.unpack_from(struct_format, struct_buf, 0)
+
+    return read_data(elf, addr, length)
+
+
+def _guess_os_from_goos_setting(data: bytes) -> Optional[OS]:
+    """
+    Search `data` for a `GOOS=<name>` build setting and map the name to an OS.
+
+    Returns None when no known GOOS value is present,
+    or when the GOOS found is one that never produces an ELF file.
+    """
+    GOOS_TO_OS = {
+        b"aix": OS.AIX,
+        b"android": OS.ANDROID,
+        b"dragonfly": OS.DRAGONFLYBSD,
+        b"freebsd": OS.FREEBSD,
+        b"hurd": OS.HURD,
+        b"illumos": OS.ILLUMOS,
+        b"linux": OS.LINUX,
+        b"netbsd": OS.NETBSD,
+        b"openbsd": OS.OPENBSD,
+        b"solaris": OS.SOLARIS,
+        b"zos": OS.ZOS,
+        b"windows": None,  # PE format
+        b"plan9": None,  # a.out format
+        b"ios": None,  # Mach-O format
+        b"darwin": None,  # Mach-O format
+        b"nacl": None,  # dropped in GO 1.14
+        b"js": None,
+    }
+
+    # Brute force the k-v pair, like `GOOS=linux`,
+    # rather than try to parse the data, which would be fragile.
+    for key, os in GOOS_TO_OS.items():
+        if (b"GOOS=" + key) in data:
+            logger.debug("go buildinfo: found os: %s", os)
+            return os
+
+    return None
+
+
+def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
+    """
+    In a binary compiled by Go, the buildinfo structure may contain
+    metadata about the build environment, including the configured
+    GOOS, which specifies the target operating system.
+
+    Search for and parse the buildinfo structure,
+    which may be found in the .go.buildinfo section,
+    and often contains this metadata inline. Otherwise,
+    follow a few byte slices to the relevant information.
+
+    This strategy is derived from GoReSym.
+
+    Returns the OS named by GOOS, or None when the buildinfo is missing,
+    malformed, or doesn't name an OS that uses ELF.
+    """
+    buf = get_go_buildinfo_data(elf)
+    if not buf:
+        logger.debug("go buildinfo: no buildinfo section")
+        return None
+
+    assert isinstance(buf, bytes)
+
+    # The build info blob left by the linker is identified by
+    # a 16-byte header, consisting of:
+    #  - buildInfoMagic (14 bytes),
+    #  - the binary's pointer size (1 byte), and
+    #  - whether the binary is big endian (1 byte).
+    #
+    # Then:
+    #  - virtual address to Go string: runtime.buildVersion
+    #  - virtual address to Go string: runtime.modinfo
+    #
+    #  On 32-bit platforms, the last 8 bytes are unused.
+    #
+    #  If the endianness has the 2 bit set, then the pointers are zero,
+    #  and the 32-byte header is followed by varint-prefixed string data
+    #  for the two string values we care about.
+    # https://github.com/mandiant/GoReSym/blob/0860a1b1b4f3495e9fb7e71eb4386bf3e0a7c500/buildinfo/buildinfo.go#L185-L193
+    BUILDINFO_MAGIC = b"\xFF Go buildinf:"
+
+    try:
+        index = buf.index(BUILDINFO_MAGIC)
+    except ValueError:
+        logger.debug("go buildinfo: no buildinfo magic")
+        return None
+
+    psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
+    if psize not in (4, 8):
+        # the header comes from the file under analysis, so validate it explicitly
+        # rather than with an assert, which is stripped under `python -O`.
+        logger.debug("go buildinfo: unexpected pointer size: %d", psize)
+        return None
+
+    is_big_endian = bool(flags & 0b01)
+    has_inline_strings = bool(flags & 0b10)
+    logger.debug("go buildinfo: psize: %d big endian: %s inline: %s", psize, is_big_endian, has_inline_strings)
+
+    if has_inline_strings:
+        # This is the common case/path. Most samples will have an inline GOOS string.
+        #
+        # To find samples on VT, use these VTGrep searches:
+        #
+        #   content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 02}
+        #   content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 02}
+
+        # If present, the GOOS key will be found within
+        # the current buildinfo data region.
+        return _guess_os_from_goos_setting(buf)
+
+    # This is the uncommon path. Most samples will have an inline GOOS string.
+    #
+    # To find samples on VT, use the referenced VTGrep content searches.
+    info_format = {
+        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
+        # like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
+        # in which the modinfo doesn't have GOOS.
+        (4, False): "<II",
+        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
+        # like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
+        # in which the modinfo doesn't have GOOS.
+        (8, False): "<QQ",
+        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
+        # (no matches on VT today)
+        (4, True): ">II",
+        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
+        # like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
+        # in which the modinfo doesn't have GOOS.
+        (8, True): ">QQ",
+    }
+
+    build_version_address, modinfo_address = struct.unpack_from(
+        info_format[(psize, is_big_endian)], buf, index + 0x10
+    )
+    logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
+    logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
+
+    build_version = read_go_slice(elf, build_version_address)
+    if build_version:
+        # decode leniently: this is only for logging and must not abort the detection.
+        logger.debug("go buildinfo: build version: %s", build_version.decode("utf-8", errors="replace"))
+
+    modinfo = read_go_slice(elf, modinfo_address)
+    if not modinfo:
+        return None
+
+    # only inspect the framing when modinfo is long enough to index safely
+    if len(modinfo) >= 0x11 and modinfo[-0x11] == ord("\n"):
+        # Strip module framing: sentinel strings delimiting the module info.
+        # These are cmd/go/internal/modload/build.infoStart and infoEnd.
+        # Which should probably be:
+        #   infoStart, _ = hex.DecodeString("3077af0c9274080241e1c107e6d618e6")
+        #   infoEnd, _   = hex.DecodeString("f932433186182072008242104116d8f2")
+        modinfo = modinfo[0x10:-0x10]
+    logger.debug("go buildinfo: modinfo: %s", modinfo.decode("utf-8", errors="replace"))
+
+    if not modinfo:
+        return None
+
+    return _guess_os_from_goos_setting(modinfo)
+
+
+def _guess_os_from_go_runtime_filename(
+    elf: ELF, needle: bytes, extension: bytes, prefix_to_os: dict
+) -> Optional[OS]:
+    """
+    Map the Go runtime source filename that follows `needle` to an OS.
+
+    For each segment, find the first occurrence of `needle`, take up to
+    32 bytes after it, cut them at `extension`, and return the OS of the
+    first key of `prefix_to_os` that the resulting name starts with.
+
+    Returns None when no segment yields a recognized filename.
+    """
+    for phdr in elf.program_headers:
+        buf = phdr.buf
+        try:
+            index = buf.index(needle)
+        except ValueError:
+            continue
+
+        start = index + len(needle)
+        rest = buf[start : start + 32]
+        # the bytes after the needle may be arbitrary binary data,
+        # so don't let an invalid UTF-8 sequence abort the whole search.
+        filename = rest.partition(extension)[0].decode("utf-8", errors="replace")
+        logger.debug(
+            "go source: filename: %s%s%s", needle.decode("ascii"), filename, extension.decode("ascii")
+        )
+
+        for prefix, os in prefix_to_os.items():
+            if filename.startswith(prefix):
+                return os
+
+    return None
+
+
+def guess_os_from_go_source(elf: ELF) -> Optional[OS]:
+    """
+    In a binary compiled by Go, runtime metadata may contain
+    references to the source filenames, including the
+    src/runtime/os_* files, whose name indicates the
+    target operating system.
+
+    Confirm the given ELF seems to be built by Go,
+    and then look for strings that look like
+    Go source filenames.
+
+    This strategy is derived from GoReSym.
+
+    Returns the guessed OS, or None when the ELF doesn't look like a Go binary
+    or no recognized runtime source filename is found.
+    """
+    if not is_go_binary(elf):
+        return None
+
+    # first, look for: /src/runtime/os_<name>.go
+    #
+    # via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
+    # candidates today:
+    #   - aix
+    #   - android
+    #   - darwin
+    #   - darwin_arm64
+    #   - dragonfly
+    #   - freebsd
+    #   - freebsd2
+    #   - freebsd_amd64
+    #   - freebsd_arm
+    #   - freebsd_arm64
+    #   - freebsd_noauxv
+    #   - freebsd_riscv64
+    #   - illumos
+    #   - js
+    #   - linux
+    #   - linux_arm
+    #   - linux_arm64
+    #   - linux_be64
+    #   - linux_generic
+    #   - linux_loong64
+    #   - linux_mips64x
+    #   - linux_mipsx
+    #   - linux_noauxv
+    #   - linux_novdso
+    #   - linux_ppc64x
+    #   - linux_riscv64
+    #   - linux_s390x
+    #   - linux_x86
+    #   - netbsd
+    #   - netbsd_386
+    #   - netbsd_amd64
+    #   - netbsd_arm
+    #   - netbsd_arm64
+    #   - nonopenbsd
+    #   - only_solaris
+    #   - openbsd
+    #   - openbsd_arm
+    #   - openbsd_arm64
+    #   - openbsd_libc
+    #   - openbsd_mips64
+    #   - openbsd_syscall
+    #   - openbsd_syscall1
+    #   - openbsd_syscall2
+    #   - plan9
+    #   - plan9_arm
+    #   - solaris
+    #   - unix
+    #   - unix_nonlinux
+    #   - wasip1
+    #   - wasm
+    #   - windows
+    #   - windows_arm
+    #   - windows_arm64
+    OS_FILENAME_TO_OS = {
+        "aix": OS.AIX,
+        "android": OS.ANDROID,
+        "dragonfly": OS.DRAGONFLYBSD,
+        "freebsd": OS.FREEBSD,
+        "freebsd2": OS.FREEBSD,
+        "freebsd_": OS.FREEBSD,
+        "illumos": OS.ILLUMOS,
+        "linux": OS.LINUX,
+        "netbsd": OS.NETBSD,
+        "only_solaris": OS.SOLARIS,
+        "openbsd": OS.OPENBSD,
+        "solaris": OS.SOLARIS,
+        "unix_nonlinux": OS.UNIX,
+    }
+
+    os = _guess_os_from_go_runtime_filename(elf, b"/src/runtime/os_", b".go", OS_FILENAME_TO_OS)
+    if os is not None:
+        return os
+
+    # otherwise, look for: /src/runtime/rt0_<name>.s
+    #
+    # via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
+    # candidates today:
+    #   - aix_ppc64
+    #   - android_386
+    #   - android_amd64
+    #   - android_arm
+    #   - android_arm64
+    #   - darwin_amd64
+    #   - darwin_arm64
+    #   - dragonfly_amd64
+    #   - freebsd_386
+    #   - freebsd_amd64
+    #   - freebsd_arm
+    #   - freebsd_arm64
+    #   - freebsd_riscv64
+    #   - illumos_amd64
+    #   - ios_amd64
+    #   - ios_arm64
+    #   - js_wasm
+    #   - linux_386
+    #   - linux_amd64
+    #   - linux_arm
+    #   - linux_arm64
+    #   - linux_loong64
+    #   - linux_mips64x
+    #   - linux_mipsx
+    #   - linux_ppc64
+    #   - linux_ppc64le
+    #   - linux_riscv64
+    #   - linux_s390x
+    #   - netbsd_386
+    #   - netbsd_amd64
+    #   - netbsd_arm
+    #   - netbsd_arm64
+    #   - openbsd_386
+    #   - openbsd_amd64
+    #   - openbsd_arm
+    #   - openbsd_arm64
+    #   - openbsd_mips64
+    #   - openbsd_ppc64
+    #   - openbsd_riscv64
+    #   - plan9_386
+    #   - plan9_amd64
+    #   - plan9_arm
+    #   - solaris_amd64
+    #   - wasip1_wasm
+    #   - windows_386
+    #   - windows_amd64
+    #   - windows_arm
+    #   - windows_arm64
+    RT0_FILENAME_TO_OS = {
+        "aix": OS.AIX,
+        "android": OS.ANDROID,
+        "dragonfly": OS.DRAGONFLYBSD,
+        "freebsd": OS.FREEBSD,
+        "illumos": OS.ILLUMOS,
+        "linux": OS.LINUX,
+        "netbsd": OS.NETBSD,
+        "openbsd": OS.OPENBSD,
+        "solaris": OS.SOLARIS,
+    }
+
+    return _guess_os_from_go_runtime_filename(elf, b"/src/runtime/rt0_", b".s", RT0_FILENAME_TO_OS)
+
+
+def guess_os_from_vdso_strings(elf: ELF) -> Optional[OS]:
+    """
+    The "vDSO" (virtual dynamic shared object) is a small shared
+    library that the kernel automatically maps into the address space
+    of all user-space applications.
+
+    Some statically linked executables include small dynamic linker
+    routines that find these vDSO symbols, using the ASCII
+    symbol name and version. We can therefore recognize the pairs
+    (symbol, version) to guess the binary targets Linux.
+
+    Returns OS.LINUX when any segment contains both strings of a known pair, otherwise None.
+    """
+    # We don't really use the arch, but it's interesting for documentation.
+    # I suppose we could restrict the arch here to what's in the ELF header,
+    # but that's even more work. Let's see if this is sufficient.
+    #
+    # via: https://man7.org/linux/man-pages/man7/vdso.7.html
+    known_vdso_symbols = (
+        ("arm", b"__vdso_gettimeofday", b"LINUX_2.6"),
+        ("arm", b"__vdso_clock_gettime", b"LINUX_2.6"),
+        ("aarch64", b"__kernel_rt_sigreturn", b"LINUX_2.6.39"),
+        ("aarch64", b"__kernel_gettimeofday", b"LINUX_2.6.39"),
+        ("aarch64", b"__kernel_clock_gettime", b"LINUX_2.6.39"),
+        ("aarch64", b"__kernel_clock_getres", b"LINUX_2.6.39"),
+        ("mips", b"__kernel_gettimeofday", b"LINUX_2.6"),
+        ("mips", b"__kernel_clock_gettime", b"LINUX_2.6"),
+        ("ia64", b"__kernel_sigtramp", b"LINUX_2.5"),
+        ("ia64", b"__kernel_syscall_via_break", b"LINUX_2.5"),
+        ("ia64", b"__kernel_syscall_via_epc", b"LINUX_2.5"),
+        ("ppc/32", b"__kernel_clock_getres", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_clock_gettime64", b"LINUX_5.11"),
+        ("ppc/32", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_getcpu", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_sigtramp_rt32", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_sigtramp32", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_clock_getres", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_getcpu", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_sigtramp_rt64", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
+        ("riscv", b"__vdso_rt_sigreturn", b"LINUX_4.15"),
+        ("riscv", b"__vdso_gettimeofday", b"LINUX_4.15"),
+        ("riscv", b"__vdso_clock_gettime", b"LINUX_4.15"),
+        ("riscv", b"__vdso_clock_getres", b"LINUX_4.15"),
+        ("riscv", b"__vdso_getcpu", b"LINUX_4.15"),
+        ("riscv", b"__vdso_flush_icache", b"LINUX_4.15"),
+        ("s390", b"__kernel_clock_getres", b"LINUX_2.6.29"),
+        ("s390", b"__kernel_clock_gettime", b"LINUX_2.6.29"),
+        ("s390", b"__kernel_gettimeofday", b"LINUX_2.6.29"),
+        ("superh", b"__kernel_rt_sigreturn", b"LINUX_2.6"),
+        ("superh", b"__kernel_sigreturn", b"LINUX_2.6"),
+        ("superh", b"__kernel_vsyscall", b"LINUX_2.6"),
+        ("i386", b"__kernel_sigreturn", b"LINUX_2.5"),
+        ("i386", b"__kernel_rt_sigreturn", b"LINUX_2.5"),
+        ("i386", b"__kernel_vsyscall", b"LINUX_2.5"),
+        ("i386", b"__vdso_clock_gettime", b"LINUX_2.6"),
+        ("i386", b"__vdso_gettimeofday", b"LINUX_2.6"),
+        ("i386", b"__vdso_time", b"LINUX_2.6"),
+        ("x86-64", b"__vdso_clock_gettime", b"LINUX_2.6"),
+        ("x86-64", b"__vdso_getcpu", b"LINUX_2.6"),
+        ("x86-64", b"__vdso_gettimeofday", b"LINUX_2.6"),
+        ("x86-64", b"__vdso_time", b"LINUX_2.6"),
+        ("x86/32", b"__vdso_clock_gettime", b"LINUX_2.6"),
+        ("x86/32", b"__vdso_getcpu", b"LINUX_2.6"),
+        ("x86/32", b"__vdso_gettimeofday", b"LINUX_2.6"),
+        ("x86/32", b"__vdso_time", b"LINUX_2.6"),
+    )
+
+    for segment in elf.program_headers:
+        data = segment.buf
+
+        for arch, symbol, version in known_vdso_symbols:
+            # both the symbol name and its version string must be present in the same segment
+            if symbol not in data or version not in data:
+                continue
+
+            logger.debug("vdso string: %s %s %s", arch, symbol.decode("ascii"), version.decode("ascii"))
+            return OS.LINUX
+
+    return None
+
+
 def detect_elf_os(f) -> str:
    """
    f: type Union[BinaryIO, IDAIO, GHIDRAIO]
@@ -927,6 +1488,13 @@ def detect_elf_os(f) -> str:
        logger.warning("Error guessing OS from section header notes: %s", e)
        sh_notes_guess = None

+    try:
+        ident_guess = guess_os_from_ident_directive(elf)
+        logger.debug("guess: .ident: %s", ident_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from .ident directive: %s", e)
+        ident_guess = None
+
    try:
        linker_guess = guess_os_from_linker(elf)
        logger.debug("guess: linker: %s", linker_guess)
@@ -955,6 +1523,27 @@ def detect_elf_os(f) -> str:
        logger.warning("Error guessing OS from symbol table: %s", e)
        symtab_guess = None

+    try:
+        goos_guess = guess_os_from_go_buildinfo(elf)
+        logger.debug("guess: Go buildinfo: %s", goos_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from Go buildinfo: %s", e)
+        goos_guess = None
+
+    try:
+        gosrc_guess = guess_os_from_go_source(elf)
+        logger.debug("guess: Go source: %s", gosrc_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from Go source path: %s", e)
+        gosrc_guess = None
+
+    try:
+        vdso_guess = guess_os_from_vdso_strings(elf)
+        logger.debug("guess: vdso strings: %s", vdso_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from vdso strings: %s", e)
+        # reset the vdso guess (not the symtab guess): the later `elif vdso_guess:` needs it bound,
+        # and a vdso failure must not discard an already computed symtab guess.
+        vdso_guess = None
+
    ret = None

    if osabi_guess:
@@ -978,6 +1567,24 @@ def detect_elf_os(f) -> str:
    elif symtab_guess:
        ret = symtab_guess

+    elif goos_guess:
+        ret = goos_guess
+
+    elif gosrc_guess:
+        # prefer goos_guess to this method,
+        # which is just string interpretation.
+        ret = gosrc_guess
+
+    elif ident_guess:
+        # at the bottom because we don't trust this too much
+        # due to potential for bugs with cross-compilation.
+        ret = ident_guess
+
+    elif vdso_guess:
+        # at the bottom because this is just scanning strings,
+        # which isn't very authoritative.
+        ret = vdso_guess
+
    return ret.value if ret is not None else "unknown"


--- a/capa/features/extractors/elffile.py
+++ b/capa/features/extractors/elffile.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -10,8 +10,7 @@ import logging
 from typing import Tuple, Iterator
 from pathlib import Path

-from elftools.elf.elffile import ELFFile, SymbolTableSection
-from elftools.elf.relocation import RelocationSection
+from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection

 import capa.features.extractors.common
 from capa.features.file import Export, Import, Section
@@ -47,17 +46,37 @@ def extract_file_export_names(elf: ELFFile, **kwargs):

            yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)

+    for segment in elf.iter_segments():
+        if not isinstance(segment, DynamicSegment):
+            continue
+
+        logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
+
+        for symbol in segment.iter_symbols():
+            # The following conditions are based on the following article
+            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
+            if not symbol.name:
+                continue
+            if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
+                continue
+            if symbol.entry.st_value == 0:
+                continue
+            if symbol.entry.st_shndx == "SHN_UNDEF":
+                continue
+
+            yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
+

 def extract_file_import_names(elf: ELFFile, **kwargs):
    # Create a dictionary to store symbol names by their index
    symbol_names = {}

    # Extract symbol names and store them in the dictionary
-    for section in elf.iter_sections():
-        if not isinstance(section, SymbolTableSection):
+    for segment in elf.iter_segments():
+        if not isinstance(segment, DynamicSegment):
            continue

-        for _, symbol in enumerate(section.iter_symbols()):
+        for _, symbol in enumerate(segment.iter_symbols()):
            # The following conditions are based on the following article
            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
            if not symbol.name:
@@ -73,21 +92,19 @@ def extract_file_import_names(elf: ELFFile, **kwargs):

            symbol_names[_] = symbol.name

-    for section in elf.iter_sections():
-        if not isinstance(section, RelocationSection):
+    for segment in elf.iter_segments():
+        if not isinstance(segment, DynamicSegment):
            continue

-        if section["sh_entsize"] == 0:
-            logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
-            continue
+        relocation_tables = segment.get_relocation_tables()
+        logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))

-        logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations())
-
-        for relocation in section.iter_relocations():
-            # Extract the symbol name from the symbol table using the symbol index in the relocation
-            if relocation["r_info_sym"] not in symbol_names:
-                continue
-            yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
+        for relocation_table in relocation_tables.values():
+            for relocation in relocation_table.iter_relocations():
+                # Extract the symbol name from the symbol table using the symbol index in the relocation
+                if relocation["r_info_sym"] not in symbol_names:
+                    continue
+                yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])


 def extract_file_section_names(elf: ELFFile, **kwargs):
--- a/capa/features/extractors/ghidra/extractor.py
+++ b/capa/features/extractors/ghidra/extractor.py
@@ -34,7 +34,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
                # https://ghidra.re/ghidra_docs/api/ghidra/program/model/listing/Program.html
                #
                # the hashes are stored in the database, not computed on the fly,
-                # so its probably not trivial to add SHA1.
+                # so it's probably not trivial to add SHA1.
                sha1="",
                sha256=capa.ghidra.helpers.get_file_sha256(),
            )
--- a/capa/features/extractors/ghidra/file.py
+++ b/capa/features/extractors/ghidra/file.py
@@ -127,8 +127,10 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
    """extract ASCII and UTF-16 LE strings"""

    for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
-        if block.isInitialized():
-            p_bytes = capa.features.extractors.ghidra.helpers.get_block_bytes(block)
+        if not block.isInitialized():
+            continue
+
+        p_bytes = capa.features.extractors.ghidra.helpers.get_block_bytes(block)

        for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
            offset = block.getStart().getOffset() + s.offset
--- a/capa/features/extractors/ghidra/helpers.py
+++ b/capa/features/extractors/ghidra/helpers.py
@@ -260,7 +260,7 @@ def dereference_ptr(insn: ghidra.program.database.code.InstructionDB):
        if thfunc and thfunc.isThunk():
            return handle_thunk(to_deref)
        else:
-            # if it doesn't poin to a thunk, it's usually a jmp to a label
+            # if it doesn't point to a thunk, it's usually a jmp to a label
            return to_deref
    if not dat:
        return to_deref
@@ -275,3 +275,27 @@ def dereference_ptr(insn: ghidra.program.database.code.InstructionDB):
            return addr
    else:
        return to_deref
+
+
+def find_data_references_from_insn(insn, max_depth: int = 10):
+    """
+    yield data references from given instruction
+
+    For each data reference of `insn`, start at the referenced address and,
+    while the data defined there is a pointer with a value, move on to that value,
+    doing at most `max_depth - 1` such lookups. The address reached is yielded.
+    """
+    for ref in insn.getReferencesFrom():
+        if not ref.getReferenceType().isData():
+            # only care about data references
+            continue
+
+        target = ref.getToAddress()
+
+        for _hop in range(max_depth - 1):
+            datum = getDataAt(target)  # type: ignore [name-defined] # noqa: F821
+            if not datum or not datum.isPointer():
+                # nothing defined here, or not a pointer: this is the final target
+                break
+
+            pointee = datum.getValue()
+            if pointee is None:
+                break
+
+            target = pointee
+
+        yield target
--- a/capa/features/extractors/ghidra/insn.py
+++ b/capa/features/extractors/ghidra/insn.py
@@ -23,6 +23,9 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi
 SECURITY_COOKIE_BYTES_DELTA = 0x40


+OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS
+
+
 def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
    """Populate the import cache for this context"""
    if "imports_cache" not in ctx:
@@ -82,7 +85,7 @@ def check_for_api_call(
        if not capa.features.extractors.ghidra.helpers.check_addr_for_api(addr_ref, fakes, imports, externs):
            return
        ref = addr_ref.getOffset()
-    elif ref_type == OperandType.DYNAMIC | OperandType.ADDRESS or ref_type == OperandType.DYNAMIC:
+    elif ref_type == OPERAND_TYPE_DYNAMIC_ADDRESS or ref_type == OperandType.DYNAMIC:
        return  # cannot resolve dynamics statically
    else:
        # pure address does not need to get dereferenced/ handled
@@ -195,46 +198,39 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
    if insn.getMnemonicString().startswith("LEA"):
        return

-    # ignore any stack references
-    if not capa.features.extractors.ghidra.helpers.is_stack_referenced(insn):
-        # Ghidra stores operands in 2D arrays if they contain offsets
-        for i in range(insn.getNumOperands()):
-            if insn.getOperandType(i) == OperandType.DYNAMIC:  # e.g. [esi + 4]
-                # manual extraction, since the default api calls only work on the 1st dimension of the array
-                op_objs = insn.getOpObjects(i)
-                if isinstance(op_objs[-1], ghidra.program.model.scalar.Scalar):
-                    op_off = op_objs[-1].getValue()
-                    yield Offset(op_off), ih.address
-                    yield OperandOffset(i, op_off), ih.address
-                else:
-                    yield Offset(0), ih.address
-                    yield OperandOffset(i, 0), ih.address
+    if capa.features.extractors.ghidra.helpers.is_stack_referenced(insn):
+        # ignore stack references
+        return
+
+    # Ghidra stores operands in 2D arrays if they contain offsets
+    for i in range(insn.getNumOperands()):
+        if insn.getOperandType(i) == OperandType.DYNAMIC:  # e.g. [esi + 4]
+            # manual extraction, since the default api calls only work on the 1st dimension of the array
+            op_objs = insn.getOpObjects(i)
+            if not op_objs:
+                continue
+
+            if isinstance(op_objs[-1], ghidra.program.model.scalar.Scalar):
+                op_off = op_objs[-1].getValue()
+            else:
+                op_off = 0
+
+            yield Offset(op_off), ih.address
+            yield OperandOffset(i, op_off), ih.address


 def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    parse referenced byte sequences
+
    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
-    insn: ghidra.program.database.code.InstructionDB = ih.inner
-
-    if capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
-        return
-
-    ref = insn.getAddress()  # init to insn addr
-    for i in range(insn.getNumOperands()):
-        if OperandType.isAddress(insn.getOperandType(i)):
-            ref = insn.getAddress(i)  # pulls pointer if there is one
-
-    if ref != insn.getAddress():  # bail out if there's no pointer
-        ghidra_dat = getDataAt(ref)  # type: ignore [name-defined] # noqa: F821
-        if (
-            ghidra_dat and not ghidra_dat.hasStringValue() and not ghidra_dat.isPointer()
-        ):  # avoid if the data itself is a pointer
-            extracted_bytes = capa.features.extractors.ghidra.helpers.get_bytes(ref, MAX_BYTES_FEATURE_SIZE)
+    for addr in capa.features.extractors.ghidra.helpers.find_data_references_from_insn(ih.inner):
+        data = getDataAt(addr)  # type: ignore [name-defined] # noqa: F821
+        if data and not data.hasStringValue():
+            extracted_bytes = capa.features.extractors.ghidra.helpers.get_bytes(addr, MAX_BYTES_FEATURE_SIZE)
            if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
-                # don't extract byte features for obvious strings
                yield Bytes(extracted_bytes), ih.address


@@ -245,24 +241,10 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
    example:
        push offset aAcr     ; "ACR  > "
    """
-    insn: ghidra.program.database.code.InstructionDB = ih.inner
-    dyn_addr = OperandType.DYNAMIC | OperandType.ADDRESS
-
-    ref = insn.getAddress()
-    for i in range(insn.getNumOperands()):
-        if OperandType.isScalarAsAddress(insn.getOperandType(i)):
-            ref = insn.getAddress(i)
-        # strings are also referenced dynamically via pointers & arrays, so we need to deref them
-        if insn.getOperandType(i) == dyn_addr:
-            ref = insn.getAddress(i)
-            dat = getDataAt(ref)  # type: ignore [name-defined] # noqa: F821
-            if dat and dat.isPointer():
-                ref = dat.getValue()
-
-    if ref != insn.getAddress():
-        ghidra_dat = getDataAt(ref)  # type: ignore [name-defined] # noqa: F821
-        if ghidra_dat and ghidra_dat.hasStringValue():
-            yield String(ghidra_dat.getValue()), ih.address
+    for addr in capa.features.extractors.ghidra.helpers.find_data_references_from_insn(ih.inner):
+        data = getDataAt(addr)  # type: ignore [name-defined] # noqa: F821
+        if data and data.hasStringValue():
+            yield String(data.getValue()), ih.address


 def extract_insn_mnemonic_features(
@@ -359,7 +341,7 @@ def extract_insn_cross_section_cflow(
        ref = capa.features.extractors.ghidra.helpers.dereference_ptr(insn)
        if capa.features.extractors.ghidra.helpers.check_addr_for_api(ref, fakes, imports, externs):
            return
-    elif ref_type == OperandType.DYNAMIC | OperandType.ADDRESS or ref_type == OperandType.DYNAMIC:
+    elif ref_type == OPERAND_TYPE_DYNAMIC_ADDRESS or ref_type == OperandType.DYNAMIC:
        return  # cannot resolve dynamics statically
    else:
        # pure address does not need to get dereferenced/ handled
--- a/capa/features/extractors/helpers.py
+++ b/capa/features/extractors/helpers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/ida/basicblock.py
+++ b/capa/features/extractors/ida/basicblock.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/ida/extractor.py
+++ b/capa/features/extractors/ida/extractor.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/ida/file.py
+++ b/capa/features/extractors/ida/file.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/ida/function.py
+++ b/capa/features/extractors/ida/function.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/ida/global_.py
+++ b/capa/features/extractors/ida/global_.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/ida/helpers.py
+++ b/capa/features/extractors/ida/helpers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -10,6 +10,7 @@ from typing import Any, Dict, Tuple, Iterator, Optional

 import idc
 import idaapi
+import ida_nalt
 import idautils
 import ida_bytes
 import ida_segment
@@ -17,6 +18,8 @@ import ida_segment
 from capa.features.address import AbsoluteVirtualAddress
 from capa.features.extractors.base_extractor import FunctionHandle

+IDA_NALT_ENCODING = ida_nalt.get_default_encoding_idx(ida_nalt.BPU_1B)  # use one byte-per-character encoding
+

 def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]:
    """yield all ea of a given byte sequence
@@ -26,11 +29,16 @@ def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]:
        end: max virtual address
        seq: bytes to search e.g. b"\x01\x03"
    """
+    patterns = ida_bytes.compiled_binpat_vec_t()
+
    seqstr = " ".join([f"{b:02x}" for b in seq])
+    err = ida_bytes.parse_binpat_str(patterns, 0, seqstr, 16, IDA_NALT_ENCODING)
+
+    if err:
+        return
+
    while True:
-        # TODO(mike-hunhoff): find_binary is deprecated. Please use ida_bytes.bin_search() instead.
-        # https://github.com/mandiant/capa/issues/1606
-        ea = idaapi.find_binary(start, end, seqstr, 0, idaapi.SEARCH_DOWN)
+        ea = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
        if ea == idaapi.BADADDR:
            break
        start = ea + 1
--- a/capa/features/extractors/ida/insn.py
+++ b/capa/features/extractors/ida/insn.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/loops.py
+++ b/capa/features/extractors/loops.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/null.py
+++ b/capa/features/extractors/null.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/pefile.py
+++ b/capa/features/extractors/pefile.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/strings.py
+++ b/capa/features/extractors/strings.py
@@ -1,6 +1,6 @@
 # strings code from FLOSS, https://github.com/mandiant/flare-floss
 #
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/viv/basicblock.py
+++ b/capa/features/extractors/viv/basicblock.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/viv/extractor.py
+++ b/capa/features/extractors/viv/extractor.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/viv/file.py
+++ b/capa/features/extractors/viv/file.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/viv/function.py
+++ b/capa/features/extractors/viv/function.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/viv/global_.py
+++ b/capa/features/extractors/viv/global_.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/viv/helpers.py
+++ b/capa/features/extractors/viv/helpers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/viv/indirect_calls.py
+++ b/capa/features/extractors/viv/indirect_calls.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -113,7 +113,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
        if f.vw.metadata["Format"] == "elf":
            if "symtab" not in fh.ctx["cache"]:
                # the symbol table gets stored as a function's attribute in order to avoid running
-                # this code everytime the call is made, thus preventing the computational overhead.
+                # this code every time the call is made, thus preventing the computational overhead.
                try:
                    fh.ctx["cache"]["symtab"] = SymTab.from_viv(f.vw.parsedbin)
                except Exception:
@@ -598,7 +598,7 @@ def extract_op_number_features(

    if f.vw.probeMemory(v, 1, envi.memory.MM_READ):
        # this is a valid address
-        # assume its not also a constant.
+        # assume it's not also a constant.
        return

    if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.regs.REG_ESP:
--- a/capa/features/file.py
+++ b/capa/features/file.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/features/freeze/init.py
+++ b/capa/features/freeze/init.py
@@ -1,7 +1,7 @@
 """
 capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`

-Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed
 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and limitations under the License.
 """
+
 import json
 import zlib
 import logging
@@ -381,7 +382,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
                    address=Address.from_capa(addr),
                    feature=feature_from_capa(feature),
                )  # type: ignore
-                # Mypy is unable to recognise `basic_block` as a argument due to alias
+                # Mypy is unable to recognise `basic_block` as an argument due to alias
                for feature, addr in extractor.extract_basic_block_features(f, bb)
            ]

@@ -418,7 +419,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
                features=tuple(ffeatures),
                basic_blocks=basic_blocks,
            )  # type: ignore
-            # Mypy is unable to recognise `basic_blocks` as a argument due to alias
+            # Mypy is unable to recognise `basic_blocks` as an argument due to alias
        )

    features = StaticFeatures(
@@ -426,7 +427,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
        file=tuple(file_features),
        functions=tuple(function_features),
    )  # type: ignore
-    # Mypy is unable to recognise `global_` as a argument due to alias
+    # Mypy is unable to recognise `global_` as an argument due to alias

    freeze = Freeze(
        version=CURRENT_VERSION,
@@ -436,7 +437,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
        extractor=Extractor(name=extractor.__class__.__name__),
        features=features,
    )  # type: ignore
-    # Mypy is unable to recognise `base_address` as a argument due to alias
+    # Mypy is unable to recognise `base_address` as an argument due to alias

    return freeze.model_dump_json()

@@ -484,7 +485,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
                    address=Address.from_capa(addr),
                    feature=feature_from_capa(feature),
                )  # type: ignore
-                # Mypy is unable to recognise `basic_block` as a argument due to alias
+                # Mypy is unable to recognise `basic_block` as an argument due to alias
                for feature, addr in extractor.extract_thread_features(p, t)
            ]

@@ -531,7 +532,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
        file=tuple(file_features),
        processes=tuple(process_features),
    )  # type: ignore
-    # Mypy is unable to recognise `global_` as a argument due to alias
+    # Mypy is unable to recognise `global_` as an argument due to alias

    # workaround around mypy issue: https://github.com/python/mypy/issues/1424
    get_base_addr = getattr(extractor, "get_base_addr", None)
@@ -545,7 +546,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
        extractor=Extractor(name=extractor.__class__.__name__),
        features=features,
    )  # type: ignore
-    # Mypy is unable to recognise `base_address` as a argument due to alias
+    # Mypy is unable to recognise `base_address` as an argument due to alias

    return freeze.model_dump_json()

@@ -681,14 +682,18 @@ def main(argv=None):
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="save capa features to a file")
-    capa.main.install_common_args(parser, {"sample", "format", "backend", "os", "signatures"})
+    capa.main.install_common_args(parser, {"input_file", "format", "backend", "os", "signatures"})
    parser.add_argument("output", type=str, help="Path to output file")
    args = parser.parse_args(args=argv)
-    capa.main.handle_common_args(args)

-    sigpaths = capa.main.get_signatures(args.signatures)
-
-    extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False)
+    try:
+        capa.main.handle_common_args(args)
+        capa.main.ensure_input_exists_from_cli(args)
+        input_format = capa.main.get_input_format_from_cli(args)
+        backend = capa.main.get_backend_from_cli(args, input_format)
+        extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
+    except capa.main.ShouldExitError as e:
+        return e.status_code

    Path(args.output).write_bytes(dump(extractor))

--- a/capa/features/freeze/features.py
+++ b/capa/features/freeze/features.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2022 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -132,7 +132,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
    elif isinstance(f, capa.features.file.Import):
        assert isinstance(f.value, str)
        return ImportFeature(import_=f.value, description=f.description)  # type: ignore
-        # Mypy is unable to recognise `import_` as a argument due to alias
+        # Mypy is unable to recognise `import_` as an argument due to alias

    elif isinstance(f, capa.features.file.Section):
        assert isinstance(f.value, str)
@@ -141,7 +141,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
    elif isinstance(f, capa.features.file.FunctionName):
        assert isinstance(f.value, str)
        return FunctionNameFeature(function_name=f.value, description=f.description)  # type: ignore
-        # Mypy is unable to recognise `function_name` as a argument due to alias
+        # Mypy is unable to recognise `function_name` as an argument due to alias

    # must come before check for String due to inheritance
    elif isinstance(f, capa.features.common.Substring):
@@ -160,7 +160,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
    elif isinstance(f, capa.features.common.Class):
        assert isinstance(f.value, str)
        return ClassFeature(class_=f.value, description=f.description)  # type: ignore
-        # Mypy is unable to recognise `class_` as a argument due to alias
+        # Mypy is unable to recognise `class_` as an argument due to alias

    elif isinstance(f, capa.features.common.Namespace):
        assert isinstance(f.value, str)
@@ -197,12 +197,12 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
    elif isinstance(f, capa.features.insn.OperandNumber):
        assert isinstance(f.value, int)
        return OperandNumberFeature(index=f.index, operand_number=f.value, description=f.description)  # type: ignore
-        # Mypy is unable to recognise `operand_number` as a argument due to alias
+        # Mypy is unable to recognise `operand_number` as an argument due to alias

    elif isinstance(f, capa.features.insn.OperandOffset):
        assert isinstance(f.value, int)
        return OperandOffsetFeature(index=f.index, operand_offset=f.value, description=f.description)  # type: ignore
-        # Mypy is unable to recognise `operand_offset` as a argument due to alias
+        # Mypy is unable to recognise `operand_offset` as an argument due to alias

    else:
        raise NotImplementedError(f"feature_from_capa({type(f)}) not implemented")
--- a/capa/features/insn.py
+++ b/capa/features/insn.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/ghidra/README.md
+++ b/capa/ghidra/README.md
@@ -1,172 +1,107 @@
 <div align="center">
-    <img src="/doc/img/ghidra_backend_logo.png" width=300 height=175>
+    <img src="../../doc/img/ghidra_backend_logo.png" width=240 height=125>
 </div>

-The Ghidra feature extractor is an application of the FLARE team's open-source project, Ghidrathon, to integrate capa with Ghidra using Python 3. capa is a framework that uses a well-defined collection of rules to identify capabilities in a program. You can run capa against a PE file, ELF file, or shellcode and it tells you what it thinks the program can do. For example, it might suggest that the program is a backdoor, can install services, or relies on HTTP to communicate. The Ghidra feature extractor can be used to run capa analysis on your Ghidra databases without needing access to the original binary file.
+# capa + Ghidra

-<img src="/doc/img/ghidra_script_mngr_output.png">
+[capa](https://github.com/mandiant/capa) is the FLARE team’s open-source tool that detects capabilities in executable files. [Ghidra](https://github.com/NationalSecurityAgency/ghidra) is an open-source software reverse engineering framework created and maintained by the National Security Agency Research Directorate. capa + Ghidra brings capa’s detection capabilities directly to Ghidra’s user interface, helping speed up your reverse engineering tasks by identifying what parts of a program suggest interesting behavior, such as setting a registry value. You can execute the included Python 3 scripts [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) or [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) to run capa’s analysis and view the results in Ghidra. You may be asking yourself, “Python 3 scripts in Ghidra?” You read that correctly. This integration is written entirely in Python 3 and relies on [Ghidrathon](https://github.com/mandiant/ghidrathon), an open-source Ghidra extension that adds Python 3 scripting to Ghidra.

-## Getting Started
+Check out our capa + Ghidra blog posts:
+* [Riding Dragons: capa Harnesses Ghidra](https://www.mandiant.com/resources/blog/capa-harnesses-ghidra)

-### Installation
+## UI Integration
+[capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) renders capa results in Ghidra's UI to help you quickly navigate them. This includes adding matched functions to Ghidra’s Symbol Tree and Bookmarks windows and adding comments to functions that indicate matched capabilities and features. You can execute this script using Ghidra’s Script Manager window.

-Please ensure that you have the following dependencies installed before continuing:
+### Symbol Tree Window
+Matched functions are added to Ghidra's Symbol Tree window under a custom namespace that maps to the capabilities' [capa namespace](https://github.com/mandiant/capa-rules/blob/master/doc/format.md#rule-namespace).
+<div align="center">
+    <img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>
+</div>

-| Dependency | Version | Source |
+### Comments
+
+Comments are added at the beginning of matched functions indicating matched capabilities and inline comments are added to functions indicating matched features. You can view these comments in Ghidra’s Disassembly Listing and Decompile windows.
+<div align="center">
+    <img src="https://github.com/mandiant/capa/assets/66766340/bb2b4170-7fd4-45fc-8c7b-ff8f2e2f101b" width=1000>
+</div>
+
+### Bookmarks
+
+Bookmarks are added to functions that matched a capability that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.
+<div align="center">
+    <img src="https://github.com/mandiant/capa/assets/66766340/7f9a66a9-7be7-4223-91c6-4b8fc4651336" width=825>
+</div>
+
+## Text-based Integration
+
+[capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) outputs text-based capa results that mirror the output of capa’s standalone tool. You can execute this script using Ghidra’s Script Manager and view its output in Ghidra’s Console window.
+
+<div align="center">
+  <img src="../../doc/img/ghidra_script_mngr_output.png" width=700>
+</div>
+
+You can also execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Headless Analyzer to view its output in a terminal window.
+
+<div align="center">
+  <img src="../../doc/img/ghidra_headless_analyzer.png">
+</div>
+
+# Getting Started
+
+## Requirements
+
+| Tool | Version | Source |
 |------------|---------|--------|
-| Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon |
-| Python | `>= 3.8` | https://www.python.org/downloads |
-| Ghidra | `>= 10.2` | https://ghidra-sre.org |
+| capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases |
+| Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases |
+| Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases |
+| Python | `>= 3.8.0` | https://www.python.org/downloads |

-In order to run capa using using Ghidra, you must install capa as a library, obtain the official capa rules that match the capa version you have installed, and configure the Python 3 script [capa_ghidra.py](/capa/ghidra/capa_ghidra.py). You can do this by completing the following steps using the Python 3 interpreter that you have configured for your Ghidrathon installation:
+## Installation
+
+**Note**: capa + Ghidra relies on [Ghidrathon](https://github.com/mandiant/ghidrathon) to execute Python 3 code in Ghidra. You must first install and configure Ghidrathon using the [steps outlined in its README](https://github.com/mandiant/ghidrathon?tab=readme-ov-file#installing-ghidrathon). Then, you must use the Python 3 interpreter that you configured with Ghidrathon to complete the following steps:

 1. Install capa and its dependencies from PyPI using the following command:
 ```bash
 $ pip install flare-capa
 ```

-2. Download and extract the [official capa rules](https://github.com/mandiant/capa-rules/releases) that match the capa version you have installed. Use the following command to view the version of capa you have installed:
+2. Download and extract the [official capa rules](https://github.com/mandiant/capa-rules/releases) that match the capa version you have installed. You can use the following command to view the version of capa you have installed:
 ```bash
 $ pip show flare-capa
 OR
 $ capa --version
 ```

-3. Copy [capa_ghidra.py](/capa/ghidra/capa_ghidra.py) to your `$USER_HOME/ghidra_scripts` directory or manually add `</path/to/ghidra_capa.py/>` to the Ghidra Script Manager.
+3. Copy [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) to your `ghidra_scripts` directory or manually add the parent directory of each script using Ghidra’s Script Manager.

 ## Usage

-After completing the installation steps you can execute `capa_ghidra.py` using the Ghidra Script Manager or Headless Analyzer.
+You can execute [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra’s Script Manager. [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) can also be executed using Ghidra's Headless Analyzer.

-### Ghidra Script Manager
+### Execution using Ghidra’s Script Manager

-To execute `capa_ghidra.py` using the Ghidra Script Manager, first open the Ghidra Script Manager by navigating to `Window > Script Manager` in the Ghidra Code Browser. Next, locate `capa_ghidra.py` by selecting the `Python 3 > capa` category or using the Ghidra Script Manager search funtionality. Finally, double-click `capa_ghidra.py` to execute the script. If you don't see `capa_ghidra.py`, make sure you have copied the script to your `$USER_HOME/ghidra_scripts` directory or manually added `</path/to/ghidra_capa.py/>` to the Ghidra Script Manager
+You can execute [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Script Manager as follows:
+1. Navigate to `Window > Script Manager`
+2. Expand the `Python 3 > capa` category
+3. Double-click a script to execute it

-When executed, `capa_ghidra.py` asks you to provide your capa rules directory and preferred output format. `capa_ghidra.py` supports `default`, `verbose`, and `vverbose` output formats when executed from the Ghidra Script Manager. `capa_ghidra.py` writes output to the Ghidra Console Window.
+Both scripts ask you to provide the path of your capa rules directory (see installation step 2). [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) also has you choose one of the `default`, `verbose`, or `vverbose` output formats, which mirror the output formats of capa’s standalone tool.

-#### Example
+### Execution using Ghidra’s Headless Analyzer

-The following is an example of running `capa_ghidra.py` using the Ghidra Script Manager:
+You can execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra’s Headless Analyzer by invoking the `analyzeHeadless` script included with Ghidra in its `support` directory. The following arguments must be provided:

-Selecting capa rules:
-<img src="/doc/img/ghidra_script_mngr_rules.png">
-
-Choosing output format:
-<img src="/doc/img/ghidra_script_mngr_verbosity.png">
-
-Viewing results in Ghidra Console Window:
-<img src="/doc/img/ghidra_script_mngr_output.png">
-
-### Ghidra Headless Analyzer
-
-To execute `capa_ghidra.py` using the Ghidra Headless Analyzer, you can use the Ghidra `analyzeHeadless` script located in your `$GHIDRA_HOME/support` directory. You will need to provide the following arguments to the Ghidra `analyzeHeadless` script:
-
-1. `</path/to/ghidra/project/>`: path to Ghidra project
-2. `<ghidra_project_name>`: name of Ghidra Project
-3. `-process <sample_name>`: name of sample `<sample_name>`
-4. `-ScriptPath </path/to/capa_ghidra/>`: OPTIONAL argument specifying path `</path/to/capa_ghidra/>` to `capa_ghidra.py`
-5. `-PostScript capa_ghidra.py`: executes `capa_ghidra.py` as post-analysis script
-6. `"<capa_args>"`: single, quoted string containing capa arguments that must specify capa rules directory and output format, e.g. `"<path/to/capa/rules> --verbose"`. `capa_ghidra.py` supports `default`, `verbose`, `vverbose` and `json` formats when executed using the Ghidra Headless Analyzer. `capa_ghidra.py` writes output to the console window used to execute the Ghidra `analyzeHeadless` script.
-7. `-processor <languageID>`: required ONLY if sample `<sample_name>` is shellcode. More information on specifying the `<languageID>` can be found in the `$GHIDRA_HOME/support/analyzeHeadlessREADME.html` documentation.
+| Argument | Description |
+|----|----|
+|`<project_path>`| Path to Ghidra project|
+| `<project_name>`| Name of Ghidra Project|
+| `-Process <sample_name>` OR `-Import <sample_path>`| Name of sample `<sample_name>` already imported into `<project_name>` OR absolute path of sample `<sample_path>` to import into `<project_name>`|
+| `-ScriptPath <script_path>`| OPTIONAL parent directory `<script_path>` of [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py)|
+| `-PostScript capa_ghidra.py`| Execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) after Ghidra analysis|
+| `"<script_args>"`| Quoted string `"<script_args>"` containing script arguments passed to [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) that must specify a capa rules path and optionally the output format (`--verbose`, `--vverbose`, `--json`) – you can specify `"help"` to view the script’s help message |

 The following is an example of combining these arguments into a single `analyzeHeadless` script command:
-
-```
-$GHIDRA_HOME/support/analyzeHeadless </path/to/ghidra/project/> <ghidra_project_name> -process <sample_name> -PostScript capa_ghidra.py "/path/to/capa/rules/ --verbose"
-```
-
-You may also want to run capa against a sample that you have not yet imported into your Ghidra project. The following is an example of importing a sample and running `capa_ghidra.py` using a single `analyzeHeadless` script command:
-
-```
-$GHIDRA_HOME/support/analyzeHeadless </path/to/ghidra/project/> <ghidra_project_name> -Import </path/to/sample> -PostScript capa_ghidra.py "/path/to/capa/rules/ --verbose"
-```
-
-You can also provide `capa_ghidra.py` the single argument `"help"` to view supported arguments when running the script using the Ghidra Headless Analyzer:
-```
-$GHIDRA_HOME/support/analyzeHeadless </path/to/ghidra/project/> <ghidra_project_name> -process <sample_name> -PostScript capa_ghidra.py "help"
-```
-
-#### Example
-
-The following is an example of running `capa_ghidra.py` against a shellcode sample using the Ghidra `analyzeHeadless` script:
-```
-$ analyzeHeadless /home/wumbo/Desktop/ghidra_projects/ capa_test -process 499c2a85f6e8142c3f48d4251c9c7cd6.raw32 -processor x86:LE:32:default -PostScript capa_ghidra.py "/home/wumbo/capa/rules -vv"
-[...]
-
-INFO  REPORT: Analysis succeeded for file: /499c2a85f6e8142c3f48d4251c9c7cd6.raw32 (HeadlessAnalyzer)  
-INFO  SCRIPT: /home/wumbo/ghidra_scripts/capa_ghidra.py (HeadlessAnalyzer)  
-md5                     499c2a85f6e8142c3f48d4251c9c7cd6                                                                                                                                                                                                    
-sha1
-sha256                  e8e02191c1b38c808d27a899ac164b3675eb5cadd3a8907b0ffa863714000e72
-path                    /home/wumbo/capa/tests/data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32
-timestamp               2023-08-29 17:57:00.946588
-capa version            6.1.0
-os                      unknown os
-format                  Raw Binary
-arch                    x86
-extractor               ghidra
-base address            global
-rules                   /home/wumbo/capa/rules
-function count          42
-library function count  0
-total feature count     1970
-
-contain loop (24 matches, only showing first match of library rule)
-author  moritz.raabe@mandiant.com
-scope   function
-function @ 0x0
-  or:
-    characteristic: loop @ 0x0
-    characteristic: tight loop @ 0x278
-
-contain obfuscated stackstrings
-namespace  anti-analysis/obfuscation/string/stackstring
-author     moritz.raabe@mandiant.com
-scope      basic block
-att&ck     Defense Evasion::Obfuscated Files or Information::Indicator Removal from Tools [T1027.005]
-mbc        Anti-Static Analysis::Executable Code Obfuscation::Argument Obfuscation [B0032.020], Anti-Static Analysis::Executable Code Obfuscation::Stack Strings [B0032.017]
-basic block @ 0x0 in function 0x0
-  characteristic: stack string @ 0x0
-
-encode data using XOR
-namespace  data-manipulation/encoding/xor
-author     moritz.raabe@mandiant.com
-scope      basic block
-att&ck     Defense Evasion::Obfuscated Files or Information [T1027]
-mbc        Defense Evasion::Obfuscated Files or Information::Encoding-Standard Algorithm [E1027.m02], Data::Encode Data::XOR [C0026.002]
-basic block @ 0x8AF in function 0x8A1
-  and:
-    characteristic: tight loop @ 0x8AF
-    characteristic: nzxor @ 0x8C0
-    not: = filter for potential false positives
-      or:
-        or: = unsigned bitwise negation operation (~i)
-          number: 0xFFFFFFFF = bitwise negation for unsigned 32 bits
-          number: 0xFFFFFFFFFFFFFFFF = bitwise negation for unsigned 64 bits
-        or: = signed bitwise negation operation (~i)
-          number: 0xFFFFFFF = bitwise negation for signed 32 bits
-          number: 0xFFFFFFFFFFFFFFF = bitwise negation for signed 64 bits
-        or: = Magic constants used in the implementation of strings functions.
-          number: 0x7EFEFEFF = optimized string constant for 32 bits
-          number: 0x81010101 = -0x81010101 = 0x7EFEFEFF
-          number: 0x81010100 = 0x81010100 = ~0x7EFEFEFF
-          number: 0x7EFEFEFEFEFEFEFF = optimized string constant for 64 bits
-          number: 0x8101010101010101 = -0x8101010101010101 = 0x7EFEFEFEFEFEFEFF
-          number: 0x8101010101010100 = 0x8101010101010100 = ~0x7EFEFEFEFEFEFEFF
-
-get OS information via KUSER_SHARED_DATA
-namespace   host-interaction/os/version
-author      @mr-tz
-scope       function
-att&ck      Discovery::System Information Discovery [T1082]
-references  https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm
-function @ 0x1CA6
-  or:
-    number: 0x7FFE026C = NtMajorVersion @ 0x1D18
-
-
-
-Script /home/wumbo/ghidra_scripts/capa_ghidra.py called exit with code 0
-
-[...]
+```bash
+$ analyzeHeadless /home/wumbo/demo demo -Import /home/wumbo/capa/tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ -PostScript capa_ghidra.py "/home/wumbo/capa/rules --verbose"
 ```
--- a/capa/ghidra/capa_explorer.py
+++ b/capa/ghidra/capa_explorer.py
@@ -0,0 +1,378 @@
+# Run capa against loaded Ghidra database and render results in Ghidra UI
+# @author Colton Gabertan (gabertan.colton@gmail.com)
+# @category Python 3.capa
+
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import sys
+import json
+import logging
+import pathlib
+from typing import Any, Dict, List
+
+from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd
+from ghidra.program.model.symbol import Namespace, SourceType, SymbolType
+
+import capa
+import capa.main
+import capa.rules
+import capa.render.json
+import capa.ghidra.helpers
+import capa.capabilities.common
+import capa.features.extractors.ghidra.extractor
+
+logger = logging.getLogger("capa_explorer")
+
+
+def add_bookmark(addr, txt, category="CapaExplorer"):
+    """set an "Info" bookmark with text `txt` at `addr`, filed under `category`"""
+    bookmark_manager = currentProgram().getBookmarkManager()  # type: ignore [name-defined] # noqa: F821
+    bookmark_manager.setBookmark(addr, "Info", category, txt)
+
+
+def create_namespace(namespace_str):
+    """apply a user-defined CreateNamespacesCmd for `namespace_str` and return the resulting namespace"""
+
+    namespace_cmd = CreateNamespacesCmd(namespace_str, SourceType.USER_DEFINED)
+    program = currentProgram()  # type: ignore [name-defined] # noqa: F821
+    namespace_cmd.applyTo(program)
+    return namespace_cmd.getNamespace()
+
+
+def create_label(ghidra_addr, name, capa_namespace):
+    """add label `name` at `ghidra_addr` and move it under `capa_namespace`
+
+    does nothing when a symbol with the same fully qualified name already exists at the address.
+    """
+
+    # fully qualified name the new label would end up with
+    qualified_name = capa_namespace.getName(True) + Namespace.DELIMITER + name
+
+    existing_symbols = currentProgram().getSymbolTable().getSymbols(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
+    if any(existing.getName(True) == qualified_name for existing in existing_symbols):
+        # already labeled under this capa-generated namespace, e.g. by a previous run
+        return
+
+    # create the user-defined label at the address first ...
+    label_cmd = AddLabelCmd(ghidra_addr, name, True, SourceType.USER_DEFINED)
+    label_cmd.applyTo(currentProgram())  # type: ignore [name-defined] # noqa: F821
+
+    # ... then re-home the freshly created symbol into the capa-generated namespace
+    label_cmd.getSymbol().setNamespace(capa_namespace)
+
+
+class CapaMatchData:
+    """one matched capa rule plus the logic to annotate the Ghidra database with it
+
+    attributes:
+      namespace: Ghidra namespace string the rule is filed under (joined with Namespace.DELIMITER)
+      scope: scope of the rule; "function" is handled specially by label_matches()
+      capability: rule name, quoted in pre comments
+      matches: {rule match addr: [{feature addr: node_data}]}
+      attack: MITRE ATT&CK entries (dicts read via their "parts" and "id" keys), may be empty
+      mbc: MBC entries with the same shape as `attack`, may be empty
+    """
+
+    def __init__(
+        self,
+        namespace,
+        scope,
+        capability,
+        matches,
+        attack: List[Dict[Any, Any]],
+        mbc: List[Dict[Any, Any]],
+    ):
+        self.namespace = namespace
+        self.scope = scope
+        self.capability = capability
+        self.matches = matches
+        self.attack = attack
+        self.mbc = mbc
+
+    @staticmethod
+    def _format_mapping(item):
+        """render an ATT&CK/MBC entry as its "parts" followed by its "id", joined by Namespace.DELIMITER"""
+        fields = list(item.get("parts", []))
+        mapping_id = item.get("id")
+        if mapping_id is not None:
+            # an entry without an id is rendered from its parts alone rather than raising
+            fields.append(mapping_id)
+        return Namespace.DELIMITER.join(fields)
+
+    def bookmark_functions(self):
+        """create bookmarks for MITRE ATT&CK & MBC mappings"""
+
+        if not self.attack and not self.mbc:
+            return
+
+        for key in self.matches:
+            addr = toAddr(hex(key))  # type: ignore [name-defined] # noqa: F821
+            func = getFunctionContaining(addr)  # type: ignore [name-defined] # noqa: F821
+            if func is None:
+                # match is not inside a function, so there is nothing to bookmark
+                continue
+
+            # bookmark & tag MITRE ATT&CK tactics & MBC @ function scope
+            func_addr = func.getEntryPoint()
+            for item in self.attack:
+                add_bookmark(func_addr, self._format_mapping(item), "CapaExplorer::MITRE ATT&CK")
+            for item in self.mbc:
+                add_bookmark(func_addr, self._format_mapping(item), "CapaExplorer::MBC")
+
+    def set_plate_comment(self, ghidra_addr):
+        """set plate comments at matched functions"""
+        comment = getPlateComment(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
+        rule_path = self.namespace.replace(Namespace.DELIMITER, "/")
+        # only write when the rule path is not already present,
+        # to avoid duplicate comments via subsequent script runs
+        if comment is None:
+            # first comment @ function
+            setPlateComment(ghidra_addr, rule_path + "\n")  # type: ignore [name-defined] # noqa: F821
+        elif rule_path not in comment:
+            setPlateComment(ghidra_addr, comment + rule_path + "\n")  # type: ignore [name-defined] # noqa: F821
+
+    def set_pre_comment(self, ghidra_addr, sub_type, description):
+        """set pre comments at subscoped matches of main rules"""
+        comment = getPreComment(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
+        line = f'capa: {sub_type}({description}) matched in "{self.capability}"\n'
+        if comment is None:
+            setPreComment(ghidra_addr, line)  # type: ignore [name-defined] # noqa: F821
+        elif self.capability not in comment:
+            # append, unless this capability is already mentioned at the address
+            setPreComment(ghidra_addr, comment + line)  # type: ignore [name-defined] # noqa: F821
+
+    def label_matches(self):
+        """label findings at function scopes and comment on subscope matches"""
+        capa_namespace = create_namespace(self.namespace)
+        symbol_table = currentProgram().getSymbolTable()  # type: ignore [name-defined] # noqa: F821
+
+        # handle function main scope of matched rule
+        # these will typically contain further matches within
+        if self.scope == "function":
+            for addr, sub_matches in self.matches.items():
+                ghidra_addr = toAddr(hex(addr))  # type: ignore [name-defined] # noqa: F821
+
+                # classify new function label under capa-generated namespace
+                sym = symbol_table.getPrimarySymbol(ghidra_addr)
+                if sym is None:
+                    continue
+
+                if sym.getSymbolType() == SymbolType.FUNCTION:
+                    create_label(ghidra_addr, sym.getName(), capa_namespace)
+                    self.set_plate_comment(ghidra_addr)
+
+                # parse the corresponding nodes, and pre-comment subscope matched features
+                # under the encompassing function(s)
+                for sub_match in sub_matches:
+                    for loc, node in sub_match.items():
+                        sub_ghidra_addr = toAddr(hex(loc))  # type: ignore [name-defined] # noqa: F821
+                        if sub_ghidra_addr == ghidra_addr:
+                            # skip duplicates
+                            continue
+
+                        # precomment subscope matches under the function
+                        if node != {}:
+                            for sub_type, description in parse_node(node):
+                                self.set_pre_comment(sub_ghidra_addr, sub_type, description)
+        else:
+            # resolve the encompassing function for the capa namespace
+            # of non-function scoped main matches
+            for addr, sub_matches in self.matches.items():
+                ghidra_addr = toAddr(hex(addr))  # type: ignore [name-defined] # noqa: F821
+
+                # basic block / insn scoped main matches
+                # Ex. See "Create Process on Windows" Rule
+                func = getFunctionContaining(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
+                if func is not None:
+                    func_addr = func.getEntryPoint()
+                    create_label(func_addr, func.getName(), capa_namespace)
+                    self.set_plate_comment(func_addr)
+
+                # create subscope match precomments
+                for sub_match in sub_matches:
+                    for loc, node in sub_match.items():
+                        sub_ghidra_addr = toAddr(hex(loc))  # type: ignore [name-defined] # noqa: F821
+
+                        if node == {}:
+                            continue
+
+                        if func is not None:
+                            # basic block/ insn scope under resolved function
+                            for sub_type, description in parse_node(node):
+                                self.set_pre_comment(sub_ghidra_addr, sub_type, description)
+                            continue
+
+                        # this would be a global/file scoped main match
+                        # try to resolve the encompassing function via the subscope match, instead
+                        # Ex. "run as service" rule
+                        sub_func = getFunctionContaining(sub_ghidra_addr)  # type: ignore [name-defined] # noqa: F821
+                        if sub_func is not None:
+                            sub_func_addr = sub_func.getEntryPoint()
+                            # place the function under the capa namespace and comment the feature location
+                            create_label(sub_func_addr, sub_func.getName(), capa_namespace)
+                            self.set_plate_comment(sub_func_addr)
+                            for sub_type, description in parse_node(node):
+                                self.set_pre_comment(sub_ghidra_addr, sub_type, description)
+                        else:
+                            # addr is in some other file section like .data
+                            # represent this location with a label symbol under the capa namespace
+                            # Ex. See "Reference Base64 String" rule
+                            for sub_type, description in parse_node(node):
+                                # in many cases, these will be ghidra-labeled data, so just add the existing
+                                # label symbol to the capa namespace
+                                for sym in symbol_table.getSymbols(sub_ghidra_addr):
+                                    if sym.getSymbolType() == SymbolType.LABEL:
+                                        sym.setNamespace(capa_namespace)
+                                self.set_pre_comment(sub_ghidra_addr, sub_type, description)
+
+
+def get_capabilities():
+    """ask for a capa rules directory, run capa against the open program, and return the JSON-rendered results
+
+    returns an empty string when no rules directory was chosen.
+    """
+    try:
+        chosen = askDirectory("Choose capa rules directory", "Ok")  # type: ignore [name-defined] # noqa: F821
+        rules_dir: str = chosen.getPath() if chosen else ""
+    except RuntimeError:
+        # RuntimeError thrown when user selects "Cancel"
+        rules_dir = ""
+
+    if not rules_dir:
+        logger.info("You must choose a capa rules directory before running capa.")
+        return ""  # return empty str to avoid handling both int and str types
+
+    rules_path: pathlib.Path = pathlib.Path(rules_dir)
+    logger.info("running capa using rules from %s", str(rules_path))
+
+    search_paths = [rules_path]
+    rules = capa.rules.get_rules(search_paths)
+    meta = capa.ghidra.helpers.collect_metadata([rules_path])
+    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
+
+    # the second element (counts) is not needed here
+    capabilities, _ = capa.capabilities.common.find_capabilities(rules, extractor, True)
+
+    has_limitation = capa.capabilities.common.has_file_limitation(rules, capabilities, is_standalone=False)
+    if has_limitation:
+        popup("capa explorer encountered warnings during analysis. Please check the console output for more information.")  # type: ignore [name-defined] # noqa: F821
+        logger.info("capa encountered warnings during analysis")
+
+    return capa.render.json.render(meta, rules, capabilities)
+
+
+def get_locations(match_dict):
+    """walk a match tree depth-first, yielding (location value, node) for every addressable location"""
+
+    # a location is either an rva (absolute) or an offset into a file (file)
+    addressable = ("absolute", "file")
+
+    for location in match_dict.get("locations", {}):
+        if location.get("type", "") not in addressable:
+            continue
+        yield location.get("value"), match_dict.get("node")
+
+    for child in match_dict.get("children", {}):
+        for found in get_locations(child):
+            yield found
+
+
+def parse_node(node_data):
+    """pull match descriptions and sub features by parsing node dicts
+
+    `node_data` is a dict whose "type" value names the key holding the payload dict, e.g.
+    {"type": "feature", "feature": {"type": "api", "api": "..."}}.
+
+    yields:
+      ("description", text) when the payload has a "description" key, then
+      (feature type, value) when the payload's own "type" key names a str or int value;
+      int values are rendered with hex().
+    yields nothing when the payload is missing or is not a dict.
+    """
+
+    node = node_data.get(node_data.get("type"))
+    if not isinstance(node, dict):
+        # nothing to describe; without this guard the lookups below would raise
+        return
+
+    if "description" in node:
+        yield "description", node.get("description")
+
+    feat_type = node.get("type")
+    data = node.get(feat_type)
+    if isinstance(data, int):
+        yield feat_type, hex(data)
+    elif isinstance(data, str):
+        yield feat_type, data
+
+
+def parse_json(capa_data):
+    """Parse json produced by capa
+
+    yields one CapaMatchData per entry under the "rules" key of `capa_data`.
+    """
+
+    for rule, capability in capa_data.get("rules", {}).items():
+        # structure to contain rule match address & supporting feature data
+        # {rule match addr:[{feature addr:{node_data}}]}
+        rule_matches: Dict[Any, List[Any]] = {}
+        for i, match in enumerate(capability.get("matches") or []):
+            # match[0]: rule match location
+            # match[1]: match tree holding the extracted features
+            match_loc = match[0].get("value")
+            if match_loc is None:
+                # Ex. See "Reference Base64 string"
+                # {'type':'no address'}
+                match_loc = i
+
+            # collect extracted feature locations & corresponding node data
+            feat_dict = dict(get_locations(match[1]))
+
+            # store the mapping once; appending it once per feature would make
+            # consumers walk the very same dict repeatedly
+            rule_matches[match_loc] = [feat_dict] if feat_dict else []
+
+        # dict data of currently matched rule
+        meta = capability["meta"]
+
+        # get MITRE ATT&CK and MBC
+        attack = meta.get("attack")
+        if attack is None:
+            attack = []
+        mbc = meta.get("mbc")
+        if mbc is None:
+            mbc = []
+
+        # scope match for the rule
+        scope = meta["scopes"].get("static")
+
+        fmt_rule = Namespace.DELIMITER + rule.replace(" ", "-")
+        if "namespace" in meta:
+            # split into list to help define child namespaces
+            # this requires the correct delimiter used by Ghidra
+            # Ex. 'communication/named-pipe/create/create pipe' -> capa::communication::named-pipe::create::create-pipe
+            namespace_str = Namespace.DELIMITER.join(meta["namespace"].split("/"))
+            namespace = "capa" + Namespace.DELIMITER + namespace_str + fmt_rule
+        else:
+            # rules without a "namespace" key (e.g. lib rules from the official rules repo)
+            # are filed under "lib" using the rule name itself
+            # Ex. 'contain loop' -> capa::lib::contain-loop
+            namespace = "capa" + Namespace.DELIMITER + "lib" + fmt_rule
+
+        yield CapaMatchData(namespace, scope, rule, rule_matches, attack, mbc)
+
+
+def main():
+    """run capa explorer inside Ghidra's UI and annotate the open program with the results
+
+    returns 0 on success, otherwise one of the capa.main.E_* status codes.
+    """
+    logging.basicConfig(level=logging.INFO)
+    logging.getLogger().setLevel(logging.INFO)
+
+    if isRunningHeadless():  # type: ignore [name-defined] # noqa: F821
+        logger.error("unsupported Ghidra execution mode")
+        return capa.main.E_UNSUPPORTED_GHIDRA_EXECUTION_MODE
+
+    if not capa.ghidra.helpers.is_supported_ghidra_version():
+        logger.error("unsupported Ghidra version")
+        return capa.main.E_UNSUPPORTED_GHIDRA_VERSION
+
+    if not capa.ghidra.helpers.is_supported_file_type():
+        logger.error("unsupported file type")
+        return capa.main.E_INVALID_FILE_TYPE
+
+    if not capa.ghidra.helpers.is_supported_arch_type():
+        logger.error("unsupported file architecture")
+        return capa.main.E_INVALID_FILE_ARCH
+
+    capa_json = get_capabilities()
+    if not capa_json:
+        # get_capabilities() returns "" (and logs why) when no rules directory was chosen;
+        # feeding that to json.loads() would raise instead of reporting an error
+        return capa.main.E_MISSING_RULES
+
+    # capa_data will always contain {'meta':..., 'rules':...}
+    # if the 'rules' key contains no values, then there were no matches
+    capa_data = json.loads(capa_json)
+    if not capa_data.get("rules"):
+        logger.info("capa explorer found no matches")
+        popup("capa explorer found no matches.")  # type: ignore [name-defined] # noqa: F821
+        return capa.main.E_EMPTY_REPORT
+
+    for item in parse_json(capa_data):
+        item.bookmark_functions()
+        item.label_matches()
+    logger.info("capa explorer analysis complete")
+    popup("capa explorer analysis complete.\nPlease see results in the Bookmarks Window and Namespaces section of the Symbol Tree Window.")  # type: ignore [name-defined] # noqa: F821
+    return 0
+
+
+if __name__ == "__main__":
+    # refuse to run on interpreters older than Python 3.8
+    if sys.version_info < (3, 8):
+        from capa.exceptions import UnsupportedRuntimeError
+
+        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
+
+    status = main()
+    if status != 0:
+        # any non-zero status is surfaced to the user before exiting
+        popup("capa explorer encountered errors during analysis. Please check the console output for more information.")  # type: ignore [name-defined] # noqa: F821
+    sys.exit(status)
--- a/capa/ghidra/capa_ghidra.py
+++ b/capa/ghidra/capa_ghidra.py
@@ -1,4 +1,4 @@
-# Run capa against loaded Ghidra database
+# Run capa against loaded Ghidra database and render results in Ghidra Console window
 # @author Mike Hunhoff (mehunhoff@google.com)
 # @category Python 3.capa

@@ -69,7 +69,7 @@ def run_headless():
    rules_path = pathlib.Path(args.rules)

    logger.debug("rule path: %s", rules_path)
-    rules = capa.main.get_rules([rules_path])
+    rules = capa.rules.get_rules([rules_path])

    meta = capa.ghidra.helpers.collect_metadata([rules_path])
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
@@ -78,7 +78,7 @@ def run_headless():

    meta.analysis.feature_counts = counts["feature_counts"]
    meta.analysis.library_functions = counts["library_functions"]
-    meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
+    meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)

    if capa.capabilities.common.has_file_limitation(rules, capabilities, is_standalone=True):
        logger.info("capa encountered warnings during analysis")
@@ -119,7 +119,7 @@ def run_ui():
    rules_path: pathlib.Path = pathlib.Path(rules_dir)
    logger.info("running capa using rules from %s", str(rules_path))

-    rules = capa.main.get_rules([rules_path])
+    rules = capa.rules.get_rules([rules_path])

    meta = capa.ghidra.helpers.collect_metadata([rules_path])
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
@@ -128,7 +128,7 @@ def run_ui():

    meta.analysis.feature_counts = counts["feature_counts"]
    meta.analysis.library_functions = counts["library_functions"]
-    meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
+    meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)

    if capa.capabilities.common.has_file_limitation(rules, capabilities, is_standalone=False):
        logger.info("capa encountered warnings during analysis")
--- a/capa/helpers.py
+++ b/capa/helpers.py
@@ -1,10 +1,12 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import sys
+import gzip
 import json
 import inspect
 import logging
@@ -16,12 +18,22 @@ from pathlib import Path
 import tqdm

 from capa.exceptions import UnsupportedFormatError
-from capa.features.common import FORMAT_PE, FORMAT_CAPE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format
+from capa.features.common import (
+    FORMAT_PE,
+    FORMAT_CAPE,
+    FORMAT_SC32,
+    FORMAT_SC64,
+    FORMAT_DOTNET,
+    FORMAT_FREEZE,
+    FORMAT_UNKNOWN,
+    Format,
+)

 EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
 EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
-EXTENSIONS_DYNAMIC = ("json", "json_")
+EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz")
 EXTENSIONS_ELF = "elf_"
+EXTENSIONS_FREEZE = "frz"

 logger = logging.getLogger("capa")

@@ -59,9 +71,19 @@ def assert_never(value) -> NoReturn:
    assert False, f"Unhandled value: {value} ({type(value).__name__})"  # noqa: B011


-def get_format_from_report(sample: Path) -> str:
-    report = json.load(sample.open(encoding="utf-8"))
+def load_json_from_path(json_path: Path):
+    """load JSON from a file that may be gzip-compressed, falling back to plain UTF-8 text."""
+    try:
+        with gzip.open(json_path, "r") as compressed_report:
+            return json.loads(compressed_report.read())
+    except gzip.BadGzipFile:
+        # not gzip data: parse the file as plain JSON text, closing the handle deterministically.
+        with json_path.open(encoding="utf-8") as plain_report:
+            return json.load(plain_report)

+
+def get_format_from_report(sample: Path) -> str:
+    report = load_json_from_path(sample)
    if "CAPE" in report:
        return FORMAT_CAPE

@@ -81,6 +103,8 @@ def get_format_from_extension(sample: Path) -> str:
        format_ = FORMAT_SC64
    elif sample.name.endswith(EXTENSIONS_DYNAMIC):
        format_ = get_format_from_report(sample)
+    elif sample.name.endswith(EXTENSIONS_FREEZE):
+        format_ = FORMAT_FREEZE
    return format_


@@ -156,7 +180,7 @@ def log_unsupported_format_error():

 def log_unsupported_cape_report_error(error: str):
    logger.error("-" * 80)
-    logger.error("Input file is not a valid CAPE report: %s", error)
+    logger.error(" Input file is not a valid CAPE report: %s", error)
    logger.error(" ")
    logger.error(" capa currently only supports analyzing standard CAPE reports in JSON format.")
    logger.error(
@@ -201,3 +225,16 @@ def log_unsupported_runtime_error():
        " If you're seeing this message on the command line, please ensure you're running a supported Python version."
    )
    logger.error("-" * 80)
+
+
+def is_running_standalone() -> bool:
+    """
+    report whether this process is a PyInstaller'd executable,
+    i.e., both `sys.frozen` and `sys._MEIPASS` (used for the packaged resources) are set.
+    """
+    # this logically belongs in capa.main, which is typically the only module
+    # we expect to be packaged via PyInstaller. it lives in this common area
+    # because the Binary Ninja extractor also uses it to resolve the BN API code.
+    # generally, other library code should not use this function.
+    pyinstaller_markers = ("frozen", "_MEIPASS")
+    return all(hasattr(sys, marker) for marker in pyinstaller_markers)
--- a/capa/ida/helpers.py
+++ b/capa/ida/helpers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/ida/plugin/init.py
+++ b/capa/ida/plugin/init.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -125,7 +125,7 @@ def install_icon():
        return False

    # resource leak here. need to call `ida_kernwin.free_custom_icon`?
-    # however, since we're not cycling this icon a lot, its probably ok.
+    # however, since we're not cycling this icon a lot, it's probably ok.
    # expect to leak exactly one icon per application load.
    icon = ida_kernwin.load_custom_icon(data=ICON)

--- a/capa/ida/plugin/capa_explorer.py
+++ b/capa/ida/plugin/capa_explorer.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/ida/plugin/form.py
+++ b/capa/ida/plugin/form.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -636,7 +636,7 @@ class CapaExplorerForm(idaapi.PluginForm):
                if ida_kernwin.user_cancelled():
                    raise UserCancelledError("user cancelled")

-            return capa.main.get_rules([rule_path], on_load_rule=on_load_rule)
+            return capa.rules.get_rules([rule_path], on_load_rule=on_load_rule)
        except UserCancelledError:
            logger.info("User cancelled analysis.")
            return None
@@ -775,7 +775,7 @@ class CapaExplorerForm(idaapi.PluginForm):

                    meta.analysis.feature_counts = counts["feature_counts"]
                    meta.analysis.library_functions = counts["library_functions"]
-                    meta.analysis.layout = capa.main.compute_layout(ruleset, self.feature_extractor, capabilities)
+                    meta.analysis.layout = capa.loader.compute_layout(ruleset, self.feature_extractor, capabilities)
                except UserCancelledError:
                    logger.info("User cancelled analysis.")
                    return False
@@ -932,9 +932,9 @@ class CapaExplorerForm(idaapi.PluginForm):
                    update_wait_box("verifying cached results")

                    try:
-                        results: Optional[
-                            capa.render.result_document.ResultDocument
-                        ] = capa.ida.helpers.load_and_verify_cached_results()
+                        results: Optional[capa.render.result_document.ResultDocument] = (
+                            capa.ida.helpers.load_and_verify_cached_results()
+                        )
                    except Exception as e:
                        capa.ida.helpers.inform_user_ida_ui("Failed to verify cached results, reanalyzing program")
                        logger.exception("Failed to verify cached results (error: %s)", e)
@@ -1073,9 +1073,7 @@ class CapaExplorerForm(idaapi.PluginForm):

            self.view_rulegen_features.load_features(all_file_features, all_function_features)

-            self.set_view_status_label(
-                f"capa rules: {settings.user[CAPA_SETTINGS_RULE_PATH]} ({settings.user[CAPA_SETTINGS_RULE_PATH]} rules)"
-            )
+            self.set_view_status_label(f"capa rules: {settings.user[CAPA_SETTINGS_RULE_PATH]}")
        except Exception as e:
            logger.exception("Failed to render views (error: %s)", e)
            return False
@@ -1324,10 +1322,17 @@ class CapaExplorerForm(idaapi.PluginForm):
            idaapi.info("No rule to save.")
            return

-        path = Path(self.ask_user_capa_rule_file())
-        if not path.exists():
+        rule_file_path = self.ask_user_capa_rule_file()
+        if not rule_file_path:
+            # dialog canceled
            return

+        path = Path(rule_file_path)
+        if not path.parent.exists():
+            logger.warning("Failed to save file: parent directory '%s' does not exist.", path.parent)
+            return
+
+        logger.info("Saving rule to %s.", path)
        write_file(path, s)

    def slot_checkbox_limit_by_changed(self, state):
--- a/capa/ida/plugin/hooks.py
+++ b/capa/ida/plugin/hooks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/ida/plugin/icon.py
+++ b/capa/ida/plugin/icon.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/ida/plugin/item.py
+++ b/capa/ida/plugin/item.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/ida/plugin/model.py
+++ b/capa/ida/plugin/model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/ida/plugin/proxy.py
+++ b/capa/ida/plugin/proxy.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/ida/plugin/view.py
+++ b/capa/ida/plugin/view.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -194,13 +194,17 @@ class CapaExplorerRulegenPreview(QtWidgets.QTextEdit):
            "    namespace: <insert_namespace>",
            "    authors:",
            f"      - {author}",
-            f"    scope: {scope}",
+            "    scopes:",
+            f"      static: {scope}",
+            "      dynamic: unsupported",
            "    references:",
            "      - <insert_references>",
            "    examples:",
-            f"      - {capa.ida.helpers.get_file_md5().upper()}:{hex(ea)}"
-            if ea
-            else f"      - {capa.ida.helpers.get_file_md5().upper()}",
+            (
+                f"      - {capa.ida.helpers.get_file_md5().upper()}:{hex(ea)}"
+                if ea
+                else f"      - {capa.ida.helpers.get_file_md5().upper()}"
+            ),
            "  features:",
        ]
        self.setText("\n".join(metadata_default))
@@ -760,7 +764,7 @@ class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget):

            node = self.make_child_node_from_feature(parent, parse_yaml_line(line.strip()))

-            # append our new node in case its a parent for another node
+            # append our new node in case it's a parent for another node
            if node:
                stack.append(node)

--- a/capa/loader.py
+++ b/capa/loader.py
@@ -0,0 +1,570 @@
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import sys
+import logging
+import datetime
+import contextlib
+from typing import Set, Dict, List, Optional
+from pathlib import Path
+
+from rich.console import Console
+from typing_extensions import assert_never
+
+import capa.perf
+import capa.rules
+import capa.engine
+import capa.helpers
+import capa.version
+import capa.render.json
+import capa.rules.cache
+import capa.render.default
+import capa.render.verbose
+import capa.features.common
+import capa.features.freeze as frz
+import capa.render.vverbose
+import capa.features.extractors
+import capa.render.result_document
+import capa.render.result_document as rdoc
+import capa.features.extractors.common
+import capa.features.extractors.base_extractor
+import capa.features.extractors.cape.extractor
+from capa.rules import RuleSet
+from capa.engine import MatchResults
+from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError
+from capa.features.common import (
+    OS_AUTO,
+    FORMAT_PE,
+    FORMAT_ELF,
+    FORMAT_AUTO,
+    FORMAT_CAPE,
+    FORMAT_SC32,
+    FORMAT_SC64,
+    FORMAT_DOTNET,
+)
+from capa.features.address import Address
+from capa.features.extractors.base_extractor import (
+    SampleHashes,
+    FeatureExtractor,
+    StaticFeatureExtractor,
+    DynamicFeatureExtractor,
+)
+
+logger = logging.getLogger(__name__)
+
+BACKEND_VIV = "vivisect"
+BACKEND_DOTNET = "dotnet"
+BACKEND_BINJA = "binja"
+BACKEND_PEFILE = "pefile"
+BACKEND_CAPE = "cape"
+BACKEND_FREEZE = "freeze"
+
+
+def is_supported_format(sample: Path) -> bool:
+    """
+    Return True if exactly one format feature is extracted from the file's first 0x100 bytes (magic header).
+    """
+    with sample.open("rb") as f:  # close the handle promptly instead of leaving it to the garbage collector
+        taste = f.read(0x100)
+    return len(list(capa.features.extractors.common.extract_format(taste))) == 1
+
+
+def is_supported_arch(sample: Path) -> bool:
+    """Return True if exactly one architecture feature is extracted from the sample's bytes."""
+    arch_features = list(capa.features.extractors.common.extract_arch(sample.read_bytes()))
+    return len(arch_features) == 1
+
+
+def get_arch(sample: Path) -> str:
+    """Return the first architecture extracted from the sample's bytes, or "unknown" if there is none."""
+    first = next(iter(capa.features.extractors.common.extract_arch(sample.read_bytes())), None)
+    if first is None:
+        return "unknown"
+    feature, _ = first
+    assert isinstance(feature.value, str)
+    return feature.value
+
+
+def is_supported_os(sample: Path) -> bool:
+    """Return True if exactly one OS feature is extracted from the sample's bytes."""
+    os_features = list(capa.features.extractors.common.extract_os(sample.read_bytes()))
+    return len(os_features) == 1
+
+
+def get_os(sample: Path) -> str:
+    """Return the first OS extracted from the sample's bytes, or "unknown" if there is none."""
+    first = next(iter(capa.features.extractors.common.extract_os(sample.read_bytes())), None)
+    if first is None:
+        return "unknown"
+    feature, _ = first
+    assert isinstance(feature.value, str)
+    return feature.value
+
+
+def get_meta_str(vw):
+    """
+    Summarize a workspace: whichever of its format, platform, and architecture
+    metadata entries are present, followed by the number of functions.
+    """
+    keys = ("Format", "Platform", "Architecture")
+    parts = [f"{key.lower()}: {vw.metadata[key]}" for key in keys if key in vw.metadata]
+    summary = ", ".join(parts)
+    return f"{summary}, number of functions: {len(vw.getFunctions())}"
+
+
+def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
+    """
+    load the program at the given path into a vivisect workspace using the given format.
+    also apply the given FLIRT signatures.
+
+    supported formats:
+      - pe
+      - elf
+      - shellcode 32-bit
+      - shellcode 64-bit
+      - auto
+
+    this creates and analyzes the workspace; however, it does *not* save the workspace.
+    this is the responsibility of the caller.
+    """
+
+    # lazy import enables us to not require viv if user wants another backend.
+    import viv_utils
+    import viv_utils.flirt
+
+    logger.debug("generating vivisect workspace for: %s", path)
+    if input_format == FORMAT_AUTO:
+        if not is_supported_format(path):
+            raise UnsupportedFormatError()  # auto mode requires a recognized magic header
+
+        # don't analyze, so that we can add our Flirt function analyzer first.
+        vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
+    elif input_format in {FORMAT_PE, FORMAT_ELF}:
+        vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
+    elif input_format == FORMAT_SC32:
+        # these are not analyzed nor saved.
+        vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
+    elif input_format == FORMAT_SC64:
+        vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
+    else:
+        raise ValueError("unexpected format: " + input_format)
+
+    viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
+
+    with contextlib.suppress(Exception):
+        # unfortunately viv raises a raw Exception (not any subclass).
+        # This happens when the module isn't found, such as with a viv upgrade.
+        #
+        # Remove the symbolic switch case solver.
+        # This is only enabled for ELF files, not PE files.
+        # During the following performance investigation, this analysis module
+        # had some terrible worst-case behavior.
+        # We can put up with slightly worse CFG reconstruction in order to avoid this.
+        # https://github.com/mandiant/capa/issues/1989#issuecomment-1948022767
+        vw.delFuncAnalysisModule("vivisect.analysis.generic.symswitchcase")
+
+    vw.analyze()  # analyze only now, after the FLIRT analyzers are registered and symswitchcase is removed
+
+    logger.debug("%s", get_meta_str(vw))
+    return vw
+
+
+def get_extractor(
+    input_path: Path,
+    input_format: str,
+    os_: str,
+    backend: str,
+    sigpaths: List[Path],
+    should_save_workspace=False,
+    disable_progress=False,
+    sample_path: Optional[Path] = None,
+) -> FeatureExtractor:
+    """
+    construct the feature extractor for the given backend and input file.
+
+    raises: UnsupportedFormatError, UnsupportedArchError, UnsupportedOSError,
+      RuntimeError (Binary Ninja API missing or file not loadable), ValueError (unknown backend)
+    """
+
+    # stderr=True is used here to redirect the spinner banner to stderr, so that users can redirect capa's output.
+    console = Console(stderr=True, quiet=disable_progress)
+
+    if backend == BACKEND_CAPE:
+        import capa.features.extractors.cape.extractor
+
+        report = capa.helpers.load_json_from_path(input_path)
+        return capa.features.extractors.cape.extractor.CapeExtractor.from_report(report)
+
+    elif backend == BACKEND_DOTNET:
+        import capa.features.extractors.dnfile.extractor
+
+        if input_format not in (FORMAT_PE, FORMAT_DOTNET):
+            raise UnsupportedFormatError()
+
+        return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(input_path)
+
+    elif backend == BACKEND_BINJA:
+        import capa.helpers
+        from capa.features.extractors.binja.find_binja_api import find_binja_path
+
+        # When we are running as a standalone executable, we cannot directly import binaryninja
+        # We need to first find the binja API installation path and add it into sys.path
+        if capa.helpers.is_running_standalone():
+            bn_api = find_binja_path()
+            if bn_api.exists():
+                sys.path.append(str(bn_api))
+
+        try:
+            import binaryninja
+            from binaryninja import BinaryView
+        except ImportError as e:
+            raise RuntimeError(
+                "Cannot import binaryninja module. Please install the Binary Ninja Python API first: "
+                + "https://docs.binary.ninja/dev/batch.html#install-the-api."
+            ) from e
+
+        import capa.features.extractors.binja.extractor
+
+        if input_format not in (FORMAT_SC32, FORMAT_SC64):
+            if not is_supported_format(input_path):
+                raise UnsupportedFormatError()
+
+            if not is_supported_arch(input_path):
+                raise UnsupportedArchError()
+
+            if os_ == OS_AUTO and not is_supported_os(input_path):
+                raise UnsupportedOSError()
+
+        with console.status("analyzing program...", spinner="dots"):
+            bv: BinaryView = binaryninja.load(str(input_path))
+            if bv is None:
+                raise RuntimeError(f"Binary Ninja cannot open file {input_path}")
+
+        return capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)
+
+    elif backend == BACKEND_PEFILE:
+        import capa.features.extractors.pefile
+
+        return capa.features.extractors.pefile.PefileFeatureExtractor(input_path)
+
+    elif backend == BACKEND_VIV:
+        import capa.features.extractors.viv.extractor
+
+        if input_format not in (FORMAT_SC32, FORMAT_SC64):
+            if not is_supported_format(input_path):
+                raise UnsupportedFormatError()
+
+            if not is_supported_arch(input_path):
+                raise UnsupportedArchError()
+
+            if os_ == OS_AUTO and not is_supported_os(input_path):
+                raise UnsupportedOSError()
+
+        with console.status("analyzing program...", spinner="dots"):
+            vw = get_workspace(input_path, input_format, sigpaths)
+
+            if should_save_workspace:
+                logger.debug("saving workspace")
+                try:
+                    vw.saveWorkspace()
+                except IOError:
+                    # see #168 for discussion around how to handle non-writable directories
+                    logger.info("source directory is not writable, won't save intermediate workspace")
+            else:
+                logger.debug("CAPA_SAVE_WORKSPACE unset, not saving workspace")
+
+        return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, input_path, os_)
+
+    elif backend == BACKEND_FREEZE:
+        return frz.load(input_path.read_bytes())
+
+    else:
+        raise ValueError("unexpected backend: " + backend)
+
+
+def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtractor]:
+    """collect the file-scope feature extractors for the given format; empty for any other format."""
+    # the imports are deferred into each branch so that specialized environments,
+    # e.g., those that run capa without vivisect,
+    # don't have to eagerly load dependencies they may not have.
+    if input_format == FORMAT_PE:
+        import capa.features.extractors.pefile
+
+        return [capa.features.extractors.pefile.PefileFeatureExtractor(input_file)]
+
+    if input_format == FORMAT_DOTNET:
+        import capa.features.extractors.pefile
+        import capa.features.extractors.dotnetfile
+
+        return [
+            capa.features.extractors.pefile.PefileFeatureExtractor(input_file),
+            capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(input_file),
+        ]
+
+    if input_format == FORMAT_ELF:
+        import capa.features.extractors.elffile
+
+        return [capa.features.extractors.elffile.ElfFeatureExtractor(input_file)]
+
+    if input_format == FORMAT_CAPE:
+        import capa.features.extractors.cape.extractor
+
+        cape_report = capa.helpers.load_json_from_path(input_file)
+        return [capa.features.extractors.cape.extractor.CapeExtractor.from_report(cape_report)]
+
+    return []
+
+
+def get_signatures(sigs_path: Path) -> List[Path]:
+    """collect signature files (.pat, .pat.gz, .sig) from a file or directory; raises IOError if the path is missing."""
+    if not sigs_path.exists():
+        raise IOError(f"signatures path {sigs_path} does not exist or cannot be accessed")
+
+    paths: List[Path] = []
+    if sigs_path.is_file():
+        paths.append(sigs_path)
+    elif sigs_path.is_dir():
+        logger.debug("reading signatures from directory %s", sigs_path.resolve())
+        for file in sigs_path.rglob("*"):
+            # match on the name: `Path.suffix` is only the final suffix (".gz"), so it can never equal ".pat.gz".
+            if file.is_file() and file.name.lower().endswith((".pat", ".pat.gz", ".sig")):
+                paths.append(file)
+
+    # Convert paths to their absolute and normalized forms,
+    # then sort by filename so that signatures load in a deterministic order,
+    # e.g., `0_sigs.pat` loads before `1_sigs.pat`.
+    paths = sorted((path.resolve().absolute() for path in paths), key=lambda path: path.name)
+
+    for path in paths:
+        logger.debug("found signature file: %s", path)
+
+    return paths
+
+
+def get_sample_analysis(format_, arch, os_, extractor, rules_path, counts):
+    """build the analysis record for a static or dynamic extractor; raises ValueError for anything else."""
+    extractor_name = extractor.__class__.__name__
+
+    if isinstance(extractor, StaticFeatureExtractor):
+        # the layout starts out empty and is updated after capabilities have been collected.
+        # it will then look like:
+        # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... }
+        empty_static_layout = rdoc.StaticLayout(functions=())
+        return rdoc.StaticAnalysis(
+            format=format_,
+            arch=arch,
+            os=os_,
+            extractor=extractor_name,
+            rules=tuple(rules_path),
+            base_address=frz.Address.from_capa(extractor.get_base_address()),
+            layout=empty_static_layout,
+            feature_counts=counts["feature_counts"],
+            library_functions=counts["library_functions"],
+        )
+
+    if isinstance(extractor, DynamicFeatureExtractor):
+        return rdoc.DynamicAnalysis(
+            format=format_,
+            arch=arch,
+            os=os_,
+            extractor=extractor_name,
+            rules=tuple(rules_path),
+            layout=rdoc.DynamicLayout(processes=()),
+            feature_counts=counts["feature_counts"],
+        )
+
+    raise ValueError("invalid extractor type")
+
+
+def collect_metadata(
+    argv: List[str],
+    input_path: Path,
+    input_format: str,
+    os_: str,
+    rules_path: List[Path],
+    extractor: FeatureExtractor,
+    counts: dict,
+) -> rdoc.Metadata:
+    # if it's a binary sample we hash it, if it's a report
+    # we fetch the hashes from the report
+    sample_hashes: SampleHashes = extractor.get_sample_hashes()
+    md5, sha1, sha256 = sample_hashes.md5, sample_hashes.sha1, sample_hashes.sha256
+
+    global_feats = list(extractor.extract_global_features())  # scanned three times below, so materialize once
+    extractor_format = [f.value for (f, _) in global_feats if isinstance(f, capa.features.common.Format)]
+    extractor_arch = [f.value for (f, _) in global_feats if isinstance(f, capa.features.common.Arch)]
+    extractor_os = [f.value for (f, _) in global_feats if isinstance(f, capa.features.common.OS)]
+
+    input_format = (  # prefer the extractor's value; else "unknown" for auto; else the caller's value
+        str(extractor_format[0]) if extractor_format else "unknown" if input_format == FORMAT_AUTO else input_format
+    )
+    arch = str(extractor_arch[0]) if extractor_arch else "unknown"
+    os_ = str(extractor_os[0]) if extractor_os else "unknown" if os_ == OS_AUTO else os_  # same precedence as format
+
+    if isinstance(extractor, StaticFeatureExtractor):
+        meta_class: type = rdoc.StaticMetadata
+    elif isinstance(extractor, DynamicFeatureExtractor):
+        meta_class = rdoc.DynamicMetadata
+    else:
+        assert_never(extractor)  # neither static nor dynamic
+
+    rules = tuple(r.resolve().absolute().as_posix() for r in rules_path)  # absolute, POSIX-style rule paths
+
+    return meta_class(
+        timestamp=datetime.datetime.now(),
+        version=capa.version.__version__,
+        argv=tuple(argv) if argv else None,
+        sample=rdoc.Sample(
+            md5=md5,
+            sha1=sha1,
+            sha256=sha256,
+            path=input_path.resolve().as_posix(),
+        ),
+        analysis=get_sample_analysis(
+            input_format,
+            arch,
+            os_,
+            extractor,
+            rules,
+            counts,
+        ),
+    )
+
+
+def compute_dynamic_layout(
+    rules: RuleSet, extractor: DynamicFeatureExtractor, capabilities: MatchResults
+) -> rdoc.DynamicLayout:
+    """
+    compute a metadata structure that links matched calls to their threads,
+    and those threads to the processes in which they're found.
+
+    only collect the calls found among the match result locations,
+    plus the threads and processes that contain them. otherwise, we may
+    pollute the json document with a large amount of un-referenced data.
+    """
+    assert isinstance(extractor, DynamicFeatureExtractor)
+
+    matched_calls: Set[Address] = set()
+
+    def result_rec(result: capa.features.common.Result):  # walk the result tree, collecting call addresses
+        for loc in result.locations:
+            if isinstance(loc, capa.features.address.DynamicCallAddress):
+                matched_calls.add(loc)
+        for child in result.children:
+            result_rec(child)
+
+    for matches in capabilities.values():
+        for _, result in matches:
+            result_rec(result)
+
+    names_by_process: Dict[Address, str] = {}
+    names_by_call: Dict[Address, str] = {}
+
+    matched_processes: Set[Address] = set()
+    matched_threads: Set[Address] = set()
+
+    threads_by_process: Dict[Address, List[Address]] = {}
+    calls_by_thread: Dict[Address, List[Address]] = {}
+
+    for p in extractor.get_processes():
+        threads_by_process[p.address] = []
+
+        for t in extractor.get_threads(p):
+            calls_by_thread[t.address] = []
+
+            for c in extractor.get_calls(p, t):
+                if c.address in matched_calls:  # names are only resolved for matched calls
+                    names_by_call[c.address] = extractor.get_call_name(p, t, c)
+                    calls_by_thread[t.address].append(c.address)
+
+            if calls_by_thread[t.address]:  # a thread counts as matched if it has at least one matched call
+                matched_threads.add(t.address)
+                threads_by_process[p.address].append(t.address)
+
+        if threads_by_process[p.address]:  # likewise, a process needs at least one matched thread
+            matched_processes.add(p.address)
+            names_by_process[p.address] = extractor.get_process_name(p)
+
+    layout = rdoc.DynamicLayout(
+        processes=tuple(
+            rdoc.ProcessLayout(
+                address=frz.Address.from_capa(p),
+                name=names_by_process[p],
+                matched_threads=tuple(
+                    rdoc.ThreadLayout(
+                        address=frz.Address.from_capa(t),
+                        matched_calls=tuple(
+                            rdoc.CallLayout(
+                                address=frz.Address.from_capa(c),
+                                name=names_by_call[c],
+                            )
+                            for c in calls_by_thread[t]
+                            if c in matched_calls
+                        ),
+                    )
+                    for t in threads
+                    if t in matched_threads
+                ),  # this object is open to extension in the future,
+                # such as with the function name, etc.
+            )
+            for p, threads in threads_by_process.items()
+            if p in matched_processes
+        )
+    )
+
+    return layout
+
+
+def compute_static_layout(rules: RuleSet, extractor: StaticFeatureExtractor, capabilities) -> rdoc.StaticLayout:
+    """
+    build the layout metadata that ties each matched basic block
+    to the function that contains it.
+
+    functions without any matched basic block are left out,
+    and only matched basic blocks are listed, so that the json document
+    isn't polluted with a large amount of un-referenced data.
+    """
+    known_bbs: Set[Address] = set()
+    bbs_by_function: Dict[Address, List[Address]] = {}
+    for function in extractor.get_functions():
+        bb_addresses = [bb.address for bb in extractor.get_basic_blocks(function)]
+        known_bbs.update(bb_addresses)
+        bbs_by_function[function.address] = bb_addresses
+
+    matched_bbs: Set[Address] = set()
+    for rule_name, matches in capabilities.items():
+        if capa.rules.Scope.BASIC_BLOCK not in rules[rule_name].scopes:
+            continue
+        for addr, _ in matches:
+            assert addr in known_bbs
+            matched_bbs.add(addr)
+
+    function_layouts = []
+    for function_address, bbs in bbs_by_function.items():
+        matched = [bb for bb in bbs if bb in matched_bbs]
+        if not matched:
+            continue
+        function_layouts.append(
+            rdoc.FunctionLayout(
+                address=frz.Address.from_capa(function_address),
+                matched_basic_blocks=tuple(
+                    rdoc.BasicBlockLayout(address=frz.Address.from_capa(bb)) for bb in matched
+                ),  # this object is open to extension in the future,
+                # such as with the function name, etc.
+            )
+        )
+
+    return rdoc.StaticLayout(functions=tuple(function_layouts))
+
+
+def compute_layout(rules: RuleSet, extractor, capabilities) -> rdoc.Layout:
+    """dispatch to the static or dynamic layout computation by extractor type; raises ValueError otherwise."""
+    if isinstance(extractor, StaticFeatureExtractor):
+        return compute_static_layout(rules, extractor, capabilities)
+    if isinstance(extractor, DynamicFeatureExtractor):
+        return compute_dynamic_layout(rules, extractor, capabilities)
+    raise ValueError("extractor must be either a static or dynamic extractor")
--- a/capa/main.py
+++ b/capa/main.py
--- a/capa/optimizer.py
+++ b/capa/optimizer.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/perf.py
+++ b/capa/perf.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -33,7 +33,7 @@ def render_meta(doc: rd.ResultDocument, ostream: StringIO):
        (width("md5", 22), width(doc.meta.sample.md5, 82)),
        ("sha1", doc.meta.sample.sha1),
        ("sha256", doc.meta.sample.sha256),
-        ("analysis", doc.meta.flavor),
+        ("analysis", doc.meta.flavor.value),
        ("os", doc.meta.analysis.os),
        ("format", doc.meta.analysis.format),
        ("arch", doc.meta.analysis.arch),
@@ -102,7 +102,11 @@ def render_capabilities(doc: rd.ResultDocument, ostream: StringIO):

    if rows:
        ostream.write(
-            tabulate.tabulate(rows, headers=[width("Capability", 50), width("Namespace", 50)], tablefmt="mixed_outline")
+            tabulate.tabulate(
+                rows,
+                headers=[width("Capability", 50), width("Namespace", 50)],
+                tablefmt="mixed_outline",
+            )
        )
        ostream.write("\n")
    else:
@@ -148,7 +152,55 @@ def render_attack(doc: rd.ResultDocument, ostream: StringIO):
    if rows:
        ostream.write(
            tabulate.tabulate(
-                rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)], tablefmt="mixed_grid"
+                rows,
+                headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)],
+                tablefmt="mixed_grid",
+            )
+        )
+        ostream.write("\n")
+
+
+def render_maec(doc: rd.ResultDocument, ostream: StringIO):
+    """
+    render the MAEC metadata of the matched rules as a two-column table.
+
+    collects the truthy MAEC fields of the rules yielded by rutils.maec_rules(doc)
+    and writes one row per category to ostream. nothing is written when no field is set.
+    rows are emitted in sorted category order; values within a row are sorted, one per line.
+
+    example::
+
+        +--------------------------+-----------------------------------------------------------+
+        | MAEC Category            | MAEC Value                                                |
+        |--------------------------+-----------------------------------------------------------|
+        | analysis-conclusion      | malicious                                                 |
+        |--------------------------+-----------------------------------------------------------|
+        | malware-category         | downloader                                                |
+        |                          | launcher                                                  |
+        |--------------------------+-----------------------------------------------------------|
+        | malware-family           | PlugX                                                     |
+        +--------------------------+-----------------------------------------------------------+
+    """
+    # attribute names read from rule.meta.maec; each becomes one (optional) table row.
+    maec_categories = {
+        "analysis_conclusion",
+        "analysis_conclusion_ov",
+        "malware_family",
+        "malware_category",
+        "malware_category_ov",
+    }
+    # category -> set of distinct values seen across all MAEC rules.
+    maec_table = collections.defaultdict(set)
+    for rule in rutils.maec_rules(doc):
+        for maec_category in maec_categories:
+            # a missing attribute is treated the same as an empty one.
+            maec_value = getattr(rule.meta.maec, maec_category, None)
+            if maec_value:
+                maec_table[maec_category].add(maec_value)
+
+    rows = []
+    # iterate in sorted order so the output does not depend on set iteration order.
+    for category in sorted(maec_categories):
+        # .get() rather than indexing, so the defaultdict is not populated with empty sets.
+        values = maec_table.get(category, set())
+        if values:
+            # display the field name with hyphens instead of underscores.
+            rows.append((rutils.bold(category.replace("_", "-")), "\n".join(sorted(values))))
+
+    if rows:
+        ostream.write(
+            tabulate.tabulate(
+                rows,
+                headers=[width("MAEC Category", 25), width("MAEC Value", 75)],
+                tablefmt="mixed_grid",
            )
        )
        ostream.write("\n")
@@ -191,7 +243,9 @@ def render_mbc(doc: rd.ResultDocument, ostream: StringIO):
    if rows:
        ostream.write(
            tabulate.tabulate(
-                rows, headers=[width("MBC Objective", 25), width("MBC Behavior", 75)], tablefmt="mixed_grid"
+                rows,
+                headers=[width("MBC Objective", 25), width("MBC Behavior", 75)],
+                tablefmt="mixed_grid",
            )
        )
        ostream.write("\n")
@@ -204,6 +258,8 @@ def render_default(doc: rd.ResultDocument):
    ostream.write("\n")
    render_attack(doc, ostream)
    ostream.write("\n")
+    render_maec(doc, ostream)
+    ostream.write("\n")
    render_mbc(doc, ostream)
    ostream.write("\n")
    render_capabilities(doc, ostream)
--- a/capa/render/json.py
+++ b/capa/render/json.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
--- a/capa/render/proto/capa.proto
+++ b/capa/render/proto/capa.proto
@@ -1,5 +1,7 @@
 syntax = "proto3";

+package mandiant.capa;
+
 message APIFeature {
  string type = 1;
  string api = 2;
--- a/capa/render/proto/capa_pb2.py
+++ b/capa/render/proto/capa_pb2.py
--- a/capa/render/result_document.py
+++ b/capa/render/result_document.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -160,8 +160,7 @@ class CompoundStatementType:
    OPTIONAL = "optional"


-class StatementModel(FrozenModel):
-    ...
+class StatementModel(FrozenModel): ...


 class CompoundStatement(StatementModel):
@@ -307,7 +306,7 @@ class Match(FrozenModel):
    args:
      success: did the node match?
      node: the logic node or feature node.
-      children: any children of the logic node. not relevent for features, can be empty.
+      children: any children of the logic node. not relevant for features, can be empty.
      locations: where the feature matched. not relevant for logic nodes (except range), can be empty.
      captures: captured values from the string/regex feature, and the locations of those values.
    """
@@ -419,7 +418,7 @@ class Match(FrozenModel):
                            # doc[locations] contains all matches for the given namespace.
                            # for example, the feature might be `match: anti-analysis/packer`
                            # which matches against "generic unpacker" and "UPX".
-                            # in this case, doc[locations] contains locations for *both* of thse.
+                            # in this case, doc[locations] contains locations for *both* of those.
                            #
                            # rule_matches contains the matches for the specific rule.
                            # this is a subset of doc[locations].
@@ -650,9 +649,9 @@ class ResultDocument(FrozenModel):
        return ResultDocument(meta=meta, rules=rule_matches)

    def to_capa(self) -> Tuple[Metadata, Dict]:
-        capabilities: Dict[
-            str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]
-        ] = collections.defaultdict(list)
+        capabilities: Dict[str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]] = (
+            collections.defaultdict(list)
+        )

        # this doesn't quite work because we don't have the rule source for rules that aren't matched.
        rules_by_name = {
--- a/capa/render/utils.py
+++ b/capa/render/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import io
-from typing import Union, Iterator
+from typing import Dict, List, Tuple, Union, Iterator, Optional

 import termcolor

@@ -40,9 +40,14 @@ def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]):
    return f"{'::'.join(data.parts)} [{data.id}]"


+def sort_rules(rules: Dict[str, rd.RuleMatches]) -> List[Tuple[Optional[str], str, rd.RuleMatches]]:
+    """
+    sort rules by namespace and then by name.
+
+    args:
+      rules: the rule matches to order; only the values of the mapping are read.
+
+    returns: a list of (namespace, name, rule) tuples in ascending (namespace, name) order.
+      a rule without a namespace gets the empty string, so it sorts before namespaced rules.
+    """
+    entries = [(rule.meta.namespace or "", rule.meta.name, rule) for rule in rules.values()]
+    # compare only (namespace, name). sorting the whole tuples would, on a tie,
+    # fall through to comparing the rule objects themselves, which may not define an ordering.
+    # with the key, tied entries simply keep their input order (sorted() is stable).
+    return sorted(entries, key=lambda entry: entry[:2])
+
+
 def capability_rules(doc: rd.ResultDocument) -> Iterator[rd.RuleMatches]:
    """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
-    for _, _, rule in sorted((rule.meta.namespace or "", rule.meta.name, rule) for rule in doc.rules.values()):
+    for _, _, rule in sort_rules(doc.rules):
        if rule.meta.lib:
            continue
        if rule.meta.is_subscope_rule:
@@ -61,6 +66,21 @@ def capability_rules(doc: rd.ResultDocument) -> Iterator[rd.RuleMatches]:
        yield rule


+def maec_rules(doc: rd.ResultDocument) -> Iterator[rd.RuleMatches]:
+    """yield the rules whose metadata has at least one truthy MAEC field, in doc.rules order."""
+    for candidate in doc.rules.values():
+        maec = candidate.meta.maec
+        populated = (
+            maec.analysis_conclusion,
+            maec.analysis_conclusion_ov,
+            maec.malware_family,
+            maec.malware_category,
+            maec.malware_category_ov,
+        )
+        if not any(populated):
+            continue
+        yield candidate
+
+
 class StringIO(io.StringIO):
    def writeln(self, s):
        self.write(s)
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -14,7 +14,7 @@ example::
                 0x10003415
                 0x10003797

-Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -22,6 +22,7 @@ Unless required by applicable law or agreed to in writing, software distributed
 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and limitations under the License.
 """
+
 from typing import cast

 import tabulate
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -48,7 +48,7 @@ def hanging_indent(s: str, indent: int) -> str:
 def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address], indent: int):
    import capa.render.verbose as v

-    # its possible to have an empty locations array here,
+    # it's possible to have an empty locations array here,
    # such as when we're in MODE_FAILURE and showing the logic
    # under a `not` statement (which will have no matched locations).
    locations = sorted(locations)
--- a/capa/rules/__init__.py
+++ b/capa/rules/__init__.py
--- a/Show More
+++ b/Show More