mirror of
https://github.com/mandiant/capa.git
synced 2025-12-09 06:10:36 -08:00
Compare commits
106 Commits
mr/library
...
v8.0.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f11661f8f2 | ||
|
|
518dc3381c | ||
|
|
5c60adaf96 | ||
|
|
4ab8d75629 | ||
|
|
51d852d1b3 | ||
|
|
aa8e4603d1 | ||
|
|
6c61a91778 | ||
|
|
e633e34517 | ||
|
|
9c72c9067b | ||
|
|
168435cf75 | ||
|
|
5fdf7e61e2 | ||
|
|
95fc747e6f | ||
|
|
1f374e4986 | ||
|
|
28c0234339 | ||
|
|
f57f909e68 | ||
|
|
02c359f79f | ||
|
|
4448d612f1 | ||
|
|
d7cf8d1251 | ||
|
|
d1f3e43325 | ||
|
|
83a46265df | ||
|
|
0c64bd4985 | ||
|
|
ed86e5fb1b | ||
|
|
e1c786466a | ||
|
|
959a234f0e | ||
|
|
e57de2beb4 | ||
|
|
9c9b3711c0 | ||
|
|
65e2dac4c4 | ||
|
|
9ad3f06e1d | ||
|
|
201ec07b58 | ||
|
|
c85be8dc72 | ||
|
|
54952feb07 | ||
|
|
379d6ef313 | ||
|
|
28fcd10d2e | ||
|
|
a6481df6c4 | ||
|
|
abe80842cb | ||
|
|
b6763ac5fe | ||
|
|
5a284de438 | ||
|
|
8cfccbcb44 | ||
|
|
01772d0de0 | ||
|
|
f0042157ab | ||
|
|
6a2330c11a | ||
|
|
02b5e11380 | ||
|
|
32c428b989 | ||
|
|
20909c1d95 | ||
|
|
035b4f6ae6 | ||
|
|
cb002567c4 | ||
|
|
46c513c0a9 | ||
|
|
0f0523d2ba | ||
|
|
688841fd3b | ||
|
|
2a6ba62379 | ||
|
|
ca7580d417 | ||
|
|
7c01712843 | ||
|
|
ef02e4fe83 | ||
|
|
d51074385b | ||
|
|
d9ea57d29d | ||
|
|
8b7ec049f4 | ||
|
|
c05e01cc3a | ||
|
|
11bb0c3fbd | ||
|
|
93da346f32 | ||
|
|
3a2056b701 | ||
|
|
915f3b0511 | ||
|
|
cd61983e43 | ||
|
|
9627f7e5c3 | ||
|
|
3ebec9ec2b | ||
|
|
295cd413bb | ||
|
|
03e4778620 | ||
|
|
e8ad207245 | ||
|
|
a31bd2cd15 | ||
|
|
9118946ecb | ||
|
|
7b32706bd4 | ||
|
|
c632d594a6 | ||
|
|
4398b8ac31 | ||
|
|
ec697c01f9 | ||
|
|
097ed73ccd | ||
|
|
4e121ae24f | ||
|
|
322e7a934e | ||
|
|
7d983af907 | ||
|
|
77758e8922 | ||
|
|
296255f581 | ||
|
|
0237059cbd | ||
|
|
3241ee599f | ||
|
|
24236dda0e | ||
|
|
d4d856767d | ||
|
|
35767e6c6a | ||
|
|
7d8ee6aaac | ||
|
|
23709c9d6a | ||
|
|
bc72b6d14e | ||
|
|
13b1e533f5 | ||
|
|
7cc3ddd4ea | ||
|
|
20ae098cda | ||
|
|
2987eeb0ac | ||
|
|
cebf8e7274 | ||
|
|
d74225b5e0 | ||
|
|
70610cd1c5 | ||
|
|
338107cf9e | ||
|
|
6b88eed1e4 | ||
|
|
54badc323d | ||
|
|
2e2e1bc277 | ||
|
|
84c9da09e0 | ||
|
|
b2f89695b5 | ||
|
|
bc91171c65 | ||
|
|
69190dfa82 | ||
|
|
688afab087 | ||
|
|
6447319cc7 | ||
|
|
7be6fe6ae1 | ||
|
|
ca7073ce87 |
@@ -1,6 +1,6 @@
|
|||||||
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile
|
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile
|
||||||
|
|
||||||
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
|
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3-bullseye, 3.10-bullseye, 3-buster, 3.10-buster, etc.
|
||||||
ARG VARIANT="3.10-bullseye"
|
ARG VARIANT="3.10-bullseye"
|
||||||
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
|
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
"dockerfile": "Dockerfile",
|
"dockerfile": "Dockerfile",
|
||||||
"context": "..",
|
"context": "..",
|
||||||
"args": {
|
"args": {
|
||||||
// Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6
|
// Update 'VARIANT' to pick a Python version: 3, 3.10, etc.
|
||||||
// Append -bullseye or -buster to pin to an OS version.
|
// Append -bullseye or -buster to pin to an OS version.
|
||||||
// Use -bullseye variants on local on arm64/Apple Silicon.
|
// Use -bullseye variants on local on arm64/Apple Silicon.
|
||||||
"VARIANT": "3.10",
|
"VARIANT": "3.10",
|
||||||
|
|||||||
13
.github/workflows/build.yml
vendored
13
.github/workflows/build.yml
vendored
@@ -21,26 +21,25 @@ jobs:
|
|||||||
# set to false for debugging
|
# set to false for debugging
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
# using Python 3.8 to support running across multiple operating systems including Windows 7
|
|
||||||
include:
|
include:
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
# use old linux so that the shared library versioning is more portable
|
# use old linux so that the shared library versioning is more portable
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: linux
|
asset_name: linux
|
||||||
python_version: 3.8
|
python_version: '3.10'
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: linux-py312
|
asset_name: linux-py312
|
||||||
python_version: 3.12
|
python_version: '3.12'
|
||||||
- os: windows-2019
|
- os: windows-2019
|
||||||
artifact_name: capa.exe
|
artifact_name: capa.exe
|
||||||
asset_name: windows
|
asset_name: windows
|
||||||
python_version: 3.8
|
python_version: '3.10'
|
||||||
- os: macos-12
|
- os: macos-13
|
||||||
# use older macOS for assumed better portability
|
# use older macOS for assumed better portability
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: macos
|
asset_name: macos
|
||||||
python_version: 3.8
|
python_version: '3.10'
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -107,7 +106,7 @@ jobs:
|
|||||||
# upload zipped binaries to Release page
|
# upload zipped binaries to Release page
|
||||||
if: github.event_name == 'release'
|
if: github.event_name == 'release'
|
||||||
name: zip and upload ${{ matrix.asset_name }}
|
name: zip and upload ${{ matrix.asset_name }}
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-latest
|
||||||
needs: [build]
|
needs: [build]
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
|
|||||||
7
.github/workflows/changelog.yml
vendored
7
.github/workflows/changelog.yml
vendored
@@ -13,8 +13,11 @@ permissions:
|
|||||||
jobs:
|
jobs:
|
||||||
check_changelog:
|
check_changelog:
|
||||||
# no need to check for dependency updates via dependabot
|
# no need to check for dependency updates via dependabot
|
||||||
if: github.actor != 'dependabot[bot]' && github.actor != 'dependabot-preview[bot]'
|
# github.event.pull_request.user.login refers to PR author
|
||||||
runs-on: ubuntu-20.04
|
if: |
|
||||||
|
github.event.pull_request.user.login != 'dependabot[bot]' &&
|
||||||
|
github.event.pull_request.user.login != 'dependabot-preview[bot]'
|
||||||
|
runs-on: ubuntu-latest
|
||||||
env:
|
env:
|
||||||
NO_CHANGELOG: '[x] No CHANGELOG update needed'
|
NO_CHANGELOG: '[x] No CHANGELOG update needed'
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
2
.github/workflows/publish.yml
vendored
2
.github/workflows/publish.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
|||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: '3.8'
|
python-version: '3.10'
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
|
|||||||
2
.github/workflows/tag.yml
vendored
2
.github/workflows/tag.yml
vendored
@@ -9,7 +9,7 @@ permissions: read-all
|
|||||||
jobs:
|
jobs:
|
||||||
tag:
|
tag:
|
||||||
name: Tag capa rules
|
name: Tag capa rules
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa-rules
|
- name: Checkout capa-rules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
|
|||||||
30
.github/workflows/tests.yml
vendored
30
.github/workflows/tests.yml
vendored
@@ -26,7 +26,7 @@ env:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
changelog_format:
|
changelog_format:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-22.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -37,15 +37,15 @@ jobs:
|
|||||||
if [ $number != 1 ]; then exit 1; fi
|
if [ $number != 1 ]; then exit 1; fi
|
||||||
|
|
||||||
code_style:
|
code_style:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-22.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
# use latest available python to take advantage of best performance
|
# use latest available python to take advantage of best performance
|
||||||
- name: Set up Python 3.11
|
- name: Set up Python 3.12
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.12"
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -64,16 +64,16 @@ jobs:
|
|||||||
run: pre-commit run deptry --hook-stage manual
|
run: pre-commit run deptry --hook-stage manual
|
||||||
|
|
||||||
rule_linter:
|
rule_linter:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-22.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
- name: Set up Python 3.11
|
- name: Set up Python 3.12
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.12"
|
||||||
- name: Install capa
|
- name: Install capa
|
||||||
run: |
|
run: |
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -88,17 +88,17 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-20.04, windows-2019, macos-12]
|
os: [ubuntu-20.04, windows-2019, macos-13]
|
||||||
# across all operating systems
|
# across all operating systems
|
||||||
python-version: ["3.8", "3.11"]
|
python-version: ["3.10", "3.11"]
|
||||||
include:
|
include:
|
||||||
# on Ubuntu run these as well
|
# on Ubuntu run these as well
|
||||||
- os: ubuntu-20.04
|
|
||||||
python-version: "3.8"
|
|
||||||
- os: ubuntu-20.04
|
|
||||||
python-version: "3.9"
|
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
python-version: "3.10"
|
python-version: "3.10"
|
||||||
|
- os: ubuntu-20.04
|
||||||
|
python-version: "3.11"
|
||||||
|
- os: ubuntu-20.04
|
||||||
|
python-version: "3.12"
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -131,7 +131,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.9", "3.11"]
|
python-version: ["3.10", "3.11"]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
# do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
|
# do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
|
||||||
@@ -173,7 +173,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.8", "3.11"]
|
python-version: ["3.10", "3.11"]
|
||||||
java-version: ["17"]
|
java-version: ["17"]
|
||||||
ghidra-version: ["11.0.1"]
|
ghidra-version: ["11.0.1"]
|
||||||
public-version: ["PUBLIC_20240130"] # for ghidra releases
|
public-version: ["PUBLIC_20240130"] # for ghidra releases
|
||||||
|
|||||||
103
.github/workflows/web-release.yml
vendored
Normal file
103
.github/workflows/web-release.yml
vendored
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
name: create web release
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
version:
|
||||||
|
description: 'Version number for the release (x.x.x)'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run-tests:
|
||||||
|
uses: ./.github/workflows/web-tests.yml
|
||||||
|
|
||||||
|
build-and-release:
|
||||||
|
needs: run-tests
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set release name
|
||||||
|
run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Check if release already exists
|
||||||
|
run: |
|
||||||
|
if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
|
||||||
|
echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
cache: 'npm'
|
||||||
|
cache-dependency-path: 'web/explorer/package-lock.json'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm ci
|
||||||
|
working-directory: web/explorer
|
||||||
|
|
||||||
|
- name: Build offline bundle
|
||||||
|
run: npm run build:bundle
|
||||||
|
working-directory: web/explorer
|
||||||
|
|
||||||
|
- name: Compress bundle
|
||||||
|
run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
|
||||||
|
working-directory: web/explorer
|
||||||
|
|
||||||
|
- name: Create releases directory
|
||||||
|
run: mkdir -vp web/explorer/releases
|
||||||
|
|
||||||
|
- name: Move release to releases folder
|
||||||
|
run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases
|
||||||
|
|
||||||
|
- name: Compute release SHA256 hash
|
||||||
|
run: |
|
||||||
|
echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Update CHANGELOG.md
|
||||||
|
run: |
|
||||||
|
echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
|
||||||
|
echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
|
||||||
|
echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
|
||||||
|
echo "" >> web/explorer/releases/CHANGELOG.md
|
||||||
|
cat web/explorer/releases/CHANGELOG.md
|
||||||
|
|
||||||
|
- name: Remove older releases
|
||||||
|
# keep only the latest 3 releases
|
||||||
|
run: ls -t capa-explorer-web-v*.zip | tail -n +4 | xargs -r rm --
|
||||||
|
working-directory: web/explorer/releases
|
||||||
|
|
||||||
|
- name: Stage release files
|
||||||
|
run: |
|
||||||
|
git config --local user.email "capa-dev@mandiant.com"
|
||||||
|
git config --local user.name "Capa Bot"
|
||||||
|
git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
|
||||||
|
git add -u web/explorer/releases/
|
||||||
|
|
||||||
|
- name: Create Pull Request
|
||||||
|
uses: peter-evans/create-pull-request@5e914681df9dc83aa4e4905692ca88beb2f9e91f # v7.0.5
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
title: "explorer web: add release v${{ github.event.inputs.version }}"
|
||||||
|
body: |
|
||||||
|
This PR adds a new capa Explorer Web release v${{ github.event.inputs.version }}.
|
||||||
|
|
||||||
|
Release details:
|
||||||
|
- Name: ${{ env.RELEASE_NAME }}
|
||||||
|
- SHA256: ${{ env.RELEASE_SHA256 }}
|
||||||
|
|
||||||
|
This release is generated by the [web release](https://github.com/mandiant/capa/actions/workflows/web-release.yml) workflow.
|
||||||
|
|
||||||
|
- [x] No CHANGELOG update needed
|
||||||
|
- [x] No new tests needed
|
||||||
|
- [x] No documentation update needed
|
||||||
|
commit-message: ":robot: explorer web: add release ${{ env.RELEASE_NAME }}"
|
||||||
|
branch: release/web-v${{ github.event.inputs.version }}
|
||||||
|
add-paths: web/explorer/releases/${{ env.RELEASE_NAME }}.zip
|
||||||
|
base: master
|
||||||
|
labels: webui
|
||||||
|
delete-branch: true
|
||||||
|
committer: Capa Bot <capa-dev@mandiant.com>
|
||||||
|
author: Capa Bot <capa-dev@mandiant.com>
|
||||||
13
.github/workflows/web-tests.yml
vendored
13
.github/workflows/web-tests.yml
vendored
@@ -1,10 +1,11 @@
|
|||||||
name: Capa Explorer Web tests
|
name: capa Explorer Web tests
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ master ]
|
branches: [ master ]
|
||||||
paths:
|
paths:
|
||||||
- 'web/explorer/**'
|
- 'web/explorer/**'
|
||||||
|
workflow_call: # this allows the workflow to be called by other workflows
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
@@ -23,20 +24,20 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
cache-dependency-path: './web/explorer/package-lock.json'
|
cache-dependency-path: 'web/explorer/package-lock.json'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: npm ci
|
run: npm ci
|
||||||
working-directory: ./web/explorer
|
working-directory: web/explorer
|
||||||
|
|
||||||
- name: Lint
|
- name: Lint
|
||||||
run: npm run lint
|
run: npm run lint
|
||||||
working-directory: ./web/explorer
|
working-directory: web/explorer
|
||||||
|
|
||||||
- name: Format
|
- name: Format
|
||||||
run: npm run format:check
|
run: npm run format:check
|
||||||
working-directory: ./web/explorer
|
working-directory: web/explorer
|
||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
run: npm run test
|
run: npm run test
|
||||||
working-directory: ./web/explorer
|
working-directory: web/explorer
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: isort
|
- id: isort
|
||||||
name: isort
|
name: isort
|
||||||
stages: [commit, push, manual]
|
stages: [pre-commit, pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: isort
|
entry: isort
|
||||||
args:
|
args:
|
||||||
@@ -46,7 +46,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
name: black
|
name: black
|
||||||
stages: [commit, push, manual]
|
stages: [pre-commit, pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: black
|
entry: black
|
||||||
args:
|
args:
|
||||||
@@ -64,7 +64,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: ruff
|
- id: ruff
|
||||||
name: ruff
|
name: ruff
|
||||||
stages: [commit, push, manual]
|
stages: [pre-commit, pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: ruff
|
entry: ruff
|
||||||
args:
|
args:
|
||||||
@@ -82,7 +82,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
name: flake8
|
name: flake8
|
||||||
stages: [push, manual]
|
stages: [pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: flake8
|
entry: flake8
|
||||||
args:
|
args:
|
||||||
@@ -101,7 +101,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: mypy
|
- id: mypy
|
||||||
name: mypy
|
name: mypy
|
||||||
stages: [push, manual]
|
stages: [pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: mypy
|
entry: mypy
|
||||||
args:
|
args:
|
||||||
@@ -119,7 +119,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: deptry
|
- id: deptry
|
||||||
name: deptry
|
name: deptry
|
||||||
stages: [push, manual]
|
stages: [pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: deptry .
|
entry: deptry .
|
||||||
always_run: true
|
always_run: true
|
||||||
|
|||||||
116
CHANGELOG.md
116
CHANGELOG.md
@@ -12,9 +12,6 @@
|
|||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
||||||
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
|
|
||||||
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
|
|
||||||
|
|
||||||
### capa Explorer Web
|
### capa Explorer Web
|
||||||
|
|
||||||
### capa Explorer IDA Pro plugin
|
### capa Explorer IDA Pro plugin
|
||||||
@@ -22,8 +19,115 @@
|
|||||||
### Development
|
### Development
|
||||||
|
|
||||||
### Raw diffs
|
### Raw diffs
|
||||||
- [capa v7.4.0...master](https://github.com/mandiant/capa/compare/v7.4.0...master)
|
- [capa v8.0.0...master](https://github.com/mandiant/capa/compare/v8.0.0...master)
|
||||||
- [capa-rules v7.4.0...master](https://github.com/mandiant/capa-rules/compare/v7.4.0...master)
|
- [capa-rules v8.0.0...master](https://github.com/mandiant/capa-rules/compare/v8.0.0...master)
|
||||||
|
|
||||||
|
## v8.0.0
|
||||||
|
|
||||||
|
capa version 8 adds support for IDA Pro 9.0 (and idalib). The release comes with various improvements and bug fixes for the Binary Ninja backend (including support for loading Binary Ninja database files) -- thanks to @xusheng6.
|
||||||
|
|
||||||
|
Additional bug fixes improve the dynamic and BinExport backends.
|
||||||
|
|
||||||
|
capa version 8 now requires Python 3.10 or newer.
|
||||||
|
|
||||||
|
Special thanks to @Tamir-K, @harshit-wadhwani, @jorik-utwente for their great contributions.
|
||||||
|
|
||||||
|
### New Features
|
||||||
|
|
||||||
|
- allow call as valid subscope for call scoped rules @mr-tz
|
||||||
|
- support loading and analyzing a Binary Ninja database #2496 @xusheng6
|
||||||
|
- vmray: record process command line details @mr-tz
|
||||||
|
|
||||||
|
### Breaking Changes
|
||||||
|
|
||||||
|
- remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz
|
||||||
|
|
||||||
|
### New Rules (54)
|
||||||
|
|
||||||
|
- nursery/get-shadow-password-file-entry-on-linux jonathanlepore@google.com
|
||||||
|
- nursery/set-shadow-password-file-entry-on-linux jonathanlepore@google.com
|
||||||
|
- collection/browser/get-chrome-cookiemonster still@teamt5.org
|
||||||
|
- collection/browser/get-elevation-service-for-chromium-based-browsers still@teamt5.org
|
||||||
|
- collection/get-steam-token still@teamt5.org
|
||||||
|
- nursery/persist-via-application-shimming j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-bits-job j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-print-processors-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- linking/static/touchsocket/linked-against-touchsocket still@teamt5.org
|
||||||
|
- runtime/dotnet/compiled-with-dotnet-aot still@teamt5.org
|
||||||
|
- nursery/persist-via-errorhandler-script j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-get-variable-hijack j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-iphlpapi-dll-hijack j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-lnk-shortcut j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-powershell-profile j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-windows-accessibility-tools j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-windows-terminal-profile j.j.vannielen@utwente.nl
|
||||||
|
- nursery/write-to-browser-extension-directory j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-aedebug-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-amsi-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-app-paths-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-appcertdlls-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-appx-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-autodialdll-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-autoplayhandlers-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-bootverificationprogram-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-code-signing-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-com-hijack j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-command-processor-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-contextmenuhandlers-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-cor_profiler_path-registry-value j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-default-file-association-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-disk-cleanup-handler-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-dotnet-dbgmanageddebugger-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-dotnet_startup_hooks-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-explorer-tools-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-filter-handlers-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-group-policy-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-hhctrl-com-hijack j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-htmlhelp-author-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-image-file-execution-options-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-lsa-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-natural-language-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-netsh-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-network-provider-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-path-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-print-monitors-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-rdp-startup-programs-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-silentprocessexit-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-telemetrycontroller-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-timeproviders-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-ts-initialprogram-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-userinitmprlogonscript-registry-value j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-windows-error-reporting-registry-key j.j.vannielen@utwente.nl
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
|
||||||
|
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
|
||||||
|
- ghidra: fix saving of base address @mr-tz
|
||||||
|
- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
|
||||||
|
- binja: fix crash when the IL of certain functions is not available. #2249 @xusheng6
|
||||||
|
- binja: major performance improvement on the binja extractor. #1414 @xusheng6
|
||||||
|
- cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz
|
||||||
|
- binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6
|
||||||
|
- binja: move the stack string detection to function level #2516 @xusheng6
|
||||||
|
- BinExport2: fix handling of incorrect thunk functions #2524 @williballenthin
|
||||||
|
- BinExport2: more precise pruning of expressions @williballenthin
|
||||||
|
- BinExport2: better handle weird expression trees from Ghidra #2528 #2530 @williballenthin
|
||||||
|
|
||||||
|
### capa Explorer Web
|
||||||
|
|
||||||
|
### capa Explorer IDA Pro plugin
|
||||||
|
|
||||||
|
- fix bug preventing saving of capa results via Save button @mr-tz
|
||||||
|
- fix saving of base address @mr-tz
|
||||||
|
|
||||||
|
### Development
|
||||||
|
- CI: use macos-13 since macos-12 is deprecated and will be removed on December 3rd, 2024 #2173 @mr-tz
|
||||||
|
- CI: update Binary Ninja version to 4.2 #2499 @xusheng6
|
||||||
|
|
||||||
|
### Raw diffs
|
||||||
|
- [capa v7.4.0...v8.0.0](https://github.com/mandiant/capa/compare/v7.4.0...v8.0.0)
|
||||||
|
- [capa-rules v7.4.0...v8.0.0](https://github.com/mandiant/capa-rules/compare/v7.4.0...v8.0.0)
|
||||||
|
|
||||||
## v7.4.0
|
## v7.4.0
|
||||||
|
|
||||||
@@ -179,6 +283,8 @@ Special thanks to our repeat and new contributors:
|
|||||||
- CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
|
- CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
|
||||||
- CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
|
- CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
|
||||||
- CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
|
- CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
|
||||||
|
- CI: add web releases workflow #2455 @s-ff
|
||||||
|
- CI: skip changelog.yml for dependabot PRs #2471
|
||||||
|
|
||||||
### Raw diffs
|
### Raw diffs
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any, Tuple
|
from typing import Any
|
||||||
|
|
||||||
from capa.rules import Scope, RuleSet
|
from capa.rules import Scope, RuleSet
|
||||||
from capa.engine import FeatureSet, MatchResults
|
from capa.engine import FeatureSet, MatchResults
|
||||||
@@ -64,7 +64,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
|
|||||||
|
|
||||||
def find_capabilities(
|
def find_capabilities(
|
||||||
ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
|
ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
|
||||||
) -> Tuple[MatchResults, Any]:
|
) -> tuple[MatchResults, Any]:
|
||||||
from capa.capabilities.static import find_static_capabilities
|
from capa.capabilities.static import find_static_capabilities
|
||||||
from capa.capabilities.dynamic import find_dynamic_capabilities
|
from capa.capabilities.dynamic import find_dynamic_capabilities
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any, List, Tuple
|
from typing import Any
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.features.freeze as frz
|
import capa.features.freeze as frz
|
||||||
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def find_call_capabilities(
|
def find_call_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Tuple[FeatureSet, MatchResults]:
|
) -> tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules for the given call.
|
find matches for the given rules for the given call.
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ def find_call_capabilities(
|
|||||||
|
|
||||||
def find_thread_capabilities(
|
def find_thread_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
|
||||||
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
) -> tuple[FeatureSet, MatchResults, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given thread.
|
find matches for the given rules within the given thread.
|
||||||
|
|
||||||
@@ -89,7 +89,7 @@ def find_thread_capabilities(
|
|||||||
|
|
||||||
def find_process_capabilities(
|
def find_process_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
|
||||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
) -> tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given process.
|
find matches for the given rules within the given process.
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ def find_process_capabilities(
|
|||||||
|
|
||||||
def find_dynamic_capabilities(
|
def find_dynamic_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
|
||||||
) -> Tuple[MatchResults, Any]:
|
) -> tuple[MatchResults, Any]:
|
||||||
all_process_matches: MatchResults = collections.defaultdict(list)
|
all_process_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_thread_matches: MatchResults = collections.defaultdict(list)
|
all_thread_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_call_matches: MatchResults = collections.defaultdict(list)
|
all_call_matches: MatchResults = collections.defaultdict(list)
|
||||||
@@ -135,7 +135,7 @@ def find_dynamic_capabilities(
|
|||||||
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
|
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
|
||||||
|
|
||||||
assert isinstance(extractor, DynamicFeatureExtractor)
|
assert isinstance(extractor, DynamicFeatureExtractor)
|
||||||
processes: List[ProcessHandle] = list(extractor.get_processes())
|
processes: list[ProcessHandle] = list(extractor.get_processes())
|
||||||
n_processes: int = len(processes)
|
n_processes: int = len(processes)
|
||||||
|
|
||||||
with capa.helpers.CapaProgressBar(
|
with capa.helpers.CapaProgressBar(
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import time
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any, List, Tuple
|
from typing import Any
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.helpers
|
import capa.helpers
|
||||||
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def find_instruction_capabilities(
|
def find_instruction_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||||
) -> Tuple[FeatureSet, MatchResults]:
|
) -> tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules for the given instruction.
|
find matches for the given rules for the given instruction.
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ def find_instruction_capabilities(
|
|||||||
|
|
||||||
def find_basic_block_capabilities(
|
def find_basic_block_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
||||||
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
) -> tuple[FeatureSet, MatchResults, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given basic block.
|
find matches for the given rules within the given basic block.
|
||||||
|
|
||||||
@@ -93,7 +93,7 @@ def find_basic_block_capabilities(
|
|||||||
|
|
||||||
def find_code_capabilities(
|
def find_code_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
||||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
) -> tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given function.
|
find matches for the given rules within the given function.
|
||||||
|
|
||||||
@@ -131,16 +131,16 @@ def find_code_capabilities(
|
|||||||
|
|
||||||
def find_static_capabilities(
|
def find_static_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
|
||||||
) -> Tuple[MatchResults, Any]:
|
) -> tuple[MatchResults, Any]:
|
||||||
all_function_matches: MatchResults = collections.defaultdict(list)
|
all_function_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_bb_matches: MatchResults = collections.defaultdict(list)
|
all_bb_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_insn_matches: MatchResults = collections.defaultdict(list)
|
all_insn_matches: MatchResults = collections.defaultdict(list)
|
||||||
|
|
||||||
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
|
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
|
||||||
library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
|
library_functions: tuple[rdoc.LibraryFunction, ...] = ()
|
||||||
|
|
||||||
assert isinstance(extractor, StaticFeatureExtractor)
|
assert isinstance(extractor, StaticFeatureExtractor)
|
||||||
functions: List[FunctionHandle] = list(extractor.get_functions())
|
functions: list[FunctionHandle] = list(extractor.get_functions())
|
||||||
n_funcs: int = len(functions)
|
n_funcs: int = len(functions)
|
||||||
n_libs: int = 0
|
n_libs: int = 0
|
||||||
percentage: float = 0
|
percentage: float = 0
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import copy
|
import copy
|
||||||
import collections
|
import collections
|
||||||
from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator
|
from typing import TYPE_CHECKING, Union, Mapping, Iterable, Iterator
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.features.common
|
import capa.features.common
|
||||||
@@ -27,7 +27,7 @@ if TYPE_CHECKING:
|
|||||||
# to collect the locations of a feature, do: `features[Number(0x10)]`
|
# to collect the locations of a feature, do: `features[Number(0x10)]`
|
||||||
#
|
#
|
||||||
# aliased here so that the type can be documented and xref'd.
|
# aliased here so that the type can be documented and xref'd.
|
||||||
FeatureSet = Dict[Feature, Set[Address]]
|
FeatureSet = dict[Feature, set[Address]]
|
||||||
|
|
||||||
|
|
||||||
class Statement:
|
class Statement:
|
||||||
@@ -94,7 +94,7 @@ class And(Statement):
|
|||||||
match if all of the children evaluate to True.
|
match if all of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`And.children` (type: List[Statement|Feature]).
|
`And.children` (type: list[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ class Or(Statement):
|
|||||||
match if any of the children evaluate to True.
|
match if any of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`Or.children` (type: List[Statement|Feature]).
|
`Or.children` (type: list[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -176,7 +176,7 @@ class Some(Statement):
|
|||||||
match if at least N of the children evaluate to True.
|
match if at least N of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`Some.children` (type: List[Statement|Feature]).
|
`Some.children` (type: list[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -267,7 +267,7 @@ class Subscope(Statement):
|
|||||||
# inspect(match_details)
|
# inspect(match_details)
|
||||||
#
|
#
|
||||||
# aliased here so that the type can be documented and xref'd.
|
# aliased here so that the type can be documented and xref'd.
|
||||||
MatchResults = Mapping[str, List[Tuple[Address, Result]]]
|
MatchResults = Mapping[str, list[tuple[Address, Result]]]
|
||||||
|
|
||||||
|
|
||||||
def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
|
def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
|
||||||
@@ -292,7 +292,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
|
|||||||
features[capa.features.common.MatchedRule(namespace)].update(locations)
|
features[capa.features.common.MatchedRule(namespace)].update(locations)
|
||||||
|
|
||||||
|
|
||||||
def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
|
def match(rules: list["capa.rules.Rule"], features: FeatureSet, addr: Address) -> tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
match the given rules against the given features,
|
match the given rules against the given features,
|
||||||
returning an updated set of features and the matches.
|
returning an updated set of features and the matches.
|
||||||
|
|||||||
@@ -6,7 +6,6 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
from capa.helpers import assert_never
|
from capa.helpers import assert_never
|
||||||
|
|
||||||
@@ -22,7 +21,7 @@ COM_PREFIXES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
|
def load_com_database(com_type: ComType) -> dict[str, list[str]]:
|
||||||
# lazy load these python files since they are so large.
|
# lazy load these python files since they are so large.
|
||||||
# that is, don't load them unless a COM feature is being handled.
|
# that is, don't load them unless a COM feature is being handled.
|
||||||
import capa.features.com.classes
|
import capa.features.com.classes
|
||||||
|
|||||||
@@ -5,9 +5,8 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
COM_CLASSES: Dict[str, List[str]] = {
|
COM_CLASSES: dict[str, list[str]] = {
|
||||||
"ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
|
"ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
|
||||||
"ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
|
"ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
|
||||||
"ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],
|
"ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],
|
||||||
|
|||||||
@@ -5,9 +5,8 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
COM_INTERFACES: Dict[str, List[str]] = {
|
COM_INTERFACES: dict[str, list[str]] = {
|
||||||
"IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
|
"IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
|
||||||
"IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
|
"IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
|
||||||
"IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
|
"IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
|
||||||
@@ -16334,7 +16333,7 @@ COM_INTERFACES: Dict[str, List[str]] = {
|
|||||||
"IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
|
"IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
|
||||||
"IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
|
"IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
|
||||||
"IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
|
"IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
|
||||||
"IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
|
"IRcsServicetuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
|
||||||
"IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
|
"IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
|
||||||
"IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
|
"IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
|
||||||
"IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
|
"IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
|
||||||
|
|||||||
@@ -9,10 +9,9 @@
|
|||||||
import re
|
import re
|
||||||
import abc
|
import abc
|
||||||
import codecs
|
import codecs
|
||||||
import typing
|
|
||||||
import logging
|
import logging
|
||||||
import collections
|
import collections
|
||||||
from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional
|
from typing import TYPE_CHECKING, Union, Optional
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
# circular import, otherwise
|
# circular import, otherwise
|
||||||
@@ -79,8 +78,8 @@ class Result:
|
|||||||
self,
|
self,
|
||||||
success: bool,
|
success: bool,
|
||||||
statement: Union["capa.engine.Statement", "Feature"],
|
statement: Union["capa.engine.Statement", "Feature"],
|
||||||
children: List["Result"],
|
children: list["Result"],
|
||||||
locations: Optional[Set[Address]] = None,
|
locations: Optional[set[Address]] = None,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.success = success
|
self.success = success
|
||||||
@@ -213,7 +212,7 @@ class Substring(String):
|
|||||||
|
|
||||||
# mapping from string value to list of locations.
|
# mapping from string value to list of locations.
|
||||||
# will unique the locations later on.
|
# will unique the locations later on.
|
||||||
matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
|
matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)
|
||||||
|
|
||||||
assert isinstance(self.value, str)
|
assert isinstance(self.value, str)
|
||||||
for feature, locations in features.items():
|
for feature, locations in features.items():
|
||||||
@@ -261,7 +260,7 @@ class _MatchedSubstring(Substring):
|
|||||||
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]):
|
def __init__(self, substring: Substring, matches: dict[str, set[Address]]):
|
||||||
"""
|
"""
|
||||||
args:
|
args:
|
||||||
substring: the substring feature that matches.
|
substring: the substring feature that matches.
|
||||||
@@ -305,7 +304,7 @@ class Regex(String):
|
|||||||
|
|
||||||
# mapping from string value to list of locations.
|
# mapping from string value to list of locations.
|
||||||
# will unique the locations later on.
|
# will unique the locations later on.
|
||||||
matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
|
matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)
|
||||||
|
|
||||||
for feature, locations in features.items():
|
for feature, locations in features.items():
|
||||||
if not isinstance(feature, (String,)):
|
if not isinstance(feature, (String,)):
|
||||||
@@ -353,7 +352,7 @@ class _MatchedRegex(Regex):
|
|||||||
note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
|
note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]):
|
def __init__(self, regex: Regex, matches: dict[str, set[Address]]):
|
||||||
"""
|
"""
|
||||||
args:
|
args:
|
||||||
regex: the regex feature that matches.
|
regex: the regex feature that matches.
|
||||||
@@ -467,6 +466,7 @@ FORMAT_VMRAY = "vmray"
|
|||||||
FORMAT_BINEXPORT2 = "binexport2"
|
FORMAT_BINEXPORT2 = "binexport2"
|
||||||
FORMAT_FREEZE = "freeze"
|
FORMAT_FREEZE = "freeze"
|
||||||
FORMAT_RESULT = "result"
|
FORMAT_RESULT = "result"
|
||||||
|
FORMAT_BINJA_DB = "binja_database"
|
||||||
STATIC_FORMATS = {
|
STATIC_FORMATS = {
|
||||||
FORMAT_SC32,
|
FORMAT_SC32,
|
||||||
FORMAT_SC64,
|
FORMAT_SC64,
|
||||||
@@ -476,6 +476,7 @@ STATIC_FORMATS = {
|
|||||||
FORMAT_FREEZE,
|
FORMAT_FREEZE,
|
||||||
FORMAT_RESULT,
|
FORMAT_RESULT,
|
||||||
FORMAT_BINEXPORT2,
|
FORMAT_BINEXPORT2,
|
||||||
|
FORMAT_BINJA_DB,
|
||||||
}
|
}
|
||||||
DYNAMIC_FORMATS = {
|
DYNAMIC_FORMATS = {
|
||||||
FORMAT_CAPE,
|
FORMAT_CAPE,
|
||||||
|
|||||||
@@ -11,13 +11,9 @@ import hashlib
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
from copy import copy
|
from copy import copy
|
||||||
from types import MethodType
|
from types import MethodType
|
||||||
from typing import Any, Set, Dict, Tuple, Union, Iterator
|
from typing import Any, Union, Iterator, TypeAlias
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
|
|
||||||
# https://github.com/mandiant/capa/issues/1699
|
|
||||||
from typing_extensions import TypeAlias
|
|
||||||
|
|
||||||
import capa.features.address
|
import capa.features.address
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature
|
||||||
from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
||||||
@@ -59,7 +55,7 @@ class FunctionHandle:
|
|||||||
|
|
||||||
address: Address
|
address: Address
|
||||||
inner: Any
|
inner: Any
|
||||||
ctx: Dict[str, Any] = dataclasses.field(default_factory=dict)
|
ctx: dict[str, Any] = dataclasses.field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -135,7 +131,7 @@ class StaticFeatureExtractor:
|
|||||||
return self._sample_hashes
|
return self._sample_hashes
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features found at every scope ("global").
|
extract features found at every scope ("global").
|
||||||
|
|
||||||
@@ -146,12 +142,12 @@ class StaticFeatureExtractor:
|
|||||||
print('0x%x: %s', va, feature)
|
print('0x%x: %s', va, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract file-scope features.
|
extract file-scope features.
|
||||||
|
|
||||||
@@ -162,7 +158,7 @@ class StaticFeatureExtractor:
|
|||||||
print('0x%x: %s', va, feature)
|
print('0x%x: %s', va, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -211,7 +207,7 @@ class StaticFeatureExtractor:
|
|||||||
raise KeyError(addr)
|
raise KeyError(addr)
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_features(self, f: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract function-scope features.
|
extract function-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -227,7 +223,7 @@ class StaticFeatureExtractor:
|
|||||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -240,7 +236,7 @@ class StaticFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract basic block-scope features.
|
extract basic block-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -258,7 +254,7 @@ class StaticFeatureExtractor:
|
|||||||
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -273,7 +269,7 @@ class StaticFeatureExtractor:
|
|||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_insn_features(
|
def extract_insn_features(
|
||||||
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract instruction-scope features.
|
extract instruction-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -293,12 +289,12 @@ class StaticFeatureExtractor:
|
|||||||
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def FunctionFilter(extractor: StaticFeatureExtractor, functions: Set) -> StaticFeatureExtractor:
|
def FunctionFilter(extractor: StaticFeatureExtractor, functions: set) -> StaticFeatureExtractor:
|
||||||
original_get_functions = extractor.get_functions
|
original_get_functions = extractor.get_functions
|
||||||
|
|
||||||
def filtered_get_functions(self):
|
def filtered_get_functions(self):
|
||||||
@@ -387,7 +383,7 @@ class DynamicFeatureExtractor:
|
|||||||
return self._sample_hashes
|
return self._sample_hashes
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features found at every scope ("global").
|
extract features found at every scope ("global").
|
||||||
|
|
||||||
@@ -398,12 +394,12 @@ class DynamicFeatureExtractor:
|
|||||||
print(addr, feature)
|
print(addr, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract file-scope features.
|
extract file-scope features.
|
||||||
|
|
||||||
@@ -414,7 +410,7 @@ class DynamicFeatureExtractor:
|
|||||||
print(addr, feature)
|
print(addr, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -426,7 +422,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all the features of a process. These include:
|
Yields all the features of a process. These include:
|
||||||
- file features of the process' image
|
- file features of the process' image
|
||||||
@@ -449,7 +445,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all the features of a thread. These include:
|
Yields all the features of a thread. These include:
|
||||||
- sequenced api traces
|
- sequenced api traces
|
||||||
@@ -466,7 +462,7 @@ class DynamicFeatureExtractor:
|
|||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all features of a call. These include:
|
Yields all features of a call. These include:
|
||||||
- api name
|
- api name
|
||||||
@@ -485,7 +481,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def ProcessFilter(extractor: DynamicFeatureExtractor, processes: Set) -> DynamicFeatureExtractor:
|
def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> DynamicFeatureExtractor:
|
||||||
original_get_processes = extractor.get_processes
|
original_get_processes = extractor.get_processes
|
||||||
|
|
||||||
def filtered_get_processes(self):
|
def filtered_get_processes(self):
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import io
|
|||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Set, Dict, List, Tuple, Iterator
|
from typing import Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -51,13 +51,13 @@ def compute_common_prefix_length(m: str, n: str) -> int:
|
|||||||
return len(m)
|
return len(m)
|
||||||
|
|
||||||
|
|
||||||
def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: List[Path]) -> Path:
|
def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: list[Path]) -> Path:
|
||||||
"""attempt to find the sample file, given a BinExport2 file.
|
"""attempt to find the sample file, given a BinExport2 file.
|
||||||
|
|
||||||
searches in the same directory as the BinExport2 file, and then in search_paths.
|
searches in the same directory as the BinExport2 file, and then in search_paths.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def filename_similarity_key(p: Path) -> Tuple[int, str]:
|
def filename_similarity_key(p: Path) -> tuple[int, str]:
|
||||||
# note closure over input_file.
|
# note closure over input_file.
|
||||||
# sort first by length of common prefix, then by name (for stability)
|
# sort first by length of common prefix, then by name (for stability)
|
||||||
return (compute_common_prefix_length(p.name, input_file.name), p.name)
|
return (compute_common_prefix_length(p.name, input_file.name), p.name)
|
||||||
@@ -65,7 +65,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
|
|||||||
wanted_sha256: str = be2.meta_information.executable_id.lower()
|
wanted_sha256: str = be2.meta_information.executable_id.lower()
|
||||||
|
|
||||||
input_directory: Path = input_file.parent
|
input_directory: Path = input_file.parent
|
||||||
siblings: List[Path] = [p for p in input_directory.iterdir() if p.is_file()]
|
siblings: list[Path] = [p for p in input_directory.iterdir() if p.is_file()]
|
||||||
siblings.sort(key=filename_similarity_key, reverse=True)
|
siblings.sort(key=filename_similarity_key, reverse=True)
|
||||||
for sibling in siblings:
|
for sibling in siblings:
|
||||||
# e.g. with open IDA files in the same directory on Windows
|
# e.g. with open IDA files in the same directory on Windows
|
||||||
@@ -74,7 +74,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
|
|||||||
return sibling
|
return sibling
|
||||||
|
|
||||||
for search_path in search_paths:
|
for search_path in search_paths:
|
||||||
candidates: List[Path] = [p for p in search_path.iterdir() if p.is_file()]
|
candidates: list[Path] = [p for p in search_path.iterdir() if p.is_file()]
|
||||||
candidates.sort(key=filename_similarity_key, reverse=True)
|
candidates.sort(key=filename_similarity_key, reverse=True)
|
||||||
for candidate in candidates:
|
for candidate in candidates:
|
||||||
with contextlib.suppress(PermissionError):
|
with contextlib.suppress(PermissionError):
|
||||||
@@ -88,27 +88,27 @@ class BinExport2Index:
|
|||||||
def __init__(self, be2: BinExport2):
|
def __init__(self, be2: BinExport2):
|
||||||
self.be2: BinExport2 = be2
|
self.be2: BinExport2 = be2
|
||||||
|
|
||||||
self.callers_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
|
self.callers_by_vertex_index: dict[int, list[int]] = defaultdict(list)
|
||||||
self.callees_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
|
self.callees_by_vertex_index: dict[int, list[int]] = defaultdict(list)
|
||||||
|
|
||||||
# note: flow graph != call graph (vertex)
|
# note: flow graph != call graph (vertex)
|
||||||
self.flow_graph_index_by_address: Dict[int, int] = {}
|
self.flow_graph_index_by_address: dict[int, int] = {}
|
||||||
self.flow_graph_address_by_index: Dict[int, int] = {}
|
self.flow_graph_address_by_index: dict[int, int] = {}
|
||||||
|
|
||||||
# edges that come from the given basic block
|
# edges that come from the given basic block
|
||||||
self.source_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
self.source_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||||
# edges that end up at the given basic block
|
# edges that end up at the given basic block
|
||||||
self.target_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
self.target_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||||
|
|
||||||
self.vertex_index_by_address: Dict[int, int] = {}
|
self.vertex_index_by_address: dict[int, int] = {}
|
||||||
|
|
||||||
self.data_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
|
self.data_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
|
||||||
self.data_reference_index_by_target_address: Dict[int, List[int]] = defaultdict(list)
|
self.data_reference_index_by_target_address: dict[int, list[int]] = defaultdict(list)
|
||||||
self.string_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
|
self.string_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
|
||||||
|
|
||||||
self.insn_address_by_index: Dict[int, int] = {}
|
self.insn_address_by_index: dict[int, int] = {}
|
||||||
self.insn_index_by_address: Dict[int, int] = {}
|
self.insn_index_by_address: dict[int, int] = {}
|
||||||
self.insn_by_address: Dict[int, BinExport2.Instruction] = {}
|
self.insn_by_address: dict[int, BinExport2.Instruction] = {}
|
||||||
|
|
||||||
# must index instructions first
|
# must index instructions first
|
||||||
self._index_insn_addresses()
|
self._index_insn_addresses()
|
||||||
@@ -208,7 +208,7 @@ class BinExport2Index:
|
|||||||
|
|
||||||
def basic_block_instructions(
|
def basic_block_instructions(
|
||||||
self, basic_block: BinExport2.BasicBlock
|
self, basic_block: BinExport2.BasicBlock
|
||||||
) -> Iterator[Tuple[int, BinExport2.Instruction, int]]:
|
) -> Iterator[tuple[int, BinExport2.Instruction, int]]:
|
||||||
"""
|
"""
|
||||||
For a given basic block, enumerate the instruction indices,
|
For a given basic block, enumerate the instruction indices,
|
||||||
the instruction instances, and their addresses.
|
the instruction instances, and their addresses.
|
||||||
@@ -253,7 +253,7 @@ class BinExport2Analysis:
|
|||||||
self.idx: BinExport2Index = idx
|
self.idx: BinExport2Index = idx
|
||||||
self.buf: bytes = buf
|
self.buf: bytes = buf
|
||||||
self.base_address: int = 0
|
self.base_address: int = 0
|
||||||
self.thunks: Dict[int, int] = {}
|
self.thunks: dict[int, int] = {}
|
||||||
|
|
||||||
self._find_base_address()
|
self._find_base_address()
|
||||||
self._compute_thunks()
|
self._compute_thunks()
|
||||||
@@ -279,12 +279,14 @@ class BinExport2Analysis:
|
|||||||
|
|
||||||
curr_idx: int = idx
|
curr_idx: int = idx
|
||||||
for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
|
for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
|
||||||
thunk_callees: List[int] = self.idx.callees_by_vertex_index[curr_idx]
|
thunk_callees: list[int] = self.idx.callees_by_vertex_index[curr_idx]
|
||||||
# if this doesn't hold, then it doesn't seem like this is a thunk,
|
# If this doesn't hold, then it doesn't seem like this is a thunk,
|
||||||
# because either, len is:
|
# because either, len is:
|
||||||
# 0 and the thunk doesn't point to anything, or
|
# 0 and the thunk doesn't point to anything or is indirect, like `call eax`, or
|
||||||
# >1 and the thunk may end up at many functions.
|
# >1 and the thunk may end up at many functions.
|
||||||
assert len(thunk_callees) == 1, f"thunk @ {hex(addr)} failed"
|
# In any case, this doesn't appear to be the sort of thunk we're looking for.
|
||||||
|
if len(thunk_callees) != 1:
|
||||||
|
break
|
||||||
|
|
||||||
thunked_idx: int = thunk_callees[0]
|
thunked_idx: int = thunk_callees[0]
|
||||||
thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
|
thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
|
||||||
@@ -324,7 +326,7 @@ class AddressNotMappedError(ReadMemoryError): ...
|
|||||||
@dataclass
|
@dataclass
|
||||||
class AddressSpace:
|
class AddressSpace:
|
||||||
base_address: int
|
base_address: int
|
||||||
memory_regions: Tuple[MemoryRegion, ...]
|
memory_regions: tuple[MemoryRegion, ...]
|
||||||
|
|
||||||
def read_memory(self, address: int, length: int) -> bytes:
|
def read_memory(self, address: int, length: int) -> bytes:
|
||||||
rva: int = address - self.base_address
|
rva: int = address - self.base_address
|
||||||
@@ -337,7 +339,7 @@ class AddressSpace:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_pe(cls, pe: PE, base_address: int):
|
def from_pe(cls, pe: PE, base_address: int):
|
||||||
regions: List[MemoryRegion] = []
|
regions: list[MemoryRegion] = []
|
||||||
for section in pe.sections:
|
for section in pe.sections:
|
||||||
address: int = section.VirtualAddress
|
address: int = section.VirtualAddress
|
||||||
size: int = section.Misc_VirtualSize
|
size: int = section.Misc_VirtualSize
|
||||||
@@ -355,7 +357,7 @@ class AddressSpace:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_elf(cls, elf: ELFFile, base_address: int):
|
def from_elf(cls, elf: ELFFile, base_address: int):
|
||||||
regions: List[MemoryRegion] = []
|
regions: list[MemoryRegion] = []
|
||||||
|
|
||||||
# ELF segments are for runtime data,
|
# ELF segments are for runtime data,
|
||||||
# ELF sections are for link-time data.
|
# ELF sections are for link-time data.
|
||||||
@@ -401,9 +403,9 @@ class AnalysisContext:
|
|||||||
class FunctionContext:
|
class FunctionContext:
|
||||||
ctx: AnalysisContext
|
ctx: AnalysisContext
|
||||||
flow_graph_index: int
|
flow_graph_index: int
|
||||||
format: Set[str]
|
format: set[str]
|
||||||
os: Set[str]
|
os: set[str]
|
||||||
arch: Set[str]
|
arch: set[str]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Tuple, Iterator, Optional
|
from typing import Iterator, Optional
|
||||||
|
|
||||||
import capa.features.extractors.binexport2.helpers
|
import capa.features.extractors.binexport2.helpers
|
||||||
from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset
|
from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset
|
||||||
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -91,7 +91,7 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -120,7 +120,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
@@ -131,7 +131,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
||||||
# guaranteed to be simple int/reg operands
|
# guaranteed to be simple int/reg operands
|
||||||
# so we don't have to realize the tree/list.
|
# so we don't have to realize the tree/list.
|
||||||
operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||||
|
|
||||||
if operands[1] != operands[2]:
|
if operands[1] != operands[2]:
|
||||||
yield Characteristic("nzxor"), ih.address
|
yield Characteristic("nzxor"), ih.address
|
||||||
@@ -146,7 +146,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import List, Optional
|
from typing import Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from capa.features.extractors.binexport2.helpers import get_operand_expressions
|
from capa.features.extractors.binexport2.helpers import get_operand_expressions
|
||||||
@@ -32,7 +32,7 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt
|
|||||||
# Base: Any general purpose register
|
# Base: Any general purpose register
|
||||||
# Displacement: An integral offset
|
# Displacement: An integral offset
|
||||||
|
|
||||||
expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand)
|
expressions: list[BinExport2.Expression] = get_operand_expressions(be2, operand)
|
||||||
|
|
||||||
# skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller
|
# skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller
|
||||||
# has checked for BinExport2.Expression.DEREFERENCE
|
# has checked for BinExport2.Expression.DEREFERENCE
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.features.extractors.strings
|
import capa.features.extractors.strings
|
||||||
import capa.features.extractors.binexport2.helpers
|
import capa.features.extractors.binexport2.helpers
|
||||||
@@ -63,7 +63,7 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -123,7 +123,7 @@ OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -161,7 +161,7 @@ def is_security_cookie(
|
|||||||
|
|
||||||
# security cookie check should use SP or BP
|
# security cookie check should use SP or BP
|
||||||
op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]]
|
op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]]
|
||||||
op1_exprs: List[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
|
op1_exprs: list[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
|
||||||
if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs):
|
if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -192,7 +192,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse non-zeroing XOR instruction from the given instruction.
|
parse non-zeroing XOR instruction from the given instruction.
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||||
@@ -209,7 +209,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
||||||
# guaranteed to be simple int/reg operands
|
# guaranteed to be simple int/reg operands
|
||||||
# so we don't have to realize the tree/list.
|
# so we don't have to realize the tree/list.
|
||||||
operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||||
|
|
||||||
if operands[0] == operands[1]:
|
if operands[0] == operands[1]:
|
||||||
return
|
return
|
||||||
@@ -236,7 +236,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||||
@@ -16,20 +16,20 @@ from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
|||||||
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
bbi: BasicBlockContext = bbh.inner
|
bbi: BasicBlockContext = bbh.inner
|
||||||
|
|
||||||
idx = fhi.ctx.idx
|
idx = fhi.ctx.idx
|
||||||
|
|
||||||
basic_block_index: int = bbi.basic_block_index
|
basic_block_index: int = bbi.basic_block_index
|
||||||
target_edges: List[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
|
target_edges: list[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
|
||||||
if basic_block_index in (e.source_basic_block_index for e in target_edges):
|
if basic_block_index in (e.source_basic_block_index for e in target_edges):
|
||||||
basic_block_address: int = idx.get_basic_block_address(basic_block_index)
|
basic_block_address: int = idx.get_basic_block_address(basic_block_index)
|
||||||
yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)
|
yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract basic block features"""
|
"""extract basic block features"""
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
for feature, addr in bb_handler(fh, bbh):
|
for feature, addr in bb_handler(fh, bbh):
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Set, List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.features.extractors.elf
|
import capa.features.extractors.elf
|
||||||
import capa.features.extractors.common
|
import capa.features.extractors.common
|
||||||
@@ -48,14 +48,14 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address)
|
address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address)
|
||||||
self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space)
|
self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space)
|
||||||
|
|
||||||
self.global_features: List[Tuple[Feature, Address]] = []
|
self.global_features: list[tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf)))
|
self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf)))
|
||||||
self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf)))
|
self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf)))
|
||||||
self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf)))
|
self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf)))
|
||||||
|
|
||||||
self.format: Set[str] = set()
|
self.format: set[str] = set()
|
||||||
self.os: Set[str] = set()
|
self.os: set[str] = set()
|
||||||
self.arch: Set[str] = set()
|
self.arch: set[str] = set()
|
||||||
|
|
||||||
for feature, _ in self.global_features:
|
for feature, _ in self.global_features:
|
||||||
assert isinstance(feature.value, str)
|
assert isinstance(feature.value, str)
|
||||||
@@ -72,10 +72,10 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
def get_base_address(self) -> AbsoluteVirtualAddress:
|
def get_base_address(self) -> AbsoluteVirtualAddress:
|
||||||
return AbsoluteVirtualAddress(self.analysis.base_address)
|
return AbsoluteVirtualAddress(self.analysis.base_address)
|
||||||
|
|
||||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from self.global_features
|
yield from self.global_features
|
||||||
|
|
||||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf)
|
yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf)
|
||||||
|
|
||||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||||
@@ -97,7 +97,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch),
|
inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch),
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binexport2.function.extract_features(fh)
|
yield from capa.features.extractors.binexport2.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
@@ -112,7 +112,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
inner=BasicBlockContext(basic_block_index),
|
inner=BasicBlockContext(basic_block_index),
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
@@ -126,5 +126,5 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
def extract_insn_features(
|
def extract_insn_features(
|
||||||
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih)
|
yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import pefile
|
import pefile
|
||||||
from elftools.elf.elffile import ELFFile
|
from elftools.elf.elffile import ELFFile
|
||||||
@@ -23,7 +23,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||||
pe: pefile.PE = pefile.PE(data=buf)
|
pe: pefile.PE = pefile.PE(data=buf)
|
||||||
yield from capa.features.extractors.pefile.extract_file_export_names(pe)
|
yield from capa.features.extractors.pefile.extract_file_export_names(pe)
|
||||||
@@ -34,7 +34,7 @@ def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
|
|||||||
logger.warning("unsupported format")
|
logger.warning("unsupported format")
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||||
pe: pefile.PE = pefile.PE(data=buf)
|
pe: pefile.PE = pefile.PE(data=buf)
|
||||||
yield from capa.features.extractors.pefile.extract_file_import_names(pe)
|
yield from capa.features.extractors.pefile.extract_file_import_names(pe)
|
||||||
@@ -45,7 +45,7 @@ def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
|
|||||||
logger.warning("unsupported format")
|
logger.warning("unsupported format")
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||||
pe: pefile.PE = pefile.PE(data=buf)
|
pe: pefile.PE = pefile.PE(data=buf)
|
||||||
yield from capa.features.extractors.pefile.extract_file_section_names(pe)
|
yield from capa.features.extractors.pefile.extract_file_section_names(pe)
|
||||||
@@ -56,15 +56,15 @@ def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[F
|
|||||||
logger.warning("unsupported format")
|
logger.warning("unsupported format")
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_format(buf)
|
yield from capa.features.extractors.common.extract_format(buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract file features"""
|
"""extract file features"""
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler(be2, buf):
|
for feature, addr in file_handler(be2, buf):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.file import FunctionName
|
from capa.features.file import FunctionName
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
@@ -16,7 +16,7 @@ from capa.features.extractors.base_extractor import FunctionHandle
|
|||||||
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
@@ -32,7 +32,7 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Add
|
|||||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)
|
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
@@ -40,7 +40,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
|
|||||||
flow_graph_index: int = fhi.flow_graph_index
|
flow_graph_index: int = fhi.flow_graph_index
|
||||||
flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]
|
flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]
|
||||||
|
|
||||||
edges: List[Tuple[int, int]] = []
|
edges: list[tuple[int, int]] = []
|
||||||
for edge in flow_graph.edge:
|
for edge in flow_graph.edge:
|
||||||
edges.append((edge.source_basic_block_index, edge.target_basic_block_index))
|
edges.append((edge.source_basic_block_index, edge.target_basic_block_index))
|
||||||
|
|
||||||
@@ -48,7 +48,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
|
|||||||
yield Characteristic("loop"), fh.address
|
yield Characteristic("loop"), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
@@ -63,7 +63,7 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
|
|||||||
yield FunctionName(vertex.mangled_name), fh.address
|
yield FunctionName(vertex.mangled_name), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import re
|
import re
|
||||||
from typing import Set, Dict, List, Tuple, Union, Iterator, Optional
|
from typing import Union, Iterator, Optional
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ HAS_ARCH_INTEL = {ARCH_I386, ARCH_AMD64}
|
|||||||
HAS_ARCH_ARM = {ARCH_AARCH64}
|
HAS_ARCH_ARM = {ARCH_AARCH64}
|
||||||
|
|
||||||
|
|
||||||
def mask_immediate(arch: Set[str], immediate: int) -> int:
|
def mask_immediate(arch: set[str], immediate: int) -> int:
|
||||||
if arch & HAS_ARCH64:
|
if arch & HAS_ARCH64:
|
||||||
immediate &= 0xFFFFFFFFFFFFFFFF
|
immediate &= 0xFFFFFFFFFFFFFFFF
|
||||||
elif arch & HAS_ARCH32:
|
elif arch & HAS_ARCH32:
|
||||||
@@ -30,7 +30,7 @@ def mask_immediate(arch: Set[str], immediate: int) -> int:
|
|||||||
return immediate
|
return immediate
|
||||||
|
|
||||||
|
|
||||||
def twos_complement(arch: Set[str], immediate: int, default: Optional[int] = None) -> int:
|
def twos_complement(arch: set[str], immediate: int, default: Optional[int] = None) -> int:
|
||||||
if default is not None:
|
if default is not None:
|
||||||
return capa.features.extractors.helpers.twos_complement(immediate, default)
|
return capa.features.extractors.helpers.twos_complement(immediate, default)
|
||||||
elif arch & HAS_ARCH64:
|
elif arch & HAS_ARCH64:
|
||||||
@@ -50,17 +50,36 @@ def is_vertex_type(vertex: BinExport2.CallGraph.Vertex, type_: BinExport2.CallGr
|
|||||||
return vertex.HasField("type") and vertex.type == type_
|
return vertex.HasField("type") and vertex.type == type_
|
||||||
|
|
||||||
|
|
||||||
|
# internal to `build_expression_tree`
|
||||||
|
# this is unstable: it is subject to change, so don't rely on it!
|
||||||
|
def _prune_expression_tree_references_to_tree_index(
|
||||||
|
expression_tree: list[list[int]],
|
||||||
|
tree_index: int,
|
||||||
|
):
|
||||||
|
# `i` is the index of the tree node that we'll search for `tree_index`
|
||||||
|
# if we remove `tree_index` from it, and it is now empty,
|
||||||
|
# then we'll need to prune references to `i`.
|
||||||
|
for i, tree_node in enumerate(expression_tree):
|
||||||
|
if tree_index in tree_node:
|
||||||
|
tree_node.remove(tree_index)
|
||||||
|
|
||||||
|
if len(tree_node) == 0:
|
||||||
|
# if the parent node is now empty,
|
||||||
|
# remove references to that parent node.
|
||||||
|
_prune_expression_tree_references_to_tree_index(expression_tree, i)
|
||||||
|
|
||||||
|
|
||||||
# internal to `build_expression_tree`
|
# internal to `build_expression_tree`
|
||||||
# this is unstable: it is subject to change, so don't rely on it!
|
# this is unstable: it is subject to change, so don't rely on it!
|
||||||
def _prune_expression_tree_empty_shifts(
|
def _prune_expression_tree_empty_shifts(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
expression_tree: List[List[int]],
|
expression_tree: list[list[int]],
|
||||||
tree_index: int,
|
tree_index: int,
|
||||||
):
|
):
|
||||||
expression_index = operand.expression_index[tree_index]
|
expression_index = operand.expression_index[tree_index]
|
||||||
expression = be2.expression[expression_index]
|
expression = be2.expression[expression_index]
|
||||||
children_tree_indexes: List[int] = expression_tree[tree_index]
|
children_tree_indexes: list[int] = expression_tree[tree_index]
|
||||||
|
|
||||||
if expression.type == BinExport2.Expression.OPERATOR:
|
if expression.type == BinExport2.Expression.OPERATOR:
|
||||||
if len(children_tree_indexes) == 0 and expression.symbol in ("lsl", "lsr"):
|
if len(children_tree_indexes) == 0 and expression.symbol in ("lsl", "lsr"):
|
||||||
@@ -70,9 +89,7 @@ def _prune_expression_tree_empty_shifts(
|
|||||||
#
|
#
|
||||||
# Which seems to be as if the shift wasn't there (shift of #0)
|
# Which seems to be as if the shift wasn't there (shift of #0)
|
||||||
# so we want to remove references to this node from any parent nodes.
|
# so we want to remove references to this node from any parent nodes.
|
||||||
for tree_node in expression_tree:
|
_prune_expression_tree_references_to_tree_index(expression_tree, tree_index)
|
||||||
if tree_index in tree_node:
|
|
||||||
tree_node.remove(tree_index)
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -82,38 +99,37 @@ def _prune_expression_tree_empty_shifts(
|
|||||||
|
|
||||||
# internal to `build_expression_tree`
|
# internal to `build_expression_tree`
|
||||||
# this is unstable: it is subject to change, so don't rely on it!
|
# this is unstable: it is subject to change, so don't rely on it!
|
||||||
def _prune_expression_tree_empty_commas(
|
def _fixup_expression_tree_references_to_tree_index(
|
||||||
|
expression_tree: list[list[int]],
|
||||||
|
existing_index: int,
|
||||||
|
new_index: int,
|
||||||
|
):
|
||||||
|
for tree_node in expression_tree:
|
||||||
|
for i, index in enumerate(tree_node):
|
||||||
|
if index == existing_index:
|
||||||
|
tree_node[i] = new_index
|
||||||
|
|
||||||
|
|
||||||
|
# internal to `build_expression_tree`
|
||||||
|
# this is unstable: it is subject to change, so don't rely on it!
|
||||||
|
def _fixup_expression_tree_lonely_commas(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
expression_tree: List[List[int]],
|
expression_tree: list[list[int]],
|
||||||
tree_index: int,
|
tree_index: int,
|
||||||
):
|
):
|
||||||
expression_index = operand.expression_index[tree_index]
|
expression_index = operand.expression_index[tree_index]
|
||||||
expression = be2.expression[expression_index]
|
expression = be2.expression[expression_index]
|
||||||
children_tree_indexes: List[int] = expression_tree[tree_index]
|
children_tree_indexes: list[int] = expression_tree[tree_index]
|
||||||
|
|
||||||
if expression.type == BinExport2.Expression.OPERATOR:
|
if expression.type == BinExport2.Expression.OPERATOR:
|
||||||
if len(children_tree_indexes) == 1 and expression.symbol == ",":
|
if len(children_tree_indexes) == 1 and expression.symbol == ",":
|
||||||
# Due to the above pruning of empty LSL or LSR expressions,
|
existing_index = tree_index
|
||||||
# the parents might need to be fixed up.
|
new_index = children_tree_indexes[0]
|
||||||
#
|
_fixup_expression_tree_references_to_tree_index(expression_tree, existing_index, new_index)
|
||||||
# Specifically, if the pruned node was part of a comma list with two children,
|
|
||||||
# now there's only a single child, which renders as an extra comma,
|
|
||||||
# so we replace references to the comma node with the immediate child.
|
|
||||||
#
|
|
||||||
# A more correct way of doing this might be to walk up the parents and do fixups,
|
|
||||||
# but I'm not quite sure how to do this yet. Just do two passes right now.
|
|
||||||
child = children_tree_indexes[0]
|
|
||||||
|
|
||||||
for tree_node in expression_tree:
|
|
||||||
tree_node.index
|
|
||||||
if tree_index in tree_node:
|
|
||||||
tree_node[tree_node.index(tree_index)] = child
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
for child_tree_index in children_tree_indexes:
|
for child_tree_index in children_tree_indexes:
|
||||||
_prune_expression_tree_empty_commas(be2, operand, expression_tree, child_tree_index)
|
_fixup_expression_tree_lonely_commas(be2, operand, expression_tree, child_tree_index)
|
||||||
|
|
||||||
|
|
||||||
# internal to `build_expression_tree`
|
# internal to `build_expression_tree`
|
||||||
@@ -121,17 +137,17 @@ def _prune_expression_tree_empty_commas(
|
|||||||
def _prune_expression_tree(
|
def _prune_expression_tree(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
expression_tree: List[List[int]],
|
expression_tree: list[list[int]],
|
||||||
):
|
):
|
||||||
_prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0)
|
_prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0)
|
||||||
_prune_expression_tree_empty_commas(be2, operand, expression_tree, 0)
|
_fixup_expression_tree_lonely_commas(be2, operand, expression_tree, 0)
|
||||||
|
|
||||||
|
|
||||||
# this is unstable: it is subject to change, so don't rely on it!
|
# this is unstable: it is subject to change, so don't rely on it!
|
||||||
def _build_expression_tree(
|
def _build_expression_tree(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
) -> List[List[int]]:
|
) -> list[list[int]]:
|
||||||
# The reconstructed expression tree layout, linking parent nodes to their children.
|
# The reconstructed expression tree layout, linking parent nodes to their children.
|
||||||
#
|
#
|
||||||
# There is one list of integers for each expression in the operand.
|
# There is one list of integers for each expression in the operand.
|
||||||
@@ -159,7 +175,7 @@ def _build_expression_tree(
|
|||||||
# exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817)
|
# exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
tree: List[List[int]] = []
|
tree: list[list[int]] = []
|
||||||
for i, expression_index in enumerate(operand.expression_index):
|
for i, expression_index in enumerate(operand.expression_index):
|
||||||
children = []
|
children = []
|
||||||
|
|
||||||
@@ -173,7 +189,6 @@ def _build_expression_tree(
|
|||||||
tree.append(children)
|
tree.append(children)
|
||||||
|
|
||||||
_prune_expression_tree(be2, operand, tree)
|
_prune_expression_tree(be2, operand, tree)
|
||||||
_prune_expression_tree(be2, operand, tree)
|
|
||||||
|
|
||||||
return tree
|
return tree
|
||||||
|
|
||||||
@@ -181,21 +196,34 @@ def _build_expression_tree(
|
|||||||
def _fill_operand_expression_list(
|
def _fill_operand_expression_list(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
expression_tree: List[List[int]],
|
expression_tree: list[list[int]],
|
||||||
tree_index: int,
|
tree_index: int,
|
||||||
expression_list: List[BinExport2.Expression],
|
expression_list: list[BinExport2.Expression],
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Walk the given expression tree and collect the expression nodes in-order.
|
Walk the given expression tree and collect the expression nodes in-order.
|
||||||
"""
|
"""
|
||||||
expression_index = operand.expression_index[tree_index]
|
expression_index = operand.expression_index[tree_index]
|
||||||
expression = be2.expression[expression_index]
|
expression = be2.expression[expression_index]
|
||||||
children_tree_indexes: List[int] = expression_tree[tree_index]
|
children_tree_indexes: list[int] = expression_tree[tree_index]
|
||||||
|
|
||||||
if expression.type == BinExport2.Expression.REGISTER:
|
if expression.type == BinExport2.Expression.REGISTER:
|
||||||
assert len(children_tree_indexes) == 0
|
assert len(children_tree_indexes) <= 1
|
||||||
expression_list.append(expression)
|
expression_list.append(expression)
|
||||||
return
|
|
||||||
|
if len(children_tree_indexes) == 0:
|
||||||
|
return
|
||||||
|
elif len(children_tree_indexes) == 1:
|
||||||
|
# like for aarch64 with vector instructions, indicating vector data size:
|
||||||
|
#
|
||||||
|
# FADD V0.4S, V1.4S, V2.4S
|
||||||
|
#
|
||||||
|
# see: https://github.com/mandiant/capa/issues/2528
|
||||||
|
child_index = children_tree_indexes[0]
|
||||||
|
_fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(len(children_tree_indexes))
|
||||||
|
|
||||||
elif expression.type == BinExport2.Expression.SYMBOL:
|
elif expression.type == BinExport2.Expression.SYMBOL:
|
||||||
assert len(children_tree_indexes) <= 1
|
assert len(children_tree_indexes) <= 1
|
||||||
@@ -218,9 +246,23 @@ def _fill_operand_expression_list(
|
|||||||
raise NotImplementedError(len(children_tree_indexes))
|
raise NotImplementedError(len(children_tree_indexes))
|
||||||
|
|
||||||
elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
|
elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
|
||||||
assert len(children_tree_indexes) == 0
|
assert len(children_tree_indexes) <= 1
|
||||||
expression_list.append(expression)
|
expression_list.append(expression)
|
||||||
return
|
|
||||||
|
if len(children_tree_indexes) == 0:
|
||||||
|
return
|
||||||
|
elif len(children_tree_indexes) == 1:
|
||||||
|
# the ghidra exporter can produce some weird expressions,
|
||||||
|
# particularly for MSRs, like for:
|
||||||
|
#
|
||||||
|
# sreg(3, 0, c.0, c.4, 4)
|
||||||
|
#
|
||||||
|
# see: https://github.com/mandiant/capa/issues/2530
|
||||||
|
child_index = children_tree_indexes[0]
|
||||||
|
_fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(len(children_tree_indexes))
|
||||||
|
|
||||||
elif expression.type == BinExport2.Expression.SIZE_PREFIX:
|
elif expression.type == BinExport2.Expression.SIZE_PREFIX:
|
||||||
# like: b4
|
# like: b4
|
||||||
@@ -282,10 +324,10 @@ def _fill_operand_expression_list(
|
|||||||
raise NotImplementedError(expression.type)
|
raise NotImplementedError(expression.type)
|
||||||
|
|
||||||
|
|
||||||
def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> List[BinExport2.Expression]:
|
def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> list[BinExport2.Expression]:
|
||||||
tree = _build_expression_tree(be2, op)
|
tree = _build_expression_tree(be2, op)
|
||||||
|
|
||||||
expressions: List[BinExport2.Expression] = []
|
expressions: list[BinExport2.Expression] = []
|
||||||
_fill_operand_expression_list(be2, op, tree, 0, expressions)
|
_fill_operand_expression_list(be2, op, tree, 0, expressions)
|
||||||
|
|
||||||
return expressions
|
return expressions
|
||||||
@@ -331,11 +373,11 @@ def get_instruction_mnemonic(be2: BinExport2, instruction: BinExport2.Instructio
|
|||||||
return be2.mnemonic[instruction.mnemonic_index].name.lower()
|
return be2.mnemonic[instruction.mnemonic_index].name.lower()
|
||||||
|
|
||||||
|
|
||||||
def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> List[BinExport2.Operand]:
|
def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> list[BinExport2.Operand]:
|
||||||
return [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
return [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||||
|
|
||||||
|
|
||||||
def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
|
def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]:
|
||||||
"""
|
"""
|
||||||
Splits a string by any of the provided delimiter characters,
|
Splits a string by any of the provided delimiter characters,
|
||||||
including the delimiters in the results.
|
including the delimiters in the results.
|
||||||
@@ -355,7 +397,7 @@ def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
|
|||||||
yield s[start:]
|
yield s[start:]
|
||||||
|
|
||||||
|
|
||||||
BinExport2OperandPattern = Union[str, Tuple[str, ...]]
|
BinExport2OperandPattern = Union[str, tuple[str, ...]]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -382,8 +424,8 @@ class BinExport2InstructionPattern:
|
|||||||
This matcher uses the BinExport2 data layout under the hood.
|
This matcher uses the BinExport2 data layout under the hood.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
mnemonics: Tuple[str, ...]
|
mnemonics: tuple[str, ...]
|
||||||
operands: Tuple[Union[str, BinExport2OperandPattern], ...]
|
operands: tuple[Union[str, BinExport2OperandPattern], ...]
|
||||||
capture: Optional[str]
|
capture: Optional[str]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -438,7 +480,7 @@ class BinExport2InstructionPattern:
|
|||||||
mnemonic, _, rest = pattern.partition(" ")
|
mnemonic, _, rest = pattern.partition(" ")
|
||||||
mnemonics = mnemonic.split("|")
|
mnemonics = mnemonic.split("|")
|
||||||
|
|
||||||
operands: List[Union[str, Tuple[str, ...]]] = []
|
operands: list[Union[str, tuple[str, ...]]] = []
|
||||||
while rest:
|
while rest:
|
||||||
rest = rest.strip()
|
rest = rest.strip()
|
||||||
if not rest.startswith("["):
|
if not rest.startswith("["):
|
||||||
@@ -509,7 +551,7 @@ class BinExport2InstructionPattern:
|
|||||||
expression: BinExport2.Expression
|
expression: BinExport2.Expression
|
||||||
|
|
||||||
def match(
|
def match(
|
||||||
self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
|
self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
|
||||||
) -> Optional["BinExport2InstructionPattern.MatchResult"]:
|
) -> Optional["BinExport2InstructionPattern.MatchResult"]:
|
||||||
"""
|
"""
|
||||||
Match the given BinExport2 data against this pattern.
|
Match the given BinExport2 data against this pattern.
|
||||||
@@ -602,10 +644,10 @@ class BinExport2InstructionPattern:
|
|||||||
class BinExport2InstructionPatternMatcher:
|
class BinExport2InstructionPatternMatcher:
|
||||||
"""Index and match a collection of instruction patterns."""
|
"""Index and match a collection of instruction patterns."""
|
||||||
|
|
||||||
def __init__(self, queries: List[BinExport2InstructionPattern]):
|
def __init__(self, queries: list[BinExport2InstructionPattern]):
|
||||||
self.queries = queries
|
self.queries = queries
|
||||||
# shard the patterns by (mnemonic, #operands)
|
# shard the patterns by (mnemonic, #operands)
|
||||||
self._index: Dict[Tuple[str, int], List[BinExport2InstructionPattern]] = defaultdict(list)
|
self._index: dict[tuple[str, int], list[BinExport2InstructionPattern]] = defaultdict(list)
|
||||||
|
|
||||||
for query in queries:
|
for query in queries:
|
||||||
for mnemonic in query.mnemonics:
|
for mnemonic in query.mnemonics:
|
||||||
@@ -623,7 +665,7 @@ class BinExport2InstructionPatternMatcher:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def match(
|
def match(
|
||||||
self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
|
self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
|
||||||
) -> Optional[BinExport2InstructionPattern.MatchResult]:
|
) -> Optional[BinExport2InstructionPattern.MatchResult]:
|
||||||
queries = self._index.get((mnemonic.lower(), len(operand_expressions)), [])
|
queries = self._index.get((mnemonic.lower(), len(operand_expressions)), [])
|
||||||
for query in queries:
|
for query in queries:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
import capa.features.extractors.strings
|
import capa.features.extractors.strings
|
||||||
@@ -32,7 +32,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -68,7 +68,7 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
if fhi.arch & HAS_ARCH_INTEL:
|
if fhi.arch & HAS_ARCH_INTEL:
|
||||||
@@ -77,7 +77,7 @@ def extract_insn_number_features(
|
|||||||
yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih)
|
yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih)
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -92,7 +92,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
# disassembler already identified string reference from instruction
|
# disassembler already identified string reference from instruction
|
||||||
return
|
return
|
||||||
|
|
||||||
reference_addresses: List[int] = []
|
reference_addresses: list[int] = []
|
||||||
|
|
||||||
if instruction_index in idx.data_reference_index_by_source_instruction_index:
|
if instruction_index in idx.data_reference_index_by_source_instruction_index:
|
||||||
for data_reference_index in idx.data_reference_index_by_source_instruction_index[instruction_index]:
|
for data_reference_index in idx.data_reference_index_by_source_instruction_index[instruction_index]:
|
||||||
@@ -142,7 +142,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_insn_string_features(
|
def extract_insn_string_features(
|
||||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -161,7 +161,7 @@ def extract_insn_string_features(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
if fhi.arch & HAS_ARCH_INTEL:
|
if fhi.arch & HAS_ARCH_INTEL:
|
||||||
@@ -172,7 +172,7 @@ def extract_insn_offset_features(
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
if fhi.arch & HAS_ARCH_INTEL:
|
if fhi.arch & HAS_ARCH_INTEL:
|
||||||
@@ -187,7 +187,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_mnemonic_features(
|
def extract_insn_mnemonic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -199,7 +199,7 @@ def extract_insn_mnemonic_features(
|
|||||||
yield Mnemonic(mnemonic_name), ih.address
|
yield Mnemonic(mnemonic_name), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract functions calls from features
|
"""extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope;
|
most relevant at the function scope;
|
||||||
@@ -221,7 +221,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
if fhi.arch & HAS_ARCH_INTEL:
|
if fhi.arch & HAS_ARCH_INTEL:
|
||||||
@@ -234,7 +234,7 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract instruction features"""
|
"""extract instruction features"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, ea in inst_handler(f, bbh, insn):
|
for feature, ea in inst_handler(f, bbh, insn):
|
||||||
|
|||||||
@@ -5,115 +5,25 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
import string
|
|
||||||
from typing import Tuple, Iterator
|
|
||||||
|
|
||||||
from binaryninja import Function
|
|
||||||
from binaryninja import BasicBlock as BinjaBasicBlock
|
from binaryninja import BasicBlock as BinjaBasicBlock
|
||||||
from binaryninja import (
|
|
||||||
BinaryView,
|
|
||||||
SymbolType,
|
|
||||||
RegisterValueType,
|
|
||||||
VariableSourceType,
|
|
||||||
MediumLevelILOperation,
|
|
||||||
MediumLevelILBasicBlock,
|
|
||||||
MediumLevelILInstruction,
|
|
||||||
)
|
|
||||||
|
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address
|
from capa.features.address import Address
|
||||||
from capa.features.basicblock import BasicBlock
|
from capa.features.basicblock import BasicBlock
|
||||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
|
||||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
def get_printable_len_ascii(s: bytes) -> int:
|
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
|
||||||
count = 0
|
|
||||||
for c in s:
|
|
||||||
if c == 0:
|
|
||||||
return count
|
|
||||||
if c < 127 and chr(c) in string.printable:
|
|
||||||
count += 1
|
|
||||||
return count
|
|
||||||
|
|
||||||
|
|
||||||
def get_printable_len_wide(s: bytes) -> int:
|
|
||||||
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
|
||||||
if all(c == 0x00 for c in s[1::2]):
|
|
||||||
return get_printable_len_ascii(s[::2])
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
|
|
||||||
bv: BinaryView = f.view
|
|
||||||
|
|
||||||
if il.operation != MediumLevelILOperation.MLIL_CALL:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
target = il.dest
|
|
||||||
if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
addr = target.value.value
|
|
||||||
sym = bv.get_symbol_at(addr)
|
|
||||||
if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if len(il.params) < 2:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
dest = il.params[0]
|
|
||||||
if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
|
|
||||||
var = dest.src
|
|
||||||
else:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if var.source_type != VariableSourceType.StackVariableSourceType:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
src = il.params[1]
|
|
||||||
if src.value.type != RegisterValueType.ConstantDataAggregateValue:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
|
|
||||||
return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
|
|
||||||
|
|
||||||
|
|
||||||
def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
|
|
||||||
"""check basic block for stackstring indicators
|
|
||||||
|
|
||||||
true if basic block contains enough moves of constant bytes to the stack
|
|
||||||
"""
|
|
||||||
count = 0
|
|
||||||
for il in bb:
|
|
||||||
count += get_stack_string_len(f, il)
|
|
||||||
if count > MIN_STACKSTRING_LEN:
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
|
||||||
"""extract stackstring indicators from basic block"""
|
|
||||||
bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
|
|
||||||
if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]):
|
|
||||||
yield Characteristic("stack string"), bbh.address
|
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
|
||||||
"""extract tight loop indicators from a basic block"""
|
"""extract tight loop indicators from a basic block"""
|
||||||
bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
|
bb: BinjaBasicBlock = bbh.inner
|
||||||
for edge in bb[0].outgoing_edges:
|
for edge in bb.outgoing_edges:
|
||||||
if edge.target.start == bb[0].start:
|
if edge.target.start == bb.start:
|
||||||
yield Characteristic("tight loop"), bbh.address
|
yield Characteristic("tight loop"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract basic block features"""
|
"""extract basic block features"""
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
for feature, addr in bb_handler(fh, bbh):
|
for feature, addr in bb_handler(fh, bbh):
|
||||||
@@ -121,7 +31,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
|
|||||||
yield BasicBlock(), bbh.address
|
yield BasicBlock(), bbh.address
|
||||||
|
|
||||||
|
|
||||||
BASIC_BLOCK_HANDLERS = (
|
BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)
|
||||||
extract_bb_tight_loop,
|
|
||||||
extract_bb_stackstring,
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import binaryninja as binja
|
import binaryninja as binja
|
||||||
|
|
||||||
@@ -30,7 +30,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
def __init__(self, bv: binja.BinaryView):
|
def __init__(self, bv: binja.BinaryView):
|
||||||
super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
|
super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
|
||||||
self.bv = bv
|
self.bv = bv
|
||||||
self.global_features: List[Tuple[Feature, Address]] = []
|
self.global_features: list[tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
|
self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
|
||||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
|
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
|
||||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
|
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
|
||||||
@@ -48,31 +48,24 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
for f in self.bv.functions:
|
for f in self.bv.functions:
|
||||||
yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)
|
yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binja.function.extract_features(fh)
|
yield from capa.features.extractors.binja.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
f: binja.Function = fh.inner
|
f: binja.Function = fh.inner
|
||||||
# Set up a MLIL basic block dict look up to associate the disassembly basic block with its MLIL basic block
|
|
||||||
mlil_lookup = {}
|
|
||||||
for mlil_bb in f.mlil.basic_blocks:
|
|
||||||
mlil_lookup[mlil_bb.source_block.start] = mlil_bb
|
|
||||||
|
|
||||||
for bb in f.basic_blocks:
|
for bb in f.basic_blocks:
|
||||||
mlil_bb = mlil_lookup.get(bb.start)
|
yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)
|
||||||
|
|
||||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
|
||||||
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
import capa.features.extractors.binja.helpers as binja_helpers
|
import capa.features.extractors.binja.helpers as binja_helpers
|
||||||
|
|
||||||
bb: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
|
bb: binja.BasicBlock = bbh.inner
|
||||||
addr = bb[0].start
|
addr = bb.start
|
||||||
|
|
||||||
for text, length in bb[0]:
|
for text, length in bb:
|
||||||
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
|
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
|
||||||
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
|
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
|
||||||
addr += length
|
addr += length
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding
|
from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding
|
||||||
|
|
||||||
@@ -13,12 +13,22 @@ import capa.features.extractors.common
|
|||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
import capa.features.extractors.strings
|
import capa.features.extractors.strings
|
||||||
from capa.features.file import Export, Import, Section, FunctionName
|
from capa.features.file import Export, Import, Section, FunctionName
|
||||||
from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
|
from capa.features.common import (
|
||||||
|
FORMAT_PE,
|
||||||
|
FORMAT_ELF,
|
||||||
|
FORMAT_SC32,
|
||||||
|
FORMAT_SC64,
|
||||||
|
FORMAT_BINJA_DB,
|
||||||
|
Format,
|
||||||
|
String,
|
||||||
|
Feature,
|
||||||
|
Characteristic,
|
||||||
|
)
|
||||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
||||||
|
|
||||||
|
|
||||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
|
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""check segment for embedded PE"""
|
"""check segment for embedded PE"""
|
||||||
start = 0
|
start = 0
|
||||||
if bv.view_type == "PE" and seg.start == bv.start:
|
if bv.view_type == "PE" and seg.start == bv.start:
|
||||||
@@ -32,13 +42,13 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature
|
|||||||
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract embedded PE features"""
|
"""extract embedded PE features"""
|
||||||
for seg in bv.segments:
|
for seg in bv.segments:
|
||||||
yield from check_segment_for_pe(bv, seg)
|
yield from check_segment_for_pe(bv, seg)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract function exports"""
|
"""extract function exports"""
|
||||||
for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
|
for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
|
||||||
if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
|
if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
|
||||||
@@ -72,7 +82,7 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
|
|||||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)
|
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract function imports
|
"""extract function imports
|
||||||
|
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
@@ -96,19 +106,19 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
|
|||||||
yield Import(name), addr
|
yield Import(name), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract section names"""
|
"""extract section names"""
|
||||||
for name, section in bv.sections.items():
|
for name, section in bv.sections.items():
|
||||||
yield Section(name), AbsoluteVirtualAddress(section.start)
|
yield Section(name), AbsoluteVirtualAddress(section.start)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract ASCII and UTF-16 LE strings"""
|
"""extract ASCII and UTF-16 LE strings"""
|
||||||
for s in bv.strings:
|
for s in bv.strings:
|
||||||
yield String(s.value), FileOffsetAddress(s.start)
|
yield String(s.value), FileOffsetAddress(s.start)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract the names of statically-linked library functions.
|
extract the names of statically-linked library functions.
|
||||||
"""
|
"""
|
||||||
@@ -127,12 +137,22 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
|
|||||||
yield FunctionName(name[1:]), sym.address
|
yield FunctionName(name[1:]), sym.address
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
|
if bv.file.database is not None:
|
||||||
|
yield Format(FORMAT_BINJA_DB), NO_ADDRESS
|
||||||
|
|
||||||
view_type = bv.view_type
|
view_type = bv.view_type
|
||||||
if view_type in ["PE", "COFF"]:
|
if view_type in ["PE", "COFF"]:
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
elif view_type == "ELF":
|
elif view_type == "ELF":
|
||||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||||
|
elif view_type == "Mapped":
|
||||||
|
if bv.arch.name == "x86":
|
||||||
|
yield Format(FORMAT_SC32), NO_ADDRESS
|
||||||
|
elif bv.arch.name == "x86_64":
|
||||||
|
yield Format(FORMAT_SC64), NO_ADDRESS
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(f"unexpected raw file with arch: {bv.arch}")
|
||||||
elif view_type == "Raw":
|
elif view_type == "Raw":
|
||||||
# no file type to return when processing a binary file, but we want to continue processing
|
# no file type to return when processing a binary file, but we want to continue processing
|
||||||
return
|
return
|
||||||
@@ -140,7 +160,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
raise NotImplementedError(f"unexpected file format: {view_type}")
|
raise NotImplementedError(f"unexpected file format: {view_type}")
|
||||||
|
|
||||||
|
|
||||||
def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract file features"""
|
"""extract file features"""
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler(bv):
|
for feature, addr in file_handler(bv):
|
||||||
|
|||||||
@@ -105,13 +105,13 @@ def find_binaryninja() -> Optional[Path]:
|
|||||||
logger.debug("detected OS: linux")
|
logger.debug("detected OS: linux")
|
||||||
elif sys.platform == "darwin":
|
elif sys.platform == "darwin":
|
||||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||||
return False
|
return None
|
||||||
elif sys.platform == "win32":
|
elif sys.platform == "win32":
|
||||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||||
return False
|
return None
|
||||||
else:
|
else:
|
||||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||||
return False
|
return None
|
||||||
|
|
||||||
desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
|
desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
|
||||||
if not desktop_entry:
|
if not desktop_entry:
|
||||||
|
|||||||
@@ -5,14 +5,28 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Tuple, Iterator
|
import string
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation
|
from binaryninja import (
|
||||||
|
Function,
|
||||||
|
BinaryView,
|
||||||
|
SymbolType,
|
||||||
|
ILException,
|
||||||
|
RegisterValueType,
|
||||||
|
VariableSourceType,
|
||||||
|
LowLevelILOperation,
|
||||||
|
MediumLevelILOperation,
|
||||||
|
MediumLevelILBasicBlock,
|
||||||
|
MediumLevelILInstruction,
|
||||||
|
)
|
||||||
|
|
||||||
from capa.features.file import FunctionName
|
from capa.features.file import FunctionName
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors import loops
|
from capa.features.extractors import loops
|
||||||
|
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||||
|
from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
|
||||||
from capa.features.extractors.base_extractor import FunctionHandle
|
from capa.features.extractors.base_extractor import FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
@@ -24,7 +38,7 @@ def extract_function_calls_to(fh: FunctionHandle):
|
|||||||
# Everything that is a code reference to the current function is considered a caller, which actually includes
|
# Everything that is a code reference to the current function is considered a caller, which actually includes
|
||||||
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
|
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
|
||||||
# considered a caller to the function
|
# considered a caller to the function
|
||||||
llil = caller.llil
|
llil = get_llil_instr_at_addr(func.view, caller.address)
|
||||||
if (llil is None) or llil.operation not in [
|
if (llil is None) or llil.operation not in [
|
||||||
LowLevelILOperation.LLIL_CALL,
|
LowLevelILOperation.LLIL_CALL,
|
||||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||||
@@ -33,14 +47,13 @@ def extract_function_calls_to(fh: FunctionHandle):
|
|||||||
]:
|
]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if llil.dest.value.type not in [
|
if llil.dest.operation not in [
|
||||||
RegisterValueType.ImportedAddressValue,
|
LowLevelILOperation.LLIL_CONST,
|
||||||
RegisterValueType.ConstantValue,
|
LowLevelILOperation.LLIL_CONST_PTR,
|
||||||
RegisterValueType.ConstantPointerValue,
|
|
||||||
]:
|
]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
address = llil.dest.value.value
|
address = llil.dest.constant
|
||||||
if address != func.start:
|
if address != func.start:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -95,10 +108,103 @@ def extract_function_name(fh: FunctionHandle):
|
|||||||
yield FunctionName(name[1:]), sym.address
|
yield FunctionName(name[1:]), sym.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def get_printable_len_ascii(s: bytes) -> int:
|
||||||
|
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
||||||
|
count = 0
|
||||||
|
for c in s:
|
||||||
|
if c == 0:
|
||||||
|
return count
|
||||||
|
if c < 127 and chr(c) in string.printable:
|
||||||
|
count += 1
|
||||||
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
def get_printable_len_wide(s: bytes) -> int:
|
||||||
|
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
||||||
|
if all(c == 0x00 for c in s[1::2]):
|
||||||
|
return get_printable_len_ascii(s[::2])
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
|
||||||
|
bv: BinaryView = f.view
|
||||||
|
|
||||||
|
if il.operation != MediumLevelILOperation.MLIL_CALL:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
target = il.dest
|
||||||
|
if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
addr = target.value.value
|
||||||
|
sym = bv.get_symbol_at(addr)
|
||||||
|
if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if len(il.params) < 2:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
dest = il.params[0]
|
||||||
|
if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
|
||||||
|
var = dest.src
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if var.source_type != VariableSourceType.StackVariableSourceType:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
src = il.params[1]
|
||||||
|
if src.value.type != RegisterValueType.ConstantDataAggregateValue:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
|
||||||
|
return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
|
||||||
|
|
||||||
|
|
||||||
|
def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
|
||||||
|
"""check basic block for stackstring indicators
|
||||||
|
|
||||||
|
true if basic block contains enough moves of constant bytes to the stack
|
||||||
|
"""
|
||||||
|
count = 0
|
||||||
|
for il in bb:
|
||||||
|
count += get_stack_string_len(f, il)
|
||||||
|
if count > MIN_STACKSTRING_LEN:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def extract_stackstring(fh: FunctionHandle):
|
||||||
|
"""extract stackstring indicators"""
|
||||||
|
func: Function = fh.inner
|
||||||
|
bv: BinaryView = func.view
|
||||||
|
if bv is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
mlil = func.mlil
|
||||||
|
except ILException:
|
||||||
|
return
|
||||||
|
|
||||||
|
for block in mlil.basic_blocks:
|
||||||
|
if bb_contains_stackstring(func, block):
|
||||||
|
yield Characteristic("stack string"), block.source_block.start
|
||||||
|
|
||||||
|
|
||||||
|
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
|
|
||||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
|
FUNCTION_HANDLERS = (
|
||||||
|
extract_function_calls_to,
|
||||||
|
extract_function_loop,
|
||||||
|
extract_recursive_call,
|
||||||
|
extract_function_name,
|
||||||
|
extract_stackstring,
|
||||||
|
)
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from binaryninja import BinaryView
|
from binaryninja import BinaryView
|
||||||
|
|
||||||
@@ -16,7 +16,7 @@ from capa.features.address import NO_ADDRESS, Address
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
name = bv.platform.name
|
name = bv.platform.name
|
||||||
if "-" in name:
|
if "-" in name:
|
||||||
name = name.split("-")[0]
|
name = name.split("-")[0]
|
||||||
@@ -45,7 +45,7 @@ def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
def extract_arch(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||||
arch = bv.arch.name
|
arch = bv.arch.name
|
||||||
if arch == "x86_64":
|
if arch == "x86_64":
|
||||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import re
|
import re
|
||||||
from typing import List, Callable
|
from typing import Callable, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from binaryninja import BinaryView, LowLevelILInstruction
|
from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction
|
||||||
from binaryninja.architecture import InstructionTextToken
|
from binaryninja.architecture import InstructionTextToken
|
||||||
|
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ from binaryninja.architecture import InstructionTextToken
|
|||||||
class DisassemblyInstruction:
|
class DisassemblyInstruction:
|
||||||
address: int
|
address: int
|
||||||
length: int
|
length: int
|
||||||
text: List[InstructionTextToken]
|
text: list[InstructionTextToken]
|
||||||
|
|
||||||
|
|
||||||
LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
|
LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
|
||||||
@@ -54,7 +54,7 @@ def unmangle_c_name(name: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
|
def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
|
||||||
s: List[str] = []
|
s: list[str] = []
|
||||||
while len(s) < max_len:
|
while len(s) < max_len:
|
||||||
try:
|
try:
|
||||||
c = bv.read(offset + len(s), 1)[0]
|
c = bv.read(offset + len(s), 1)[0]
|
||||||
@@ -67,3 +67,13 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
|
|||||||
s.append(chr(c))
|
s.append(chr(c))
|
||||||
|
|
||||||
return "".join(s)
|
return "".join(s)
|
||||||
|
|
||||||
|
|
||||||
|
def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]:
|
||||||
|
arch = bv.arch
|
||||||
|
buffer = bv.read(addr, arch.max_instr_length)
|
||||||
|
llil = LowLevelILFunction(arch=arch)
|
||||||
|
llil.current_address = addr
|
||||||
|
if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
|
||||||
|
return None
|
||||||
|
return llil[0]
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Any, List, Tuple, Iterator, Optional
|
from typing import Any, Iterator, Optional
|
||||||
|
|
||||||
from binaryninja import Function
|
from binaryninja import Function
|
||||||
from binaryninja import BasicBlock as BinjaBasicBlock
|
from binaryninja import BasicBlock as BinjaBasicBlock
|
||||||
@@ -23,7 +23,7 @@ import capa.features.extractors.helpers
|
|||||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
|
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
|
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr
|
||||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||||
|
|
||||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||||
@@ -36,35 +36,27 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
|||||||
# 2. The function must only make one call/jump to another address
|
# 2. The function must only make one call/jump to another address
|
||||||
# If the function being checked is a stub function, returns the target address. Otherwise, return None.
|
# If the function being checked is a stub function, returns the target address. Otherwise, return None.
|
||||||
def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
|
def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
|
||||||
funcs = bv.get_functions_at(addr)
|
llil = get_llil_instr_at_addr(bv, addr)
|
||||||
for func in funcs:
|
if llil is None or llil.operation not in [
|
||||||
if len(func.basic_blocks) != 1:
|
LowLevelILOperation.LLIL_CALL,
|
||||||
continue
|
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||||
|
LowLevelILOperation.LLIL_JUMP,
|
||||||
|
LowLevelILOperation.LLIL_TAILCALL,
|
||||||
|
]:
|
||||||
|
return None
|
||||||
|
|
||||||
call_count = 0
|
# The LLIL instruction retrieved by `get_llil_instr_at_addr` did not go through a full analysis, so we cannot check
|
||||||
call_target = None
|
# `llil.dest.value.type` here
|
||||||
for il in func.llil.instructions:
|
if llil.dest.operation not in [
|
||||||
if il.operation in [
|
LowLevelILOperation.LLIL_CONST,
|
||||||
LowLevelILOperation.LLIL_CALL,
|
LowLevelILOperation.LLIL_CONST_PTR,
|
||||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
]:
|
||||||
LowLevelILOperation.LLIL_JUMP,
|
return None
|
||||||
LowLevelILOperation.LLIL_TAILCALL,
|
|
||||||
]:
|
|
||||||
call_count += 1
|
|
||||||
if il.dest.value.type in [
|
|
||||||
RegisterValueType.ImportedAddressValue,
|
|
||||||
RegisterValueType.ConstantValue,
|
|
||||||
RegisterValueType.ConstantPointerValue,
|
|
||||||
]:
|
|
||||||
call_target = il.dest.value.value
|
|
||||||
|
|
||||||
if call_count == 1 and call_target is not None:
|
return llil.dest.constant
|
||||||
return call_target
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction API features
|
parse instruction API features
|
||||||
|
|
||||||
@@ -123,7 +115,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction number features
|
parse instruction number features
|
||||||
example:
|
example:
|
||||||
@@ -131,7 +123,7 @@ def extract_insn_number_features(
|
|||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
|
|
||||||
results: List[Tuple[Any[Number, OperandNumber], Address]] = []
|
results: list[tuple[Any[Number, OperandNumber], Address]] = []
|
||||||
|
|
||||||
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
||||||
if il.operation == LowLevelILOperation.LLIL_LOAD:
|
if il.operation == LowLevelILOperation.LLIL_LOAD:
|
||||||
@@ -162,7 +154,7 @@ def extract_insn_number_features(
|
|||||||
yield from results
|
yield from results
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse referenced byte sequences
|
parse referenced byte sequences
|
||||||
example:
|
example:
|
||||||
@@ -209,7 +201,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_insn_string_features(
|
def extract_insn_string_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction string features
|
parse instruction string features
|
||||||
|
|
||||||
@@ -266,7 +258,7 @@ def extract_insn_string_features(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction structure offset features
|
parse instruction structure offset features
|
||||||
|
|
||||||
@@ -275,7 +267,7 @@ def extract_insn_offset_features(
|
|||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
|
|
||||||
results: List[Tuple[Any[Offset, OperandOffset], Address]] = []
|
results: list[tuple[Any[Offset, OperandOffset], Address]] = []
|
||||||
address_size = func.view.arch.address_size * 8
|
address_size = func.view.arch.address_size * 8
|
||||||
|
|
||||||
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
||||||
@@ -353,7 +345,7 @@ def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInst
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction non-zeroing XOR instruction
|
parse instruction non-zeroing XOR instruction
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies
|
ignore expected non-zeroing XORs, e.g. security cookies
|
||||||
@@ -367,7 +359,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
# e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
|
# e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
|
||||||
if il.operation == LowLevelILOperation.LLIL_XOR:
|
if il.operation == LowLevelILOperation.LLIL_XOR:
|
||||||
# Exclude cases related to the stack cookie
|
# Exclude cases related to the stack cookie
|
||||||
if is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il):
|
if is_nzxor_stack_cookie(fh.inner, bbh.inner, il):
|
||||||
return False
|
return False
|
||||||
results.append((Characteristic("nzxor"), ih.address))
|
results.append((Characteristic("nzxor"), ih.address))
|
||||||
return False
|
return False
|
||||||
@@ -382,7 +374,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_mnemonic_features(
|
def extract_insn_mnemonic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction mnemonic features"""
|
"""parse instruction mnemonic features"""
|
||||||
insn: DisassemblyInstruction = ih.inner
|
insn: DisassemblyInstruction = ih.inner
|
||||||
yield Mnemonic(insn.text[0].text), ih.address
|
yield Mnemonic(insn.text[0].text), ih.address
|
||||||
@@ -390,7 +382,7 @@ def extract_insn_mnemonic_features(
|
|||||||
|
|
||||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse call $+5 instruction from the given instruction.
|
parse call $+5 instruction from the given instruction.
|
||||||
"""
|
"""
|
||||||
@@ -401,7 +393,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(
|
def extract_insn_peb_access_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction peb access
|
"""parse instruction peb access
|
||||||
|
|
||||||
fs:[0x30] on x86, gs:[0x60] on x64
|
fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
@@ -444,7 +436,7 @@ def extract_insn_peb_access_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_segment_access_features(
|
def extract_insn_segment_access_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction fs or gs access"""
|
"""parse instruction fs or gs access"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
|
|
||||||
@@ -471,7 +463,7 @@ def extract_insn_segment_access_features(
|
|||||||
|
|
||||||
def extract_insn_cross_section_cflow(
|
def extract_insn_cross_section_cflow(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
bv: BinaryView = func.view
|
bv: BinaryView = func.view
|
||||||
@@ -491,7 +483,7 @@ def extract_insn_cross_section_cflow(
|
|||||||
yield Characteristic("cross section flow"), ih.address
|
yield Characteristic("cross section flow"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract functions calls from features
|
"""extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||||
@@ -534,7 +526,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
|
|
||||||
@@ -562,7 +554,7 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
yield Characteristic("indirect call"), ih.address
|
yield Characteristic("indirect call"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract instruction features"""
|
"""extract instruction features"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, ea in inst_handler(f, bbh, insn):
|
for feature, ea in inst_handler(f, bbh, insn):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
from capa.helpers import assert_never
|
from capa.helpers import assert_never
|
||||||
@@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
this method extracts the given call's features (such as API name and arguments),
|
this method extracts the given call's features (such as API name and arguments),
|
||||||
and returns them as API, Number, and String features.
|
and returns them as API, Number, and String features.
|
||||||
@@ -55,7 +55,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
|||||||
yield API(name), ch.address
|
yield API(name), ch.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in CALL_HANDLERS:
|
for handler in CALL_HANDLERS:
|
||||||
for feature, addr in handler(ph, th, ch):
|
for feature, addr in handler(ph, th, ch):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Tuple, Union, Iterator
|
from typing import Union, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.cape.call
|
import capa.features.extractors.cape.call
|
||||||
import capa.features.extractors.cape.file
|
import capa.features.extractors.cape.file
|
||||||
@@ -50,16 +50,16 @@ class CapeExtractor(DynamicFeatureExtractor):
|
|||||||
assert self.report.static is not None and self.report.static.pe is not None
|
assert self.report.static is not None and self.report.static.pe is not None
|
||||||
return AbsoluteVirtualAddress(self.report.static.pe.imagebase)
|
return AbsoluteVirtualAddress(self.report.static.pe.imagebase)
|
||||||
|
|
||||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from self.global_features
|
yield from self.global_features
|
||||||
|
|
||||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.cape.file.extract_features(self.report)
|
yield from capa.features.extractors.cape.file.extract_features(self.report)
|
||||||
|
|
||||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||||
yield from capa.features.extractors.cape.file.get_processes(self.report)
|
yield from capa.features.extractors.cape.file.get_processes(self.report)
|
||||||
|
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.cape.process.extract_features(ph)
|
yield from capa.features.extractors.cape.process.extract_features(ph)
|
||||||
|
|
||||||
def get_process_name(self, ph) -> str:
|
def get_process_name(self, ph) -> str:
|
||||||
@@ -69,7 +69,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
|||||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||||
yield from capa.features.extractors.cape.process.get_threads(ph)
|
yield from capa.features.extractors.cape.process.get_threads(ph)
|
||||||
|
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
if False:
|
if False:
|
||||||
# force this routine to be a generator,
|
# force this routine to be a generator,
|
||||||
# but we don't actually have any elements to generate.
|
# but we don't actually have any elements to generate.
|
||||||
@@ -81,7 +81,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
|||||||
|
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)
|
yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)
|
||||||
|
|
||||||
def get_call_name(self, ph, th, ch) -> str:
|
def get_call_name(self, ph, th, ch) -> str:
|
||||||
@@ -122,7 +122,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
|||||||
return "".join(parts)
|
return "".join(parts)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_report(cls, report: Dict) -> "CapeExtractor":
|
def from_report(cls, report: dict) -> "CapeExtractor":
|
||||||
cr = CapeReport.model_validate(report)
|
cr = CapeReport.model_validate(report)
|
||||||
|
|
||||||
if cr.info.version not in TESTED_VERSIONS:
|
if cr.info.version not in TESTED_VERSIONS:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.file import Export, Import, Section
|
from capa.features.file import Export, Import, Section
|
||||||
from capa.features.common import String, Feature
|
from capa.features.common import String, Feature
|
||||||
@@ -41,7 +41,7 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]:
|
|||||||
seen_processes[addr].append(process)
|
seen_processes[addr].append(process)
|
||||||
|
|
||||||
|
|
||||||
def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract imported function names
|
extract imported function names
|
||||||
"""
|
"""
|
||||||
@@ -62,57 +62,57 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]
|
|||||||
yield Import(name), AbsoluteVirtualAddress(function.address)
|
yield Import(name), AbsoluteVirtualAddress(function.address)
|
||||||
|
|
||||||
|
|
||||||
def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
assert report.static is not None and report.static.pe is not None
|
assert report.static is not None and report.static.pe is not None
|
||||||
for function in report.static.pe.exports:
|
for function in report.static.pe.exports:
|
||||||
yield Export(function.name), AbsoluteVirtualAddress(function.address)
|
yield Export(function.name), AbsoluteVirtualAddress(function.address)
|
||||||
|
|
||||||
|
|
||||||
def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
assert report.static is not None and report.static.pe is not None
|
assert report.static is not None and report.static.pe is not None
|
||||||
for section in report.static.pe.sections:
|
for section in report.static.pe.sections:
|
||||||
yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)
|
yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
if report.strings is not None:
|
if report.strings is not None:
|
||||||
for string in report.strings:
|
for string in report.strings:
|
||||||
yield String(string), NO_ADDRESS
|
yield String(string), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for regkey in report.behavior.summary.keys:
|
for regkey in report.behavior.summary.keys:
|
||||||
yield String(regkey), NO_ADDRESS
|
yield String(regkey), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_used_files(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for file in report.behavior.summary.files:
|
for file in report.behavior.summary.files:
|
||||||
yield String(file), NO_ADDRESS
|
yield String(file), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for mutex in report.behavior.summary.mutexes:
|
for mutex in report.behavior.summary.mutexes:
|
||||||
yield String(mutex), NO_ADDRESS
|
yield String(mutex), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for cmd in report.behavior.summary.executed_commands:
|
for cmd in report.behavior.summary.executed_commands:
|
||||||
yield String(cmd), NO_ADDRESS
|
yield String(cmd), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for symbol in report.behavior.summary.resolved_apis:
|
for symbol in report.behavior.summary.resolved_apis:
|
||||||
yield String(symbol), NO_ADDRESS
|
yield String(symbol), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for svc in report.behavior.summary.created_services:
|
for svc in report.behavior.summary.created_services:
|
||||||
yield String(svc), NO_ADDRESS
|
yield String(svc), NO_ADDRESS
|
||||||
for svc in report.behavior.summary.started_services:
|
for svc in report.behavior.summary.started_services:
|
||||||
yield String(svc), NO_ADDRESS
|
yield String(svc), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in FILE_HANDLERS:
|
for handler in FILE_HANDLERS:
|
||||||
for feature, addr in handler(report):
|
for feature, addr in handler(report):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.common import (
|
from capa.features.common import (
|
||||||
OS,
|
OS,
|
||||||
@@ -28,7 +28,7 @@ from capa.features.extractors.cape.models import CapeReport
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
if "Intel 80386" in report.target.file.type:
|
if "Intel 80386" in report.target.file.type:
|
||||||
yield Arch(ARCH_I386), NO_ADDRESS
|
yield Arch(ARCH_I386), NO_ADDRESS
|
||||||
elif "x86-64" in report.target.file.type:
|
elif "x86-64" in report.target.file.type:
|
||||||
@@ -40,7 +40,7 @@ def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
if "PE" in report.target.file.type:
|
if "PE" in report.target.file.type:
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
elif "ELF" in report.target.file.type:
|
elif "ELF" in report.target.file.type:
|
||||||
@@ -52,7 +52,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
# this variable contains the output of the file command
|
# this variable contains the output of the file command
|
||||||
file_output = report.target.file.type
|
file_output = report.target.file.type
|
||||||
|
|
||||||
@@ -80,7 +80,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield OS(OS_ANY), NO_ADDRESS
|
yield OS(OS_ANY), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for global_handler in GLOBAL_HANDLER:
|
for global_handler in GLOBAL_HANDLER:
|
||||||
for feature, addr in global_handler(report):
|
for feature, addr in global_handler(report):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -6,12 +6,12 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
from typing import Any, Dict, List
|
from typing import Any
|
||||||
|
|
||||||
from capa.features.extractors.base_extractor import ProcessHandle
|
from capa.features.extractors.base_extractor import ProcessHandle
|
||||||
|
|
||||||
|
|
||||||
def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]:
|
def find_process(processes: list[dict[str, Any]], ph: ProcessHandle) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
find a specific process identified by a process handler.
|
find a specific process identified by a process handler.
|
||||||
|
|
||||||
|
|||||||
@@ -6,10 +6,9 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import binascii
|
import binascii
|
||||||
from typing import Any, Dict, List, Union, Literal, Optional
|
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
|
||||||
|
|
||||||
from pydantic import Field, BaseModel, ConfigDict
|
from pydantic import Field, BaseModel, ConfigDict
|
||||||
from typing_extensions import Annotated, TypeAlias
|
|
||||||
from pydantic.functional_validators import BeforeValidator
|
from pydantic.functional_validators import BeforeValidator
|
||||||
|
|
||||||
|
|
||||||
@@ -59,11 +58,11 @@ Skip: TypeAlias = Optional[Any]
|
|||||||
# in a field with this type.
|
# in a field with this type.
|
||||||
# then we can update the model with the discovered shape.
|
# then we can update the model with the discovered shape.
|
||||||
TODO: TypeAlias = None
|
TODO: TypeAlias = None
|
||||||
ListTODO: TypeAlias = List[None]
|
ListTODO: TypeAlias = list[None]
|
||||||
DictTODO: TypeAlias = ExactModel
|
DictTODO: TypeAlias = ExactModel
|
||||||
|
|
||||||
EmptyDict: TypeAlias = BaseModel
|
Emptydict: TypeAlias = BaseModel
|
||||||
EmptyList: TypeAlias = List[Any]
|
EmptyList: TypeAlias = list[Any]
|
||||||
|
|
||||||
|
|
||||||
class Info(FlexibleModel):
|
class Info(FlexibleModel):
|
||||||
@@ -77,7 +76,7 @@ class ImportedSymbol(ExactModel):
|
|||||||
|
|
||||||
class ImportedDll(ExactModel):
|
class ImportedDll(ExactModel):
|
||||||
dll: str
|
dll: str
|
||||||
imports: List[ImportedSymbol]
|
imports: list[ImportedSymbol]
|
||||||
|
|
||||||
|
|
||||||
class DirectoryEntry(ExactModel):
|
class DirectoryEntry(ExactModel):
|
||||||
@@ -149,7 +148,7 @@ class Signer(ExactModel):
|
|||||||
aux_valid: Optional[bool] = None
|
aux_valid: Optional[bool] = None
|
||||||
aux_error: Optional[bool] = None
|
aux_error: Optional[bool] = None
|
||||||
aux_error_desc: Optional[str] = None
|
aux_error_desc: Optional[str] = None
|
||||||
aux_signers: Optional[List[AuxSigner]] = None
|
aux_signers: Optional[list[AuxSigner]] = None
|
||||||
|
|
||||||
|
|
||||||
class Overlay(ExactModel):
|
class Overlay(ExactModel):
|
||||||
@@ -178,22 +177,22 @@ class PE(ExactModel):
|
|||||||
pdbpath: Optional[str] = None
|
pdbpath: Optional[str] = None
|
||||||
timestamp: str
|
timestamp: str
|
||||||
|
|
||||||
# List[ImportedDll], or Dict[basename(dll), ImportedDll]
|
# list[ImportedDll], or dict[basename(dll), ImportedDll]
|
||||||
imports: Union[List[ImportedDll], Dict[str, ImportedDll]]
|
imports: Union[list[ImportedDll], dict[str, ImportedDll]]
|
||||||
imported_dll_count: Optional[int] = None
|
imported_dll_count: Optional[int] = None
|
||||||
imphash: str
|
imphash: str
|
||||||
|
|
||||||
exported_dll_name: Optional[str] = None
|
exported_dll_name: Optional[str] = None
|
||||||
exports: List[ExportedSymbol]
|
exports: list[ExportedSymbol]
|
||||||
|
|
||||||
dirents: List[DirectoryEntry]
|
dirents: list[DirectoryEntry]
|
||||||
sections: List[Section]
|
sections: list[Section]
|
||||||
|
|
||||||
ep_bytes: Optional[HexBytes] = None
|
ep_bytes: Optional[HexBytes] = None
|
||||||
|
|
||||||
overlay: Optional[Overlay] = None
|
overlay: Optional[Overlay] = None
|
||||||
resources: List[Resource]
|
resources: list[Resource]
|
||||||
versioninfo: List[KV]
|
versioninfo: list[KV]
|
||||||
|
|
||||||
# base64 encoded data
|
# base64 encoded data
|
||||||
icon: Optional[str] = None
|
icon: Optional[str] = None
|
||||||
@@ -204,7 +203,7 @@ class PE(ExactModel):
|
|||||||
# short hex string
|
# short hex string
|
||||||
icon_dhash: Optional[str] = None
|
icon_dhash: Optional[str] = None
|
||||||
|
|
||||||
digital_signers: List[DigitalSigner]
|
digital_signers: list[DigitalSigner]
|
||||||
guest_signers: Signer
|
guest_signers: Signer
|
||||||
|
|
||||||
|
|
||||||
@@ -217,9 +216,9 @@ class File(FlexibleModel):
|
|||||||
cape_type: Optional[str] = None
|
cape_type: Optional[str] = None
|
||||||
|
|
||||||
pid: Optional[Union[int, Literal[""]]] = None
|
pid: Optional[Union[int, Literal[""]]] = None
|
||||||
name: Union[List[str], str]
|
name: Union[list[str], str]
|
||||||
path: str
|
path: str
|
||||||
guest_paths: Union[List[str], str, None]
|
guest_paths: Union[list[str], str, None]
|
||||||
timestamp: Optional[str] = None
|
timestamp: Optional[str] = None
|
||||||
|
|
||||||
#
|
#
|
||||||
@@ -244,7 +243,7 @@ class File(FlexibleModel):
|
|||||||
ep_bytes: Optional[HexBytes] = None
|
ep_bytes: Optional[HexBytes] = None
|
||||||
entrypoint: Optional[int] = None
|
entrypoint: Optional[int] = None
|
||||||
data: Optional[str] = None
|
data: Optional[str] = None
|
||||||
strings: Optional[List[str]] = None
|
strings: Optional[list[str]] = None
|
||||||
|
|
||||||
#
|
#
|
||||||
# detections (skip)
|
# detections (skip)
|
||||||
@@ -283,7 +282,7 @@ class Call(ExactModel):
|
|||||||
|
|
||||||
api: str
|
api: str
|
||||||
|
|
||||||
arguments: List[Argument]
|
arguments: list[Argument]
|
||||||
status: bool
|
status: bool
|
||||||
return_: HexInt = Field(alias="return")
|
return_: HexInt = Field(alias="return")
|
||||||
pretty_return: Optional[str] = None
|
pretty_return: Optional[str] = None
|
||||||
@@ -298,15 +297,18 @@ class Call(ExactModel):
|
|||||||
id: int
|
id: int
|
||||||
|
|
||||||
|
|
||||||
class Process(ExactModel):
|
# FlexibleModel to account for extended fields
|
||||||
|
# refs: https://github.com/mandiant/capa/issues/2466
|
||||||
|
# https://github.com/kevoreilly/CAPEv2/pull/2199
|
||||||
|
class Process(FlexibleModel):
|
||||||
process_id: int
|
process_id: int
|
||||||
process_name: str
|
process_name: str
|
||||||
parent_id: int
|
parent_id: int
|
||||||
module_path: str
|
module_path: str
|
||||||
first_seen: str
|
first_seen: str
|
||||||
calls: List[Call]
|
calls: list[Call]
|
||||||
threads: List[int]
|
threads: list[int]
|
||||||
environ: Dict[str, str]
|
environ: dict[str, str]
|
||||||
|
|
||||||
|
|
||||||
class ProcessTree(ExactModel):
|
class ProcessTree(ExactModel):
|
||||||
@@ -314,25 +316,25 @@ class ProcessTree(ExactModel):
|
|||||||
pid: int
|
pid: int
|
||||||
parent_id: int
|
parent_id: int
|
||||||
module_path: str
|
module_path: str
|
||||||
threads: List[int]
|
threads: list[int]
|
||||||
environ: Dict[str, str]
|
environ: dict[str, str]
|
||||||
children: List["ProcessTree"]
|
children: list["ProcessTree"]
|
||||||
|
|
||||||
|
|
||||||
class Summary(ExactModel):
|
class Summary(ExactModel):
|
||||||
files: List[str]
|
files: list[str]
|
||||||
read_files: List[str]
|
read_files: list[str]
|
||||||
write_files: List[str]
|
write_files: list[str]
|
||||||
delete_files: List[str]
|
delete_files: list[str]
|
||||||
keys: List[str]
|
keys: list[str]
|
||||||
read_keys: List[str]
|
read_keys: list[str]
|
||||||
write_keys: List[str]
|
write_keys: list[str]
|
||||||
delete_keys: List[str]
|
delete_keys: list[str]
|
||||||
executed_commands: List[str]
|
executed_commands: list[str]
|
||||||
resolved_apis: List[str]
|
resolved_apis: list[str]
|
||||||
mutexes: List[str]
|
mutexes: list[str]
|
||||||
created_services: List[str]
|
created_services: list[str]
|
||||||
started_services: List[str]
|
started_services: list[str]
|
||||||
|
|
||||||
|
|
||||||
class EncryptedBuffer(ExactModel):
|
class EncryptedBuffer(ExactModel):
|
||||||
@@ -349,12 +351,12 @@ class Behavior(ExactModel):
|
|||||||
summary: Summary
|
summary: Summary
|
||||||
|
|
||||||
# list of processes, of threads, of calls
|
# list of processes, of threads, of calls
|
||||||
processes: List[Process]
|
processes: list[Process]
|
||||||
# tree of processes
|
# tree of processes
|
||||||
processtree: List[ProcessTree]
|
processtree: list[ProcessTree]
|
||||||
|
|
||||||
anomaly: List[str]
|
anomaly: list[str]
|
||||||
encryptedbuffers: List[EncryptedBuffer]
|
encryptedbuffers: list[EncryptedBuffer]
|
||||||
# these are small objects that describe atomic events,
|
# these are small objects that describe atomic events,
|
||||||
# like file move, registry access.
|
# like file move, registry access.
|
||||||
# we'll detect the same with our API call analysis.
|
# we'll detect the same with our API call analysis.
|
||||||
@@ -373,7 +375,7 @@ class Static(ExactModel):
|
|||||||
|
|
||||||
|
|
||||||
class Cape(ExactModel):
|
class Cape(ExactModel):
|
||||||
payloads: List[ProcessFile]
|
payloads: list[ProcessFile]
|
||||||
configs: Skip = None
|
configs: Skip = None
|
||||||
|
|
||||||
|
|
||||||
@@ -389,7 +391,7 @@ class CapeReport(FlexibleModel):
|
|||||||
# static analysis results
|
# static analysis results
|
||||||
#
|
#
|
||||||
static: Optional[Static] = None
|
static: Optional[Static] = None
|
||||||
strings: Optional[List[str]] = None
|
strings: Optional[list[str]] = None
|
||||||
|
|
||||||
#
|
#
|
||||||
# dynamic analysis results
|
# dynamic analysis results
|
||||||
@@ -398,10 +400,10 @@ class CapeReport(FlexibleModel):
|
|||||||
behavior: Behavior
|
behavior: Behavior
|
||||||
|
|
||||||
# post-processed results: payloads and extracted configs
|
# post-processed results: payloads and extracted configs
|
||||||
CAPE: Optional[Union[Cape, List]] = None
|
CAPE: Optional[Union[Cape, list]] = None
|
||||||
dropped: Optional[List[File]] = None
|
dropped: Optional[list[File]] = None
|
||||||
procdump: Optional[List[ProcessFile]] = None
|
procdump: Optional[list[ProcessFile]] = None
|
||||||
procmemory: ListTODO
|
procmemory: Optional[ListTODO] = None
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# information we won't use in capa
|
# information we won't use in capa
|
||||||
@@ -437,7 +439,7 @@ class CapeReport(FlexibleModel):
|
|||||||
malfamily_tag: Optional[str] = None
|
malfamily_tag: Optional[str] = None
|
||||||
malscore: float
|
malscore: float
|
||||||
detections: Skip = None
|
detections: Skip = None
|
||||||
detections2pid: Optional[Dict[int, List[str]]] = None
|
detections2pid: Optional[dict[int, list[str]]] = None
|
||||||
# AV detections for the sample.
|
# AV detections for the sample.
|
||||||
virustotal: Skip = None
|
virustotal: Skip = None
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.common import String, Feature
|
from capa.features.common import String, Feature
|
||||||
from capa.features.address import Address, ThreadAddress
|
from capa.features.address import Address, ThreadAddress
|
||||||
@@ -22,14 +22,14 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
|||||||
get the threads associated with a given process
|
get the threads associated with a given process
|
||||||
"""
|
"""
|
||||||
process: Process = ph.inner
|
process: Process = ph.inner
|
||||||
threads: List[int] = process.threads
|
threads: list[int] = process.threads
|
||||||
|
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
|
address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
|
||||||
yield ThreadHandle(address=address, inner={})
|
yield ThreadHandle(address=address, inner={})
|
||||||
|
|
||||||
|
|
||||||
def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract strings from a process' provided environment variables.
|
extract strings from a process' provided environment variables.
|
||||||
"""
|
"""
|
||||||
@@ -39,7 +39,7 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Addres
|
|||||||
yield String(value), ph.address
|
yield String(value), ph.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in PROCESS_HANDLERS:
|
for handler in PROCESS_HANDLERS:
|
||||||
for feature, addr in handler(ph):
|
for feature, addr in handler(ph):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import re
|
|||||||
import logging
|
import logging
|
||||||
import binascii
|
import binascii
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import pefile
|
import pefile
|
||||||
|
|
||||||
@@ -45,7 +45,7 @@ MATCH_RESULT = b'{"meta":'
|
|||||||
MATCH_JSON_OBJECT = b'{"'
|
MATCH_JSON_OBJECT = b'{"'
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]:
|
def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address]]:
|
||||||
"""
|
"""
|
||||||
extract ASCII and UTF-16 LE strings from file
|
extract ASCII and UTF-16 LE strings from file
|
||||||
"""
|
"""
|
||||||
@@ -56,7 +56,7 @@ def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address
|
|||||||
yield String(s.s), FileOffsetAddress(s.offset)
|
yield String(s.s), FileOffsetAddress(s.offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||||
if buf.startswith(MATCH_PE):
|
if buf.startswith(MATCH_PE):
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
elif buf.startswith(MATCH_ELF):
|
elif buf.startswith(MATCH_ELF):
|
||||||
@@ -79,7 +79,7 @@ def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
|
def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
|
||||||
if buf.startswith(MATCH_PE):
|
if buf.startswith(MATCH_PE):
|
||||||
yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))
|
yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))
|
||||||
|
|
||||||
@@ -111,7 +111,7 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
|
def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
|
||||||
if os != OS_AUTO:
|
if os != OS_AUTO:
|
||||||
yield OS(os), NO_ADDRESS
|
yield OS(os), NO_ADDRESS
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Dict, List, Tuple, Union, Iterator, Optional
|
from typing import Union, Iterator, Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
@@ -41,11 +41,11 @@ from capa.features.extractors.dnfile.helpers import (
|
|||||||
|
|
||||||
class DnFileFeatureExtractorCache:
|
class DnFileFeatureExtractorCache:
|
||||||
def __init__(self, pe: dnfile.dnPE):
|
def __init__(self, pe: dnfile.dnPE):
|
||||||
self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.native_imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.methods: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.fields: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.types: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
|
|
||||||
for import_ in get_dotnet_managed_imports(pe):
|
for import_ in get_dotnet_managed_imports(pe):
|
||||||
self.imports[import_.token] = import_
|
self.imports[import_.token] = import_
|
||||||
@@ -84,7 +84,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe)
|
self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe)
|
||||||
|
|
||||||
# pre-compute these because we'll yield them at *every* scope.
|
# pre-compute these because we'll yield them at *every* scope.
|
||||||
self.global_features: List[Tuple[Feature, Address]] = []
|
self.global_features: list[tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
|
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
|
||||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
|
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
|
||||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
|
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
|
||||||
@@ -100,7 +100,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||||
# create a method lookup table
|
# create a method lookup table
|
||||||
methods: Dict[Address, FunctionHandle] = {}
|
methods: dict[Address, FunctionHandle] = {}
|
||||||
for token, method in get_dotnet_managed_method_bodies(self.pe):
|
for token, method in get_dotnet_managed_method_bodies(self.pe):
|
||||||
fh: FunctionHandle = FunctionHandle(
|
fh: FunctionHandle = FunctionHandle(
|
||||||
address=DNTokenAddress(token),
|
address=DNTokenAddress(token),
|
||||||
@@ -136,7 +136,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
yield from methods.values()
|
yield from methods.values()
|
||||||
|
|
||||||
def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_features(self, fh) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.dnfile.function.extract_features(fh)
|
yield from capa.features.extractors.dnfile.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, f) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, f) -> Iterator[BBHandle]:
|
||||||
@@ -157,5 +157,5 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
inner=insn,
|
inner=insn,
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_features(self, fh, bbh, ih) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)
|
yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
|
|
||||||
@@ -18,35 +18,35 @@ from capa.features.common import Class, Format, String, Feature, Namespace, Char
|
|||||||
from capa.features.address import Address
|
from capa.features.address import Address
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]:
|
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[tuple[Import, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]:
|
def extract_file_format(pe: dnfile.dnPE) -> Iterator[tuple[Format, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]:
|
def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[tuple[FunctionName, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]:
|
def extract_file_strings(pe: dnfile.dnPE) -> Iterator[tuple[String, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
|
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[tuple[Characteristic, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]:
|
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[tuple[Namespace, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]:
|
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[tuple[Class, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, address in file_handler(pe):
|
for feature, address in file_handler(pe):
|
||||||
yield feature, address
|
yield feature, address
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address
|
from capa.features.address import Address
|
||||||
@@ -18,30 +18,30 @@ from capa.features.extractors.base_extractor import FunctionHandle
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
||||||
"""extract callers to a function"""
|
"""extract callers to a function"""
|
||||||
for dest in fh.ctx["calls_to"]:
|
for dest in fh.ctx["calls_to"]:
|
||||||
yield Characteristic("calls to"), dest
|
yield Characteristic("calls to"), dest
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
||||||
"""extract callers from a function"""
|
"""extract callers from a function"""
|
||||||
for src in fh.ctx["calls_from"]:
|
for src in fh.ctx["calls_from"]:
|
||||||
yield Characteristic("calls from"), src
|
yield Characteristic("calls from"), src
|
||||||
|
|
||||||
|
|
||||||
def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
def extract_recursive_call(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
||||||
"""extract recursive function call"""
|
"""extract recursive function call"""
|
||||||
if fh.address in fh.ctx["calls_to"]:
|
if fh.address in fh.ctx["calls_to"]:
|
||||||
yield Characteristic("recursive call"), fh.address
|
yield Characteristic("recursive call"), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
||||||
"""extract loop indicators from a function"""
|
"""extract loop indicators from a function"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Tuple, Union, Iterator, Optional
|
from typing import Union, Iterator, Optional
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
from dncil.cil.body import CilMethodBody
|
from dncil.cil.body import CilMethodBody
|
||||||
@@ -144,7 +144,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
|
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
|
||||||
"""get MethodDef methods used to access properties
|
"""get MethodDef methods used to access properties
|
||||||
|
|
||||||
see https://www.ntcore.com/files/dotnetformat.htm
|
see https://www.ntcore.com/files/dotnetformat.htm
|
||||||
@@ -194,7 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
|
|||||||
"""
|
"""
|
||||||
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
||||||
|
|
||||||
accessor_map: Dict[int, str] = {}
|
accessor_map: dict[int, str] = {}
|
||||||
for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
|
for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
|
||||||
accessor_map[methoddef] = methoddef_access
|
accessor_map[methoddef] = methoddef_access
|
||||||
|
|
||||||
@@ -252,7 +252,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
|
|||||||
yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)
|
yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)
|
||||||
|
|
||||||
|
|
||||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
|
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
|
||||||
"""get managed methods from MethodDef table"""
|
"""get managed methods from MethodDef table"""
|
||||||
for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
|
for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
|
||||||
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
|
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
|
||||||
@@ -332,7 +332,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
|
|||||||
|
|
||||||
def resolve_nested_typedef_name(
|
def resolve_nested_typedef_name(
|
||||||
nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
|
nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
|
||||||
) -> Tuple[str, Tuple[str, ...]]:
|
) -> tuple[str, tuple[str, ...]]:
|
||||||
"""Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
"""Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
||||||
|
|
||||||
if index in nested_class_table:
|
if index in nested_class_table:
|
||||||
@@ -368,7 +368,7 @@ def resolve_nested_typedef_name(
|
|||||||
|
|
||||||
def resolve_nested_typeref_name(
|
def resolve_nested_typeref_name(
|
||||||
index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
|
index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
|
||||||
) -> Tuple[str, Tuple[str, ...]]:
|
) -> tuple[str, tuple[str, ...]]:
|
||||||
"""Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
"""Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
||||||
# If the ResolutionScope decodes to a typeRef type then it is nested
|
# If the ResolutionScope decodes to a typeRef type then it is nested
|
||||||
if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
|
if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
|
||||||
@@ -398,7 +398,7 @@ def resolve_nested_typeref_name(
|
|||||||
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
|
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
|
||||||
|
|
||||||
|
|
||||||
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
|
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict[int, int]:
|
||||||
"""Build index for EnclosingClass based off the NestedClass row index in the nestedclass table"""
|
"""Build index for EnclosingClass based off the NestedClass row index in the nestedclass table"""
|
||||||
nested_class_table = {}
|
nested_class_table = {}
|
||||||
|
|
||||||
@@ -442,7 +442,7 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
|
|||||||
return not bool(pe.net.Flags.CLR_ILONLY)
|
return not bool(pe.net.Flags.CLR_ILONLY)
|
||||||
|
|
||||||
|
|
||||||
def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
|
def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[tuple[int, dnfile.base.MDTableRow]]:
|
||||||
assert pe.net is not None
|
assert pe.net is not None
|
||||||
assert pe.net.mdtables is not None
|
assert pe.net.mdtables is not None
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import TYPE_CHECKING, Tuple, Union, Iterator, Optional
|
from typing import TYPE_CHECKING, Union, Iterator, Optional
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
|
from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
|
||||||
@@ -61,7 +61,7 @@ def get_callee(
|
|||||||
return callee
|
return callee
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction API features"""
|
"""parse instruction API features"""
|
||||||
if ih.inner.opcode not in (
|
if ih.inner.opcode not in (
|
||||||
OpCodes.Call,
|
OpCodes.Call,
|
||||||
@@ -83,7 +83,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato
|
|||||||
yield API(name), ih.address
|
yield API(name), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction property features"""
|
"""parse instruction property features"""
|
||||||
name: Optional[str] = None
|
name: Optional[str] = None
|
||||||
access: Optional[str] = None
|
access: Optional[str] = None
|
||||||
@@ -118,7 +118,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It
|
|||||||
|
|
||||||
def extract_insn_namespace_class_features(
|
def extract_insn_namespace_class_features(
|
||||||
fh: FunctionHandle, bh, ih: InsnHandle
|
fh: FunctionHandle, bh, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Union[Namespace, Class], Address]]:
|
) -> Iterator[tuple[Union[Namespace, Class], Address]]:
|
||||||
"""parse instruction namespace and class features"""
|
"""parse instruction namespace and class features"""
|
||||||
type_: Optional[Union[DnType, DnUnmanagedMethod]] = None
|
type_: Optional[Union[DnType, DnUnmanagedMethod]] = None
|
||||||
|
|
||||||
@@ -173,13 +173,13 @@ def extract_insn_namespace_class_features(
|
|||||||
yield Namespace(type_.namespace), ih.address
|
yield Namespace(type_.namespace), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction number features"""
|
"""parse instruction number features"""
|
||||||
if ih.inner.is_ldc():
|
if ih.inner.is_ldc():
|
||||||
yield Number(ih.inner.get_ldc()), ih.address
|
yield Number(ih.inner.get_ldc()), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction string features"""
|
"""parse instruction string features"""
|
||||||
if not ih.inner.is_ldstr():
|
if not ih.inner.is_ldstr():
|
||||||
return
|
return
|
||||||
@@ -197,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter
|
|||||||
|
|
||||||
def extract_unmanaged_call_characteristic_features(
|
def extract_unmanaged_call_characteristic_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Characteristic, Address]]:
|
) -> Iterator[tuple[Characteristic, Address]]:
|
||||||
if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp):
|
if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp):
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -209,7 +209,7 @@ def extract_unmanaged_call_characteristic_features(
|
|||||||
yield Characteristic("unmanaged call"), ih.address
|
yield Characteristic("unmanaged call"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract instruction features"""
|
"""extract instruction features"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, addr in inst_handler(fh, bbh, ih):
|
for feature, addr in inst_handler(fh, bbh, ih):
|
||||||
|
|||||||
@@ -6,17 +6,17 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
from typing import Tuple, Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class DnType:
|
class DnType:
|
||||||
def __init__(
|
def __init__(
|
||||||
self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
|
self, token: int, class_: tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
|
||||||
):
|
):
|
||||||
self.token: int = token
|
self.token: int = token
|
||||||
self.access: Optional[str] = access
|
self.access: Optional[str] = access
|
||||||
self.namespace: str = namespace
|
self.namespace: str = namespace
|
||||||
self.class_: Tuple[str, ...] = class_
|
self.class_: tuple[str, ...] = class_
|
||||||
|
|
||||||
if member == ".ctor":
|
if member == ".ctor":
|
||||||
member = "ctor"
|
member = "ctor"
|
||||||
@@ -44,7 +44,7 @@ class DnType:
|
|||||||
return str(self)
|
return str(self)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""):
|
def format_name(class_: tuple[str, ...], namespace: str = "", member: str = ""):
|
||||||
if len(class_) > 1:
|
if len(class_) > 1:
|
||||||
class_str = "/".join(class_) # Concat items in tuple, separated by a "/"
|
class_str = "/".join(class_) # Concat items in tuple, separated by a "/"
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
@@ -48,12 +48,12 @@ from capa.features.extractors.dnfile.helpers import (
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
|
def extract_file_format(**kwargs) -> Iterator[tuple[Format, Address]]:
|
||||||
yield Format(FORMAT_DOTNET), NO_ADDRESS
|
yield Format(FORMAT_DOTNET), NO_ADDRESS
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
|
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Import, Address]]:
|
||||||
for method in get_dotnet_managed_imports(pe):
|
for method in get_dotnet_managed_imports(pe):
|
||||||
# like System.IO.File::OpenRead
|
# like System.IO.File::OpenRead
|
||||||
yield Import(str(method)), DNTokenAddress(method.token)
|
yield Import(str(method)), DNTokenAddress(method.token)
|
||||||
@@ -64,12 +64,12 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor
|
|||||||
yield Import(name), DNTokenAddress(imp.token)
|
yield Import(name), DNTokenAddress(imp.token)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]:
|
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[FunctionName, Address]]:
|
||||||
for method in get_dotnet_managed_methods(pe):
|
for method in get_dotnet_managed_methods(pe):
|
||||||
yield FunctionName(str(method)), DNTokenAddress(method.token)
|
yield FunctionName(str(method)), DNTokenAddress(method.token)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]:
|
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Namespace, Address]]:
|
||||||
"""emit namespace features from TypeRef and TypeDef tables"""
|
"""emit namespace features from TypeRef and TypeDef tables"""
|
||||||
|
|
||||||
# namespaces may be referenced multiple times, so we need to filter
|
# namespaces may be referenced multiple times, so we need to filter
|
||||||
@@ -93,7 +93,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
|
|||||||
yield Namespace(namespace), NO_ADDRESS
|
yield Namespace(namespace), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
|
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Class, Address]]:
|
||||||
"""emit class features from TypeRef and TypeDef tables"""
|
"""emit class features from TypeRef and TypeDef tables"""
|
||||||
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
||||||
|
|
||||||
@@ -116,11 +116,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla
|
|||||||
yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)
|
yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
|
def extract_file_os(**kwargs) -> Iterator[tuple[OS, Address]]:
|
||||||
yield OS(OS_ANY), NO_ADDRESS
|
yield OS(OS_ANY), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
|
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address]]:
|
||||||
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
|
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
|
||||||
# .NET 4.5 added option: any CPU, 32-bit preferred
|
# .NET 4.5 added option: any CPU, 32-bit preferred
|
||||||
assert pe.net is not None
|
assert pe.net is not None
|
||||||
@@ -134,18 +134,18 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address
|
|||||||
yield Arch(ARCH_ANY), NO_ADDRESS
|
yield Arch(ARCH_ANY), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]:
|
def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[String, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_file_strings(pe.__data__)
|
yield from capa.features.extractors.common.extract_file_strings(pe.__data__)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_mixed_mode_characteristic_features(
|
def extract_file_mixed_mode_characteristic_features(
|
||||||
pe: dnfile.dnPE, **kwargs
|
pe: dnfile.dnPE, **kwargs
|
||||||
) -> Iterator[Tuple[Characteristic, Address]]:
|
) -> Iterator[tuple[Characteristic, Address]]:
|
||||||
if is_dotnet_mixed_mode(pe):
|
if is_dotnet_mixed_mode(pe):
|
||||||
yield Characteristic("mixed mode"), NO_ADDRESS
|
yield Characteristic("mixed mode"), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler(pe=pe): # type: ignore
|
for feature, addr in file_handler(pe=pe): # type: ignore
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
@@ -162,7 +162,7 @@ FILE_HANDLERS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in GLOBAL_HANDLERS:
|
for handler in GLOBAL_HANDLERS:
|
||||||
for feature, va in handler(pe=pe): # type: ignore
|
for feature, va in handler(pe=pe): # type: ignore
|
||||||
yield feature, va
|
yield feature, va
|
||||||
@@ -204,7 +204,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
def is_mixed_mode(self) -> bool:
|
def is_mixed_mode(self) -> bool:
|
||||||
return is_dotnet_mixed_mode(self.pe)
|
return is_dotnet_mixed_mode(self.pe)
|
||||||
|
|
||||||
def get_runtime_version(self) -> Tuple[int, int]:
|
def get_runtime_version(self) -> tuple[int, int]:
|
||||||
assert self.pe.net is not None
|
assert self.pe.net is not None
|
||||||
assert self.pe.net.struct is not None
|
assert self.pe.net.struct is not None
|
||||||
assert self.pe.net.struct.MajorRuntimeVersion is not None
|
assert self.pe.net.struct.MajorRuntimeVersion is not None
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
from capa.features.insn import API, Number
|
from capa.features.insn import API, Number
|
||||||
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
This method extracts the given call's features (such as API name and arguments),
|
This method extracts the given call's features (such as API name and arguments),
|
||||||
and returns them as API, Number, and String features.
|
and returns them as API, Number, and String features.
|
||||||
@@ -49,7 +49,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
|||||||
yield API(name), ch.address
|
yield API(name), ch.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in CALL_HANDLERS:
|
for handler in CALL_HANDLERS:
|
||||||
for feature, addr in handler(ph, th, ch):
|
for feature, addr in handler(ph, th, ch):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List, Tuple, Union, Iterator
|
from typing import Union, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.drakvuf.call
|
import capa.features.extractors.drakvuf.call
|
||||||
import capa.features.extractors.drakvuf.file
|
import capa.features.extractors.drakvuf.file
|
||||||
@@ -39,7 +39,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
self.report: DrakvufReport = report
|
self.report: DrakvufReport = report
|
||||||
|
|
||||||
# sort the api calls to prevent going through the entire list each time
|
# sort the api calls to prevent going through the entire list each time
|
||||||
self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report)
|
self.sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = index_calls(report)
|
||||||
|
|
||||||
# pre-compute these because we'll yield them at *every* scope.
|
# pre-compute these because we'll yield them at *every* scope.
|
||||||
self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report))
|
self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report))
|
||||||
@@ -48,16 +48,16 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
# DRAKVUF currently does not yield information about the PE's address
|
# DRAKVUF currently does not yield information about the PE's address
|
||||||
return NO_ADDRESS
|
return NO_ADDRESS
|
||||||
|
|
||||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from self.global_features
|
yield from self.global_features
|
||||||
|
|
||||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.drakvuf.file.extract_features(self.report)
|
yield from capa.features.extractors.drakvuf.file.extract_features(self.report)
|
||||||
|
|
||||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||||
yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls)
|
yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls)
|
||||||
|
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.drakvuf.process.extract_features(ph)
|
yield from capa.features.extractors.drakvuf.process.extract_features(ph)
|
||||||
|
|
||||||
def get_process_name(self, ph: ProcessHandle) -> str:
|
def get_process_name(self, ph: ProcessHandle) -> str:
|
||||||
@@ -66,7 +66,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||||
yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph)
|
yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph)
|
||||||
|
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
if False:
|
if False:
|
||||||
# force this routine to be a generator,
|
# force this routine to be a generator,
|
||||||
# but we don't actually have any elements to generate.
|
# but we don't actually have any elements to generate.
|
||||||
@@ -87,10 +87,10 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
|
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch)
|
yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor":
|
def from_report(cls, report: Iterator[dict]) -> "DrakvufExtractor":
|
||||||
dr = DrakvufReport.from_raw_report(report)
|
dr = DrakvufReport.from_raw_report(report)
|
||||||
return DrakvufExtractor(report=dr)
|
return DrakvufExtractor(report=dr)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.file import Import
|
from capa.features.file import Import
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature
|
||||||
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call, DrakvufReport
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]:
|
def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]]) -> Iterator[ProcessHandle]:
|
||||||
"""
|
"""
|
||||||
Get all the created processes for a sample.
|
Get all the created processes for a sample.
|
||||||
"""
|
"""
|
||||||
@@ -28,7 +28,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]])
|
|||||||
yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name})
|
yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name})
|
||||||
|
|
||||||
|
|
||||||
def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Extract imported function names.
|
Extract imported function names.
|
||||||
"""
|
"""
|
||||||
@@ -43,7 +43,7 @@ def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Addre
|
|||||||
yield Import(name), AbsoluteVirtualAddress(function_address)
|
yield Import(name), AbsoluteVirtualAddress(function_address)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in FILE_HANDLERS:
|
for handler in FILE_HANDLERS:
|
||||||
for feature, addr in handler(report):
|
for feature, addr in handler(report):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
|
from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
|
||||||
from capa.features.address import NO_ADDRESS, Address
|
from capa.features.address import NO_ADDRESS, Address
|
||||||
@@ -16,22 +16,22 @@ from capa.features.extractors.drakvuf.models import DrakvufReport
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_format(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
# DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
# DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_os(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
# DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
# DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||||
yield OS(OS_WINDOWS), NO_ADDRESS
|
yield OS(OS_WINDOWS), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_arch(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
# DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
# DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||||
for global_handler in GLOBAL_HANDLER:
|
for global_handler in GLOBAL_HANDLER:
|
||||||
for feature, addr in global_handler(report):
|
for feature, addr in global_handler(report):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,16 +7,15 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
from capa.features.address import ThreadAddress, ProcessAddress
|
from capa.features.address import ThreadAddress, ProcessAddress
|
||||||
from capa.features.extractors.drakvuf.models import Call, DrakvufReport
|
from capa.features.extractors.drakvuf.models import Call, DrakvufReport
|
||||||
|
|
||||||
|
|
||||||
def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]:
|
def index_calls(report: DrakvufReport) -> dict[ProcessAddress, dict[ThreadAddress, list[Call]]]:
|
||||||
# this method organizes calls into processes and threads, and then sorts them based on
|
# this method organizes calls into processes and threads, and then sorts them based on
|
||||||
# timestamp so that we can address individual calls per index (CallAddress requires call index)
|
# timestamp so that we can address individual calls per index (CallAddress requires call index)
|
||||||
result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {}
|
result: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = {}
|
||||||
for call in itertools.chain(report.syscalls, report.apicalls):
|
for call in itertools.chain(report.syscalls, report.apicalls):
|
||||||
if call.pid == 0:
|
if call.pid == 0:
|
||||||
# DRAKVUF captures api/native calls from all processes running on the system.
|
# DRAKVUF captures api/native calls from all processes running on the system.
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, List, Iterator
|
from typing import Any, Iterator
|
||||||
|
|
||||||
from pydantic import Field, BaseModel, ConfigDict, model_validator
|
from pydantic import Field, BaseModel, ConfigDict, model_validator
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ class LoadedDLL(ConciseModel):
|
|||||||
plugin_name: str = Field(alias="Plugin")
|
plugin_name: str = Field(alias="Plugin")
|
||||||
event: str = Field(alias="Event")
|
event: str = Field(alias="Event")
|
||||||
name: str = Field(alias="DllName")
|
name: str = Field(alias="DllName")
|
||||||
imports: Dict[str, int] = Field(alias="Rva")
|
imports: dict[str, int] = Field(alias="Rva")
|
||||||
|
|
||||||
|
|
||||||
class Call(ConciseModel):
|
class Call(ConciseModel):
|
||||||
@@ -58,18 +58,18 @@ class Call(ConciseModel):
|
|||||||
pid: int = Field(alias="PID")
|
pid: int = Field(alias="PID")
|
||||||
tid: int = Field(alias="TID")
|
tid: int = Field(alias="TID")
|
||||||
name: str = Field(alias="Method")
|
name: str = Field(alias="Method")
|
||||||
arguments: Dict[str, str]
|
arguments: dict[str, str]
|
||||||
|
|
||||||
|
|
||||||
class WinApiCall(Call):
|
class WinApiCall(Call):
|
||||||
# This class models Windows API calls captured by DRAKVUF (DLLs, etc.).
|
# This class models Windows API calls captured by DRAKVUF (DLLs, etc.).
|
||||||
arguments: Dict[str, str] = Field(alias="Arguments")
|
arguments: dict[str, str] = Field(alias="Arguments")
|
||||||
event: str = Field(alias="Event")
|
event: str = Field(alias="Event")
|
||||||
return_value: str = Field(alias="ReturnValue")
|
return_value: str = Field(alias="ReturnValue")
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]:
|
||||||
args = values["Arguments"]
|
args = values["Arguments"]
|
||||||
values["Arguments"] = dict(arg.split("=", 1) for arg in args)
|
values["Arguments"] = dict(arg.split("=", 1) for arg in args)
|
||||||
return values
|
return values
|
||||||
@@ -100,7 +100,7 @@ class SystemCall(Call):
|
|||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:
|
||||||
# DRAKVUF stores argument names and values as entries in the syscall's entry.
|
# DRAKVUF stores argument names and values as entries in the syscall's entry.
|
||||||
# This model validator collects those arguments into a list in the model.
|
# This model validator collects those arguments into a list in the model.
|
||||||
values["arguments"] = {
|
values["arguments"] = {
|
||||||
@@ -110,13 +110,13 @@ class SystemCall(Call):
|
|||||||
|
|
||||||
|
|
||||||
class DrakvufReport(ConciseModel):
|
class DrakvufReport(ConciseModel):
|
||||||
syscalls: List[SystemCall] = []
|
syscalls: list[SystemCall] = []
|
||||||
apicalls: List[WinApiCall] = []
|
apicalls: list[WinApiCall] = []
|
||||||
discovered_dlls: List[DiscoveredDLL] = []
|
discovered_dlls: list[DiscoveredDLL] = []
|
||||||
loaded_dlls: List[LoadedDLL] = []
|
loaded_dlls: list[LoadedDLL] = []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport":
|
def from_raw_report(cls, entries: Iterator[dict]) -> "DrakvufReport":
|
||||||
report = cls()
|
report = cls()
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.common import String, Feature
|
from capa.features.common import String, Feature
|
||||||
from capa.features.address import Address, ThreadAddress, ProcessAddress
|
from capa.features.address import Address, ThreadAddress, ProcessAddress
|
||||||
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def get_threads(
|
def get_threads(
|
||||||
calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle
|
calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle
|
||||||
) -> Iterator[ThreadHandle]:
|
) -> Iterator[ThreadHandle]:
|
||||||
"""
|
"""
|
||||||
Get the threads associated with a given process.
|
Get the threads associated with a given process.
|
||||||
@@ -27,11 +27,11 @@ def get_threads(
|
|||||||
yield ThreadHandle(address=thread_addr, inner={})
|
yield ThreadHandle(address=thread_addr, inner={})
|
||||||
|
|
||||||
|
|
||||||
def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_process_name(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield String(ph.inner["process_name"]), ph.address
|
yield String(ph.inner["process_name"]), ph.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in PROCESS_HANDLERS:
|
for handler in PROCESS_HANDLERS:
|
||||||
for feature, addr in handler(ph):
|
for feature, addr in handler(ph):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress
|
from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress
|
||||||
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
|
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
|
||||||
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def get_calls(
|
def get_calls(
|
||||||
sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle
|
sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle, th: ThreadHandle
|
||||||
) -> Iterator[CallHandle]:
|
) -> Iterator[CallHandle]:
|
||||||
for i, call in enumerate(sorted_calls[ph.address][th.address]):
|
for i, call in enumerate(sorted_calls[ph.address][th.address]):
|
||||||
call_addr = DynamicCallAddress(thread=th.address, id=i)
|
call_addr = DynamicCallAddress(thread=th.address, id=i)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import logging
|
|||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
|
from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@@ -394,7 +394,7 @@ class ELF:
|
|||||||
return read_cstr(phdr.buf, 0)
|
return read_cstr(phdr.buf, 0)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def versions_needed(self) -> Dict[str, Set[str]]:
|
def versions_needed(self) -> dict[str, set[str]]:
|
||||||
# symbol version requirements are stored in the .gnu.version_r section,
|
# symbol version requirements are stored in the .gnu.version_r section,
|
||||||
# which has type SHT_GNU_verneed (0x6ffffffe).
|
# which has type SHT_GNU_verneed (0x6ffffffe).
|
||||||
#
|
#
|
||||||
@@ -452,7 +452,7 @@ class ELF:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dynamic_entries(self) -> Iterator[Tuple[int, int]]:
|
def dynamic_entries(self) -> Iterator[tuple[int, int]]:
|
||||||
"""
|
"""
|
||||||
read the entries from the dynamic section,
|
read the entries from the dynamic section,
|
||||||
yielding the tag and value for each entry.
|
yielding the tag and value for each entry.
|
||||||
@@ -547,7 +547,7 @@ class ELF:
|
|||||||
logger.warning("failed to read DT_NEEDED entry: %s", str(e))
|
logger.warning("failed to read DT_NEEDED entry: %s", str(e))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def symtab(self) -> Optional[Tuple[Shdr, Shdr]]:
|
def symtab(self) -> Optional[tuple[Shdr, Shdr]]:
|
||||||
"""
|
"""
|
||||||
fetch the Shdr for the symtab and the associated strtab.
|
fetch the Shdr for the symtab and the associated strtab.
|
||||||
"""
|
"""
|
||||||
@@ -682,7 +682,7 @@ class SymTab:
|
|||||||
symtab: Shdr,
|
symtab: Shdr,
|
||||||
strtab: Shdr,
|
strtab: Shdr,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.symbols: List[Symbol] = []
|
self.symbols: list[Symbol] = []
|
||||||
|
|
||||||
self.symtab = symtab
|
self.symtab = symtab
|
||||||
self.strtab = strtab
|
self.strtab = strtab
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
|
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
|
||||||
@@ -166,7 +166,7 @@ def extract_file_arch(elf: ELFFile, **kwargs):
|
|||||||
logger.warning("unsupported architecture: %s", arch)
|
logger.warning("unsupported architecture: %s", arch)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
|
def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler(elf=elf, buf=buf): # type: ignore
|
for feature, addr in file_handler(elf=elf, buf=buf): # type: ignore
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
@@ -182,7 +182,7 @@ FILE_HANDLERS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
|
def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
|
||||||
for global_handler in GLOBAL_HANDLERS:
|
for global_handler in GLOBAL_HANDLERS:
|
||||||
for feature, addr in global_handler(elf=elf, buf=buf): # type: ignore
|
for feature, addr in global_handler(elf=elf, buf=buf): # type: ignore
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import ghidra
|
import ghidra
|
||||||
from ghidra.program.model.lang import OperandType
|
from ghidra.program.model.lang import OperandType
|
||||||
@@ -97,7 +97,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract stackstring indicators from basic block"""
|
"""extract stackstring indicators from basic block"""
|
||||||
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
||||||
|
|
||||||
@@ -105,7 +105,7 @@ def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[
|
|||||||
yield Characteristic("stack string"), bbh.address
|
yield Characteristic("stack string"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""check basic block for tight loop indicators"""
|
"""check basic block for tight loop indicators"""
|
||||||
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
||||||
|
|
||||||
@@ -119,7 +119,7 @@ BASIC_BLOCK_HANDLERS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features from the given basic block.
|
extract features from the given basic block.
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
|
|||||||
bb: the basic block to process.
|
bb: the basic block to process.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, int]: the features and their location found in this basic block.
|
tuple[Feature, int]: the features and their location found in this basic block.
|
||||||
"""
|
"""
|
||||||
yield BasicBlock(), bbh.address
|
yield BasicBlock(), bbh.address
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.features.extractors.ghidra.file
|
import capa.features.extractors.ghidra.file
|
||||||
import capa.features.extractors.ghidra.insn
|
import capa.features.extractors.ghidra.insn
|
||||||
@@ -40,7 +40,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
self.global_features: List[Tuple[Feature, Address]] = []
|
self.global_features: list[tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
|
self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
|
||||||
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
|
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
|
||||||
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())
|
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())
|
||||||
@@ -73,7 +73,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
|||||||
func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821
|
func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821
|
||||||
return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)
|
return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.ghidra.function.extract_features(fh)
|
yield from capa.features.extractors.ghidra.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
@@ -81,7 +81,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
yield from ghidra_helpers.get_function_blocks(fh)
|
yield from ghidra_helpers.get_function_blocks(fh)
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from ghidra.program.model.symbol import SourceType, SymbolType
|
from ghidra.program.model.symbol import SourceType, SymbolType
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
|
|||||||
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
||||||
|
|
||||||
|
|
||||||
def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) -> Iterator[Tuple[int, int]]:
|
def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]]) -> Iterator[tuple[int, int]]:
|
||||||
"""check segment for embedded PE
|
"""check segment for embedded PE
|
||||||
|
|
||||||
adapted for Ghidra from:
|
adapted for Ghidra from:
|
||||||
@@ -60,11 +60,11 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]])
|
|||||||
yield off, i
|
yield off, i
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract embedded PE features"""
|
"""extract embedded PE features"""
|
||||||
|
|
||||||
# pre-compute XOR pairs
|
# pre-compute XOR pairs
|
||||||
mz_xor: List[Tuple[bytes, bytes, int]] = [
|
mz_xor: list[tuple[bytes, bytes, int]] = [
|
||||||
(
|
(
|
||||||
capa.features.extractors.helpers.xor_static(b"MZ", i),
|
capa.features.extractors.helpers.xor_static(b"MZ", i),
|
||||||
capa.features.extractors.helpers.xor_static(b"PE", i),
|
capa.features.extractors.helpers.xor_static(b"PE", i),
|
||||||
@@ -84,14 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract function exports"""
|
"""extract function exports"""
|
||||||
st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
|
st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
|
||||||
for addr in st.getExternalEntryPointIterator():
|
for addr in st.getExternalEntryPointIterator():
|
||||||
yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())
|
yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract function imports
|
"""extract function imports
|
||||||
|
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
@@ -116,14 +116,14 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield Import(name), AbsoluteVirtualAddress(addr)
|
yield Import(name), AbsoluteVirtualAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract section names"""
|
"""extract section names"""
|
||||||
|
|
||||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||||
yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())
|
yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract ASCII and UTF-16 LE strings"""
|
"""extract ASCII and UTF-16 LE strings"""
|
||||||
|
|
||||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||||
@@ -141,7 +141,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield String(s.s), FileOffsetAddress(offset)
|
yield String(s.s), FileOffsetAddress(offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract the names of statically-linked library functions.
|
extract the names of statically-linked library functions.
|
||||||
"""
|
"""
|
||||||
@@ -162,7 +162,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield FunctionName(name[1:]), addr
|
yield FunctionName(name[1:]), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
||||||
ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||||
if "PE" in ef:
|
if "PE" in ef:
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
@@ -175,7 +175,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
raise NotImplementedError(f"unexpected file format: {ef}")
|
raise NotImplementedError(f"unexpected file format: {ef}")
|
||||||
|
|
||||||
|
|
||||||
def extract_features() -> Iterator[Tuple[Feature, Address]]:
|
def extract_features() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract file features"""
|
"""extract file features"""
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler():
|
for feature, addr in file_handler():
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import ghidra
|
import ghidra
|
||||||
from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator
|
from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator
|
||||||
@@ -49,7 +49,7 @@ def extract_recursive_call(fh: FunctionHandle):
|
|||||||
yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
|
yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.ghidra.helpers
|
import capa.ghidra.helpers
|
||||||
import capa.features.extractors.elf
|
import capa.features.extractors.elf
|
||||||
@@ -18,7 +18,7 @@ from capa.features.address import NO_ADDRESS, Address
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
def extract_os() -> Iterator[tuple[Feature, Address]]:
|
||||||
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||||
|
|
||||||
if "PE" in format_name:
|
if "PE" in format_name:
|
||||||
@@ -45,7 +45,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
|
def extract_arch() -> Iterator[tuple[Feature, Address]]:
|
||||||
lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821
|
lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821
|
||||||
|
|
||||||
if "x86" in lang_id and "64" in lang_id:
|
if "x86" in lang_id and "64" in lang_id:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Dict, List, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import ghidra
|
import ghidra
|
||||||
import java.lang
|
import java.lang
|
||||||
@@ -20,7 +20,7 @@ from capa.features.address import AbsoluteVirtualAddress
|
|||||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
def ints_to_bytes(bytez: List[int]) -> bytes:
|
def ints_to_bytes(bytez: list[int]) -> bytes:
|
||||||
"""convert Java signed ints to Python bytes
|
"""convert Java signed ints to Python bytes
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -83,10 +83,10 @@ def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
|
|||||||
yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
|
yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
|
||||||
|
|
||||||
|
|
||||||
def get_file_imports() -> Dict[int, List[str]]:
|
def get_file_imports() -> dict[int, list[str]]:
|
||||||
"""get all import names & addrs"""
|
"""get all import names & addrs"""
|
||||||
|
|
||||||
import_dict: Dict[int, List[str]] = {}
|
import_dict: dict[int, list[str]] = {}
|
||||||
|
|
||||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||||
for r in f.getSymbol().getReferences():
|
for r in f.getSymbol().getReferences():
|
||||||
@@ -110,7 +110,7 @@ def get_file_imports() -> Dict[int, List[str]]:
|
|||||||
return import_dict
|
return import_dict
|
||||||
|
|
||||||
|
|
||||||
def get_file_externs() -> Dict[int, List[str]]:
|
def get_file_externs() -> dict[int, list[str]]:
|
||||||
"""
|
"""
|
||||||
Gets function names & addresses of statically-linked library functions
|
Gets function names & addresses of statically-linked library functions
|
||||||
|
|
||||||
@@ -124,7 +124,7 @@ def get_file_externs() -> Dict[int, List[str]]:
|
|||||||
- Note: See Symbol Table labels
|
- Note: See Symbol Table labels
|
||||||
"""
|
"""
|
||||||
|
|
||||||
extern_dict: Dict[int, List[str]] = {}
|
extern_dict: dict[int, list[str]] = {}
|
||||||
|
|
||||||
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
|
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
|
||||||
# .isExternal() misses more than this config for the function symbols
|
# .isExternal() misses more than this config for the function symbols
|
||||||
@@ -143,7 +143,7 @@ def get_file_externs() -> Dict[int, List[str]]:
|
|||||||
return extern_dict
|
return extern_dict
|
||||||
|
|
||||||
|
|
||||||
def map_fake_import_addrs() -> Dict[int, List[int]]:
|
def map_fake_import_addrs() -> dict[int, list[int]]:
|
||||||
"""
|
"""
|
||||||
Map ghidra's fake import entrypoints to their
|
Map ghidra's fake import entrypoints to their
|
||||||
real addresses
|
real addresses
|
||||||
@@ -162,7 +162,7 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:
|
|||||||
- 0x473090 -> PTR_CreateServiceW_00473090
|
- 0x473090 -> PTR_CreateServiceW_00473090
|
||||||
- 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress)
|
- 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress)
|
||||||
"""
|
"""
|
||||||
fake_dict: Dict[int, List[int]] = {}
|
fake_dict: dict[int, list[int]] = {}
|
||||||
|
|
||||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||||
for r in f.getSymbol().getReferences():
|
for r in f.getSymbol().getReferences():
|
||||||
@@ -174,9 +174,9 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:
|
|||||||
|
|
||||||
def check_addr_for_api(
|
def check_addr_for_api(
|
||||||
addr: ghidra.program.model.address.Address,
|
addr: ghidra.program.model.address.Address,
|
||||||
fakes: Dict[int, List[int]],
|
fakes: dict[int, list[int]],
|
||||||
imports: Dict[int, List[str]],
|
imports: dict[int, list[str]],
|
||||||
externs: Dict[int, List[str]],
|
externs: dict[int, list[str]],
|
||||||
) -> bool:
|
) -> bool:
|
||||||
offset = addr.getOffset()
|
offset = addr.getOffset()
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Any, Dict, Tuple, Iterator
|
from typing import Any, Iterator
|
||||||
|
|
||||||
import ghidra
|
import ghidra
|
||||||
from ghidra.program.model.lang import OperandType
|
from ghidra.program.model.lang import OperandType
|
||||||
@@ -26,21 +26,21 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
|||||||
OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS
|
OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||||
"""Populate the import cache for this context"""
|
"""Populate the import cache for this context"""
|
||||||
if "imports_cache" not in ctx:
|
if "imports_cache" not in ctx:
|
||||||
ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports()
|
ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports()
|
||||||
return ctx["imports_cache"]
|
return ctx["imports_cache"]
|
||||||
|
|
||||||
|
|
||||||
def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||||
"""Populate the externs cache for this context"""
|
"""Populate the externs cache for this context"""
|
||||||
if "externs_cache" not in ctx:
|
if "externs_cache" not in ctx:
|
||||||
ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs()
|
ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs()
|
||||||
return ctx["externs_cache"]
|
return ctx["externs_cache"]
|
||||||
|
|
||||||
|
|
||||||
def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||||
"""Populate the fake import addrs cache for this context"""
|
"""Populate the fake import addrs cache for this context"""
|
||||||
if "fakes_cache" not in ctx:
|
if "fakes_cache" not in ctx:
|
||||||
ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs()
|
ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs()
|
||||||
@@ -48,7 +48,7 @@ def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def check_for_api_call(
|
def check_for_api_call(
|
||||||
insn, externs: Dict[int, Any], fakes: Dict[int, Any], imports: Dict[int, Any], imp_or_ex: bool
|
insn, externs: dict[int, Any], fakes: dict[int, Any], imports: dict[int, Any], imp_or_ex: bool
|
||||||
) -> Iterator[Any]:
|
) -> Iterator[Any]:
|
||||||
"""check instruction for API call
|
"""check instruction for API call
|
||||||
|
|
||||||
@@ -110,7 +110,7 @@ def check_for_api_call(
|
|||||||
yield info
|
yield info
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
|
if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
|
||||||
@@ -131,7 +131,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle)
|
|||||||
yield API(ext), ih.address
|
yield API(ext), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction number features
|
parse instruction number features
|
||||||
example:
|
example:
|
||||||
@@ -186,7 +186,7 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
|||||||
yield OperandOffset(i, const), addr
|
yield OperandOffset(i, const), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction structure offset features
|
parse instruction structure offset features
|
||||||
|
|
||||||
@@ -219,7 +219,7 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
|||||||
yield OperandOffset(i, op_off), ih.address
|
yield OperandOffset(i, op_off), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse referenced byte sequences
|
parse referenced byte sequences
|
||||||
|
|
||||||
@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
|||||||
yield Bytes(extracted_bytes), ih.address
|
yield Bytes(extracted_bytes), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction string features
|
parse instruction string features
|
||||||
|
|
||||||
@@ -249,7 +249,7 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_insn_mnemonic_features(
|
def extract_insn_mnemonic_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction mnemonic features"""
|
"""parse instruction mnemonic features"""
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
@@ -258,7 +258,7 @@ def extract_insn_mnemonic_features(
|
|||||||
|
|
||||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse call $+5 instruction from the given instruction.
|
parse call $+5 instruction from the given instruction.
|
||||||
"""
|
"""
|
||||||
@@ -279,7 +279,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_segment_access_features(
|
def extract_insn_segment_access_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction fs or gs access"""
|
"""parse instruction fs or gs access"""
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
@@ -294,7 +294,7 @@ def extract_insn_segment_access_features(
|
|||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(
|
def extract_insn_peb_access_characteristic_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction peb access
|
"""parse instruction peb access
|
||||||
|
|
||||||
fs:[0x30] on x86, gs:[0x60] on x64
|
fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
@@ -310,7 +310,7 @@ def extract_insn_peb_access_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_cross_section_cflow(
|
def extract_insn_cross_section_cflow(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
@@ -364,7 +364,7 @@ def extract_function_calls_from(
|
|||||||
fh: FunctionHandle,
|
fh: FunctionHandle,
|
||||||
bb: BBHandle,
|
bb: BBHandle,
|
||||||
ih: InsnHandle,
|
ih: InsnHandle,
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract functions calls from features
|
"""extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||||
@@ -393,7 +393,7 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
fh: FunctionHandle,
|
fh: FunctionHandle,
|
||||||
bb: BBHandle,
|
bb: BBHandle,
|
||||||
ih: InsnHandle,
|
ih: InsnHandle,
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
|
|
||||||
@@ -442,7 +442,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
fh: FunctionHandle,
|
fh: FunctionHandle,
|
||||||
bb: BBHandle,
|
bb: BBHandle,
|
||||||
ih: InsnHandle,
|
ih: InsnHandle,
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
f: ghidra.program.database.function.FunctionDB = fh.inner
|
f: ghidra.program.database.function.FunctionDB = fh.inner
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
@@ -461,7 +461,7 @@ def extract_features(
|
|||||||
fh: FunctionHandle,
|
fh: FunctionHandle,
|
||||||
bb: BBHandle,
|
bb: BBHandle,
|
||||||
insn: InsnHandle,
|
insn: InsnHandle,
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
for insn_handler in INSTRUCTION_HANDLERS:
|
for insn_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, addr in insn_handler(fh, bb, insn):
|
for feature, addr in insn_handler(fh, bb, insn):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import struct
|
import struct
|
||||||
import builtins
|
import builtins
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
MIN_STACKSTRING_LEN = 8
|
MIN_STACKSTRING_LEN = 8
|
||||||
|
|
||||||
@@ -119,7 +119,7 @@ def twos_complement(val: int, bits: int) -> int:
|
|||||||
return val
|
return val
|
||||||
|
|
||||||
|
|
||||||
def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]:
|
def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
|
||||||
"""
|
"""
|
||||||
Generate (offset, key) tuples of embedded PEs
|
Generate (offset, key) tuples of embedded PEs
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import idaapi
|
import idaapi
|
||||||
|
|
||||||
@@ -80,19 +80,19 @@ def bb_contains_stackstring(f: idaapi.func_t, bb: idaapi.BasicBlock) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract stackstring indicators from basic block"""
|
"""extract stackstring indicators from basic block"""
|
||||||
if bb_contains_stackstring(fh.inner, bbh.inner):
|
if bb_contains_stackstring(fh.inner, bbh.inner):
|
||||||
yield Characteristic("stack string"), bbh.address
|
yield Characteristic("stack string"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract tight loop indicators from a basic block"""
|
"""extract tight loop indicators from a basic block"""
|
||||||
if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner):
|
if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner):
|
||||||
yield Characteristic("tight loop"), bbh.address
|
yield Characteristic("tight loop"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract basic block features"""
|
"""extract basic block features"""
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
for feature, addr in bb_handler(fh, bbh):
|
for feature, addr in bb_handler(fh, bbh):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import idaapi
|
import idaapi
|
||||||
|
|
||||||
@@ -36,7 +36,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
|
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.global_features: List[Tuple[Feature, Address]] = []
|
self.global_features: list[tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
|
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
|
||||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
|
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
|
||||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
|
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
|
||||||
@@ -61,7 +61,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
f = idaapi.get_func(ea)
|
f = idaapi.get_func(ea)
|
||||||
return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f)
|
return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.ida.function.extract_features(fh)
|
yield from capa.features.extractors.ida.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
@@ -70,7 +70,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
for bb in ida_helpers.get_function_blocks(fh.inner):
|
for bb in ida_helpers.get_function_blocks(fh.inner):
|
||||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb)
|
yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb)
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import struct
|
import struct
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import idc
|
import idc
|
||||||
import idaapi
|
import idaapi
|
||||||
@@ -26,7 +26,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
|
|||||||
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
||||||
|
|
||||||
|
|
||||||
def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]:
|
def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
|
||||||
"""check segment for embedded PE
|
"""check segment for embedded PE
|
||||||
|
|
||||||
adapted for IDA from:
|
adapted for IDA from:
|
||||||
@@ -71,7 +71,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]:
|
|||||||
yield off, i
|
yield off, i
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract embedded PE features
|
"""extract embedded PE features
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
@@ -83,7 +83,7 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract function exports"""
|
"""extract function exports"""
|
||||||
for _, ordinal, ea, name in idautils.Entries():
|
for _, ordinal, ea, name in idautils.Entries():
|
||||||
forwarded_name = ida_entry.get_entry_forwarder(ordinal)
|
forwarded_name = ida_entry.get_entry_forwarder(ordinal)
|
||||||
@@ -95,7 +95,7 @@ def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea)
|
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract function imports
|
"""extract function imports
|
||||||
|
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
@@ -131,7 +131,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield Import(info[1]), AbsoluteVirtualAddress(ea)
|
yield Import(info[1]), AbsoluteVirtualAddress(ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract section names
|
"""extract section names
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
@@ -142,7 +142,7 @@ def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea)
|
yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract ASCII and UTF-16 LE strings
|
"""extract ASCII and UTF-16 LE strings
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
@@ -160,7 +160,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract the names of statically-linked library functions.
|
extract the names of statically-linked library functions.
|
||||||
"""
|
"""
|
||||||
@@ -177,7 +177,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
yield FunctionName(name[1:]), addr
|
yield FunctionName(name[1:]), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
||||||
filetype = capa.ida.helpers.get_filetype()
|
filetype = capa.ida.helpers.get_filetype()
|
||||||
|
|
||||||
if filetype in (idaapi.f_PE, idaapi.f_COFF):
|
if filetype in (idaapi.f_PE, idaapi.f_COFF):
|
||||||
@@ -191,7 +191,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
raise NotImplementedError(f"unexpected file format: {filetype}")
|
raise NotImplementedError(f"unexpected file format: {filetype}")
|
||||||
|
|
||||||
|
|
||||||
def extract_features() -> Iterator[Tuple[Feature, Address]]:
|
def extract_features() -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract file features"""
|
"""extract file features"""
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler():
|
for feature, addr in file_handler():
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import idaapi
|
import idaapi
|
||||||
import idautils
|
import idautils
|
||||||
@@ -43,7 +43,7 @@ def extract_recursive_call(fh: FunctionHandle):
|
|||||||
yield Characteristic("recursive call"), fh.address
|
yield Characteristic("recursive call"), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import ida_loader
|
import ida_loader
|
||||||
|
|
||||||
@@ -19,7 +19,7 @@ from capa.features.address import NO_ADDRESS, Address
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
def extract_os() -> Iterator[tuple[Feature, Address]]:
|
||||||
format_name: str = ida_loader.get_file_type_name()
|
format_name: str = ida_loader.get_file_type_name()
|
||||||
|
|
||||||
if "PE" in format_name:
|
if "PE" in format_name:
|
||||||
@@ -46,7 +46,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
|
def extract_arch() -> Iterator[tuple[Feature, Address]]:
|
||||||
procname = capa.ida.helpers.get_processor_name()
|
procname = capa.ida.helpers.get_processor_name()
|
||||||
if procname == "metapc" and capa.ida.helpers.is_64bit():
|
if procname == "metapc" and capa.ida.helpers.is_64bit():
|
||||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import functools
|
import functools
|
||||||
from typing import Any, Dict, Tuple, Iterator, Optional
|
from typing import Any, Iterator, Optional
|
||||||
|
|
||||||
import idc
|
import idc
|
||||||
import idaapi
|
import idaapi
|
||||||
@@ -124,9 +124,9 @@ def inspect_import(imports, library, ea, function, ordinal):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
|
def get_file_imports() -> dict[int, tuple[str, str, int]]:
|
||||||
"""get file imports"""
|
"""get file imports"""
|
||||||
imports: Dict[int, Tuple[str, str, int]] = {}
|
imports: dict[int, tuple[str, str, int]] = {}
|
||||||
|
|
||||||
for idx in range(idaapi.get_import_module_qty()):
|
for idx in range(idaapi.get_import_module_qty()):
|
||||||
library = idaapi.get_import_module_name(idx)
|
library = idaapi.get_import_module_name(idx)
|
||||||
@@ -147,7 +147,7 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
|
|||||||
return imports
|
return imports
|
||||||
|
|
||||||
|
|
||||||
def get_file_externs() -> Dict[int, Tuple[str, str, int]]:
|
def get_file_externs() -> dict[int, tuple[str, str, int]]:
|
||||||
externs = {}
|
externs = {}
|
||||||
|
|
||||||
for seg in get_segments(skip_header_segments=True):
|
for seg in get_segments(skip_header_segments=True):
|
||||||
@@ -248,7 +248,7 @@ def find_string_at(ea: int, min_: int = 4) -> str:
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def get_op_phrase_info(op: idaapi.op_t) -> Dict:
|
def get_op_phrase_info(op: idaapi.op_t) -> dict:
|
||||||
"""parse phrase features from operand
|
"""parse phrase features from operand
|
||||||
|
|
||||||
Pretty much dup of sark's implementation:
|
Pretty much dup of sark's implementation:
|
||||||
@@ -323,7 +323,7 @@ def is_frame_register(reg: int) -> bool:
|
|||||||
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
|
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
|
||||||
|
|
||||||
|
|
||||||
def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[Tuple[Any]] = None) -> idaapi.op_t:
|
def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[tuple[Any]] = None) -> idaapi.op_t:
|
||||||
"""yield op_t for instruction, filter on type if specified"""
|
"""yield op_t for instruction, filter on type if specified"""
|
||||||
for op in insn.ops:
|
for op in insn.ops:
|
||||||
if op.type == idaapi.o_void:
|
if op.type == idaapi.o_void:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import re
|
import re
|
||||||
from typing import Any, Dict, Tuple, Iterator, Optional
|
from typing import Any, Iterator, Optional
|
||||||
|
|
||||||
import idc
|
import idc
|
||||||
import ida_ua
|
import ida_ua
|
||||||
@@ -25,19 +25,19 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi
|
|||||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||||
|
|
||||||
|
|
||||||
def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||||
if "imports_cache" not in ctx:
|
if "imports_cache" not in ctx:
|
||||||
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
|
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
|
||||||
return ctx["imports_cache"]
|
return ctx["imports_cache"]
|
||||||
|
|
||||||
|
|
||||||
def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||||
if "externs_cache" not in ctx:
|
if "externs_cache" not in ctx:
|
||||||
ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs()
|
ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs()
|
||||||
return ctx["externs_cache"]
|
return ctx["externs_cache"]
|
||||||
|
|
||||||
|
|
||||||
def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[Tuple[str, str]]:
|
def check_for_api_call(insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[tuple[str, str]]:
|
||||||
"""check instruction for API call"""
|
"""check instruction for API call"""
|
||||||
info = None
|
info = None
|
||||||
ref = insn.ea
|
ref = insn.ea
|
||||||
@@ -65,7 +65,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[T
|
|||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction API features
|
parse instruction API features
|
||||||
|
|
||||||
@@ -135,7 +135,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction number features
|
parse instruction number features
|
||||||
example:
|
example:
|
||||||
@@ -181,7 +181,7 @@ def extract_insn_number_features(
|
|||||||
yield OperandOffset(i, const), ih.address
|
yield OperandOffset(i, const), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse referenced byte sequences
|
parse referenced byte sequences
|
||||||
example:
|
example:
|
||||||
@@ -203,7 +203,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_insn_string_features(
|
def extract_insn_string_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction string features
|
parse instruction string features
|
||||||
|
|
||||||
@@ -221,7 +221,7 @@ def extract_insn_string_features(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction structure offset features
|
parse instruction structure offset features
|
||||||
|
|
||||||
@@ -369,7 +369,7 @@ def is_nzxor_stack_cookie(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction non-zeroing XOR instruction
|
parse instruction non-zeroing XOR instruction
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies
|
ignore expected non-zeroing XORs, e.g. security cookies
|
||||||
@@ -387,14 +387,14 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_mnemonic_features(
|
def extract_insn_mnemonic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction mnemonic features"""
|
"""parse instruction mnemonic features"""
|
||||||
yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address
|
yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse call $+5 instruction from the given instruction.
|
parse call $+5 instruction from the given instruction.
|
||||||
"""
|
"""
|
||||||
@@ -409,7 +409,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(
|
def extract_insn_peb_access_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction peb access
|
"""parse instruction peb access
|
||||||
|
|
||||||
fs:[0x30] on x86, gs:[0x60] on x64
|
fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
@@ -437,7 +437,7 @@ def extract_insn_peb_access_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_segment_access_features(
|
def extract_insn_segment_access_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction fs or gs access
|
"""parse instruction fs or gs access
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
@@ -466,7 +466,7 @@ def extract_insn_segment_access_features(
|
|||||||
|
|
||||||
def extract_insn_cross_section_cflow(
|
def extract_insn_cross_section_cflow(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||||
insn: idaapi.insn_t = ih.inner
|
insn: idaapi.insn_t = ih.inner
|
||||||
|
|
||||||
@@ -482,7 +482,7 @@ def extract_insn_cross_section_cflow(
|
|||||||
yield Characteristic("cross section flow"), ih.address
|
yield Characteristic("cross section flow"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract functions calls from features
|
"""extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||||
@@ -496,7 +496,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
|
|
||||||
@@ -509,7 +509,7 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
yield Characteristic("indirect call"), ih.address
|
yield Characteristic("indirect call"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract instruction features"""
|
"""extract instruction features"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, ea in inst_handler(f, bbh, insn):
|
for feature, ea in inst_handler(f, bbh, insn):
|
||||||
|
|||||||
@@ -5,11 +5,9 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Dict, List, Tuple, Union
|
from typing import Union, TypeAlias
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from typing_extensions import TypeAlias
|
|
||||||
|
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature
|
||||||
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, DynamicCallAddress
|
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, DynamicCallAddress
|
||||||
from capa.features.extractors.base_extractor import (
|
from capa.features.extractors.base_extractor import (
|
||||||
@@ -27,19 +25,19 @@ from capa.features.extractors.base_extractor import (
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class InstructionFeatures:
|
class InstructionFeatures:
|
||||||
features: List[Tuple[Address, Feature]]
|
features: list[tuple[Address, Feature]]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BasicBlockFeatures:
|
class BasicBlockFeatures:
|
||||||
features: List[Tuple[Address, Feature]]
|
features: list[tuple[Address, Feature]]
|
||||||
instructions: Dict[Address, InstructionFeatures]
|
instructions: dict[Address, InstructionFeatures]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class FunctionFeatures:
|
class FunctionFeatures:
|
||||||
features: List[Tuple[Address, Feature]]
|
features: list[tuple[Address, Feature]]
|
||||||
basic_blocks: Dict[Address, BasicBlockFeatures]
|
basic_blocks: dict[Address, BasicBlockFeatures]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -52,9 +50,9 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
base_address: Address
|
base_address: Address
|
||||||
sample_hashes: SampleHashes
|
sample_hashes: SampleHashes
|
||||||
global_features: List[Feature]
|
global_features: list[Feature]
|
||||||
file_features: List[Tuple[Address, Feature]]
|
file_features: list[tuple[Address, Feature]]
|
||||||
functions: Dict[Address, FunctionFeatures]
|
functions: dict[Address, FunctionFeatures]
|
||||||
|
|
||||||
def get_base_address(self):
|
def get_base_address(self):
|
||||||
return self.base_address
|
return self.base_address
|
||||||
@@ -98,19 +96,19 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor):
|
|||||||
@dataclass
|
@dataclass
|
||||||
class CallFeatures:
|
class CallFeatures:
|
||||||
name: str
|
name: str
|
||||||
features: List[Tuple[Address, Feature]]
|
features: list[tuple[Address, Feature]]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ThreadFeatures:
|
class ThreadFeatures:
|
||||||
features: List[Tuple[Address, Feature]]
|
features: list[tuple[Address, Feature]]
|
||||||
calls: Dict[Address, CallFeatures]
|
calls: dict[Address, CallFeatures]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ProcessFeatures:
|
class ProcessFeatures:
|
||||||
features: List[Tuple[Address, Feature]]
|
features: list[tuple[Address, Feature]]
|
||||||
threads: Dict[Address, ThreadFeatures]
|
threads: dict[Address, ThreadFeatures]
|
||||||
name: str
|
name: str
|
||||||
|
|
||||||
|
|
||||||
@@ -118,9 +116,9 @@ class ProcessFeatures:
|
|||||||
class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
|
class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
|
||||||
base_address: Address
|
base_address: Address
|
||||||
sample_hashes: SampleHashes
|
sample_hashes: SampleHashes
|
||||||
global_features: List[Feature]
|
global_features: list[Feature]
|
||||||
file_features: List[Tuple[Address, Feature]]
|
file_features: list[tuple[Address, Feature]]
|
||||||
processes: Dict[Address, ProcessFeatures]
|
processes: dict[Address, ProcessFeatures]
|
||||||
|
|
||||||
def extract_global_features(self):
|
def extract_global_features(self):
|
||||||
for feature in self.global_features:
|
for feature in self.global_features:
|
||||||
|
|||||||
@@ -148,11 +148,11 @@ def extract_file_features(pe, buf):
|
|||||||
buf: the raw sample bytes
|
buf: the raw sample bytes
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, VA]: a feature and its location.
|
tuple[Feature, VA]: a feature and its location.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
# file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]]
|
# file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]]
|
||||||
for feature, va in file_handler(pe=pe, buf=buf): # type: ignore
|
for feature, va in file_handler(pe=pe, buf=buf): # type: ignore
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
@@ -177,10 +177,10 @@ def extract_global_features(pe, buf):
|
|||||||
buf: the raw sample bytes
|
buf: the raw sample bytes
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, VA]: a feature and its location.
|
tuple[Feature, VA]: a feature and its location.
|
||||||
"""
|
"""
|
||||||
for handler in GLOBAL_HANDLERS:
|
for handler in GLOBAL_HANDLERS:
|
||||||
# file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]]
|
# file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]]
|
||||||
for feature, va in handler(pe=pe, buf=buf): # type: ignore
|
for feature, va in handler(pe=pe, buf=buf): # type: ignore
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import envi
|
import envi
|
||||||
import envi.archs.i386.disasm
|
import envi.archs.i386.disasm
|
||||||
@@ -20,7 +20,7 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
|||||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse features from the given basic block.
|
parse features from the given basic block.
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ def _bb_has_tight_loop(f, bb):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""check basic block for tight loop indicators"""
|
"""check basic block for tight loop indicators"""
|
||||||
if _bb_has_tight_loop(f, bb.inner):
|
if _bb_has_tight_loop(f, bb.inner):
|
||||||
yield Characteristic("tight loop"), bb.address
|
yield Characteristic("tight loop"), bb.address
|
||||||
@@ -70,7 +70,7 @@ def _bb_has_stackstring(f, bb):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""check basic block for stackstring indicators"""
|
"""check basic block for stackstring indicators"""
|
||||||
if _bb_has_stackstring(f, bb.inner):
|
if _bb_has_stackstring(f, bb.inner):
|
||||||
yield Characteristic("stack string"), bb.address
|
yield Characteristic("stack string"), bb.address
|
||||||
@@ -145,7 +145,7 @@ def is_printable_utf16le(chars: bytes) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features from the given basic block.
|
extract features from the given basic block.
|
||||||
|
|
||||||
@@ -154,7 +154,7 @@ def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature,
|
|||||||
bb (viv_utils.BasicBlock): the basic block to process.
|
bb (viv_utils.BasicBlock): the basic block to process.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, int]: the features and their location found in this basic block.
|
tuple[Feature, int]: the features and their location found in this basic block.
|
||||||
"""
|
"""
|
||||||
yield BasicBlock(), AbsoluteVirtualAddress(bb.inner.va)
|
yield BasicBlock(), AbsoluteVirtualAddress(bb.inner.va)
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, List, Tuple, Iterator
|
from typing import Any, Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import viv_utils
|
import viv_utils
|
||||||
@@ -39,7 +39,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
|||||||
super().__init__(hashes=SampleHashes.from_bytes(self.buf))
|
super().__init__(hashes=SampleHashes.from_bytes(self.buf))
|
||||||
|
|
||||||
# pre-compute these because we'll yield them at *every* scope.
|
# pre-compute these because we'll yield them at *every* scope.
|
||||||
self.global_features: List[Tuple[Feature, Address]] = []
|
self.global_features: list[tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
|
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
|
||||||
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os))
|
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os))
|
||||||
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
|
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
|
||||||
@@ -55,13 +55,13 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
|||||||
yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
|
yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
|
||||||
|
|
||||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||||
cache: Dict[str, Any] = {}
|
cache: dict[str, Any] = {}
|
||||||
for va in sorted(self.vw.getFunctions()):
|
for va in sorted(self.vw.getFunctions()):
|
||||||
yield FunctionHandle(
|
yield FunctionHandle(
|
||||||
address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}
|
address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.viv.function.extract_features(fh)
|
yield from capa.features.extractors.viv.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
@@ -69,7 +69,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
|||||||
for bb in f.basic_blocks:
|
for bb in f.basic_blocks:
|
||||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb)
|
yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb)
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[Tuple[Feature, Address]]:
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.viv.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.viv.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
@@ -79,7 +79,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
def extract_insn_features(
|
def extract_insn_features(
|
||||||
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih)
|
yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih)
|
||||||
|
|
||||||
def is_library_function(self, addr):
|
def is_library_function(self, addr):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import PE.carve as pe_carve # vivisect PE
|
import PE.carve as pe_carve # vivisect PE
|
||||||
import vivisect
|
import vivisect
|
||||||
@@ -21,7 +21,7 @@ from capa.features.common import Feature, Characteristic
|
|||||||
from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress
|
from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_embedded_pe(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||||
for offset, _ in pe_carve.carve(buf, 1):
|
for offset, _ in pe_carve.carve(buf, 1):
|
||||||
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ def get_first_vw_filename(vw: vivisect.VivWorkspace):
|
|||||||
return next(iter(vw.filemeta.keys()))
|
return next(iter(vw.filemeta.keys()))
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||||
for va, _, name, _ in vw.getExports():
|
for va, _, name, _ in vw.getExports():
|
||||||
yield Export(name), AbsoluteVirtualAddress(va)
|
yield Export(name), AbsoluteVirtualAddress(va)
|
||||||
|
|
||||||
@@ -56,7 +56,7 @@ def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[T
|
|||||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va)
|
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_import_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract imported function names
|
extract imported function names
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
@@ -91,16 +91,16 @@ def is_viv_ord_impname(impname: str) -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_section_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||||
for va, _, segname, _ in vw.getSegments():
|
for va, _, segname, _ in vw.getSegments():
|
||||||
yield Section(segname), AbsoluteVirtualAddress(va)
|
yield Section(segname), AbsoluteVirtualAddress(va)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_strings(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_function_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract the names of statically-linked library functions.
|
extract the names of statically-linked library functions.
|
||||||
"""
|
"""
|
||||||
@@ -117,11 +117,11 @@ def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address
|
|||||||
yield FunctionName(name[1:]), addr
|
yield FunctionName(name[1:]), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_format(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_format(buf)
|
yield from capa.features.extractors.common.extract_format(buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(vw, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract file features from given workspace
|
extract file features from given workspace
|
||||||
|
|
||||||
@@ -130,7 +130,7 @@ def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
buf: the raw input file bytes
|
buf: the raw input file bytes
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: a feature and its location.
|
tuple[Feature, Address]: a feature and its location.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import envi
|
import envi
|
||||||
import viv_utils
|
import viv_utils
|
||||||
@@ -19,7 +19,7 @@ from capa.features.extractors.elf import SymTab
|
|||||||
from capa.features.extractors.base_extractor import FunctionHandle
|
from capa.features.extractors.base_extractor import FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse features from the given function.
|
parse features from the given function.
|
||||||
|
|
||||||
@@ -32,7 +32,7 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
if fh.inner.vw.metadata["Format"] == "elf":
|
if fh.inner.vw.metadata["Format"] == "elf":
|
||||||
# the file's symbol table gets added to the metadata of the vivisect workspace.
|
# the file's symbol table gets added to the metadata of the vivisect workspace.
|
||||||
# this is in order to eliminate the computational overhead of refetching symtab each time.
|
# this is in order to eliminate the computational overhead of refetching symtab each time.
|
||||||
@@ -54,13 +54,13 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature,
|
|||||||
yield FunctionName(sym_name), fh.address
|
yield FunctionName(sym_name), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
f: viv_utils.Function = fhandle.inner
|
f: viv_utils.Function = fhandle.inner
|
||||||
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
||||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(src)
|
yield Characteristic("calls to"), AbsoluteVirtualAddress(src)
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse if a function has a loop
|
parse if a function has a loop
|
||||||
"""
|
"""
|
||||||
@@ -88,7 +88,7 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Ad
|
|||||||
yield Characteristic("loop"), fhandle.address
|
yield Characteristic("loop"), fhandle.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features from the given function.
|
extract features from the given function.
|
||||||
|
|
||||||
@@ -96,7 +96,7 @@ def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
fh: the function handle from which to extract features
|
fh: the function handle from which to extract features
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, int]: the features and their location found in this function.
|
tuple[Feature, int]: the features and their location found in this function.
|
||||||
"""
|
"""
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature
|
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature
|
||||||
from capa.features.address import NO_ADDRESS, Address
|
from capa.features.address import NO_ADDRESS, Address
|
||||||
@@ -14,7 +14,7 @@ from capa.features.address import NO_ADDRESS, Address
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(vw) -> Iterator[Tuple[Feature, Address]]:
|
def extract_arch(vw) -> Iterator[tuple[Feature, Address]]:
|
||||||
arch = vw.getMeta("Architecture")
|
arch = vw.getMeta("Architecture")
|
||||||
if arch == "amd64":
|
if arch == "amd64":
|
||||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
from typing import Set, List, Deque, Tuple, Optional
|
from typing import Deque, Optional
|
||||||
|
|
||||||
import envi
|
import envi
|
||||||
import vivisect.const
|
import vivisect.const
|
||||||
@@ -28,7 +28,7 @@ FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH
|
|||||||
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
|
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
|
||||||
|
|
||||||
|
|
||||||
def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]:
|
def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]:
|
||||||
"""
|
"""
|
||||||
collect the instructions that flow to the given address, local to the current function.
|
collect the instructions that flow to the given address, local to the current function.
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]:
|
|||||||
va (int): the virtual address to inspect
|
va (int): the virtual address to inspect
|
||||||
|
|
||||||
returns:
|
returns:
|
||||||
List[int]: the prior instructions, which may fallthrough and/or jump here
|
list[int]: the prior instructions, which may fallthrough and/or jump here
|
||||||
"""
|
"""
|
||||||
ret = []
|
ret = []
|
||||||
|
|
||||||
@@ -71,7 +71,7 @@ class NotFoundError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[int]]:
|
def find_definition(vw: VivWorkspace, va: int, reg: int) -> tuple[int, Optional[int]]:
|
||||||
"""
|
"""
|
||||||
scan backwards from the given address looking for assignments to the given register.
|
scan backwards from the given address looking for assignments to the given register.
|
||||||
if a constant, return that value.
|
if a constant, return that value.
|
||||||
@@ -88,7 +88,7 @@ def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[
|
|||||||
NotFoundError: when the definition cannot be found.
|
NotFoundError: when the definition cannot be found.
|
||||||
"""
|
"""
|
||||||
q: Deque[int] = collections.deque()
|
q: Deque[int] = collections.deque()
|
||||||
seen: Set[int] = set()
|
seen: set[int] = set()
|
||||||
|
|
||||||
q.extend(get_previous_instructions(vw, va))
|
q.extend(get_previous_instructions(vw, va))
|
||||||
while q:
|
while q:
|
||||||
@@ -139,7 +139,7 @@ def is_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> bool:
|
|||||||
return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
|
return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
|
||||||
|
|
||||||
|
|
||||||
def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> Tuple[int, Optional[int]]:
|
def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> tuple[int, Optional[int]]:
|
||||||
"""
|
"""
|
||||||
inspect the given indirect call instruction and attempt to resolve the target address.
|
inspect the given indirect call instruction and attempt to resolve the target address.
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import List, Tuple, Callable, Iterator
|
from typing import Callable, Iterator
|
||||||
|
|
||||||
import envi
|
import envi
|
||||||
import envi.exc
|
import envi.exc
|
||||||
@@ -33,7 +33,7 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
|||||||
|
|
||||||
def interface_extract_instruction_XXX(
|
def interface_extract_instruction_XXX(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse features from the given instruction.
|
parse features from the given instruction.
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ def get_imports(vw):
|
|||||||
caching accessor to vivisect workspace imports
|
caching accessor to vivisect workspace imports
|
||||||
avoids performance issues in vivisect when collecting locations
|
avoids performance issues in vivisect when collecting locations
|
||||||
|
|
||||||
returns: Dict[int, Tuple[str, str]]
|
returns: dict[int, tuple[str, str]]
|
||||||
"""
|
"""
|
||||||
if "imports" in vw.metadata:
|
if "imports" in vw.metadata:
|
||||||
return vw.metadata["imports"]
|
return vw.metadata["imports"]
|
||||||
@@ -65,7 +65,7 @@ def get_imports(vw):
|
|||||||
return imports
|
return imports
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse API features from the given instruction.
|
parse API features from the given instruction.
|
||||||
|
|
||||||
@@ -260,7 +260,7 @@ def read_bytes(vw, va: int) -> bytes:
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse byte sequence features from the given instruction.
|
parse byte sequence features from the given instruction.
|
||||||
example:
|
example:
|
||||||
@@ -371,7 +371,7 @@ def is_security_cookie(f, bb, insn) -> bool:
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse non-zeroing XOR instruction from the given instruction.
|
parse non-zeroing XOR instruction from the given instruction.
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||||
@@ -392,12 +392,12 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
yield Characteristic("nzxor"), ih.address
|
yield Characteristic("nzxor"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse mnemonic features from the given instruction."""
|
"""parse mnemonic features from the given instruction."""
|
||||||
yield Mnemonic(ih.inner.mnem), ih.address
|
yield Mnemonic(ih.inner.mnem), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse call $+5 instruction from the given instruction.
|
parse call $+5 instruction from the given instruction.
|
||||||
"""
|
"""
|
||||||
@@ -415,7 +415,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle)
|
|||||||
yield Characteristic("call $+5"), ih.address
|
yield Characteristic("call $+5"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
"""
|
"""
|
||||||
@@ -451,7 +451,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> It
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse the instruction for access to fs or gs"""
|
"""parse the instruction for access to fs or gs"""
|
||||||
insn: envi.Opcode = ih.inner
|
insn: envi.Opcode = ih.inner
|
||||||
|
|
||||||
@@ -472,7 +472,7 @@ def get_section(vw, va: int):
|
|||||||
raise KeyError(va)
|
raise KeyError(va)
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
||||||
"""
|
"""
|
||||||
@@ -513,7 +513,7 @@ def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) ->
|
|||||||
|
|
||||||
# this is a feature that's most relevant at the function scope,
|
# this is a feature that's most relevant at the function scope,
|
||||||
# however, its most efficient to extract at the instruction scope.
|
# however, its most efficient to extract at the instruction scope.
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
insn: envi.Opcode = ih.inner
|
insn: envi.Opcode = ih.inner
|
||||||
f: viv_utils.Function = fh.inner
|
f: viv_utils.Function = fh.inner
|
||||||
|
|
||||||
@@ -554,7 +554,7 @@ def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera
|
|||||||
|
|
||||||
# this is a feature that's most relevant at the function or basic block scope,
|
# this is a feature that's most relevant at the function or basic block scope,
|
||||||
# however, its most efficient to extract at the instruction scope.
|
# however, its most efficient to extract at the instruction scope.
|
||||||
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
@@ -578,7 +578,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle
|
|||||||
|
|
||||||
def extract_op_number_features(
|
def extract_op_number_features(
|
||||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse number features from the given operand.
|
"""parse number features from the given operand.
|
||||||
|
|
||||||
example:
|
example:
|
||||||
@@ -623,7 +623,7 @@ def extract_op_number_features(
|
|||||||
|
|
||||||
def extract_op_offset_features(
|
def extract_op_offset_features(
|
||||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse structure offset features from the given operand."""
|
"""parse structure offset features from the given operand."""
|
||||||
# example:
|
# example:
|
||||||
#
|
#
|
||||||
@@ -674,7 +674,7 @@ def extract_op_offset_features(
|
|||||||
|
|
||||||
def extract_op_string_features(
|
def extract_op_string_features(
|
||||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse string features from the given operand."""
|
"""parse string features from the given operand."""
|
||||||
# example:
|
# example:
|
||||||
#
|
#
|
||||||
@@ -705,15 +705,15 @@ def extract_op_string_features(
|
|||||||
yield String(s), ih.address
|
yield String(s), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for i, oper in enumerate(insn.inner.opers):
|
for i, oper in enumerate(insn.inner.opers):
|
||||||
for op_handler in OPERAND_HANDLERS:
|
for op_handler in OPERAND_HANDLERS:
|
||||||
for feature, addr in op_handler(f, bb, insn, i, oper):
|
for feature, addr in op_handler(f, bb, insn, i, oper):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
|
|
||||||
OPERAND_HANDLERS: List[
|
OPERAND_HANDLERS: list[
|
||||||
Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[Tuple[Feature, Address]]]
|
Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[tuple[Feature, Address]]]
|
||||||
] = [
|
] = [
|
||||||
extract_op_number_features,
|
extract_op_number_features,
|
||||||
extract_op_offset_features,
|
extract_op_offset_features,
|
||||||
@@ -721,7 +721,7 @@ OPERAND_HANDLERS: List[
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(f, bb, insn) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features from the given insn.
|
extract features from the given insn.
|
||||||
|
|
||||||
@@ -731,14 +731,14 @@ def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
insn (vivisect...Instruction): the instruction to process.
|
insn (vivisect...Instruction): the instruction to process.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: the features and their location found in this insn.
|
tuple[Feature, Address]: the features and their location found in this insn.
|
||||||
"""
|
"""
|
||||||
for insn_handler in INSTRUCTION_HANDLERS:
|
for insn_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, addr in insn_handler(f, bb, insn):
|
for feature, addr in insn_handler(f, bb, insn):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
|
|
||||||
INSTRUCTION_HANDLERS: List[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[Tuple[Feature, Address]]]] = [
|
INSTRUCTION_HANDLERS: list[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[tuple[Feature, Address]]]] = [
|
||||||
extract_insn_api_features,
|
extract_insn_api_features,
|
||||||
extract_insn_bytes_features,
|
extract_insn_bytes_features,
|
||||||
extract_insn_nzxor_characteristic_features,
|
extract_insn_nzxor_characteristic_features,
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List, Tuple, Optional
|
from typing import Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@@ -35,6 +35,8 @@ class VMRayMonitorProcess:
|
|||||||
ppid: int # parent process ID assigned by OS
|
ppid: int # parent process ID assigned by OS
|
||||||
monitor_id: int # unique ID assigned to process by VMRay
|
monitor_id: int # unique ID assigned to process by VMRay
|
||||||
image_name: str
|
image_name: str
|
||||||
|
filename: str
|
||||||
|
cmd_line: str
|
||||||
|
|
||||||
|
|
||||||
class VMRayAnalysis:
|
class VMRayAnalysis:
|
||||||
@@ -58,17 +60,17 @@ class VMRayAnalysis:
|
|||||||
"VMRay feature extractor does not support flog version %s" % self.flog.analysis.log_version
|
"VMRay feature extractor does not support flog version %s" % self.flog.analysis.log_version
|
||||||
)
|
)
|
||||||
|
|
||||||
self.exports: Dict[int, str] = {}
|
self.exports: dict[int, str] = {}
|
||||||
self.imports: Dict[int, Tuple[str, str]] = {}
|
self.imports: dict[int, tuple[str, str]] = {}
|
||||||
self.sections: Dict[int, str] = {}
|
self.sections: dict[int, str] = {}
|
||||||
self.monitor_processes: Dict[int, VMRayMonitorProcess] = {}
|
self.monitor_processes: dict[int, VMRayMonitorProcess] = {}
|
||||||
self.monitor_threads: Dict[int, VMRayMonitorThread] = {}
|
self.monitor_threads: dict[int, VMRayMonitorThread] = {}
|
||||||
|
|
||||||
# map monitor thread IDs to their associated monitor process ID
|
# map monitor thread IDs to their associated monitor process ID
|
||||||
self.monitor_threads_by_monitor_process: Dict[int, List[int]] = defaultdict(list)
|
self.monitor_threads_by_monitor_process: dict[int, list[int]] = defaultdict(list)
|
||||||
|
|
||||||
# map function calls to their associated monitor thread ID mapped to its associated monitor process ID
|
# map function calls to their associated monitor thread ID mapped to its associated monitor process ID
|
||||||
self.monitor_process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
|
self.monitor_process_calls: dict[int, dict[int, list[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
|
||||||
|
|
||||||
self.base_address: int
|
self.base_address: int
|
||||||
|
|
||||||
@@ -160,7 +162,12 @@ class VMRayAnalysis:
|
|||||||
self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
|
self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
|
||||||
)
|
)
|
||||||
self.monitor_processes[process.monitor_id] = VMRayMonitorProcess(
|
self.monitor_processes[process.monitor_id] = VMRayMonitorProcess(
|
||||||
process.os_pid, ppid, process.monitor_id, process.image_name
|
process.os_pid,
|
||||||
|
ppid,
|
||||||
|
process.monitor_id,
|
||||||
|
process.image_name,
|
||||||
|
process.filename,
|
||||||
|
process.cmd_line,
|
||||||
)
|
)
|
||||||
|
|
||||||
# not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394
|
# not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394
|
||||||
@@ -170,6 +177,8 @@ class VMRayAnalysis:
|
|||||||
monitor_process.os_parent_pid,
|
monitor_process.os_parent_pid,
|
||||||
monitor_process.process_id,
|
monitor_process.process_id,
|
||||||
monitor_process.image_name,
|
monitor_process.image_name,
|
||||||
|
monitor_process.filename,
|
||||||
|
monitor_process.cmd_line,
|
||||||
)
|
)
|
||||||
|
|
||||||
if monitor_process.process_id not in self.monitor_processes:
|
if monitor_process.process_id not in self.monitor_processes:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
from capa.features.insn import API, Number
|
from capa.features.insn import API, Number
|
||||||
@@ -18,7 +18,7 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
if param.deref is not None:
|
if param.deref is not None:
|
||||||
# pointer types contain a special "deref" member that stores the deref'd value
|
# pointer types contain a special "deref" member that stores the deref'd value
|
||||||
# so we check for this first and ignore Param.value as this always contains the
|
# so we check for this first and ignore Param.value as this always contains the
|
||||||
@@ -39,7 +39,7 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feat
|
|||||||
yield Number(hexint(param.value)), ch.address
|
yield Number(hexint(param.value)), ch.address
|
||||||
|
|
||||||
|
|
||||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
call: FunctionCall = ch.inner
|
call: FunctionCall = ch.inner
|
||||||
|
|
||||||
if call.params_in:
|
if call.params_in:
|
||||||
@@ -50,7 +50,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
|||||||
yield API(name), ch.address
|
yield API(name), ch.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in CALL_HANDLERS:
|
for handler in CALL_HANDLERS:
|
||||||
for feature, addr in handler(ph, th, ch):
|
for feature, addr in handler(ph, th, ch):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
from typing import List, Tuple, Iterator
|
from typing import Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import capa.helpers
|
import capa.helpers
|
||||||
@@ -34,8 +34,8 @@ from capa.features.extractors.base_extractor import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_formatted_params(params: ParamList) -> List[str]:
|
def get_formatted_params(params: ParamList) -> list[str]:
|
||||||
params_list: List[str] = []
|
params_list: list[str] = []
|
||||||
|
|
||||||
for param in params:
|
for param in params:
|
||||||
if param.deref and param.deref.value is not None:
|
if param.deref and param.deref.value is not None:
|
||||||
@@ -69,10 +69,10 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
# value according to the PE header, the actual trace may use a different imagebase
|
# value according to the PE header, the actual trace may use a different imagebase
|
||||||
return AbsoluteVirtualAddress(self.analysis.base_address)
|
return AbsoluteVirtualAddress(self.analysis.base_address)
|
||||||
|
|
||||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.vmray.file.extract_features(self.analysis)
|
yield from capa.features.extractors.vmray.file.extract_features(self.analysis)
|
||||||
|
|
||||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from self.global_features
|
yield from self.global_features
|
||||||
|
|
||||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||||
@@ -80,13 +80,13 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
|
address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
|
||||||
yield ProcessHandle(address, inner=monitor_process)
|
yield ProcessHandle(address, inner=monitor_process)
|
||||||
|
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
# we have not identified process-specific features for VMRay yet
|
# we have not identified process-specific features for VMRay yet
|
||||||
yield from []
|
yield from []
|
||||||
|
|
||||||
def get_process_name(self, ph) -> str:
|
def get_process_name(self, ph) -> str:
|
||||||
monitor_process: VMRayMonitorProcess = ph.inner
|
monitor_process: VMRayMonitorProcess = ph.inner
|
||||||
return monitor_process.image_name
|
return f"{monitor_process.image_name} ({monitor_process.cmd_line})"
|
||||||
|
|
||||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||||
for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]:
|
for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]:
|
||||||
@@ -95,7 +95,7 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid)
|
address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid)
|
||||||
yield ThreadHandle(address=address, inner=monitor_thread)
|
yield ThreadHandle(address=address, inner=monitor_thread)
|
||||||
|
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
if False:
|
if False:
|
||||||
# force this routine to be a generator,
|
# force this routine to be a generator,
|
||||||
# but we don't actually have any elements to generate.
|
# but we don't actually have any elements to generate.
|
||||||
@@ -109,7 +109,7 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
|
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
|
yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
|
||||||
|
|
||||||
def get_call_name(self, ph, th, ch) -> str:
|
def get_call_name(self, ph, th, ch) -> str:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import capa.features.extractors.common
|
import capa.features.extractors.common
|
||||||
from capa.features.file import Export, Import, Section
|
from capa.features.file import Export, Import, Section
|
||||||
@@ -18,52 +18,52 @@ from capa.features.extractors.helpers import generate_symbols
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_export_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_export_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for addr, name in analysis.exports.items():
|
for addr, name in analysis.exports.items():
|
||||||
yield Export(name), AbsoluteVirtualAddress(addr)
|
yield Export(name), AbsoluteVirtualAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
def extract_import_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_import_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for addr, (module, api) in analysis.imports.items():
|
for addr, (module, api) in analysis.imports.items():
|
||||||
for symbol in generate_symbols(module, api, include_dll=True):
|
for symbol in generate_symbols(module, api, include_dll=True):
|
||||||
yield Import(symbol), AbsoluteVirtualAddress(addr)
|
yield Import(symbol), AbsoluteVirtualAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
def extract_section_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_section_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for addr, name in analysis.sections.items():
|
for addr, name in analysis.sections.items():
|
||||||
yield Section(name), AbsoluteVirtualAddress(addr)
|
yield Section(name), AbsoluteVirtualAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for filename in analysis.sv2.filenames.values():
|
for filename in analysis.sv2.filenames.values():
|
||||||
yield String(filename.filename), NO_ADDRESS
|
yield String(filename.filename), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for mutex in analysis.sv2.mutexes.values():
|
for mutex in analysis.sv2.mutexes.values():
|
||||||
yield String(mutex.name), NO_ADDRESS
|
yield String(mutex.name), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for domain in analysis.sv2.domains.values():
|
for domain in analysis.sv2.domains.values():
|
||||||
yield String(domain.domain), NO_ADDRESS
|
yield String(domain.domain), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for ip_address in analysis.sv2.ip_addresses.values():
|
for ip_address in analysis.sv2.ip_addresses.values():
|
||||||
yield String(ip_address.ip_address), NO_ADDRESS
|
yield String(ip_address.ip_address), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for registry_record in analysis.sv2.registry_records.values():
|
for registry_record in analysis.sv2.registry_records.values():
|
||||||
yield String(registry_record.reg_key_name), NO_ADDRESS
|
yield String(registry_record.reg_key_name), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_file_strings(analysis.sample_file_buf)
|
yield from capa.features.extractors.common.extract_file_strings(analysis.sample_file_buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for handler in FILE_HANDLERS:
|
for handler in FILE_HANDLERS:
|
||||||
for feature, addr in handler(analysis):
|
for feature, addr in handler(analysis):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from capa.features.common import (
|
from capa.features.common import (
|
||||||
OS,
|
OS,
|
||||||
@@ -27,7 +27,7 @@ from capa.features.extractors.vmray import VMRayAnalysis
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_arch(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
file_type: str = analysis.file_type
|
file_type: str = analysis.file_type
|
||||||
|
|
||||||
if "x86-32" in file_type:
|
if "x86-32" in file_type:
|
||||||
@@ -38,7 +38,7 @@ def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
raise ValueError("unrecognized arch from the VMRay report: %s" % file_type)
|
raise ValueError("unrecognized arch from the VMRay report: %s" % file_type)
|
||||||
|
|
||||||
|
|
||||||
def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_format(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
assert analysis.sample_file_static_data is not None
|
assert analysis.sample_file_static_data is not None
|
||||||
if analysis.sample_file_static_data.pe:
|
if analysis.sample_file_static_data.pe:
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
@@ -48,7 +48,7 @@ def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]
|
|||||||
raise ValueError("unrecognized file format from the VMRay report: %s" % analysis.file_type)
|
raise ValueError("unrecognized file format from the VMRay report: %s" % analysis.file_type)
|
||||||
|
|
||||||
|
|
||||||
def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_os(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
file_type: str = analysis.file_type
|
file_type: str = analysis.file_type
|
||||||
|
|
||||||
if "windows" in file_type.lower():
|
if "windows" in file_type.lower():
|
||||||
@@ -59,7 +59,7 @@ def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
|||||||
raise ValueError("unrecognized OS from the VMRay report: %s" % file_type)
|
raise ValueError("unrecognized OS from the VMRay report: %s" % file_type)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||||
for global_handler in GLOBAL_HANDLER:
|
for global_handler in GLOBAL_HANDLER:
|
||||||
for feature, addr in global_handler(analysis):
|
for feature, addr in global_handler(analysis):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -6,11 +6,10 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
from typing import Dict, List, Union, Optional
|
from typing import Union, Optional, Annotated
|
||||||
|
|
||||||
import xmltodict
|
import xmltodict
|
||||||
from pydantic import Field, BaseModel
|
from pydantic import Field, BaseModel
|
||||||
from typing_extensions import Annotated
|
|
||||||
from pydantic.functional_validators import BeforeValidator
|
from pydantic.functional_validators import BeforeValidator
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -87,7 +86,7 @@ class Param(BaseModel):
|
|||||||
deref: Optional[ParamDeref] = None
|
deref: Optional[ParamDeref] = None
|
||||||
|
|
||||||
|
|
||||||
def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]:
|
def validate_ensure_is_list(value: Union[list[Param], Param]) -> list[Param]:
|
||||||
if isinstance(value, list):
|
if isinstance(value, list):
|
||||||
return value
|
return value
|
||||||
else:
|
else:
|
||||||
@@ -95,9 +94,9 @@ def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]:
|
|||||||
|
|
||||||
|
|
||||||
# params may be stored as a list of Param or a single Param so we convert
|
# params may be stored as a list of Param or a single Param so we convert
|
||||||
# the input value to Python list type before the inner validation (List[Param])
|
# the input value to Python list type before the inner validation (list[Param])
|
||||||
# is called
|
# is called
|
||||||
ParamList = Annotated[List[Param], BeforeValidator(validate_ensure_is_list)]
|
ParamList = Annotated[list[Param], BeforeValidator(validate_ensure_is_list)]
|
||||||
|
|
||||||
|
|
||||||
class Params(BaseModel):
|
class Params(BaseModel):
|
||||||
@@ -137,11 +136,20 @@ class FunctionReturn(BaseModel):
|
|||||||
from_addr: HexInt = Field(alias="from")
|
from_addr: HexInt = Field(alias="from")
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_string(value: str) -> str:
|
||||||
|
# e.g. "cmd_line": "\"C:\\Users\\38lTTV5Kii\\Desktop\\filename.exe\" ",
|
||||||
|
return value.replace("\\\\", "\\").strip(' "')
|
||||||
|
|
||||||
|
|
||||||
|
# unify representation
|
||||||
|
SanitizedString = Annotated[str, BeforeValidator(sanitize_string)]
|
||||||
|
|
||||||
|
|
||||||
class MonitorProcess(BaseModel):
|
class MonitorProcess(BaseModel):
|
||||||
ts: HexInt
|
ts: HexInt
|
||||||
process_id: int
|
process_id: int
|
||||||
image_name: str
|
image_name: str
|
||||||
filename: str
|
filename: SanitizedString
|
||||||
# page_root: HexInt
|
# page_root: HexInt
|
||||||
os_pid: HexInt
|
os_pid: HexInt
|
||||||
# os_integrity_level: HexInt
|
# os_integrity_level: HexInt
|
||||||
@@ -149,7 +157,7 @@ class MonitorProcess(BaseModel):
|
|||||||
monitor_reason: str
|
monitor_reason: str
|
||||||
parent_id: int
|
parent_id: int
|
||||||
os_parent_pid: HexInt
|
os_parent_pid: HexInt
|
||||||
# cmd_line: str
|
cmd_line: SanitizedString
|
||||||
# cur_dir: str
|
# cur_dir: str
|
||||||
# os_username: str
|
# os_username: str
|
||||||
# bitness: int
|
# bitness: int
|
||||||
@@ -164,9 +172,9 @@ class MonitorThread(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
# handle if there's only single entries, but the model expects a list
|
# handle if there's only single entries, but the model expects a list
|
||||||
MonitorProcessList = Annotated[List[MonitorProcess], BeforeValidator(validate_ensure_is_list)]
|
MonitorProcessList = Annotated[list[MonitorProcess], BeforeValidator(validate_ensure_is_list)]
|
||||||
MonitorThreadList = Annotated[List[MonitorThread], BeforeValidator(validate_ensure_is_list)]
|
MonitorThreadList = Annotated[list[MonitorThread], BeforeValidator(validate_ensure_is_list)]
|
||||||
FunctionCallList = Annotated[List[FunctionCall], BeforeValidator(validate_ensure_is_list)]
|
FunctionCallList = Annotated[list[FunctionCall], BeforeValidator(validate_ensure_is_list)]
|
||||||
|
|
||||||
|
|
||||||
class Analysis(BaseModel):
|
class Analysis(BaseModel):
|
||||||
@@ -177,7 +185,7 @@ class Analysis(BaseModel):
|
|||||||
monitor_processes: MonitorProcessList = Field(alias="monitor_process", default=[])
|
monitor_processes: MonitorProcessList = Field(alias="monitor_process", default=[])
|
||||||
monitor_threads: MonitorThreadList = Field(alias="monitor_thread", default=[])
|
monitor_threads: MonitorThreadList = Field(alias="monitor_thread", default=[])
|
||||||
function_calls: FunctionCallList = Field(alias="fncall", default=[])
|
function_calls: FunctionCallList = Field(alias="fncall", default=[])
|
||||||
# function_returns: List[FunctionReturn] = Field(alias="fnret", default=[])
|
# function_returns: list[FunctionReturn] = Field(alias="fnret", default=[])
|
||||||
|
|
||||||
|
|
||||||
class Flog(BaseModel):
|
class Flog(BaseModel):
|
||||||
@@ -186,7 +194,7 @@ class Flog(BaseModel):
|
|||||||
|
|
||||||
# models for summary_v2.json file, certain fields left as comments for documentation purposes
|
# models for summary_v2.json file, certain fields left as comments for documentation purposes
|
||||||
class GenericReference(BaseModel):
|
class GenericReference(BaseModel):
|
||||||
path: List[str]
|
path: list[str]
|
||||||
source: str
|
source: str
|
||||||
|
|
||||||
|
|
||||||
@@ -226,12 +234,12 @@ class PEFileImport(BaseModel):
|
|||||||
|
|
||||||
class PEFileImportModule(BaseModel):
|
class PEFileImportModule(BaseModel):
|
||||||
dll: str
|
dll: str
|
||||||
apis: List[PEFileImport]
|
apis: list[PEFileImport]
|
||||||
|
|
||||||
|
|
||||||
class PEFileSection(BaseModel):
|
class PEFileSection(BaseModel):
|
||||||
# entropy: float
|
# entropy: float
|
||||||
# flags: List[str] = []
|
# flags: list[str] = []
|
||||||
name: str
|
name: str
|
||||||
# raw_data_offset: int
|
# raw_data_offset: int
|
||||||
# raw_data_size: int
|
# raw_data_size: int
|
||||||
@@ -241,9 +249,9 @@ class PEFileSection(BaseModel):
|
|||||||
|
|
||||||
class PEFile(BaseModel):
|
class PEFile(BaseModel):
|
||||||
basic_info: PEFileBasicInfo
|
basic_info: PEFileBasicInfo
|
||||||
exports: List[PEFileExport] = []
|
exports: list[PEFileExport] = []
|
||||||
imports: List[PEFileImportModule] = []
|
imports: list[PEFileImportModule] = []
|
||||||
sections: List[PEFileSection] = []
|
sections: list[PEFileSection] = []
|
||||||
|
|
||||||
|
|
||||||
class ElfFileSectionHeader(BaseModel):
|
class ElfFileSectionHeader(BaseModel):
|
||||||
@@ -268,7 +276,7 @@ class ElfFileHeader(BaseModel):
|
|||||||
|
|
||||||
class ElfFile(BaseModel):
|
class ElfFile(BaseModel):
|
||||||
# file_header: ElfFileHeader
|
# file_header: ElfFileHeader
|
||||||
sections: List[ElfFileSection]
|
sections: list[ElfFileSection]
|
||||||
|
|
||||||
|
|
||||||
class StaticData(BaseModel):
|
class StaticData(BaseModel):
|
||||||
@@ -284,7 +292,7 @@ class FileHashes(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class File(BaseModel):
|
class File(BaseModel):
|
||||||
# categories: List[str]
|
# categories: list[str]
|
||||||
hash_values: FileHashes
|
hash_values: FileHashes
|
||||||
# is_artifact: bool
|
# is_artifact: bool
|
||||||
# is_ioc: bool
|
# is_ioc: bool
|
||||||
@@ -292,11 +300,11 @@ class File(BaseModel):
|
|||||||
# size: int
|
# size: int
|
||||||
# is_truncated: bool
|
# is_truncated: bool
|
||||||
# mime_type: Optional[str] = None
|
# mime_type: Optional[str] = None
|
||||||
# operations: List[str] = []
|
# operations: list[str] = []
|
||||||
# ref_filenames: List[GenericReference] = []
|
# ref_filenames: list[GenericReference] = []
|
||||||
# ref_gfncalls: List[GenericReference] = []
|
# ref_gfncalls: list[GenericReference] = []
|
||||||
ref_static_data: Optional[StaticDataReference] = None
|
ref_static_data: Optional[StaticDataReference] = None
|
||||||
# ref_vti_matches: List[GenericReference] = []
|
# ref_vti_matches: list[GenericReference] = []
|
||||||
# verdict: str
|
# verdict: str
|
||||||
|
|
||||||
|
|
||||||
@@ -307,8 +315,9 @@ class Process(BaseModel):
|
|||||||
monitor_id: int
|
monitor_id: int
|
||||||
# monitor_reason: str
|
# monitor_reason: str
|
||||||
os_pid: int
|
os_pid: int
|
||||||
filename: str
|
filename: SanitizedString
|
||||||
image_name: str
|
image_name: str
|
||||||
|
cmd_line: SanitizedString
|
||||||
ref_parent_process: Optional[GenericReference] = None
|
ref_parent_process: Optional[GenericReference] = None
|
||||||
|
|
||||||
|
|
||||||
@@ -356,13 +365,13 @@ class AnalysisMetadata(BaseModel):
|
|||||||
class SummaryV2(BaseModel):
|
class SummaryV2(BaseModel):
|
||||||
analysis_metadata: AnalysisMetadata
|
analysis_metadata: AnalysisMetadata
|
||||||
|
|
||||||
static_data: Dict[str, StaticData] = {}
|
static_data: dict[str, StaticData] = {}
|
||||||
|
|
||||||
# recorded artifacts
|
# recorded artifacts
|
||||||
files: Dict[str, File] = {}
|
files: dict[str, File] = {}
|
||||||
processes: Dict[str, Process] = {}
|
processes: dict[str, Process] = {}
|
||||||
filenames: Dict[str, Filename] = {}
|
filenames: dict[str, Filename] = {}
|
||||||
mutexes: Dict[str, Mutex] = {}
|
mutexes: dict[str, Mutex] = {}
|
||||||
domains: Dict[str, Domain] = {}
|
domains: dict[str, Domain] = {}
|
||||||
ip_addresses: Dict[str, IPAddress] = {}
|
ip_addresses: dict[str, IPAddress] = {}
|
||||||
registry_records: Dict[str, Registry] = {}
|
registry_records: dict[str, Registry] = {}
|
||||||
|
|||||||
@@ -14,14 +14,10 @@ import json
|
|||||||
import zlib
|
import zlib
|
||||||
import logging
|
import logging
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Tuple, Union, Literal
|
from typing import Union, Literal, TypeAlias
|
||||||
|
|
||||||
from pydantic import Field, BaseModel, ConfigDict
|
from pydantic import Field, BaseModel, ConfigDict
|
||||||
|
|
||||||
# TODO(williballenthin): use typing.TypeAlias directly in Python 3.10+
|
|
||||||
# https://github.com/mandiant/capa/issues/1699
|
|
||||||
from typing_extensions import TypeAlias
|
|
||||||
|
|
||||||
import capa.helpers
|
import capa.helpers
|
||||||
import capa.version
|
import capa.version
|
||||||
import capa.features.file
|
import capa.features.file
|
||||||
@@ -62,7 +58,7 @@ class AddressType(str, Enum):
|
|||||||
|
|
||||||
class Address(HashableModel):
|
class Address(HashableModel):
|
||||||
type: AddressType
|
type: AddressType
|
||||||
value: Union[int, Tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS
|
value: Union[int, tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_capa(cls, a: capa.features.address.Address) -> "Address":
|
def from_capa(cls, a: capa.features.address.Address) -> "Address":
|
||||||
@@ -272,52 +268,52 @@ class InstructionFeature(HashableModel):
|
|||||||
|
|
||||||
class InstructionFeatures(BaseModel):
|
class InstructionFeatures(BaseModel):
|
||||||
address: Address
|
address: Address
|
||||||
features: Tuple[InstructionFeature, ...]
|
features: tuple[InstructionFeature, ...]
|
||||||
|
|
||||||
|
|
||||||
class BasicBlockFeatures(BaseModel):
|
class BasicBlockFeatures(BaseModel):
|
||||||
address: Address
|
address: Address
|
||||||
features: Tuple[BasicBlockFeature, ...]
|
features: tuple[BasicBlockFeature, ...]
|
||||||
instructions: Tuple[InstructionFeatures, ...]
|
instructions: tuple[InstructionFeatures, ...]
|
||||||
|
|
||||||
|
|
||||||
class FunctionFeatures(BaseModel):
|
class FunctionFeatures(BaseModel):
|
||||||
address: Address
|
address: Address
|
||||||
features: Tuple[FunctionFeature, ...]
|
features: tuple[FunctionFeature, ...]
|
||||||
basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks")
|
basic_blocks: tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks")
|
||||||
model_config = ConfigDict(populate_by_name=True)
|
model_config = ConfigDict(populate_by_name=True)
|
||||||
|
|
||||||
|
|
||||||
class CallFeatures(BaseModel):
|
class CallFeatures(BaseModel):
|
||||||
address: Address
|
address: Address
|
||||||
name: str
|
name: str
|
||||||
features: Tuple[CallFeature, ...]
|
features: tuple[CallFeature, ...]
|
||||||
|
|
||||||
|
|
||||||
class ThreadFeatures(BaseModel):
|
class ThreadFeatures(BaseModel):
|
||||||
address: Address
|
address: Address
|
||||||
features: Tuple[ThreadFeature, ...]
|
features: tuple[ThreadFeature, ...]
|
||||||
calls: Tuple[CallFeatures, ...]
|
calls: tuple[CallFeatures, ...]
|
||||||
|
|
||||||
|
|
||||||
class ProcessFeatures(BaseModel):
|
class ProcessFeatures(BaseModel):
|
||||||
address: Address
|
address: Address
|
||||||
name: str
|
name: str
|
||||||
features: Tuple[ProcessFeature, ...]
|
features: tuple[ProcessFeature, ...]
|
||||||
threads: Tuple[ThreadFeatures, ...]
|
threads: tuple[ThreadFeatures, ...]
|
||||||
|
|
||||||
|
|
||||||
class StaticFeatures(BaseModel):
|
class StaticFeatures(BaseModel):
|
||||||
global_: Tuple[GlobalFeature, ...] = Field(alias="global")
|
global_: tuple[GlobalFeature, ...] = Field(alias="global")
|
||||||
file: Tuple[FileFeature, ...]
|
file: tuple[FileFeature, ...]
|
||||||
functions: Tuple[FunctionFeatures, ...]
|
functions: tuple[FunctionFeatures, ...]
|
||||||
model_config = ConfigDict(populate_by_name=True)
|
model_config = ConfigDict(populate_by_name=True)
|
||||||
|
|
||||||
|
|
||||||
class DynamicFeatures(BaseModel):
|
class DynamicFeatures(BaseModel):
|
||||||
global_: Tuple[GlobalFeature, ...] = Field(alias="global")
|
global_: tuple[GlobalFeature, ...] = Field(alias="global")
|
||||||
file: Tuple[FileFeature, ...]
|
file: tuple[FileFeature, ...]
|
||||||
processes: Tuple[ProcessFeatures, ...]
|
processes: tuple[ProcessFeatures, ...]
|
||||||
model_config = ConfigDict(populate_by_name=True)
|
model_config = ConfigDict(populate_by_name=True)
|
||||||
|
|
||||||
|
|
||||||
@@ -344,7 +340,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
|
|||||||
"""
|
"""
|
||||||
serialize the given extractor to a string
|
serialize the given extractor to a string
|
||||||
"""
|
"""
|
||||||
global_features: List[GlobalFeature] = []
|
global_features: list[GlobalFeature] = []
|
||||||
for feature, _ in extractor.extract_global_features():
|
for feature, _ in extractor.extract_global_features():
|
||||||
global_features.append(
|
global_features.append(
|
||||||
GlobalFeature(
|
GlobalFeature(
|
||||||
@@ -352,7 +348,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
file_features: List[FileFeature] = []
|
file_features: list[FileFeature] = []
|
||||||
for feature, address in extractor.extract_file_features():
|
for feature, address in extractor.extract_file_features():
|
||||||
file_features.append(
|
file_features.append(
|
||||||
FileFeature(
|
FileFeature(
|
||||||
@@ -361,7 +357,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
function_features: List[FunctionFeatures] = []
|
function_features: list[FunctionFeatures] = []
|
||||||
for f in extractor.get_functions():
|
for f in extractor.get_functions():
|
||||||
faddr = Address.from_capa(f.address)
|
faddr = Address.from_capa(f.address)
|
||||||
ffeatures = [
|
ffeatures = [
|
||||||
@@ -446,7 +442,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
|
|||||||
"""
|
"""
|
||||||
serialize the given extractor to a string
|
serialize the given extractor to a string
|
||||||
"""
|
"""
|
||||||
global_features: List[GlobalFeature] = []
|
global_features: list[GlobalFeature] = []
|
||||||
for feature, _ in extractor.extract_global_features():
|
for feature, _ in extractor.extract_global_features():
|
||||||
global_features.append(
|
global_features.append(
|
||||||
GlobalFeature(
|
GlobalFeature(
|
||||||
@@ -454,7 +450,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
file_features: List[FileFeature] = []
|
file_features: list[FileFeature] = []
|
||||||
for feature, address in extractor.extract_file_features():
|
for feature, address in extractor.extract_file_features():
|
||||||
file_features.append(
|
file_features.append(
|
||||||
FileFeature(
|
FileFeature(
|
||||||
@@ -463,7 +459,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
process_features: List[ProcessFeatures] = []
|
process_features: list[ProcessFeatures] = []
|
||||||
for p in extractor.get_processes():
|
for p in extractor.get_processes():
|
||||||
paddr = Address.from_capa(p.address)
|
paddr = Address.from_capa(p.address)
|
||||||
pname = extractor.get_process_name(p)
|
pname = extractor.get_process_name(p)
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import binascii
|
import binascii
|
||||||
from typing import Union, Optional
|
from typing import Union, Literal, Optional, Annotated
|
||||||
|
|
||||||
from pydantic import Field, BaseModel, ConfigDict
|
from pydantic import Field, BaseModel, ConfigDict
|
||||||
|
|
||||||
@@ -209,168 +209,171 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
|
|||||||
|
|
||||||
|
|
||||||
class OSFeature(FeatureModel):
|
class OSFeature(FeatureModel):
|
||||||
type: str = "os"
|
type: Literal["os"] = "os"
|
||||||
os: str
|
os: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class ArchFeature(FeatureModel):
|
class ArchFeature(FeatureModel):
|
||||||
type: str = "arch"
|
type: Literal["arch"] = "arch"
|
||||||
arch: str
|
arch: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class FormatFeature(FeatureModel):
|
class FormatFeature(FeatureModel):
|
||||||
type: str = "format"
|
type: Literal["format"] = "format"
|
||||||
format: str
|
format: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class MatchFeature(FeatureModel):
|
class MatchFeature(FeatureModel):
|
||||||
type: str = "match"
|
type: Literal["match"] = "match"
|
||||||
match: str
|
match: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class CharacteristicFeature(FeatureModel):
|
class CharacteristicFeature(FeatureModel):
|
||||||
type: str = "characteristic"
|
type: Literal["characteristic"] = "characteristic"
|
||||||
characteristic: str
|
characteristic: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class ExportFeature(FeatureModel):
|
class ExportFeature(FeatureModel):
|
||||||
type: str = "export"
|
type: Literal["export"] = "export"
|
||||||
export: str
|
export: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class ImportFeature(FeatureModel):
|
class ImportFeature(FeatureModel):
|
||||||
type: str = "import"
|
type: Literal["import"] = "import"
|
||||||
import_: str = Field(alias="import")
|
import_: str = Field(alias="import")
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class SectionFeature(FeatureModel):
|
class SectionFeature(FeatureModel):
|
||||||
type: str = "section"
|
type: Literal["section"] = "section"
|
||||||
section: str
|
section: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class FunctionNameFeature(FeatureModel):
|
class FunctionNameFeature(FeatureModel):
|
||||||
type: str = "function name"
|
type: Literal["function name"] = "function name"
|
||||||
function_name: str = Field(alias="function name")
|
function_name: str = Field(alias="function name")
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class SubstringFeature(FeatureModel):
|
class SubstringFeature(FeatureModel):
|
||||||
type: str = "substring"
|
type: Literal["substring"] = "substring"
|
||||||
substring: str
|
substring: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class RegexFeature(FeatureModel):
|
class RegexFeature(FeatureModel):
|
||||||
type: str = "regex"
|
type: Literal["regex"] = "regex"
|
||||||
regex: str
|
regex: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class StringFeature(FeatureModel):
|
class StringFeature(FeatureModel):
|
||||||
type: str = "string"
|
type: Literal["string"] = "string"
|
||||||
string: str
|
string: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class ClassFeature(FeatureModel):
|
class ClassFeature(FeatureModel):
|
||||||
type: str = "class"
|
type: Literal["class"] = "class"
|
||||||
class_: str = Field(alias="class")
|
class_: str = Field(alias="class")
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class NamespaceFeature(FeatureModel):
|
class NamespaceFeature(FeatureModel):
|
||||||
type: str = "namespace"
|
type: Literal["namespace"] = "namespace"
|
||||||
namespace: str
|
namespace: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class BasicBlockFeature(FeatureModel):
|
class BasicBlockFeature(FeatureModel):
|
||||||
type: str = "basic block"
|
type: Literal["basic block"] = "basic block"
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class APIFeature(FeatureModel):
|
class APIFeature(FeatureModel):
|
||||||
type: str = "api"
|
type: Literal["api"] = "api"
|
||||||
api: str
|
api: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class PropertyFeature(FeatureModel):
|
class PropertyFeature(FeatureModel):
|
||||||
type: str = "property"
|
type: Literal["property"] = "property"
|
||||||
access: Optional[str] = None
|
access: Optional[str] = None
|
||||||
property: str
|
property: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class NumberFeature(FeatureModel):
|
class NumberFeature(FeatureModel):
|
||||||
type: str = "number"
|
type: Literal["number"] = "number"
|
||||||
number: Union[int, float]
|
number: Union[int, float]
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class BytesFeature(FeatureModel):
|
class BytesFeature(FeatureModel):
|
||||||
type: str = "bytes"
|
type: Literal["bytes"] = "bytes"
|
||||||
bytes: str
|
bytes: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class OffsetFeature(FeatureModel):
|
class OffsetFeature(FeatureModel):
|
||||||
type: str = "offset"
|
type: Literal["offset"] = "offset"
|
||||||
offset: int
|
offset: int
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class MnemonicFeature(FeatureModel):
|
class MnemonicFeature(FeatureModel):
|
||||||
type: str = "mnemonic"
|
type: Literal["mnemonic"] = "mnemonic"
|
||||||
mnemonic: str
|
mnemonic: str
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class OperandNumberFeature(FeatureModel):
|
class OperandNumberFeature(FeatureModel):
|
||||||
type: str = "operand number"
|
type: Literal["operand number"] = "operand number"
|
||||||
index: int
|
index: int
|
||||||
operand_number: int = Field(alias="operand number")
|
operand_number: int = Field(alias="operand number")
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class OperandOffsetFeature(FeatureModel):
|
class OperandOffsetFeature(FeatureModel):
|
||||||
type: str = "operand offset"
|
type: Literal["operand offset"] = "operand offset"
|
||||||
index: int
|
index: int
|
||||||
operand_offset: int = Field(alias="operand offset")
|
operand_offset: int = Field(alias="operand offset")
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
Feature = Union[
|
Feature = Annotated[
|
||||||
OSFeature,
|
Union[
|
||||||
ArchFeature,
|
OSFeature,
|
||||||
FormatFeature,
|
ArchFeature,
|
||||||
MatchFeature,
|
FormatFeature,
|
||||||
CharacteristicFeature,
|
MatchFeature,
|
||||||
ExportFeature,
|
CharacteristicFeature,
|
||||||
ImportFeature,
|
ExportFeature,
|
||||||
SectionFeature,
|
ImportFeature,
|
||||||
FunctionNameFeature,
|
SectionFeature,
|
||||||
SubstringFeature,
|
FunctionNameFeature,
|
||||||
RegexFeature,
|
SubstringFeature,
|
||||||
StringFeature,
|
RegexFeature,
|
||||||
ClassFeature,
|
StringFeature,
|
||||||
NamespaceFeature,
|
ClassFeature,
|
||||||
APIFeature,
|
NamespaceFeature,
|
||||||
PropertyFeature,
|
APIFeature,
|
||||||
NumberFeature,
|
PropertyFeature,
|
||||||
BytesFeature,
|
NumberFeature,
|
||||||
OffsetFeature,
|
BytesFeature,
|
||||||
MnemonicFeature,
|
OffsetFeature,
|
||||||
OperandNumberFeature,
|
MnemonicFeature,
|
||||||
OperandOffsetFeature,
|
OperandNumberFeature,
|
||||||
# Note! this must be last, see #1161
|
OperandOffsetFeature,
|
||||||
BasicBlockFeature,
|
# Note! this must be last, see #1161
|
||||||
|
BasicBlockFeature,
|
||||||
|
],
|
||||||
|
Field(discriminator="type"),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ You can also execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant
|
|||||||
| capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases |
|
| capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases |
|
||||||
| Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases |
|
| Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases |
|
||||||
| Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases |
|
| Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases |
|
||||||
| Python | `>= 3.8.0` | https://www.python.org/downloads |
|
| Python | `>= 3.10.0` | https://www.python.org/downloads |
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import sys
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Any, Dict, List
|
from typing import Any
|
||||||
|
|
||||||
from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd
|
from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd
|
||||||
from ghidra.program.model.symbol import Namespace, SourceType, SymbolType
|
from ghidra.program.model.symbol import Namespace, SourceType, SymbolType
|
||||||
@@ -68,8 +68,8 @@ class CapaMatchData:
|
|||||||
scope,
|
scope,
|
||||||
capability,
|
capability,
|
||||||
matches,
|
matches,
|
||||||
attack: List[Dict[Any, Any]],
|
attack: list[dict[Any, Any]],
|
||||||
mbc: List[Dict[Any, Any]],
|
mbc: list[dict[Any, Any]],
|
||||||
):
|
):
|
||||||
self.namespace = namespace
|
self.namespace = namespace
|
||||||
self.scope = scope
|
self.scope = scope
|
||||||
@@ -282,7 +282,7 @@ def parse_json(capa_data):
|
|||||||
for rule, capability in capa_data.get("rules", {}).items():
|
for rule, capability in capa_data.get("rules", {}).items():
|
||||||
# structure to contain rule match address & supporting feature data
|
# structure to contain rule match address & supporting feature data
|
||||||
# {rule match addr:[{feature addr:{node_data}}]}
|
# {rule match addr:[{feature addr:{node_data}}]}
|
||||||
rule_matches: Dict[Any, List[Any]] = {}
|
rule_matches: dict[Any, list[Any]] = {}
|
||||||
for i in range(len(capability.get("matches"))):
|
for i in range(len(capability.get("matches"))):
|
||||||
# grab rule match location
|
# grab rule match location
|
||||||
match_loc = capability.get("matches")[i][0].get("value")
|
match_loc = capability.get("matches")[i][0].get("value")
|
||||||
@@ -368,14 +368,10 @@ def main():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if sys.version_info < (3, 8):
|
if sys.version_info < (3, 10):
|
||||||
from capa.exceptions import UnsupportedRuntimeError
|
from capa.exceptions import UnsupportedRuntimeError
|
||||||
|
|
||||||
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
|
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
|
||||||
elif sys.version_info < (3, 10):
|
|
||||||
from warnings import warn
|
|
||||||
|
|
||||||
warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
|
|
||||||
exit_code = main()
|
exit_code = main()
|
||||||
if exit_code != 0:
|
if exit_code != 0:
|
||||||
popup("capa explorer encountered errors during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821
|
popup("capa explorer encountered errors during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821
|
||||||
|
|||||||
@@ -160,12 +160,8 @@ def main():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if sys.version_info < (3, 8):
|
if sys.version_info < (3, 10):
|
||||||
from capa.exceptions import UnsupportedRuntimeError
|
from capa.exceptions import UnsupportedRuntimeError
|
||||||
|
|
||||||
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
|
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
|
||||||
elif sys.version_info < (3, 10):
|
|
||||||
from warnings import warn
|
|
||||||
|
|
||||||
warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
|
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
import logging
|
import logging
|
||||||
import datetime
|
import datetime
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import List
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import capa
|
import capa
|
||||||
@@ -112,7 +111,7 @@ def get_file_sha256():
|
|||||||
return currentProgram().getExecutableSHA256() # type: ignore [name-defined] # noqa: F821
|
return currentProgram().getExecutableSHA256() # type: ignore [name-defined] # noqa: F821
|
||||||
|
|
||||||
|
|
||||||
def collect_metadata(rules: List[Path]):
|
def collect_metadata(rules: list[Path]):
|
||||||
md5 = get_file_md5()
|
md5 = get_file_md5()
|
||||||
sha256 = get_file_sha256()
|
sha256 = get_file_sha256()
|
||||||
|
|
||||||
@@ -150,7 +149,7 @@ def collect_metadata(rules: List[Path]):
|
|||||||
os=os,
|
os=os,
|
||||||
extractor="ghidra",
|
extractor="ghidra",
|
||||||
rules=tuple(r.resolve().absolute().as_posix() for r in rules),
|
rules=tuple(r.resolve().absolute().as_posix() for r in rules),
|
||||||
base_address=capa.features.freeze.Address.from_capa(currentProgram().getImageBase().getOffset()), # type: ignore [name-defined] # noqa: F821
|
base_address=capa.features.freeze.Address.from_capa(AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset())), # type: ignore [name-defined] # noqa: F821
|
||||||
layout=rdoc.StaticLayout(
|
layout=rdoc.StaticLayout(
|
||||||
functions=(),
|
functions=(),
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import logging
|
|||||||
import tempfile
|
import tempfile
|
||||||
import contextlib
|
import contextlib
|
||||||
import importlib.util
|
import importlib.util
|
||||||
from typing import Dict, List, Union, BinaryIO, Iterator, NoReturn
|
from typing import BinaryIO, Iterator, NoReturn
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -46,6 +46,7 @@ from capa.features.common import (
|
|||||||
FORMAT_FREEZE,
|
FORMAT_FREEZE,
|
||||||
FORMAT_DRAKVUF,
|
FORMAT_DRAKVUF,
|
||||||
FORMAT_UNKNOWN,
|
FORMAT_UNKNOWN,
|
||||||
|
FORMAT_BINJA_DB,
|
||||||
FORMAT_BINEXPORT2,
|
FORMAT_BINEXPORT2,
|
||||||
Format,
|
Format,
|
||||||
)
|
)
|
||||||
@@ -59,6 +60,7 @@ EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz", "log", ".log.gz", ".zip")
|
|||||||
EXTENSIONS_BINEXPORT2 = ("BinExport", "BinExport2")
|
EXTENSIONS_BINEXPORT2 = ("BinExport", "BinExport2")
|
||||||
EXTENSIONS_ELF = "elf_"
|
EXTENSIONS_ELF = "elf_"
|
||||||
EXTENSIONS_FREEZE = "frz"
|
EXTENSIONS_FREEZE = "frz"
|
||||||
|
EXTENSIONS_BINJA_DB = "bndb"
|
||||||
|
|
||||||
logger = logging.getLogger("capa")
|
logger = logging.getLogger("capa")
|
||||||
|
|
||||||
@@ -164,7 +166,7 @@ def load_json_from_path(json_path: Path):
|
|||||||
return report
|
return report
|
||||||
|
|
||||||
|
|
||||||
def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]):
|
def decode_json_lines(fd: BinaryIO | gzip.GzipFile):
|
||||||
for line in fd:
|
for line in fd:
|
||||||
try:
|
try:
|
||||||
line_s = line.strip().decode()
|
line_s = line.strip().decode()
|
||||||
@@ -175,7 +177,7 @@ def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]):
|
|||||||
logger.debug("bad DRAKVUF log line: %s", line)
|
logger.debug("bad DRAKVUF log line: %s", line)
|
||||||
|
|
||||||
|
|
||||||
def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]:
|
def load_jsonl_from_path(jsonl_path: Path) -> Iterator[dict]:
|
||||||
try:
|
try:
|
||||||
with gzip.open(jsonl_path, "rb") as fg:
|
with gzip.open(jsonl_path, "rb") as fg:
|
||||||
yield from decode_json_lines(fg)
|
yield from decode_json_lines(fg)
|
||||||
@@ -204,7 +206,7 @@ def get_format_from_report(sample: Path) -> str:
|
|||||||
return FORMAT_DRAKVUF
|
return FORMAT_DRAKVUF
|
||||||
elif sample.name.endswith(".zip"):
|
elif sample.name.endswith(".zip"):
|
||||||
with ZipFile(sample, "r") as zipfile:
|
with ZipFile(sample, "r") as zipfile:
|
||||||
namelist: List[str] = zipfile.namelist()
|
namelist: list[str] = zipfile.namelist()
|
||||||
if "logs/summary_v2.json" in namelist and "logs/flog.xml" in namelist:
|
if "logs/summary_v2.json" in namelist and "logs/flog.xml" in namelist:
|
||||||
# assume VMRay zipfile at a minimum has these files
|
# assume VMRay zipfile at a minimum has these files
|
||||||
return FORMAT_VMRAY
|
return FORMAT_VMRAY
|
||||||
@@ -232,6 +234,8 @@ def get_format_from_extension(sample: Path) -> str:
|
|||||||
format_ = FORMAT_FREEZE
|
format_ = FORMAT_FREEZE
|
||||||
elif sample.name.endswith(EXTENSIONS_BINEXPORT2):
|
elif sample.name.endswith(EXTENSIONS_BINEXPORT2):
|
||||||
format_ = FORMAT_BINEXPORT2
|
format_ = FORMAT_BINEXPORT2
|
||||||
|
elif sample.name.endswith(EXTENSIONS_BINJA_DB):
|
||||||
|
format_ = FORMAT_BINJA_DB
|
||||||
return format_
|
return format_
|
||||||
|
|
||||||
|
|
||||||
@@ -331,17 +335,6 @@ def log_unsupported_arch_error():
|
|||||||
logger.error("-" * 80)
|
logger.error("-" * 80)
|
||||||
|
|
||||||
|
|
||||||
def log_unsupported_runtime_error():
|
|
||||||
logger.error("-" * 80)
|
|
||||||
logger.error(" Unsupported runtime or Python interpreter.")
|
|
||||||
logger.error(" ")
|
|
||||||
logger.error(" capa supports running under Python 3.8 and higher.")
|
|
||||||
logger.error(" ")
|
|
||||||
logger.error(" If you're seeing this message on the command line,")
|
|
||||||
logger.error(" please ensure you're running a supported Python version.")
|
|
||||||
logger.error("-" * 80)
|
|
||||||
|
|
||||||
|
|
||||||
def is_running_standalone() -> bool:
|
def is_running_standalone() -> bool:
|
||||||
"""
|
"""
|
||||||
are we running from a PyInstaller'd executable?
|
are we running from a PyInstaller'd executable?
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user