mirror of
https://github.com/mandiant/capa.git
synced 2025-12-06 04:41:00 -08:00
Compare commits
110 Commits
hide-msgsp
...
v8.0.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1a82b9d0c5 | ||
|
|
3cbc184020 | ||
|
|
347601a112 | ||
|
|
8a02b0773d | ||
|
|
f11661f8f2 | ||
|
|
518dc3381c | ||
|
|
5c60adaf96 | ||
|
|
4ab8d75629 | ||
|
|
51d852d1b3 | ||
|
|
aa8e4603d1 | ||
|
|
6c61a91778 | ||
|
|
e633e34517 | ||
|
|
9c72c9067b | ||
|
|
168435cf75 | ||
|
|
5fdf7e61e2 | ||
|
|
95fc747e6f | ||
|
|
1f374e4986 | ||
|
|
28c0234339 | ||
|
|
f57f909e68 | ||
|
|
02c359f79f | ||
|
|
4448d612f1 | ||
|
|
d7cf8d1251 | ||
|
|
d1f3e43325 | ||
|
|
83a46265df | ||
|
|
0c64bd4985 | ||
|
|
ed86e5fb1b | ||
|
|
e1c786466a | ||
|
|
959a234f0e | ||
|
|
e57de2beb4 | ||
|
|
9c9b3711c0 | ||
|
|
65e2dac4c4 | ||
|
|
9ad3f06e1d | ||
|
|
201ec07b58 | ||
|
|
c85be8dc72 | ||
|
|
54952feb07 | ||
|
|
379d6ef313 | ||
|
|
28fcd10d2e | ||
|
|
a6481df6c4 | ||
|
|
abe80842cb | ||
|
|
b6763ac5fe | ||
|
|
5a284de438 | ||
|
|
8cfccbcb44 | ||
|
|
01772d0de0 | ||
|
|
f0042157ab | ||
|
|
6a2330c11a | ||
|
|
02b5e11380 | ||
|
|
32c428b989 | ||
|
|
20909c1d95 | ||
|
|
035b4f6ae6 | ||
|
|
cb002567c4 | ||
|
|
46c513c0a9 | ||
|
|
0f0523d2ba | ||
|
|
688841fd3b | ||
|
|
2a6ba62379 | ||
|
|
ca7580d417 | ||
|
|
7c01712843 | ||
|
|
ef02e4fe83 | ||
|
|
d51074385b | ||
|
|
d9ea57d29d | ||
|
|
8b7ec049f4 | ||
|
|
c05e01cc3a | ||
|
|
11bb0c3fbd | ||
|
|
93da346f32 | ||
|
|
3a2056b701 | ||
|
|
915f3b0511 | ||
|
|
cd61983e43 | ||
|
|
9627f7e5c3 | ||
|
|
3ebec9ec2b | ||
|
|
295cd413bb | ||
|
|
03e4778620 | ||
|
|
e8ad207245 | ||
|
|
a31bd2cd15 | ||
|
|
9118946ecb | ||
|
|
7b32706bd4 | ||
|
|
c632d594a6 | ||
|
|
4398b8ac31 | ||
|
|
ec697c01f9 | ||
|
|
097ed73ccd | ||
|
|
4e121ae24f | ||
|
|
322e7a934e | ||
|
|
7d983af907 | ||
|
|
77758e8922 | ||
|
|
296255f581 | ||
|
|
0237059cbd | ||
|
|
3241ee599f | ||
|
|
24236dda0e | ||
|
|
d4d856767d | ||
|
|
35767e6c6a | ||
|
|
7d8ee6aaac | ||
|
|
23709c9d6a | ||
|
|
bc72b6d14e | ||
|
|
13b1e533f5 | ||
|
|
7cc3ddd4ea | ||
|
|
20ae098cda | ||
|
|
2987eeb0ac | ||
|
|
cebf8e7274 | ||
|
|
d74225b5e0 | ||
|
|
70610cd1c5 | ||
|
|
338107cf9e | ||
|
|
6b88eed1e4 | ||
|
|
54badc323d | ||
|
|
2e2e1bc277 | ||
|
|
84c9da09e0 | ||
|
|
b2f89695b5 | ||
|
|
bc91171c65 | ||
|
|
69190dfa82 | ||
|
|
688afab087 | ||
|
|
6447319cc7 | ||
|
|
7be6fe6ae1 | ||
|
|
ca7073ce87 |
@@ -1,6 +1,6 @@
|
||||
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile
|
||||
|
||||
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
|
||||
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3-bullseye, 3.10-bullseye, 3-buster, 3.10-buster, etc.
|
||||
ARG VARIANT="3.10-bullseye"
|
||||
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"dockerfile": "Dockerfile",
|
||||
"context": "..",
|
||||
"args": {
|
||||
// Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6
|
||||
// Update 'VARIANT' to pick a Python version: 3, 3.10, etc.
|
||||
// Append -bullseye or -buster to pin to an OS version.
|
||||
// Use -bullseye variants on local arm64/Apple Silicon.
|
||||
"VARIANT": "3.10",
|
||||
|
||||
13
.github/workflows/build.yml
vendored
13
.github/workflows/build.yml
vendored
@@ -21,26 +21,25 @@ jobs:
|
||||
# set to false for debugging
|
||||
fail-fast: true
|
||||
matrix:
|
||||
# using Python 3.8 to support running across multiple operating systems including Windows 7
|
||||
include:
|
||||
- os: ubuntu-20.04
|
||||
# use old linux so that the shared library versioning is more portable
|
||||
artifact_name: capa
|
||||
asset_name: linux
|
||||
python_version: 3.8
|
||||
python_version: '3.10'
|
||||
- os: ubuntu-20.04
|
||||
artifact_name: capa
|
||||
asset_name: linux-py312
|
||||
python_version: 3.12
|
||||
python_version: '3.12'
|
||||
- os: windows-2019
|
||||
artifact_name: capa.exe
|
||||
asset_name: windows
|
||||
python_version: 3.8
|
||||
- os: macos-12
|
||||
python_version: '3.10'
|
||||
- os: macos-13
|
||||
# use older macOS for assumed better portability
|
||||
artifact_name: capa
|
||||
asset_name: macos
|
||||
python_version: 3.8
|
||||
python_version: '3.10'
|
||||
steps:
|
||||
- name: Checkout capa
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
@@ -107,7 +106,7 @@ jobs:
|
||||
# upload zipped binaries to Release page
|
||||
if: github.event_name == 'release'
|
||||
name: zip and upload ${{ matrix.asset_name }}
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build]
|
||||
strategy:
|
||||
matrix:
|
||||
|
||||
7
.github/workflows/changelog.yml
vendored
7
.github/workflows/changelog.yml
vendored
@@ -13,8 +13,11 @@ permissions:
|
||||
jobs:
|
||||
check_changelog:
|
||||
# no need to check for dependency updates via dependabot
|
||||
if: github.actor != 'dependabot[bot]' && github.actor != 'dependabot-preview[bot]'
|
||||
runs-on: ubuntu-20.04
|
||||
# github.event.pull_request.user.login refers to PR author
|
||||
if: |
|
||||
github.event.pull_request.user.login != 'dependabot[bot]' &&
|
||||
github.event.pull_request.user.login != 'dependabot-preview[bot]'
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
NO_CHANGELOG: '[x] No CHANGELOG update needed'
|
||||
steps:
|
||||
|
||||
2
.github/workflows/publish.yml
vendored
2
.github/workflows/publish.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||
with:
|
||||
python-version: '3.8'
|
||||
python-version: '3.10'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
|
||||
2
.github/workflows/tag.yml
vendored
2
.github/workflows/tag.yml
vendored
@@ -9,7 +9,7 @@ permissions: read-all
|
||||
jobs:
|
||||
tag:
|
||||
name: Tag capa rules
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout capa-rules
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
|
||||
30
.github/workflows/tests.yml
vendored
30
.github/workflows/tests.yml
vendored
@@ -26,7 +26,7 @@ env:
|
||||
|
||||
jobs:
|
||||
changelog_format:
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout capa
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
@@ -37,15 +37,15 @@ jobs:
|
||||
if [ $number != 1 ]; then exit 1; fi
|
||||
|
||||
code_style:
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout capa
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
# use latest available python to take advantage of best performance
|
||||
- name: Set up Python 3.11
|
||||
- name: Set up Python 3.12
|
||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
python-version: "3.12"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
@@ -64,16 +64,16 @@ jobs:
|
||||
run: pre-commit run deptry --hook-stage manual
|
||||
|
||||
rule_linter:
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout capa with submodules
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
with:
|
||||
submodules: recursive
|
||||
- name: Set up Python 3.11
|
||||
- name: Set up Python 3.12
|
||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
python-version: "3.12"
|
||||
- name: Install capa
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
@@ -88,17 +88,17 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-20.04, windows-2019, macos-12]
|
||||
os: [ubuntu-20.04, windows-2019, macos-13]
|
||||
# across all operating systems
|
||||
python-version: ["3.8", "3.11"]
|
||||
python-version: ["3.10", "3.11"]
|
||||
include:
|
||||
# on Ubuntu run these as well
|
||||
- os: ubuntu-20.04
|
||||
python-version: "3.8"
|
||||
- os: ubuntu-20.04
|
||||
python-version: "3.9"
|
||||
- os: ubuntu-20.04
|
||||
python-version: "3.10"
|
||||
- os: ubuntu-20.04
|
||||
python-version: "3.11"
|
||||
- os: ubuntu-20.04
|
||||
python-version: "3.12"
|
||||
steps:
|
||||
- name: Checkout capa with submodules
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
@@ -131,7 +131,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.9", "3.11"]
|
||||
python-version: ["3.10", "3.11"]
|
||||
steps:
|
||||
- name: Checkout capa with submodules
|
||||
# only run if BN_SERIAL is available; this check has to be repeated in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
|
||||
@@ -173,7 +173,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.8", "3.11"]
|
||||
python-version: ["3.10", "3.11"]
|
||||
java-version: ["17"]
|
||||
ghidra-version: ["11.0.1"]
|
||||
public-version: ["PUBLIC_20240130"] # for ghidra releases
|
||||
|
||||
103
.github/workflows/web-release.yml
vendored
Normal file
103
.github/workflows/web-release.yml
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
name: create web release
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version number for the release (x.x.x)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
run-tests:
|
||||
uses: ./.github/workflows/web-tests.yml
|
||||
|
||||
build-and-release:
|
||||
needs: run-tests
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set release name
|
||||
run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV
|
||||
|
||||
- name: Check if release already exists
|
||||
run: |
|
||||
if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
|
||||
echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: 'web/explorer/package-lock.json'
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
working-directory: web/explorer
|
||||
|
||||
- name: Build offline bundle
|
||||
run: npm run build:bundle
|
||||
working-directory: web/explorer
|
||||
|
||||
- name: Compress bundle
|
||||
run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
|
||||
working-directory: web/explorer
|
||||
|
||||
- name: Create releases directory
|
||||
run: mkdir -vp web/explorer/releases
|
||||
|
||||
- name: Move release to releases folder
|
||||
run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases
|
||||
|
||||
- name: Compute release SHA256 hash
|
||||
run: |
|
||||
echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV
|
||||
|
||||
- name: Update CHANGELOG.md
|
||||
run: |
|
||||
echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
|
||||
echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
|
||||
echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
|
||||
echo "" >> web/explorer/releases/CHANGELOG.md
|
||||
cat web/explorer/releases/CHANGELOG.md
|
||||
|
||||
- name: Remove older releases
|
||||
# keep only the latest 3 releases
|
||||
run: ls -t capa-explorer-web-v*.zip | tail -n +4 | xargs -r rm --
|
||||
working-directory: web/explorer/releases
|
||||
|
||||
- name: Stage release files
|
||||
run: |
|
||||
git config --local user.email "capa-dev@mandiant.com"
|
||||
git config --local user.name "Capa Bot"
|
||||
git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
|
||||
git add -u web/explorer/releases/
|
||||
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@5e914681df9dc83aa4e4905692ca88beb2f9e91f # v7.0.5
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
title: "explorer web: add release v${{ github.event.inputs.version }}"
|
||||
body: |
|
||||
This PR adds a new capa Explorer Web release v${{ github.event.inputs.version }}.
|
||||
|
||||
Release details:
|
||||
- Name: ${{ env.RELEASE_NAME }}
|
||||
- SHA256: ${{ env.RELEASE_SHA256 }}
|
||||
|
||||
This release is generated by the [web release](https://github.com/mandiant/capa/actions/workflows/web-release.yml) workflow.
|
||||
|
||||
- [x] No CHANGELOG update needed
|
||||
- [x] No new tests needed
|
||||
- [x] No documentation update needed
|
||||
commit-message: ":robot: explorer web: add release ${{ env.RELEASE_NAME }}"
|
||||
branch: release/web-v${{ github.event.inputs.version }}
|
||||
add-paths: web/explorer/releases/${{ env.RELEASE_NAME }}.zip
|
||||
base: master
|
||||
labels: webui
|
||||
delete-branch: true
|
||||
committer: Capa Bot <capa-dev@mandiant.com>
|
||||
author: Capa Bot <capa-dev@mandiant.com>
|
||||
13
.github/workflows/web-tests.yml
vendored
13
.github/workflows/web-tests.yml
vendored
@@ -1,10 +1,11 @@
|
||||
name: Capa Explorer Web tests
|
||||
name: capa Explorer Web tests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
paths:
|
||||
- 'web/explorer/**'
|
||||
workflow_call: # this allows the workflow to be called by other workflows
|
||||
|
||||
jobs:
|
||||
test:
|
||||
@@ -23,20 +24,20 @@ jobs:
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: './web/explorer/package-lock.json'
|
||||
cache-dependency-path: 'web/explorer/package-lock.json'
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
working-directory: ./web/explorer
|
||||
working-directory: web/explorer
|
||||
|
||||
- name: Lint
|
||||
run: npm run lint
|
||||
working-directory: ./web/explorer
|
||||
working-directory: web/explorer
|
||||
|
||||
- name: Format
|
||||
run: npm run format:check
|
||||
working-directory: ./web/explorer
|
||||
working-directory: web/explorer
|
||||
|
||||
- name: Run unit tests
|
||||
run: npm run test
|
||||
working-directory: ./web/explorer
|
||||
working-directory: web/explorer
|
||||
|
||||
@@ -25,7 +25,7 @@ repos:
|
||||
hooks:
|
||||
- id: isort
|
||||
name: isort
|
||||
stages: [commit, push, manual]
|
||||
stages: [pre-commit, pre-push, manual]
|
||||
language: system
|
||||
entry: isort
|
||||
args:
|
||||
@@ -46,7 +46,7 @@ repos:
|
||||
hooks:
|
||||
- id: black
|
||||
name: black
|
||||
stages: [commit, push, manual]
|
||||
stages: [pre-commit, pre-push, manual]
|
||||
language: system
|
||||
entry: black
|
||||
args:
|
||||
@@ -64,7 +64,7 @@ repos:
|
||||
hooks:
|
||||
- id: ruff
|
||||
name: ruff
|
||||
stages: [commit, push, manual]
|
||||
stages: [pre-commit, pre-push, manual]
|
||||
language: system
|
||||
entry: ruff
|
||||
args:
|
||||
@@ -82,7 +82,7 @@ repos:
|
||||
hooks:
|
||||
- id: flake8
|
||||
name: flake8
|
||||
stages: [push, manual]
|
||||
stages: [pre-push, manual]
|
||||
language: system
|
||||
entry: flake8
|
||||
args:
|
||||
@@ -101,7 +101,7 @@ repos:
|
||||
hooks:
|
||||
- id: mypy
|
||||
name: mypy
|
||||
stages: [push, manual]
|
||||
stages: [pre-push, manual]
|
||||
language: system
|
||||
entry: mypy
|
||||
args:
|
||||
@@ -119,7 +119,7 @@ repos:
|
||||
hooks:
|
||||
- id: deptry
|
||||
name: deptry
|
||||
stages: [push, manual]
|
||||
stages: [pre-push, manual]
|
||||
language: system
|
||||
entry: deptry .
|
||||
always_run: true
|
||||
|
||||
128
CHANGELOG.md
128
CHANGELOG.md
@@ -12,9 +12,6 @@
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
|
||||
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
|
||||
|
||||
### capa Explorer Web
|
||||
|
||||
### capa Explorer IDA Pro plugin
|
||||
@@ -22,8 +19,127 @@
|
||||
### Development
|
||||
|
||||
### Raw diffs
|
||||
- [capa v7.4.0...master](https://github.com/mandiant/capa/compare/v7.4.0...master)
|
||||
- [capa-rules v7.4.0...master](https://github.com/mandiant/capa-rules/compare/v7.4.0...master)
|
||||
- [capa v8.0.1...master](https://github.com/mandiant/capa/compare/v8.0.1...master)
|
||||
- [capa-rules v8.0.1...master](https://github.com/mandiant/capa-rules/compare/v8.0.1...master)
|
||||
|
||||
## v8.0.1
|
||||
|
||||
This point release fixes an issue with the IDAPython API to now handle IDA Pro 8.3, 8.4, and 9.0 correctly.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- handle IDA 8.3/8.4 vs. 9.0 API change @mr-tz
|
||||
|
||||
### Raw diffs
|
||||
- [capa v8.0.0...v8.0.1](https://github.com/mandiant/capa/compare/v8.0.0...v8.0.1)
|
||||
- [capa-rules v8.0.0...v8.0.1](https://github.com/mandiant/capa-rules/compare/v8.0.0...v8.0.1)
|
||||
|
||||
## v8.0.0
|
||||
|
||||
capa version 8 adds support for IDA Pro 9.0 (and idalib). The release comes with various improvements and bug fixes for the Binary Ninja backend (including support for loading database files) -- thanks to @xusheng6.
|
||||
|
||||
Additional bug fixes improve the dynamic and BinExport backends.
|
||||
|
||||
capa version 8 now requires Python 3.10 or newer.
|
||||
|
||||
Special thanks to @Tamir-K, @harshit-wadhwani, @jorik-utwente for their great contributions.
|
||||
|
||||
### New Features
|
||||
|
||||
- allow call as valid subscope for call scoped rules @mr-tz
|
||||
- support loading and analyzing a Binary Ninja database #2496 @xusheng6
|
||||
- vmray: record process command line details @mr-tz
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
- remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz
|
||||
|
||||
### New Rules (54)
|
||||
|
||||
- nursery/get-shadow-password-file-entry-on-linux jonathanlepore@google.com
|
||||
- nursery/set-shadow-password-file-entry-on-linux jonathanlepore@google.com
|
||||
- collection/browser/get-chrome-cookiemonster still@teamt5.org
|
||||
- collection/browser/get-elevation-service-for-chromium-based-browsers still@teamt5.org
|
||||
- collection/get-steam-token still@teamt5.org
|
||||
- nursery/persist-via-application-shimming j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-bits-job j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-print-processors-registry-key j.j.vannielen@utwente.nl
|
||||
- linking/static/touchsocket/linked-against-touchsocket still@teamt5.org
|
||||
- runtime/dotnet/compiled-with-dotnet-aot still@teamt5.org
|
||||
- nursery/persist-via-errorhandler-script j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-get-variable-hijack j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-iphlpapi-dll-hijack j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-lnk-shortcut j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-powershell-profile j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-windows-accessibility-tools j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-windows-terminal-profile j.j.vannielen@utwente.nl
|
||||
- nursery/write-to-browser-extension-directory j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-aedebug-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-amsi-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-app-paths-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-appcertdlls-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-appx-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-autodialdll-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-autoplayhandlers-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-bootverificationprogram-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-code-signing-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-com-hijack j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-command-processor-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-contextmenuhandlers-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-cor_profiler_path-registry-value j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-default-file-association-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-disk-cleanup-handler-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-dotnet-dbgmanageddebugger-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-dotnet_startup_hooks-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-explorer-tools-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-filter-handlers-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-group-policy-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-hhctrl-com-hijack j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-htmlhelp-author-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-image-file-execution-options-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-lsa-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-natural-language-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-netsh-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-network-provider-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-path-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-print-monitors-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-rdp-startup-programs-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-silentprocessexit-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-telemetrycontroller-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-timeproviders-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-ts-initialprogram-registry-key j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-userinitmprlogonscript-registry-value j.j.vannielen@utwente.nl
|
||||
- nursery/persist-via-windows-error-reporting-registry-key j.j.vannielen@utwente.nl
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
|
||||
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
|
||||
- ghidra: fix saving of base address @mr-tz
|
||||
- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
|
||||
- binja: fix crash when the IL of certain functions is not available. #2249 @xusheng6
|
||||
- binja: major performance improvement on the binja extractor. #1414 @xusheng6
|
||||
- cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz
|
||||
- binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6
|
||||
- binja: move the stack string detection to function level #2516 @xusheng6
|
||||
- BinExport2: fix handling of incorrect thunk functions #2524 @williballenthin
|
||||
- BinExport2: more precise pruning of expressions @williballenthin
|
||||
- BinExport2: better handle weird expression trees from Ghidra #2528 #2530 @williballenthin
|
||||
|
||||
### capa Explorer Web
|
||||
|
||||
### capa Explorer IDA Pro plugin
|
||||
|
||||
- fix bug preventing saving of capa results via Save button @mr-tz
|
||||
- fix saving of base address @mr-tz
|
||||
|
||||
### Development
|
||||
- CI: use macos-13 since macos-12 is deprecated and will be removed on December 3rd, 2024 #2173 @mr-tz
|
||||
- CI: update Binary Ninja version to 4.2 #2499 @xusheng6
|
||||
|
||||
### Raw diffs
|
||||
- [capa v7.4.0...v8.0.0](https://github.com/mandiant/capa/compare/v7.4.0...v8.0.0)
|
||||
- [capa-rules v7.4.0...v8.0.0](https://github.com/mandiant/capa-rules/compare/v7.4.0...v8.0.0)
|
||||
|
||||
## v7.4.0
|
||||
|
||||
@@ -179,6 +295,8 @@ Special thanks to our repeat and new contributors:
|
||||
- CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
|
||||
- CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
|
||||
- CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
|
||||
- CI: add web releases workflow #2455 @s-ff
|
||||
- CI: skip changelog.yml for dependabot PRs #2471
|
||||
|
||||
### Raw diffs
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
import logging
|
||||
import itertools
|
||||
import collections
|
||||
from typing import Any, Tuple
|
||||
from typing import Any
|
||||
|
||||
from capa.rules import Scope, RuleSet
|
||||
from capa.engine import FeatureSet, MatchResults
|
||||
@@ -64,7 +64,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
|
||||
|
||||
def find_capabilities(
|
||||
ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
|
||||
) -> Tuple[MatchResults, Any]:
|
||||
) -> tuple[MatchResults, Any]:
|
||||
from capa.capabilities.static import find_static_capabilities
|
||||
from capa.capabilities.dynamic import find_dynamic_capabilities
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
import logging
|
||||
import itertools
|
||||
import collections
|
||||
from typing import Any, List, Tuple
|
||||
from typing import Any
|
||||
|
||||
import capa.perf
|
||||
import capa.features.freeze as frz
|
||||
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def find_call_capabilities(
|
||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||
) -> Tuple[FeatureSet, MatchResults]:
|
||||
) -> tuple[FeatureSet, MatchResults]:
|
||||
"""
|
||||
find matches for the given rules for the given call.
|
||||
|
||||
@@ -51,7 +51,7 @@ def find_call_capabilities(
|
||||
|
||||
def find_thread_capabilities(
|
||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
|
||||
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
||||
) -> tuple[FeatureSet, MatchResults, MatchResults]:
|
||||
"""
|
||||
find matches for the given rules within the given thread.
|
||||
|
||||
@@ -89,7 +89,7 @@ def find_thread_capabilities(
|
||||
|
||||
def find_process_capabilities(
|
||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
|
||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||
) -> tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||
"""
|
||||
find matches for the given rules within the given process.
|
||||
|
||||
@@ -127,7 +127,7 @@ def find_process_capabilities(
|
||||
|
||||
def find_dynamic_capabilities(
|
||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
|
||||
) -> Tuple[MatchResults, Any]:
|
||||
) -> tuple[MatchResults, Any]:
|
||||
all_process_matches: MatchResults = collections.defaultdict(list)
|
||||
all_thread_matches: MatchResults = collections.defaultdict(list)
|
||||
all_call_matches: MatchResults = collections.defaultdict(list)
|
||||
@@ -135,7 +135,7 @@ def find_dynamic_capabilities(
|
||||
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
|
||||
|
||||
assert isinstance(extractor, DynamicFeatureExtractor)
|
||||
processes: List[ProcessHandle] = list(extractor.get_processes())
|
||||
processes: list[ProcessHandle] = list(extractor.get_processes())
|
||||
n_processes: int = len(processes)
|
||||
|
||||
with capa.helpers.CapaProgressBar(
|
||||
|
||||
@@ -10,7 +10,7 @@ import time
|
||||
import logging
|
||||
import itertools
|
||||
import collections
|
||||
from typing import Any, List, Tuple
|
||||
from typing import Any
|
||||
|
||||
import capa.perf
|
||||
import capa.helpers
|
||||
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def find_instruction_capabilities(
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||
) -> Tuple[FeatureSet, MatchResults]:
|
||||
) -> tuple[FeatureSet, MatchResults]:
|
||||
"""
|
||||
find matches for the given rules for the given instruction.
|
||||
|
||||
@@ -53,7 +53,7 @@ def find_instruction_capabilities(
|
||||
|
||||
def find_basic_block_capabilities(
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
||||
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
||||
) -> tuple[FeatureSet, MatchResults, MatchResults]:
|
||||
"""
|
||||
find matches for the given rules within the given basic block.
|
||||
|
||||
@@ -93,7 +93,7 @@ def find_basic_block_capabilities(
|
||||
|
||||
def find_code_capabilities(
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||
) -> tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||
"""
|
||||
find matches for the given rules within the given function.
|
||||
|
||||
@@ -131,16 +131,16 @@ def find_code_capabilities(
|
||||
|
||||
def find_static_capabilities(
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
|
||||
) -> Tuple[MatchResults, Any]:
|
||||
) -> tuple[MatchResults, Any]:
|
||||
all_function_matches: MatchResults = collections.defaultdict(list)
|
||||
all_bb_matches: MatchResults = collections.defaultdict(list)
|
||||
all_insn_matches: MatchResults = collections.defaultdict(list)
|
||||
|
||||
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
|
||||
library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
|
||||
library_functions: tuple[rdoc.LibraryFunction, ...] = ()
|
||||
|
||||
assert isinstance(extractor, StaticFeatureExtractor)
|
||||
functions: List[FunctionHandle] = list(extractor.get_functions())
|
||||
functions: list[FunctionHandle] = list(extractor.get_functions())
|
||||
n_funcs: int = len(functions)
|
||||
n_libs: int = 0
|
||||
percentage: float = 0
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
import copy
|
||||
import collections
|
||||
from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator
|
||||
from typing import TYPE_CHECKING, Union, Mapping, Iterable, Iterator
|
||||
|
||||
import capa.perf
|
||||
import capa.features.common
|
||||
@@ -27,7 +27,7 @@ if TYPE_CHECKING:
|
||||
# to collect the locations of a feature, do: `features[Number(0x10)]`
|
||||
#
|
||||
# aliased here so that the type can be documented and xref'd.
|
||||
FeatureSet = Dict[Feature, Set[Address]]
|
||||
FeatureSet = dict[Feature, set[Address]]
|
||||
|
||||
|
||||
class Statement:
|
||||
@@ -94,7 +94,7 @@ class And(Statement):
|
||||
match if all of the children evaluate to True.
|
||||
|
||||
the order of evaluation is dictated by the property
|
||||
`And.children` (type: List[Statement|Feature]).
|
||||
`And.children` (type: list[Statement|Feature]).
|
||||
a query optimizer may safely manipulate the order of these children.
|
||||
"""
|
||||
|
||||
@@ -127,7 +127,7 @@ class Or(Statement):
|
||||
match if any of the children evaluate to True.
|
||||
|
||||
the order of evaluation is dictated by the property
|
||||
`Or.children` (type: List[Statement|Feature]).
|
||||
`Or.children` (type: list[Statement|Feature]).
|
||||
a query optimizer may safely manipulate the order of these children.
|
||||
"""
|
||||
|
||||
@@ -176,7 +176,7 @@ class Some(Statement):
|
||||
match if at least N of the children evaluate to True.
|
||||
|
||||
the order of evaluation is dictated by the property
|
||||
`Some.children` (type: List[Statement|Feature]).
|
||||
`Some.children` (type: list[Statement|Feature]).
|
||||
a query optimizer may safely manipulate the order of these children.
|
||||
"""
|
||||
|
||||
@@ -267,7 +267,7 @@ class Subscope(Statement):
|
||||
# inspect(match_details)
|
||||
#
|
||||
# aliased here so that the type can be documented and xref'd.
|
||||
MatchResults = Mapping[str, List[Tuple[Address, Result]]]
|
||||
MatchResults = Mapping[str, list[tuple[Address, Result]]]
|
||||
|
||||
|
||||
def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
|
||||
@@ -292,7 +292,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
|
||||
features[capa.features.common.MatchedRule(namespace)].update(locations)
|
||||
|
||||
|
||||
def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
|
||||
def match(rules: list["capa.rules.Rule"], features: FeatureSet, addr: Address) -> tuple[FeatureSet, MatchResults]:
|
||||
"""
|
||||
match the given rules against the given features,
|
||||
returning an updated set of features and the matches.
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from enum import Enum
|
||||
from typing import Dict, List
|
||||
|
||||
from capa.helpers import assert_never
|
||||
|
||||
@@ -22,7 +21,7 @@ COM_PREFIXES = {
|
||||
}
|
||||
|
||||
|
||||
def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
|
||||
def load_com_database(com_type: ComType) -> dict[str, list[str]]:
|
||||
# lazy load these python files since they are so large.
|
||||
# that is, don't load them unless a COM feature is being handled.
|
||||
import capa.features.com.classes
|
||||
|
||||
@@ -5,9 +5,8 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Dict, List
|
||||
|
||||
COM_CLASSES: Dict[str, List[str]] = {
|
||||
COM_CLASSES: dict[str, list[str]] = {
|
||||
"ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
|
||||
"ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
|
||||
"ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],
|
||||
|
||||
@@ -5,9 +5,8 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Dict, List
|
||||
|
||||
COM_INTERFACES: Dict[str, List[str]] = {
|
||||
COM_INTERFACES: dict[str, list[str]] = {
|
||||
"IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
|
||||
"IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
|
||||
"IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
|
||||
@@ -16334,7 +16333,7 @@ COM_INTERFACES: Dict[str, List[str]] = {
|
||||
"IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
|
||||
"IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
|
||||
"IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
|
||||
"IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
|
||||
"IRcsServicetuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
|
||||
"IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
|
||||
"IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
|
||||
"IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
|
||||
|
||||
@@ -9,10 +9,9 @@
|
||||
import re
|
||||
import abc
|
||||
import codecs
|
||||
import typing
|
||||
import logging
|
||||
import collections
|
||||
from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional
|
||||
from typing import TYPE_CHECKING, Union, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# circular import, otherwise
|
||||
@@ -79,8 +78,8 @@ class Result:
|
||||
self,
|
||||
success: bool,
|
||||
statement: Union["capa.engine.Statement", "Feature"],
|
||||
children: List["Result"],
|
||||
locations: Optional[Set[Address]] = None,
|
||||
children: list["Result"],
|
||||
locations: Optional[set[Address]] = None,
|
||||
):
|
||||
super().__init__()
|
||||
self.success = success
|
||||
@@ -213,7 +212,7 @@ class Substring(String):
|
||||
|
||||
# mapping from string value to list of locations.
|
||||
# will unique the locations later on.
|
||||
matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
|
||||
matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)
|
||||
|
||||
assert isinstance(self.value, str)
|
||||
for feature, locations in features.items():
|
||||
@@ -261,7 +260,7 @@ class _MatchedSubstring(Substring):
|
||||
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
||||
"""
|
||||
|
||||
def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]):
|
||||
def __init__(self, substring: Substring, matches: dict[str, set[Address]]):
|
||||
"""
|
||||
args:
|
||||
substring: the substring feature that matches.
|
||||
@@ -305,7 +304,7 @@ class Regex(String):
|
||||
|
||||
# mapping from string value to list of locations.
|
||||
# will unique the locations later on.
|
||||
matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
|
||||
matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)
|
||||
|
||||
for feature, locations in features.items():
|
||||
if not isinstance(feature, (String,)):
|
||||
@@ -353,7 +352,7 @@ class _MatchedRegex(Regex):
|
||||
note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
|
||||
"""
|
||||
|
||||
def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]):
|
||||
def __init__(self, regex: Regex, matches: dict[str, set[Address]]):
|
||||
"""
|
||||
args:
|
||||
regex: the regex feature that matches.
|
||||
@@ -467,6 +466,7 @@ FORMAT_VMRAY = "vmray"
|
||||
FORMAT_BINEXPORT2 = "binexport2"
|
||||
FORMAT_FREEZE = "freeze"
|
||||
FORMAT_RESULT = "result"
|
||||
FORMAT_BINJA_DB = "binja_database"
|
||||
STATIC_FORMATS = {
|
||||
FORMAT_SC32,
|
||||
FORMAT_SC64,
|
||||
@@ -476,6 +476,7 @@ STATIC_FORMATS = {
|
||||
FORMAT_FREEZE,
|
||||
FORMAT_RESULT,
|
||||
FORMAT_BINEXPORT2,
|
||||
FORMAT_BINJA_DB,
|
||||
}
|
||||
DYNAMIC_FORMATS = {
|
||||
FORMAT_CAPE,
|
||||
|
||||
@@ -11,13 +11,9 @@ import hashlib
|
||||
import dataclasses
|
||||
from copy import copy
|
||||
from types import MethodType
|
||||
from typing import Any, Set, Dict, Tuple, Union, Iterator
|
||||
from typing import Any, Union, Iterator, TypeAlias
|
||||
from dataclasses import dataclass
|
||||
|
||||
# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
|
||||
# https://github.com/mandiant/capa/issues/1699
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
import capa.features.address
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
||||
@@ -59,7 +55,7 @@ class FunctionHandle:
|
||||
|
||||
address: Address
|
||||
inner: Any
|
||||
ctx: Dict[str, Any] = dataclasses.field(default_factory=dict)
|
||||
ctx: dict[str, Any] = dataclasses.field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -135,7 +131,7 @@ class StaticFeatureExtractor:
|
||||
return self._sample_hashes
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features found at every scope ("global").
|
||||
|
||||
@@ -146,12 +142,12 @@ class StaticFeatureExtractor:
|
||||
print('0x%x: %s', va, feature)
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract file-scope features.
|
||||
|
||||
@@ -162,7 +158,7 @@ class StaticFeatureExtractor:
|
||||
print('0x%x: %s', va, feature)
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -211,7 +207,7 @@ class StaticFeatureExtractor:
|
||||
raise KeyError(addr)
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract function-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -227,7 +223,7 @@ class StaticFeatureExtractor:
|
||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -240,7 +236,7 @@ class StaticFeatureExtractor:
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract basic block-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -258,7 +254,7 @@ class StaticFeatureExtractor:
|
||||
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -273,7 +269,7 @@ class StaticFeatureExtractor:
|
||||
@abc.abstractmethod
|
||||
def extract_insn_features(
|
||||
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract instruction-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -293,12 +289,12 @@ class StaticFeatureExtractor:
|
||||
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def FunctionFilter(extractor: StaticFeatureExtractor, functions: Set) -> StaticFeatureExtractor:
|
||||
def FunctionFilter(extractor: StaticFeatureExtractor, functions: set) -> StaticFeatureExtractor:
|
||||
original_get_functions = extractor.get_functions
|
||||
|
||||
def filtered_get_functions(self):
|
||||
@@ -387,7 +383,7 @@ class DynamicFeatureExtractor:
|
||||
return self._sample_hashes
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features found at every scope ("global").
|
||||
|
||||
@@ -398,12 +394,12 @@ class DynamicFeatureExtractor:
|
||||
print(addr, feature)
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract file-scope features.
|
||||
|
||||
@@ -414,7 +410,7 @@ class DynamicFeatureExtractor:
|
||||
print(addr, feature)
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -426,7 +422,7 @@ class DynamicFeatureExtractor:
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
Yields all the features of a process. These include:
|
||||
- file features of the process' image
|
||||
@@ -449,7 +445,7 @@ class DynamicFeatureExtractor:
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
Yields all the features of a thread. These include:
|
||||
- sequenced api traces
|
||||
@@ -466,7 +462,7 @@ class DynamicFeatureExtractor:
|
||||
@abc.abstractmethod
|
||||
def extract_call_features(
|
||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
Yields all features of a call. These include:
|
||||
- api name
|
||||
@@ -485,7 +481,7 @@ class DynamicFeatureExtractor:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def ProcessFilter(extractor: DynamicFeatureExtractor, processes: Set) -> DynamicFeatureExtractor:
|
||||
def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> DynamicFeatureExtractor:
|
||||
original_get_processes = extractor.get_processes
|
||||
|
||||
def filtered_get_processes(self):
|
||||
|
||||
@@ -17,7 +17,7 @@ import io
|
||||
import hashlib
|
||||
import logging
|
||||
import contextlib
|
||||
from typing import Set, Dict, List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
@@ -51,13 +51,13 @@ def compute_common_prefix_length(m: str, n: str) -> int:
|
||||
return len(m)
|
||||
|
||||
|
||||
def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: List[Path]) -> Path:
|
||||
def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: list[Path]) -> Path:
|
||||
"""attempt to find the sample file, given a BinExport2 file.
|
||||
|
||||
searches in the same directory as the BinExport2 file, and then in search_paths.
|
||||
"""
|
||||
|
||||
def filename_similarity_key(p: Path) -> Tuple[int, str]:
|
||||
def filename_similarity_key(p: Path) -> tuple[int, str]:
|
||||
# note closure over input_file.
|
||||
# sort first by length of common prefix, then by name (for stability)
|
||||
return (compute_common_prefix_length(p.name, input_file.name), p.name)
|
||||
@@ -65,7 +65,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
|
||||
wanted_sha256: str = be2.meta_information.executable_id.lower()
|
||||
|
||||
input_directory: Path = input_file.parent
|
||||
siblings: List[Path] = [p for p in input_directory.iterdir() if p.is_file()]
|
||||
siblings: list[Path] = [p for p in input_directory.iterdir() if p.is_file()]
|
||||
siblings.sort(key=filename_similarity_key, reverse=True)
|
||||
for sibling in siblings:
|
||||
# e.g. with open IDA files in the same directory on Windows
|
||||
@@ -74,7 +74,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
|
||||
return sibling
|
||||
|
||||
for search_path in search_paths:
|
||||
candidates: List[Path] = [p for p in search_path.iterdir() if p.is_file()]
|
||||
candidates: list[Path] = [p for p in search_path.iterdir() if p.is_file()]
|
||||
candidates.sort(key=filename_similarity_key, reverse=True)
|
||||
for candidate in candidates:
|
||||
with contextlib.suppress(PermissionError):
|
||||
@@ -88,27 +88,27 @@ class BinExport2Index:
|
||||
def __init__(self, be2: BinExport2):
|
||||
self.be2: BinExport2 = be2
|
||||
|
||||
self.callers_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
|
||||
self.callees_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
|
||||
self.callers_by_vertex_index: dict[int, list[int]] = defaultdict(list)
|
||||
self.callees_by_vertex_index: dict[int, list[int]] = defaultdict(list)
|
||||
|
||||
# note: flow graph != call graph (vertex)
|
||||
self.flow_graph_index_by_address: Dict[int, int] = {}
|
||||
self.flow_graph_address_by_index: Dict[int, int] = {}
|
||||
self.flow_graph_index_by_address: dict[int, int] = {}
|
||||
self.flow_graph_address_by_index: dict[int, int] = {}
|
||||
|
||||
# edges that come from the given basic block
|
||||
self.source_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||
self.source_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||
# edges that end up at the given basic block
|
||||
self.target_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||
self.target_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||
|
||||
self.vertex_index_by_address: Dict[int, int] = {}
|
||||
self.vertex_index_by_address: dict[int, int] = {}
|
||||
|
||||
self.data_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
|
||||
self.data_reference_index_by_target_address: Dict[int, List[int]] = defaultdict(list)
|
||||
self.string_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
|
||||
self.data_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
|
||||
self.data_reference_index_by_target_address: dict[int, list[int]] = defaultdict(list)
|
||||
self.string_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
|
||||
|
||||
self.insn_address_by_index: Dict[int, int] = {}
|
||||
self.insn_index_by_address: Dict[int, int] = {}
|
||||
self.insn_by_address: Dict[int, BinExport2.Instruction] = {}
|
||||
self.insn_address_by_index: dict[int, int] = {}
|
||||
self.insn_index_by_address: dict[int, int] = {}
|
||||
self.insn_by_address: dict[int, BinExport2.Instruction] = {}
|
||||
|
||||
# must index instructions first
|
||||
self._index_insn_addresses()
|
||||
@@ -208,7 +208,7 @@ class BinExport2Index:
|
||||
|
||||
def basic_block_instructions(
|
||||
self, basic_block: BinExport2.BasicBlock
|
||||
) -> Iterator[Tuple[int, BinExport2.Instruction, int]]:
|
||||
) -> Iterator[tuple[int, BinExport2.Instruction, int]]:
|
||||
"""
|
||||
For a given basic block, enumerate the instruction indices,
|
||||
the instruction instances, and their addresses.
|
||||
@@ -253,7 +253,7 @@ class BinExport2Analysis:
|
||||
self.idx: BinExport2Index = idx
|
||||
self.buf: bytes = buf
|
||||
self.base_address: int = 0
|
||||
self.thunks: Dict[int, int] = {}
|
||||
self.thunks: dict[int, int] = {}
|
||||
|
||||
self._find_base_address()
|
||||
self._compute_thunks()
|
||||
@@ -279,12 +279,14 @@ class BinExport2Analysis:
|
||||
|
||||
curr_idx: int = idx
|
||||
for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
|
||||
thunk_callees: List[int] = self.idx.callees_by_vertex_index[curr_idx]
|
||||
# if this doesn't hold, then it doesn't seem like this is a thunk,
|
||||
thunk_callees: list[int] = self.idx.callees_by_vertex_index[curr_idx]
|
||||
# If this doesn't hold, then it doesn't seem like this is a thunk,
|
||||
# because either, len is:
|
||||
# 0 and the thunk doesn't point to anything, or
|
||||
# 0 and the thunk doesn't point to anything or is indirect, like `call eax`, or
|
||||
# >1 and the thunk may end up at many functions.
|
||||
assert len(thunk_callees) == 1, f"thunk @ {hex(addr)} failed"
|
||||
# In any case, this doesn't appear to be the sort of thunk we're looking for.
|
||||
if len(thunk_callees) != 1:
|
||||
break
|
||||
|
||||
thunked_idx: int = thunk_callees[0]
|
||||
thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
|
||||
@@ -324,7 +326,7 @@ class AddressNotMappedError(ReadMemoryError): ...
|
||||
@dataclass
|
||||
class AddressSpace:
|
||||
base_address: int
|
||||
memory_regions: Tuple[MemoryRegion, ...]
|
||||
memory_regions: tuple[MemoryRegion, ...]
|
||||
|
||||
def read_memory(self, address: int, length: int) -> bytes:
|
||||
rva: int = address - self.base_address
|
||||
@@ -337,7 +339,7 @@ class AddressSpace:
|
||||
|
||||
@classmethod
|
||||
def from_pe(cls, pe: PE, base_address: int):
|
||||
regions: List[MemoryRegion] = []
|
||||
regions: list[MemoryRegion] = []
|
||||
for section in pe.sections:
|
||||
address: int = section.VirtualAddress
|
||||
size: int = section.Misc_VirtualSize
|
||||
@@ -355,7 +357,7 @@ class AddressSpace:
|
||||
|
||||
@classmethod
|
||||
def from_elf(cls, elf: ELFFile, base_address: int):
|
||||
regions: List[MemoryRegion] = []
|
||||
regions: list[MemoryRegion] = []
|
||||
|
||||
# ELF segments are for runtime data,
|
||||
# ELF sections are for link-time data.
|
||||
@@ -401,9 +403,9 @@ class AnalysisContext:
|
||||
class FunctionContext:
|
||||
ctx: AnalysisContext
|
||||
flow_graph_index: int
|
||||
format: Set[str]
|
||||
os: Set[str]
|
||||
arch: Set[str]
|
||||
format: set[str]
|
||||
os: set[str]
|
||||
arch: set[str]
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import List, Tuple, Iterator, Optional
|
||||
from typing import Iterator, Optional
|
||||
|
||||
import capa.features.extractors.binexport2.helpers
|
||||
from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset
|
||||
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def extract_insn_number_features(
|
||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
|
||||
@@ -91,7 +91,7 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
|
||||
def extract_insn_offset_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
|
||||
@@ -120,7 +120,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
be2: BinExport2 = fhi.ctx.be2
|
||||
@@ -131,7 +131,7 @@ def extract_insn_nzxor_characteristic_features(
|
||||
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
||||
# guaranteed to be simple int/reg operands
|
||||
# so we don't have to realize the tree/list.
|
||||
operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||
|
||||
if operands[1] != operands[2]:
|
||||
yield Characteristic("nzxor"), ih.address
|
||||
@@ -146,7 +146,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
be2: BinExport2 = fhi.ctx.be2
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Optional
|
||||
from typing import Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from capa.features.extractors.binexport2.helpers import get_operand_expressions
|
||||
@@ -32,7 +32,7 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt
|
||||
# Base: Any general purpose register
|
||||
# Displacement: An integral offset
|
||||
|
||||
expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand)
|
||||
expressions: list[BinExport2.Expression] = get_operand_expressions(be2, operand)
|
||||
|
||||
# skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller
|
||||
# has checked for BinExport2.Expression.DEREFERENCE
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.strings
|
||||
import capa.features.extractors.binexport2.helpers
|
||||
@@ -63,7 +63,7 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
|
||||
def extract_insn_number_features(
|
||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
|
||||
@@ -123,7 +123,7 @@ OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
|
||||
def extract_insn_offset_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
|
||||
@@ -161,7 +161,7 @@ def is_security_cookie(
|
||||
|
||||
# security cookie check should use SP or BP
|
||||
op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]]
|
||||
op1_exprs: List[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
|
||||
op1_exprs: list[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
|
||||
if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs):
|
||||
return False
|
||||
|
||||
@@ -192,7 +192,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse non-zeroing XOR instruction from the given instruction.
|
||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||
@@ -209,7 +209,7 @@ def extract_insn_nzxor_characteristic_features(
|
||||
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
||||
# guaranteed to be simple int/reg operands
|
||||
# so we don't have to realize the tree/list.
|
||||
operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||
|
||||
if operands[0] == operands[1]:
|
||||
return
|
||||
@@ -236,7 +236,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
be2: BinExport2 = fhi.ctx.be2
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
@@ -16,20 +16,20 @@ from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
||||
|
||||
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
bbi: BasicBlockContext = bbh.inner
|
||||
|
||||
idx = fhi.ctx.idx
|
||||
|
||||
basic_block_index: int = bbi.basic_block_index
|
||||
target_edges: List[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
|
||||
target_edges: list[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
|
||||
if basic_block_index in (e.source_basic_block_index for e in target_edges):
|
||||
basic_block_address: int = idx.get_basic_block_address(basic_block_index)
|
||||
yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract basic block features"""
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
for feature, addr in bb_handler(fh, bbh):
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Set, List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.elf
|
||||
import capa.features.extractors.common
|
||||
@@ -48,14 +48,14 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
||||
address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address)
|
||||
self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space)
|
||||
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features: list[tuple[Feature, Address]] = []
|
||||
self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf)))
|
||||
self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf)))
|
||||
self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf)))
|
||||
|
||||
self.format: Set[str] = set()
|
||||
self.os: Set[str] = set()
|
||||
self.arch: Set[str] = set()
|
||||
self.format: set[str] = set()
|
||||
self.os: set[str] = set()
|
||||
self.arch: set[str] = set()
|
||||
|
||||
for feature, _ in self.global_features:
|
||||
assert isinstance(feature.value, str)
|
||||
@@ -72,10 +72,10 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
||||
def get_base_address(self) -> AbsoluteVirtualAddress:
|
||||
return AbsoluteVirtualAddress(self.analysis.base_address)
|
||||
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from self.global_features
|
||||
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf)
|
||||
|
||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||
@@ -97,7 +97,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
||||
inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch),
|
||||
)
|
||||
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.binexport2.function.extract_features(fh)
|
||||
|
||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||
@@ -112,7 +112,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
||||
inner=BasicBlockContext(basic_block_index),
|
||||
)
|
||||
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh)
|
||||
|
||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
@@ -126,5 +126,5 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
def extract_insn_features(
|
||||
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih)
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import io
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import pefile
|
||||
from elftools.elf.elffile import ELFFile
|
||||
@@ -23,7 +23,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||
pe: pefile.PE = pefile.PE(data=buf)
|
||||
yield from capa.features.extractors.pefile.extract_file_export_names(pe)
|
||||
@@ -34,7 +34,7 @@ def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
|
||||
logger.warning("unsupported format")
|
||||
|
||||
|
||||
def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||
pe: pefile.PE = pefile.PE(data=buf)
|
||||
yield from capa.features.extractors.pefile.extract_file_import_names(pe)
|
||||
@@ -45,7 +45,7 @@ def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
|
||||
logger.warning("unsupported format")
|
||||
|
||||
|
||||
def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||
pe: pefile.PE = pefile.PE(data=buf)
|
||||
yield from capa.features.extractors.pefile.extract_file_section_names(pe)
|
||||
@@ -56,15 +56,15 @@ def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[F
|
||||
logger.warning("unsupported format")
|
||||
|
||||
|
||||
def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.common.extract_format(buf)
|
||||
|
||||
|
||||
def extract_features(be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract file features"""
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, addr in file_handler(be2, buf):
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.file import FunctionName
|
||||
from capa.features.common import Feature, Characteristic
|
||||
@@ -16,7 +16,7 @@ from capa.features.extractors.base_extractor import FunctionHandle
|
||||
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
||||
|
||||
|
||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
|
||||
be2: BinExport2 = fhi.ctx.be2
|
||||
@@ -32,7 +32,7 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Add
|
||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)
|
||||
|
||||
|
||||
def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
|
||||
be2: BinExport2 = fhi.ctx.be2
|
||||
@@ -40,7 +40,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
|
||||
flow_graph_index: int = fhi.flow_graph_index
|
||||
flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]
|
||||
|
||||
edges: List[Tuple[int, int]] = []
|
||||
edges: list[tuple[int, int]] = []
|
||||
for edge in flow_graph.edge:
|
||||
edges.append((edge.source_basic_block_index, edge.target_basic_block_index))
|
||||
|
||||
@@ -48,7 +48,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
|
||||
yield Characteristic("loop"), fh.address
|
||||
|
||||
|
||||
def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
|
||||
be2: BinExport2 = fhi.ctx.be2
|
||||
@@ -63,7 +63,7 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
|
||||
yield FunctionName(vertex.mangled_name), fh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, addr in func_handler(fh):
|
||||
yield feature, addr
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import re
|
||||
from typing import Set, Dict, List, Tuple, Union, Iterator, Optional
|
||||
from typing import Union, Iterator, Optional
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -22,7 +22,7 @@ HAS_ARCH_INTEL = {ARCH_I386, ARCH_AMD64}
|
||||
HAS_ARCH_ARM = {ARCH_AARCH64}
|
||||
|
||||
|
||||
def mask_immediate(arch: Set[str], immediate: int) -> int:
|
||||
def mask_immediate(arch: set[str], immediate: int) -> int:
|
||||
if arch & HAS_ARCH64:
|
||||
immediate &= 0xFFFFFFFFFFFFFFFF
|
||||
elif arch & HAS_ARCH32:
|
||||
@@ -30,7 +30,7 @@ def mask_immediate(arch: Set[str], immediate: int) -> int:
|
||||
return immediate
|
||||
|
||||
|
||||
def twos_complement(arch: Set[str], immediate: int, default: Optional[int] = None) -> int:
|
||||
def twos_complement(arch: set[str], immediate: int, default: Optional[int] = None) -> int:
|
||||
if default is not None:
|
||||
return capa.features.extractors.helpers.twos_complement(immediate, default)
|
||||
elif arch & HAS_ARCH64:
|
||||
@@ -50,17 +50,36 @@ def is_vertex_type(vertex: BinExport2.CallGraph.Vertex, type_: BinExport2.CallGr
|
||||
return vertex.HasField("type") and vertex.type == type_
|
||||
|
||||
|
||||
# internal to `build_expression_tree`
|
||||
# this is unstable: it is subject to change, so don't rely on it!
|
||||
def _prune_expression_tree_references_to_tree_index(
|
||||
expression_tree: list[list[int]],
|
||||
tree_index: int,
|
||||
):
|
||||
# `i` is the index of the tree node that we'll search for `tree_index`
|
||||
# if we remove `tree_index` from it, and it is now empty,
|
||||
# then we'll need to prune references to `i`.
|
||||
for i, tree_node in enumerate(expression_tree):
|
||||
if tree_index in tree_node:
|
||||
tree_node.remove(tree_index)
|
||||
|
||||
if len(tree_node) == 0:
|
||||
# if the parent node is now empty,
|
||||
# remove references to that parent node.
|
||||
_prune_expression_tree_references_to_tree_index(expression_tree, i)
|
||||
|
||||
|
||||
# internal to `build_expression_tree`
|
||||
# this is unstable: it is subject to change, so don't rely on it!
|
||||
def _prune_expression_tree_empty_shifts(
|
||||
be2: BinExport2,
|
||||
operand: BinExport2.Operand,
|
||||
expression_tree: List[List[int]],
|
||||
expression_tree: list[list[int]],
|
||||
tree_index: int,
|
||||
):
|
||||
expression_index = operand.expression_index[tree_index]
|
||||
expression = be2.expression[expression_index]
|
||||
children_tree_indexes: List[int] = expression_tree[tree_index]
|
||||
children_tree_indexes: list[int] = expression_tree[tree_index]
|
||||
|
||||
if expression.type == BinExport2.Expression.OPERATOR:
|
||||
if len(children_tree_indexes) == 0 and expression.symbol in ("lsl", "lsr"):
|
||||
@@ -70,9 +89,7 @@ def _prune_expression_tree_empty_shifts(
|
||||
#
|
||||
# Which seems to be as if the shift wasn't there (shift of #0)
|
||||
# so we want to remove references to this node from any parent nodes.
|
||||
for tree_node in expression_tree:
|
||||
if tree_index in tree_node:
|
||||
tree_node.remove(tree_index)
|
||||
_prune_expression_tree_references_to_tree_index(expression_tree, tree_index)
|
||||
|
||||
return
|
||||
|
||||
@@ -82,38 +99,37 @@ def _prune_expression_tree_empty_shifts(
|
||||
|
||||
# internal to `build_expression_tree`
|
||||
# this is unstable: it is subject to change, so don't rely on it!
|
||||
def _prune_expression_tree_empty_commas(
|
||||
def _fixup_expression_tree_references_to_tree_index(
|
||||
expression_tree: list[list[int]],
|
||||
existing_index: int,
|
||||
new_index: int,
|
||||
):
|
||||
for tree_node in expression_tree:
|
||||
for i, index in enumerate(tree_node):
|
||||
if index == existing_index:
|
||||
tree_node[i] = new_index
|
||||
|
||||
|
||||
# internal to `build_expression_tree`
|
||||
# this is unstable: it is subject to change, so don't rely on it!
|
||||
def _fixup_expression_tree_lonely_commas(
|
||||
be2: BinExport2,
|
||||
operand: BinExport2.Operand,
|
||||
expression_tree: List[List[int]],
|
||||
expression_tree: list[list[int]],
|
||||
tree_index: int,
|
||||
):
|
||||
expression_index = operand.expression_index[tree_index]
|
||||
expression = be2.expression[expression_index]
|
||||
children_tree_indexes: List[int] = expression_tree[tree_index]
|
||||
children_tree_indexes: list[int] = expression_tree[tree_index]
|
||||
|
||||
if expression.type == BinExport2.Expression.OPERATOR:
|
||||
if len(children_tree_indexes) == 1 and expression.symbol == ",":
|
||||
# Due to the above pruning of empty LSL or LSR expressions,
|
||||
# the parents might need to be fixed up.
|
||||
#
|
||||
# Specifically, if the pruned node was part of a comma list with two children,
|
||||
# now there's only a single child, which renders as an extra comma,
|
||||
# so we replace references to the comma node with the immediate child.
|
||||
#
|
||||
# A more correct way of doing this might be to walk up the parents and do fixups,
|
||||
# but I'm not quite sure how to do this yet. Just do two passes right now.
|
||||
child = children_tree_indexes[0]
|
||||
|
||||
for tree_node in expression_tree:
|
||||
tree_node.index
|
||||
if tree_index in tree_node:
|
||||
tree_node[tree_node.index(tree_index)] = child
|
||||
|
||||
return
|
||||
existing_index = tree_index
|
||||
new_index = children_tree_indexes[0]
|
||||
_fixup_expression_tree_references_to_tree_index(expression_tree, existing_index, new_index)
|
||||
|
||||
for child_tree_index in children_tree_indexes:
|
||||
_prune_expression_tree_empty_commas(be2, operand, expression_tree, child_tree_index)
|
||||
_fixup_expression_tree_lonely_commas(be2, operand, expression_tree, child_tree_index)
|
||||
|
||||
|
||||
# internal to `build_expression_tree`
|
||||
@@ -121,17 +137,17 @@ def _prune_expression_tree_empty_commas(
|
||||
def _prune_expression_tree(
|
||||
be2: BinExport2,
|
||||
operand: BinExport2.Operand,
|
||||
expression_tree: List[List[int]],
|
||||
expression_tree: list[list[int]],
|
||||
):
|
||||
_prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0)
|
||||
_prune_expression_tree_empty_commas(be2, operand, expression_tree, 0)
|
||||
_fixup_expression_tree_lonely_commas(be2, operand, expression_tree, 0)
|
||||
|
||||
|
||||
# this is unstable: it is subject to change, so don't rely on it!
|
||||
def _build_expression_tree(
|
||||
be2: BinExport2,
|
||||
operand: BinExport2.Operand,
|
||||
) -> List[List[int]]:
|
||||
) -> list[list[int]]:
|
||||
# The reconstructed expression tree layout, linking parent nodes to their children.
|
||||
#
|
||||
# There is one list of integers for each expression in the operand.
|
||||
@@ -159,7 +175,7 @@ def _build_expression_tree(
|
||||
# exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817)
|
||||
return []
|
||||
|
||||
tree: List[List[int]] = []
|
||||
tree: list[list[int]] = []
|
||||
for i, expression_index in enumerate(operand.expression_index):
|
||||
children = []
|
||||
|
||||
@@ -173,7 +189,6 @@ def _build_expression_tree(
|
||||
tree.append(children)
|
||||
|
||||
_prune_expression_tree(be2, operand, tree)
|
||||
_prune_expression_tree(be2, operand, tree)
|
||||
|
||||
return tree
|
||||
|
||||
@@ -181,21 +196,34 @@ def _build_expression_tree(
|
||||
def _fill_operand_expression_list(
|
||||
be2: BinExport2,
|
||||
operand: BinExport2.Operand,
|
||||
expression_tree: List[List[int]],
|
||||
expression_tree: list[list[int]],
|
||||
tree_index: int,
|
||||
expression_list: List[BinExport2.Expression],
|
||||
expression_list: list[BinExport2.Expression],
|
||||
):
|
||||
"""
|
||||
Walk the given expression tree and collect the expression nodes in-order.
|
||||
"""
|
||||
expression_index = operand.expression_index[tree_index]
|
||||
expression = be2.expression[expression_index]
|
||||
children_tree_indexes: List[int] = expression_tree[tree_index]
|
||||
children_tree_indexes: list[int] = expression_tree[tree_index]
|
||||
|
||||
if expression.type == BinExport2.Expression.REGISTER:
|
||||
assert len(children_tree_indexes) == 0
|
||||
assert len(children_tree_indexes) <= 1
|
||||
expression_list.append(expression)
|
||||
return
|
||||
|
||||
if len(children_tree_indexes) == 0:
|
||||
return
|
||||
elif len(children_tree_indexes) == 1:
|
||||
# like for aarch64 with vector instructions, indicating vector data size:
|
||||
#
|
||||
# FADD V0.4S, V1.4S, V2.4S
|
||||
#
|
||||
# see: https://github.com/mandiant/capa/issues/2528
|
||||
child_index = children_tree_indexes[0]
|
||||
_fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
|
||||
return
|
||||
else:
|
||||
raise NotImplementedError(len(children_tree_indexes))
|
||||
|
||||
elif expression.type == BinExport2.Expression.SYMBOL:
|
||||
assert len(children_tree_indexes) <= 1
|
||||
@@ -218,9 +246,23 @@ def _fill_operand_expression_list(
|
||||
raise NotImplementedError(len(children_tree_indexes))
|
||||
|
||||
elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
|
||||
assert len(children_tree_indexes) == 0
|
||||
assert len(children_tree_indexes) <= 1
|
||||
expression_list.append(expression)
|
||||
return
|
||||
|
||||
if len(children_tree_indexes) == 0:
|
||||
return
|
||||
elif len(children_tree_indexes) == 1:
|
||||
# the ghidra exporter can produce some weird expressions,
|
||||
# particularly for MSRs, like for:
|
||||
#
|
||||
# sreg(3, 0, c.0, c.4, 4)
|
||||
#
|
||||
# see: https://github.com/mandiant/capa/issues/2530
|
||||
child_index = children_tree_indexes[0]
|
||||
_fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
|
||||
return
|
||||
else:
|
||||
raise NotImplementedError(len(children_tree_indexes))
|
||||
|
||||
elif expression.type == BinExport2.Expression.SIZE_PREFIX:
|
||||
# like: b4
|
||||
@@ -282,10 +324,10 @@ def _fill_operand_expression_list(
|
||||
raise NotImplementedError(expression.type)
|
||||
|
||||
|
||||
def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> List[BinExport2.Expression]:
|
||||
def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> list[BinExport2.Expression]:
|
||||
tree = _build_expression_tree(be2, op)
|
||||
|
||||
expressions: List[BinExport2.Expression] = []
|
||||
expressions: list[BinExport2.Expression] = []
|
||||
_fill_operand_expression_list(be2, op, tree, 0, expressions)
|
||||
|
||||
return expressions
|
||||
@@ -331,11 +373,11 @@ def get_instruction_mnemonic(be2: BinExport2, instruction: BinExport2.Instructio
|
||||
return be2.mnemonic[instruction.mnemonic_index].name.lower()
|
||||
|
||||
|
||||
def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> List[BinExport2.Operand]:
|
||||
def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> list[BinExport2.Operand]:
|
||||
return [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||
|
||||
|
||||
def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
|
||||
def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]:
|
||||
"""
|
||||
Splits a string by any of the provided delimiter characters,
|
||||
including the delimiters in the results.
|
||||
@@ -355,7 +397,7 @@ def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
|
||||
yield s[start:]
|
||||
|
||||
|
||||
BinExport2OperandPattern = Union[str, Tuple[str, ...]]
|
||||
BinExport2OperandPattern = Union[str, tuple[str, ...]]
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -382,8 +424,8 @@ class BinExport2InstructionPattern:
|
||||
This matcher uses the BinExport2 data layout under the hood.
|
||||
"""
|
||||
|
||||
mnemonics: Tuple[str, ...]
|
||||
operands: Tuple[Union[str, BinExport2OperandPattern], ...]
|
||||
mnemonics: tuple[str, ...]
|
||||
operands: tuple[Union[str, BinExport2OperandPattern], ...]
|
||||
capture: Optional[str]
|
||||
|
||||
@classmethod
|
||||
@@ -438,7 +480,7 @@ class BinExport2InstructionPattern:
|
||||
mnemonic, _, rest = pattern.partition(" ")
|
||||
mnemonics = mnemonic.split("|")
|
||||
|
||||
operands: List[Union[str, Tuple[str, ...]]] = []
|
||||
operands: list[Union[str, tuple[str, ...]]] = []
|
||||
while rest:
|
||||
rest = rest.strip()
|
||||
if not rest.startswith("["):
|
||||
@@ -509,7 +551,7 @@ class BinExport2InstructionPattern:
|
||||
expression: BinExport2.Expression
|
||||
|
||||
def match(
|
||||
self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
|
||||
self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
|
||||
) -> Optional["BinExport2InstructionPattern.MatchResult"]:
|
||||
"""
|
||||
Match the given BinExport2 data against this pattern.
|
||||
@@ -602,10 +644,10 @@ class BinExport2InstructionPattern:
|
||||
class BinExport2InstructionPatternMatcher:
|
||||
"""Index and match a collection of instruction patterns."""
|
||||
|
||||
def __init__(self, queries: List[BinExport2InstructionPattern]):
|
||||
def __init__(self, queries: list[BinExport2InstructionPattern]):
|
||||
self.queries = queries
|
||||
# shard the patterns by (mnemonic, #operands)
|
||||
self._index: Dict[Tuple[str, int], List[BinExport2InstructionPattern]] = defaultdict(list)
|
||||
self._index: dict[tuple[str, int], list[BinExport2InstructionPattern]] = defaultdict(list)
|
||||
|
||||
for query in queries:
|
||||
for mnemonic in query.mnemonics:
|
||||
@@ -623,7 +665,7 @@ class BinExport2InstructionPatternMatcher:
|
||||
)
|
||||
|
||||
def match(
|
||||
self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
|
||||
self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
|
||||
) -> Optional[BinExport2InstructionPattern.MatchResult]:
|
||||
queries = self._index.get((mnemonic.lower(), len(operand_expressions)), [])
|
||||
for query in queries:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
@@ -32,7 +32,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
|
||||
@@ -68,7 +68,7 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||
|
||||
def extract_insn_number_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
|
||||
if fhi.arch & HAS_ARCH_INTEL:
|
||||
@@ -77,7 +77,7 @@ def extract_insn_number_features(
|
||||
yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih)
|
||||
|
||||
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
|
||||
@@ -92,7 +92,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
||||
# disassembler already identified string reference from instruction
|
||||
return
|
||||
|
||||
reference_addresses: List[int] = []
|
||||
reference_addresses: list[int] = []
|
||||
|
||||
if instruction_index in idx.data_reference_index_by_source_instruction_index:
|
||||
for data_reference_index in idx.data_reference_index_by_source_instruction_index[instruction_index]:
|
||||
@@ -142,7 +142,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
||||
|
||||
def extract_insn_string_features(
|
||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
|
||||
@@ -161,7 +161,7 @@ def extract_insn_string_features(
|
||||
|
||||
def extract_insn_offset_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
|
||||
if fhi.arch & HAS_ARCH_INTEL:
|
||||
@@ -172,7 +172,7 @@ def extract_insn_offset_features(
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
|
||||
if fhi.arch & HAS_ARCH_INTEL:
|
||||
@@ -187,7 +187,7 @@ def extract_insn_nzxor_characteristic_features(
|
||||
|
||||
def extract_insn_mnemonic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
ii: InstructionContext = ih.inner
|
||||
|
||||
@@ -199,7 +199,7 @@ def extract_insn_mnemonic_features(
|
||||
yield Mnemonic(mnemonic_name), ih.address
|
||||
|
||||
|
||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract functions calls from features
|
||||
|
||||
most relevant at the function scope;
|
||||
@@ -221,7 +221,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
fhi: FunctionContext = fh.inner
|
||||
|
||||
if fhi.arch & HAS_ARCH_INTEL:
|
||||
@@ -234,7 +234,7 @@ def extract_function_indirect_call_characteristic_features(
|
||||
)
|
||||
|
||||
|
||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract instruction features"""
|
||||
for inst_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, ea in inst_handler(f, bbh, insn):
|
||||
|
||||
@@ -5,115 +5,25 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Iterator
|
||||
|
||||
import string
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from binaryninja import Function
|
||||
from binaryninja import BasicBlock as BinjaBasicBlock
|
||||
from binaryninja import (
|
||||
BinaryView,
|
||||
SymbolType,
|
||||
RegisterValueType,
|
||||
VariableSourceType,
|
||||
MediumLevelILOperation,
|
||||
MediumLevelILBasicBlock,
|
||||
MediumLevelILInstruction,
|
||||
)
|
||||
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address
|
||||
from capa.features.basicblock import BasicBlock
|
||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||
|
||||
|
||||
def get_printable_len_ascii(s: bytes) -> int:
|
||||
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
||||
count = 0
|
||||
for c in s:
|
||||
if c == 0:
|
||||
return count
|
||||
if c < 127 and chr(c) in string.printable:
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
def get_printable_len_wide(s: bytes) -> int:
|
||||
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
||||
if all(c == 0x00 for c in s[1::2]):
|
||||
return get_printable_len_ascii(s[::2])
|
||||
return 0
|
||||
|
||||
|
||||
def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
|
||||
bv: BinaryView = f.view
|
||||
|
||||
if il.operation != MediumLevelILOperation.MLIL_CALL:
|
||||
return 0
|
||||
|
||||
target = il.dest
|
||||
if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
|
||||
return 0
|
||||
|
||||
addr = target.value.value
|
||||
sym = bv.get_symbol_at(addr)
|
||||
if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
|
||||
return 0
|
||||
|
||||
if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
|
||||
return 0
|
||||
|
||||
if len(il.params) < 2:
|
||||
return 0
|
||||
|
||||
dest = il.params[0]
|
||||
if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
|
||||
var = dest.src
|
||||
else:
|
||||
return 0
|
||||
|
||||
if var.source_type != VariableSourceType.StackVariableSourceType:
|
||||
return 0
|
||||
|
||||
src = il.params[1]
|
||||
if src.value.type != RegisterValueType.ConstantDataAggregateValue:
|
||||
return 0
|
||||
|
||||
s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
|
||||
return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
|
||||
|
||||
|
||||
def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
|
||||
"""check basic block for stackstring indicators
|
||||
|
||||
true if basic block contains enough moves of constant bytes to the stack
|
||||
"""
|
||||
count = 0
|
||||
for il in bb:
|
||||
count += get_stack_string_len(f, il)
|
||||
if count > MIN_STACKSTRING_LEN:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """yield the "stack string" characteristic for a basic block, if present

    `bbh.inner` is a (disassembly block, MLIL block) pair; the MLIL block may
    be None, in which case nothing is yielded.
    """
    mlil_block = bbh.inner[1]
    if mlil_block is None:
        return

    if bb_contains_stackstring(fh.inner, mlil_block):
        yield Characteristic("stack string"), bbh.address
|
||||
|
||||
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract tight loop indicators from a basic block"""
|
||||
bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
|
||||
for edge in bb[0].outgoing_edges:
|
||||
if edge.target.start == bb[0].start:
|
||||
bb: BinjaBasicBlock = bbh.inner
|
||||
for edge in bb.outgoing_edges:
|
||||
if edge.target.start == bb.start:
|
||||
yield Characteristic("tight loop"), bbh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract basic block features"""
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
for feature, addr in bb_handler(fh, bbh):
|
||||
@@ -121,7 +31,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
|
||||
yield BasicBlock(), bbh.address
|
||||
|
||||
|
||||
BASIC_BLOCK_HANDLERS = (
|
||||
extract_bb_tight_loop,
|
||||
extract_bb_stackstring,
|
||||
)
|
||||
BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import binaryninja as binja
|
||||
|
||||
@@ -30,7 +30,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, bv: binja.BinaryView):
|
||||
super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
|
||||
self.bv = bv
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features: list[tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
|
||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
|
||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
|
||||
@@ -48,31 +48,24 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
||||
for f in self.bv.functions:
|
||||
yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)
|
||||
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.binja.function.extract_features(fh)
|
||||
|
||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||
f: binja.Function = fh.inner
|
||||
# Set up a MLIL basic block dict look up to associate the disassembly basic block with its MLIL basic block
|
||||
mlil_lookup = {}
|
||||
for mlil_bb in f.mlil.basic_blocks:
|
||||
mlil_lookup[mlil_bb.source_block.start] = mlil_bb
|
||||
|
||||
for bb in f.basic_blocks:
|
||||
mlil_bb = mlil_lookup.get(bb.start)
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)
|
||||
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))
|
||||
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)
|
||||
|
||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
import capa.features.extractors.binja.helpers as binja_helpers
|
||||
|
||||
bb: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
|
||||
addr = bb[0].start
|
||||
bb: binja.BasicBlock = bbh.inner
|
||||
addr = bb.start
|
||||
|
||||
for text, length in bb[0]:
|
||||
for text, length in bb:
|
||||
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
|
||||
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
|
||||
addr += length
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding
|
||||
|
||||
@@ -13,12 +13,22 @@ import capa.features.extractors.common
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section, FunctionName
|
||||
from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
|
||||
from capa.features.common import (
|
||||
FORMAT_PE,
|
||||
FORMAT_ELF,
|
||||
FORMAT_SC32,
|
||||
FORMAT_SC64,
|
||||
FORMAT_BINJA_DB,
|
||||
Format,
|
||||
String,
|
||||
Feature,
|
||||
Characteristic,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
||||
|
||||
|
||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
|
||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
|
||||
"""check segment for embedded PE"""
|
||||
start = 0
|
||||
if bv.view_type == "PE" and seg.start == bv.start:
|
||||
@@ -32,13 +42,13 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
||||
|
||||
|
||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract embedded PE features"""
|
||||
for seg in bv.segments:
|
||||
yield from check_segment_for_pe(bv, seg)
|
||||
|
||||
|
||||
def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract function exports"""
|
||||
for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
|
||||
if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
|
||||
@@ -72,7 +82,7 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
|
||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)
|
||||
|
||||
|
||||
def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract function imports
|
||||
|
||||
1. imports by ordinal:
|
||||
@@ -96,19 +106,19 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
|
||||
yield Import(name), addr
|
||||
|
||||
|
||||
def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract section names"""
|
||||
for name, section in bv.sections.items():
|
||||
yield Section(name), AbsoluteVirtualAddress(section.start)
|
||||
|
||||
|
||||
def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract ASCII and UTF-16 LE strings"""
|
||||
for s in bv.strings:
|
||||
yield String(s.value), FileOffsetAddress(s.start)
|
||||
|
||||
|
||||
def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
@@ -127,12 +137,22 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
|
||||
yield FunctionName(name[1:]), sym.address
|
||||
|
||||
|
||||
def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
if bv.file.database is not None:
|
||||
yield Format(FORMAT_BINJA_DB), NO_ADDRESS
|
||||
|
||||
view_type = bv.view_type
|
||||
if view_type in ["PE", "COFF"]:
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
elif view_type == "ELF":
|
||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||
elif view_type == "Mapped":
|
||||
if bv.arch.name == "x86":
|
||||
yield Format(FORMAT_SC32), NO_ADDRESS
|
||||
elif bv.arch.name == "x86_64":
|
||||
yield Format(FORMAT_SC64), NO_ADDRESS
|
||||
else:
|
||||
raise NotImplementedError(f"unexpected raw file with arch: {bv.arch}")
|
||||
elif view_type == "Raw":
|
||||
# no file type to return when processing a binary file, but we want to continue processing
|
||||
return
|
||||
@@ -140,7 +160,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
raise NotImplementedError(f"unexpected file format: {view_type}")
|
||||
|
||||
|
||||
def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract file features"""
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, addr in file_handler(bv):
|
||||
|
||||
@@ -105,13 +105,13 @@ def find_binaryninja() -> Optional[Path]:
|
||||
logger.debug("detected OS: linux")
|
||||
elif sys.platform == "darwin":
|
||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||
return False
|
||||
return None
|
||||
elif sys.platform == "win32":
|
||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||
return False
|
||||
return None
|
||||
else:
|
||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||
return False
|
||||
return None
|
||||
|
||||
desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
|
||||
if not desktop_entry:
|
||||
|
||||
@@ -5,14 +5,28 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
import string
|
||||
from typing import Iterator
|
||||
|
||||
from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation
|
||||
from binaryninja import (
|
||||
Function,
|
||||
BinaryView,
|
||||
SymbolType,
|
||||
ILException,
|
||||
RegisterValueType,
|
||||
VariableSourceType,
|
||||
LowLevelILOperation,
|
||||
MediumLevelILOperation,
|
||||
MediumLevelILBasicBlock,
|
||||
MediumLevelILInstruction,
|
||||
)
|
||||
|
||||
from capa.features.file import FunctionName
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors import loops
|
||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
|
||||
@@ -24,7 +38,7 @@ def extract_function_calls_to(fh: FunctionHandle):
|
||||
# Everything that is a code reference to the current function is considered a caller, which actually includes
|
||||
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
|
||||
# considered a caller to the function
|
||||
llil = caller.llil
|
||||
llil = get_llil_instr_at_addr(func.view, caller.address)
|
||||
if (llil is None) or llil.operation not in [
|
||||
LowLevelILOperation.LLIL_CALL,
|
||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||
@@ -33,14 +47,13 @@ def extract_function_calls_to(fh: FunctionHandle):
|
||||
]:
|
||||
continue
|
||||
|
||||
if llil.dest.value.type not in [
|
||||
RegisterValueType.ImportedAddressValue,
|
||||
RegisterValueType.ConstantValue,
|
||||
RegisterValueType.ConstantPointerValue,
|
||||
if llil.dest.operation not in [
|
||||
LowLevelILOperation.LLIL_CONST,
|
||||
LowLevelILOperation.LLIL_CONST_PTR,
|
||||
]:
|
||||
continue
|
||||
|
||||
address = llil.dest.value.value
|
||||
address = llil.dest.constant
|
||||
if address != func.start:
|
||||
continue
|
||||
|
||||
@@ -95,10 +108,103 @@ def extract_function_name(fh: FunctionHandle):
|
||||
yield FunctionName(name[1:]), sym.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def get_printable_len_ascii(s: bytes) -> int:
    """Count the printable ASCII bytes in `s` that precede the first NUL byte.

    Scanning stops at the first zero byte or at the end of the input.
    Non-printable bytes before that point are skipped, not rejected, so the
    result is the number of printable bytes seen rather than an all-or-nothing
    length.
    """
    length = 0
    for value in s:
        if value == 0:
            # NUL terminator: everything after it is ignored
            break
        is_printable = value < 127 and chr(value) in string.printable
        if is_printable:
            length += 1
    return length
|
||||
|
||||
|
||||
def get_printable_len_wide(s: bytes) -> int:
    """Return the printable length of `s` interpreted as a UTF-16LE string.

    The buffer is accepted as wide only when every odd-indexed byte (the high
    byte of each 16-bit unit) is zero. In that case the even-indexed bytes are
    measured by `get_printable_len_ascii`; otherwise the result is 0.
    """
    high_bytes = s[1::2]
    if any(b != 0x00 for b in high_bytes):
        # at least one code unit is outside the single-byte range
        return 0
    return get_printable_len_ascii(s[::2])
|
||||
|
||||
|
||||
def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
    """Return the printable length of a constant string copied onto the stack.

    `il` contributes only when it is an MLIL call to one of the
    `__builtin_strncpy` / `__builtin_strcpy` / `__builtin_wcscpy` symbols whose
    first parameter refers to a stack variable and whose second parameter is
    constant aggregate data. Any other instruction yields 0.

    The result is the larger of the ASCII and wide printable lengths of the
    copied data.
    """
    view: BinaryView = f.view

    if il.operation != MediumLevelILOperation.MLIL_CALL:
        return 0

    # only direct calls (constant destination) can be resolved to a symbol
    callee = il.dest
    if callee.operation not in (MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR):
        return 0

    symbol = view.get_symbol_at(callee.value.value)
    if not symbol:
        return 0
    if symbol.type not in (SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol):
        return 0
    if symbol.name not in ("__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"):
        return 0

    if len(il.params) < 2:
        return 0

    # first parameter: the copy destination, which must be a stack variable
    destination = il.params[0]
    if destination.operation not in (MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR):
        return 0
    if destination.src.source_type != VariableSourceType.StackVariableSourceType:
        return 0

    # second parameter: the copy source, which must be constant aggregate data
    source = il.params[1]
    if source.value.type != RegisterValueType.ConstantDataAggregateValue:
        return 0

    data = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, source.value.value)
    ascii_len = get_printable_len_ascii(bytes(data))
    wide_len = get_printable_len_wide(bytes(data))
    return max(ascii_len, wide_len)
|
||||
|
||||
|
||||
def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
    """check an MLIL basic block for stackstring indicators

    sums `get_stack_string_len` over the block's instructions and reports true
    as soon as the running total exceeds MIN_STACKSTRING_LEN.
    """
    total = 0
    for instruction in bb:
        total += get_stack_string_len(f, instruction)
        if total <= MIN_STACKSTRING_LEN:
            continue
        # threshold crossed: no need to inspect the remaining instructions
        return True

    return False
|
||||
|
||||
|
||||
def extract_stackstring(fh: FunctionHandle):
    """yield a "stack string" characteristic for each qualifying MLIL block

    nothing is yielded when the function has no view or when fetching its
    MLIL raises ILException. each hit is reported at the start address of the
    MLIL block's source block.
    """
    function: Function = fh.inner
    if function.view is None:
        return

    try:
        mlil_function = function.mlil
    except ILException:
        # MLIL is not available for this function
        return

    for mlil_block in mlil_function.basic_blocks:
        if not bb_contains_stackstring(function, mlil_block):
            continue
        yield Characteristic("stack string"), mlil_block.source_block.start
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, addr in func_handler(fh):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
|
||||
FUNCTION_HANDLERS = (
|
||||
extract_function_calls_to,
|
||||
extract_function_loop,
|
||||
extract_recursive_call,
|
||||
extract_function_name,
|
||||
extract_stackstring,
|
||||
)
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from binaryninja import BinaryView
|
||||
|
||||
@@ -16,7 +16,7 @@ from capa.features.address import NO_ADDRESS, Address
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
name = bv.platform.name
|
||||
if "-" in name:
|
||||
name = name.split("-")[0]
|
||||
@@ -45,7 +45,7 @@ def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
return
|
||||
|
||||
|
||||
def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_arch(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
arch = bv.arch.name
|
||||
if arch == "x86_64":
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import re
|
||||
from typing import List, Callable
|
||||
from typing import Callable, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from binaryninja import BinaryView, LowLevelILInstruction
|
||||
from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction
|
||||
from binaryninja.architecture import InstructionTextToken
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ from binaryninja.architecture import InstructionTextToken
|
||||
class DisassemblyInstruction:
|
||||
address: int
|
||||
length: int
|
||||
text: List[InstructionTextToken]
|
||||
text: list[InstructionTextToken]
|
||||
|
||||
|
||||
LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
|
||||
@@ -54,7 +54,7 @@ def unmangle_c_name(name: str) -> str:
|
||||
|
||||
|
||||
def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
|
||||
s: List[str] = []
|
||||
s: list[str] = []
|
||||
while len(s) < max_len:
|
||||
try:
|
||||
c = bv.read(offset + len(s), 1)[0]
|
||||
@@ -67,3 +67,13 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
|
||||
s.append(chr(c))
|
||||
|
||||
return "".join(s)
|
||||
|
||||
|
||||
def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]:
    """Lift the single instruction at `addr` into a fresh LLIL function.

    Reads up to the architecture's maximum instruction length from the view,
    lifts it into a new, standalone `LowLevelILFunction`, and returns the first
    LLIL instruction produced. Returns None when the lifter reports 0.
    """
    architecture = bv.arch
    raw = bv.read(addr, architecture.max_instr_length)

    lifted = LowLevelILFunction(arch=architecture)
    lifted.current_address = addr

    consumed = architecture.get_instruction_low_level_il(raw, addr, lifted)
    # NOTE(review): a zero result is treated as "nothing lifted" — confirm against the Binary Ninja API
    return None if consumed == 0 else lifted[0]
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Any, List, Tuple, Iterator, Optional
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
from binaryninja import Function
|
||||
from binaryninja import BasicBlock as BinjaBasicBlock
|
||||
@@ -23,7 +23,7 @@ import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
|
||||
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
@@ -36,35 +36,27 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
# 2. The function must only make one call/jump to another address
|
||||
# If the function being checked is a stub function, returns the target address. Otherwise, return None.
|
||||
def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
|
||||
funcs = bv.get_functions_at(addr)
|
||||
for func in funcs:
|
||||
if len(func.basic_blocks) != 1:
|
||||
continue
|
||||
llil = get_llil_instr_at_addr(bv, addr)
|
||||
if llil is None or llil.operation not in [
|
||||
LowLevelILOperation.LLIL_CALL,
|
||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||
LowLevelILOperation.LLIL_JUMP,
|
||||
LowLevelILOperation.LLIL_TAILCALL,
|
||||
]:
|
||||
return None
|
||||
|
||||
call_count = 0
|
||||
call_target = None
|
||||
for il in func.llil.instructions:
|
||||
if il.operation in [
|
||||
LowLevelILOperation.LLIL_CALL,
|
||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||
LowLevelILOperation.LLIL_JUMP,
|
||||
LowLevelILOperation.LLIL_TAILCALL,
|
||||
]:
|
||||
call_count += 1
|
||||
if il.dest.value.type in [
|
||||
RegisterValueType.ImportedAddressValue,
|
||||
RegisterValueType.ConstantValue,
|
||||
RegisterValueType.ConstantPointerValue,
|
||||
]:
|
||||
call_target = il.dest.value.value
|
||||
# The LLIL instruction retrieved by `get_llil_instr_at_addr` did not go through a full analysis, so we cannot check
|
||||
# `llil.dest.value.type` here
|
||||
if llil.dest.operation not in [
|
||||
LowLevelILOperation.LLIL_CONST,
|
||||
LowLevelILOperation.LLIL_CONST_PTR,
|
||||
]:
|
||||
return None
|
||||
|
||||
if call_count == 1 and call_target is not None:
|
||||
return call_target
|
||||
|
||||
return None
|
||||
return llil.dest.constant
|
||||
|
||||
|
||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction API features
|
||||
|
||||
@@ -123,7 +115,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
||||
|
||||
def extract_insn_number_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction number features
|
||||
example:
|
||||
@@ -131,7 +123,7 @@ def extract_insn_number_features(
|
||||
"""
|
||||
func: Function = fh.inner
|
||||
|
||||
results: List[Tuple[Any[Number, OperandNumber], Address]] = []
|
||||
results: list[tuple[Any[Number, OperandNumber], Address]] = []
|
||||
|
||||
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
||||
if il.operation == LowLevelILOperation.LLIL_LOAD:
|
||||
@@ -162,7 +154,7 @@ def extract_insn_number_features(
|
||||
yield from results
|
||||
|
||||
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse referenced byte sequences
|
||||
example:
|
||||
@@ -209,7 +201,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
||||
|
||||
def extract_insn_string_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction string features
|
||||
|
||||
@@ -266,7 +258,7 @@ def extract_insn_string_features(
|
||||
|
||||
def extract_insn_offset_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction structure offset features
|
||||
|
||||
@@ -275,7 +267,7 @@ def extract_insn_offset_features(
|
||||
"""
|
||||
func: Function = fh.inner
|
||||
|
||||
results: List[Tuple[Any[Offset, OperandOffset], Address]] = []
|
||||
results: list[tuple[Any[Offset, OperandOffset], Address]] = []
|
||||
address_size = func.view.arch.address_size * 8
|
||||
|
||||
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
||||
@@ -353,7 +345,7 @@ def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInst
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction non-zeroing XOR instruction
|
||||
ignore expected non-zeroing XORs, e.g. security cookies
|
||||
@@ -367,7 +359,7 @@ def extract_insn_nzxor_characteristic_features(
|
||||
# e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
|
||||
if il.operation == LowLevelILOperation.LLIL_XOR:
|
||||
# Exclude cases related to the stack cookie
|
||||
if is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il):
|
||||
if is_nzxor_stack_cookie(fh.inner, bbh.inner, il):
|
||||
return False
|
||||
results.append((Characteristic("nzxor"), ih.address))
|
||||
return False
|
||||
@@ -382,7 +374,7 @@ def extract_insn_nzxor_characteristic_features(
|
||||
|
||||
def extract_insn_mnemonic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction mnemonic features"""
|
||||
insn: DisassemblyInstruction = ih.inner
|
||||
yield Mnemonic(insn.text[0].text), ih.address
|
||||
@@ -390,7 +382,7 @@ def extract_insn_mnemonic_features(
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse call $+5 instruction from the given instruction.
|
||||
"""
|
||||
@@ -401,7 +393,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction peb access
|
||||
|
||||
fs:[0x30] on x86, gs:[0x60] on x64
|
||||
@@ -444,7 +436,7 @@ def extract_insn_peb_access_characteristic_features(
|
||||
|
||||
def extract_insn_segment_access_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction fs or gs access"""
|
||||
func: Function = fh.inner
|
||||
|
||||
@@ -471,7 +463,7 @@ def extract_insn_segment_access_features(
|
||||
|
||||
def extract_insn_cross_section_cflow(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||
func: Function = fh.inner
|
||||
bv: BinaryView = func.view
|
||||
@@ -491,7 +483,7 @@ def extract_insn_cross_section_cflow(
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
|
||||
|
||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract functions calls from features
|
||||
|
||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||
@@ -534,7 +526,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||
does not include calls like => call ds:dword_ABD4974
|
||||
|
||||
@@ -562,7 +554,7 @@ def extract_function_indirect_call_characteristic_features(
|
||||
yield Characteristic("indirect call"), ih.address
|
||||
|
||||
|
||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract instruction features"""
|
||||
for inst_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, ea in inst_handler(f, bbh, insn):
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.helpers import assert_never
|
||||
@@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
this method extracts the given call's features (such as API name and arguments),
|
||||
and returns them as API, Number, and String features.
|
||||
@@ -55,7 +55,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
||||
yield API(name), ch.address
|
||||
|
||||
|
||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in CALL_HANDLERS:
|
||||
for feature, addr in handler(ph, th, ch):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Dict, Tuple, Union, Iterator
|
||||
from typing import Union, Iterator
|
||||
|
||||
import capa.features.extractors.cape.call
|
||||
import capa.features.extractors.cape.file
|
||||
@@ -50,16 +50,16 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
assert self.report.static is not None and self.report.static.pe is not None
|
||||
return AbsoluteVirtualAddress(self.report.static.pe.imagebase)
|
||||
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from self.global_features
|
||||
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.cape.file.extract_features(self.report)
|
||||
|
||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||
yield from capa.features.extractors.cape.file.get_processes(self.report)
|
||||
|
||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.cape.process.extract_features(ph)
|
||||
|
||||
def get_process_name(self, ph) -> str:
|
||||
@@ -69,7 +69,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||
yield from capa.features.extractors.cape.process.get_threads(ph)
|
||||
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
if False:
|
||||
# force this routine to be a generator,
|
||||
# but we don't actually have any elements to generate.
|
||||
@@ -81,7 +81,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
|
||||
def extract_call_features(
|
||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)
|
||||
|
||||
def get_call_name(self, ph, th, ch) -> str:
|
||||
@@ -122,7 +122,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
return "".join(parts)
|
||||
|
||||
@classmethod
|
||||
def from_report(cls, report: Dict) -> "CapeExtractor":
|
||||
def from_report(cls, report: dict) -> "CapeExtractor":
|
||||
cr = CapeReport.model_validate(report)
|
||||
|
||||
if cr.info.version not in TESTED_VERSIONS:
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import String, Feature
|
||||
@@ -41,7 +41,7 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]:
|
||||
seen_processes[addr].append(process)
|
||||
|
||||
|
||||
def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract imported function names
|
||||
"""
|
||||
@@ -62,57 +62,57 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]
|
||||
yield Import(name), AbsoluteVirtualAddress(function.address)
|
||||
|
||||
|
||||
def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
assert report.static is not None and report.static.pe is not None
|
||||
for function in report.static.pe.exports:
|
||||
yield Export(function.name), AbsoluteVirtualAddress(function.address)
|
||||
|
||||
|
||||
def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
assert report.static is not None and report.static.pe is not None
|
||||
for section in report.static.pe.sections:
|
||||
yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)
|
||||
|
||||
|
||||
def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
if report.strings is not None:
|
||||
for string in report.strings:
|
||||
yield String(string), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for regkey in report.behavior.summary.keys:
|
||||
yield String(regkey), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_used_files(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for file in report.behavior.summary.files:
|
||||
yield String(file), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for mutex in report.behavior.summary.mutexes:
|
||||
yield String(mutex), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for cmd in report.behavior.summary.executed_commands:
|
||||
yield String(cmd), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for symbol in report.behavior.summary.resolved_apis:
|
||||
yield String(symbol), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for svc in report.behavior.summary.created_services:
|
||||
yield String(svc), NO_ADDRESS
|
||||
for svc in report.behavior.summary.started_services:
|
||||
yield String(svc), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in FILE_HANDLERS:
|
||||
for feature, addr in handler(report):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.common import (
|
||||
OS,
|
||||
@@ -28,7 +28,7 @@ from capa.features.extractors.cape.models import CapeReport
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
if "Intel 80386" in report.target.file.type:
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
elif "x86-64" in report.target.file.type:
|
||||
@@ -40,7 +40,7 @@ def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
)
|
||||
|
||||
|
||||
def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
if "PE" in report.target.file.type:
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
elif "ELF" in report.target.file.type:
|
||||
@@ -52,7 +52,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
)
|
||||
|
||||
|
||||
def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
# this variable contains the output of the file command
|
||||
file_output = report.target.file.type
|
||||
|
||||
@@ -80,7 +80,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield OS(OS_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for global_handler in GLOBAL_HANDLER:
|
||||
for feature, addr in global_handler(report):
|
||||
yield feature, addr
|
||||
|
||||
@@ -6,12 +6,12 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any
|
||||
|
||||
from capa.features.extractors.base_extractor import ProcessHandle
|
||||
|
||||
|
||||
def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]:
|
||||
def find_process(processes: list[dict[str, Any]], ph: ProcessHandle) -> dict[str, Any]:
|
||||
"""
|
||||
find a specific process identified by a process handler.
|
||||
|
||||
|
||||
@@ -6,10 +6,9 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import binascii
|
||||
from typing import Any, Dict, List, Union, Literal, Optional
|
||||
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
|
||||
|
||||
from pydantic import Field, BaseModel, ConfigDict
|
||||
from typing_extensions import Annotated, TypeAlias
|
||||
from pydantic.functional_validators import BeforeValidator
|
||||
|
||||
|
||||
@@ -59,11 +58,11 @@ Skip: TypeAlias = Optional[Any]
|
||||
# in a field with this type.
|
||||
# then we can update the model with the discovered shape.
|
||||
TODO: TypeAlias = None
|
||||
ListTODO: TypeAlias = List[None]
|
||||
ListTODO: TypeAlias = list[None]
|
||||
DictTODO: TypeAlias = ExactModel
|
||||
|
||||
EmptyDict: TypeAlias = BaseModel
|
||||
EmptyList: TypeAlias = List[Any]
|
||||
Emptydict: TypeAlias = BaseModel
|
||||
EmptyList: TypeAlias = list[Any]
|
||||
|
||||
|
||||
class Info(FlexibleModel):
|
||||
@@ -77,7 +76,7 @@ class ImportedSymbol(ExactModel):
|
||||
|
||||
class ImportedDll(ExactModel):
|
||||
dll: str
|
||||
imports: List[ImportedSymbol]
|
||||
imports: list[ImportedSymbol]
|
||||
|
||||
|
||||
class DirectoryEntry(ExactModel):
|
||||
@@ -149,7 +148,7 @@ class Signer(ExactModel):
|
||||
aux_valid: Optional[bool] = None
|
||||
aux_error: Optional[bool] = None
|
||||
aux_error_desc: Optional[str] = None
|
||||
aux_signers: Optional[List[AuxSigner]] = None
|
||||
aux_signers: Optional[list[AuxSigner]] = None
|
||||
|
||||
|
||||
class Overlay(ExactModel):
|
||||
@@ -178,22 +177,22 @@ class PE(ExactModel):
|
||||
pdbpath: Optional[str] = None
|
||||
timestamp: str
|
||||
|
||||
# List[ImportedDll], or Dict[basename(dll), ImportedDll]
|
||||
imports: Union[List[ImportedDll], Dict[str, ImportedDll]]
|
||||
# list[ImportedDll], or dict[basename(dll), ImportedDll]
|
||||
imports: Union[list[ImportedDll], dict[str, ImportedDll]]
|
||||
imported_dll_count: Optional[int] = None
|
||||
imphash: str
|
||||
|
||||
exported_dll_name: Optional[str] = None
|
||||
exports: List[ExportedSymbol]
|
||||
exports: list[ExportedSymbol]
|
||||
|
||||
dirents: List[DirectoryEntry]
|
||||
sections: List[Section]
|
||||
dirents: list[DirectoryEntry]
|
||||
sections: list[Section]
|
||||
|
||||
ep_bytes: Optional[HexBytes] = None
|
||||
|
||||
overlay: Optional[Overlay] = None
|
||||
resources: List[Resource]
|
||||
versioninfo: List[KV]
|
||||
resources: list[Resource]
|
||||
versioninfo: list[KV]
|
||||
|
||||
# base64 encoded data
|
||||
icon: Optional[str] = None
|
||||
@@ -204,7 +203,7 @@ class PE(ExactModel):
|
||||
# short hex string
|
||||
icon_dhash: Optional[str] = None
|
||||
|
||||
digital_signers: List[DigitalSigner]
|
||||
digital_signers: list[DigitalSigner]
|
||||
guest_signers: Signer
|
||||
|
||||
|
||||
@@ -217,9 +216,9 @@ class File(FlexibleModel):
|
||||
cape_type: Optional[str] = None
|
||||
|
||||
pid: Optional[Union[int, Literal[""]]] = None
|
||||
name: Union[List[str], str]
|
||||
name: Union[list[str], str]
|
||||
path: str
|
||||
guest_paths: Union[List[str], str, None]
|
||||
guest_paths: Union[list[str], str, None]
|
||||
timestamp: Optional[str] = None
|
||||
|
||||
#
|
||||
@@ -244,7 +243,7 @@ class File(FlexibleModel):
|
||||
ep_bytes: Optional[HexBytes] = None
|
||||
entrypoint: Optional[int] = None
|
||||
data: Optional[str] = None
|
||||
strings: Optional[List[str]] = None
|
||||
strings: Optional[list[str]] = None
|
||||
|
||||
#
|
||||
# detections (skip)
|
||||
@@ -283,7 +282,7 @@ class Call(ExactModel):
|
||||
|
||||
api: str
|
||||
|
||||
arguments: List[Argument]
|
||||
arguments: list[Argument]
|
||||
status: bool
|
||||
return_: HexInt = Field(alias="return")
|
||||
pretty_return: Optional[str] = None
|
||||
@@ -298,15 +297,18 @@ class Call(ExactModel):
|
||||
id: int
|
||||
|
||||
|
||||
class Process(ExactModel):
|
||||
# FlexibleModel to account for extended fields
|
||||
# refs: https://github.com/mandiant/capa/issues/2466
|
||||
# https://github.com/kevoreilly/CAPEv2/pull/2199
|
||||
class Process(FlexibleModel):
|
||||
process_id: int
|
||||
process_name: str
|
||||
parent_id: int
|
||||
module_path: str
|
||||
first_seen: str
|
||||
calls: List[Call]
|
||||
threads: List[int]
|
||||
environ: Dict[str, str]
|
||||
calls: list[Call]
|
||||
threads: list[int]
|
||||
environ: dict[str, str]
|
||||
|
||||
|
||||
class ProcessTree(ExactModel):
|
||||
@@ -314,25 +316,25 @@ class ProcessTree(ExactModel):
|
||||
pid: int
|
||||
parent_id: int
|
||||
module_path: str
|
||||
threads: List[int]
|
||||
environ: Dict[str, str]
|
||||
children: List["ProcessTree"]
|
||||
threads: list[int]
|
||||
environ: dict[str, str]
|
||||
children: list["ProcessTree"]
|
||||
|
||||
|
||||
class Summary(ExactModel):
|
||||
files: List[str]
|
||||
read_files: List[str]
|
||||
write_files: List[str]
|
||||
delete_files: List[str]
|
||||
keys: List[str]
|
||||
read_keys: List[str]
|
||||
write_keys: List[str]
|
||||
delete_keys: List[str]
|
||||
executed_commands: List[str]
|
||||
resolved_apis: List[str]
|
||||
mutexes: List[str]
|
||||
created_services: List[str]
|
||||
started_services: List[str]
|
||||
files: list[str]
|
||||
read_files: list[str]
|
||||
write_files: list[str]
|
||||
delete_files: list[str]
|
||||
keys: list[str]
|
||||
read_keys: list[str]
|
||||
write_keys: list[str]
|
||||
delete_keys: list[str]
|
||||
executed_commands: list[str]
|
||||
resolved_apis: list[str]
|
||||
mutexes: list[str]
|
||||
created_services: list[str]
|
||||
started_services: list[str]
|
||||
|
||||
|
||||
class EncryptedBuffer(ExactModel):
|
||||
@@ -349,12 +351,12 @@ class Behavior(ExactModel):
|
||||
summary: Summary
|
||||
|
||||
# list of processes, of threads, of calls
|
||||
processes: List[Process]
|
||||
processes: list[Process]
|
||||
# tree of processes
|
||||
processtree: List[ProcessTree]
|
||||
processtree: list[ProcessTree]
|
||||
|
||||
anomaly: List[str]
|
||||
encryptedbuffers: List[EncryptedBuffer]
|
||||
anomaly: list[str]
|
||||
encryptedbuffers: list[EncryptedBuffer]
|
||||
# these are small objects that describe atomic events,
|
||||
# like file move, registry access.
|
||||
# we'll detect the same with our API call analysis.
|
||||
@@ -373,7 +375,7 @@ class Static(ExactModel):
|
||||
|
||||
|
||||
class Cape(ExactModel):
|
||||
payloads: List[ProcessFile]
|
||||
payloads: list[ProcessFile]
|
||||
configs: Skip = None
|
||||
|
||||
|
||||
@@ -389,7 +391,7 @@ class CapeReport(FlexibleModel):
|
||||
# static analysis results
|
||||
#
|
||||
static: Optional[Static] = None
|
||||
strings: Optional[List[str]] = None
|
||||
strings: Optional[list[str]] = None
|
||||
|
||||
#
|
||||
# dynamic analysis results
|
||||
@@ -398,10 +400,10 @@ class CapeReport(FlexibleModel):
|
||||
behavior: Behavior
|
||||
|
||||
# post-processed results: payloads and extracted configs
|
||||
CAPE: Optional[Union[Cape, List]] = None
|
||||
dropped: Optional[List[File]] = None
|
||||
procdump: Optional[List[ProcessFile]] = None
|
||||
procmemory: ListTODO
|
||||
CAPE: Optional[Union[Cape, list]] = None
|
||||
dropped: Optional[list[File]] = None
|
||||
procdump: Optional[list[ProcessFile]] = None
|
||||
procmemory: Optional[ListTODO] = None
|
||||
|
||||
# =========================================================================
|
||||
# information we won't use in capa
|
||||
@@ -437,7 +439,7 @@ class CapeReport(FlexibleModel):
|
||||
malfamily_tag: Optional[str] = None
|
||||
malscore: float
|
||||
detections: Skip = None
|
||||
detections2pid: Optional[Dict[int, List[str]]] = None
|
||||
detections2pid: Optional[dict[int, list[str]]] = None
|
||||
# AV detections for the sample.
|
||||
virustotal: Skip = None
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.common import String, Feature
|
||||
from capa.features.address import Address, ThreadAddress
|
||||
@@ -22,14 +22,14 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||
get the threads associated with a given process
|
||||
"""
|
||||
process: Process = ph.inner
|
||||
threads: List[int] = process.threads
|
||||
threads: list[int] = process.threads
|
||||
|
||||
for thread in threads:
|
||||
address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
|
||||
yield ThreadHandle(address=address, inner={})
|
||||
|
||||
|
||||
def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract strings from a process' provided environment variables.
|
||||
"""
|
||||
@@ -39,7 +39,7 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Addres
|
||||
yield String(value), ph.address
|
||||
|
||||
|
||||
def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in PROCESS_HANDLERS:
|
||||
for feature, addr in handler(ph):
|
||||
yield feature, addr
|
||||
|
||||
@@ -10,7 +10,7 @@ import re
|
||||
import logging
|
||||
import binascii
|
||||
import contextlib
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import pefile
|
||||
|
||||
@@ -45,7 +45,7 @@ MATCH_RESULT = b'{"meta":'
|
||||
MATCH_JSON_OBJECT = b'{"'
|
||||
|
||||
|
||||
def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]:
|
||||
def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address]]:
|
||||
"""
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
@@ -56,7 +56,7 @@ def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address
|
||||
yield String(s.s), FileOffsetAddress(s.offset)
|
||||
|
||||
|
||||
def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||
if buf.startswith(MATCH_PE):
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
elif buf.startswith(MATCH_ELF):
|
||||
@@ -79,7 +79,7 @@ def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
return
|
||||
|
||||
|
||||
def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
|
||||
if buf.startswith(MATCH_PE):
|
||||
yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))
|
||||
|
||||
@@ -111,7 +111,7 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
|
||||
return
|
||||
|
||||
|
||||
def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
|
||||
if os != OS_AUTO:
|
||||
yield OS(os), NO_ADDRESS
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Tuple, Union, Iterator, Optional
|
||||
from typing import Union, Iterator, Optional
|
||||
from pathlib import Path
|
||||
|
||||
import dnfile
|
||||
@@ -41,11 +41,11 @@ from capa.features.extractors.dnfile.helpers import (
|
||||
|
||||
class DnFileFeatureExtractorCache:
|
||||
def __init__(self, pe: dnfile.dnPE):
|
||||
self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.native_imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.methods: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.fields: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
self.types: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||
|
||||
for import_ in get_dotnet_managed_imports(pe):
|
||||
self.imports[import_.token] = import_
|
||||
@@ -84,7 +84,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe)
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features: list[tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
|
||||
@@ -100,7 +100,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||
# create a method lookup table
|
||||
methods: Dict[Address, FunctionHandle] = {}
|
||||
methods: dict[Address, FunctionHandle] = {}
|
||||
for token, method in get_dotnet_managed_method_bodies(self.pe):
|
||||
fh: FunctionHandle = FunctionHandle(
|
||||
address=DNTokenAddress(token),
|
||||
@@ -136,7 +136,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
yield from methods.values()
|
||||
|
||||
def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_features(self, fh) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.dnfile.function.extract_features(fh)
|
||||
|
||||
def get_basic_blocks(self, f) -> Iterator[BBHandle]:
|
||||
@@ -157,5 +157,5 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
inner=insn,
|
||||
)
|
||||
|
||||
def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_features(self, fh, bbh, ih) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import dnfile
|
||||
|
||||
@@ -18,35 +18,35 @@ from capa.features.common import Class, Format, String, Feature, Namespace, Char
|
||||
from capa.features.address import Address
|
||||
|
||||
|
||||
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]:
|
||||
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[tuple[Import, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)
|
||||
|
||||
|
||||
def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]:
|
||||
def extract_file_format(pe: dnfile.dnPE) -> Iterator[tuple[Format, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)
|
||||
|
||||
|
||||
def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]:
|
||||
def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[tuple[FunctionName, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)
|
||||
|
||||
|
||||
def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]:
|
||||
def extract_file_strings(pe: dnfile.dnPE) -> Iterator[tuple[String, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)
|
||||
|
||||
|
||||
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[tuple[Characteristic, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)
|
||||
|
||||
|
||||
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]:
|
||||
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[tuple[Namespace, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)
|
||||
|
||||
|
||||
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]:
|
||||
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[tuple[Class, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)
|
||||
|
||||
|
||||
def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, address in file_handler(pe):
|
||||
yield feature, address
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address
|
||||
@@ -18,30 +18,30 @@ from capa.features.extractors.base_extractor import FunctionHandle
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
||||
"""extract callers to a function"""
|
||||
for dest in fh.ctx["calls_to"]:
|
||||
yield Characteristic("calls to"), dest
|
||||
|
||||
|
||||
def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
def extract_function_calls_from(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
||||
"""extract callers from a function"""
|
||||
for src in fh.ctx["calls_from"]:
|
||||
yield Characteristic("calls from"), src
|
||||
|
||||
|
||||
def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
def extract_recursive_call(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
||||
"""extract recursive function call"""
|
||||
if fh.address in fh.ctx["calls_to"]:
|
||||
yield Characteristic("recursive call"), fh.address
|
||||
|
||||
|
||||
def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
||||
"""extract loop indicators from a function"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, addr in func_handler(fh):
|
||||
yield feature, addr
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Dict, Tuple, Union, Iterator, Optional
|
||||
from typing import Union, Iterator, Optional
|
||||
|
||||
import dnfile
|
||||
from dncil.cil.body import CilMethodBody
|
||||
@@ -144,7 +144,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
)
|
||||
|
||||
|
||||
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
|
||||
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
|
||||
"""get MethodDef methods used to access properties
|
||||
|
||||
see https://www.ntcore.com/files/dotnetformat.htm
|
||||
@@ -194,7 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
"""
|
||||
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
||||
|
||||
accessor_map: Dict[int, str] = {}
|
||||
accessor_map: dict[int, str] = {}
|
||||
for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
|
||||
accessor_map[methoddef] = methoddef_access
|
||||
|
||||
@@ -252,7 +252,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)
|
||||
|
||||
|
||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
|
||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
|
||||
"""get managed methods from MethodDef table"""
|
||||
for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
|
||||
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
|
||||
@@ -332,7 +332,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
|
||||
|
||||
def resolve_nested_typedef_name(
|
||||
nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
|
||||
) -> Tuple[str, Tuple[str, ...]]:
|
||||
) -> tuple[str, tuple[str, ...]]:
|
||||
"""Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
||||
|
||||
if index in nested_class_table:
|
||||
@@ -368,7 +368,7 @@ def resolve_nested_typedef_name(
|
||||
|
||||
def resolve_nested_typeref_name(
|
||||
index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
|
||||
) -> Tuple[str, Tuple[str, ...]]:
|
||||
) -> tuple[str, tuple[str, ...]]:
|
||||
"""Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
||||
# If the ResolutionScope decodes to a typeRef type then it is nested
|
||||
if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
|
||||
@@ -398,7 +398,7 @@ def resolve_nested_typeref_name(
|
||||
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
|
||||
|
||||
|
||||
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
|
||||
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict[int, int]:
|
||||
"""Build index for EnclosingClass based off the NestedClass row index in the nestedclass table"""
|
||||
nested_class_table = {}
|
||||
|
||||
@@ -442,7 +442,7 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
|
||||
return not bool(pe.net.Flags.CLR_ILONLY)
|
||||
|
||||
|
||||
def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
|
||||
def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[tuple[int, dnfile.base.MDTableRow]]:
|
||||
assert pe.net is not None
|
||||
assert pe.net.mdtables is not None
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Tuple, Union, Iterator, Optional
|
||||
from typing import TYPE_CHECKING, Union, Iterator, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
|
||||
@@ -61,7 +61,7 @@ def get_callee(
|
||||
return callee
|
||||
|
||||
|
||||
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction API features"""
|
||||
if ih.inner.opcode not in (
|
||||
OpCodes.Call,
|
||||
@@ -83,7 +83,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato
|
||||
yield API(name), ih.address
|
||||
|
||||
|
||||
def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction property features"""
|
||||
name: Optional[str] = None
|
||||
access: Optional[str] = None
|
||||
@@ -118,7 +118,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It
|
||||
|
||||
def extract_insn_namespace_class_features(
|
||||
fh: FunctionHandle, bh, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Union[Namespace, Class], Address]]:
|
||||
) -> Iterator[tuple[Union[Namespace, Class], Address]]:
|
||||
"""parse instruction namespace and class features"""
|
||||
type_: Optional[Union[DnType, DnUnmanagedMethod]] = None
|
||||
|
||||
@@ -173,13 +173,13 @@ def extract_insn_namespace_class_features(
|
||||
yield Namespace(type_.namespace), ih.address
|
||||
|
||||
|
||||
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction number features"""
|
||||
if ih.inner.is_ldc():
|
||||
yield Number(ih.inner.get_ldc()), ih.address
|
||||
|
||||
|
||||
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction string features"""
|
||||
if not ih.inner.is_ldstr():
|
||||
return
|
||||
@@ -197,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter
|
||||
|
||||
def extract_unmanaged_call_characteristic_features(
|
||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
) -> Iterator[tuple[Characteristic, Address]]:
|
||||
if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp):
|
||||
return
|
||||
|
||||
@@ -209,7 +209,7 @@ def extract_unmanaged_call_characteristic_features(
|
||||
yield Characteristic("unmanaged call"), ih.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract instruction features"""
|
||||
for inst_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, addr in inst_handler(fh, bbh, ih):
|
||||
|
||||
@@ -6,17 +6,17 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from typing import Tuple, Optional
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class DnType:
|
||||
def __init__(
|
||||
self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
|
||||
self, token: int, class_: tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
|
||||
):
|
||||
self.token: int = token
|
||||
self.access: Optional[str] = access
|
||||
self.namespace: str = namespace
|
||||
self.class_: Tuple[str, ...] = class_
|
||||
self.class_: tuple[str, ...] = class_
|
||||
|
||||
if member == ".ctor":
|
||||
member = "ctor"
|
||||
@@ -44,7 +44,7 @@ class DnType:
|
||||
return str(self)
|
||||
|
||||
@staticmethod
|
||||
def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""):
|
||||
def format_name(class_: tuple[str, ...], namespace: str = "", member: str = ""):
|
||||
if len(class_) > 1:
|
||||
class_str = "/".join(class_) # Concat items in tuple, separated by a "/"
|
||||
else:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import dnfile
|
||||
@@ -48,12 +48,12 @@ from capa.features.extractors.dnfile.helpers import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
|
||||
def extract_file_format(**kwargs) -> Iterator[tuple[Format, Address]]:
|
||||
yield Format(FORMAT_DOTNET), NO_ADDRESS
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
|
||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Import, Address]]:
|
||||
for method in get_dotnet_managed_imports(pe):
|
||||
# like System.IO.File::OpenRead
|
||||
yield Import(str(method)), DNTokenAddress(method.token)
|
||||
@@ -64,12 +64,12 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor
|
||||
yield Import(name), DNTokenAddress(imp.token)
|
||||
|
||||
|
||||
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]:
|
||||
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[FunctionName, Address]]:
|
||||
for method in get_dotnet_managed_methods(pe):
|
||||
yield FunctionName(str(method)), DNTokenAddress(method.token)
|
||||
|
||||
|
||||
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]:
|
||||
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Namespace, Address]]:
|
||||
"""emit namespace features from TypeRef and TypeDef tables"""
|
||||
|
||||
# namespaces may be referenced multiple times, so we need to filter
|
||||
@@ -93,7 +93,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
|
||||
yield Namespace(namespace), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
|
||||
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Class, Address]]:
|
||||
"""emit class features from TypeRef and TypeDef tables"""
|
||||
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
||||
|
||||
@@ -116,11 +116,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla
|
||||
yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)
|
||||
|
||||
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
|
||||
def extract_file_os(**kwargs) -> Iterator[tuple[OS, Address]]:
|
||||
yield OS(OS_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
|
||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address]]:
|
||||
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
|
||||
# .NET 4.5 added option: any CPU, 32-bit preferred
|
||||
assert pe.net is not None
|
||||
@@ -134,18 +134,18 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address
|
||||
yield Arch(ARCH_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]:
|
||||
def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[String, Address]]:
|
||||
yield from capa.features.extractors.common.extract_file_strings(pe.__data__)
|
||||
|
||||
|
||||
def extract_file_mixed_mode_characteristic_features(
|
||||
pe: dnfile.dnPE, **kwargs
|
||||
) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
) -> Iterator[tuple[Characteristic, Address]]:
|
||||
if is_dotnet_mixed_mode(pe):
|
||||
yield Characteristic("mixed mode"), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, addr in file_handler(pe=pe): # type: ignore
|
||||
yield feature, addr
|
||||
@@ -162,7 +162,7 @@ FILE_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in GLOBAL_HANDLERS:
|
||||
for feature, va in handler(pe=pe): # type: ignore
|
||||
yield feature, va
|
||||
@@ -204,7 +204,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
|
||||
def is_mixed_mode(self) -> bool:
|
||||
return is_dotnet_mixed_mode(self.pe)
|
||||
|
||||
def get_runtime_version(self) -> Tuple[int, int]:
|
||||
def get_runtime_version(self) -> tuple[int, int]:
|
||||
assert self.pe.net is not None
|
||||
assert self.pe.net.struct is not None
|
||||
assert self.pe.net.struct.MajorRuntimeVersion is not None
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, Number
|
||||
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
This method extracts the given call's features (such as API name and arguments),
|
||||
and returns them as API, Number, and String features.
|
||||
@@ -49,7 +49,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
||||
yield API(name), ch.address
|
||||
|
||||
|
||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in CALL_HANDLERS:
|
||||
for feature, addr in handler(ph, th, ch):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Tuple, Union, Iterator
|
||||
from typing import Union, Iterator
|
||||
|
||||
import capa.features.extractors.drakvuf.call
|
||||
import capa.features.extractors.drakvuf.file
|
||||
@@ -39,7 +39,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
||||
self.report: DrakvufReport = report
|
||||
|
||||
# sort the api calls to prevent going through the entire list each time
|
||||
self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report)
|
||||
self.sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = index_calls(report)
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report))
|
||||
@@ -48,16 +48,16 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
||||
# DRAKVUF currently does not yield information about the PE's address
|
||||
return NO_ADDRESS
|
||||
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from self.global_features
|
||||
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.drakvuf.file.extract_features(self.report)
|
||||
|
||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||
yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls)
|
||||
|
||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.drakvuf.process.extract_features(ph)
|
||||
|
||||
def get_process_name(self, ph: ProcessHandle) -> str:
|
||||
@@ -66,7 +66,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||
yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph)
|
||||
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
if False:
|
||||
# force this routine to be a generator,
|
||||
# but we don't actually have any elements to generate.
|
||||
@@ -87,10 +87,10 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
||||
|
||||
def extract_call_features(
|
||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch)
|
||||
|
||||
@classmethod
|
||||
def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor":
|
||||
def from_report(cls, report: Iterator[dict]) -> "DrakvufExtractor":
|
||||
dr = DrakvufReport.from_raw_report(report)
|
||||
return DrakvufExtractor(report=dr)
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.file import Import
|
||||
from capa.features.common import Feature
|
||||
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call, DrakvufReport
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]:
|
||||
def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]]) -> Iterator[ProcessHandle]:
|
||||
"""
|
||||
Get all the created processes for a sample.
|
||||
"""
|
||||
@@ -28,7 +28,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]])
|
||||
yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name})
|
||||
|
||||
|
||||
def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
Extract imported function names.
|
||||
"""
|
||||
@@ -43,7 +43,7 @@ def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Addre
|
||||
yield Import(name), AbsoluteVirtualAddress(function_address)
|
||||
|
||||
|
||||
def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in FILE_HANDLERS:
|
||||
for feature, addr in handler(report):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
@@ -16,22 +16,22 @@ from capa.features.extractors.drakvuf.models import DrakvufReport
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_format(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||
# DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_os(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||
# DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||
yield OS(OS_WINDOWS), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_arch(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||
# DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
||||
for global_handler in GLOBAL_HANDLER:
|
||||
for feature, addr in global_handler(report):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,16 +7,15 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import itertools
|
||||
from typing import Dict, List
|
||||
|
||||
from capa.features.address import ThreadAddress, ProcessAddress
|
||||
from capa.features.extractors.drakvuf.models import Call, DrakvufReport
|
||||
|
||||
|
||||
def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]:
|
||||
def index_calls(report: DrakvufReport) -> dict[ProcessAddress, dict[ThreadAddress, list[Call]]]:
|
||||
# this method organizes calls into processes and threads, and then sorts them based on
|
||||
# timestamp so that we can address individual calls per index (CallAddress requires call index)
|
||||
result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {}
|
||||
result: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = {}
|
||||
for call in itertools.chain(report.syscalls, report.apicalls):
|
||||
if call.pid == 0:
|
||||
# DRAKVUF captures api/native calls from all processes running on the system.
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Any, Dict, List, Iterator
|
||||
from typing import Any, Iterator
|
||||
|
||||
from pydantic import Field, BaseModel, ConfigDict, model_validator
|
||||
|
||||
@@ -47,7 +47,7 @@ class LoadedDLL(ConciseModel):
|
||||
plugin_name: str = Field(alias="Plugin")
|
||||
event: str = Field(alias="Event")
|
||||
name: str = Field(alias="DllName")
|
||||
imports: Dict[str, int] = Field(alias="Rva")
|
||||
imports: dict[str, int] = Field(alias="Rva")
|
||||
|
||||
|
||||
class Call(ConciseModel):
|
||||
@@ -58,18 +58,18 @@ class Call(ConciseModel):
|
||||
pid: int = Field(alias="PID")
|
||||
tid: int = Field(alias="TID")
|
||||
name: str = Field(alias="Method")
|
||||
arguments: Dict[str, str]
|
||||
arguments: dict[str, str]
|
||||
|
||||
|
||||
class WinApiCall(Call):
|
||||
# This class models Windows API calls captured by DRAKVUF (DLLs, etc.).
|
||||
arguments: Dict[str, str] = Field(alias="Arguments")
|
||||
arguments: dict[str, str] = Field(alias="Arguments")
|
||||
event: str = Field(alias="Event")
|
||||
return_value: str = Field(alias="ReturnValue")
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]:
|
||||
args = values["Arguments"]
|
||||
values["Arguments"] = dict(arg.split("=", 1) for arg in args)
|
||||
return values
|
||||
@@ -100,7 +100,7 @@ class SystemCall(Call):
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:
|
||||
# DRAKVUF stores argument names and values as entries in the syscall's entry.
|
||||
# This model validator collects those arguments into a dict in the model.
|
||||
values["arguments"] = {
|
||||
@@ -110,13 +110,13 @@ class SystemCall(Call):
|
||||
|
||||
|
||||
class DrakvufReport(ConciseModel):
|
||||
syscalls: List[SystemCall] = []
|
||||
apicalls: List[WinApiCall] = []
|
||||
discovered_dlls: List[DiscoveredDLL] = []
|
||||
loaded_dlls: List[LoadedDLL] = []
|
||||
syscalls: list[SystemCall] = []
|
||||
apicalls: list[WinApiCall] = []
|
||||
discovered_dlls: list[DiscoveredDLL] = []
|
||||
loaded_dlls: list[LoadedDLL] = []
|
||||
|
||||
@classmethod
|
||||
def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport":
|
||||
def from_raw_report(cls, entries: Iterator[dict]) -> "DrakvufReport":
|
||||
report = cls()
|
||||
|
||||
for entry in entries:
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.common import String, Feature
|
||||
from capa.features.address import Address, ThreadAddress, ProcessAddress
|
||||
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_threads(
|
||||
calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle
|
||||
calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle
|
||||
) -> Iterator[ThreadHandle]:
|
||||
"""
|
||||
Get the threads associated with a given process.
|
||||
@@ -27,11 +27,11 @@ def get_threads(
|
||||
yield ThreadHandle(address=thread_addr, inner={})
|
||||
|
||||
|
||||
def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_process_name(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield String(ph.inner["process_name"]), ph.address
|
||||
|
||||
|
||||
def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in PROCESS_HANDLERS:
|
||||
for feature, addr in handler(ph):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress
|
||||
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
|
||||
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_calls(
|
||||
sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle
|
||||
sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle, th: ThreadHandle
|
||||
) -> Iterator[CallHandle]:
|
||||
for i, call in enumerate(sorted_calls[ph.address][th.address]):
|
||||
call_addr = DynamicCallAddress(thread=th.address, id=i)
|
||||
|
||||
@@ -10,7 +10,7 @@ import logging
|
||||
import itertools
|
||||
import collections
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
|
||||
from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -394,7 +394,7 @@ class ELF:
|
||||
return read_cstr(phdr.buf, 0)
|
||||
|
||||
@property
|
||||
def versions_needed(self) -> Dict[str, Set[str]]:
|
||||
def versions_needed(self) -> dict[str, set[str]]:
|
||||
# symbol version requirements are stored in the .gnu.version_r section,
|
||||
# which has type SHT_GNU_verneed (0x6ffffffe).
|
||||
#
|
||||
@@ -452,7 +452,7 @@ class ELF:
|
||||
return {}
|
||||
|
||||
@property
|
||||
def dynamic_entries(self) -> Iterator[Tuple[int, int]]:
|
||||
def dynamic_entries(self) -> Iterator[tuple[int, int]]:
|
||||
"""
|
||||
read the entries from the dynamic section,
|
||||
yielding the tag and value for each entry.
|
||||
@@ -547,7 +547,7 @@ class ELF:
|
||||
logger.warning("failed to read DT_NEEDED entry: %s", str(e))
|
||||
|
||||
@property
|
||||
def symtab(self) -> Optional[Tuple[Shdr, Shdr]]:
|
||||
def symtab(self) -> Optional[tuple[Shdr, Shdr]]:
|
||||
"""
|
||||
fetch the Shdr for the symtab and the associated strtab.
|
||||
"""
|
||||
@@ -682,7 +682,7 @@ class SymTab:
|
||||
symtab: Shdr,
|
||||
strtab: Shdr,
|
||||
) -> None:
|
||||
self.symbols: List[Symbol] = []
|
||||
self.symbols: list[Symbol] = []
|
||||
|
||||
self.symtab = symtab
|
||||
self.strtab = strtab
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import io
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
|
||||
@@ -166,7 +166,7 @@ def extract_file_arch(elf: ELFFile, **kwargs):
|
||||
logger.warning("unsupported architecture: %s", arch)
|
||||
|
||||
|
||||
def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, addr in file_handler(elf=elf, buf=buf): # type: ignore
|
||||
yield feature, addr
|
||||
@@ -182,7 +182,7 @@ FILE_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
|
||||
for global_handler in GLOBAL_HANDLERS:
|
||||
for feature, addr in global_handler(elf=elf, buf=buf): # type: ignore
|
||||
yield feature, addr
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import ghidra
|
||||
from ghidra.program.model.lang import OperandType
|
||||
@@ -97,7 +97,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
|
||||
return False
|
||||
|
||||
|
||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract stackstring indicators from basic block"""
|
||||
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
||||
|
||||
@@ -105,7 +105,7 @@ def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[
|
||||
yield Characteristic("stack string"), bbh.address
|
||||
|
||||
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""check basic block for tight loop indicators"""
|
||||
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
||||
|
||||
@@ -119,7 +119,7 @@ BASIC_BLOCK_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given basic block.
|
||||
|
||||
@@ -127,7 +127,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
|
||||
bb: the basic block to process.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this basic block.
|
||||
tuple[Feature, int]: the features and their location found in this basic block.
|
||||
"""
|
||||
yield BasicBlock(), bbh.address
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.ghidra.file
|
||||
import capa.features.extractors.ghidra.insn
|
||||
@@ -40,7 +40,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
||||
)
|
||||
)
|
||||
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features: list[tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
|
||||
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
|
||||
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())
|
||||
@@ -73,7 +73,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
||||
func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821
|
||||
return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)
|
||||
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.ghidra.function.extract_features(fh)
|
||||
|
||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||
@@ -81,7 +81,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
yield from ghidra_helpers.get_function_blocks(fh)
|
||||
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)
|
||||
|
||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import re
|
||||
import struct
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from ghidra.program.model.symbol import SourceType, SymbolType
|
||||
|
||||
@@ -22,7 +22,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
|
||||
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
||||
|
||||
|
||||
def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) -> Iterator[Tuple[int, int]]:
|
||||
def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]]) -> Iterator[tuple[int, int]]:
|
||||
"""check segment for embedded PE
|
||||
|
||||
adapted for Ghidra from:
|
||||
@@ -60,11 +60,11 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]])
|
||||
yield off, i
|
||||
|
||||
|
||||
def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract embedded PE features"""
|
||||
|
||||
# pre-compute XOR pairs
|
||||
mz_xor: List[Tuple[bytes, bytes, int]] = [
|
||||
mz_xor: list[tuple[bytes, bytes, int]] = [
|
||||
(
|
||||
capa.features.extractors.helpers.xor_static(b"MZ", i),
|
||||
capa.features.extractors.helpers.xor_static(b"PE", i),
|
||||
@@ -84,14 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||
|
||||
|
||||
def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract function exports"""
|
||||
st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
|
||||
for addr in st.getExternalEntryPointIterator():
|
||||
yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())
|
||||
|
||||
|
||||
def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract function imports
|
||||
|
||||
1. imports by ordinal:
|
||||
@@ -116,14 +116,14 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield Import(name), AbsoluteVirtualAddress(addr)
|
||||
|
||||
|
||||
def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract section names"""
|
||||
|
||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||
yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())
|
||||
|
||||
|
||||
def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract ASCII and UTF-16 LE strings"""
|
||||
|
||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||
@@ -141,7 +141,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield String(s.s), FileOffsetAddress(offset)
|
||||
|
||||
|
||||
def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
@@ -162,7 +162,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield FunctionName(name[1:]), addr
|
||||
|
||||
|
||||
def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
||||
ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||
if "PE" in ef:
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
@@ -175,7 +175,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
||||
raise NotImplementedError(f"unexpected file format: {ef}")
|
||||
|
||||
|
||||
def extract_features() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract file features"""
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, addr in file_handler():
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import ghidra
|
||||
from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator
|
||||
@@ -49,7 +49,7 @@ def extract_recursive_call(fh: FunctionHandle):
|
||||
yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, addr in func_handler(fh):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
import contextlib
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.ghidra.helpers
|
||||
import capa.features.extractors.elf
|
||||
@@ -18,7 +18,7 @@ from capa.features.address import NO_ADDRESS, Address
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_os() -> Iterator[tuple[Feature, Address]]:
|
||||
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||
|
||||
if "PE" in format_name:
|
||||
@@ -45,7 +45,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
||||
return
|
||||
|
||||
|
||||
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_arch() -> Iterator[tuple[Feature, Address]]:
|
||||
lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821
|
||||
|
||||
if "x86" in lang_id and "64" in lang_id:
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Dict, List, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import ghidra
|
||||
import java.lang
|
||||
@@ -20,7 +20,7 @@ from capa.features.address import AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
|
||||
def ints_to_bytes(bytez: List[int]) -> bytes:
|
||||
def ints_to_bytes(bytez: list[int]) -> bytes:
|
||||
"""convert Java signed ints to Python bytes
|
||||
|
||||
args:
|
||||
@@ -83,10 +83,10 @@ def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
|
||||
|
||||
|
||||
def get_file_imports() -> Dict[int, List[str]]:
|
||||
def get_file_imports() -> dict[int, list[str]]:
|
||||
"""get all import names & addrs"""
|
||||
|
||||
import_dict: Dict[int, List[str]] = {}
|
||||
import_dict: dict[int, list[str]] = {}
|
||||
|
||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||
for r in f.getSymbol().getReferences():
|
||||
@@ -110,7 +110,7 @@ def get_file_imports() -> Dict[int, List[str]]:
|
||||
return import_dict
|
||||
|
||||
|
||||
def get_file_externs() -> Dict[int, List[str]]:
|
||||
def get_file_externs() -> dict[int, list[str]]:
|
||||
"""
|
||||
Gets function names & addresses of statically-linked library functions
|
||||
|
||||
@@ -124,7 +124,7 @@ def get_file_externs() -> Dict[int, List[str]]:
|
||||
- Note: See Symbol Table labels
|
||||
"""
|
||||
|
||||
extern_dict: Dict[int, List[str]] = {}
|
||||
extern_dict: dict[int, list[str]] = {}
|
||||
|
||||
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
|
||||
# .isExternal() misses more than this config for the function symbols
|
||||
@@ -143,7 +143,7 @@ def get_file_externs() -> Dict[int, List[str]]:
|
||||
return extern_dict
|
||||
|
||||
|
||||
def map_fake_import_addrs() -> Dict[int, List[int]]:
|
||||
def map_fake_import_addrs() -> dict[int, list[int]]:
|
||||
"""
|
||||
Map ghidra's fake import entrypoints to their
|
||||
real addresses
|
||||
@@ -162,7 +162,7 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:
|
||||
- 0x473090 -> PTR_CreateServiceW_00473090
|
||||
- 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress)
|
||||
"""
|
||||
fake_dict: Dict[int, List[int]] = {}
|
||||
fake_dict: dict[int, list[int]] = {}
|
||||
|
||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||
for r in f.getSymbol().getReferences():
|
||||
@@ -174,9 +174,9 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:
|
||||
|
||||
def check_addr_for_api(
|
||||
addr: ghidra.program.model.address.Address,
|
||||
fakes: Dict[int, List[int]],
|
||||
imports: Dict[int, List[str]],
|
||||
externs: Dict[int, List[str]],
|
||||
fakes: dict[int, list[int]],
|
||||
imports: dict[int, list[str]],
|
||||
externs: dict[int, list[str]],
|
||||
) -> bool:
|
||||
offset = addr.getOffset()
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Any, Dict, Tuple, Iterator
|
||||
from typing import Any, Iterator
|
||||
|
||||
import ghidra
|
||||
from ghidra.program.model.lang import OperandType
|
||||
@@ -26,21 +26,21 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS
|
||||
|
||||
|
||||
def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||
def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||
"""Populate the import cache for this context"""
|
||||
if "imports_cache" not in ctx:
|
||||
ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports()
|
||||
return ctx["imports_cache"]
|
||||
|
||||
|
||||
def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||
def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||
"""Populate the externs cache for this context"""
|
||||
if "externs_cache" not in ctx:
|
||||
ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs()
|
||||
return ctx["externs_cache"]
|
||||
|
||||
|
||||
def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||
def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||
"""Populate the fake import addrs cache for this context"""
|
||||
if "fakes_cache" not in ctx:
|
||||
ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs()
|
||||
@@ -48,7 +48,7 @@ def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||
|
||||
|
||||
def check_for_api_call(
|
||||
insn, externs: Dict[int, Any], fakes: Dict[int, Any], imports: Dict[int, Any], imp_or_ex: bool
|
||||
insn, externs: dict[int, Any], fakes: dict[int, Any], imports: dict[int, Any], imp_or_ex: bool
|
||||
) -> Iterator[Any]:
|
||||
"""check instruction for API call
|
||||
|
||||
@@ -110,7 +110,7 @@ def check_for_api_call(
|
||||
yield info
|
||||
|
||||
|
||||
def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||
|
||||
if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
|
||||
@@ -131,7 +131,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle)
|
||||
yield API(ext), ih.address
|
||||
|
||||
|
||||
def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction number features
|
||||
example:
|
||||
@@ -186,7 +186,7 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
||||
yield OperandOffset(i, const), addr
|
||||
|
||||
|
||||
def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction structure offset features
|
||||
|
||||
@@ -219,7 +219,7 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
||||
yield OperandOffset(i, op_off), ih.address
|
||||
|
||||
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse referenced byte sequences
|
||||
|
||||
@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
yield Bytes(extracted_bytes), ih.address
|
||||
|
||||
|
||||
def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction string features
|
||||
|
||||
@@ -249,7 +249,7 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
||||
|
||||
def extract_insn_mnemonic_features(
|
||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction mnemonic features"""
|
||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||
|
||||
@@ -258,7 +258,7 @@ def extract_insn_mnemonic_features(
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse call $+5 instruction from the given instruction.
|
||||
"""
|
||||
@@ -279,7 +279,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||
|
||||
def extract_insn_segment_access_features(
|
||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction fs or gs access"""
|
||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||
|
||||
@@ -294,7 +294,7 @@ def extract_insn_segment_access_features(
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(
|
||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction peb access
|
||||
|
||||
fs:[0x30] on x86, gs:[0x60] on x64
|
||||
@@ -310,7 +310,7 @@ def extract_insn_peb_access_characteristic_features(
|
||||
|
||||
def extract_insn_cross_section_cflow(
|
||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||
|
||||
@@ -364,7 +364,7 @@ def extract_function_calls_from(
|
||||
fh: FunctionHandle,
|
||||
bb: BBHandle,
|
||||
ih: InsnHandle,
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract functions calls from features
|
||||
|
||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||
@@ -393,7 +393,7 @@ def extract_function_indirect_call_characteristic_features(
|
||||
fh: FunctionHandle,
|
||||
bb: BBHandle,
|
||||
ih: InsnHandle,
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||
does not include calls like => call ds:dword_ABD4974
|
||||
|
||||
@@ -442,7 +442,7 @@ def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle,
|
||||
bb: BBHandle,
|
||||
ih: InsnHandle,
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
f: ghidra.program.database.function.FunctionDB = fh.inner
|
||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||
|
||||
@@ -461,7 +461,7 @@ def extract_features(
|
||||
fh: FunctionHandle,
|
||||
bb: BBHandle,
|
||||
insn: InsnHandle,
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
for insn_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, addr in insn_handler(fh, bb, insn):
|
||||
yield feature, addr
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
import struct
|
||||
import builtins
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
MIN_STACKSTRING_LEN = 8
|
||||
|
||||
@@ -119,7 +119,7 @@ def twos_complement(val: int, bits: int) -> int:
|
||||
return val
|
||||
|
||||
|
||||
def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]:
|
||||
def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
|
||||
"""
|
||||
Generate (offset, key) tuples of embedded PEs
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import idaapi
|
||||
|
||||
@@ -80,19 +80,19 @@ def bb_contains_stackstring(f: idaapi.func_t, bb: idaapi.BasicBlock) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract stackstring indicators from basic block"""
|
||||
if bb_contains_stackstring(fh.inner, bbh.inner):
|
||||
yield Characteristic("stack string"), bbh.address
|
||||
|
||||
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract tight loop indicators from a basic block"""
|
||||
if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner):
|
||||
yield Characteristic("tight loop"), bbh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract basic block features"""
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
for feature, addr in bb_handler(fh, bbh):
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import idaapi
|
||||
|
||||
@@ -36,7 +36,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
||||
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
|
||||
)
|
||||
)
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features: list[tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
|
||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
|
||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
|
||||
@@ -61,7 +61,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
||||
f = idaapi.get_func(ea)
|
||||
return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f)
|
||||
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.ida.function.extract_features(fh)
|
||||
|
||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||
@@ -70,7 +70,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
||||
for bb in ida_helpers.get_function_blocks(fh.inner):
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb)
|
||||
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh)
|
||||
|
||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
@@ -26,7 +26,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
|
||||
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
||||
|
||||
|
||||
def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]:
|
||||
def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
|
||||
"""check segment for embedded PE
|
||||
|
||||
adapted for IDA from:
|
||||
@@ -71,7 +71,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]:
|
||||
yield off, i
|
||||
|
||||
|
||||
def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract embedded PE features
|
||||
|
||||
IDA must load resource sections for this to be complete
|
||||
@@ -83,7 +83,7 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||
|
||||
|
||||
def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract function exports"""
|
||||
for _, ordinal, ea, name in idautils.Entries():
|
||||
forwarded_name = ida_entry.get_entry_forwarder(ordinal)
|
||||
@@ -95,7 +95,7 @@ def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea)
|
||||
|
||||
|
||||
def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract function imports
|
||||
|
||||
1. imports by ordinal:
|
||||
@@ -131,7 +131,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield Import(info[1]), AbsoluteVirtualAddress(ea)
|
||||
|
||||
|
||||
def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract section names
|
||||
|
||||
IDA must load resource sections for this to be complete
|
||||
@@ -142,7 +142,7 @@ def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea)
|
||||
|
||||
|
||||
def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract ASCII and UTF-16 LE strings
|
||||
|
||||
IDA must load resource sections for this to be complete
|
||||
@@ -160,7 +160,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
||||
|
||||
|
||||
def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
@@ -177,7 +177,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
yield FunctionName(name[1:]), addr
|
||||
|
||||
|
||||
def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
||||
filetype = capa.ida.helpers.get_filetype()
|
||||
|
||||
if filetype in (idaapi.f_PE, idaapi.f_COFF):
|
||||
@@ -191,7 +191,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
||||
raise NotImplementedError(f"unexpected file format: {filetype}")
|
||||
|
||||
|
||||
def extract_features() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract file features"""
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, addr in file_handler():
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import idaapi
|
||||
import idautils
|
||||
@@ -43,7 +43,7 @@ def extract_recursive_call(fh: FunctionHandle):
|
||||
yield Characteristic("recursive call"), fh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, addr in func_handler(fh):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
import contextlib
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import ida_loader
|
||||
|
||||
@@ -19,7 +19,7 @@ from capa.features.address import NO_ADDRESS, Address
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_os() -> Iterator[tuple[Feature, Address]]:
|
||||
format_name: str = ida_loader.get_file_type_name()
|
||||
|
||||
if "PE" in format_name:
|
||||
@@ -46,7 +46,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
||||
return
|
||||
|
||||
|
||||
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_arch() -> Iterator[tuple[Feature, Address]]:
|
||||
procname = capa.ida.helpers.get_processor_name()
|
||||
if procname == "metapc" and capa.ida.helpers.is_64bit():
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import functools
|
||||
from typing import Any, Dict, Tuple, Iterator, Optional
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
@@ -41,7 +41,15 @@ if hasattr(ida_bytes, "parse_binpat_str"):
|
||||
return
|
||||
|
||||
while True:
|
||||
ea, _ = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
|
||||
ea = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
|
||||
if isinstance(ea, int):
|
||||
# "ea_t" in IDA 8.4, 8.3
|
||||
pass
|
||||
elif isinstance(ea, tuple):
|
||||
# "drc_t" in IDA 9
|
||||
ea = ea[0]
|
||||
else:
|
||||
raise NotImplementedError(f"bin_search returned unhandled type: {type(ea)}")
|
||||
if ea == idaapi.BADADDR:
|
||||
break
|
||||
start = ea + 1
|
||||
@@ -124,9 +132,9 @@ def inspect_import(imports, library, ea, function, ordinal):
|
||||
return True
|
||||
|
||||
|
||||
def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
|
||||
def get_file_imports() -> dict[int, tuple[str, str, int]]:
|
||||
"""get file imports"""
|
||||
imports: Dict[int, Tuple[str, str, int]] = {}
|
||||
imports: dict[int, tuple[str, str, int]] = {}
|
||||
|
||||
for idx in range(idaapi.get_import_module_qty()):
|
||||
library = idaapi.get_import_module_name(idx)
|
||||
@@ -147,7 +155,7 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
|
||||
return imports
|
||||
|
||||
|
||||
def get_file_externs() -> Dict[int, Tuple[str, str, int]]:
|
||||
def get_file_externs() -> dict[int, tuple[str, str, int]]:
|
||||
externs = {}
|
||||
|
||||
for seg in get_segments(skip_header_segments=True):
|
||||
@@ -248,7 +256,7 @@ def find_string_at(ea: int, min_: int = 4) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def get_op_phrase_info(op: idaapi.op_t) -> Dict:
|
||||
def get_op_phrase_info(op: idaapi.op_t) -> dict:
|
||||
"""parse phrase features from operand
|
||||
|
||||
Pretty much dup of sark's implementation:
|
||||
@@ -323,7 +331,7 @@ def is_frame_register(reg: int) -> bool:
|
||||
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
|
||||
|
||||
|
||||
def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[Tuple[Any]] = None) -> idaapi.op_t:
|
||||
def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[tuple[Any]] = None) -> idaapi.op_t:
|
||||
"""yield op_t for instruction, filter on type if specified"""
|
||||
for op in insn.ops:
|
||||
if op.type == idaapi.o_void:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import re
|
||||
from typing import Any, Dict, Tuple, Iterator, Optional
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
import idc
|
||||
import ida_ua
|
||||
@@ -25,19 +25,19 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
|
||||
def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||
def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||
if "imports_cache" not in ctx:
|
||||
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
|
||||
return ctx["imports_cache"]
|
||||
|
||||
|
||||
def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||
def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
|
||||
if "externs_cache" not in ctx:
|
||||
ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs()
|
||||
return ctx["externs_cache"]
|
||||
|
||||
|
||||
def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[Tuple[str, str]]:
|
||||
def check_for_api_call(insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[tuple[str, str]]:
|
||||
"""check instruction for API call"""
|
||||
info = None
|
||||
ref = insn.ea
|
||||
@@ -65,7 +65,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[T
|
||||
return info
|
||||
|
||||
|
||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction API features
|
||||
|
||||
@@ -135,7 +135,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
||||
|
||||
def extract_insn_number_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction number features
|
||||
example:
|
||||
@@ -181,7 +181,7 @@ def extract_insn_number_features(
|
||||
yield OperandOffset(i, const), ih.address
|
||||
|
||||
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse referenced byte sequences
|
||||
example:
|
||||
@@ -203,7 +203,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
||||
|
||||
def extract_insn_string_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction string features
|
||||
|
||||
@@ -221,7 +221,7 @@ def extract_insn_string_features(
|
||||
|
||||
def extract_insn_offset_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction structure offset features
|
||||
|
||||
@@ -369,7 +369,7 @@ def is_nzxor_stack_cookie(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction non-zeroing XOR instruction
|
||||
ignore expected non-zeroing XORs, e.g. security cookies
|
||||
@@ -387,14 +387,14 @@ def extract_insn_nzxor_characteristic_features(
|
||||
|
||||
def extract_insn_mnemonic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction mnemonic features"""
|
||||
yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address
|
||||
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse call $+5 instruction from the given instruction.
|
||||
"""
|
||||
@@ -409,7 +409,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction peb access
|
||||
|
||||
fs:[0x30] on x86, gs:[0x60] on x64
|
||||
@@ -437,7 +437,7 @@ def extract_insn_peb_access_characteristic_features(
|
||||
|
||||
def extract_insn_segment_access_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse instruction fs or gs access
|
||||
|
||||
TODO:
|
||||
@@ -466,7 +466,7 @@ def extract_insn_segment_access_features(
|
||||
|
||||
def extract_insn_cross_section_cflow(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
@@ -482,7 +482,7 @@ def extract_insn_cross_section_cflow(
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
|
||||
|
||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract functions calls from features
|
||||
|
||||
most relevant at the function scope, however, it's most efficient to extract at the instruction scope
|
||||
@@ -496,7 +496,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||
does not include calls like => call ds:dword_ABD4974
|
||||
|
||||
@@ -509,7 +509,7 @@ def extract_function_indirect_call_characteristic_features(
|
||||
yield Characteristic("indirect call"), ih.address
|
||||
|
||||
|
||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract instruction features"""
|
||||
for inst_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, ea in inst_handler(f, bbh, insn):
|
||||
|
||||
@@ -5,11 +5,9 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Dict, List, Tuple, Union
|
||||
from typing import Union, TypeAlias
|
||||
from dataclasses import dataclass
|
||||
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, DynamicCallAddress
|
||||
from capa.features.extractors.base_extractor import (
|
||||
@@ -27,19 +25,19 @@ from capa.features.extractors.base_extractor import (
|
||||
|
||||
@dataclass
|
||||
class InstructionFeatures:
|
||||
features: List[Tuple[Address, Feature]]
|
||||
features: list[tuple[Address, Feature]]
|
||||
|
||||
|
||||
@dataclass
|
||||
class BasicBlockFeatures:
|
||||
features: List[Tuple[Address, Feature]]
|
||||
instructions: Dict[Address, InstructionFeatures]
|
||||
features: list[tuple[Address, Feature]]
|
||||
instructions: dict[Address, InstructionFeatures]
|
||||
|
||||
|
||||
@dataclass
|
||||
class FunctionFeatures:
|
||||
features: List[Tuple[Address, Feature]]
|
||||
basic_blocks: Dict[Address, BasicBlockFeatures]
|
||||
features: list[tuple[Address, Feature]]
|
||||
basic_blocks: dict[Address, BasicBlockFeatures]
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -52,9 +50,9 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
base_address: Address
|
||||
sample_hashes: SampleHashes
|
||||
global_features: List[Feature]
|
||||
file_features: List[Tuple[Address, Feature]]
|
||||
functions: Dict[Address, FunctionFeatures]
|
||||
global_features: list[Feature]
|
||||
file_features: list[tuple[Address, Feature]]
|
||||
functions: dict[Address, FunctionFeatures]
|
||||
|
||||
def get_base_address(self):
|
||||
return self.base_address
|
||||
@@ -98,19 +96,19 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor):
|
||||
@dataclass
|
||||
class CallFeatures:
|
||||
name: str
|
||||
features: List[Tuple[Address, Feature]]
|
||||
features: list[tuple[Address, Feature]]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ThreadFeatures:
|
||||
features: List[Tuple[Address, Feature]]
|
||||
calls: Dict[Address, CallFeatures]
|
||||
features: list[tuple[Address, Feature]]
|
||||
calls: dict[Address, CallFeatures]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProcessFeatures:
|
||||
features: List[Tuple[Address, Feature]]
|
||||
threads: Dict[Address, ThreadFeatures]
|
||||
features: list[tuple[Address, Feature]]
|
||||
threads: dict[Address, ThreadFeatures]
|
||||
name: str
|
||||
|
||||
|
||||
@@ -118,9 +116,9 @@ class ProcessFeatures:
|
||||
class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
|
||||
base_address: Address
|
||||
sample_hashes: SampleHashes
|
||||
global_features: List[Feature]
|
||||
file_features: List[Tuple[Address, Feature]]
|
||||
processes: Dict[Address, ProcessFeatures]
|
||||
global_features: list[Feature]
|
||||
file_features: list[tuple[Address, Feature]]
|
||||
processes: dict[Address, ProcessFeatures]
|
||||
|
||||
def extract_global_features(self):
|
||||
for feature in self.global_features:
|
||||
|
||||
@@ -148,11 +148,11 @@ def extract_file_features(pe, buf):
|
||||
buf: the raw sample bytes
|
||||
|
||||
yields:
|
||||
Tuple[Feature, VA]: a feature and its location.
|
||||
tuple[Feature, VA]: a feature and its location.
|
||||
"""
|
||||
|
||||
for file_handler in FILE_HANDLERS:
|
||||
# file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]]
|
||||
# file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]]
|
||||
for feature, va in file_handler(pe=pe, buf=buf): # type: ignore
|
||||
yield feature, va
|
||||
|
||||
@@ -177,10 +177,10 @@ def extract_global_features(pe, buf):
|
||||
buf: the raw sample bytes
|
||||
|
||||
yields:
|
||||
Tuple[Feature, VA]: a feature and its location.
|
||||
tuple[Feature, VA]: a feature and its location.
|
||||
"""
|
||||
for handler in GLOBAL_HANDLERS:
|
||||
# file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]]
|
||||
# file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]]
|
||||
for feature, va in handler(pe=pe, buf=buf): # type: ignore
|
||||
yield feature, va
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import envi
|
||||
import envi.archs.i386.disasm
|
||||
@@ -20,7 +20,7 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||
|
||||
|
||||
def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse features from the given basic block.
|
||||
|
||||
@@ -47,7 +47,7 @@ def _bb_has_tight_loop(f, bb):
|
||||
return False
|
||||
|
||||
|
||||
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""check basic block for tight loop indicators"""
|
||||
if _bb_has_tight_loop(f, bb.inner):
|
||||
yield Characteristic("tight loop"), bb.address
|
||||
@@ -70,7 +70,7 @@ def _bb_has_stackstring(f, bb):
|
||||
return False
|
||||
|
||||
|
||||
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""check basic block for stackstring indicators"""
|
||||
if _bb_has_stackstring(f, bb.inner):
|
||||
yield Characteristic("stack string"), bb.address
|
||||
@@ -145,7 +145,7 @@ def is_printable_utf16le(chars: bytes) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given basic block.
|
||||
|
||||
@@ -154,7 +154,7 @@ def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature,
|
||||
bb (viv_utils.BasicBlock): the basic block to process.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this basic block.
|
||||
tuple[Feature, int]: the features and their location found in this basic block.
|
||||
"""
|
||||
yield BasicBlock(), AbsoluteVirtualAddress(bb.inner.va)
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Any, Dict, List, Tuple, Iterator
|
||||
from typing import Any, Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import viv_utils
|
||||
@@ -39,7 +39,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
super().__init__(hashes=SampleHashes.from_bytes(self.buf))
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features: list[tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
|
||||
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os))
|
||||
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
|
||||
@@ -55,13 +55,13 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
|
||||
|
||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||
cache: Dict[str, Any] = {}
|
||||
cache: dict[str, Any] = {}
|
||||
for va in sorted(self.vw.getFunctions()):
|
||||
yield FunctionHandle(
|
||||
address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}
|
||||
)
|
||||
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.viv.function.extract_features(fh)
|
||||
|
||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||
@@ -69,7 +69,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
for bb in f.basic_blocks:
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb)
|
||||
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.viv.basicblock.extract_features(fh, bbh)
|
||||
|
||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
@@ -79,7 +79,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
def extract_insn_features(
|
||||
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih)
|
||||
|
||||
def is_library_function(self, addr):
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import PE.carve as pe_carve # vivisect PE
|
||||
import vivisect
|
||||
@@ -21,7 +21,7 @@ from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
|
||||
|
||||
def extract_file_embedded_pe(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_embedded_pe(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||
for offset, _ in pe_carve.carve(buf, 1):
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
||||
|
||||
@@ -37,7 +37,7 @@ def get_first_vw_filename(vw: vivisect.VivWorkspace):
|
||||
return next(iter(vw.filemeta.keys()))
|
||||
|
||||
|
||||
def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||
for va, _, name, _ in vw.getExports():
|
||||
yield Export(name), AbsoluteVirtualAddress(va)
|
||||
|
||||
@@ -56,7 +56,7 @@ def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[T
|
||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va)
|
||||
|
||||
|
||||
def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_import_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract imported function names
|
||||
1. imports by ordinal:
|
||||
@@ -91,16 +91,16 @@ def is_viv_ord_impname(impname: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def extract_file_section_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_section_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||
for va, _, segname, _ in vw.getSegments():
|
||||
yield Section(segname), AbsoluteVirtualAddress(va)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_strings(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_function_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
@@ -117,11 +117,11 @@ def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address
|
||||
yield FunctionName(name[1:]), addr
|
||||
|
||||
|
||||
def extract_file_format(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_format(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.common.extract_format(buf)
|
||||
|
||||
|
||||
def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(vw, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract file features from given workspace
|
||||
|
||||
@@ -130,7 +130,7 @@ def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
buf: the raw input file bytes
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: a feature and its location.
|
||||
tuple[Feature, Address]: a feature and its location.
|
||||
"""
|
||||
|
||||
for file_handler in FILE_HANDLERS:
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import envi
|
||||
import viv_utils
|
||||
@@ -19,7 +19,7 @@ from capa.features.extractors.elf import SymTab
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
|
||||
def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse features from the given function.
|
||||
|
||||
@@ -32,7 +32,7 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
if fh.inner.vw.metadata["Format"] == "elf":
|
||||
# the file's symbol table gets added to the metadata of the vivisect workspace.
|
||||
# this is in order to eliminate the computational overhead of refetching symtab each time.
|
||||
@@ -54,13 +54,13 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature,
|
||||
yield FunctionName(sym_name), fh.address
|
||||
|
||||
|
||||
def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
f: viv_utils.Function = fhandle.inner
|
||||
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(src)
|
||||
|
||||
|
||||
def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse if a function has a loop
|
||||
"""
|
||||
@@ -88,7 +88,7 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Ad
|
||||
yield Characteristic("loop"), fhandle.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given function.
|
||||
|
||||
@@ -96,7 +96,7 @@ def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
fh: the function handle from which to extract features
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this function.
|
||||
tuple[Feature, int]: the features and their location found in this function.
|
||||
"""
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, addr in func_handler(fh):
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
@@ -14,7 +14,7 @@ from capa.features.address import NO_ADDRESS, Address
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_arch(vw) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_arch(vw) -> Iterator[tuple[Feature, Address]]:
|
||||
arch = vw.getMeta("Architecture")
|
||||
if arch == "amd64":
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import collections
|
||||
from typing import Set, List, Deque, Tuple, Optional
|
||||
from typing import Deque, Optional
|
||||
|
||||
import envi
|
||||
import vivisect.const
|
||||
@@ -28,7 +28,7 @@ FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH
|
||||
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
|
||||
|
||||
|
||||
def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]:
|
||||
def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]:
|
||||
"""
|
||||
collect the instructions that flow to the given address, local to the current function.
|
||||
|
||||
@@ -37,7 +37,7 @@ def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]:
|
||||
va (int): the virtual address to inspect
|
||||
|
||||
returns:
|
||||
List[int]: the prior instructions, which may fallthrough and/or jump here
|
||||
list[int]: the prior instructions, which may fallthrough and/or jump here
|
||||
"""
|
||||
ret = []
|
||||
|
||||
@@ -71,7 +71,7 @@ class NotFoundError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[int]]:
|
||||
def find_definition(vw: VivWorkspace, va: int, reg: int) -> tuple[int, Optional[int]]:
|
||||
"""
|
||||
scan backwards from the given address looking for assignments to the given register.
|
||||
if a constant, return that value.
|
||||
@@ -88,7 +88,7 @@ def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[
|
||||
NotFoundError: when the definition cannot be found.
|
||||
"""
|
||||
q: Deque[int] = collections.deque()
|
||||
seen: Set[int] = set()
|
||||
seen: set[int] = set()
|
||||
|
||||
q.extend(get_previous_instructions(vw, va))
|
||||
while q:
|
||||
@@ -139,7 +139,7 @@ def is_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> bool:
|
||||
return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
|
||||
|
||||
|
||||
def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> Tuple[int, Optional[int]]:
|
||||
def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> tuple[int, Optional[int]]:
|
||||
"""
|
||||
inspect the given indirect call instruction and attempt to resolve the target address.
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Tuple, Callable, Iterator
|
||||
from typing import Callable, Iterator
|
||||
|
||||
import envi
|
||||
import envi.exc
|
||||
@@ -33,7 +33,7 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
def interface_extract_instruction_XXX(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse features from the given instruction.
|
||||
|
||||
@@ -53,7 +53,7 @@ def get_imports(vw):
|
||||
caching accessor to vivisect workspace imports
|
||||
avoids performance issues in vivisect when collecting locations
|
||||
|
||||
returns: Dict[int, Tuple[str, str]]
|
||||
returns: dict[int, tuple[str, str]]
|
||||
"""
|
||||
if "imports" in vw.metadata:
|
||||
return vw.metadata["imports"]
|
||||
@@ -65,7 +65,7 @@ def get_imports(vw):
|
||||
return imports
|
||||
|
||||
|
||||
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse API features from the given instruction.
|
||||
|
||||
@@ -260,7 +260,7 @@ def read_bytes(vw, va: int) -> bytes:
|
||||
raise
|
||||
|
||||
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse byte sequence features from the given instruction.
|
||||
example:
|
||||
@@ -371,7 +371,7 @@ def is_security_cookie(f, bb, insn) -> bool:
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse non-zeroing XOR instruction from the given instruction.
|
||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||
@@ -392,12 +392,12 @@ def extract_insn_nzxor_characteristic_features(
|
||||
yield Characteristic("nzxor"), ih.address
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse mnemonic features from the given instruction."""
|
||||
yield Mnemonic(ih.inner.mnem), ih.address
|
||||
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse call $+5 instruction from the given instruction.
|
||||
"""
|
||||
@@ -415,7 +415,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle)
|
||||
yield Characteristic("call $+5"), ih.address
|
||||
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
||||
"""
|
||||
@@ -451,7 +451,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> It
|
||||
pass
|
||||
|
||||
|
||||
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse the instruction for access to fs or gs"""
|
||||
insn: envi.Opcode = ih.inner
|
||||
|
||||
@@ -472,7 +472,7 @@ def get_section(vw, va: int):
|
||||
raise KeyError(va)
|
||||
|
||||
|
||||
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
||||
"""
|
||||
@@ -513,7 +513,7 @@ def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) ->
|
||||
|
||||
# this is a feature that's most relevant at the function scope,
|
||||
# however, its most efficient to extract at the instruction scope.
|
||||
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
@@ -554,7 +554,7 @@ def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera
|
||||
|
||||
# this is a feature that's most relevant at the function or basic block scope,
|
||||
# however, its most efficient to extract at the instruction scope.
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
||||
does not include calls like => call ds:dword_ABD4974
|
||||
@@ -578,7 +578,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle
|
||||
|
||||
def extract_op_number_features(
|
||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse number features from the given operand.
|
||||
|
||||
example:
|
||||
@@ -623,7 +623,7 @@ def extract_op_number_features(
|
||||
|
||||
def extract_op_offset_features(
|
||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse structure offset features from the given operand."""
|
||||
# example:
|
||||
#
|
||||
@@ -674,7 +674,7 @@ def extract_op_offset_features(
|
||||
|
||||
def extract_op_string_features(
|
||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
"""parse string features from the given operand."""
|
||||
# example:
|
||||
#
|
||||
@@ -705,15 +705,15 @@ def extract_op_string_features(
|
||||
yield String(s), ih.address
|
||||
|
||||
|
||||
def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for i, oper in enumerate(insn.inner.opers):
|
||||
for op_handler in OPERAND_HANDLERS:
|
||||
for feature, addr in op_handler(f, bb, insn, i, oper):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
OPERAND_HANDLERS: List[
|
||||
Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[Tuple[Feature, Address]]]
|
||||
OPERAND_HANDLERS: list[
|
||||
Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[tuple[Feature, Address]]]
|
||||
] = [
|
||||
extract_op_number_features,
|
||||
extract_op_offset_features,
|
||||
@@ -721,7 +721,7 @@ OPERAND_HANDLERS: List[
|
||||
]
|
||||
|
||||
|
||||
def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(f, bb, insn) -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given insn.
|
||||
|
||||
@@ -731,14 +731,14 @@ def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]:
|
||||
insn (vivisect...Instruction): the instruction to process.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, Address]: the features and their location found in this insn.
|
||||
tuple[Feature, Address]: the features and their location found in this insn.
|
||||
"""
|
||||
for insn_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, addr in insn_handler(f, bb, insn):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
INSTRUCTION_HANDLERS: List[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[Tuple[Feature, Address]]]] = [
|
||||
INSTRUCTION_HANDLERS: list[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[tuple[Feature, Address]]]] = [
|
||||
extract_insn_api_features,
|
||||
extract_insn_bytes_features,
|
||||
extract_insn_nzxor_characteristic_features,
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
from collections import defaultdict
|
||||
@@ -35,6 +35,8 @@ class VMRayMonitorProcess:
|
||||
ppid: int # parent process ID assigned by OS
|
||||
monitor_id: int # unique ID assigned to process by VMRay
|
||||
image_name: str
|
||||
filename: str
|
||||
cmd_line: str
|
||||
|
||||
|
||||
class VMRayAnalysis:
|
||||
@@ -58,17 +60,17 @@ class VMRayAnalysis:
|
||||
"VMRay feature extractor does not support flog version %s" % self.flog.analysis.log_version
|
||||
)
|
||||
|
||||
self.exports: Dict[int, str] = {}
|
||||
self.imports: Dict[int, Tuple[str, str]] = {}
|
||||
self.sections: Dict[int, str] = {}
|
||||
self.monitor_processes: Dict[int, VMRayMonitorProcess] = {}
|
||||
self.monitor_threads: Dict[int, VMRayMonitorThread] = {}
|
||||
self.exports: dict[int, str] = {}
|
||||
self.imports: dict[int, tuple[str, str]] = {}
|
||||
self.sections: dict[int, str] = {}
|
||||
self.monitor_processes: dict[int, VMRayMonitorProcess] = {}
|
||||
self.monitor_threads: dict[int, VMRayMonitorThread] = {}
|
||||
|
||||
# map monitor thread IDs to their associated monitor process ID
|
||||
self.monitor_threads_by_monitor_process: Dict[int, List[int]] = defaultdict(list)
|
||||
self.monitor_threads_by_monitor_process: dict[int, list[int]] = defaultdict(list)
|
||||
|
||||
# map function calls to their associated monitor thread ID mapped to its associated monitor process ID
|
||||
self.monitor_process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
|
||||
self.monitor_process_calls: dict[int, dict[int, list[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
|
||||
|
||||
self.base_address: int
|
||||
|
||||
@@ -160,7 +162,12 @@ class VMRayAnalysis:
|
||||
self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
|
||||
)
|
||||
self.monitor_processes[process.monitor_id] = VMRayMonitorProcess(
|
||||
process.os_pid, ppid, process.monitor_id, process.image_name
|
||||
process.os_pid,
|
||||
ppid,
|
||||
process.monitor_id,
|
||||
process.image_name,
|
||||
process.filename,
|
||||
process.cmd_line,
|
||||
)
|
||||
|
||||
# not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394
|
||||
@@ -170,6 +177,8 @@ class VMRayAnalysis:
|
||||
monitor_process.os_parent_pid,
|
||||
monitor_process.process_id,
|
||||
monitor_process.image_name,
|
||||
monitor_process.filename,
|
||||
monitor_process.cmd_line,
|
||||
)
|
||||
|
||||
if monitor_process.process_id not in self.monitor_processes:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, Number
|
||||
@@ -18,7 +18,7 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
if param.deref is not None:
|
||||
# pointer types contain a special "deref" member that stores the deref'd value
|
||||
# so we check for this first and ignore Param.value as this always contains the
|
||||
@@ -39,7 +39,7 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feat
|
||||
yield Number(hexint(param.value)), ch.address
|
||||
|
||||
|
||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
call: FunctionCall = ch.inner
|
||||
|
||||
if call.params_in:
|
||||
@@ -50,7 +50,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
||||
yield API(name), ch.address
|
||||
|
||||
|
||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in CALL_HANDLERS:
|
||||
for feature, addr in handler(ph, th, ch):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
|
||||
from typing import List, Tuple, Iterator
|
||||
from typing import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import capa.helpers
|
||||
@@ -34,8 +34,8 @@ from capa.features.extractors.base_extractor import (
|
||||
)
|
||||
|
||||
|
||||
def get_formatted_params(params: ParamList) -> List[str]:
|
||||
params_list: List[str] = []
|
||||
def get_formatted_params(params: ParamList) -> list[str]:
|
||||
params_list: list[str] = []
|
||||
|
||||
for param in params:
|
||||
if param.deref and param.deref.value is not None:
|
||||
@@ -69,10 +69,10 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
||||
# value according to the PE header, the actual trace may use a different imagebase
|
||||
return AbsoluteVirtualAddress(self.analysis.base_address)
|
||||
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.vmray.file.extract_features(self.analysis)
|
||||
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from self.global_features
|
||||
|
||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||
@@ -80,13 +80,13 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
||||
address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
|
||||
yield ProcessHandle(address, inner=monitor_process)
|
||||
|
||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
# we have not identified process-specific features for VMRay yet
|
||||
yield from []
|
||||
|
||||
def get_process_name(self, ph) -> str:
|
||||
monitor_process: VMRayMonitorProcess = ph.inner
|
||||
return monitor_process.image_name
|
||||
return f"{monitor_process.image_name} ({monitor_process.cmd_line})"
|
||||
|
||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||
for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]:
|
||||
@@ -95,7 +95,7 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
||||
address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid)
|
||||
yield ThreadHandle(address=address, inner=monitor_thread)
|
||||
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
if False:
|
||||
# force this routine to be a generator,
|
||||
# but we don't actually have any elements to generate.
|
||||
@@ -109,7 +109,7 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
||||
|
||||
def extract_call_features(
|
||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
|
||||
|
||||
def get_call_name(self, ph, th, ch) -> str:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.common
|
||||
from capa.features.file import Export, Import, Section
|
||||
@@ -18,52 +18,52 @@ from capa.features.extractors.helpers import generate_symbols
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_export_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_export_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for addr, name in analysis.exports.items():
|
||||
yield Export(name), AbsoluteVirtualAddress(addr)
|
||||
|
||||
|
||||
def extract_import_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_import_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for addr, (module, api) in analysis.imports.items():
|
||||
for symbol in generate_symbols(module, api, include_dll=True):
|
||||
yield Import(symbol), AbsoluteVirtualAddress(addr)
|
||||
|
||||
|
||||
def extract_section_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_section_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for addr, name in analysis.sections.items():
|
||||
yield Section(name), AbsoluteVirtualAddress(addr)
|
||||
|
||||
|
||||
def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for filename in analysis.sv2.filenames.values():
|
||||
yield String(filename.filename), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for mutex in analysis.sv2.mutexes.values():
|
||||
yield String(mutex.name), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for domain in analysis.sv2.domains.values():
|
||||
yield String(domain.domain), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for ip_address in analysis.sv2.ip_addresses.values():
|
||||
yield String(ip_address.ip_address), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for registry_record in analysis.sv2.registry_records.values():
|
||||
yield String(registry_record.reg_key_name), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.common.extract_file_strings(analysis.sample_file_buf)
|
||||
|
||||
|
||||
def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for handler in FILE_HANDLERS:
|
||||
for feature, addr in handler(analysis):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Iterator
|
||||
|
||||
from capa.features.common import (
|
||||
OS,
|
||||
@@ -27,7 +27,7 @@ from capa.features.extractors.vmray import VMRayAnalysis
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_arch(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
file_type: str = analysis.file_type
|
||||
|
||||
if "x86-32" in file_type:
|
||||
@@ -38,7 +38,7 @@ def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
raise ValueError("unrecognized arch from the VMRay report: %s" % file_type)
|
||||
|
||||
|
||||
def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_format(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
assert analysis.sample_file_static_data is not None
|
||||
if analysis.sample_file_static_data.pe:
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
@@ -48,7 +48,7 @@ def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]
|
||||
raise ValueError("unrecognized file format from the VMRay report: %s" % analysis.file_type)
|
||||
|
||||
|
||||
def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_os(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
file_type: str = analysis.file_type
|
||||
|
||||
if "windows" in file_type.lower():
|
||||
@@ -59,7 +59,7 @@ def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
raise ValueError("unrecognized OS from the VMRay report: %s" % file_type)
|
||||
|
||||
|
||||
def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||
def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
||||
for global_handler in GLOBAL_HANDLER:
|
||||
for feature, addr in global_handler(analysis):
|
||||
yield feature, addr
|
||||
|
||||
@@ -6,11 +6,10 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from typing import Dict, List, Union, Optional
|
||||
from typing import Union, Optional, Annotated
|
||||
|
||||
import xmltodict
|
||||
from pydantic import Field, BaseModel
|
||||
from typing_extensions import Annotated
|
||||
from pydantic.functional_validators import BeforeValidator
|
||||
|
||||
"""
|
||||
@@ -87,7 +86,7 @@ class Param(BaseModel):
|
||||
deref: Optional[ParamDeref] = None
|
||||
|
||||
|
||||
def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]:
|
||||
def validate_ensure_is_list(value: Union[list[Param], Param]) -> list[Param]:
|
||||
if isinstance(value, list):
|
||||
return value
|
||||
else:
|
||||
@@ -95,9 +94,9 @@ def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]:
|
||||
|
||||
|
||||
# params may be stored as a list of Param or a single Param so we convert
|
||||
# the input value to Python list type before the inner validation (List[Param])
|
||||
# the input value to Python list type before the inner validation (list[Param])
|
||||
# is called
|
||||
ParamList = Annotated[List[Param], BeforeValidator(validate_ensure_is_list)]
|
||||
ParamList = Annotated[list[Param], BeforeValidator(validate_ensure_is_list)]
|
||||
|
||||
|
||||
class Params(BaseModel):
|
||||
@@ -137,11 +136,20 @@ class FunctionReturn(BaseModel):
|
||||
from_addr: HexInt = Field(alias="from")
|
||||
|
||||
|
||||
def sanitize_string(value: str) -> str:
|
||||
# e.g. "cmd_line": "\"C:\\Users\\38lTTV5Kii\\Desktop\\filename.exe\" ",
|
||||
return value.replace("\\\\", "\\").strip(' "')
|
||||
|
||||
|
||||
# unify representation
|
||||
SanitizedString = Annotated[str, BeforeValidator(sanitize_string)]
|
||||
|
||||
|
||||
class MonitorProcess(BaseModel):
|
||||
ts: HexInt
|
||||
process_id: int
|
||||
image_name: str
|
||||
filename: str
|
||||
filename: SanitizedString
|
||||
# page_root: HexInt
|
||||
os_pid: HexInt
|
||||
# os_integrity_level: HexInt
|
||||
@@ -149,7 +157,7 @@ class MonitorProcess(BaseModel):
|
||||
monitor_reason: str
|
||||
parent_id: int
|
||||
os_parent_pid: HexInt
|
||||
# cmd_line: str
|
||||
cmd_line: SanitizedString
|
||||
# cur_dir: str
|
||||
# os_username: str
|
||||
# bitness: int
|
||||
@@ -164,9 +172,9 @@ class MonitorThread(BaseModel):
|
||||
|
||||
|
||||
# handle if there's only single entries, but the model expects a list
|
||||
MonitorProcessList = Annotated[List[MonitorProcess], BeforeValidator(validate_ensure_is_list)]
|
||||
MonitorThreadList = Annotated[List[MonitorThread], BeforeValidator(validate_ensure_is_list)]
|
||||
FunctionCallList = Annotated[List[FunctionCall], BeforeValidator(validate_ensure_is_list)]
|
||||
MonitorProcessList = Annotated[list[MonitorProcess], BeforeValidator(validate_ensure_is_list)]
|
||||
MonitorThreadList = Annotated[list[MonitorThread], BeforeValidator(validate_ensure_is_list)]
|
||||
FunctionCallList = Annotated[list[FunctionCall], BeforeValidator(validate_ensure_is_list)]
|
||||
|
||||
|
||||
class Analysis(BaseModel):
|
||||
@@ -177,7 +185,7 @@ class Analysis(BaseModel):
|
||||
monitor_processes: MonitorProcessList = Field(alias="monitor_process", default=[])
|
||||
monitor_threads: MonitorThreadList = Field(alias="monitor_thread", default=[])
|
||||
function_calls: FunctionCallList = Field(alias="fncall", default=[])
|
||||
# function_returns: List[FunctionReturn] = Field(alias="fnret", default=[])
|
||||
# function_returns: list[FunctionReturn] = Field(alias="fnret", default=[])
|
||||
|
||||
|
||||
class Flog(BaseModel):
|
||||
@@ -186,7 +194,7 @@ class Flog(BaseModel):
|
||||
|
||||
# models for summary_v2.json file, certain fields left as comments for documentation purposes
|
||||
class GenericReference(BaseModel):
|
||||
path: List[str]
|
||||
path: list[str]
|
||||
source: str
|
||||
|
||||
|
||||
@@ -226,12 +234,12 @@ class PEFileImport(BaseModel):
|
||||
|
||||
class PEFileImportModule(BaseModel):
|
||||
dll: str
|
||||
apis: List[PEFileImport]
|
||||
apis: list[PEFileImport]
|
||||
|
||||
|
||||
class PEFileSection(BaseModel):
|
||||
# entropy: float
|
||||
# flags: List[str] = []
|
||||
# flags: list[str] = []
|
||||
name: str
|
||||
# raw_data_offset: int
|
||||
# raw_data_size: int
|
||||
@@ -241,9 +249,9 @@ class PEFileSection(BaseModel):
|
||||
|
||||
class PEFile(BaseModel):
|
||||
basic_info: PEFileBasicInfo
|
||||
exports: List[PEFileExport] = []
|
||||
imports: List[PEFileImportModule] = []
|
||||
sections: List[PEFileSection] = []
|
||||
exports: list[PEFileExport] = []
|
||||
imports: list[PEFileImportModule] = []
|
||||
sections: list[PEFileSection] = []
|
||||
|
||||
|
||||
class ElfFileSectionHeader(BaseModel):
|
||||
@@ -268,7 +276,7 @@ class ElfFileHeader(BaseModel):
|
||||
|
||||
class ElfFile(BaseModel):
|
||||
# file_header: ElfFileHeader
|
||||
sections: List[ElfFileSection]
|
||||
sections: list[ElfFileSection]
|
||||
|
||||
|
||||
class StaticData(BaseModel):
|
||||
@@ -284,7 +292,7 @@ class FileHashes(BaseModel):
|
||||
|
||||
|
||||
class File(BaseModel):
|
||||
# categories: List[str]
|
||||
# categories: list[str]
|
||||
hash_values: FileHashes
|
||||
# is_artifact: bool
|
||||
# is_ioc: bool
|
||||
@@ -292,11 +300,11 @@ class File(BaseModel):
|
||||
# size: int
|
||||
# is_truncated: bool
|
||||
# mime_type: Optional[str] = None
|
||||
# operations: List[str] = []
|
||||
# ref_filenames: List[GenericReference] = []
|
||||
# ref_gfncalls: List[GenericReference] = []
|
||||
# operations: list[str] = []
|
||||
# ref_filenames: list[GenericReference] = []
|
||||
# ref_gfncalls: list[GenericReference] = []
|
||||
ref_static_data: Optional[StaticDataReference] = None
|
||||
# ref_vti_matches: List[GenericReference] = []
|
||||
# ref_vti_matches: list[GenericReference] = []
|
||||
# verdict: str
|
||||
|
||||
|
||||
@@ -307,8 +315,9 @@ class Process(BaseModel):
|
||||
monitor_id: int
|
||||
# monitor_reason: str
|
||||
os_pid: int
|
||||
filename: str
|
||||
filename: SanitizedString
|
||||
image_name: str
|
||||
cmd_line: SanitizedString
|
||||
ref_parent_process: Optional[GenericReference] = None
|
||||
|
||||
|
||||
@@ -356,13 +365,13 @@ class AnalysisMetadata(BaseModel):
|
||||
class SummaryV2(BaseModel):
|
||||
analysis_metadata: AnalysisMetadata
|
||||
|
||||
static_data: Dict[str, StaticData] = {}
|
||||
static_data: dict[str, StaticData] = {}
|
||||
|
||||
# recorded artifacts
|
||||
files: Dict[str, File] = {}
|
||||
processes: Dict[str, Process] = {}
|
||||
filenames: Dict[str, Filename] = {}
|
||||
mutexes: Dict[str, Mutex] = {}
|
||||
domains: Dict[str, Domain] = {}
|
||||
ip_addresses: Dict[str, IPAddress] = {}
|
||||
registry_records: Dict[str, Registry] = {}
|
||||
files: dict[str, File] = {}
|
||||
processes: dict[str, Process] = {}
|
||||
filenames: dict[str, Filename] = {}
|
||||
mutexes: dict[str, Mutex] = {}
|
||||
domains: dict[str, Domain] = {}
|
||||
ip_addresses: dict[str, IPAddress] = {}
|
||||
registry_records: dict[str, Registry] = {}
|
||||
|
||||
@@ -14,14 +14,10 @@ import json
|
||||
import zlib
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import List, Tuple, Union, Literal
|
||||
from typing import Union, Literal, TypeAlias
|
||||
|
||||
from pydantic import Field, BaseModel, ConfigDict
|
||||
|
||||
# TODO(williballenthin): use typing.TypeAlias directly in Python 3.10+
|
||||
# https://github.com/mandiant/capa/issues/1699
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
import capa.helpers
|
||||
import capa.version
|
||||
import capa.features.file
|
||||
@@ -62,7 +58,7 @@ class AddressType(str, Enum):
|
||||
|
||||
class Address(HashableModel):
|
||||
type: AddressType
|
||||
value: Union[int, Tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS
|
||||
value: Union[int, tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS
|
||||
|
||||
@classmethod
|
||||
def from_capa(cls, a: capa.features.address.Address) -> "Address":
|
||||
@@ -272,52 +268,52 @@ class InstructionFeature(HashableModel):
|
||||
|
||||
class InstructionFeatures(BaseModel):
|
||||
address: Address
|
||||
features: Tuple[InstructionFeature, ...]
|
||||
features: tuple[InstructionFeature, ...]
|
||||
|
||||
|
||||
class BasicBlockFeatures(BaseModel):
|
||||
address: Address
|
||||
features: Tuple[BasicBlockFeature, ...]
|
||||
instructions: Tuple[InstructionFeatures, ...]
|
||||
features: tuple[BasicBlockFeature, ...]
|
||||
instructions: tuple[InstructionFeatures, ...]
|
||||
|
||||
|
||||
class FunctionFeatures(BaseModel):
|
||||
address: Address
|
||||
features: Tuple[FunctionFeature, ...]
|
||||
basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks")
|
||||
features: tuple[FunctionFeature, ...]
|
||||
basic_blocks: tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks")
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
class CallFeatures(BaseModel):
|
||||
address: Address
|
||||
name: str
|
||||
features: Tuple[CallFeature, ...]
|
||||
features: tuple[CallFeature, ...]
|
||||
|
||||
|
||||
class ThreadFeatures(BaseModel):
|
||||
address: Address
|
||||
features: Tuple[ThreadFeature, ...]
|
||||
calls: Tuple[CallFeatures, ...]
|
||||
features: tuple[ThreadFeature, ...]
|
||||
calls: tuple[CallFeatures, ...]
|
||||
|
||||
|
||||
class ProcessFeatures(BaseModel):
|
||||
address: Address
|
||||
name: str
|
||||
features: Tuple[ProcessFeature, ...]
|
||||
threads: Tuple[ThreadFeatures, ...]
|
||||
features: tuple[ProcessFeature, ...]
|
||||
threads: tuple[ThreadFeatures, ...]
|
||||
|
||||
|
||||
class StaticFeatures(BaseModel):
|
||||
global_: Tuple[GlobalFeature, ...] = Field(alias="global")
|
||||
file: Tuple[FileFeature, ...]
|
||||
functions: Tuple[FunctionFeatures, ...]
|
||||
global_: tuple[GlobalFeature, ...] = Field(alias="global")
|
||||
file: tuple[FileFeature, ...]
|
||||
functions: tuple[FunctionFeatures, ...]
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
class DynamicFeatures(BaseModel):
|
||||
global_: Tuple[GlobalFeature, ...] = Field(alias="global")
|
||||
file: Tuple[FileFeature, ...]
|
||||
processes: Tuple[ProcessFeatures, ...]
|
||||
global_: tuple[GlobalFeature, ...] = Field(alias="global")
|
||||
file: tuple[FileFeature, ...]
|
||||
processes: tuple[ProcessFeatures, ...]
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
@@ -344,7 +340,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
|
||||
"""
|
||||
serialize the given extractor to a string
|
||||
"""
|
||||
global_features: List[GlobalFeature] = []
|
||||
global_features: list[GlobalFeature] = []
|
||||
for feature, _ in extractor.extract_global_features():
|
||||
global_features.append(
|
||||
GlobalFeature(
|
||||
@@ -352,7 +348,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
|
||||
)
|
||||
)
|
||||
|
||||
file_features: List[FileFeature] = []
|
||||
file_features: list[FileFeature] = []
|
||||
for feature, address in extractor.extract_file_features():
|
||||
file_features.append(
|
||||
FileFeature(
|
||||
@@ -361,7 +357,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
|
||||
)
|
||||
)
|
||||
|
||||
function_features: List[FunctionFeatures] = []
|
||||
function_features: list[FunctionFeatures] = []
|
||||
for f in extractor.get_functions():
|
||||
faddr = Address.from_capa(f.address)
|
||||
ffeatures = [
|
||||
@@ -446,7 +442,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
|
||||
"""
|
||||
serialize the given extractor to a string
|
||||
"""
|
||||
global_features: List[GlobalFeature] = []
|
||||
global_features: list[GlobalFeature] = []
|
||||
for feature, _ in extractor.extract_global_features():
|
||||
global_features.append(
|
||||
GlobalFeature(
|
||||
@@ -454,7 +450,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
|
||||
)
|
||||
)
|
||||
|
||||
file_features: List[FileFeature] = []
|
||||
file_features: list[FileFeature] = []
|
||||
for feature, address in extractor.extract_file_features():
|
||||
file_features.append(
|
||||
FileFeature(
|
||||
@@ -463,7 +459,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
|
||||
)
|
||||
)
|
||||
|
||||
process_features: List[ProcessFeatures] = []
|
||||
process_features: list[ProcessFeatures] = []
|
||||
for p in extractor.get_processes():
|
||||
paddr = Address.from_capa(p.address)
|
||||
pname = extractor.get_process_name(p)
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import binascii
|
||||
from typing import Union, Optional
|
||||
from typing import Union, Literal, Optional, Annotated
|
||||
|
||||
from pydantic import Field, BaseModel, ConfigDict
|
||||
|
||||
@@ -209,168 +209,171 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
|
||||
|
||||
|
||||
class OSFeature(FeatureModel):
|
||||
type: str = "os"
|
||||
type: Literal["os"] = "os"
|
||||
os: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class ArchFeature(FeatureModel):
|
||||
type: str = "arch"
|
||||
type: Literal["arch"] = "arch"
|
||||
arch: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class FormatFeature(FeatureModel):
|
||||
type: str = "format"
|
||||
type: Literal["format"] = "format"
|
||||
format: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class MatchFeature(FeatureModel):
|
||||
type: str = "match"
|
||||
type: Literal["match"] = "match"
|
||||
match: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class CharacteristicFeature(FeatureModel):
|
||||
type: str = "characteristic"
|
||||
type: Literal["characteristic"] = "characteristic"
|
||||
characteristic: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class ExportFeature(FeatureModel):
|
||||
type: str = "export"
|
||||
type: Literal["export"] = "export"
|
||||
export: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class ImportFeature(FeatureModel):
|
||||
type: str = "import"
|
||||
type: Literal["import"] = "import"
|
||||
import_: str = Field(alias="import")
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class SectionFeature(FeatureModel):
|
||||
type: str = "section"
|
||||
type: Literal["section"] = "section"
|
||||
section: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class FunctionNameFeature(FeatureModel):
|
||||
type: str = "function name"
|
||||
type: Literal["function name"] = "function name"
|
||||
function_name: str = Field(alias="function name")
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class SubstringFeature(FeatureModel):
|
||||
type: str = "substring"
|
||||
type: Literal["substring"] = "substring"
|
||||
substring: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class RegexFeature(FeatureModel):
|
||||
type: str = "regex"
|
||||
type: Literal["regex"] = "regex"
|
||||
regex: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class StringFeature(FeatureModel):
|
||||
type: str = "string"
|
||||
type: Literal["string"] = "string"
|
||||
string: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class ClassFeature(FeatureModel):
|
||||
type: str = "class"
|
||||
type: Literal["class"] = "class"
|
||||
class_: str = Field(alias="class")
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class NamespaceFeature(FeatureModel):
|
||||
type: str = "namespace"
|
||||
type: Literal["namespace"] = "namespace"
|
||||
namespace: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class BasicBlockFeature(FeatureModel):
|
||||
type: str = "basic block"
|
||||
type: Literal["basic block"] = "basic block"
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class APIFeature(FeatureModel):
|
||||
type: str = "api"
|
||||
type: Literal["api"] = "api"
|
||||
api: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class PropertyFeature(FeatureModel):
|
||||
type: str = "property"
|
||||
type: Literal["property"] = "property"
|
||||
access: Optional[str] = None
|
||||
property: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class NumberFeature(FeatureModel):
|
||||
type: str = "number"
|
||||
type: Literal["number"] = "number"
|
||||
number: Union[int, float]
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class BytesFeature(FeatureModel):
|
||||
type: str = "bytes"
|
||||
type: Literal["bytes"] = "bytes"
|
||||
bytes: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class OffsetFeature(FeatureModel):
|
||||
type: str = "offset"
|
||||
type: Literal["offset"] = "offset"
|
||||
offset: int
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class MnemonicFeature(FeatureModel):
|
||||
type: str = "mnemonic"
|
||||
type: Literal["mnemonic"] = "mnemonic"
|
||||
mnemonic: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class OperandNumberFeature(FeatureModel):
|
||||
type: str = "operand number"
|
||||
type: Literal["operand number"] = "operand number"
|
||||
index: int
|
||||
operand_number: int = Field(alias="operand number")
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class OperandOffsetFeature(FeatureModel):
|
||||
type: str = "operand offset"
|
||||
type: Literal["operand offset"] = "operand offset"
|
||||
index: int
|
||||
operand_offset: int = Field(alias="operand offset")
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
Feature = Union[
|
||||
OSFeature,
|
||||
ArchFeature,
|
||||
FormatFeature,
|
||||
MatchFeature,
|
||||
CharacteristicFeature,
|
||||
ExportFeature,
|
||||
ImportFeature,
|
||||
SectionFeature,
|
||||
FunctionNameFeature,
|
||||
SubstringFeature,
|
||||
RegexFeature,
|
||||
StringFeature,
|
||||
ClassFeature,
|
||||
NamespaceFeature,
|
||||
APIFeature,
|
||||
PropertyFeature,
|
||||
NumberFeature,
|
||||
BytesFeature,
|
||||
OffsetFeature,
|
||||
MnemonicFeature,
|
||||
OperandNumberFeature,
|
||||
OperandOffsetFeature,
|
||||
# Note! this must be last, see #1161
|
||||
BasicBlockFeature,
|
||||
Feature = Annotated[
|
||||
Union[
|
||||
OSFeature,
|
||||
ArchFeature,
|
||||
FormatFeature,
|
||||
MatchFeature,
|
||||
CharacteristicFeature,
|
||||
ExportFeature,
|
||||
ImportFeature,
|
||||
SectionFeature,
|
||||
FunctionNameFeature,
|
||||
SubstringFeature,
|
||||
RegexFeature,
|
||||
StringFeature,
|
||||
ClassFeature,
|
||||
NamespaceFeature,
|
||||
APIFeature,
|
||||
PropertyFeature,
|
||||
NumberFeature,
|
||||
BytesFeature,
|
||||
OffsetFeature,
|
||||
MnemonicFeature,
|
||||
OperandNumberFeature,
|
||||
OperandOffsetFeature,
|
||||
# Note! this must be last, see #1161
|
||||
BasicBlockFeature,
|
||||
],
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
|
||||
@@ -55,7 +55,7 @@ You can also execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant
|
||||
| capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases |
|
||||
| Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases |
|
||||
| Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases |
|
||||
| Python | `>= 3.8.0` | https://www.python.org/downloads |
|
||||
| Python | `>= 3.10.0` | https://www.python.org/downloads |
|
||||
|
||||
## Installation
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ import sys
|
||||
import json
|
||||
import logging
|
||||
import pathlib
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any
|
||||
|
||||
from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd
|
||||
from ghidra.program.model.symbol import Namespace, SourceType, SymbolType
|
||||
@@ -68,8 +68,8 @@ class CapaMatchData:
|
||||
scope,
|
||||
capability,
|
||||
matches,
|
||||
attack: List[Dict[Any, Any]],
|
||||
mbc: List[Dict[Any, Any]],
|
||||
attack: list[dict[Any, Any]],
|
||||
mbc: list[dict[Any, Any]],
|
||||
):
|
||||
self.namespace = namespace
|
||||
self.scope = scope
|
||||
@@ -282,7 +282,7 @@ def parse_json(capa_data):
|
||||
for rule, capability in capa_data.get("rules", {}).items():
|
||||
# structure to contain rule match address & supporting feature data
|
||||
# {rule match addr:[{feature addr:{node_data}}]}
|
||||
rule_matches: Dict[Any, List[Any]] = {}
|
||||
rule_matches: dict[Any, list[Any]] = {}
|
||||
for i in range(len(capability.get("matches"))):
|
||||
# grab rule match location
|
||||
match_loc = capability.get("matches")[i][0].get("value")
|
||||
@@ -368,14 +368,10 @@ def main():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if sys.version_info < (3, 8):
|
||||
if sys.version_info < (3, 10):
|
||||
from capa.exceptions import UnsupportedRuntimeError
|
||||
|
||||
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
|
||||
elif sys.version_info < (3, 10):
|
||||
from warnings import warn
|
||||
|
||||
warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
|
||||
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
|
||||
exit_code = main()
|
||||
if exit_code != 0:
|
||||
popup("capa explorer encountered errors during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821
|
||||
|
||||
@@ -160,12 +160,8 @@ def main():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if sys.version_info < (3, 8):
|
||||
if sys.version_info < (3, 10):
|
||||
from capa.exceptions import UnsupportedRuntimeError
|
||||
|
||||
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
|
||||
elif sys.version_info < (3, 10):
|
||||
from warnings import warn
|
||||
|
||||
warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
|
||||
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
|
||||
sys.exit(main())
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
import logging
|
||||
import datetime
|
||||
import contextlib
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
||||
import capa
|
||||
@@ -112,7 +111,7 @@ def get_file_sha256():
|
||||
return currentProgram().getExecutableSHA256() # type: ignore [name-defined] # noqa: F821
|
||||
|
||||
|
||||
def collect_metadata(rules: List[Path]):
|
||||
def collect_metadata(rules: list[Path]):
|
||||
md5 = get_file_md5()
|
||||
sha256 = get_file_sha256()
|
||||
|
||||
@@ -150,7 +149,7 @@ def collect_metadata(rules: List[Path]):
|
||||
os=os,
|
||||
extractor="ghidra",
|
||||
rules=tuple(r.resolve().absolute().as_posix() for r in rules),
|
||||
base_address=capa.features.freeze.Address.from_capa(currentProgram().getImageBase().getOffset()), # type: ignore [name-defined] # noqa: F821
|
||||
base_address=capa.features.freeze.Address.from_capa(AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset())), # type: ignore [name-defined] # noqa: F821
|
||||
layout=rdoc.StaticLayout(
|
||||
functions=(),
|
||||
),
|
||||
|
||||
@@ -14,7 +14,7 @@ import logging
|
||||
import tempfile
|
||||
import contextlib
|
||||
import importlib.util
|
||||
from typing import Dict, List, Union, BinaryIO, Iterator, NoReturn
|
||||
from typing import BinaryIO, Iterator, NoReturn
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
from datetime import datetime
|
||||
@@ -46,6 +46,7 @@ from capa.features.common import (
|
||||
FORMAT_FREEZE,
|
||||
FORMAT_DRAKVUF,
|
||||
FORMAT_UNKNOWN,
|
||||
FORMAT_BINJA_DB,
|
||||
FORMAT_BINEXPORT2,
|
||||
Format,
|
||||
)
|
||||
@@ -59,6 +60,7 @@ EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz", "log", ".log.gz", ".zip")
|
||||
EXTENSIONS_BINEXPORT2 = ("BinExport", "BinExport2")
|
||||
EXTENSIONS_ELF = "elf_"
|
||||
EXTENSIONS_FREEZE = "frz"
|
||||
EXTENSIONS_BINJA_DB = "bndb"
|
||||
|
||||
logger = logging.getLogger("capa")
|
||||
|
||||
@@ -164,7 +166,7 @@ def load_json_from_path(json_path: Path):
|
||||
return report
|
||||
|
||||
|
||||
def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]):
|
||||
def decode_json_lines(fd: BinaryIO | gzip.GzipFile):
|
||||
for line in fd:
|
||||
try:
|
||||
line_s = line.strip().decode()
|
||||
@@ -175,7 +177,7 @@ def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]):
|
||||
logger.debug("bad DRAKVUF log line: %s", line)
|
||||
|
||||
|
||||
def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]:
|
||||
def load_jsonl_from_path(jsonl_path: Path) -> Iterator[dict]:
|
||||
try:
|
||||
with gzip.open(jsonl_path, "rb") as fg:
|
||||
yield from decode_json_lines(fg)
|
||||
@@ -204,7 +206,7 @@ def get_format_from_report(sample: Path) -> str:
|
||||
return FORMAT_DRAKVUF
|
||||
elif sample.name.endswith(".zip"):
|
||||
with ZipFile(sample, "r") as zipfile:
|
||||
namelist: List[str] = zipfile.namelist()
|
||||
namelist: list[str] = zipfile.namelist()
|
||||
if "logs/summary_v2.json" in namelist and "logs/flog.xml" in namelist:
|
||||
# assume VMRay zipfile at a minimum has these files
|
||||
return FORMAT_VMRAY
|
||||
@@ -232,6 +234,8 @@ def get_format_from_extension(sample: Path) -> str:
|
||||
format_ = FORMAT_FREEZE
|
||||
elif sample.name.endswith(EXTENSIONS_BINEXPORT2):
|
||||
format_ = FORMAT_BINEXPORT2
|
||||
elif sample.name.endswith(EXTENSIONS_BINJA_DB):
|
||||
format_ = FORMAT_BINJA_DB
|
||||
return format_
|
||||
|
||||
|
||||
@@ -331,17 +335,6 @@ def log_unsupported_arch_error():
|
||||
logger.error("-" * 80)
|
||||
|
||||
|
||||
def log_unsupported_runtime_error():
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Unsupported runtime or Python interpreter.")
|
||||
logger.error(" ")
|
||||
logger.error(" capa supports running under Python 3.8 and higher.")
|
||||
logger.error(" ")
|
||||
logger.error(" If you're seeing this message on the command line,")
|
||||
logger.error(" please ensure you're running a supported Python version.")
|
||||
logger.error("-" * 80)
|
||||
|
||||
|
||||
def is_running_standalone() -> bool:
|
||||
"""
|
||||
are we running from a PyInstaller'd executable?
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user