Mirror of https://github.com/mandiant/capa.git (synced 2025-12-12 07:40:38 -08:00)

Compare commits: library-de… → codecut (151 commits)
Commits (SHA1 only; the author, date, and message columns were not captured):

d89083ab31, 891fa8aaa3, e94147b4c2, 6fc4567f0c, 3b1a8f5b5a, f296e7d423, 3e02b67480, 536526f61d, bcd2c3fb35, f340b93a02,
8bd6f8b99a, 8b4d5d3d22, bc6e18ed85, 2426aba223, 405e189267, cfb632edc8, 70c96a29b4, c005de0a0d, 8d42b14b20, bad32b91fb,
9716da4765, e0784f2e85, 4a775bab2e, 2de7830f5e, 9d67e133c9, fa18b4e201, c3c93685e2, 462e11443e, 32d6181f02, 6cf944b321,
369fbc713e, e3a1dbfac2, e5fe935a8e, 233f8dcf9f, 51d606bc0d, 2b46796d08, 81f7f43b5b, 1f34795fce, 06f0012183, 55720ddbfd,
893378c10e, 1a82b9d0c5, 3cbc184020, 347601a112, 8a02b0773d, f11661f8f2, 518dc3381c, 5c60adaf96, 4ab8d75629, 51d852d1b3,
aa8e4603d1, 6c61a91778, e633e34517, 9c72c9067b, 168435cf75, 5fdf7e61e2, 95fc747e6f, 1f374e4986, 28c0234339, f57f909e68,
02c359f79f, 4448d612f1, d7cf8d1251, d1f3e43325, 83a46265df, 0c64bd4985, ed86e5fb1b, e1c786466a, 959a234f0e, e57de2beb4,
9c9b3711c0, 65e2dac4c4, 9ad3f06e1d, 201ec07b58, c85be8dc72, 54952feb07, 379d6ef313, 28fcd10d2e, a6481df6c4, abe80842cb,
b6763ac5fe, 5a284de438, 8cfccbcb44, 01772d0de0, f0042157ab, 6a2330c11a, 02b5e11380, 32c428b989, 20909c1d95, 035b4f6ae6,
cb002567c4, 46c513c0a9, 0f0523d2ba, 688841fd3b, 2a6ba62379, ca7580d417, 7c01712843, ef02e4fe83, d51074385b, d9ea57d29d,
8b7ec049f4, c05e01cc3a, 11bb0c3fbd, 93da346f32, 3a2056b701, 915f3b0511, cd61983e43, 9627f7e5c3, 3ebec9ec2b, 295cd413bb,
03e4778620, e8ad207245, a31bd2cd15, 9118946ecb, 7b32706bd4, c632d594a6, 4398b8ac31, ec697c01f9, 097ed73ccd, 4e121ae24f,
322e7a934e, 7d983af907, 77758e8922, 296255f581, 0237059cbd, 3241ee599f, 24236dda0e, d4d856767d, 35767e6c6a, 7d8ee6aaac,
23709c9d6a, bc72b6d14e, 13b1e533f5, 7cc3ddd4ea, 20ae098cda, 2987eeb0ac, cebf8e7274, d74225b5e0, 70610cd1c5, 338107cf9e,
6b88eed1e4, 54badc323d, 2e2e1bc277, 84c9da09e0, b2f89695b5, bc91171c65, 69190dfa82, 688afab087, 6447319cc7, 7be6fe6ae1,
ca7073ce87
Dev container configuration (Dockerfile and devcontainer.json):

@@ -1,6 +1,6 @@
 # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile

-# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
+# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3-bullseye, 3.10-bullseye, 3-buster, 3.10-buster, etc.
 ARG VARIANT="3.10-bullseye"
 FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}

@@ -6,7 +6,7 @@
     "dockerfile": "Dockerfile",
     "context": "..",
     "args": {
-        // Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6
+        // Update 'VARIANT' to pick a Python version: 3, 3.10, etc.
         // Append -bullseye or -buster to pin to an OS version.
         // Use -bullseye variants on local on arm64/Apple Silicon.
         "VARIANT": "3.10",
.github/workflows/build.yml (13 changed lines):

@@ -21,26 +21,25 @@ jobs:
       # set to false for debugging
       fail-fast: true
       matrix:
-        # using Python 3.8 to support running across multiple operating systems including Windows 7
         include:
           - os: ubuntu-20.04
             # use old linux so that the shared library versioning is more portable
             artifact_name: capa
             asset_name: linux
-            python_version: 3.8
+            python_version: '3.10'
           - os: ubuntu-20.04
             artifact_name: capa
             asset_name: linux-py312
-            python_version: 3.12
+            python_version: '3.12'
           - os: windows-2019
             artifact_name: capa.exe
             asset_name: windows
-            python_version: 3.8
+            python_version: '3.10'
-          - os: macos-12
+          - os: macos-13
             # use older macOS for assumed better portability
             artifact_name: capa
             asset_name: macos
-            python_version: 3.8
+            python_version: '3.10'
     steps:
       - name: Checkout capa
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

@@ -107,7 +106,7 @@ jobs:
     # upload zipped binaries to Release page
     if: github.event_name == 'release'
     name: zip and upload ${{ matrix.asset_name }}
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     needs: [build]
     strategy:
       matrix:
.github/workflows/changelog.yml (7 changed lines):

@@ -13,8 +13,11 @@ permissions:
 jobs:
   check_changelog:
-    # no need to check for dependency updates via dependabot
-    if: github.actor != 'dependabot[bot]' && github.actor != 'dependabot-preview[bot]'
-    runs-on: ubuntu-20.04
+    # github.event.pull_request.user.login refers to PR author
+    if: |
+      github.event.pull_request.user.login != 'dependabot[bot]' &&
+      github.event.pull_request.user.login != 'dependabot-preview[bot]'
+    runs-on: ubuntu-latest
     env:
       NO_CHANGELOG: '[x] No CHANGELOG update needed'
     steps:
.github/workflows/publish.yml (2 changed lines):

@@ -21,7 +21,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: '3.8'
+          python-version: '3.10'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
.github/workflows/tag.yml (2 changed lines):

@@ -9,7 +9,7 @@ permissions: read-all
 jobs:
   tag:
     name: Tag capa rules
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout capa-rules
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
.github/workflows/tests.yml (30 changed lines):

@@ -26,7 +26,7 @@ env:

 jobs:
   changelog_format:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout capa
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

@@ -37,15 +37,15 @@ jobs:
         if [ $number != 1 ]; then exit 1; fi

   code_style:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout capa
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       # use latest available python to take advantage of best performance
-      - name: Set up Python 3.11
+      - name: Set up Python 3.12
         uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: "3.11"
+          python-version: "3.12"
       - name: Install dependencies
         run: |
           pip install -r requirements.txt

@@ -64,16 +64,16 @@ jobs:
         run: pre-commit run deptry --hook-stage manual

   rule_linter:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout capa with submodules
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
         with:
           submodules: recursive
-      - name: Set up Python 3.11
+      - name: Set up Python 3.12
         uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: "3.11"
+          python-version: "3.12"
       - name: Install capa
         run: |
           pip install -r requirements.txt

@@ -88,17 +88,17 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, windows-2019, macos-12]
+        os: [ubuntu-20.04, windows-2019, macos-13]
         # across all operating systems
-        python-version: ["3.8", "3.11"]
+        python-version: ["3.10", "3.11"]
         include:
           # on Ubuntu run these as well
           - os: ubuntu-20.04
             python-version: "3.8"
           - os: ubuntu-20.04
             python-version: "3.9"
           - os: ubuntu-20.04
             python-version: "3.10"
           - os: ubuntu-20.04
             python-version: "3.11"
           - os: ubuntu-20.04
             python-version: "3.12"
     steps:
       - name: Checkout capa with submodules
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

@@ -131,7 +131,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.11"]
+        python-version: ["3.10", "3.11"]
     steps:
       - name: Checkout capa with submodules
       # do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118

@@ -173,7 +173,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.11"]
+        python-version: ["3.10", "3.11"]
         java-version: ["17"]
         ghidra-version: ["11.0.1"]
         public-version: ["PUBLIC_20240130"] # for ghidra releases
.github/workflows/web-release.yml (new file, 103 lines):

@@ -0,0 +1,103 @@
name: create web release
on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version number for the release (x.x.x)'
        required: true
        type: string

jobs:
  run-tests:
    uses: ./.github/workflows/web-tests.yml

  build-and-release:
    needs: run-tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set release name
        run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV

      - name: Check if release already exists
        run: |
          if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
            echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
            exit 1
          fi

      - name: Set up Node.js
        uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
        with:
          node-version: 20
          cache: 'npm'
          cache-dependency-path: 'web/explorer/package-lock.json'

      - name: Install dependencies
        run: npm ci
        working-directory: web/explorer

      - name: Build offline bundle
        run: npm run build:bundle
        working-directory: web/explorer

      - name: Compress bundle
        run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
        working-directory: web/explorer

      - name: Create releases directory
        run: mkdir -vp web/explorer/releases

      - name: Move release to releases folder
        run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases

      - name: Compute release SHA256 hash
        run: |
          echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV

      - name: Update CHANGELOG.md
        run: |
          echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
          echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
          echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
          echo "" >> web/explorer/releases/CHANGELOG.md
          cat web/explorer/releases/CHANGELOG.md

      - name: Remove older releases
        # keep only the latest 3 releases
        run: ls -t capa-explorer-web-v*.zip | tail -n +4 | xargs -r rm --
        working-directory: web/explorer/releases

      - name: Stage release files
        run: |
          git config --local user.email "capa-dev@mandiant.com"
          git config --local user.name "Capa Bot"
          git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
          git add -u web/explorer/releases/

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@5e914681df9dc83aa4e4905692ca88beb2f9e91f # v7.0.5
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          title: "explorer web: add release v${{ github.event.inputs.version }}"
          body: |
            This PR adds a new capa Explorer Web release v${{ github.event.inputs.version }}.

            Release details:
            - Name: ${{ env.RELEASE_NAME }}
            - SHA256: ${{ env.RELEASE_SHA256 }}

            This release is generated by the [web release](https://github.com/mandiant/capa/actions/workflows/web-release.yml) workflow.

            - [x] No CHANGELOG update needed
            - [x] No new tests needed
            - [x] No documentation update needed
          commit-message: ":robot: explorer web: add release ${{ env.RELEASE_NAME }}"
          branch: release/web-v${{ github.event.inputs.version }}
          add-paths: web/explorer/releases/${{ env.RELEASE_NAME }}.zip
          base: master
          labels: webui
          delete-branch: true
          committer: Capa Bot <capa-dev@mandiant.com>
          author: Capa Bot <capa-dev@mandiant.com>
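The workflow above records each bundle's SHA256 in web/explorer/releases/CHANGELOG.md. A minimal sketch for re-checking a downloaded bundle against that recorded hash; the bundle file name here is hypothetical:

```python
import hashlib
from pathlib import Path


def sha256_of(path: Path) -> str:
    """compute the SHA256 hex digest of a file, reading it in chunks."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()


# compare against the "SHA256:" line recorded in web/explorer/releases/CHANGELOG.md
print(sha256_of(Path("capa-explorer-web-v1.0.0-abc1234.zip")))  # hypothetical bundle name
```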
.github/workflows/web-tests.yml (13 changed lines):

@@ -1,10 +1,11 @@
-name: Capa Explorer Web tests
+name: capa Explorer Web tests

 on:
   pull_request:
     branches: [ master ]
     paths:
       - 'web/explorer/**'
+  workflow_call: # this allows the workflow to be called by other workflows

 jobs:
   test:

@@ -23,20 +24,20 @@ jobs:
         with:
           node-version: 20
           cache: 'npm'
-          cache-dependency-path: './web/explorer/package-lock.json'
+          cache-dependency-path: 'web/explorer/package-lock.json'

       - name: Install dependencies
         run: npm ci
-        working-directory: ./web/explorer
+        working-directory: web/explorer

       - name: Lint
         run: npm run lint
-        working-directory: ./web/explorer
+        working-directory: web/explorer

       - name: Format
         run: npm run format:check
-        working-directory: ./web/explorer
+        working-directory: web/explorer

       - name: Run unit tests
         run: npm run test
-        working-directory: ./web/explorer
+        working-directory: web/explorer
.pre-commit-config.yaml:

@@ -25,7 +25,7 @@ repos:
     hooks:
       - id: isort
         name: isort
-        stages: [commit, push, manual]
+        stages: [pre-commit, pre-push, manual]
         language: system
         entry: isort
         args:

@@ -46,7 +46,7 @@ repos:
     hooks:
      - id: black
        name: black
-       stages: [commit, push, manual]
+       stages: [pre-commit, pre-push, manual]
        language: system
        entry: black
        args:

@@ -64,7 +64,7 @@ repos:
     hooks:
      - id: ruff
        name: ruff
-       stages: [commit, push, manual]
+       stages: [pre-commit, pre-push, manual]
        language: system
        entry: ruff
        args:

@@ -82,7 +82,7 @@ repos:
     hooks:
      - id: flake8
        name: flake8
-       stages: [push, manual]
+       stages: [pre-push, manual]
        language: system
        entry: flake8
        args:

@@ -101,13 +101,14 @@ repos:
     hooks:
      - id: mypy
        name: mypy
-       stages: [push, manual]
+       stages: [pre-push, manual]
        language: system
        entry: mypy
        args:
          - "--check-untyped-defs"
          - "--ignore-missing-imports"
          - "--config-file=.github/mypy/mypy.ini"
+         - "--enable-incomplete-feature=NewGenericSyntax"
          - "capa/"
          - "scripts/"
          - "tests/"

@@ -119,7 +120,7 @@ repos:
     hooks:
      - id: deptry
        name: deptry
-       stages: [push, manual]
+       stages: [pre-push, manual]
        language: system
        entry: deptry .
        always_run: true
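These stage renames track pre-commit 3.x, which deprecated the legacy stage names `commit` and `push` in favor of `pre-commit` and `pre-push`. Hooks restricted to the `manual` stage still run only on request, for example:

```console
$ pre-commit run deptry --hook-stage manual
```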
CHANGELOG.md (130 changed lines):

@@ -12,8 +12,9 @@
 ### Bug Fixes

-- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
-- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
+- vmray: load more analysis archives @mr-tz
+- dynamic: only check file limitations for static file formats @mr-tz
+- vmray: skip non-printable strings @mike-hunhoff

 ### capa Explorer Web

@@ -22,8 +23,127 @@
 ### Development

 ### Raw diffs
-- [capa v7.4.0...master](https://github.com/mandiant/capa/compare/v7.4.0...master)
-- [capa-rules v7.4.0...master](https://github.com/mandiant/capa-rules/compare/v7.4.0...master)
+- [capa v8.0.1...master](https://github.com/mandiant/capa/compare/v8.0.1...master)
+- [capa-rules v8.0.1...master](https://github.com/mandiant/capa-rules/compare/v8.0.1...master)
+
+## v8.0.1
+
+This point release fixes an issue with the IDAPython API so that IDA Pro 8.3, 8.4, and 9.0 are now handled correctly.
+
+### Bug Fixes
+
+- handle IDA 8.3/8.4 vs. 9.0 API change @mr-tz
+
+### Raw diffs
+- [capa v8.0.0...v8.0.1](https://github.com/mandiant/capa/compare/v8.0.0...v8.0.1)
+- [capa-rules v8.0.0...v8.0.1](https://github.com/mandiant/capa-rules/compare/v8.0.0...v8.0.1)
+
+## v8.0.0
+
+capa version 8 adds support for IDA Pro 9.0 (and idalib). The release comes with various improvements and bug fixes for the Binary Ninja backend (including the ability to load database files) -- thanks to @xusheng6.
+
+Additional bug fixes improve the dynamic and BinExport backends.
+
+capa version 8 now requires Python 3.10 or newer.
+
+Special thanks to @Tamir-K, @harshit-wadhwani, and @jorik-utwente for their great contributions.
+
+### New Features
+
+- allow call as valid subscope for call scoped rules @mr-tz
+- support loading and analyzing a Binary Ninja database #2496 @xusheng6
+- vmray: record process command line details @mr-tz
+
+### Breaking Changes
+
+- remove support for Python 3.8 and use Python 3.10 as the minimum now #1966 @mr-tz
+
+### New Rules (54)
+
+- nursery/get-shadow-password-file-entry-on-linux jonathanlepore@google.com
+- nursery/set-shadow-password-file-entry-on-linux jonathanlepore@google.com
+- collection/browser/get-chrome-cookiemonster still@teamt5.org
+- collection/browser/get-elevation-service-for-chromium-based-browsers still@teamt5.org
+- collection/get-steam-token still@teamt5.org
+- nursery/persist-via-application-shimming j.j.vannielen@utwente.nl
+- nursery/persist-via-bits-job j.j.vannielen@utwente.nl
+- nursery/persist-via-print-processors-registry-key j.j.vannielen@utwente.nl
+- linking/static/touchsocket/linked-against-touchsocket still@teamt5.org
+- runtime/dotnet/compiled-with-dotnet-aot still@teamt5.org
+- nursery/persist-via-errorhandler-script j.j.vannielen@utwente.nl
+- nursery/persist-via-get-variable-hijack j.j.vannielen@utwente.nl
+- nursery/persist-via-iphlpapi-dll-hijack j.j.vannielen@utwente.nl
+- nursery/persist-via-lnk-shortcut j.j.vannielen@utwente.nl
+- nursery/persist-via-powershell-profile j.j.vannielen@utwente.nl
+- nursery/persist-via-windows-accessibility-tools j.j.vannielen@utwente.nl
+- nursery/persist-via-windows-terminal-profile j.j.vannielen@utwente.nl
+- nursery/write-to-browser-extension-directory j.j.vannielen@utwente.nl
+- nursery/persist-via-aedebug-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-amsi-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-app-paths-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-appcertdlls-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-appx-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-autodialdll-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-autoplayhandlers-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-bootverificationprogram-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-code-signing-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-com-hijack j.j.vannielen@utwente.nl
+- nursery/persist-via-command-processor-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-contextmenuhandlers-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-cor_profiler_path-registry-value j.j.vannielen@utwente.nl
+- nursery/persist-via-default-file-association-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-disk-cleanup-handler-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-dotnet-dbgmanageddebugger-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-dotnet_startup_hooks-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-explorer-tools-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-filter-handlers-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-group-policy-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-hhctrl-com-hijack j.j.vannielen@utwente.nl
+- nursery/persist-via-htmlhelp-author-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-image-file-execution-options-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-lsa-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-natural-language-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-netsh-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-network-provider-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-path-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-print-monitors-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-rdp-startup-programs-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-silentprocessexit-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-telemetrycontroller-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-timeproviders-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-ts-initialprogram-registry-key j.j.vannielen@utwente.nl
+- nursery/persist-via-userinitmprlogonscript-registry-value j.j.vannielen@utwente.nl
+- nursery/persist-via-windows-error-reporting-registry-key j.j.vannielen@utwente.nl
+
+### Bug Fixes
+
+- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
+- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
+- ghidra: fix saving of base address @mr-tz
+- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
+- binja: fix crash when the IL of certain functions is not available #2249 @xusheng6
+- binja: major performance improvement on the binja extractor #1414 @xusheng6
+- cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz
+- binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6
+- binja: move the stack string detection to function level #2516 @xusheng6
+- BinExport2: fix handling of incorrect thunk functions #2524 @williballenthin
+- BinExport2: more precise pruning of expressions @williballenthin
+- BinExport2: better handle weird expression trees from Ghidra #2528 #2530 @williballenthin
+
+### capa Explorer Web
+
+### capa Explorer IDA Pro plugin
+
+- fix bug preventing saving of capa results via Save button @mr-tz
+- fix saving of base address @mr-tz
+
+### Development
+- CI: use macos-13 since macos-12 is deprecated and will be removed on December 3rd, 2024 #2173 @mr-tz
+- CI: update Binary Ninja version to 4.2 #2499 @xusheng6
+
+### Raw diffs
+- [capa v7.4.0...v8.0.0](https://github.com/mandiant/capa/compare/v7.4.0...v8.0.0)
+- [capa-rules v7.4.0...v8.0.0](https://github.com/mandiant/capa-rules/compare/v7.4.0...v8.0.0)

 ## v7.4.0

@@ -179,6 +299,8 @@ Special thanks to our repeat and new contributors:
 - CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
 - CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
 - CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
+- CI: add web releases workflow #2455 @s-ff
+- CI: skip changelog.yml for dependabot PRs #2471

 ### Raw diffs
README.md (82 changed lines):

@@ -38,49 +38,47 @@ Below you find a list of [our capa blog posts with more details.](#blog-posts)
 ```
 $ capa.exe suspicious.exe

-+------------------------+--------------------------------------------------------------------------------+
-| ATT&CK Tactic          | ATT&CK Technique                                                               |
-|------------------------+--------------------------------------------------------------------------------|
-| DEFENSE EVASION        | Obfuscated Files or Information [T1027]                                        |
-| DISCOVERY              | Query Registry [T1012]                                                         |
-|                        | System Information Discovery [T1082]                                           |
-| EXECUTION              | Command and Scripting Interpreter::Windows Command Shell [T1059.003]           |
-|                        | Shared Modules [T1129]                                                         |
-| EXFILTRATION           | Exfiltration Over C2 Channel [T1041]                                           |
-| PERSISTENCE            | Create or Modify System Process::Windows Service [T1543.003]                   |
-+------------------------+--------------------------------------------------------------------------------+
++--------------------+------------------------------------------------------------------------+
+| ATT&CK Tactic      | ATT&CK Technique                                                       |
+|--------------------+------------------------------------------------------------------------|
+| DEFENSE EVASION    | Obfuscated Files or Information [T1027]                                |
+| DISCOVERY          | Query Registry [T1012]                                                 |
+|                    | System Information Discovery [T1082]                                  |
+| EXECUTION          | Command and Scripting Interpreter::Windows Command Shell [T1059.003]  |
+|                    | Shared Modules [T1129]                                                 |
+| EXFILTRATION       | Exfiltration Over C2 Channel [T1041]                                   |
+| PERSISTENCE        | Create or Modify System Process::Windows Service [T1543.003]           |
++--------------------+------------------------------------------------------------------------+

-+-------------------------------------------------------+-------------------------------------------------+
-| CAPABILITY                                            | NAMESPACE                                        |
-|-------------------------------------------------------+-------------------------------------------------|
-| check for OutputDebugString error                     | anti-analysis/anti-debugging/debugger-detection  |
-| read and send data from client to server              | c2/file-transfer                                 |
-| execute shell command and capture output              | c2/shell                                         |
-| receive data (2 matches)                              | communication                                    |
-| send data (6 matches)                                 | communication                                    |
-| connect to HTTP server (3 matches)                    | communication/http/client                        |
-| send HTTP request (3 matches)                         | communication/http/client                        |
-| create pipe                                           | communication/named-pipe/create                  |
-| get socket status (2 matches)                         | communication/socket                             |
-| receive data on socket (2 matches)                    | communication/socket/receive                     |
-| send data on socket (3 matches)                       | communication/socket/send                        |
-| connect TCP socket                                    | communication/socket/tcp                         |
-| encode data using Base64                              | data-manipulation/encoding/base64                |
-| encode data using XOR (6 matches)                     | data-manipulation/encoding/xor                   |
-| run as a service                                      | executable/pe                                    |
-| get common file path (3 matches)                      | host-interaction/file-system                     |
-| read file                                             | host-interaction/file-system/read                |
-| write file (2 matches)                                | host-interaction/file-system/write               |
-| print debug messages (2 matches)                      | host-interaction/log/debug/write-event           |
-| resolve DNS                                           | host-interaction/network/dns/resolve             |
-| get hostname                                          | host-interaction/os/hostname                     |
-| create a process with modified I/O handles and window | host-interaction/process/create                  |
-| create process                                        | host-interaction/process/create                  |
-| create registry key                                   | host-interaction/registry/create                 |
-| create service                                        | host-interaction/service/create                  |
-| create thread                                         | host-interaction/thread/create                   |
-| persist via Windows service                           | persistence/service                              |
-+-------------------------------------------------------+-------------------------------------------------+
++-------------------------------------------+-------------------------------------------------+
+| CAPABILITY                                | NAMESPACE                                       |
+|-------------------------------------------+-------------------------------------------------|
+| read and send data from client to server  | c2/file-transfer                                |
+| execute shell command and capture output  | c2/shell                                        |
+| receive data (2 matches)                  | communication                                   |
+| send data (6 matches)                     | communication                                   |
+| connect to HTTP server (3 matches)        | communication/http/client                       |
+| send HTTP request (3 matches)             | communication/http/client                       |
+| create pipe                               | communication/named-pipe/create                 |
+| get socket status (2 matches)             | communication/socket                            |
+| receive data on socket (2 matches)        | communication/socket/receive                    |
+| send data on socket (3 matches)           | communication/socket/send                       |
+| connect TCP socket                        | communication/socket/tcp                        |
+| encode data using Base64                  | data-manipulation/encoding/base64               |
+| encode data using XOR (6 matches)         | data-manipulation/encoding/xor                  |
+| run as a service                          | executable/pe                                   |
+| get common file path (3 matches)          | host-interaction/file-system                    |
+| read file                                 | host-interaction/file-system/read               |
+| write file (2 matches)                    | host-interaction/file-system/write              |
+| print debug messages (2 matches)          | host-interaction/log/debug/write-event          |
+| resolve DNS                               | host-interaction/network/dns/resolve            |
+| get hostname                              | host-interaction/os/hostname                    |
+| create process                            | host-interaction/process/create                 |
+| create registry key                       | host-interaction/registry/create                |
+| create service                            | host-interaction/service/create                 |
+| create thread                             | host-interaction/thread/create                  |
+| persist via Windows service               | persistence/service                             |
++-------------------------------------------+-------------------------------------------------+
 ```

 # download and usage
capa/analysis/__init__.py (new, empty file)

capa/analysis/flirt.py (new file, 38 lines):

@@ -0,0 +1,38 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

from pydantic import BaseModel

import capa.features.extractors.ida.idalib as idalib

if not idalib.has_idalib():
    raise RuntimeError("cannot find IDA idalib module.")

if not idalib.load_idalib():
    raise RuntimeError("failed to load IDA idalib module.")

import idaapi
import idautils


class FunctionId(BaseModel):
    va: int
    is_library: bool
    name: str


def get_flirt_matches(lib_only=True):
    for fva in idautils.Functions():
        f = idaapi.get_func(fva)
        is_lib = bool(f.flags & idaapi.FUNC_LIB)
        fname = idaapi.get_func_name(fva)

        if lib_only and not is_lib:
            continue

        yield FunctionId(va=fva, is_library=is_lib, name=fname)
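For context, a minimal driver sketch for the module above, assuming IDA's idalib is installed. It reuses only idalib calls that appear in capa/analysis/libraries.py below, where idapro.open_database returns non-zero on failure:

```python
# sketch: list FLIRT-identified library functions in a sample via idalib.
import sys
from pathlib import Path

import capa.analysis.flirt  # importing this loads idalib (see module above)

import idapro
import ida_auto

input_path = Path(sys.argv[1])
idapro.enable_console_messages(False)
if idapro.open_database(str(input_path.absolute()), run_auto_analysis=True):
    raise RuntimeError("failed to analyze input file")
ida_auto.auto_wait()  # wait for auto-analysis to complete
try:
    for match in capa.analysis.flirt.get_flirt_matches():
        print(f"0x{match.va:x}\t{match.name}")
finally:
    idapro.close_database()
```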
capa/analysis/libraries.py (new file, 242 lines):

@@ -0,0 +1,242 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import sys
import logging
import argparse
import tempfile
import contextlib
from enum import Enum
from typing import List, Optional
from pathlib import Path

import rich
from pydantic import BaseModel
from rich.text import Text
from rich.console import Console

import capa.main
import capa.helpers
import capa.analysis.flirt
import capa.analysis.strings
import capa.features.extractors.ida.idalib as idalib

if not idalib.has_idalib():
    raise RuntimeError("cannot find IDA idalib module.")

if not idalib.load_idalib():
    raise RuntimeError("failed to load IDA idalib module.")

import idaapi
import idapro
import ida_auto
import idautils

logger = logging.getLogger(__name__)


class Classification(str, Enum):
    USER = "user"
    LIBRARY = "library"
    UNKNOWN = "unknown"


class Method(str, Enum):
    FLIRT = "flirt"
    STRINGS = "strings"
    THUNK = "thunk"
    ENTRYPOINT = "entrypoint"


class FunctionClassification(BaseModel):
    va: int
    classification: Classification
    # name per the disassembler/analysis tool
    # may be combined with the recovered/suspected name TODO below
    name: str

    # if is library, this must be provided
    method: Optional[Method]

    # TODO: if is library, recovered/suspected name?

    # if is library, these can optionally be provided.
    library_name: Optional[str] = None
    library_version: Optional[str] = None


class FunctionIdResults(BaseModel):
    function_classifications: List[FunctionClassification]


@contextlib.contextmanager
def ida_session(input_path: Path, use_temp_dir=True):
    if use_temp_dir:
        t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
    else:
        t = input_path

    logger.debug("using %s", str(t))
    # stderr=True is used here to redirect the spinner banner to stderr,
    # so that users can redirect capa's output.
    console = Console(stderr=True, quiet=False)

    try:
        if use_temp_dir:
            t.write_bytes(input_path.read_bytes())

        # idalib writes to stdout (ugh), so we have to capture that
        # so as not to screw up structured output.
        with capa.helpers.stdout_redirector(io.BytesIO()):
            idapro.enable_console_messages(False)
            with capa.main.timing("analyze program"):
                with console.status("analyzing program...", spinner="dots"):
                    if idapro.open_database(str(t.absolute()), run_auto_analysis=True):
                        raise RuntimeError("failed to analyze input file")

            logger.debug("idalib: waiting for analysis...")
            ida_auto.auto_wait()
            logger.debug("idalib: opened database.")

            yield
    finally:
        idapro.close_database()
        if use_temp_dir:
            t.unlink()


def is_thunk_function(fva):
    f = idaapi.get_func(fva)
    return bool(f.flags & idaapi.FUNC_THUNK)


def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Identify library functions using various strategies.")
    capa.main.install_common_args(parser, wanted={"input_file"})
    parser.add_argument("--store-idb", action="store_true", default=False, help="store IDA database file")
    parser.add_argument("--min-string-length", type=int, default=8, help="minimum string length")
    parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
    args = parser.parse_args(args=argv)

    try:
        capa.main.handle_common_args(args)
    except capa.main.ShouldExitError as e:
        return e.status_code

    dbs = capa.analysis.strings.get_default_databases()
    capa.analysis.strings.prune_databases(dbs, n=args.min_string_length)

    function_classifications: List[FunctionClassification] = []
    with ida_session(args.input_file, use_temp_dir=not args.store_idb):
        with capa.main.timing("FLIRT-based library identification"):
            # TODO: add more signature (files)
            # TODO: apply more signatures
            for flirt_match in capa.analysis.flirt.get_flirt_matches():
                function_classifications.append(
                    FunctionClassification(
                        va=flirt_match.va,
                        name=flirt_match.name,
                        classification=Classification.LIBRARY,
                        method=Method.FLIRT,
                        # note: we cannot currently include which signature matched per function via the IDA API
                    )
                )

        # thunks
        for fva in idautils.Functions():
            if is_thunk_function(fva):
                function_classifications.append(
                    FunctionClassification(
                        va=fva,
                        name=idaapi.get_func_name(fva),
                        classification=Classification.LIBRARY,
                        method=Method.THUNK,
                    )
                )

        with capa.main.timing("string-based library identification"):
            for string_match in capa.analysis.strings.get_string_matches(dbs):
                function_classifications.append(
                    FunctionClassification(
                        va=string_match.va,
                        name=idaapi.get_func_name(string_match.va),
                        classification=Classification.LIBRARY,
                        method=Method.STRINGS,
                        library_name=string_match.metadata.library_name,
                        library_version=string_match.metadata.library_version,
                    )
                )

        for va in idautils.Functions():
            name = idaapi.get_func_name(va)
            if name not in {
                "WinMain",
            }:
                continue

            function_classifications.append(
                FunctionClassification(
                    va=va,
                    name=name,
                    classification=Classification.USER,
                    method=Method.ENTRYPOINT,
                )
            )

        doc = FunctionIdResults(function_classifications=[])
        classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
        for va in idautils.Functions():
            if classifications := classifications_by_va.get(va):
                doc.function_classifications.extend(classifications)
            else:
                doc.function_classifications.append(
                    FunctionClassification(
                        va=va,
                        name=idaapi.get_func_name(va),
                        classification=Classification.UNKNOWN,
                        method=None,
                    )
                )

    if args.json:
        print(doc.model_dump_json())  # noqa: T201 print found

    else:
        table = rich.table.Table()
        table.add_column("FVA")
        table.add_column("CLASSIFICATION")
        table.add_column("METHOD")
        table.add_column("FNAME")
        table.add_column("EXTRA INFO")

        classifications_by_va = capa.analysis.strings.create_index(doc.function_classifications, "va", sorted_=True)
        for va, classifications in classifications_by_va.items():
            name = ", ".join({c.name for c in classifications})
            if "sub_" in name:
                name = Text(name, style="grey53")

            classification = {c.classification for c in classifications}
            method = {c.method for c in classifications if c.method}
            extra = {f"{c.library_name}@{c.library_version}" for c in classifications if c.library_name}

            table.add_row(
                hex(va),
                ", ".join(classification) if classification != {"unknown"} else Text("unknown", style="grey53"),
                ", ".join(method),
                name,
                ", ".join(extra),
            )

        rich.print(table)


if __name__ == "__main__":
    sys.exit(main())
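The script is invoked as a module; a hypothetical run (the sample name is illustrative, the flags are those defined by the argument parser above):

```console
$ python -m capa.analysis.libraries --json suspicious.exe_ > classifications.json
```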
capa/analysis/requirements.txt (new file, 2 lines):

@@ -0,0 +1,2 @@
# temporary extra file to track dependencies of the analysis directory
nltk==3.9.1
capa/analysis/strings/__init__.py (new file, 269 lines):

@@ -0,0 +1,269 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

"""
further requirements:
  - nltk
"""
import gzip
import logging
import collections
from typing import Any, Dict, Mapping
from pathlib import Path
from dataclasses import dataclass

import msgspec

import capa.features.extractors.strings

logger = logging.getLogger(__name__)


class LibraryString(msgspec.Struct):
    string: str
    library_name: str
    library_version: str
    file_path: str | None = None
    function_name: str | None = None
    line_number: int | None = None


@dataclass
class LibraryStringDatabase:
    metadata_by_string: Dict[str, LibraryString]

    def __len__(self) -> int:
        return len(self.metadata_by_string)

    @classmethod
    def from_file(cls, path: Path) -> "LibraryStringDatabase":
        metadata_by_string: Dict[str, LibraryString] = {}
        decoder = msgspec.json.Decoder(type=LibraryString)
        for line in gzip.decompress(path.read_bytes()).split(b"\n"):
            if not line:
                continue
            s = decoder.decode(line)
            metadata_by_string[s.string] = s

        return cls(metadata_by_string=metadata_by_string)


DEFAULT_FILENAMES = (
    "brotli.jsonl.gz",
    "bzip2.jsonl.gz",
    "cryptopp.jsonl.gz",
    "curl.jsonl.gz",
    "detours.jsonl.gz",
    "jemalloc.jsonl.gz",
    "jsoncpp.jsonl.gz",
    "kcp.jsonl.gz",
    "liblzma.jsonl.gz",
    "libsodium.jsonl.gz",
    "libpcap.jsonl.gz",
    "mbedtls.jsonl.gz",
    "openssl.jsonl.gz",
    "sqlite3.jsonl.gz",
    "tomcrypt.jsonl.gz",
    "wolfssl.jsonl.gz",
    "zlib.jsonl.gz",
)

DEFAULT_PATHS = tuple(Path(__file__).parent / "data" / "oss" / filename for filename in DEFAULT_FILENAMES) + (
    Path(__file__).parent / "data" / "crt" / "msvc_v143.jsonl.gz",
)


def get_default_databases() -> list[LibraryStringDatabase]:
    return [LibraryStringDatabase.from_file(path) for path in DEFAULT_PATHS]


@dataclass
class WindowsApiStringDatabase:
    dll_names: set[str]
    api_names: set[str]

    def __len__(self) -> int:
        return len(self.dll_names) + len(self.api_names)

    @classmethod
    def from_dir(cls, path: Path) -> "WindowsApiStringDatabase":
        dll_names: set[str] = set()
        api_names: set[str] = set()

        for line in gzip.decompress((path / "dlls.txt.gz").read_bytes()).decode("utf-8").splitlines():
            if not line:
                continue
            dll_names.add(line)

        for line in gzip.decompress((path / "apis.txt.gz").read_bytes()).decode("utf-8").splitlines():
            if not line:
                continue
            api_names.add(line)

        return cls(dll_names=dll_names, api_names=api_names)

    @classmethod
    def from_defaults(cls) -> "WindowsApiStringDatabase":
        return cls.from_dir(Path(__file__).parent / "data" / "winapi")


def extract_strings(buf, n=4):
    yield from capa.features.extractors.strings.extract_ascii_strings(buf, n=n)
    yield from capa.features.extractors.strings.extract_unicode_strings(buf, n=n)


def prune_databases(dbs: list[LibraryStringDatabase], n=8):
    """remove less trustworthy database entries,

    such as:
      - those found in multiple databases
      - those that are English words
      - those that are too short
      - Windows API and DLL names
    """

    # TODO: consider applying these filters directly to the persisted databases, not at load time.

    winapi = WindowsApiStringDatabase.from_defaults()

    try:
        from nltk.corpus import words as nltk_words

        nltk_words.words()
    except (ImportError, LookupError):
        # one-time download of dataset.
        # this probably doesn't work well for embedded use.
        import nltk

        nltk.download("words")
        from nltk.corpus import words as nltk_words
    words = set(nltk_words.words())

    counter: collections.Counter[str] = collections.Counter()
    to_remove = set()
    for db in dbs:
        for string in db.metadata_by_string.keys():
            counter[string] += 1

            if string in words:
                to_remove.add(string)
                continue

            if len(string) < n:
                to_remove.add(string)
                continue

            if string in winapi.api_names:
                to_remove.add(string)
                continue

            if string in winapi.dll_names:
                to_remove.add(string)
                continue

    for string, count in counter.most_common():
        if count <= 1:
            break

        # remove strings that are seen in more than one database
        to_remove.add(string)

    for db in dbs:
        for string in to_remove:
            if string in db.metadata_by_string:
                del db.metadata_by_string[string]


def get_function_strings():
    import idaapi
    import idautils

    import capa.features.extractors.ida.helpers as ida_helpers

    strings_by_function = collections.defaultdict(set)
    for ea in idautils.Functions():
        f = idaapi.get_func(ea)

        # ignore library functions and thunk functions as identified by IDA
        if f.flags & idaapi.FUNC_THUNK:
            continue
        if f.flags & idaapi.FUNC_LIB:
            continue

        for bb in ida_helpers.get_function_blocks(f):
            for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
                ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
                if ref == insn.ea:
                    continue

                string = capa.features.extractors.ida.helpers.find_string_at(ref)
                if not string:
                    continue

                strings_by_function[ea].add(string)

    return strings_by_function


@dataclass
class LibraryStringClassification:
    va: int
    string: str
    library_name: str
    metadata: LibraryString


def create_index(s: list, k: str, sorted_: bool = False) -> Mapping[Any, list]:
    """create an index of the elements in `s` using the key `k`, optionally sorted by `k`"""
    if sorted_:
        s = sorted(s, key=lambda x: getattr(x, k))

    s_by_k = collections.defaultdict(list)
    for v in s:
        p = getattr(v, k)
        s_by_k[p].append(v)
    return s_by_k


def get_string_matches(dbs: list[LibraryStringDatabase]) -> list[LibraryStringClassification]:
    matches: list[LibraryStringClassification] = []

    for function, strings in sorted(get_function_strings().items()):
        for string in strings:
            for db in dbs:
                if metadata := db.metadata_by_string.get(string):
                    matches.append(
                        LibraryStringClassification(
                            va=function,
                            string=string,
                            library_name=metadata.library_name,
                            metadata=metadata,
                        )
                    )

    # if there are fewer than N strings per library, ignore that library
    matches_by_library = create_index(matches, "library_name")
    for library_name, library_matches in matches_by_library.items():
        if len(library_matches) > 5:
            continue

        logger.info("pruning library %s: only %d matched strings", library_name, len(library_matches))
        matches = [m for m in matches if m.library_name != library_name]

    # if there are conflicts within a single function, don't label it
    matches_by_function = create_index(matches, "va")
    for va, function_matches in matches_by_function.items():
        library_names = {m.library_name for m in function_matches}
        if len(library_names) == 1:
            continue

        logger.info("conflicting matches: 0x%x: %s", va, sorted(library_names))
        # this is potentially slow (O(n**2)) but hopefully fast enough in practice.
        matches = [m for m in matches if m.va != va]

    return matches
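A short sketch of how these helpers compose outside of IDA, using only functions defined above; it mirrors the first half of capa/analysis/strings/__main__.py below, and the sample path is illustrative:

```python
from pathlib import Path

import capa.analysis.strings as cas

# load the bundled library-string databases and drop less trustworthy entries
dbs = cas.get_default_databases()
cas.prune_databases(dbs, n=8)

# scan raw file bytes for known library strings (no disassembly required)
buf = Path("suspicious.exe_").read_bytes()  # illustrative sample path
for s in cas.extract_strings(buf, n=8):
    for db in dbs:
        if metadata := db.metadata_by_string.get(s.s):
            print(f"{metadata.library_name}@{metadata.library_version}: {s.s}")
```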
capa/analysis/strings/__main__.py (new file, 130 lines):

@@ -0,0 +1,130 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import logging
import collections
from pathlib import Path

import rich
from rich.text import Text

import capa.analysis.strings
import capa.features.extractors.strings
import capa.features.extractors.ida.helpers as ida_helpers

logger = logging.getLogger(__name__)


def open_ida(input_path: Path):
    import tempfile

    import idapro

    t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
    t.write_bytes(input_path.read_bytes())
    # resource leak: we should delete this upon exit

    idapro.enable_console_messages(False)
    idapro.open_database(str(t.absolute()), run_auto_analysis=True)

    import ida_auto

    ida_auto.auto_wait()


def main():
    logging.basicConfig(level=logging.DEBUG)

    # use n=8 to ignore common words
    N = 8

    input_path = Path(sys.argv[1])

    dbs = capa.analysis.strings.get_default_databases()
    capa.analysis.strings.prune_databases(dbs, n=N)

    strings_by_library = collections.defaultdict(set)
    for string in capa.analysis.strings.extract_strings(input_path.read_bytes(), n=N):
        for db in dbs:
            if metadata := db.metadata_by_string.get(string.s):
                strings_by_library[metadata.library_name].add(string.s)

    console = rich.get_console()
    console.print("found libraries:", style="bold")
    for library, strings in sorted(strings_by_library.items(), key=lambda p: len(p[1]), reverse=True):
        console.print(f"  - [b]{library}[/] ({len(strings)} strings)")

        for string in sorted(strings)[:10]:
            console.print(f"      - {string}", markup=False, style="grey37")

        if len(strings) > 10:
            console.print("      ...", style="grey37")

    if not strings_by_library:
        console.print("  (none)", style="grey37")
        # since we're not going to find any strings
        # return early and don't do IDA analysis
        return

    open_ida(input_path)

    import idaapi
    import idautils
    import ida_funcs

    strings_by_function = collections.defaultdict(set)
    for ea in idautils.Functions():
        f = idaapi.get_func(ea)

        # ignore library functions and thunk functions as identified by IDA
        if f.flags & idaapi.FUNC_THUNK:
            continue
        if f.flags & idaapi.FUNC_LIB:
            continue

        for bb in ida_helpers.get_function_blocks(f):
            for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
                ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
                if ref == insn.ea:
                    continue

                string = capa.features.extractors.ida.helpers.find_string_at(ref)
                if not string:
                    continue

                for db in dbs:
                    if metadata := db.metadata_by_string.get(string):
                        strings_by_function[ea].add(string)

    # ensure there are at least XXX functions renamed, or ignore those entries

    console.print("functions:", style="bold")
    for function, strings in sorted(strings_by_function.items()):
        if strings:
            name = ida_funcs.get_func_name(function)

            console.print(f"  [b]{name}[/]@{function:08x}:")

            for string in strings:
                for db in dbs:
                    if metadata := db.metadata_by_string.get(string):
                        location = Text(
                            f"{metadata.library_name}@{metadata.library_version}::{metadata.function_name}",
                            style="grey37",
                        )
                        console.print("    - ", location, ": ", string.rstrip())

            console.print()

    console.print(
        f"found {len(strings_by_function)} library functions across {len(list(idautils.Functions()))} functions"
    )


if __name__ == "__main__":
    main()
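The module is runnable directly; a hypothetical invocation (the sample name is illustrative):

```console
$ python -m capa.analysis.strings suspicious.exe_
```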
capa/analysis/strings/data/oss/.gitignore (new file, 3 lines):

@@ -0,0 +1,3 @@
*.csv
*.jsonl
*.jsonl.gz

New binary files (contents not shown):

- capa/analysis/strings/data/crt/msvc_v143.jsonl.gz
- capa/analysis/strings/data/oss/brotli.jsonl.gz
- capa/analysis/strings/data/oss/bzip2.jsonl.gz
- capa/analysis/strings/data/oss/cryptopp.jsonl.gz
- capa/analysis/strings/data/oss/curl.jsonl.gz
- capa/analysis/strings/data/oss/detours.jsonl.gz
- capa/analysis/strings/data/oss/jemalloc.jsonl.gz
- capa/analysis/strings/data/oss/jsoncpp.jsonl.gz
- capa/analysis/strings/data/oss/kcp.jsonl.gz
- capa/analysis/strings/data/oss/liblzma.jsonl.gz
- capa/analysis/strings/data/oss/libpcap.jsonl.gz
- capa/analysis/strings/data/oss/libsodium.jsonl.gz
- capa/analysis/strings/data/oss/mbedtls.jsonl.gz
- capa/analysis/strings/data/oss/openssl.jsonl.gz
99 capa/analysis/strings/data/oss/readme.md Normal file
@@ -0,0 +1,99 @@
# Strings from Open Source libraries

This directory contains databases of strings extracted from open source software.
capa uses these databases to ignore functions that are likely library code.

There is one file for each database. Each database is a gzip-compressed JSONL (one JSON document per line) file.
Each JSON document looks like this:

    string: "1.0.8, 13-Jul-2019"
    library_name: "bzip2"
    library_version: "1.0.8#3"
    file_path: "CMakeFiles/bz2.dir/bzlib.c.obj"
    function_name: "BZ2_bzlibVersion"
    line_number: null
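
To make the format concrete, here is a minimal sketch of loading one of these databases into a lookup table keyed by string value; `load_database` is an illustrative helper, not capa's actual API:

```python
# a minimal sketch, assuming the JSONL layout shown above;
# load_database is an illustrative helper, not capa's actual API.
import gzip
import json
from pathlib import Path


def load_database(path: Path) -> dict[str, dict]:
    """map each extracted string to its metadata document."""
    metadata_by_string: dict[str, dict] = {}
    with gzip.open(path, "rt", encoding="utf-8") as f:
        for line in f:
            metadata = json.loads(line)
            metadata_by_string[metadata["string"]] = metadata
    return metadata_by_string


# strings seen during analysis can then be checked against the database:
db = load_database(Path("bzip2.jsonl.gz"))
print(db.get("1.0.8, 13-Jul-2019"))
```
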
The following databases were extracted via the vcpkg & jh technique:

- brotli 1.0.9#5
- bzip2 1.0.8#3
- cryptopp 8.7.0
- curl 7.86.0#1
- detours 4.0.1#7
- jemalloc 5.3.0#1
- jsoncpp 1.9.5
- kcp 1.7
- liblzma 5.2.5#6
- libsodium 1.0.18#8
- libpcap 1.10.1#3
- mbedtls 2.28.1
- openssl 3.0.7#1
- sqlite3 3.40.0#1
- tomcrypt 1.18.2#2
- wolfssl 5.5.0
- zlib 1.2.13

This code was originally developed in FLOSS and imported into capa.

## The vcpkg & jh technique

Major steps:

1. build static libraries via vcpkg
2. extract features via jh
3. convert to JSONL format with `jh_to_qs.py`
4. compress with gzip

### Build static libraries via vcpkg

[vcpkg](https://vcpkg.io/en/) is a free C/C++ package manager for acquiring and managing libraries.
We use it to easily build common open source libraries, like zlib.
Use the triplet `x64-windows-static` to build static archives (.lib files that are AR archives containing COFF object files):

```console
PS > C:\vcpkg\vcpkg.exe install --triplet x64-windows-static zlib
```

### Extract features via jh

[jh](https://github.com/williballenthin/lancelot/blob/master/bin/src/bin/jh.rs)
is a lancelot-based utility that parses AR archives containing COFF object files,
reconstructs their control flow, finds functions, and extracts features.
jh extracts numbers, API calls, and strings; we are only interested in the string features.

For each feature, jh emits a CSV line with the fields:

- target triplet
- compiler
- library
- version
- build profile
- path
- function
- feature type
- feature value

For example:

```csv
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0x00000100
```

To invoke jh:

```console
$ ~/lancelot/target/release/jh x64-windows-static msvc143 zlib 1.2.13 release /mnt/c/vcpkg/installed/x64-windows-static/lib/zlib.lib > ~/flare-floss/floss/qs/db/data/oss/zlib.csv
```

### Convert to OSS database format

We use the script `jh_to_qs.py` to convert these CSV lines into a JSONL file prepared for FLOSS:

```console
$ python3 jh_to_qs.py zlib.csv > zlib.jsonl
```

These files are then gzipped:

```console
$ gzip -c zlib.jsonl > zlib.jsonl.gz
```
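
For reference, the conversion amounts to keeping only the string features and re-emitting each CSV row as a JSON document. Here is a sketch assuming the nine-column jh CSV layout shown above; the real `jh_to_qs.py` may differ in its details:

```python
# a sketch of the CSV-to-JSONL conversion, assuming the nine-column
# jh CSV layout described above; the real jh_to_qs.py may differ.
import csv
import json
import sys

for row in csv.reader(sys.stdin):
    _triplet, _compiler, library, version, _profile, path, function, ftype, value = row
    if ftype != "string":
        # jh also emits number and API features; we only keep strings.
        continue
    print(
        json.dumps(
            {
                "string": value,
                "library_name": library,
                "library_version": version,
                "file_path": path,
                "function_name": function,
                "line_number": None,
            }
        )
    )
```

Run it like `python3 sketch.py < zlib.csv > zlib.jsonl`.
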
BIN capa/analysis/strings/data/oss/sqlite3.jsonl.gz Normal file
Binary file not shown.
BIN capa/analysis/strings/data/oss/tomcrypt.jsonl.gz Normal file
Binary file not shown.
BIN capa/analysis/strings/data/oss/wolfssl.jsonl.gz Normal file
Binary file not shown.
BIN capa/analysis/strings/data/oss/zlib.jsonl.gz Normal file
Binary file not shown.
BIN capa/analysis/strings/data/winapi/apis.txt.gz Normal file
Binary file not shown.
BIN capa/analysis/strings/data/winapi/dlls.txt.gz Normal file
Binary file not shown.
@@ -9,7 +9,7 @@
 import logging
 import itertools
 import collections
-from typing import Any, Tuple
+from typing import Any

 from capa.rules import Scope, RuleSet
 from capa.engine import FeatureSet, MatchResults
@@ -64,7 +64,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon

 def find_capabilities(
     ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
-) -> Tuple[MatchResults, Any]:
+) -> tuple[MatchResults, Any]:
     from capa.capabilities.static import find_static_capabilities
     from capa.capabilities.dynamic import find_dynamic_capabilities

@@ -9,7 +9,7 @@
 import logging
 import itertools
 import collections
-from typing import Any, List, Tuple
+from typing import Any

 import capa.perf
 import capa.features.freeze as frz
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)

 def find_call_capabilities(
     ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-) -> Tuple[FeatureSet, MatchResults]:
+) -> tuple[FeatureSet, MatchResults]:
     """
     find matches for the given rules for the given call.

@@ -51,7 +51,7 @@ def find_call_capabilities(

 def find_thread_capabilities(
     ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
-) -> Tuple[FeatureSet, MatchResults, MatchResults]:
+) -> tuple[FeatureSet, MatchResults, MatchResults]:
     """
     find matches for the given rules within the given thread.

@@ -89,7 +89,7 @@ def find_thread_capabilities(

 def find_process_capabilities(
     ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
-) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
+) -> tuple[MatchResults, MatchResults, MatchResults, int]:
     """
     find matches for the given rules within the given process.

@@ -127,7 +127,7 @@ def find_process_capabilities(

 def find_dynamic_capabilities(
     ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
-) -> Tuple[MatchResults, Any]:
+) -> tuple[MatchResults, Any]:
     all_process_matches: MatchResults = collections.defaultdict(list)
     all_thread_matches: MatchResults = collections.defaultdict(list)
     all_call_matches: MatchResults = collections.defaultdict(list)
@@ -135,7 +135,7 @@ def find_dynamic_capabilities(
     feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())

     assert isinstance(extractor, DynamicFeatureExtractor)
-    processes: List[ProcessHandle] = list(extractor.get_processes())
+    processes: list[ProcessHandle] = list(extractor.get_processes())
     n_processes: int = len(processes)

     with capa.helpers.CapaProgressBar(

@@ -10,7 +10,7 @@ import time
 import logging
 import itertools
 import collections
-from typing import Any, List, Tuple
+from typing import Any

 import capa.perf
 import capa.helpers
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)

 def find_instruction_capabilities(
     ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
-) -> Tuple[FeatureSet, MatchResults]:
+) -> tuple[FeatureSet, MatchResults]:
     """
     find matches for the given rules for the given instruction.

@@ -53,7 +53,7 @@ def find_instruction_capabilities(

 def find_basic_block_capabilities(
     ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
-) -> Tuple[FeatureSet, MatchResults, MatchResults]:
+) -> tuple[FeatureSet, MatchResults, MatchResults]:
     """
     find matches for the given rules within the given basic block.

@@ -93,7 +93,7 @@ def find_basic_block_capabilities(

 def find_code_capabilities(
     ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
-) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
+) -> tuple[MatchResults, MatchResults, MatchResults, int]:
     """
     find matches for the given rules within the given function.

@@ -131,16 +131,16 @@ def find_code_capabilities(

 def find_static_capabilities(
     ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
-) -> Tuple[MatchResults, Any]:
+) -> tuple[MatchResults, Any]:
     all_function_matches: MatchResults = collections.defaultdict(list)
     all_bb_matches: MatchResults = collections.defaultdict(list)
     all_insn_matches: MatchResults = collections.defaultdict(list)

     feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
-    library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
+    library_functions: tuple[rdoc.LibraryFunction, ...] = ()

     assert isinstance(extractor, StaticFeatureExtractor)
-    functions: List[FunctionHandle] = list(extractor.get_functions())
+    functions: list[FunctionHandle] = list(extractor.get_functions())
     n_funcs: int = len(functions)
     n_libs: int = 0
     percentage: float = 0

@@ -8,7 +8,7 @@

 import copy
 import collections
-from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator
+from typing import TYPE_CHECKING, Union, Mapping, Iterable, Iterator

 import capa.perf
 import capa.features.common
@@ -27,7 +27,7 @@ if TYPE_CHECKING:
 # to collect the locations of a feature, do: `features[Number(0x10)]`
 #
 # aliased here so that the type can be documented and xref'd.
-FeatureSet = Dict[Feature, Set[Address]]
+FeatureSet = dict[Feature, set[Address]]


 class Statement:
@@ -94,7 +94,7 @@ class And(Statement):
     match if all of the children evaluate to True.

     the order of evaluation is dictated by the property
-    `And.children` (type: List[Statement|Feature]).
+    `And.children` (type: list[Statement|Feature]).
     a query optimizer may safely manipulate the order of these children.
     """

@@ -127,7 +127,7 @@ class Or(Statement):
     match if any of the children evaluate to True.

     the order of evaluation is dictated by the property
-    `Or.children` (type: List[Statement|Feature]).
+    `Or.children` (type: list[Statement|Feature]).
     a query optimizer may safely manipulate the order of these children.
     """

@@ -176,7 +176,7 @@ class Some(Statement):
     match if at least N of the children evaluate to True.

     the order of evaluation is dictated by the property
-    `Some.children` (type: List[Statement|Feature]).
+    `Some.children` (type: list[Statement|Feature]).
     a query optimizer may safely manipulate the order of these children.
     """

@@ -267,7 +267,7 @@ class Subscope(Statement):
 # inspect(match_details)
 #
 # aliased here so that the type can be documented and xref'd.
-MatchResults = Mapping[str, List[Tuple[Address, Result]]]
+MatchResults = Mapping[str, list[tuple[Address, Result]]]


 def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
@@ -292,7 +292,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
         features[capa.features.common.MatchedRule(namespace)].update(locations)


-def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
+def match(rules: list["capa.rules.Rule"], features: FeatureSet, addr: Address) -> tuple[FeatureSet, MatchResults]:
     """
     match the given rules against the given features,
     returning an updated set of features and the matches.

@@ -6,7 +6,6 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 from enum import Enum
-from typing import Dict, List

 from capa.helpers import assert_never

@@ -22,7 +21,7 @@ COM_PREFIXES = {
 }


-def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
+def load_com_database(com_type: ComType) -> dict[str, list[str]]:
     # lazy load these python files since they are so large.
     # that is, don't load them unless a COM feature is being handled.
     import capa.features.com.classes

@@ -5,9 +5,8 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Dict, List

-COM_CLASSES: Dict[str, List[str]] = {
+COM_CLASSES: dict[str, list[str]] = {
     "ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
     "ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
     "ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],

@@ -5,9 +5,8 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Dict, List

-COM_INTERFACES: Dict[str, List[str]] = {
+COM_INTERFACES: dict[str, list[str]] = {
     "IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
     "IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
     "IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
@@ -16334,7 +16333,7 @@ COM_INTERFACES: Dict[str, List[str]] = {
     "IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
     "IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
     "IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
-    "IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
+    "IRcsServicetuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
     "IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
     "IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
     "IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],

@@ -9,10 +9,9 @@
 import re
 import abc
 import codecs
 import typing
 import logging
 import collections
-from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional
+from typing import TYPE_CHECKING, Union, Optional

 if TYPE_CHECKING:
     # circular import, otherwise
@@ -79,8 +78,8 @@ class Result:
         self,
         success: bool,
         statement: Union["capa.engine.Statement", "Feature"],
-        children: List["Result"],
-        locations: Optional[Set[Address]] = None,
+        children: list["Result"],
+        locations: Optional[set[Address]] = None,
     ):
         super().__init__()
         self.success = success
@@ -213,7 +212,7 @@ class Substring(String):

         # mapping from string value to list of locations.
         # will unique the locations later on.
-        matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
+        matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)

         assert isinstance(self.value, str)
         for feature, locations in features.items():
@@ -261,7 +260,7 @@ class _MatchedSubstring(Substring):
     note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
     """

-    def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]):
+    def __init__(self, substring: Substring, matches: dict[str, set[Address]]):
         """
         args:
             substring: the substring feature that matches.
@@ -305,7 +304,7 @@ class Regex(String):

         # mapping from string value to list of locations.
         # will unique the locations later on.
-        matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
+        matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)

         for feature, locations in features.items():
             if not isinstance(feature, (String,)):
@@ -353,7 +352,7 @@ class _MatchedRegex(Regex):
     note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
     """

-    def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]):
+    def __init__(self, regex: Regex, matches: dict[str, set[Address]]):
         """
         args:
             regex: the regex feature that matches.
@@ -467,6 +466,7 @@ FORMAT_VMRAY = "vmray"
 FORMAT_BINEXPORT2 = "binexport2"
 FORMAT_FREEZE = "freeze"
 FORMAT_RESULT = "result"
+FORMAT_BINJA_DB = "binja_database"
 STATIC_FORMATS = {
     FORMAT_SC32,
     FORMAT_SC64,
@@ -476,6 +476,7 @@ STATIC_FORMATS = {
     FORMAT_FREEZE,
     FORMAT_RESULT,
     FORMAT_BINEXPORT2,
+    FORMAT_BINJA_DB,
 }
 DYNAMIC_FORMATS = {
     FORMAT_CAPE,

@@ -11,13 +11,9 @@ import hashlib
 import dataclasses
 from copy import copy
 from types import MethodType
-from typing import Any, Set, Dict, Tuple, Union, Iterator
+from typing import Any, Union, Iterator, TypeAlias
 from dataclasses import dataclass

-# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
-# https://github.com/mandiant/capa/issues/1699
-from typing_extensions import TypeAlias
-
 import capa.features.address
 from capa.features.common import Feature
 from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
@@ -59,7 +55,7 @@ class FunctionHandle:

     address: Address
     inner: Any
-    ctx: Dict[str, Any] = dataclasses.field(default_factory=dict)
+    ctx: dict[str, Any] = dataclasses.field(default_factory=dict)


 @dataclass
@@ -135,7 +131,7 @@ class StaticFeatureExtractor:
         return self._sample_hashes

     @abc.abstractmethod
-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
         """
         extract features found at every scope ("global").

@@ -146,12 +142,12 @@ class StaticFeatureExtractor:
             print('0x%x: %s', va, feature)

         yields:
-            Tuple[Feature, Address]: feature and its location
+            tuple[Feature, Address]: feature and its location
         """
         raise NotImplementedError()

     @abc.abstractmethod
-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
         """
         extract file-scope features.

@@ -162,7 +158,7 @@ class StaticFeatureExtractor:
             print('0x%x: %s', va, feature)

         yields:
-            Tuple[Feature, Address]: feature and its location
+            tuple[Feature, Address]: feature and its location
         """
         raise NotImplementedError()

@@ -211,7 +207,7 @@ class StaticFeatureExtractor:
         raise KeyError(addr)

     @abc.abstractmethod
-    def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, f: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
         """
         extract function-scope features.
         the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -227,7 +223,7 @@ class StaticFeatureExtractor:
             f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.

         yields:
-            Tuple[Feature, Address]: feature and its location
+            tuple[Feature, Address]: feature and its location
         """
         raise NotImplementedError()

@@ -240,7 +236,7 @@ class StaticFeatureExtractor:
         raise NotImplementedError()

     @abc.abstractmethod
-    def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
         """
         extract basic block-scope features.
         the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -258,7 +254,7 @@ class StaticFeatureExtractor:
             bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.

         yields:
-            Tuple[Feature, Address]: feature and its location
+            tuple[Feature, Address]: feature and its location
         """
         raise NotImplementedError()

@@ -273,7 +269,7 @@ class StaticFeatureExtractor:
     @abc.abstractmethod
     def extract_insn_features(
         self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
         """
         extract instruction-scope features.
         the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -293,12 +289,12 @@ class StaticFeatureExtractor:
             insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.

         yields:
-            Tuple[Feature, Address]: feature and its location
+            tuple[Feature, Address]: feature and its location
         """
         raise NotImplementedError()


-def FunctionFilter(extractor: StaticFeatureExtractor, functions: Set) -> StaticFeatureExtractor:
+def FunctionFilter(extractor: StaticFeatureExtractor, functions: set) -> StaticFeatureExtractor:
     original_get_functions = extractor.get_functions

     def filtered_get_functions(self):
@@ -387,7 +383,7 @@ class DynamicFeatureExtractor:
         return self._sample_hashes

     @abc.abstractmethod
-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
         """
         extract features found at every scope ("global").

@@ -398,12 +394,12 @@ class DynamicFeatureExtractor:
             print(addr, feature)

         yields:
-            Tuple[Feature, Address]: feature and its location
+            tuple[Feature, Address]: feature and its location
         """
         raise NotImplementedError()

     @abc.abstractmethod
-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
         """
         extract file-scope features.

@@ -414,7 +410,7 @@ class DynamicFeatureExtractor:
             print(addr, feature)

         yields:
-            Tuple[Feature, Address]: feature and its location
+            tuple[Feature, Address]: feature and its location
         """
         raise NotImplementedError()

@@ -426,7 +422,7 @@ class DynamicFeatureExtractor:
         raise NotImplementedError()

     @abc.abstractmethod
-    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
         """
         Yields all the features of a process. These include:
         - file features of the process' image
@@ -449,7 +445,7 @@ class DynamicFeatureExtractor:
         raise NotImplementedError()

     @abc.abstractmethod
-    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
         """
         Yields all the features of a thread. These include:
         - sequenced api traces
@@ -466,7 +462,7 @@ class DynamicFeatureExtractor:
     @abc.abstractmethod
     def extract_call_features(
         self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
         """
         Yields all features of a call. These include:
         - api name
@@ -485,7 +481,7 @@ class DynamicFeatureExtractor:
         raise NotImplementedError()


-def ProcessFilter(extractor: DynamicFeatureExtractor, processes: Set) -> DynamicFeatureExtractor:
+def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> DynamicFeatureExtractor:
     original_get_processes = extractor.get_processes

     def filtered_get_processes(self):

@@ -17,7 +17,7 @@ import io
 import hashlib
 import logging
 import contextlib
-from typing import Set, Dict, List, Tuple, Iterator
+from typing import Iterator
 from pathlib import Path
 from collections import defaultdict
 from dataclasses import dataclass
@@ -51,13 +51,13 @@ def compute_common_prefix_length(m: str, n: str) -> int:
     return len(m)


-def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: List[Path]) -> Path:
+def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: list[Path]) -> Path:
     """attempt to find the sample file, given a BinExport2 file.

     searches in the same directory as the BinExport2 file, and then in search_paths.
     """

-    def filename_similarity_key(p: Path) -> Tuple[int, str]:
+    def filename_similarity_key(p: Path) -> tuple[int, str]:
         # note closure over input_file.
         # sort first by length of common prefix, then by name (for stability)
         return (compute_common_prefix_length(p.name, input_file.name), p.name)
@@ -65,7 +65,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
     wanted_sha256: str = be2.meta_information.executable_id.lower()

     input_directory: Path = input_file.parent
-    siblings: List[Path] = [p for p in input_directory.iterdir() if p.is_file()]
+    siblings: list[Path] = [p for p in input_directory.iterdir() if p.is_file()]
     siblings.sort(key=filename_similarity_key, reverse=True)
     for sibling in siblings:
         # e.g. with open IDA files in the same directory on Windows
@@ -74,7 +74,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
             return sibling

     for search_path in search_paths:
-        candidates: List[Path] = [p for p in search_path.iterdir() if p.is_file()]
+        candidates: list[Path] = [p for p in search_path.iterdir() if p.is_file()]
         candidates.sort(key=filename_similarity_key, reverse=True)
         for candidate in candidates:
             with contextlib.suppress(PermissionError):
@@ -88,27 +88,27 @@ class BinExport2Index:
     def __init__(self, be2: BinExport2):
         self.be2: BinExport2 = be2

-        self.callers_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
-        self.callees_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
+        self.callers_by_vertex_index: dict[int, list[int]] = defaultdict(list)
+        self.callees_by_vertex_index: dict[int, list[int]] = defaultdict(list)

         # note: flow graph != call graph (vertex)
-        self.flow_graph_index_by_address: Dict[int, int] = {}
-        self.flow_graph_address_by_index: Dict[int, int] = {}
+        self.flow_graph_index_by_address: dict[int, int] = {}
+        self.flow_graph_address_by_index: dict[int, int] = {}

         # edges that come from the given basic block
-        self.source_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
+        self.source_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
         # edges that end up at the given basic block
-        self.target_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
+        self.target_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)

-        self.vertex_index_by_address: Dict[int, int] = {}
+        self.vertex_index_by_address: dict[int, int] = {}

-        self.data_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
-        self.data_reference_index_by_target_address: Dict[int, List[int]] = defaultdict(list)
-        self.string_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
+        self.data_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
+        self.data_reference_index_by_target_address: dict[int, list[int]] = defaultdict(list)
+        self.string_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)

-        self.insn_address_by_index: Dict[int, int] = {}
-        self.insn_index_by_address: Dict[int, int] = {}
-        self.insn_by_address: Dict[int, BinExport2.Instruction] = {}
+        self.insn_address_by_index: dict[int, int] = {}
+        self.insn_index_by_address: dict[int, int] = {}
+        self.insn_by_address: dict[int, BinExport2.Instruction] = {}

         # must index instructions first
         self._index_insn_addresses()
@@ -208,7 +208,7 @@ class BinExport2Index:

     def basic_block_instructions(
         self, basic_block: BinExport2.BasicBlock
-    ) -> Iterator[Tuple[int, BinExport2.Instruction, int]]:
+    ) -> Iterator[tuple[int, BinExport2.Instruction, int]]:
         """
         For a given basic block, enumerate the instruction indices,
         the instruction instances, and their addresses.
@@ -253,7 +253,7 @@ class BinExport2Analysis:
         self.idx: BinExport2Index = idx
         self.buf: bytes = buf
         self.base_address: int = 0
-        self.thunks: Dict[int, int] = {}
+        self.thunks: dict[int, int] = {}

         self._find_base_address()
         self._compute_thunks()
@@ -279,12 +279,14 @@ class BinExport2Analysis:

         curr_idx: int = idx
         for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
-            thunk_callees: List[int] = self.idx.callees_by_vertex_index[curr_idx]
-            # if this doesn't hold, then it doesn't seem like this is a thunk,
+            thunk_callees: list[int] = self.idx.callees_by_vertex_index[curr_idx]
+            # If this doesn't hold, then it doesn't seem like this is a thunk,
             # because either, len is:
-            #  0 and the thunk doesn't point to anything, or
+            #  0 and the thunk doesn't point to anything or is indirect, like `call eax`, or
             #  >1 and the thunk may end up at many functions.
-            assert len(thunk_callees) == 1, f"thunk @ {hex(addr)} failed"
+            # In any case, this doesn't appear to be the sort of thunk we're looking for.
+            if len(thunk_callees) != 1:
+                break

             thunked_idx: int = thunk_callees[0]
             thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
@@ -324,7 +326,7 @@ class AddressNotMappedError(ReadMemoryError): ...
 @dataclass
 class AddressSpace:
     base_address: int
-    memory_regions: Tuple[MemoryRegion, ...]
+    memory_regions: tuple[MemoryRegion, ...]

     def read_memory(self, address: int, length: int) -> bytes:
         rva: int = address - self.base_address
@@ -337,7 +339,7 @@ class AddressSpace:

     @classmethod
     def from_pe(cls, pe: PE, base_address: int):
-        regions: List[MemoryRegion] = []
+        regions: list[MemoryRegion] = []
         for section in pe.sections:
             address: int = section.VirtualAddress
             size: int = section.Misc_VirtualSize
@@ -355,7 +357,7 @@ class AddressSpace:

     @classmethod
     def from_elf(cls, elf: ELFFile, base_address: int):
-        regions: List[MemoryRegion] = []
+        regions: list[MemoryRegion] = []

         # ELF segments are for runtime data,
         # ELF sections are for link-time data.
@@ -401,9 +403,9 @@ class AnalysisContext:
 class FunctionContext:
     ctx: AnalysisContext
     flow_graph_index: int
-    format: Set[str]
-    os: Set[str]
-    arch: Set[str]
+    format: set[str]
+    os: set[str]
+    arch: set[str]


 @dataclass

@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator, Optional
+from typing import Iterator, Optional

 import capa.features.extractors.binexport2.helpers
 from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)

 def extract_insn_number_features(
     fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -91,7 +91,7 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_offset_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -120,7 +120,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner
     be2: BinExport2 = fhi.ctx.be2
@@ -131,7 +131,7 @@ def extract_insn_nzxor_characteristic_features(
     instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
     # guaranteed to be simple int/reg operands
     # so we don't have to realize the tree/list.
-    operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
+    operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]

     if operands[1] != operands[2]:
         yield Characteristic("nzxor"), ih.address
@@ -146,7 +146,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner
     be2: BinExport2 = fhi.ctx.be2

@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Optional
+from typing import Optional
 from dataclasses import dataclass

 from capa.features.extractors.binexport2.helpers import get_operand_expressions
@@ -32,7 +32,7 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt
     # Base: Any general purpose register
     # Displacement: An integral offset

-    expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand)
+    expressions: list[BinExport2.Expression] = get_operand_expressions(be2, operand)

     # skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller
     # has checked for BinExport2.Expression.DEREFERENCE

@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.strings
 import capa.features.extractors.binexport2.helpers
@@ -63,7 +63,7 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_number_features(
     fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -123,7 +123,7 @@ OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_offset_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -161,7 +161,7 @@ def is_security_cookie(

     # security cookie check should use SP or BP
     op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]]
-    op1_exprs: List[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
+    op1_exprs: list[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
     if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs):
         return False

@@ -192,7 +192,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse non-zeroing XOR instruction from the given instruction.
     ignore expected non-zeroing XORs, e.g. security cookies.
@@ -209,7 +209,7 @@ def extract_insn_nzxor_characteristic_features(
     instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
     # guaranteed to be simple int/reg operands
     # so we don't have to realize the tree/list.
-    operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
+    operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]

     if operands[0] == operands[1]:
         return
@@ -236,7 +236,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner
     be2: BinExport2 = fhi.ctx.be2

@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
@@ -16,20 +16,20 @@ from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
 from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2


-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     bbi: BasicBlockContext = bbh.inner

     idx = fhi.ctx.idx

     basic_block_index: int = bbi.basic_block_index
-    target_edges: List[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
+    target_edges: list[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
     if basic_block_index in (e.source_basic_block_index for e in target_edges):
         basic_block_address: int = idx.get_basic_block_address(basic_block_index)
         yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)


-def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """extract basic block features"""
     for bb_handler in BASIC_BLOCK_HANDLERS:
         for feature, addr in bb_handler(fh, bbh):

@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Set, List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.elf
 import capa.features.extractors.common
@@ -48,14 +48,14 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
         address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address)
         self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space)

-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
         self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf)))
         self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf)))
         self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf)))

-        self.format: Set[str] = set()
-        self.os: Set[str] = set()
-        self.arch: Set[str] = set()
+        self.format: set[str] = set()
+        self.os: set[str] = set()
+        self.arch: set[str] = set()

         for feature, _ in self.global_features:
             assert isinstance(feature.value, str)
@@ -72,10 +72,10 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
     def get_base_address(self) -> AbsoluteVirtualAddress:
         return AbsoluteVirtualAddress(self.analysis.base_address)

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf)

     def get_functions(self) -> Iterator[FunctionHandle]:
@@ -97,7 +97,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
             inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch),
         )

-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binexport2.function.extract_features(fh)

     def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
@@ -112,7 +112,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
             inner=BasicBlockContext(basic_block_index),
         )

-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh)

     def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
@@ -126,5 +126,5 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):

     def extract_insn_features(
         self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih)

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import io
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import pefile
 from elftools.elf.elffile import ELFFile
@@ -23,7 +23,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
 logger = logging.getLogger(__name__)


-def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(capa.features.extractors.common.MATCH_PE):
         pe: pefile.PE = pefile.PE(data=buf)
         yield from capa.features.extractors.pefile.extract_file_export_names(pe)
@@ -34,7 +34,7 @@ def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
         logger.warning("unsupported format")


-def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(capa.features.extractors.common.MATCH_PE):
         pe: pefile.PE = pefile.PE(data=buf)
         yield from capa.features.extractors.pefile.extract_file_import_names(pe)
@@ -45,7 +45,7 @@ def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
         logger.warning("unsupported format")


-def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(capa.features.extractors.common.MATCH_PE):
         pe: pefile.PE = pefile.PE(data=buf)
         yield from capa.features.extractors.pefile.extract_file_section_names(pe)
@@ -56,15 +56,15 @@ def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[F
         logger.warning("unsupported format")


-def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     yield from capa.features.extractors.common.extract_file_strings(buf)


-def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     yield from capa.features.extractors.common.extract_format(buf)


-def extract_features(be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     """extract file features"""
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler(be2, buf):

@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.file import FunctionName
 from capa.features.common import Feature, Characteristic
@@ -16,7 +16,7 @@ from capa.features.extractors.base_extractor import FunctionHandle
 from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2


-def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     be2: BinExport2 = fhi.ctx.be2
@@ -32,7 +32,7 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Add
         yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)


-def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     be2: BinExport2 = fhi.ctx.be2
@@ -40,7 +40,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
     flow_graph_index: int = fhi.flow_graph_index
     flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]

-    edges: List[Tuple[int, int]] = []
+    edges: list[tuple[int, int]] = []
     for edge in flow_graph.edge:
         edges.append((edge.source_basic_block_index, edge.target_basic_block_index))

@@ -48,7 +48,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
     yield Characteristic("loop"), fh.address


-def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     be2: BinExport2 = fhi.ctx.be2
@@ -63,7 +63,7 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
     yield FunctionName(vertex.mangled_name), fh.address


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     for func_handler in FUNCTION_HANDLERS:
         for feature, addr in func_handler(fh):
             yield feature, addr

@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import re
|
||||
from typing import Set, Dict, List, Tuple, Union, Iterator, Optional
|
||||
from typing import Union, Iterator, Optional
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -22,7 +22,7 @@ HAS_ARCH_INTEL = {ARCH_I386, ARCH_AMD64}
|
||||
HAS_ARCH_ARM = {ARCH_AARCH64}
|
||||
|
||||
|
||||
def mask_immediate(arch: Set[str], immediate: int) -> int:
|
||||
def mask_immediate(arch: set[str], immediate: int) -> int:
|
||||
if arch & HAS_ARCH64:
|
||||
immediate &= 0xFFFFFFFFFFFFFFFF
|
||||
elif arch & HAS_ARCH32:
|
||||
@@ -30,7 +30,7 @@ def mask_immediate(arch: Set[str], immediate: int) -> int:
|
||||
return immediate
|
||||
|
||||
|
||||
def twos_complement(arch: Set[str], immediate: int, default: Optional[int] = None) -> int:
|
||||
def twos_complement(arch: set[str], immediate: int, default: Optional[int] = None) -> int:
|
||||
if default is not None:
|
||||
return capa.features.extractors.helpers.twos_complement(immediate, default)
|
||||
elif arch & HAS_ARCH64:
|
||||
@@ -50,17 +50,36 @@ def is_vertex_type(vertex: BinExport2.CallGraph.Vertex, type_: BinExport2.CallGr
     return vertex.HasField("type") and vertex.type == type_


+# internal to `build_expression_tree`
+# this is unstable: it is subject to change, so don't rely on it!
+def _prune_expression_tree_references_to_tree_index(
+    expression_tree: list[list[int]],
+    tree_index: int,
+):
+    # `i` is the index of the tree node that we'll search for `tree_index`
+    # if we remove `tree_index` from it, and it is now empty,
+    # then we'll need to prune references to `i`.
+    for i, tree_node in enumerate(expression_tree):
+        if tree_index in tree_node:
+            tree_node.remove(tree_index)
+
+            if len(tree_node) == 0:
+                # if the parent node is now empty,
+                # remove references to that parent node.
+                _prune_expression_tree_references_to_tree_index(expression_tree, i)
+
+
 # internal to `build_expression_tree`
 # this is unstable: it is subject to change, so don't rely on it!
 def _prune_expression_tree_empty_shifts(
     be2: BinExport2,
     operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
     tree_index: int,
 ):
     expression_index = operand.expression_index[tree_index]
     expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

     if expression.type == BinExport2.Expression.OPERATOR:
         if len(children_tree_indexes) == 0 and expression.symbol in ("lsl", "lsr"):
@@ -70,9 +89,7 @@ def _prune_expression_tree_empty_shifts(
             #
             # Which seems to be as if the shift wasn't there (shift of #0)
             # so we want to remove references to this node from any parent nodes.
-            for tree_node in expression_tree:
-                if tree_index in tree_node:
-                    tree_node.remove(tree_index)
+            _prune_expression_tree_references_to_tree_index(expression_tree, tree_index)

             return
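
The pruning helper cascades: removing the last child of a node empties that node, which must then be unlinked from its own parents. A toy walk-through on plain index lists (no real BinExport2 data involved):

    tree = [[1], [2], []]  # node 0 -> node 1 -> node 2 (say, an empty lsl)
    _prune_expression_tree_references_to_tree_index(tree, 2)
    assert tree == [[], [], []]  # node 1 was emptied, so node 0 was emptied too
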
@@ -82,38 +99,37 @@ def _prune_expression_tree_empty_shifts(

 # internal to `build_expression_tree`
 # this is unstable: it is subject to change, so don't rely on it!
-def _prune_expression_tree_empty_commas(
+def _fixup_expression_tree_references_to_tree_index(
+    expression_tree: list[list[int]],
+    existing_index: int,
+    new_index: int,
+):
+    for tree_node in expression_tree:
+        for i, index in enumerate(tree_node):
+            if index == existing_index:
+                tree_node[i] = new_index
+
+
+# internal to `build_expression_tree`
+# this is unstable: it is subject to change, so don't rely on it!
+def _fixup_expression_tree_lonely_commas(
     be2: BinExport2,
     operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
     tree_index: int,
 ):
     expression_index = operand.expression_index[tree_index]
     expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

     if expression.type == BinExport2.Expression.OPERATOR:
         if len(children_tree_indexes) == 1 and expression.symbol == ",":
             # Due to the above pruning of empty LSL or LSR expressions,
             # the parents might need to be fixed up.
             #
             # Specifically, if the pruned node was part of a comma list with two children,
             # now there's only a single child, which renders as an extra comma,
             # so we replace references to the comma node with the immediate child.
             #
             # A more correct way of doing this might be to walk up the parents and do fixups,
             # but I'm not quite sure how to do this yet. Just do two passes right now.
-            child = children_tree_indexes[0]
-
-            for tree_node in expression_tree:
-                if tree_index in tree_node:
-                    tree_node[tree_node.index(tree_index)] = child
-
-            return
+            existing_index = tree_index
+            new_index = children_tree_indexes[0]
+            _fixup_expression_tree_references_to_tree_index(expression_tree, existing_index, new_index)

     for child_tree_index in children_tree_indexes:
-        _prune_expression_tree_empty_commas(be2, operand, expression_tree, child_tree_index)
+        _fixup_expression_tree_lonely_commas(be2, operand, expression_tree, child_tree_index)


 # internal to `build_expression_tree`
@@ -121,17 +137,17 @@ def _prune_expression_tree_empty_commas(
 def _prune_expression_tree(
     be2: BinExport2,
     operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
 ):
     _prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0)
-    _prune_expression_tree_empty_commas(be2, operand, expression_tree, 0)
+    _fixup_expression_tree_lonely_commas(be2, operand, expression_tree, 0)


 # this is unstable: it is subject to change, so don't rely on it!
 def _build_expression_tree(
     be2: BinExport2,
     operand: BinExport2.Operand,
-) -> List[List[int]]:
+) -> list[list[int]]:
     # The reconstructed expression tree layout, linking parent nodes to their children.
     #
     # There is one list of integers for each expression in the operand.
@@ -159,7 +175,7 @@ def _build_expression_tree(
         # exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817)
         return []

-    tree: List[List[int]] = []
+    tree: list[list[int]] = []
     for i, expression_index in enumerate(operand.expression_index):
         children = []

@@ -173,7 +189,6 @@ def _build_expression_tree(
         tree.append(children)

     _prune_expression_tree(be2, operand, tree)
-    _prune_expression_tree(be2, operand, tree)

     return tree
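
The comma fixup above rewires parents to skip a comma node left with only a single child; the orphaned node itself simply stays behind in the list. A toy example in the same spirit:

    tree = [[1], [2], []]  # node 1 is a "," operator with lone child 2
    _fixup_expression_tree_references_to_tree_index(tree, existing_index=1, new_index=2)
    assert tree == [[2], [2], []]  # node 0 now points directly at node 2
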
@@ -181,21 +196,34 @@ def _build_expression_tree(
 def _fill_operand_expression_list(
     be2: BinExport2,
     operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
     tree_index: int,
-    expression_list: List[BinExport2.Expression],
+    expression_list: list[BinExport2.Expression],
 ):
     """
     Walk the given expression tree and collect the expression nodes in-order.
     """
     expression_index = operand.expression_index[tree_index]
     expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

     if expression.type == BinExport2.Expression.REGISTER:
-        assert len(children_tree_indexes) == 0
+        assert len(children_tree_indexes) <= 1
         expression_list.append(expression)
-        return
+
+        if len(children_tree_indexes) == 0:
+            return
+        elif len(children_tree_indexes) == 1:
+            # like for aarch64 with vector instructions, indicating vector data size:
+            #
+            # FADD V0.4S, V1.4S, V2.4S
+            #
+            # see: https://github.com/mandiant/capa/issues/2528
+            child_index = children_tree_indexes[0]
+            _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
+            return
+        else:
+            raise NotImplementedError(len(children_tree_indexes))

     elif expression.type == BinExport2.Expression.SYMBOL:
         assert len(children_tree_indexes) <= 1
@@ -218,9 +246,23 @@ def _fill_operand_expression_list(
             raise NotImplementedError(len(children_tree_indexes))

     elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
-        assert len(children_tree_indexes) == 0
+        assert len(children_tree_indexes) <= 1
         expression_list.append(expression)
-        return
+
+        if len(children_tree_indexes) == 0:
+            return
+        elif len(children_tree_indexes) == 1:
+            # the ghidra exporter can produce some weird expressions,
+            # particularly for MSRs, like for:
+            #
+            # sreg(3, 0, c.0, c.4, 4)
+            #
+            # see: https://github.com/mandiant/capa/issues/2530
+            child_index = children_tree_indexes[0]
+            _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
+            return
+        else:
+            raise NotImplementedError(len(children_tree_indexes))

     elif expression.type == BinExport2.Expression.SIZE_PREFIX:
         # like: b4
@@ -282,10 +324,10 @@ def _fill_operand_expression_list(
         raise NotImplementedError(expression.type)


-def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> List[BinExport2.Expression]:
+def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> list[BinExport2.Expression]:
     tree = _build_expression_tree(be2, op)

-    expressions: List[BinExport2.Expression] = []
+    expressions: list[BinExport2.Expression] = []
     _fill_operand_expression_list(be2, op, tree, 0, expressions)

     return expressions
@@ -331,11 +373,11 @@ def get_instruction_mnemonic(be2: BinExport2, instruction: BinExport2.Instructio
     return be2.mnemonic[instruction.mnemonic_index].name.lower()


-def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> List[BinExport2.Operand]:
+def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> list[BinExport2.Operand]:
     return [be2.operand[operand_index] for operand_index in instruction.operand_index]


-def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
+def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]:
     """
     Splits a string by any of the provided delimiter characters,
     including the delimiters in the results.
@@ -355,7 +397,7 @@ def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
     yield s[start:]


-BinExport2OperandPattern = Union[str, Tuple[str, ...]]
+BinExport2OperandPattern = Union[str, tuple[str, ...]]


 @dataclass
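
Judging from its docstring, `split_with_delimiters` keeps the delimiters themselves in the output stream, so callers can tokenize operand text without losing separators. Hypothetical usage:

    tokens = list(split_with_delimiters("eax+0x10", ("+", "-")))
    # expected: ["eax", "+", "0x10"]
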
@@ -382,8 +424,8 @@ class BinExport2InstructionPattern:
     This matcher uses the BinExport2 data layout under the hood.
     """

-    mnemonics: Tuple[str, ...]
-    operands: Tuple[Union[str, BinExport2OperandPattern], ...]
+    mnemonics: tuple[str, ...]
+    operands: tuple[Union[str, BinExport2OperandPattern], ...]
     capture: Optional[str]

     @classmethod
@@ -438,7 +480,7 @@ class BinExport2InstructionPattern:
         mnemonic, _, rest = pattern.partition(" ")
         mnemonics = mnemonic.split("|")

-        operands: List[Union[str, Tuple[str, ...]]] = []
+        operands: list[Union[str, tuple[str, ...]]] = []
         while rest:
             rest = rest.strip()
             if not rest.startswith("["):
@@ -509,7 +551,7 @@ class BinExport2InstructionPattern:
         expression: BinExport2.Expression

     def match(
-        self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
+        self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
     ) -> Optional["BinExport2InstructionPattern.MatchResult"]:
         """
         Match the given BinExport2 data against this pattern.
@@ -602,10 +644,10 @@ class BinExport2InstructionPattern:
 class BinExport2InstructionPatternMatcher:
     """Index and match a collection of instruction patterns."""

-    def __init__(self, queries: List[BinExport2InstructionPattern]):
+    def __init__(self, queries: list[BinExport2InstructionPattern]):
         self.queries = queries
         # shard the patterns by (mnemonic, #operands)
-        self._index: Dict[Tuple[str, int], List[BinExport2InstructionPattern]] = defaultdict(list)
+        self._index: dict[tuple[str, int], list[BinExport2InstructionPattern]] = defaultdict(list)

         for query in queries:
             for mnemonic in query.mnemonics:
@@ -623,7 +665,7 @@ class BinExport2InstructionPatternMatcher:
         )

     def match(
-        self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
+        self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
     ) -> Optional[BinExport2InstructionPattern.MatchResult]:
         queries = self._index.get((mnemonic.lower(), len(operand_expressions)), [])
         for query in queries:
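
The shard index is a plain dictionary keyed by (mnemonic, operand count), so a lookup only scans patterns that could possibly apply. A minimal sketch with made-up pattern names:

    from collections import defaultdict

    index: dict[tuple[str, int], list[str]] = defaultdict(list)
    for mnemonic, n_operands, name in [("mov", 2, "A"), ("mov", 2, "B"), ("ldr", 2, "C")]:
        index[(mnemonic, n_operands)].append(name)

    # a two-operand mov consults only patterns A and B:
    assert index.get(("mov", 2), []) == ["A", "B"]
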
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 import capa.features.extractors.strings
@@ -32,7 +32,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
 logger = logging.getLogger(__name__)


-def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -68,7 +68,7 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle

 def extract_insn_number_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     if fhi.arch & HAS_ARCH_INTEL:
@@ -77,7 +77,7 @@ def extract_insn_number_features(
         yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih)


-def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -92,7 +92,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
         # disassembler already identified string reference from instruction
         return

-    reference_addresses: List[int] = []
+    reference_addresses: list[int] = []

     if instruction_index in idx.data_reference_index_by_source_instruction_index:
         for data_reference_index in idx.data_reference_index_by_source_instruction_index[instruction_index]:
@@ -142,7 +142,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_insn_string_features(
     fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -161,7 +161,7 @@ def extract_insn_string_features(

 def extract_insn_offset_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     if fhi.arch & HAS_ARCH_INTEL:
@@ -172,7 +172,7 @@ def extract_insn_offset_features(

 def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     if fhi.arch & HAS_ARCH_INTEL:
@@ -187,7 +187,7 @@ def extract_insn_nzxor_characteristic_features(

 def extract_insn_mnemonic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -199,7 +199,7 @@ def extract_insn_mnemonic_features(
     yield Mnemonic(mnemonic_name), ih.address


-def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract functions calls from features

     most relevant at the function scope;
@@ -221,7 +221,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     if fhi.arch & HAS_ARCH_INTEL:
@@ -234,7 +234,7 @@ def extract_function_indirect_call_characteristic_features(
     )


-def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract instruction features"""
     for inst_handler in INSTRUCTION_HANDLERS:
         for feature, ea in inst_handler(f, bbh, insn):

@@ -5,115 +5,25 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+from typing import Iterator

-import string
-from typing import Tuple, Iterator
-
-from binaryninja import Function
 from binaryninja import BasicBlock as BinjaBasicBlock
-from binaryninja import (
-    BinaryView,
-    SymbolType,
-    RegisterValueType,
-    VariableSourceType,
-    MediumLevelILOperation,
-    MediumLevelILBasicBlock,
-    MediumLevelILInstruction,
-)

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address
 from capa.features.basicblock import BasicBlock
-from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
 from capa.features.extractors.base_extractor import BBHandle, FunctionHandle


-def get_printable_len_ascii(s: bytes) -> int:
-    """Return string length if all operand bytes are ascii or utf16-le printable"""
-    count = 0
-    for c in s:
-        if c == 0:
-            return count
-        if c < 127 and chr(c) in string.printable:
-            count += 1
-    return count
-
-
-def get_printable_len_wide(s: bytes) -> int:
-    """Return string length if all operand bytes are ascii or utf16-le printable"""
-    if all(c == 0x00 for c in s[1::2]):
-        return get_printable_len_ascii(s[::2])
-    return 0
-
-
-def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
-    bv: BinaryView = f.view
-
-    if il.operation != MediumLevelILOperation.MLIL_CALL:
-        return 0
-
-    target = il.dest
-    if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
-        return 0
-
-    addr = target.value.value
-    sym = bv.get_symbol_at(addr)
-    if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
-        return 0
-
-    if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
-        return 0
-
-    if len(il.params) < 2:
-        return 0
-
-    dest = il.params[0]
-    if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
-        var = dest.src
-    else:
-        return 0
-
-    if var.source_type != VariableSourceType.StackVariableSourceType:
-        return 0
-
-    src = il.params[1]
-    if src.value.type != RegisterValueType.ConstantDataAggregateValue:
-        return 0
-
-    s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
-    return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
-
-
-def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
-    """check basic block for stackstring indicators
-
-    true if basic block contains enough moves of constant bytes to the stack
-    """
-    count = 0
-    for il in bb:
-        count += get_stack_string_len(f, il)
-        if count > MIN_STACKSTRING_LEN:
-            return True
-
-    return False
-
-
-def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
-    """extract stackstring indicators from basic block"""
-    bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
-    if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]):
-        yield Characteristic("stack string"), bbh.address
-
-
-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """extract tight loop indicators from a basic block"""
-    bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
-    for edge in bb[0].outgoing_edges:
-        if edge.target.start == bb[0].start:
+    bb: BinjaBasicBlock = bbh.inner
+    for edge in bb.outgoing_edges:
+        if edge.target.start == bb.start:
             yield Characteristic("tight loop"), bbh.address


-def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """extract basic block features"""
     for bb_handler in BASIC_BLOCK_HANDLERS:
         for feature, addr in bb_handler(fh, bbh):
@@ -121,7 +31,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
     yield BasicBlock(), bbh.address


-BASIC_BLOCK_HANDLERS = (
-    extract_bb_tight_loop,
-    extract_bb_stackstring,
-)
+BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)

@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import binaryninja as binja

@@ -30,7 +30,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
     def __init__(self, bv: binja.BinaryView):
         super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
         self.bv = bv
-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
         self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
         self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
         self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
@@ -48,31 +48,24 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
         for f in self.bv.functions:
             yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)

-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binja.function.extract_features(fh)

     def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
         f: binja.Function = fh.inner
-        # Set up a MLIL basic block dict look up to associate the disassembly basic block with its MLIL basic block
-        mlil_lookup = {}
-        for mlil_bb in f.mlil.basic_blocks:
-            mlil_lookup[mlil_bb.source_block.start] = mlil_bb
-
         for bb in f.basic_blocks:
-            mlil_bb = mlil_lookup.get(bb.start)
+            yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)

-            yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))
-
-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)

     def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
         import capa.features.extractors.binja.helpers as binja_helpers

-        bb: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
-        addr = bb[0].start
+        bb: binja.BasicBlock = bbh.inner
+        addr = bb.start

-        for text, length in bb[0]:
+        for text, length in bb:
             insn = binja_helpers.DisassemblyInstruction(addr, length, text)
             yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
             addr += length

@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+from typing import Iterator

 from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding

@@ -13,12 +13,22 @@ import capa.features.extractors.common
 import capa.features.extractors.helpers
 import capa.features.extractors.strings
 from capa.features.file import Export, Import, Section, FunctionName
-from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
+from capa.features.common import (
+    FORMAT_PE,
+    FORMAT_ELF,
+    FORMAT_SC32,
+    FORMAT_SC64,
+    FORMAT_BINJA_DB,
+    Format,
+    String,
+    Feature,
+    Characteristic,
+)
 from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
 from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name


-def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
+def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
     """check segment for embedded PE"""
     start = 0
     if bv.view_type == "PE" and seg.start == bv.start:
@@ -32,13 +42,13 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature
         yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)


-def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract embedded PE features"""
     for seg in bv.segments:
         yield from check_segment_for_pe(bv, seg)


-def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract function exports"""
     for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
         if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
@@ -72,7 +82,7 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
             yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)


-def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract function imports

     1. imports by ordinal:
@@ -96,19 +106,19 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
             yield Import(name), addr


-def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract section names"""
     for name, section in bv.sections.items():
         yield Section(name), AbsoluteVirtualAddress(section.start)


-def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract ASCII and UTF-16 LE strings"""
     for s in bv.strings:
         yield String(s.value), FileOffsetAddress(s.start)


-def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """
     extract the names of statically-linked library functions.
     """
@@ -127,12 +137,22 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
             yield FunctionName(name[1:]), sym.address


-def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
+    if bv.file.database is not None:
+        yield Format(FORMAT_BINJA_DB), NO_ADDRESS
+
     view_type = bv.view_type
     if view_type in ["PE", "COFF"]:
         yield Format(FORMAT_PE), NO_ADDRESS
     elif view_type == "ELF":
         yield Format(FORMAT_ELF), NO_ADDRESS
+    elif view_type == "Mapped":
+        if bv.arch.name == "x86":
+            yield Format(FORMAT_SC32), NO_ADDRESS
+        elif bv.arch.name == "x86_64":
+            yield Format(FORMAT_SC64), NO_ADDRESS
+        else:
+            raise NotImplementedError(f"unexpected raw file with arch: {bv.arch}")
     elif view_type == "Raw":
         # no file type to return when processing a binary file, but we want to continue processing
         return
@@ -140,7 +160,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
         raise NotImplementedError(f"unexpected file format: {view_type}")


-def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract file features"""
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler(bv):

@@ -105,13 +105,13 @@ def find_binaryninja() -> Optional[Path]:
         logger.debug("detected OS: linux")
     elif sys.platform == "darwin":
         logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-        return False
+        return None
     elif sys.platform == "win32":
         logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-        return False
+        return None
     else:
         logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-        return False
+        return None

     desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
     if not desktop_entry:

@@ -5,14 +5,28 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+import string
+from typing import Iterator

-from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation
+from binaryninja import (
+    Function,
+    BinaryView,
+    SymbolType,
+    ILException,
+    RegisterValueType,
+    VariableSourceType,
+    LowLevelILOperation,
+    MediumLevelILOperation,
+    MediumLevelILBasicBlock,
+    MediumLevelILInstruction,
+)

 from capa.features.file import FunctionName
 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
 from capa.features.extractors import loops
+from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
+from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
 from capa.features.extractors.base_extractor import FunctionHandle


@@ -24,7 +38,7 @@ def extract_function_calls_to(fh: FunctionHandle):
         # Everything that is a code reference to the current function is considered a caller, which actually includes
         # many other references that are NOT a caller. For example, an instruction `push function_start` will also be
         # considered a caller to the function
-        llil = caller.llil
+        llil = get_llil_instr_at_addr(func.view, caller.address)
         if (llil is None) or llil.operation not in [
             LowLevelILOperation.LLIL_CALL,
             LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
@@ -33,14 +47,13 @@ def extract_function_calls_to(fh: FunctionHandle):
         ]:
             continue

-        if llil.dest.value.type not in [
-            RegisterValueType.ImportedAddressValue,
-            RegisterValueType.ConstantValue,
-            RegisterValueType.ConstantPointerValue,
+        if llil.dest.operation not in [
+            LowLevelILOperation.LLIL_CONST,
+            LowLevelILOperation.LLIL_CONST_PTR,
         ]:
             continue

-        address = llil.dest.value.value
+        address = llil.dest.constant
         if address != func.start:
             continue

@@ -95,10 +108,103 @@ def extract_function_name(fh: FunctionHandle):
             yield FunctionName(name[1:]), sym.address


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def get_printable_len_ascii(s: bytes) -> int:
+    """Return string length if all operand bytes are ascii or utf16-le printable"""
+    count = 0
+    for c in s:
+        if c == 0:
+            return count
+        if c < 127 and chr(c) in string.printable:
+            count += 1
+    return count
+
+
+def get_printable_len_wide(s: bytes) -> int:
+    """Return string length if all operand bytes are ascii or utf16-le printable"""
+    if all(c == 0x00 for c in s[1::2]):
+        return get_printable_len_ascii(s[::2])
+    return 0
+
+
+def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
+    bv: BinaryView = f.view
+
+    if il.operation != MediumLevelILOperation.MLIL_CALL:
+        return 0
+
+    target = il.dest
+    if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
+        return 0
+
+    addr = target.value.value
+    sym = bv.get_symbol_at(addr)
+    if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
+        return 0
+
+    if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
+        return 0
+
+    if len(il.params) < 2:
+        return 0
+
+    dest = il.params[0]
+    if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
+        var = dest.src
+    else:
+        return 0
+
+    if var.source_type != VariableSourceType.StackVariableSourceType:
+        return 0
+
+    src = il.params[1]
+    if src.value.type != RegisterValueType.ConstantDataAggregateValue:
+        return 0
+
+    s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
+    return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
+
+
+def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
+    """check basic block for stackstring indicators
+
+    true if basic block contains enough moves of constant bytes to the stack
+    """
+    count = 0
+    for il in bb:
+        count += get_stack_string_len(f, il)
+        if count > MIN_STACKSTRING_LEN:
+            return True
+
+    return False
+
+
+def extract_stackstring(fh: FunctionHandle):
+    """extract stackstring indicators"""
+    func: Function = fh.inner
+    bv: BinaryView = func.view
+    if bv is None:
+        return
+
+    try:
+        mlil = func.mlil
+    except ILException:
+        return
+
+    for block in mlil.basic_blocks:
+        if bb_contains_stackstring(func, block):
+            yield Characteristic("stack string"), block.source_block.start
+
+
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     for func_handler in FUNCTION_HANDLERS:
         for feature, addr in func_handler(fh):
             yield feature, addr


-FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
+FUNCTION_HANDLERS = (
+    extract_function_calls_to,
+    extract_function_loop,
+    extract_recursive_call,
+    extract_function_name,
+    extract_stackstring,
+)
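
A quick sanity check of the printable-length helpers added above; the expected values follow directly from the code:

    assert get_printable_len_ascii(b"ABC\x00XYZ") == 3               # stops at the NUL
    assert get_printable_len_wide(b"A\x00B\x00C\x00\x00\x00") == 3   # utf16-le "ABC"
    assert get_printable_len_wide(b"AB") == 0                        # odd bytes not all zero
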
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from binaryninja import BinaryView

@@ -16,7 +16,7 @@ from capa.features.address import NO_ADDRESS, Address
 logger = logging.getLogger(__name__)


-def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     name = bv.platform.name
     if "-" in name:
         name = name.split("-")[0]
@@ -45,7 +45,7 @@ def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
     return


-def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     arch = bv.arch.name
     if arch == "x86_64":
         yield Arch(ARCH_AMD64), NO_ADDRESS

@@ -6,10 +6,10 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import re
-from typing import List, Callable
+from typing import Callable, Optional
 from dataclasses import dataclass

-from binaryninja import BinaryView, LowLevelILInstruction
+from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction
 from binaryninja.architecture import InstructionTextToken


@@ -17,7 +17,7 @@ from binaryninja.architecture import InstructionTextToken
 class DisassemblyInstruction:
     address: int
     length: int
-    text: List[InstructionTextToken]
+    text: list[InstructionTextToken]


 LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
@@ -54,7 +54,7 @@ def unmangle_c_name(name: str) -> str:


 def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
-    s: List[str] = []
+    s: list[str] = []
     while len(s) < max_len:
         try:
             c = bv.read(offset + len(s), 1)[0]
@@ -67,3 +67,13 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
         s.append(chr(c))

     return "".join(s)
+
+
+def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]:
+    arch = bv.arch
+    buffer = bv.read(addr, arch.max_instr_length)
+    llil = LowLevelILFunction(arch=arch)
+    llil.current_address = addr
+    if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
+        return None
+    return llil[0]
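
`get_llil_instr_at_addr` lifts a single instruction in isolation (no full function analysis) and returns None when the architecture produces no LLIL for those bytes. A hedged usage sketch mirroring the call sites elsewhere in this change (`bv` and `addr` are whatever view and site you are inspecting):

    llil = get_llil_instr_at_addr(bv, addr)
    if llil is not None and llil.operation == LowLevelILOperation.LLIL_CALL:
        ...  # treat this site as a direct call
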
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Any, List, Tuple, Iterator, Optional
+from typing import Any, Iterator, Optional

 from binaryninja import Function
 from binaryninja import BasicBlock as BinjaBasicBlock
@@ -23,7 +23,7 @@ import capa.features.extractors.helpers
 from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
 from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
-from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
+from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle

 # security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -36,35 +36,27 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
 # 2. The function must only make one call/jump to another address
 # If the function being checked is a stub function, returns the target address. Otherwise, return None.
 def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
-    funcs = bv.get_functions_at(addr)
-    for func in funcs:
-        if len(func.basic_blocks) != 1:
-            continue
+    llil = get_llil_instr_at_addr(bv, addr)
+    if llil is None or llil.operation not in [
+        LowLevelILOperation.LLIL_CALL,
+        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
+        LowLevelILOperation.LLIL_JUMP,
+        LowLevelILOperation.LLIL_TAILCALL,
+    ]:
+        return None

-        call_count = 0
-        call_target = None
-        for il in func.llil.instructions:
-            if il.operation in [
-                LowLevelILOperation.LLIL_CALL,
-                LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
-                LowLevelILOperation.LLIL_JUMP,
-                LowLevelILOperation.LLIL_TAILCALL,
-            ]:
-                call_count += 1
-                if il.dest.value.type in [
-                    RegisterValueType.ImportedAddressValue,
-                    RegisterValueType.ConstantValue,
-                    RegisterValueType.ConstantPointerValue,
-                ]:
-                    call_target = il.dest.value.value
+    # The LLIL instruction retrieved by `get_llil_instr_at_addr` did not go through a full analysis, so we cannot check
+    # `llil.dest.value.type` here
+    if llil.dest.operation not in [
+        LowLevelILOperation.LLIL_CONST,
+        LowLevelILOperation.LLIL_CONST_PTR,
+    ]:
+        return None

-        if call_count == 1 and call_target is not None:
-            return call_target
-
-    return None
+    return llil.dest.constant


-def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction API features
@@ -123,7 +115,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)

 def extract_insn_number_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction number features
     example:
@@ -131,7 +123,7 @@ def extract_insn_number_features(
     """
     func: Function = fh.inner

-    results: List[Tuple[Any[Number, OperandNumber], Address]] = []
+    results: list[tuple[Any[Number, OperandNumber], Address]] = []

     def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
         if il.operation == LowLevelILOperation.LLIL_LOAD:
@@ -162,7 +154,7 @@ def extract_insn_number_features(
     yield from results


-def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse referenced byte sequences
     example:
@@ -209,7 +201,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_insn_string_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction string features

@@ -266,7 +258,7 @@ def extract_insn_string_features(

 def extract_insn_offset_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction structure offset features

@@ -275,7 +267,7 @@ def extract_insn_offset_features(
     """
     func: Function = fh.inner

-    results: List[Tuple[Any[Offset, OperandOffset], Address]] = []
+    results: list[tuple[Any[Offset, OperandOffset], Address]] = []
     address_size = func.view.arch.address_size * 8

     def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
@@ -353,7 +345,7 @@ def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInst

 def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction non-zeroing XOR instruction
     ignore expected non-zeroing XORs, e.g. security cookies
@@ -367,7 +359,7 @@ def extract_insn_nzxor_characteristic_features(
         # e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
         if il.operation == LowLevelILOperation.LLIL_XOR:
             # Exclude cases related to the stack cookie
-            if is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il):
+            if is_nzxor_stack_cookie(fh.inner, bbh.inner, il):
                 return False
             results.append((Characteristic("nzxor"), ih.address))
             return False
@@ -382,7 +374,7 @@ def extract_insn_nzxor_characteristic_features(

 def extract_insn_mnemonic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction mnemonic features"""
     insn: DisassemblyInstruction = ih.inner
     yield Mnemonic(insn.text[0].text), ih.address
@@ -390,7 +382,7 @@ def extract_insn_mnemonic_features(

 def extract_insn_obfs_call_plus_5_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse call $+5 instruction from the given instruction.
     """
@@ -401,7 +393,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(

 def extract_insn_peb_access_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction peb access

     fs:[0x30] on x86, gs:[0x60] on x64
@@ -444,7 +436,7 @@ def extract_insn_peb_access_characteristic_features(

 def extract_insn_segment_access_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction fs or gs access"""
     func: Function = fh.inner

@@ -471,7 +463,7 @@ def extract_insn_segment_access_features(

 def extract_insn_cross_section_cflow(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """inspect the instruction for a CALL or JMP that crosses section boundaries"""
     func: Function = fh.inner
     bv: BinaryView = func.view
@@ -491,7 +483,7 @@ def extract_insn_cross_section_cflow(
     yield Characteristic("cross section flow"), ih.address


-def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract functions calls from features

     most relevant at the function scope, however, its most efficient to extract at the instruction scope
@@ -534,7 +526,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
     does not include calls like => call ds:dword_ABD4974

@@ -562,7 +554,7 @@ def extract_function_indirect_call_characteristic_features(
     yield Characteristic("indirect call"), ih.address


-def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract instruction features"""
     for inst_handler in INSTRUCTION_HANDLERS:
         for feature, ea in inst_handler(f, bbh, insn):

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 from capa.helpers import assert_never
@@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
 logger = logging.getLogger(__name__)


-def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
     """
     this method extracts the given call's features (such as API name and arguments),
     and returns them as API, Number, and String features.
@@ -55,7 +55,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
     yield API(name), ch.address


-def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
     for handler in CALL_HANDLERS:
         for feature, addr in handler(ph, th, ch):
             yield feature, addr

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, Tuple, Union, Iterator
+from typing import Union, Iterator

 import capa.features.extractors.cape.call
 import capa.features.extractors.cape.file
@@ -50,16 +50,16 @@ class CapeExtractor(DynamicFeatureExtractor):
         assert self.report.static is not None and self.report.static.pe is not None
         return AbsoluteVirtualAddress(self.report.static.pe.imagebase)

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.cape.file.extract_features(self.report)

     def get_processes(self) -> Iterator[ProcessHandle]:
         yield from capa.features.extractors.cape.file.get_processes(self.report)

-    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.cape.process.extract_features(ph)

     def get_process_name(self, ph) -> str:
@@ -69,7 +69,7 @@ class CapeExtractor(DynamicFeatureExtractor):
     def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
         yield from capa.features.extractors.cape.process.get_threads(ph)

-    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
         if False:
             # force this routine to be a generator,
             # but we don't actually have any elements to generate.
@@ -81,7 +81,7 @@ class CapeExtractor(DynamicFeatureExtractor):

     def extract_call_features(
         self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)

     def get_call_name(self, ph, th, ch) -> str:
@@ -122,7 +122,7 @@ class CapeExtractor(DynamicFeatureExtractor):
         return "".join(parts)

     @classmethod
-    def from_report(cls, report: Dict) -> "CapeExtractor":
+    def from_report(cls, report: dict) -> "CapeExtractor":
         cr = CapeReport.model_validate(report)

         if cr.info.version not in TESTED_VERSIONS:

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.file import Export, Import, Section
 from capa.features.common import String, Feature
@@ -41,7 +41,7 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]:
         seen_processes[addr].append(process)


-def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     """
     extract imported function names
     """
@@ -62,57 +62,57 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]
         yield Import(name), AbsoluteVirtualAddress(function.address)


-def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     assert report.static is not None and report.static.pe is not None
     for function in report.static.pe.exports:
         yield Export(function.name), AbsoluteVirtualAddress(function.address)


-def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     assert report.static is not None and report.static.pe is not None
     for section in report.static.pe.sections:
         yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)


-def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     if report.strings is not None:
         for string in report.strings:
             yield String(string), NO_ADDRESS


-def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for regkey in report.behavior.summary.keys:
         yield String(regkey), NO_ADDRESS


-def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_files(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for file in report.behavior.summary.files:
         yield String(file), NO_ADDRESS


-def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for mutex in report.behavior.summary.mutexes:
         yield String(mutex), NO_ADDRESS


-def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for cmd in report.behavior.summary.executed_commands:
         yield String(cmd), NO_ADDRESS


-def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for symbol in report.behavior.summary.resolved_apis:
         yield String(symbol), NO_ADDRESS


-def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for svc in report.behavior.summary.created_services:
         yield String(svc), NO_ADDRESS
     for svc in report.behavior.summary.started_services:
         yield String(svc), NO_ADDRESS


-def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for handler in FILE_HANDLERS:
         for feature, addr in handler(report):
             yield feature, addr

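Every hunk in this change follows the same mechanical pattern: PEP 585 (Python 3.9+) made the builtin `tuple`, `list`, and `dict` subscriptable in annotations, so the `typing.Tuple`/`List`/`Dict` aliases and their imports can be dropped. A before/after sketch of the pattern, assuming a Python 3.9+ runtime:

```python
from typing import Iterator  # Iterator could also come from collections.abc

# before: def index_names(names: List[str]) -> Iterator[Tuple[str, int]]
# after: builtin generics need no List/Tuple imports at all
def index_names(names: list[str]) -> Iterator[tuple[str, int]]:
    for i, name in enumerate(names):
        yield name, i

assert list(index_names(["a", "b"])) == [("a", 0), ("b", 1)]
```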
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import (
     OS,
@@ -28,7 +28,7 @@ from capa.features.extractors.cape.models import CapeReport
 logger = logging.getLogger(__name__)


-def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     if "Intel 80386" in report.target.file.type:
         yield Arch(ARCH_I386), NO_ADDRESS
     elif "x86-64" in report.target.file.type:
@@ -40,7 +40,7 @@ def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
     )


-def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     if "PE" in report.target.file.type:
         yield Format(FORMAT_PE), NO_ADDRESS
     elif "ELF" in report.target.file.type:
@@ -52,7 +52,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
     )


-def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     # this variable contains the output of the file command
     file_output = report.target.file.type

@@ -80,7 +80,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
     yield OS(OS_ANY), NO_ADDRESS


-def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for global_handler in GLOBAL_HANDLER:
         for feature, addr in global_handler(report):
             yield feature, addr

@@ -6,12 +6,12 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import Any, Dict, List
+from typing import Any

 from capa.features.extractors.base_extractor import ProcessHandle


-def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]:
+def find_process(processes: list[dict[str, Any]], ph: ProcessHandle) -> dict[str, Any]:
     """
     find a specific process identified by a process handler.


@@ -6,10 +6,9 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import binascii
-from typing import Any, Dict, List, Union, Literal, Optional
+from typing import Any, Union, Literal, Optional, Annotated, TypeAlias

 from pydantic import Field, BaseModel, ConfigDict
-from typing_extensions import Annotated, TypeAlias
 from pydantic.functional_validators import BeforeValidator


@@ -59,11 +58,11 @@ Skip: TypeAlias = Optional[Any]
 # in a field with this type.
 # then we can update the model with the discovered shape.
 TODO: TypeAlias = None
-ListTODO: TypeAlias = List[None]
+ListTODO: TypeAlias = list[None]
 DictTODO: TypeAlias = ExactModel

-EmptyDict: TypeAlias = BaseModel
-EmptyList: TypeAlias = List[Any]
+Emptydict: TypeAlias = BaseModel
+EmptyList: TypeAlias = list[Any]


 class Info(FlexibleModel):
@@ -77,7 +76,7 @@ class ImportedSymbol(ExactModel):

 class ImportedDll(ExactModel):
     dll: str
-    imports: List[ImportedSymbol]
+    imports: list[ImportedSymbol]


 class DirectoryEntry(ExactModel):
@@ -149,7 +148,7 @@ class Signer(ExactModel):
     aux_valid: Optional[bool] = None
     aux_error: Optional[bool] = None
     aux_error_desc: Optional[str] = None
-    aux_signers: Optional[List[AuxSigner]] = None
+    aux_signers: Optional[list[AuxSigner]] = None


 class Overlay(ExactModel):
@@ -178,22 +177,22 @@ class PE(ExactModel):
     pdbpath: Optional[str] = None
     timestamp: str

-    # List[ImportedDll], or Dict[basename(dll), ImportedDll]
-    imports: Union[List[ImportedDll], Dict[str, ImportedDll]]
+    # list[ImportedDll], or dict[basename(dll), ImportedDll]
+    imports: Union[list[ImportedDll], dict[str, ImportedDll]]
     imported_dll_count: Optional[int] = None
     imphash: str

     exported_dll_name: Optional[str] = None
-    exports: List[ExportedSymbol]
+    exports: list[ExportedSymbol]

-    dirents: List[DirectoryEntry]
-    sections: List[Section]
+    dirents: list[DirectoryEntry]
+    sections: list[Section]

     ep_bytes: Optional[HexBytes] = None

     overlay: Optional[Overlay] = None
-    resources: List[Resource]
-    versioninfo: List[KV]
+    resources: list[Resource]
+    versioninfo: list[KV]

     # base64 encoded data
     icon: Optional[str] = None
@@ -204,7 +203,7 @@ class PE(ExactModel):
     # short hex string
     icon_dhash: Optional[str] = None

-    digital_signers: List[DigitalSigner]
+    digital_signers: list[DigitalSigner]
     guest_signers: Signer


@@ -217,9 +216,9 @@ class File(FlexibleModel):
     cape_type: Optional[str] = None

     pid: Optional[Union[int, Literal[""]]] = None
-    name: Union[List[str], str]
+    name: Union[list[str], str]
     path: str
-    guest_paths: Union[List[str], str, None]
+    guest_paths: Union[list[str], str, None]
     timestamp: Optional[str] = None

     #
@@ -244,7 +243,7 @@ class File(FlexibleModel):
     ep_bytes: Optional[HexBytes] = None
     entrypoint: Optional[int] = None
     data: Optional[str] = None
-    strings: Optional[List[str]] = None
+    strings: Optional[list[str]] = None

     #
     # detections (skip)
@@ -283,7 +282,7 @@ class Call(ExactModel):

     api: str

-    arguments: List[Argument]
+    arguments: list[Argument]
     status: bool
     return_: HexInt = Field(alias="return")
     pretty_return: Optional[str] = None
@@ -298,15 +297,18 @@ class Call(ExactModel):
     id: int


-class Process(ExactModel):
+# FlexibleModel to account for extended fields
+# refs: https://github.com/mandiant/capa/issues/2466
+#       https://github.com/kevoreilly/CAPEv2/pull/2199
+class Process(FlexibleModel):
     process_id: int
     process_name: str
     parent_id: int
     module_path: str
     first_seen: str
-    calls: List[Call]
-    threads: List[int]
-    environ: Dict[str, str]
+    calls: list[Call]
+    threads: list[int]
+    environ: dict[str, str]


 class ProcessTree(ExactModel):
@@ -314,25 +316,25 @@ class ProcessTree(ExactModel):
     pid: int
     parent_id: int
     module_path: str
-    threads: List[int]
-    environ: Dict[str, str]
-    children: List["ProcessTree"]
+    threads: list[int]
+    environ: dict[str, str]
+    children: list["ProcessTree"]


 class Summary(ExactModel):
-    files: List[str]
-    read_files: List[str]
-    write_files: List[str]
-    delete_files: List[str]
-    keys: List[str]
-    read_keys: List[str]
-    write_keys: List[str]
-    delete_keys: List[str]
-    executed_commands: List[str]
-    resolved_apis: List[str]
-    mutexes: List[str]
-    created_services: List[str]
-    started_services: List[str]
+    files: list[str]
+    read_files: list[str]
+    write_files: list[str]
+    delete_files: list[str]
+    keys: list[str]
+    read_keys: list[str]
+    write_keys: list[str]
+    delete_keys: list[str]
+    executed_commands: list[str]
+    resolved_apis: list[str]
+    mutexes: list[str]
+    created_services: list[str]
+    started_services: list[str]


 class EncryptedBuffer(ExactModel):
@@ -349,12 +351,12 @@ class Behavior(ExactModel):
     summary: Summary

     # list of processes, of threads, of calls
-    processes: List[Process]
+    processes: list[Process]
     # tree of processes
-    processtree: List[ProcessTree]
+    processtree: list[ProcessTree]

-    anomaly: List[str]
-    encryptedbuffers: List[EncryptedBuffer]
+    anomaly: list[str]
+    encryptedbuffers: list[EncryptedBuffer]
     # these are small objects that describe atomic events,
     # like file move, registry access.
     # we'll detect the same with our API call analysis.
@@ -373,7 +375,7 @@ class Static(ExactModel):


 class Cape(ExactModel):
-    payloads: List[ProcessFile]
+    payloads: list[ProcessFile]
     configs: Skip = None


@@ -389,7 +391,7 @@ class CapeReport(FlexibleModel):
     # static analysis results
     #
     static: Optional[Static] = None
-    strings: Optional[List[str]] = None
+    strings: Optional[list[str]] = None

     #
     # dynamic analysis results
@@ -398,10 +400,10 @@ class CapeReport(FlexibleModel):
     behavior: Behavior

     # post-processed results: payloads and extracted configs
-    CAPE: Optional[Union[Cape, List]] = None
-    dropped: Optional[List[File]] = None
-    procdump: Optional[List[ProcessFile]] = None
-    procmemory: ListTODO
+    CAPE: Optional[Union[Cape, list]] = None
+    dropped: Optional[list[File]] = None
+    procdump: Optional[list[ProcessFile]] = None
+    procmemory: Optional[ListTODO] = None

     # =========================================================================
     # information we won't use in capa
@@ -437,7 +439,7 @@ class CapeReport(FlexibleModel):
     malfamily_tag: Optional[str] = None
     malscore: float
     detections: Skip = None
-    detections2pid: Optional[Dict[int, List[str]]] = None
+    detections2pid: Optional[dict[int, list[str]]] = None
     # AV detections for the sample.
     virustotal: Skip = None


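The `Process(ExactModel)` to `Process(FlexibleModel)` switch (with the linked refs) relaxes validation so reports carrying newer CAPE fields still parse. In pydantic v2 terms this is the `extra` config; a minimal sketch of the idea, where the `ConfigDict` values are an assumption about how capa's `ExactModel`/`FlexibleModel` bases are defined, not a quote of them:

```python
from pydantic import BaseModel, ConfigDict

class ExactModel(BaseModel):
    # unknown report keys raise ValidationError: useful for discovering
    # fields the model doesn't describe yet (assumed config)
    model_config = ConfigDict(extra="forbid")

class FlexibleModel(BaseModel):
    # unknown report keys are tolerated: robust against extended fields
    # emitted by newer CAPE versions (assumed config)
    model_config = ConfigDict(extra="allow")

class Process(FlexibleModel):
    process_id: int
    process_name: str

# parses even though "brand_new_field" isn't modeled
p = Process.model_validate({"process_id": 4, "process_name": "a.exe", "brand_new_field": 1})
```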
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import String, Feature
 from capa.features.address import Address, ThreadAddress
@@ -22,14 +22,14 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]:
     get the threads associated with a given process
     """
     process: Process = ph.inner
-    threads: List[int] = process.threads
+    threads: list[int] = process.threads

     for thread in threads:
         address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
         yield ThreadHandle(address=address, inner={})


-def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
     """
     extract strings from a process' provided environment variables.
     """
@@ -39,7 +39,7 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Addres
         yield String(value), ph.address


-def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
     for handler in PROCESS_HANDLERS:
         for feature, addr in handler(ph):
             yield feature, addr

@@ -10,7 +10,7 @@ import re
 import logging
 import binascii
 import contextlib
-from typing import Tuple, Iterator
+from typing import Iterator

 import pefile

@@ -45,7 +45,7 @@ MATCH_RESULT = b'{"meta":'
 MATCH_JSON_OBJECT = b'{"'


-def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address]]:
     """
     extract ASCII and UTF-16 LE strings from file
     """
@@ -56,7 +56,7 @@ def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address
         yield String(s.s), FileOffsetAddress(s.offset)


-def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(MATCH_PE):
         yield Format(FORMAT_PE), NO_ADDRESS
     elif buf.startswith(MATCH_ELF):
@@ -79,7 +79,7 @@ def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
         return


-def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(MATCH_PE):
         yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))

@@ -111,7 +111,7 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
         return


-def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
     if os != OS_AUTO:
         yield OS(os), NO_ADDRESS


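`extract_format` dispatches on leading magic bytes; `MATCH_PE` and `MATCH_ELF` are assumed here to be the conventional `b"MZ"` and `b"\x7fELF"` prefixes. A standalone sniffing sketch:

```python
MATCH_PE = b"MZ"        # assumed magic values, for illustration
MATCH_ELF = b"\x7fELF"

def sniff_format(buf: bytes) -> str:
    # PE files start with the DOS "MZ" header; ELF with 0x7f 'E' 'L' 'F'
    if buf.startswith(MATCH_PE):
        return "pe"
    elif buf.startswith(MATCH_ELF):
        return "elf"
    return "unknown"

assert sniff_format(b"MZ\x90\x00") == "pe"
assert sniff_format(b"\x7fELF\x02\x01") == "elf"
```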
@@ -8,7 +8,7 @@

 from __future__ import annotations

-from typing import Dict, List, Tuple, Union, Iterator, Optional
+from typing import Union, Iterator, Optional
 from pathlib import Path

 import dnfile
@@ -41,11 +41,11 @@ from capa.features.extractors.dnfile.helpers import (

 class DnFileFeatureExtractorCache:
     def __init__(self, pe: dnfile.dnPE):
-        self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.native_imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.methods: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.fields: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.types: dict[int, Union[DnType, DnUnmanagedMethod]] = {}

         for import_ in get_dotnet_managed_imports(pe):
             self.imports[import_.token] = import_
@@ -84,7 +84,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
         self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe)

         # pre-compute these because we'll yield them at *every* scope.
-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
         self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
         self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
         self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
@@ -100,7 +100,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):

     def get_functions(self) -> Iterator[FunctionHandle]:
         # create a method lookup table
-        methods: Dict[Address, FunctionHandle] = {}
+        methods: dict[Address, FunctionHandle] = {}
         for token, method in get_dotnet_managed_method_bodies(self.pe):
             fh: FunctionHandle = FunctionHandle(
                 address=DNTokenAddress(token),
@@ -136,7 +136,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):

         yield from methods.values()

-    def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.dnfile.function.extract_features(fh)

     def get_basic_blocks(self, f) -> Iterator[BBHandle]:
@@ -157,5 +157,5 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
                 inner=insn,
             )

-    def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]:
+    def extract_insn_features(self, fh, bbh, ih) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)

@@ -8,7 +8,7 @@

 from __future__ import annotations

-from typing import Tuple, Iterator
+from typing import Iterator

 import dnfile

@@ -18,35 +18,35 @@ from capa.features.common import Class, Format, String, Feature, Namespace, Char
 from capa.features.address import Address


-def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]:
+def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[tuple[Import, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)


-def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]:
+def extract_file_format(pe: dnfile.dnPE) -> Iterator[tuple[Format, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)


-def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]:
+def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[tuple[FunctionName, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)


-def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(pe: dnfile.dnPE) -> Iterator[tuple[String, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)


-def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[tuple[Characteristic, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)


-def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]:
+def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[tuple[Namespace, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)


-def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]:
+def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[tuple[Class, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)


-def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
     for file_handler in FILE_HANDLERS:
         for feature, address in file_handler(pe):
             yield feature, address

@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address
@@ -18,30 +18,30 @@ from capa.features.extractors.base_extractor import FunctionHandle
 logger = logging.getLogger(__name__)


-def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
     """extract callers to a function"""
     for dest in fh.ctx["calls_to"]:
         yield Characteristic("calls to"), dest


-def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_calls_from(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
     """extract callers from a function"""
     for src in fh.ctx["calls_from"]:
         yield Characteristic("calls from"), src


-def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_recursive_call(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
     """extract recursive function call"""
     if fh.address in fh.ctx["calls_to"]:
         yield Characteristic("recursive call"), fh.address


-def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
     """extract loop indicators from a function"""
     raise NotImplementedError()


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     for func_handler in FUNCTION_HANDLERS:
         for feature, addr in func_handler(fh):
             yield feature, addr

@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import Dict, Tuple, Union, Iterator, Optional
+from typing import Union, Iterator, Optional

 import dnfile
 from dncil.cil.body import CilMethodBody
@@ -144,7 +144,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
         )


-def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
+def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
     """get MethodDef methods used to access properties

     see https://www.ntcore.com/files/dotnetformat.htm
@@ -194,7 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
     """
     nested_class_table = get_dotnet_nested_class_table_index(pe)

-    accessor_map: Dict[int, str] = {}
+    accessor_map: dict[int, str] = {}
     for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
         accessor_map[methoddef] = methoddef_access

@@ -252,7 +252,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
         yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)


-def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
+def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
     """get managed methods from MethodDef table"""
     for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
         assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
@@ -332,7 +332,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O

 def resolve_nested_typedef_name(
     nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
-) -> Tuple[str, Tuple[str, ...]]:
+) -> tuple[str, tuple[str, ...]]:
     """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""

     if index in nested_class_table:
@@ -368,7 +368,7 @@ def resolve_nested_typedef_name(

 def resolve_nested_typeref_name(
     index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
-) -> Tuple[str, Tuple[str, ...]]:
+) -> tuple[str, tuple[str, ...]]:
     """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
     # If the ResolutionScope decodes to a typeRef type then it is nested
     if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
@@ -398,7 +398,7 @@ def resolve_nested_typeref_name(
     return str(typeref.TypeNamespace), (str(typeref.TypeName),)


-def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
+def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict[int, int]:
     """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table"""
     nested_class_table = {}

@@ -442,7 +442,7 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
     return not bool(pe.net.Flags.CLR_ILONLY)


-def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
+def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[tuple[int, dnfile.base.MDTableRow]]:
     assert pe.net is not None
     assert pe.net.mdtables is not None


@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import TYPE_CHECKING, Tuple, Union, Iterator, Optional
+from typing import TYPE_CHECKING, Union, Iterator, Optional

 if TYPE_CHECKING:
     from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
@@ -61,7 +61,7 @@ def get_callee(
     return callee


-def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """parse instruction API features"""
     if ih.inner.opcode not in (
         OpCodes.Call,
@@ -83,7 +83,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato
         yield API(name), ih.address


-def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """parse instruction property features"""
     name: Optional[str] = None
     access: Optional[str] = None
@@ -118,7 +118,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It

 def extract_insn_namespace_class_features(
     fh: FunctionHandle, bh, ih: InsnHandle
-) -> Iterator[Tuple[Union[Namespace, Class], Address]]:
+) -> Iterator[tuple[Union[Namespace, Class], Address]]:
     """parse instruction namespace and class features"""
     type_: Optional[Union[DnType, DnUnmanagedMethod]] = None

@@ -173,13 +173,13 @@ def extract_insn_namespace_class_features(
         yield Namespace(type_.namespace), ih.address


-def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """parse instruction number features"""
     if ih.inner.is_ldc():
         yield Number(ih.inner.get_ldc()), ih.address


-def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """parse instruction string features"""
     if not ih.inner.is_ldstr():
         return
@@ -197,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter

 def extract_unmanaged_call_characteristic_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Characteristic, Address]]:
+) -> Iterator[tuple[Characteristic, Address]]:
     if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp):
         return

@@ -209,7 +209,7 @@ def extract_unmanaged_call_characteristic_features(
     yield Characteristic("unmanaged call"), ih.address


-def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract instruction features"""
     for inst_handler in INSTRUCTION_HANDLERS:
         for feature, addr in inst_handler(fh, bbh, ih):

@@ -6,17 +6,17 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import Tuple, Optional
+from typing import Optional


 class DnType:
     def __init__(
-        self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
+        self, token: int, class_: tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
     ):
         self.token: int = token
         self.access: Optional[str] = access
         self.namespace: str = namespace
-        self.class_: Tuple[str, ...] = class_
+        self.class_: tuple[str, ...] = class_

         if member == ".ctor":
             member = "ctor"
@@ -44,7 +44,7 @@ class DnType:
         return str(self)

     @staticmethod
-    def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""):
+    def format_name(class_: tuple[str, ...], namespace: str = "", member: str = ""):
         if len(class_) > 1:
             class_str = "/".join(class_)  # Concat items in tuple, separated by a "/"
         else:

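`DnType.format_name` above joins the nested-class tuple with `/`, matching the .NET metadata convention for nested types (and the `System.IO.File::OpenRead` member rendering seen elsewhere in this diff). A hedged sketch of the full formatting rule, not capa's exact code:

```python
def format_name(class_: tuple[str, ...], namespace: str = "", member: str = "") -> str:
    # nested classes render as Outer/Inner; a member attaches with "::"
    if len(class_) > 1:
        name = "/".join(class_)
    else:
        name = class_[0]
    if namespace:
        name = f"{namespace}.{name}"
    if member:
        name = f"{name}::{member}"
    return name

assert format_name(("File",), namespace="System.IO", member="OpenRead") == "System.IO.File::OpenRead"
assert format_name(("Outer", "Inner"), namespace="Ns") == "Ns.Outer/Inner"
```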
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator
 from pathlib import Path

 import dnfile
@@ -48,12 +48,12 @@ from capa.features.extractors.dnfile.helpers import (
 logger = logging.getLogger(__name__)


-def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
+def extract_file_format(**kwargs) -> Iterator[tuple[Format, Address]]:
     yield Format(FORMAT_DOTNET), NO_ADDRESS
     yield Format(FORMAT_PE), NO_ADDRESS


-def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
+def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Import, Address]]:
     for method in get_dotnet_managed_imports(pe):
         # like System.IO.File::OpenRead
         yield Import(str(method)), DNTokenAddress(method.token)
@@ -64,12 +64,12 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor
         yield Import(name), DNTokenAddress(imp.token)


-def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]:
+def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[FunctionName, Address]]:
     for method in get_dotnet_managed_methods(pe):
         yield FunctionName(str(method)), DNTokenAddress(method.token)


-def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]:
+def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Namespace, Address]]:
     """emit namespace features from TypeRef and TypeDef tables"""

     # namespaces may be referenced multiple times, so we need to filter
@@ -93,7 +93,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
         yield Namespace(namespace), NO_ADDRESS


-def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
+def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Class, Address]]:
     """emit class features from TypeRef and TypeDef tables"""
     nested_class_table = get_dotnet_nested_class_table_index(pe)

@@ -116,11 +116,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla
         yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)


-def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
+def extract_file_os(**kwargs) -> Iterator[tuple[OS, Address]]:
     yield OS(OS_ANY), NO_ADDRESS


-def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
+def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address]]:
     # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
     # .NET 4.5 added option: any CPU, 32-bit preferred
     assert pe.net is not None
@@ -134,18 +134,18 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address
     yield Arch(ARCH_ANY), NO_ADDRESS


-def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[String, Address]]:
     yield from capa.features.extractors.common.extract_file_strings(pe.__data__)


 def extract_file_mixed_mode_characteristic_features(
     pe: dnfile.dnPE, **kwargs
-) -> Iterator[Tuple[Characteristic, Address]]:
+) -> Iterator[tuple[Characteristic, Address]]:
     if is_dotnet_mixed_mode(pe):
         yield Characteristic("mixed mode"), NO_ADDRESS


-def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler(pe=pe):  # type: ignore
             yield feature, addr
@@ -162,7 +162,7 @@ FILE_HANDLERS = (
 )


-def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_global_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
     for handler in GLOBAL_HANDLERS:
         for feature, va in handler(pe=pe):  # type: ignore
             yield feature, va
@@ -204,7 +204,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
     def is_mixed_mode(self) -> bool:
         return is_dotnet_mixed_mode(self.pe)

-    def get_runtime_version(self) -> Tuple[int, int]:
+    def get_runtime_version(self) -> tuple[int, int]:
         assert self.pe.net is not None
         assert self.pe.net.struct is not None
         assert self.pe.net.struct.MajorRuntimeVersion is not None

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 from capa.features.insn import API, Number
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call
 logger = logging.getLogger(__name__)


-def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
     """
     This method extracts the given call's features (such as API name and arguments),
     and returns them as API, Number, and String features.
@@ -49,7 +49,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
     yield API(name), ch.address


-def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
     for handler in CALL_HANDLERS:
         for feature, addr in handler(ph, th, ch):
             yield feature, addr

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Tuple, Union, Iterator
+from typing import Union, Iterator

 import capa.features.extractors.drakvuf.call
 import capa.features.extractors.drakvuf.file
@@ -39,7 +39,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
         self.report: DrakvufReport = report

         # sort the api calls to prevent going through the entire list each time
-        self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report)
+        self.sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = index_calls(report)

         # pre-compute these because we'll yield them at *every* scope.
         self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report))
@@ -48,16 +48,16 @@ class DrakvufExtractor(DynamicFeatureExtractor):
         # DRAKVUF currently does not yield information about the PE's address
         return NO_ADDRESS

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.drakvuf.file.extract_features(self.report)

     def get_processes(self) -> Iterator[ProcessHandle]:
         yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls)

-    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.drakvuf.process.extract_features(ph)

     def get_process_name(self, ph: ProcessHandle) -> str:
@@ -66,7 +66,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
     def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
         yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph)

-    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
         if False:
             # force this routine to be a generator,
             # but we don't actually have any elements to generate.
@@ -87,10 +87,10 @@ class DrakvufExtractor(DynamicFeatureExtractor):

     def extract_call_features(
         self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch)

     @classmethod
-    def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor":
+    def from_report(cls, report: Iterator[dict]) -> "DrakvufExtractor":
         dr = DrakvufReport.from_raw_report(report)
         return DrakvufExtractor(report=dr)

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Tuple, Iterator
+from typing import Iterator

 from capa.features.file import Import
 from capa.features.common import Feature
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call, DrakvufReport
 logger = logging.getLogger(__name__)


-def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]:
+def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]]) -> Iterator[ProcessHandle]:
     """
     Get all the created processes for a sample.
     """
@@ -28,7 +28,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]])
         yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name})


-def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     """
     Extract imported function names.
     """
@@ -43,7 +43,7 @@ def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Addre
         yield Import(name), AbsoluteVirtualAddress(function_address)


-def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     for handler in FILE_HANDLERS:
         for feature, addr in handler(report):
             yield feature, addr

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
 from capa.features.address import NO_ADDRESS, Address
@@ -16,22 +16,22 @@ from capa.features.extractors.drakvuf.models import DrakvufReport
 logger = logging.getLogger(__name__)


-def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     # DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
     yield Format(FORMAT_PE), NO_ADDRESS


-def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     # DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
     yield OS(OS_WINDOWS), NO_ADDRESS


-def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     # DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
     yield Arch(ARCH_AMD64), NO_ADDRESS


-def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     for global_handler in GLOBAL_HANDLER:
         for feature, addr in global_handler(report):
             yield feature, addr

@@ -7,16 +7,15 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import itertools
-from typing import Dict, List

 from capa.features.address import ThreadAddress, ProcessAddress
 from capa.features.extractors.drakvuf.models import Call, DrakvufReport


-def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]:
+def index_calls(report: DrakvufReport) -> dict[ProcessAddress, dict[ThreadAddress, list[Call]]]:
     # this method organizes calls into processes and threads, and then sorts them based on
     # timestamp so that we can address individual calls per index (CallAddress requires call index)
-    result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {}
+    result: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = {}
     for call in itertools.chain(report.syscalls, report.apicalls):
         if call.pid == 0:
             # DRAKVUF captures api/native calls from all processes running on the system.

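`index_calls` builds a two-level mapping so `get_threads`/`get_calls` can address a call by process, thread, and list index without rescanning the flat event stream. A simplified sketch of the grouping and timestamp sort, with plain ints and dicts standing in for capa's address and `Call` types:

```python
from collections import defaultdict

calls = [
    {"pid": 4, "tid": 8, "ts": 2, "api": "CreateFileW"},
    {"pid": 4, "tid": 8, "ts": 1, "api": "OpenProcess"},
    {"pid": 9, "tid": 3, "ts": 5, "api": "RegOpenKeyExW"},
]

# group calls by process, then by thread
index: dict[int, dict[int, list[dict]]] = defaultdict(lambda: defaultdict(list))
for call in calls:
    index[call["pid"]][call["tid"]].append(call)

# sort each thread's calls by timestamp so a call's position is a stable id
for threads in index.values():
    for thread_calls in threads.values():
        thread_calls.sort(key=lambda c: c["ts"])

assert [c["api"] for c in index[4][8]] == ["OpenProcess", "CreateFileW"]
```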
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Any, Dict, List, Iterator
+from typing import Any, Iterator

 from pydantic import Field, BaseModel, ConfigDict, model_validator

@@ -47,7 +47,7 @@ class LoadedDLL(ConciseModel):
     plugin_name: str = Field(alias="Plugin")
     event: str = Field(alias="Event")
     name: str = Field(alias="DllName")
-    imports: Dict[str, int] = Field(alias="Rva")
+    imports: dict[str, int] = Field(alias="Rva")


 class Call(ConciseModel):
@@ -58,18 +58,18 @@ class Call(ConciseModel):
     pid: int = Field(alias="PID")
     tid: int = Field(alias="TID")
     name: str = Field(alias="Method")
-    arguments: Dict[str, str]
+    arguments: dict[str, str]


 class WinApiCall(Call):
     # This class models Windows API calls captured by DRAKVUF (DLLs, etc.).
-    arguments: Dict[str, str] = Field(alias="Arguments")
+    arguments: dict[str, str] = Field(alias="Arguments")
     event: str = Field(alias="Event")
     return_value: str = Field(alias="ReturnValue")

     @model_validator(mode="before")
     @classmethod
-    def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]:
         args = values["Arguments"]
         values["Arguments"] = dict(arg.split("=", 1) for arg in args)
         return values
@@ -100,7 +100,7 @@ class SystemCall(Call):

     @model_validator(mode="before")
     @classmethod
-    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:
         # DRAKVUF stores argument names and values as entries in the syscall's entry.
         # This model validator collects those arguments into a list in the model.
         values["arguments"] = {
@@ -110,13 +110,13 @@ class SystemCall(Call):


 class DrakvufReport(ConciseModel):
-    syscalls: List[SystemCall] = []
-    apicalls: List[WinApiCall] = []
-    discovered_dlls: List[DiscoveredDLL] = []
-    loaded_dlls: List[LoadedDLL] = []
+    syscalls: list[SystemCall] = []
+    apicalls: list[WinApiCall] = []
+    discovered_dlls: list[DiscoveredDLL] = []
+    loaded_dlls: list[LoadedDLL] = []

     @classmethod
-    def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport":
+    def from_raw_report(cls, entries: Iterator[dict]) -> "DrakvufReport":
         report = cls()

         for entry in entries:

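`build_arguments` above runs with `mode="before"`, i.e. on the raw input dict before field validation, turning DRAKVUF's `["Key=Value", ...]` argument list into the mapping that `arguments: dict[str, str]` expects. A self-contained sketch of the pattern, with the model trimmed to that one field:

```python
from typing import Any
from pydantic import BaseModel, Field, model_validator

class WinApiCall(BaseModel):
    arguments: dict[str, str] = Field(alias="Arguments")

    @model_validator(mode="before")
    @classmethod
    def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]:
        # split only on the first "=" so values containing "=" stay intact
        args = values["Arguments"]
        values["Arguments"] = dict(arg.split("=", 1) for arg in args)
        return values

call = WinApiCall.model_validate({"Arguments": ["FileName=C:\\x=1.txt", "Flags=3"]})
assert call.arguments["FileName"] == "C:\\x=1.txt"
```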
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import String, Feature
 from capa.features.address import Address, ThreadAddress, ProcessAddress
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)


 def get_threads(
-    calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle
+    calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle
 ) -> Iterator[ThreadHandle]:
     """
     Get the threads associated with a given process.
@@ -27,11 +27,11 @@ def get_threads(
         yield ThreadHandle(address=thread_addr, inner={})


-def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_process_name(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
     yield String(ph.inner["process_name"]), ph.address


-def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
     for handler in PROCESS_HANDLERS:
         for feature, addr in handler(ph):
             yield feature, addr

@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Iterator
+from typing import Iterator

 from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress
 from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)


 def get_calls(
-    sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle
+    sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle, th: ThreadHandle
 ) -> Iterator[CallHandle]:
     for i, call in enumerate(sorted_calls[ph.address][th.address]):
         call_addr = DynamicCallAddress(thread=th.address, id=i)

@@ -10,7 +10,7 @@ import logging
 import itertools
 import collections
 from enum import Enum
-from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
+from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional
 from dataclasses import dataclass

 if TYPE_CHECKING:
@@ -394,7 +394,7 @@ class ELF:
         return read_cstr(phdr.buf, 0)

     @property
-    def versions_needed(self) -> Dict[str, Set[str]]:
+    def versions_needed(self) -> dict[str, set[str]]:
         # symbol version requirements are stored in the .gnu.version_r section,
         # which has type SHT_GNU_verneed (0x6ffffffe).
         #
@@ -452,7 +452,7 @@ class ELF:
         return {}

     @property
-    def dynamic_entries(self) -> Iterator[Tuple[int, int]]:
+    def dynamic_entries(self) -> Iterator[tuple[int, int]]:
         """
         read the entries from the dynamic section,
         yielding the tag and value for each entry.
@@ -547,7 +547,7 @@ class ELF:
                 logger.warning("failed to read DT_NEEDED entry: %s", str(e))

     @property
-    def symtab(self) -> Optional[Tuple[Shdr, Shdr]]:
+    def symtab(self) -> Optional[tuple[Shdr, Shdr]]:
         """
         fetch the Shdr for the symtab and the associated strtab.
         """
@@ -682,7 +682,7 @@ class SymTab:
         symtab: Shdr,
         strtab: Shdr,
     ) -> None:
-        self.symbols: List[Symbol] = []
+        self.symbols: list[Symbol] = []

         self.symtab = symtab
         self.strtab = strtab

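`dynamic_entries` walks the dynamic section/segment, which is a packed array of `(d_tag, d_value)` pairs terminated by `DT_NULL`. A standalone sketch for 64-bit little-endian ELF, assuming the standard `Elf64_Dyn` layout of two 8-byte fields (not capa's actual implementation, which also handles 32-bit and big-endian files):

```python
import struct
from typing import Iterator

DT_NULL = 0
DT_NEEDED = 1

def dynamic_entries(buf: bytes) -> Iterator[tuple[int, int]]:
    # Elf64_Dyn = { Elf64_Sxword d_tag; Elf64_Xword d_un; } -> "<qQ"
    for offset in range(0, len(buf) - 15, 16):
        tag, value = struct.unpack_from("<qQ", buf, offset)
        if tag == DT_NULL:
            return  # DT_NULL terminates the array
        yield tag, value

# one DT_NEEDED entry pointing at strtab offset 0x10, then the terminator
blob = struct.pack("<qQ", DT_NEEDED, 0x10) + struct.pack("<qQ", DT_NULL, 0)
assert list(dynamic_entries(blob)) == [(DT_NEEDED, 0x10)]
```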
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import io
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator
 from pathlib import Path

 from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
@@ -166,7 +166,7 @@ def extract_file_arch(elf: ELFFile, **kwargs):
         logger.warning("unsupported architecture: %s", arch)


-def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
+def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler(elf=elf, buf=buf):  # type: ignore
             yield feature, addr
@@ -182,7 +182,7 @@ FILE_HANDLERS = (
 )


-def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
+def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
     for global_handler in GLOBAL_HANDLERS:
         for feature, addr in global_handler(elf=elf, buf=buf):  # type: ignore
             yield feature, addr

@@ -8,7 +8,7 @@
 
 import string
 import struct
-from typing import Tuple, Iterator
+from typing import Iterator
 
 import ghidra
 from ghidra.program.model.lang import OperandType
@@ -97,7 +97,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
     return False
 
 
-def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """extract stackstring indicators from basic block"""
     bb: ghidra.program.model.block.CodeBlock = bbh.inner
 
@@ -105,7 +105,7 @@ def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[
         yield Characteristic("stack string"), bbh.address
 
 
-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """check basic block for tight loop indicators"""
     bb: ghidra.program.model.block.CodeBlock = bbh.inner
 
@@ -119,7 +119,7 @@ BASIC_BLOCK_HANDLERS = (
 )
 
 
-def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """
     extract features from the given basic block.
 
@@ -127,7 +127,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
         bb: the basic block to process.
 
     yields:
-        Tuple[Feature, int]: the features and their location found in this basic block.
+        tuple[Feature, int]: the features and their location found in this basic block.
     """
     yield BasicBlock(), bbh.address
    for bb_handler in BASIC_BLOCK_HANDLERS:
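For context on `_bb_has_tight_loop` and `extract_bb_tight_loop` above: a tight loop is conventionally a basic block with a branch back to its own start. A hypothetical, Ghidra-free sketch of that check (not the project's actual implementation):

```python
def bb_has_tight_loop(bb_start: int, successors: list[int]) -> bool:
    # a basic block that branches back to its own start is a tight loop
    return bb_start in successors

assert bb_has_tight_loop(0x401000, [0x401000, 0x401022])   # e.g. jnz back to self
assert not bb_has_tight_loop(0x401000, [0x401022])         # falls through only
```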
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator
 
 import capa.features.extractors.ghidra.file
 import capa.features.extractors.ghidra.insn
@@ -40,7 +40,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
             )
         )
 
-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
         self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
         self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
         self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())
@@ -73,7 +73,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
         func = getFunctionContaining(toAddr(addr))  # type: ignore [name-defined] # noqa: F821
         return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)
 
-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.ghidra.function.extract_features(fh)
 
     def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
@@ -81,7 +81,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
 
         yield from ghidra_helpers.get_function_blocks(fh)
 
-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)
 
     def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
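The methods changed above implement capa's scoped extraction interface. A hedged sketch of how a driver typically walks it, assuming `get_functions` and `extract_insn_features` counterparts from the base `StaticFeatureExtractor` interface, which this diff does not show:

```python
def collect_features(extractor) -> list:
    """walk function -> basic block -> instruction scopes, gathering features."""
    features = []
    for fh in extractor.get_functions():                       # assumed from the base interface
        features.extend(extractor.extract_function_features(fh))
        for bbh in extractor.get_basic_blocks(fh):
            features.extend(extractor.extract_basic_block_features(fh, bbh))
            for ih in extractor.get_instructions(fh, bbh):
                features.extend(extractor.extract_insn_features(fh, bbh, ih))  # assumed counterpart
    return features
```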
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import re
 import struct
-from typing import List, Tuple, Iterator
+from typing import Iterator
 
 from ghidra.program.model.symbol import SourceType, SymbolType
 
@@ -22,7 +22,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
 MAX_OFFSET_PE_AFTER_MZ = 0x200
 
 
-def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) -> Iterator[Tuple[int, int]]:
+def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]]) -> Iterator[tuple[int, int]]:
     """check segment for embedded PE
 
     adapted for Ghidra from:
@@ -60,11 +60,11 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]])
             yield off, i
 
 
-def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
     """extract embedded PE features"""
 
     # pre-compute XOR pairs
-    mz_xor: List[Tuple[bytes, bytes, int]] = [
+    mz_xor: list[tuple[bytes, bytes, int]] = [
         (
             capa.features.extractors.helpers.xor_static(b"MZ", i),
             capa.features.extractors.helpers.xor_static(b"PE", i),
@@ -84,14 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
         yield Characteristic("embedded pe"), FileOffsetAddress(ea)
 
 
-def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
     """extract function exports"""
     st = currentProgram().getSymbolTable()  # type: ignore [name-defined] # noqa: F821
     for addr in st.getExternalEntryPointIterator():
         yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())
 
 
-def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
     """extract function imports
 
     1. imports by ordinal:
@@ -116,14 +116,14 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
                 yield Import(name), AbsoluteVirtualAddress(addr)
 
 
-def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
     """extract section names"""
 
     for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
         yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())
 
 
-def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
     """extract ASCII and UTF-16 LE strings"""
 
     for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
@@ -141,7 +141,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
             yield String(s.s), FileOffsetAddress(offset)
 
 
-def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
     """
     extract the names of statically-linked library functions.
     """
@@ -162,7 +162,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
             yield FunctionName(name[1:]), addr
 
 
-def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format() -> Iterator[tuple[Feature, Address]]:
     ef = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821
     if "PE" in ef:
         yield Format(FORMAT_PE), NO_ADDRESS
@@ -175,7 +175,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
         raise NotImplementedError(f"unexpected file format: {ef}")
 
 
-def extract_features() -> Iterator[Tuple[Feature, Address]]:
+def extract_features() -> Iterator[tuple[Feature, Address]]:
     """extract file features"""
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler():
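The `mz_xor` table above pre-computes, for every single-byte XOR key, the encoded forms of `b"MZ"` and `b"PE"`, so `find_embedded_pe` can locate XOR-obfuscated embedded PEs with plain substring scans. A runnable sketch of that pre-computation, with `xor_static` reimplemented locally for illustration (capa provides it in `capa.features.extractors.helpers`):

```python
def xor_static(data: bytes, key: int) -> bytes:
    # XOR every byte with a single-byte key
    return bytes(b ^ key for b in data)

# for every 1-byte key, remember the XOR-encoded "MZ"/"PE" magic plus the key
mz_xor: list[tuple[bytes, bytes, int]] = [
    (xor_static(b"MZ", i), xor_static(b"PE", i), i) for i in range(256)
]

assert mz_xor[0] == (b"MZ", b"PE", 0)   # key 0: the unobfuscated case
```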
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+from typing import Iterator
 
 import ghidra
 from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator
@@ -49,7 +49,7 @@ def extract_recursive_call(fh: FunctionHandle):
         yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
 
 
-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     for func_handler in FUNCTION_HANDLERS:
         for feature, addr in func_handler(fh):
             yield feature, addr
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
 import contextlib
-from typing import Tuple, Iterator
+from typing import Iterator
 
 import capa.ghidra.helpers
 import capa.features.extractors.elf
@@ -18,7 +18,7 @@ from capa.features.address import NO_ADDRESS, Address
 logger = logging.getLogger(__name__)
 
 
-def extract_os() -> Iterator[Tuple[Feature, Address]]:
+def extract_os() -> Iterator[tuple[Feature, Address]]:
     format_name: str = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821
 
     if "PE" in format_name:
@@ -45,7 +45,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]:
         return
 
 
-def extract_arch() -> Iterator[Tuple[Feature, Address]]:
+def extract_arch() -> Iterator[tuple[Feature, Address]]:
     lang_id = currentProgram().getMetadata().get("Language ID")  # type: ignore [name-defined] # noqa: F821
 
     if "x86" in lang_id and "64" in lang_id:
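`extract_arch` above keys off Ghidra's "Language ID" metadata, which looks like `x86:LE:64:default`. A hedged sketch of the substring-based dispatch; the return values stand in for capa's `Arch` feature constants, and the 32-bit branch is an assumption beyond what this hunk shows:

```python
from typing import Optional

def detect_arch(lang_id: str) -> Optional[str]:
    # Ghidra language IDs look like "x86:LE:64:default"
    if "x86" in lang_id and "64" in lang_id:
        return "amd64"
    if "x86" in lang_id and "32" in lang_id:
        return "i386"
    return None

assert detect_arch("x86:LE:64:default") == "amd64"
assert detect_arch("x86:LE:32:default") == "i386"
```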
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Dict, List, Iterator
+from typing import Iterator
 
 import ghidra
 import java.lang
@@ -20,7 +20,7 @@ from capa.features.address import AbsoluteVirtualAddress
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
 
 
-def ints_to_bytes(bytez: List[int]) -> bytes:
+def ints_to_bytes(bytez: list[int]) -> bytes:
     """convert Java signed ints to Python bytes
 
     args:
@@ -83,10 +83,10 @@ def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
         yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
 
 
-def get_file_imports() -> Dict[int, List[str]]:
+def get_file_imports() -> dict[int, list[str]]:
     """get all import names & addrs"""
 
-    import_dict: Dict[int, List[str]] = {}
+    import_dict: dict[int, list[str]] = {}
 
     for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
         for r in f.getSymbol().getReferences():
@@ -110,7 +110,7 @@ def get_file_imports() -> Dict[int, List[str]]:
     return import_dict
 
 
-def get_file_externs() -> Dict[int, List[str]]:
+def get_file_externs() -> dict[int, list[str]]:
     """
     Gets function names & addresses of statically-linked library functions
 
@@ -124,7 +124,7 @@ def get_file_externs() -> Dict[int, List[str]]:
     - Note: See Symbol Table labels
     """
 
-    extern_dict: Dict[int, List[str]] = {}
+    extern_dict: dict[int, list[str]] = {}
 
     for sym in currentProgram().getSymbolTable().getAllSymbols(True):  # type: ignore [name-defined] # noqa: F821
         # .isExternal() misses more than this config for the function symbols
@@ -143,7 +143,7 @@ def get_file_externs() -> Dict[int, List[str]]:
     return extern_dict
 
 
-def map_fake_import_addrs() -> Dict[int, List[int]]:
+def map_fake_import_addrs() -> dict[int, list[int]]:
     """
     Map ghidra's fake import entrypoints to their
     real addresses
@@ -162,7 +162,7 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:
     - 0x473090 -> PTR_CreateServiceW_00473090
     - 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress)
     """
-    fake_dict: Dict[int, List[int]] = {}
+    fake_dict: dict[int, list[int]] = {}
 
     for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
         for r in f.getSymbol().getReferences():
@@ -174,9 +174,9 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:
 
 def check_addr_for_api(
     addr: ghidra.program.model.address.Address,
-    fakes: Dict[int, List[int]],
-    imports: Dict[int, List[str]],
-    externs: Dict[int, List[str]],
+    fakes: dict[int, list[int]],
+    imports: dict[int, list[str]],
+    externs: dict[int, list[str]],
 ) -> bool:
     offset = addr.getOffset()
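`ints_to_bytes` above exists because Java byte arrays surface through Ghidra's Python bridge as signed integers in the range -128..127. A runnable sketch of the masking idiom (assumed to match the helper's intent; the actual body is not shown in this hunk):

```python
def ints_to_bytes(bytez: list[int]) -> bytes:
    # mask each signed Java int (-128..127) down to an unsigned byte (0..255)
    return bytes(b & 0xFF for b in bytez)

assert ints_to_bytes([77, 90, -1]) == b"MZ\xff"
```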
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Any, Dict, Tuple, Iterator
+from typing import Any, Iterator
 
 import ghidra
 from ghidra.program.model.lang import OperandType
@@ -26,21 +26,21 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
 OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS
 
 
-def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
     """Populate the import cache for this context"""
     if "imports_cache" not in ctx:
         ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports()
     return ctx["imports_cache"]
 
 
-def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
     """Populate the externs cache for this context"""
     if "externs_cache" not in ctx:
         ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs()
     return ctx["externs_cache"]
 
 
-def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]:
     """Populate the fake import addrs cache for this context"""
     if "fakes_cache" not in ctx:
         ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs()
@@ -48,7 +48,7 @@ def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
 
 
 def check_for_api_call(
-    insn, externs: Dict[int, Any], fakes: Dict[int, Any], imports: Dict[int, Any], imp_or_ex: bool
+    insn, externs: dict[int, Any], fakes: dict[int, Any], imports: dict[int, Any], imp_or_ex: bool
 ) -> Iterator[Any]:
     """check instruction for API call
 
@@ -110,7 +110,7 @@ def check_for_api_call(
         yield info
 
 
-def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     insn: ghidra.program.database.code.InstructionDB = ih.inner
 
     if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
@@ -131,7 +131,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle)
         yield API(ext), ih.address
 
 
-def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction number features
     example:
@@ -186,7 +186,7 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
             yield OperandOffset(i, const), addr
 
 
-def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction structure offset features
 
@@ -219,7 +219,7 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
             yield OperandOffset(i, op_off), ih.address
 
 
-def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse referenced byte sequences
 
@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
         yield Bytes(extracted_bytes), ih.address
 
 
-def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction string features
 
@@ -249,7 +249,7 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
 
 def extract_insn_mnemonic_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction mnemonic features"""
     insn: ghidra.program.database.code.InstructionDB = ih.inner
 
@@ -258,7 +258,7 @@ def extract_insn_mnemonic_features(
 
 def extract_insn_obfs_call_plus_5_characteristic_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse call $+5 instruction from the given instruction.
     """
@@ -279,7 +279,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
 
 def extract_insn_segment_access_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction fs or gs access"""
     insn: ghidra.program.database.code.InstructionDB = ih.inner
 
@@ -294,7 +294,7 @@ def extract_insn_segment_access_features(
 
 def extract_insn_peb_access_characteristic_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction peb access
 
     fs:[0x30] on x86, gs:[0x60] on x64
@@ -310,7 +310,7 @@ def extract_insn_peb_access_characteristic_features(
 
 def extract_insn_cross_section_cflow(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """inspect the instruction for a CALL or JMP that crosses section boundaries"""
     insn: ghidra.program.database.code.InstructionDB = ih.inner
 
@@ -364,7 +364,7 @@ def extract_function_calls_from(
     fh: FunctionHandle,
     bb: BBHandle,
     ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """extract functions calls from features
 
     most relevant at the function scope, however, its most efficient to extract at the instruction scope
@@ -393,7 +393,7 @@ def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle,
     bb: BBHandle,
     ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
     does not include calls like => call ds:dword_ABD4974
 
@@ -442,7 +442,7 @@ def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle,
     bb: BBHandle,
     ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     f: ghidra.program.database.function.FunctionDB = fh.inner
     insn: ghidra.program.database.code.InstructionDB = ih.inner
 
@@ -461,7 +461,7 @@ def extract_features(
     fh: FunctionHandle,
     bb: BBHandle,
     insn: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     for insn_handler in INSTRUCTION_HANDLERS:
         for feature, addr in insn_handler(fh, bb, insn):
             yield feature, addr
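`get_imports`, `get_externs`, and `get_fakes` above share one memoization pattern: an expensive whole-program scan is computed once and parked in the per-run context dict under a well-known key. A generic, runnable sketch of that pattern (names here are illustrative):

```python
from typing import Any, Callable

def get_cached(ctx: dict[str, Any], key: str, compute: Callable[[], Any]) -> Any:
    # the first caller pays for the whole-program scan; later calls hit the cache
    if key not in ctx:
        ctx[key] = compute()
    return ctx[key]

ctx: dict[str, Any] = {}
imports = get_cached(ctx, "imports_cache", lambda: {0x401000: ["CreateFileA"]})
assert get_cached(ctx, "imports_cache", lambda: {}) is imports  # second call hits cache
```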
@@ -8,7 +8,7 @@
 
 import struct
 import builtins
-from typing import Tuple, Iterator
+from typing import Iterator
 
 MIN_STACKSTRING_LEN = 8
 
@@ -119,7 +119,7 @@ def twos_complement(val: int, bits: int) -> int:
     return val
 
 
-def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]:
+def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
     """
     Generate (offset, key) tuples of embedded PEs
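`twos_complement` above (only its final `return val` appears in this hunk) follows the standard reinterpretation of an unsigned value as a signed one: if the sign bit is set, subtract `2**bits`. A runnable sketch of that technique:

```python
def twos_complement(val: int, bits: int) -> int:
    if val & (1 << (bits - 1)):        # sign bit set: value is negative
        return val - (1 << bits)
    return val

assert twos_complement(0xFF, 8) == -1
assert twos_complement(0x7F, 8) == 127
assert twos_complement(0xFFFFFFFE, 32) == -2
```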
Some files were not shown because too many files have changed in this diff.