mirror of
https://github.com/mandiant/capa.git
synced 2025-12-16 09:30:46 -08:00
Compare commits
1 Commits
codecut
...
wb/library
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
03ce40e781 |
@@ -1,6 +1,6 @@
|
|||||||
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile
|
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile
|
||||||
|
|
||||||
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3-bullseye, 3.10-bullseye, 3-buster, 3.10-buster, etc.
|
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
|
||||||
ARG VARIANT="3.10-bullseye"
|
ARG VARIANT="3.10-bullseye"
|
||||||
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
|
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
"dockerfile": "Dockerfile",
|
"dockerfile": "Dockerfile",
|
||||||
"context": "..",
|
"context": "..",
|
||||||
"args": {
|
"args": {
|
||||||
// Update 'VARIANT' to pick a Python version: 3, 3.10, etc.
|
// Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6
|
||||||
// Append -bullseye or -buster to pin to an OS version.
|
// Append -bullseye or -buster to pin to an OS version.
|
||||||
// Use -bullseye variants on local arm64/Apple Silicon.
|
// Use -bullseye variants on local arm64/Apple Silicon.
|
||||||
"VARIANT": "3.10",
|
"VARIANT": "3.10",
|
||||||
|
|||||||
13
.github/workflows/build.yml
vendored
13
.github/workflows/build.yml
vendored
@@ -21,25 +21,26 @@ jobs:
|
|||||||
# set to false for debugging
|
# set to false for debugging
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
|
# using Python 3.8 to support running across multiple operating systems including Windows 7
|
||||||
include:
|
include:
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
# use old linux so that the shared library versioning is more portable
|
# use old linux so that the shared library versioning is more portable
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: linux
|
asset_name: linux
|
||||||
python_version: '3.10'
|
python_version: 3.8
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: linux-py312
|
asset_name: linux-py312
|
||||||
python_version: '3.12'
|
python_version: 3.12
|
||||||
- os: windows-2019
|
- os: windows-2019
|
||||||
artifact_name: capa.exe
|
artifact_name: capa.exe
|
||||||
asset_name: windows
|
asset_name: windows
|
||||||
python_version: '3.10'
|
python_version: 3.8
|
||||||
- os: macos-13
|
- os: macos-12
|
||||||
# use older macOS for assumed better portability
|
# use older macOS for assumed better portability
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: macos
|
asset_name: macos
|
||||||
python_version: '3.10'
|
python_version: 3.8
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -106,7 +107,7 @@ jobs:
|
|||||||
# upload zipped binaries to Release page
|
# upload zipped binaries to Release page
|
||||||
if: github.event_name == 'release'
|
if: github.event_name == 'release'
|
||||||
name: zip and upload ${{ matrix.asset_name }}
|
name: zip and upload ${{ matrix.asset_name }}
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-20.04
|
||||||
needs: [build]
|
needs: [build]
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
|
|||||||
7
.github/workflows/changelog.yml
vendored
7
.github/workflows/changelog.yml
vendored
@@ -13,11 +13,8 @@ permissions:
|
|||||||
jobs:
|
jobs:
|
||||||
check_changelog:
|
check_changelog:
|
||||||
# no need to check for dependency updates via dependabot
|
# no need to check for dependency updates via dependabot
|
||||||
# github.event.pull_request.user.login refers to PR author
|
if: github.actor != 'dependabot[bot]' && github.actor != 'dependabot-preview[bot]'
|
||||||
if: |
|
runs-on: ubuntu-20.04
|
||||||
github.event.pull_request.user.login != 'dependabot[bot]' &&
|
|
||||||
github.event.pull_request.user.login != 'dependabot-preview[bot]'
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
env:
|
env:
|
||||||
NO_CHANGELOG: '[x] No CHANGELOG update needed'
|
NO_CHANGELOG: '[x] No CHANGELOG update needed'
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
2
.github/workflows/publish.yml
vendored
2
.github/workflows/publish.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
|||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: '3.10'
|
python-version: '3.8'
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
|
|||||||
2
.github/workflows/tag.yml
vendored
2
.github/workflows/tag.yml
vendored
@@ -9,7 +9,7 @@ permissions: read-all
|
|||||||
jobs:
|
jobs:
|
||||||
tag:
|
tag:
|
||||||
name: Tag capa rules
|
name: Tag capa rules
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-20.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa-rules
|
- name: Checkout capa-rules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
|
|||||||
30
.github/workflows/tests.yml
vendored
30
.github/workflows/tests.yml
vendored
@@ -26,7 +26,7 @@ env:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
changelog_format:
|
changelog_format:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-20.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -37,15 +37,15 @@ jobs:
|
|||||||
if [ $number != 1 ]; then exit 1; fi
|
if [ $number != 1 ]; then exit 1; fi
|
||||||
|
|
||||||
code_style:
|
code_style:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-20.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
# use latest available python to take advantage of best performance
|
# use latest available python to take advantage of best performance
|
||||||
- name: Set up Python 3.12
|
- name: Set up Python 3.11
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: "3.12"
|
python-version: "3.11"
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -64,16 +64,16 @@ jobs:
|
|||||||
run: pre-commit run deptry --hook-stage manual
|
run: pre-commit run deptry --hook-stage manual
|
||||||
|
|
||||||
rule_linter:
|
rule_linter:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-20.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
- name: Set up Python 3.12
|
- name: Set up Python 3.11
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: "3.12"
|
python-version: "3.11"
|
||||||
- name: Install capa
|
- name: Install capa
|
||||||
run: |
|
run: |
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -88,17 +88,17 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-20.04, windows-2019, macos-13]
|
os: [ubuntu-20.04, windows-2019, macos-12]
|
||||||
# across all operating systems
|
# across all operating systems
|
||||||
python-version: ["3.10", "3.11"]
|
python-version: ["3.8", "3.11"]
|
||||||
include:
|
include:
|
||||||
# on Ubuntu run these as well
|
# on Ubuntu run these as well
|
||||||
|
- os: ubuntu-20.04
|
||||||
|
python-version: "3.8"
|
||||||
|
- os: ubuntu-20.04
|
||||||
|
python-version: "3.9"
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
python-version: "3.10"
|
python-version: "3.10"
|
||||||
- os: ubuntu-20.04
|
|
||||||
python-version: "3.11"
|
|
||||||
- os: ubuntu-20.04
|
|
||||||
python-version: "3.12"
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -131,7 +131,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.10", "3.11"]
|
python-version: ["3.9", "3.11"]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
# only run if BN_SERIAL is available; this check has to be repeated in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
|
# only run if BN_SERIAL is available; this check has to be repeated in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
|
||||||
@@ -173,7 +173,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.10", "3.11"]
|
python-version: ["3.8", "3.11"]
|
||||||
java-version: ["17"]
|
java-version: ["17"]
|
||||||
ghidra-version: ["11.0.1"]
|
ghidra-version: ["11.0.1"]
|
||||||
public-version: ["PUBLIC_20240130"] # for ghidra releases
|
public-version: ["PUBLIC_20240130"] # for ghidra releases
|
||||||
|
|||||||
103
.github/workflows/web-release.yml
vendored
103
.github/workflows/web-release.yml
vendored
@@ -1,103 +0,0 @@
|
|||||||
name: create web release
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
version:
|
|
||||||
description: 'Version number for the release (x.x.x)'
|
|
||||||
required: true
|
|
||||||
type: string
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
run-tests:
|
|
||||||
uses: ./.github/workflows/web-tests.yml
|
|
||||||
|
|
||||||
build-and-release:
|
|
||||||
needs: run-tests
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Set release name
|
|
||||||
run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Check if release already exists
|
|
||||||
run: |
|
|
||||||
if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
|
|
||||||
echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Set up Node.js
|
|
||||||
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
|
|
||||||
with:
|
|
||||||
node-version: 20
|
|
||||||
cache: 'npm'
|
|
||||||
cache-dependency-path: 'web/explorer/package-lock.json'
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: npm ci
|
|
||||||
working-directory: web/explorer
|
|
||||||
|
|
||||||
- name: Build offline bundle
|
|
||||||
run: npm run build:bundle
|
|
||||||
working-directory: web/explorer
|
|
||||||
|
|
||||||
- name: Compress bundle
|
|
||||||
run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
|
|
||||||
working-directory: web/explorer
|
|
||||||
|
|
||||||
- name: Create releases directory
|
|
||||||
run: mkdir -vp web/explorer/releases
|
|
||||||
|
|
||||||
- name: Move release to releases folder
|
|
||||||
run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases
|
|
||||||
|
|
||||||
- name: Compute release SHA256 hash
|
|
||||||
run: |
|
|
||||||
echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Update CHANGELOG.md
|
|
||||||
run: |
|
|
||||||
echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
|
|
||||||
echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
|
|
||||||
echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
|
|
||||||
echo "" >> web/explorer/releases/CHANGELOG.md
|
|
||||||
cat web/explorer/releases/CHANGELOG.md
|
|
||||||
|
|
||||||
- name: Remove older releases
|
|
||||||
# keep only the latest 3 releases
|
|
||||||
run: ls -t capa-explorer-web-v*.zip | tail -n +4 | xargs -r rm --
|
|
||||||
working-directory: web/explorer/releases
|
|
||||||
|
|
||||||
- name: Stage release files
|
|
||||||
run: |
|
|
||||||
git config --local user.email "capa-dev@mandiant.com"
|
|
||||||
git config --local user.name "Capa Bot"
|
|
||||||
git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
|
|
||||||
git add -u web/explorer/releases/
|
|
||||||
|
|
||||||
- name: Create Pull Request
|
|
||||||
uses: peter-evans/create-pull-request@5e914681df9dc83aa4e4905692ca88beb2f9e91f # v7.0.5
|
|
||||||
with:
|
|
||||||
token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
title: "explorer web: add release v${{ github.event.inputs.version }}"
|
|
||||||
body: |
|
|
||||||
This PR adds a new capa Explorer Web release v${{ github.event.inputs.version }}.
|
|
||||||
|
|
||||||
Release details:
|
|
||||||
- Name: ${{ env.RELEASE_NAME }}
|
|
||||||
- SHA256: ${{ env.RELEASE_SHA256 }}
|
|
||||||
|
|
||||||
This release is generated by the [web release](https://github.com/mandiant/capa/actions/workflows/web-release.yml) workflow.
|
|
||||||
|
|
||||||
- [x] No CHANGELOG update needed
|
|
||||||
- [x] No new tests needed
|
|
||||||
- [x] No documentation update needed
|
|
||||||
commit-message: ":robot: explorer web: add release ${{ env.RELEASE_NAME }}"
|
|
||||||
branch: release/web-v${{ github.event.inputs.version }}
|
|
||||||
add-paths: web/explorer/releases/${{ env.RELEASE_NAME }}.zip
|
|
||||||
base: master
|
|
||||||
labels: webui
|
|
||||||
delete-branch: true
|
|
||||||
committer: Capa Bot <capa-dev@mandiant.com>
|
|
||||||
author: Capa Bot <capa-dev@mandiant.com>
|
|
||||||
13
.github/workflows/web-tests.yml
vendored
13
.github/workflows/web-tests.yml
vendored
@@ -1,11 +1,10 @@
|
|||||||
name: capa Explorer Web tests
|
name: Capa Explorer Web tests
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ master ]
|
branches: [ master ]
|
||||||
paths:
|
paths:
|
||||||
- 'web/explorer/**'
|
- 'web/explorer/**'
|
||||||
workflow_call: # this allows the workflow to be called by other workflows
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
@@ -24,20 +23,20 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
cache-dependency-path: 'web/explorer/package-lock.json'
|
cache-dependency-path: './web/explorer/package-lock.json'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: npm ci
|
run: npm ci
|
||||||
working-directory: web/explorer
|
working-directory: ./web/explorer
|
||||||
|
|
||||||
- name: Lint
|
- name: Lint
|
||||||
run: npm run lint
|
run: npm run lint
|
||||||
working-directory: web/explorer
|
working-directory: ./web/explorer
|
||||||
|
|
||||||
- name: Format
|
- name: Format
|
||||||
run: npm run format:check
|
run: npm run format:check
|
||||||
working-directory: web/explorer
|
working-directory: ./web/explorer
|
||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
run: npm run test
|
run: npm run test
|
||||||
working-directory: web/explorer
|
working-directory: ./web/explorer
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: isort
|
- id: isort
|
||||||
name: isort
|
name: isort
|
||||||
stages: [pre-commit, pre-push, manual]
|
stages: [commit, push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: isort
|
entry: isort
|
||||||
args:
|
args:
|
||||||
@@ -46,7 +46,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
name: black
|
name: black
|
||||||
stages: [pre-commit, pre-push, manual]
|
stages: [commit, push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: black
|
entry: black
|
||||||
args:
|
args:
|
||||||
@@ -64,7 +64,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: ruff
|
- id: ruff
|
||||||
name: ruff
|
name: ruff
|
||||||
stages: [pre-commit, pre-push, manual]
|
stages: [commit, push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: ruff
|
entry: ruff
|
||||||
args:
|
args:
|
||||||
@@ -82,7 +82,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
name: flake8
|
name: flake8
|
||||||
stages: [pre-push, manual]
|
stages: [push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: flake8
|
entry: flake8
|
||||||
args:
|
args:
|
||||||
@@ -101,14 +101,13 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: mypy
|
- id: mypy
|
||||||
name: mypy
|
name: mypy
|
||||||
stages: [pre-push, manual]
|
stages: [push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: mypy
|
entry: mypy
|
||||||
args:
|
args:
|
||||||
- "--check-untyped-defs"
|
- "--check-untyped-defs"
|
||||||
- "--ignore-missing-imports"
|
- "--ignore-missing-imports"
|
||||||
- "--config-file=.github/mypy/mypy.ini"
|
- "--config-file=.github/mypy/mypy.ini"
|
||||||
- "--enable-incomplete-feature=NewGenericSyntax"
|
|
||||||
- "capa/"
|
- "capa/"
|
||||||
- "scripts/"
|
- "scripts/"
|
||||||
- "tests/"
|
- "tests/"
|
||||||
@@ -120,7 +119,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: deptry
|
- id: deptry
|
||||||
name: deptry
|
name: deptry
|
||||||
stages: [pre-push, manual]
|
stages: [push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: deptry .
|
entry: deptry .
|
||||||
always_run: true
|
always_run: true
|
||||||
|
|||||||
126
CHANGELOG.md
126
CHANGELOG.md
@@ -12,138 +12,18 @@
|
|||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
||||||
- vmray: load more analysis archives @mr-tz
|
|
||||||
- dynamic: only check file limitations for static file formats @mr-tz
|
|
||||||
- vmray: skip non-printable strings @mike-hunhoff
|
|
||||||
|
|
||||||
### capa Explorer Web
|
|
||||||
|
|
||||||
### capa Explorer IDA Pro plugin
|
|
||||||
|
|
||||||
### Development
|
|
||||||
|
|
||||||
### Raw diffs
|
|
||||||
- [capa v8.0.1...master](https://github.com/mandiant/capa/compare/v8.0.1...master)
|
|
||||||
- [capa-rules v8.0.1...master](https://github.com/mandiant/capa-rules/compare/v8.0.1...master)
|
|
||||||
|
|
||||||
## v8.0.1
|
|
||||||
|
|
||||||
This point release fixes an issue with the IDAPython API so that IDA Pro 8.3, 8.4, and 9.0 are now handled correctly.
|
|
||||||
|
|
||||||
### Bug Fixes
|
|
||||||
|
|
||||||
- handle IDA 8.3/8.4 vs. 9.0 API change @mr-tz
|
|
||||||
|
|
||||||
### Raw diffs
|
|
||||||
- [capa v8.0.0...v8.0.1](https://github.com/mandiant/capa/compare/v8.0.0...v8.0.1)
|
|
||||||
- [capa-rules v8.0.0...v8.0.1](https://github.com/mandiant/capa-rules/compare/v8.0.0...v8.0.1)
|
|
||||||
|
|
||||||
## v8.0.0
|
|
||||||
|
|
||||||
capa version 8 adds support for IDA Pro 9.0 (and idalib). The release comes with various improvements and bug fixes for the Binary Ninja backend (including support for loading database files) -- thanks to @xusheng6.
|
|
||||||
|
|
||||||
Additional bug fixes improve the dynamic and BinExport backends.
|
|
||||||
|
|
||||||
capa version 8 now requires Python 3.10 or newer.
|
|
||||||
|
|
||||||
Special thanks to @Tamir-K, @harshit-wadhwani, @jorik-utwente for their great contributions.
|
|
||||||
|
|
||||||
### New Features
|
|
||||||
|
|
||||||
- allow call as valid subscope for call scoped rules @mr-tz
|
|
||||||
- support loading and analyzing a Binary Ninja database #2496 @xusheng6
|
|
||||||
- vmray: record process command line details @mr-tz
|
|
||||||
|
|
||||||
### Breaking Changes
|
|
||||||
|
|
||||||
- remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz
|
|
||||||
|
|
||||||
### New Rules (54)
|
|
||||||
|
|
||||||
- nursery/get-shadow-password-file-entry-on-linux jonathanlepore@google.com
|
|
||||||
- nursery/set-shadow-password-file-entry-on-linux jonathanlepore@google.com
|
|
||||||
- collection/browser/get-chrome-cookiemonster still@teamt5.org
|
|
||||||
- collection/browser/get-elevation-service-for-chromium-based-browsers still@teamt5.org
|
|
||||||
- collection/get-steam-token still@teamt5.org
|
|
||||||
- nursery/persist-via-application-shimming j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-bits-job j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-print-processors-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- linking/static/touchsocket/linked-against-touchsocket still@teamt5.org
|
|
||||||
- runtime/dotnet/compiled-with-dotnet-aot still@teamt5.org
|
|
||||||
- nursery/persist-via-errorhandler-script j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-get-variable-hijack j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-iphlpapi-dll-hijack j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-lnk-shortcut j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-powershell-profile j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-windows-accessibility-tools j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-windows-terminal-profile j.j.vannielen@utwente.nl
|
|
||||||
- nursery/write-to-browser-extension-directory j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-aedebug-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-amsi-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-app-paths-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-appcertdlls-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-appx-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-autodialdll-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-autoplayhandlers-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-bootverificationprogram-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-code-signing-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-com-hijack j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-command-processor-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-contextmenuhandlers-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-cor_profiler_path-registry-value j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-default-file-association-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-disk-cleanup-handler-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-dotnet-dbgmanageddebugger-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-dotnet_startup_hooks-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-explorer-tools-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-filter-handlers-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-group-policy-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-hhctrl-com-hijack j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-htmlhelp-author-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-image-file-execution-options-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-lsa-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-natural-language-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-netsh-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-network-provider-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-path-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-print-monitors-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-rdp-startup-programs-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-silentprocessexit-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-telemetrycontroller-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-timeproviders-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-ts-initialprogram-registry-key j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-userinitmprlogonscript-registry-value j.j.vannielen@utwente.nl
|
|
||||||
- nursery/persist-via-windows-error-reporting-registry-key j.j.vannielen@utwente.nl
|
|
||||||
|
|
||||||
### Bug Fixes
|
|
||||||
|
|
||||||
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
|
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
|
||||||
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
|
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
|
||||||
- ghidra: fix saving of base address @mr-tz
|
|
||||||
- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
|
|
||||||
- binja: fix crash when the IL of certain functions is not available. #2249 @xusheng6
|
|
||||||
- binja: major performance improvement on the binja extractor. #1414 @xusheng6
|
|
||||||
- cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz
|
|
||||||
- binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6
|
|
||||||
- binja: move the stack string detection to function level #2516 @xusheng6
|
|
||||||
- BinExport2: fix handling of incorrect thunk functions #2524 @williballenthin
|
|
||||||
- BinExport2: more precise pruning of expressions @williballenthin
|
|
||||||
- BinExport2: better handle weird expression trees from Ghidra #2528 #2530 @williballenthin
|
|
||||||
|
|
||||||
### capa Explorer Web
|
### capa Explorer Web
|
||||||
|
|
||||||
### capa Explorer IDA Pro plugin
|
### capa Explorer IDA Pro plugin
|
||||||
|
|
||||||
- fix bug preventing saving of capa results via Save button @mr-tz
|
|
||||||
- fix saving of base address @mr-tz
|
|
||||||
|
|
||||||
### Development
|
### Development
|
||||||
- CI: use macos-13 since macos-12 is deprecated and will be removed on December 3rd, 2024 #2173 @mr-tz
|
|
||||||
- CI: update Binary Ninja version to 4.2 #2499 @xusheng6
|
|
||||||
|
|
||||||
### Raw diffs
|
### Raw diffs
|
||||||
- [capa v7.4.0...v8.0.0](https://github.com/mandiant/capa/compare/v7.4.0...v8.0.0)
|
- [capa v7.4.0...master](https://github.com/mandiant/capa/compare/v7.4.0...master)
|
||||||
- [capa-rules v7.4.0...v8.0.0](https://github.com/mandiant/capa-rules/compare/v7.4.0...v8.0.0)
|
- [capa-rules v7.4.0...master](https://github.com/mandiant/capa-rules/compare/v7.4.0...master)
|
||||||
|
|
||||||
## v7.4.0
|
## v7.4.0
|
||||||
|
|
||||||
@@ -299,8 +179,6 @@ Special thanks to our repeat and new contributors:
|
|||||||
- CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
|
- CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
|
||||||
- CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
|
- CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
|
||||||
- CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
|
- CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
|
||||||
- CI: add web releases workflow #2455 @s-ff
|
|
||||||
- CI: skip changelog.yml for dependabot PRs #2471
|
|
||||||
|
|
||||||
### Raw diffs
|
### Raw diffs
|
||||||
|
|
||||||
|
|||||||
14
README.md
14
README.md
@@ -38,9 +38,9 @@ Below you find a list of [our capa blog posts with more details.](#blog-posts)
|
|||||||
```
|
```
|
||||||
$ capa.exe suspicious.exe
|
$ capa.exe suspicious.exe
|
||||||
|
|
||||||
+--------------------+------------------------------------------------------------------------+
|
+------------------------+--------------------------------------------------------------------------------+
|
||||||
| ATT&CK Tactic | ATT&CK Technique |
|
| ATT&CK Tactic | ATT&CK Technique |
|
||||||
|--------------------+------------------------------------------------------------------------|
|
|------------------------+--------------------------------------------------------------------------------|
|
||||||
| DEFENSE EVASION | Obfuscated Files or Information [T1027] |
|
| DEFENSE EVASION | Obfuscated Files or Information [T1027] |
|
||||||
| DISCOVERY | Query Registry [T1012] |
|
| DISCOVERY | Query Registry [T1012] |
|
||||||
| | System Information Discovery [T1082] |
|
| | System Information Discovery [T1082] |
|
||||||
@@ -48,11 +48,12 @@ $ capa.exe suspicious.exe
|
|||||||
| | Shared Modules [T1129] |
|
| | Shared Modules [T1129] |
|
||||||
| EXFILTRATION | Exfiltration Over C2 Channel [T1041] |
|
| EXFILTRATION | Exfiltration Over C2 Channel [T1041] |
|
||||||
| PERSISTENCE | Create or Modify System Process::Windows Service [T1543.003] |
|
| PERSISTENCE | Create or Modify System Process::Windows Service [T1543.003] |
|
||||||
+--------------------+------------------------------------------------------------------------+
|
+------------------------+--------------------------------------------------------------------------------+
|
||||||
|
|
||||||
+-------------------------------------------+-------------------------------------------------+
|
+-------------------------------------------------------+-------------------------------------------------+
|
||||||
| CAPABILITY | NAMESPACE |
|
| CAPABILITY | NAMESPACE |
|
||||||
|-------------------------------------------+-------------------------------------------------|
|
|-------------------------------------------------------+-------------------------------------------------|
|
||||||
|
| check for OutputDebugString error | anti-analysis/anti-debugging/debugger-detection |
|
||||||
| read and send data from client to server | c2/file-transfer |
|
| read and send data from client to server | c2/file-transfer |
|
||||||
| execute shell command and capture output | c2/shell |
|
| execute shell command and capture output | c2/shell |
|
||||||
| receive data (2 matches) | communication |
|
| receive data (2 matches) | communication |
|
||||||
@@ -73,12 +74,13 @@ $ capa.exe suspicious.exe
|
|||||||
| print debug messages (2 matches) | host-interaction/log/debug/write-event |
|
| print debug messages (2 matches) | host-interaction/log/debug/write-event |
|
||||||
| resolve DNS | host-interaction/network/dns/resolve |
|
| resolve DNS | host-interaction/network/dns/resolve |
|
||||||
| get hostname | host-interaction/os/hostname |
|
| get hostname | host-interaction/os/hostname |
|
||||||
|
| create a process with modified I/O handles and window | host-interaction/process/create |
|
||||||
| create process | host-interaction/process/create |
|
| create process | host-interaction/process/create |
|
||||||
| create registry key | host-interaction/registry/create |
|
| create registry key | host-interaction/registry/create |
|
||||||
| create service | host-interaction/service/create |
|
| create service | host-interaction/service/create |
|
||||||
| create thread | host-interaction/thread/create |
|
| create thread | host-interaction/thread/create |
|
||||||
| persist via Windows service | persistence/service |
|
| persist via Windows service | persistence/service |
|
||||||
+-------------------------------------------+-------------------------------------------------+
|
+-------------------------------------------------------+-------------------------------------------------+
|
||||||
```
|
```
|
||||||
|
|
||||||
# download and usage
|
# download and usage
|
||||||
|
|||||||
@@ -1,38 +0,0 @@
|
|||||||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
|
||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
|
||||||
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
import capa.features.extractors.ida.idalib as idalib
|
|
||||||
|
|
||||||
if not idalib.has_idalib():
|
|
||||||
raise RuntimeError("cannot find IDA idalib module.")
|
|
||||||
|
|
||||||
if not idalib.load_idalib():
|
|
||||||
raise RuntimeError("failed to load IDA idalib module.")
|
|
||||||
|
|
||||||
import idaapi
|
|
||||||
import idautils
|
|
||||||
|
|
||||||
|
|
||||||
class FunctionId(BaseModel):
|
|
||||||
va: int
|
|
||||||
is_library: bool
|
|
||||||
name: str
|
|
||||||
|
|
||||||
|
|
||||||
def get_flirt_matches(lib_only=True):
|
|
||||||
for fva in idautils.Functions():
|
|
||||||
f = idaapi.get_func(fva)
|
|
||||||
is_lib = bool(f.flags & idaapi.FUNC_LIB)
|
|
||||||
fname = idaapi.get_func_name(fva)
|
|
||||||
|
|
||||||
if lib_only and not is_lib:
|
|
||||||
continue
|
|
||||||
|
|
||||||
yield FunctionId(va=fva, is_library=is_lib, name=fname)
|
|
||||||
@@ -1,242 +1,193 @@
|
|||||||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
"""
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
further requirements:
|
||||||
# you may not use this file except in compliance with the License.
|
- nltk
|
||||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
"""
|
||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
|
||||||
import io
|
|
||||||
import sys
|
import sys
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import collections
|
||||||
import tempfile
|
|
||||||
import contextlib
|
|
||||||
from enum import Enum
|
|
||||||
from typing import List, Optional
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import rich
|
import rich
|
||||||
from pydantic import BaseModel
|
|
||||||
from rich.text import Text
|
from rich.text import Text
|
||||||
from rich.console import Console
|
|
||||||
|
|
||||||
import capa.main
|
|
||||||
import capa.helpers
|
|
||||||
import capa.analysis.flirt
|
|
||||||
import capa.analysis.strings
|
import capa.analysis.strings
|
||||||
import capa.features.extractors.ida.idalib as idalib
|
import capa.features.extractors.strings
|
||||||
|
from capa.analysis.strings import LibraryStringDatabase
|
||||||
if not idalib.has_idalib():
|
|
||||||
raise RuntimeError("cannot find IDA idalib module.")
|
|
||||||
|
|
||||||
if not idalib.load_idalib():
|
|
||||||
raise RuntimeError("failed to load IDA idalib module.")
|
|
||||||
|
|
||||||
import idaapi
|
|
||||||
import idapro
|
|
||||||
import ida_auto
|
|
||||||
import idautils
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Classification(str, Enum):
|
def extract_strings(buf, n=4):
|
||||||
USER = "user"
|
yield from capa.features.extractors.strings.extract_ascii_strings(buf, n=n)
|
||||||
LIBRARY = "library"
|
yield from capa.features.extractors.strings.extract_unicode_strings(buf, n=n)
|
||||||
UNKNOWN = "unknown"
|
|
||||||
|
|
||||||
|
|
||||||
class Method(str, Enum):
|
def prune_databases(dbs: list[LibraryStringDatabase], n=8):
|
||||||
FLIRT = "flirt"
|
"""remove less trustyworthy database entries.
|
||||||
STRINGS = "strings"
|
|
||||||
THUNK = "thunk"
|
|
||||||
ENTRYPOINT = "entrypoint"
|
|
||||||
|
|
||||||
|
such as:
|
||||||
|
- those found in multiple databases
|
||||||
|
- those that are English words
|
||||||
|
- those that are too short
|
||||||
|
- Windows API and DLL names
|
||||||
|
"""
|
||||||
|
|
||||||
class FunctionClassification(BaseModel):
|
# TODO: consider applying these filters directly to the persisted databases, not at load time.
|
||||||
va: int
|
|
||||||
classification: Classification
|
|
||||||
# name per the disassembler/analysis tool
|
|
||||||
# may be combined with the recovered/suspected name TODO below
|
|
||||||
name: str
|
|
||||||
|
|
||||||
# if is library, this must be provided
|
winapi = capa.analysis.strings.WindowsApiStringDatabase.from_defaults()
|
||||||
method: Optional[Method]
|
|
||||||
|
|
||||||
# TODO if is library, recovered/suspected name?
|
|
||||||
|
|
||||||
# if is library, these can optionally be provided.
|
|
||||||
library_name: Optional[str] = None
|
|
||||||
library_version: Optional[str] = None
|
|
||||||
|
|
||||||
|
|
||||||
class FunctionIdResults(BaseModel):
|
|
||||||
function_classifications: List[FunctionClassification]
|
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
|
||||||
def ida_session(input_path: Path, use_temp_dir=True):
|
|
||||||
if use_temp_dir:
|
|
||||||
t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
|
|
||||||
else:
|
|
||||||
t = input_path
|
|
||||||
|
|
||||||
logger.debug("using %s", str(t))
|
|
||||||
# stderr=True is used here to redirect the spinner banner to stderr,
|
|
||||||
# so that users can redirect capa's output.
|
|
||||||
console = Console(stderr=True, quiet=False)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if use_temp_dir:
|
from nltk.corpus import words as nltk_words
|
||||||
t.write_bytes(input_path.read_bytes())
|
except ImportError:
|
||||||
|
# one-time download of dataset.
|
||||||
|
# this probably doesn't work well for embedded use.
|
||||||
|
import nltk
|
||||||
|
nltk.download("words")
|
||||||
|
from nltk.corpus import words as nltk_words
|
||||||
|
words = set(nltk_words.words())
|
||||||
|
|
||||||
# idalib writes to stdout (ugh), so we have to capture that
|
counter = collections.Counter()
|
||||||
# so as not to screw up structured output.
|
to_remove = set()
|
||||||
with capa.helpers.stdout_redirector(io.BytesIO()):
|
for db in dbs:
|
||||||
idapro.enable_console_messages(False)
|
for string in db.metadata_by_string.keys():
|
||||||
with capa.main.timing("analyze program"):
|
counter[string] += 1
|
||||||
with console.status("analyzing program...", spinner="dots"):
|
|
||||||
if idapro.open_database(str(t.absolute()), run_auto_analysis=True):
|
|
||||||
raise RuntimeError("failed to analyze input file")
|
|
||||||
|
|
||||||
logger.debug("idalib: waiting for analysis...")
|
if string in words:
|
||||||
ida_auto.auto_wait()
|
to_remove.add(string)
|
||||||
logger.debug("idalib: opened database.")
|
|
||||||
|
|
||||||
yield
|
|
||||||
finally:
|
|
||||||
idapro.close_database()
|
|
||||||
if use_temp_dir:
|
|
||||||
t.unlink()
|
|
||||||
|
|
||||||
|
|
||||||
def is_thunk_function(fva):
|
|
||||||
f = idaapi.get_func(fva)
|
|
||||||
return bool(f.flags & idaapi.FUNC_THUNK)
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
|
|
||||||
if argv is None:
|
|
||||||
argv = sys.argv[1:]
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Identify library functions using various strategies.")
|
|
||||||
capa.main.install_common_args(parser, wanted={"input_file"})
|
|
||||||
parser.add_argument("--store-idb", action="store_true", default=False, help="store IDA database file")
|
|
||||||
parser.add_argument("--min-string-length", type=int, default=8, help="minimum string length")
|
|
||||||
parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
|
|
||||||
args = parser.parse_args(args=argv)
|
|
||||||
|
|
||||||
try:
|
|
||||||
capa.main.handle_common_args(args)
|
|
||||||
except capa.main.ShouldExitError as e:
|
|
||||||
return e.status_code
|
|
||||||
|
|
||||||
dbs = capa.analysis.strings.get_default_databases()
|
|
||||||
capa.analysis.strings.prune_databases(dbs, n=args.min_string_length)
|
|
||||||
|
|
||||||
function_classifications: List[FunctionClassification] = []
|
|
||||||
with ida_session(args.input_file, use_temp_dir=not args.store_idb):
|
|
||||||
with capa.main.timing("FLIRT-based library identification"):
|
|
||||||
# TODO: add more signature (files)
|
|
||||||
# TOOD: apply more signatures
|
|
||||||
for flirt_match in capa.analysis.flirt.get_flirt_matches():
|
|
||||||
function_classifications.append(
|
|
||||||
FunctionClassification(
|
|
||||||
va=flirt_match.va,
|
|
||||||
name=flirt_match.name,
|
|
||||||
classification=Classification.LIBRARY,
|
|
||||||
method=Method.FLIRT,
|
|
||||||
# note: we cannot currently include which signature matched per function via the IDA API
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# thunks
|
|
||||||
for fva in idautils.Functions():
|
|
||||||
if is_thunk_function(fva):
|
|
||||||
function_classifications.append(
|
|
||||||
FunctionClassification(
|
|
||||||
va=fva,
|
|
||||||
name=idaapi.get_func_name(fva),
|
|
||||||
classification=Classification.LIBRARY,
|
|
||||||
method=Method.THUNK,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
with capa.main.timing("string-based library identification"):
|
|
||||||
for string_match in capa.analysis.strings.get_string_matches(dbs):
|
|
||||||
function_classifications.append(
|
|
||||||
FunctionClassification(
|
|
||||||
va=string_match.va,
|
|
||||||
name=idaapi.get_func_name(string_match.va),
|
|
||||||
classification=Classification.LIBRARY,
|
|
||||||
method=Method.STRINGS,
|
|
||||||
library_name=string_match.metadata.library_name,
|
|
||||||
library_version=string_match.metadata.library_version,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
for va in idautils.Functions():
|
|
||||||
name = idaapi.get_func_name(va)
|
|
||||||
if name not in {
|
|
||||||
"WinMain",
|
|
||||||
}:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
function_classifications.append(
|
if len(string) < n:
|
||||||
FunctionClassification(
|
to_remove.add(string)
|
||||||
va=va,
|
continue
|
||||||
name=name,
|
|
||||||
classification=Classification.USER,
|
|
||||||
method=Method.ENTRYPOINT,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
doc = FunctionIdResults(function_classifications=[])
|
if string in winapi.api_names:
|
||||||
classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
|
to_remove.add(string)
|
||||||
for va in idautils.Functions():
|
continue
|
||||||
if classifications := classifications_by_va.get(va):
|
|
||||||
doc.function_classifications.extend(classifications)
|
|
||||||
else:
|
|
||||||
doc.function_classifications.append(
|
|
||||||
FunctionClassification(
|
|
||||||
va=va,
|
|
||||||
name=idaapi.get_func_name(va),
|
|
||||||
classification=Classification.UNKNOWN,
|
|
||||||
method=None,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if args.json:
|
if string in winapi.dll_names:
|
||||||
print(doc.model_dump_json()) # noqa: T201 print found
|
to_remove.add(string)
|
||||||
|
continue
|
||||||
|
|
||||||
else:
|
for string, count in counter.most_common():
|
||||||
table = rich.table.Table()
|
if count <= 1:
|
||||||
table.add_column("FVA")
|
break
|
||||||
table.add_column("CLASSIFICATION")
|
|
||||||
table.add_column("METHOD")
|
|
||||||
table.add_column("FNAME")
|
|
||||||
table.add_column("EXTRA INFO")
|
|
||||||
|
|
||||||
classifications_by_va = capa.analysis.strings.create_index(doc.function_classifications, "va", sorted_=True)
|
# remove strings that are seen in more than one database
|
||||||
for va, classifications in classifications_by_va.items():
|
to_remove.add(string)
|
||||||
name = ", ".join({c.name for c in classifications})
|
|
||||||
if "sub_" in name:
|
|
||||||
name = Text(name, style="grey53")
|
|
||||||
|
|
||||||
classification = {c.classification for c in classifications}
|
for db in dbs:
|
||||||
method = {c.method for c in classifications if c.method}
|
for string in to_remove:
|
||||||
extra = {f"{c.library_name}@{c.library_version}" for c in classifications if c.library_name}
|
if string in db.metadata_by_string:
|
||||||
|
del db.metadata_by_string[string]
|
||||||
|
|
||||||
table.add_row(
|
|
||||||
hex(va),
|
|
||||||
", ".join(classification) if classification != {"unknown"} else Text("unknown", style="grey53"),
|
|
||||||
", ".join(method),
|
|
||||||
name,
|
|
||||||
", ".join(extra),
|
|
||||||
)
|
|
||||||
|
|
||||||
rich.print(table)
|
def open_ida(input_path: Path):
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
import idapro
|
||||||
|
|
||||||
|
t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
|
||||||
|
t.write_bytes(input_path.read_bytes())
|
||||||
|
# resource leak: we should delete this upon exit
|
||||||
|
|
||||||
|
idapro.enable_console_messages(False)
|
||||||
|
idapro.open_database(str(t.absolute()), run_auto_analysis=True)
|
||||||
|
|
||||||
|
import ida_auto
|
||||||
|
ida_auto.auto_wait()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
# use n=8 to ignore common words
|
||||||
|
N = 8
|
||||||
|
|
||||||
|
input_path = Path(sys.argv[1])
|
||||||
|
input_buf = input_path.read_bytes()
|
||||||
|
|
||||||
|
dbs = capa.analysis.strings.get_default_databases()
|
||||||
|
prune_databases(dbs, n=N)
|
||||||
|
|
||||||
|
strings_by_library = collections.defaultdict(set)
|
||||||
|
for string in extract_strings(input_path.read_bytes(), n=N):
|
||||||
|
for db in dbs:
|
||||||
|
if (metadata := db.metadata_by_string.get(string.s)):
|
||||||
|
strings_by_library[metadata.library_name].add(string.s)
|
||||||
|
|
||||||
|
console = rich.get_console()
|
||||||
|
console.print(f"found libraries:", style="bold")
|
||||||
|
for library, strings in sorted(strings_by_library.items(), key=lambda p: len(p[1]), reverse=True):
|
||||||
|
console.print(f" - [b]{library}[/] ({len(strings)} strings)")
|
||||||
|
|
||||||
|
for string in sorted(strings)[:10]:
|
||||||
|
console.print(f" - {string}", markup=False, style="grey37")
|
||||||
|
|
||||||
|
if len(strings) > 10:
|
||||||
|
console.print(" ...", style="grey37")
|
||||||
|
|
||||||
|
if not strings_by_library:
|
||||||
|
console.print(" (none)", style="grey37")
|
||||||
|
# since we're not going to find any strings
|
||||||
|
# return early and don't do IDA analysis
|
||||||
|
return
|
||||||
|
|
||||||
|
# TODO: ensure there are XXX matches for each library, or ignore those entries
|
||||||
|
|
||||||
|
open_ida(input_path)
|
||||||
|
|
||||||
|
import idaapi
|
||||||
|
import idautils
|
||||||
|
import ida_funcs
|
||||||
|
import capa.features.extractors.ida.helpers as ida_helpers
|
||||||
|
|
||||||
|
strings_by_function = collections.defaultdict(set)
|
||||||
|
for ea in idautils.Functions():
|
||||||
|
f = idaapi.get_func(ea)
|
||||||
|
|
||||||
|
# ignore library functions and thunk functions as identified by IDA
|
||||||
|
if f.flags & idaapi.FUNC_THUNK:
|
||||||
|
continue
|
||||||
|
if f.flags & idaapi.FUNC_LIB:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for bb in ida_helpers.get_function_blocks(f):
|
||||||
|
for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
||||||
|
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||||
|
if ref == insn.ea:
|
||||||
|
continue
|
||||||
|
|
||||||
|
string = capa.features.extractors.ida.helpers.find_string_at(ref)
|
||||||
|
if not string:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for db in dbs:
|
||||||
|
if (metadata := db.metadata_by_string.get(string)):
|
||||||
|
strings_by_function[ea].add(string)
|
||||||
|
|
||||||
|
# ensure there are at least XXX functions renamed, or ignore those entries
|
||||||
|
|
||||||
|
console.print("functions:", style="bold")
|
||||||
|
for function, strings in sorted(strings_by_function.items()):
|
||||||
|
if strings:
|
||||||
|
name = ida_funcs.get_func_name(function)
|
||||||
|
|
||||||
|
console.print(f" [b]{name}[/]@{function:08x}:")
|
||||||
|
|
||||||
|
for string in strings:
|
||||||
|
for db in dbs:
|
||||||
|
if (metadata := db.metadata_by_string.get(string)):
|
||||||
|
location = Text(f"{metadata.library_name}@{metadata.library_version}::{metadata.function_name}", style="grey37")
|
||||||
|
console.print(" - ", location, ": ", string.rstrip())
|
||||||
|
|
||||||
|
# TODO: ensure there aren't conflicts among the matches
|
||||||
|
|
||||||
|
console.print()
|
||||||
|
|
||||||
|
console.print(f"found {len(strings_by_function)} library functions across {len(list(idautils.Functions()))} functions")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.exit(main())
|
main()
|
||||||
|
|||||||
@@ -1,2 +0,0 @@
|
|||||||
# temporary extra file to track dependencies of the analysis directory
|
|
||||||
nltk==3.9.1
|
|
||||||
@@ -1,28 +1,10 @@
|
|||||||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
|
||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
|
||||||
|
|
||||||
"""
|
|
||||||
further requirements:
|
|
||||||
- nltk
|
|
||||||
"""
|
|
||||||
import gzip
|
import gzip
|
||||||
import logging
|
import pathlib
|
||||||
import collections
|
from typing import Dict, Sequence
|
||||||
from typing import Any, Dict, Mapping
|
|
||||||
from pathlib import Path
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
import msgspec
|
import msgspec
|
||||||
|
|
||||||
import capa.features.extractors.strings
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class LibraryString(msgspec.Struct):
|
class LibraryString(msgspec.Struct):
|
||||||
string: str
|
string: str
|
||||||
@@ -41,7 +23,7 @@ class LibraryStringDatabase:
|
|||||||
return len(self.metadata_by_string)
|
return len(self.metadata_by_string)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_file(cls, path: Path) -> "LibraryStringDatabase":
|
def from_file(cls, path: pathlib.Path) -> "LibraryStringDatabase":
|
||||||
metadata_by_string: Dict[str, LibraryString] = {}
|
metadata_by_string: Dict[str, LibraryString] = {}
|
||||||
decoder = msgspec.json.Decoder(type=LibraryString)
|
decoder = msgspec.json.Decoder(type=LibraryString)
|
||||||
for line in gzip.decompress(path.read_bytes()).split(b"\n"):
|
for line in gzip.decompress(path.read_bytes()).split(b"\n"):
|
||||||
@@ -73,12 +55,12 @@ DEFAULT_FILENAMES = (
|
|||||||
"zlib.jsonl.gz",
|
"zlib.jsonl.gz",
|
||||||
)
|
)
|
||||||
|
|
||||||
DEFAULT_PATHS = tuple(Path(__file__).parent / "data" / "oss" / filename for filename in DEFAULT_FILENAMES) + (
|
DEFAULT_PATHS = tuple(
|
||||||
Path(__file__).parent / "data" / "crt" / "msvc_v143.jsonl.gz",
|
pathlib.Path(__file__).parent / "data" / "oss" / filename for filename in DEFAULT_FILENAMES
|
||||||
)
|
) + (pathlib.Path(__file__).parent / "data" / "crt" / "msvc_v143.jsonl.gz",)
|
||||||
|
|
||||||
|
|
||||||
def get_default_databases() -> list[LibraryStringDatabase]:
|
def get_default_databases() -> Sequence[LibraryStringDatabase]:
|
||||||
return [LibraryStringDatabase.from_file(path) for path in DEFAULT_PATHS]
|
return [LibraryStringDatabase.from_file(path) for path in DEFAULT_PATHS]
|
||||||
|
|
||||||
|
|
||||||
@@ -91,9 +73,9 @@ class WindowsApiStringDatabase:
|
|||||||
return len(self.dll_names) + len(self.api_names)
|
return len(self.dll_names) + len(self.api_names)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dir(cls, path: Path) -> "WindowsApiStringDatabase":
|
def from_dir(cls, path: pathlib.Path) -> "WindowsApiStringDatabase":
|
||||||
dll_names: set[str] = set()
|
dll_names: Set[str] = set()
|
||||||
api_names: set[str] = set()
|
api_names: Set[str] = set()
|
||||||
|
|
||||||
for line in gzip.decompress((path / "dlls.txt.gz").read_bytes()).decode("utf-8").splitlines():
|
for line in gzip.decompress((path / "dlls.txt.gz").read_bytes()).decode("utf-8").splitlines():
|
||||||
if not line:
|
if not line:
|
||||||
@@ -109,161 +91,5 @@ class WindowsApiStringDatabase:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_defaults(cls) -> "WindowsApiStringDatabase":
|
def from_defaults(cls) -> "WindowsApiStringDatabase":
|
||||||
return cls.from_dir(Path(__file__).parent / "data" / "winapi")
|
return cls.from_dir(pathlib.Path(__file__).parent / "data" / "winapi")
|
||||||
|
|
||||||
|
|
||||||
def extract_strings(buf, n=4):
|
|
||||||
yield from capa.features.extractors.strings.extract_ascii_strings(buf, n=n)
|
|
||||||
yield from capa.features.extractors.strings.extract_unicode_strings(buf, n=n)
|
|
||||||
|
|
||||||
|
|
||||||
def prune_databases(dbs: list[LibraryStringDatabase], n=8):
|
|
||||||
"""remove less trustyworthy database entries.
|
|
||||||
|
|
||||||
such as:
|
|
||||||
- those found in multiple databases
|
|
||||||
- those that are English words
|
|
||||||
- those that are too short
|
|
||||||
- Windows API and DLL names
|
|
||||||
"""
|
|
||||||
|
|
||||||
# TODO: consider applying these filters directly to the persisted databases, not at load time.
|
|
||||||
|
|
||||||
winapi = WindowsApiStringDatabase.from_defaults()
|
|
||||||
|
|
||||||
try:
|
|
||||||
from nltk.corpus import words as nltk_words
|
|
||||||
|
|
||||||
nltk_words.words()
|
|
||||||
except (ImportError, LookupError):
|
|
||||||
# one-time download of dataset.
|
|
||||||
# this probably doesn't work well for embedded use.
|
|
||||||
import nltk
|
|
||||||
|
|
||||||
nltk.download("words")
|
|
||||||
from nltk.corpus import words as nltk_words
|
|
||||||
words = set(nltk_words.words())
|
|
||||||
|
|
||||||
counter: collections.Counter[str] = collections.Counter()
|
|
||||||
to_remove = set()
|
|
||||||
for db in dbs:
|
|
||||||
for string in db.metadata_by_string.keys():
|
|
||||||
counter[string] += 1
|
|
||||||
|
|
||||||
if string in words:
|
|
||||||
to_remove.add(string)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if len(string) < n:
|
|
||||||
to_remove.add(string)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if string in winapi.api_names:
|
|
||||||
to_remove.add(string)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if string in winapi.dll_names:
|
|
||||||
to_remove.add(string)
|
|
||||||
continue
|
|
||||||
|
|
||||||
for string, count in counter.most_common():
|
|
||||||
if count <= 1:
|
|
||||||
break
|
|
||||||
|
|
||||||
# remove strings that are seen in more than one database
|
|
||||||
to_remove.add(string)
|
|
||||||
|
|
||||||
for db in dbs:
|
|
||||||
for string in to_remove:
|
|
||||||
if string in db.metadata_by_string:
|
|
||||||
del db.metadata_by_string[string]
|
|
||||||
|
|
||||||
|
|
||||||
def get_function_strings():
|
|
||||||
import idaapi
|
|
||||||
import idautils
|
|
||||||
|
|
||||||
import capa.features.extractors.ida.helpers as ida_helpers
|
|
||||||
|
|
||||||
strings_by_function = collections.defaultdict(set)
|
|
||||||
for ea in idautils.Functions():
|
|
||||||
f = idaapi.get_func(ea)
|
|
||||||
|
|
||||||
# ignore library functions and thunk functions as identified by IDA
|
|
||||||
if f.flags & idaapi.FUNC_THUNK:
|
|
||||||
continue
|
|
||||||
if f.flags & idaapi.FUNC_LIB:
|
|
||||||
continue
|
|
||||||
|
|
||||||
for bb in ida_helpers.get_function_blocks(f):
|
|
||||||
for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
|
||||||
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
|
||||||
if ref == insn.ea:
|
|
||||||
continue
|
|
||||||
|
|
||||||
string = capa.features.extractors.ida.helpers.find_string_at(ref)
|
|
||||||
if not string:
|
|
||||||
continue
|
|
||||||
|
|
||||||
strings_by_function[ea].add(string)
|
|
||||||
|
|
||||||
return strings_by_function
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class LibraryStringClassification:
|
|
||||||
va: int
|
|
||||||
string: str
|
|
||||||
library_name: str
|
|
||||||
metadata: LibraryString
|
|
||||||
|
|
||||||
|
|
||||||
def create_index(s: list, k: str, sorted_: bool = False) -> Mapping[Any, list]:
|
|
||||||
"""create an index of the elements in `s` using the key `k`, optionally sorted by `k`"""
|
|
||||||
if sorted_:
|
|
||||||
s = sorted(s, key=lambda x: getattr(x, k))
|
|
||||||
|
|
||||||
s_by_k = collections.defaultdict(list)
|
|
||||||
for v in s:
|
|
||||||
p = getattr(v, k)
|
|
||||||
s_by_k[p].append(v)
|
|
||||||
return s_by_k
|
|
||||||
|
|
||||||
|
|
||||||
def get_string_matches(dbs: list[LibraryStringDatabase]) -> list[LibraryStringClassification]:
|
|
||||||
matches: list[LibraryStringClassification] = []
|
|
||||||
|
|
||||||
for function, strings in sorted(get_function_strings().items()):
|
|
||||||
for string in strings:
|
|
||||||
for db in dbs:
|
|
||||||
if metadata := db.metadata_by_string.get(string):
|
|
||||||
matches.append(
|
|
||||||
LibraryStringClassification(
|
|
||||||
va=function,
|
|
||||||
string=string,
|
|
||||||
library_name=metadata.library_name,
|
|
||||||
metadata=metadata,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# if there are less than N strings per library, ignore that library
|
|
||||||
matches_by_library = create_index(matches, "library_name")
|
|
||||||
for library_name, library_matches in matches_by_library.items():
|
|
||||||
if len(library_matches) > 5:
|
|
||||||
continue
|
|
||||||
|
|
||||||
logger.info("pruning library %s: only %d matched string", library_name, len(library_matches))
|
|
||||||
matches = [m for m in matches if m.library_name != library_name]
|
|
||||||
|
|
||||||
# if there are conflicts within a single function, don't label it
|
|
||||||
matches_by_function = create_index(matches, "va")
|
|
||||||
for va, function_matches in matches_by_function.items():
|
|
||||||
library_names = {m.library_name for m in function_matches}
|
|
||||||
if len(library_names) == 1:
|
|
||||||
continue
|
|
||||||
|
|
||||||
logger.info("conflicting matches: 0x%x: %s", va, sorted(library_names))
|
|
||||||
# this is potentially slow (O(n**2)) but hopefully fast enough in practice.
|
|
||||||
matches = [m for m in matches if m.va != va]
|
|
||||||
|
|
||||||
return matches
|
|
||||||
|
|||||||
@@ -1,130 +0,0 @@
|
|||||||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
|
||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
|
||||||
import sys
|
|
||||||
import logging
|
|
||||||
import collections
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import rich
|
|
||||||
from rich.text import Text
|
|
||||||
|
|
||||||
import capa.analysis.strings
|
|
||||||
import capa.features.extractors.strings
|
|
||||||
import capa.features.extractors.ida.helpers as ida_helpers
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def open_ida(input_path: Path):
|
|
||||||
import tempfile
|
|
||||||
|
|
||||||
import idapro
|
|
||||||
|
|
||||||
t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
|
|
||||||
t.write_bytes(input_path.read_bytes())
|
|
||||||
# resource leak: we should delete this upon exit
|
|
||||||
|
|
||||||
idapro.enable_console_messages(False)
|
|
||||||
idapro.open_database(str(t.absolute()), run_auto_analysis=True)
|
|
||||||
|
|
||||||
import ida_auto
|
|
||||||
|
|
||||||
ida_auto.auto_wait()
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """
    report which known open-source library strings appear in a sample.

    the sample path is read from `sys.argv[1]`. the function works in two phases:

      1. extract strings from the raw file bytes and group the ones found in
         the string databases by library name, then print up to ten strings
         per library (libraries with the most matching strings first).
      2. if at least one library matched, open the sample in IDA via
         `open_ida()`, walk every function that IDA has not flagged as a thunk
         or library function, and print the database strings referenced by each.

    all output goes to the rich console; nothing is returned.
    """
    logging.basicConfig(level=logging.DEBUG)

    # use n=8 to ignore common words
    N = 8

    input_path = Path(sys.argv[1])

    dbs = capa.analysis.strings.get_default_databases()
    capa.analysis.strings.prune_databases(dbs, n=N)

    # phase 1: library name -> set of matching strings found in the raw file.
    strings_by_library = collections.defaultdict(set)
    for string in capa.analysis.strings.extract_strings(input_path.read_bytes(), n=N):
        for db in dbs:
            if metadata := db.metadata_by_string.get(string.s):
                strings_by_library[metadata.library_name].add(string.s)

    console = rich.get_console()
    console.print("found libraries:", style="bold")
    for library, strings in sorted(strings_by_library.items(), key=lambda p: len(p[1]), reverse=True):
        console.print(f" - [b]{library}[/] ({len(strings)} strings)")

        # show only a sample of the strings so the listing stays readable.
        for string in sorted(strings)[:10]:
            console.print(f" - {string}", markup=False, style="grey37")

        if len(strings) > 10:
            console.print(" ...", style="grey37")

    if not strings_by_library:
        console.print(" (none)", style="grey37")
        # since we're not going to find any strings
        # return early and don't do IDA analysis
        return

    open_ida(input_path)

    # NOTE(review): these are imported only after open_ida(); presumably the IDA
    # modules are importable only once the IDA library has been loaded - confirm.
    import idaapi
    import idautils
    import ida_funcs

    # phase 2: function address -> set of database strings referenced by it.
    strings_by_function = collections.defaultdict(set)
    # count functions while walking them, rather than enumerating them a second time.
    function_count = 0
    for ea in idautils.Functions():
        function_count += 1
        f = idaapi.get_func(ea)

        # ignore library functions and thunk functions as identified by IDA
        if f.flags & idaapi.FUNC_THUNK:
            continue
        if f.flags & idaapi.FUNC_LIB:
            continue

        for bb in ida_helpers.get_function_blocks(f):
            for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
                ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
                # the helper handing back the instruction's own address is treated as
                # "nothing to follow" (presumably: no data reference - confirm in helper).
                if ref == insn.ea:
                    continue

                string = capa.features.extractors.ida.helpers.find_string_at(ref)
                if not string:
                    continue

                # a single database hit is enough to record the string.
                if any(db.metadata_by_string.get(string) for db in dbs):
                    strings_by_function[ea].add(string)

    # ensure there are at least XXX functions renamed, or ignore those entries

    console.print("functions:", style="bold")
    for function, strings in sorted(strings_by_function.items()):
        if strings:
            name = ida_funcs.get_func_name(function)

            console.print(f" [b]{name}[/]@{function:08x}:")

            # sorted so that the report is stable from run to run,
            # matching the sorted listing of library strings above.
            for string in sorted(strings):
                # print one line per database that knows the string.
                for db in dbs:
                    if metadata := db.metadata_by_string.get(string):
                        location = Text(
                            f"{metadata.library_name}@{metadata.library_version}::{metadata.function_name}",
                            style="grey37",
                        )
                        console.print(" - ", location, ": ", string.rstrip())

            console.print()

    console.print(f"found {len(strings_by_function)} library functions across {function_count} functions")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
52
capa/analysis/strings/data/oss/jh_to_qs.py
Normal file
52
capa/analysis/strings/data/oss/jh_to_qs.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
"""
|
||||||
|
convert from a jh CSV file to a .jsonl.gz OpenSourceString database.
|
||||||
|
|
||||||
|
the jh file looks like:
|
||||||
|
|
||||||
|
# triplet,compiler,library,version,profile,path,function,type,value
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0x00000100
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0xfffffff8
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0xfffffffe
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,api,BZ2_bzCompressInit
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,api,handle_compress
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffDecompress,number,0x0000fa90
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffDecompress,number,0xfffffff8
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffDecompress,number,0xfffffff9
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffDecompress,number,0xfffffffd
|
||||||
|
|
||||||
|
jh is found here: https://github.com/williballenthin/lancelot/blob/master/bin/src/bin/jh.rs
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
import msgspec
|
||||||
|
|
||||||
|
from capa.analysis.strings import LibraryString
|
||||||
|
|
||||||
|
# read the jh CSV named on the command line and emit one JSON-encoded
# LibraryString per line on stdout, keeping only the "string" records.
p = pathlib.Path(sys.argv[1])

# decode explicitly as UTF-8 rather than with the platform default encoding,
# so the same input converts identically on every OS (the output written below
# is always UTF-8 JSON).
# NOTE(review): assumes jh writes its CSV as UTF-8 - confirm against jh.rs.
for line in p.read_text(encoding="utf-8").split("\n"):
    # skip blank lines and the `# triplet,compiler,...` header/comment lines.
    if not line or line.startswith("#"):
        continue

    # only the first seven commas delimit columns; the remainder is "type,value",
    # and the value itself may contain further commas.
    _triplet, _compiler, library, version, _profile, path, function, rest = line.split(",", 7)
    kind, _, value = rest.partition(",")
    if kind != "string":
        continue

    # values that start with a double quote are decoded as JSON string literals.
    if value.startswith('"'):
        value = json.loads(value)

    s = LibraryString(
        string=value,
        library_name=library,
        library_version=version,
        file_path=path,
        function_name=function,
    )

    sys.stdout.buffer.write(msgspec.json.encode(s))
    sys.stdout.buffer.write(b"\n")
|
||||||
@@ -9,7 +9,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any
|
from typing import Any, Tuple
|
||||||
|
|
||||||
from capa.rules import Scope, RuleSet
|
from capa.rules import Scope, RuleSet
|
||||||
from capa.engine import FeatureSet, MatchResults
|
from capa.engine import FeatureSet, MatchResults
|
||||||
@@ -64,7 +64,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
|
|||||||
|
|
||||||
def find_capabilities(
|
def find_capabilities(
|
||||||
ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
|
ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
|
||||||
) -> tuple[MatchResults, Any]:
|
) -> Tuple[MatchResults, Any]:
|
||||||
from capa.capabilities.static import find_static_capabilities
|
from capa.capabilities.static import find_static_capabilities
|
||||||
from capa.capabilities.dynamic import find_dynamic_capabilities
|
from capa.capabilities.dynamic import find_dynamic_capabilities
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any
|
from typing import Any, List, Tuple
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.features.freeze as frz
|
import capa.features.freeze as frz
|
||||||
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def find_call_capabilities(
|
def find_call_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> tuple[FeatureSet, MatchResults]:
|
) -> Tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules for the given call.
|
find matches for the given rules for the given call.
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ def find_call_capabilities(
|
|||||||
|
|
||||||
def find_thread_capabilities(
|
def find_thread_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
|
||||||
) -> tuple[FeatureSet, MatchResults, MatchResults]:
|
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given thread.
|
find matches for the given rules within the given thread.
|
||||||
|
|
||||||
@@ -89,7 +89,7 @@ def find_thread_capabilities(
|
|||||||
|
|
||||||
def find_process_capabilities(
|
def find_process_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
|
||||||
) -> tuple[MatchResults, MatchResults, MatchResults, int]:
|
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given process.
|
find matches for the given rules within the given process.
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ def find_process_capabilities(
|
|||||||
|
|
||||||
def find_dynamic_capabilities(
|
def find_dynamic_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
|
||||||
) -> tuple[MatchResults, Any]:
|
) -> Tuple[MatchResults, Any]:
|
||||||
all_process_matches: MatchResults = collections.defaultdict(list)
|
all_process_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_thread_matches: MatchResults = collections.defaultdict(list)
|
all_thread_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_call_matches: MatchResults = collections.defaultdict(list)
|
all_call_matches: MatchResults = collections.defaultdict(list)
|
||||||
@@ -135,7 +135,7 @@ def find_dynamic_capabilities(
|
|||||||
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
|
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
|
||||||
|
|
||||||
assert isinstance(extractor, DynamicFeatureExtractor)
|
assert isinstance(extractor, DynamicFeatureExtractor)
|
||||||
processes: list[ProcessHandle] = list(extractor.get_processes())
|
processes: List[ProcessHandle] = list(extractor.get_processes())
|
||||||
n_processes: int = len(processes)
|
n_processes: int = len(processes)
|
||||||
|
|
||||||
with capa.helpers.CapaProgressBar(
|
with capa.helpers.CapaProgressBar(
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import time
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any
|
from typing import Any, List, Tuple
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.helpers
|
import capa.helpers
|
||||||
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def find_instruction_capabilities(
|
def find_instruction_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||||
) -> tuple[FeatureSet, MatchResults]:
|
) -> Tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules for the given instruction.
|
find matches for the given rules for the given instruction.
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ def find_instruction_capabilities(
|
|||||||
|
|
||||||
def find_basic_block_capabilities(
|
def find_basic_block_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
||||||
) -> tuple[FeatureSet, MatchResults, MatchResults]:
|
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given basic block.
|
find matches for the given rules within the given basic block.
|
||||||
|
|
||||||
@@ -93,7 +93,7 @@ def find_basic_block_capabilities(
|
|||||||
|
|
||||||
def find_code_capabilities(
|
def find_code_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
||||||
) -> tuple[MatchResults, MatchResults, MatchResults, int]:
|
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given function.
|
find matches for the given rules within the given function.
|
||||||
|
|
||||||
@@ -131,16 +131,16 @@ def find_code_capabilities(
|
|||||||
|
|
||||||
def find_static_capabilities(
|
def find_static_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
|
||||||
) -> tuple[MatchResults, Any]:
|
) -> Tuple[MatchResults, Any]:
|
||||||
all_function_matches: MatchResults = collections.defaultdict(list)
|
all_function_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_bb_matches: MatchResults = collections.defaultdict(list)
|
all_bb_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_insn_matches: MatchResults = collections.defaultdict(list)
|
all_insn_matches: MatchResults = collections.defaultdict(list)
|
||||||
|
|
||||||
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
|
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
|
||||||
library_functions: tuple[rdoc.LibraryFunction, ...] = ()
|
library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
|
||||||
|
|
||||||
assert isinstance(extractor, StaticFeatureExtractor)
|
assert isinstance(extractor, StaticFeatureExtractor)
|
||||||
functions: list[FunctionHandle] = list(extractor.get_functions())
|
functions: List[FunctionHandle] = list(extractor.get_functions())
|
||||||
n_funcs: int = len(functions)
|
n_funcs: int = len(functions)
|
||||||
n_libs: int = 0
|
n_libs: int = 0
|
||||||
percentage: float = 0
|
percentage: float = 0
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import copy
|
import copy
|
||||||
import collections
|
import collections
|
||||||
from typing import TYPE_CHECKING, Union, Mapping, Iterable, Iterator
|
from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.features.common
|
import capa.features.common
|
||||||
@@ -27,7 +27,7 @@ if TYPE_CHECKING:
|
|||||||
# to collect the locations of a feature, do: `features[Number(0x10)]`
|
# to collect the locations of a feature, do: `features[Number(0x10)]`
|
||||||
#
|
#
|
||||||
# aliased here so that the type can be documented and xref'd.
|
# aliased here so that the type can be documented and xref'd.
|
||||||
FeatureSet = dict[Feature, set[Address]]
|
FeatureSet = Dict[Feature, Set[Address]]
|
||||||
|
|
||||||
|
|
||||||
class Statement:
|
class Statement:
|
||||||
@@ -94,7 +94,7 @@ class And(Statement):
|
|||||||
match if all of the children evaluate to True.
|
match if all of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`And.children` (type: list[Statement|Feature]).
|
`And.children` (type: List[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ class Or(Statement):
|
|||||||
match if any of the children evaluate to True.
|
match if any of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`Or.children` (type: list[Statement|Feature]).
|
`Or.children` (type: List[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -176,7 +176,7 @@ class Some(Statement):
|
|||||||
match if at least N of the children evaluate to True.
|
match if at least N of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`Some.children` (type: list[Statement|Feature]).
|
`Some.children` (type: List[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -267,7 +267,7 @@ class Subscope(Statement):
|
|||||||
# inspect(match_details)
|
# inspect(match_details)
|
||||||
#
|
#
|
||||||
# aliased here so that the type can be documented and xref'd.
|
# aliased here so that the type can be documented and xref'd.
|
||||||
MatchResults = Mapping[str, list[tuple[Address, Result]]]
|
MatchResults = Mapping[str, List[Tuple[Address, Result]]]
|
||||||
|
|
||||||
|
|
||||||
def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
|
def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
|
||||||
@@ -292,7 +292,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
|
|||||||
features[capa.features.common.MatchedRule(namespace)].update(locations)
|
features[capa.features.common.MatchedRule(namespace)].update(locations)
|
||||||
|
|
||||||
|
|
||||||
def match(rules: list["capa.rules.Rule"], features: FeatureSet, addr: Address) -> tuple[FeatureSet, MatchResults]:
|
def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
match the given rules against the given features,
|
match the given rules against the given features,
|
||||||
returning an updated set of features and the matches.
|
returning an updated set of features and the matches.
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
from capa.helpers import assert_never
|
from capa.helpers import assert_never
|
||||||
|
|
||||||
@@ -21,7 +22,7 @@ COM_PREFIXES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def load_com_database(com_type: ComType) -> dict[str, list[str]]:
|
def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
|
||||||
# lazy load these python files since they are so large.
|
# lazy load these python files since they are so large.
|
||||||
# that is, don't load them unless a COM feature is being handled.
|
# that is, don't load them unless a COM feature is being handled.
|
||||||
import capa.features.com.classes
|
import capa.features.com.classes
|
||||||
|
|||||||
@@ -5,8 +5,9 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
COM_CLASSES: dict[str, list[str]] = {
|
COM_CLASSES: Dict[str, List[str]] = {
|
||||||
"ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
|
"ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
|
||||||
"ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
|
"ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
|
||||||
"ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],
|
"ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],
|
||||||
|
|||||||
@@ -5,8 +5,9 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
COM_INTERFACES: dict[str, list[str]] = {
|
COM_INTERFACES: Dict[str, List[str]] = {
|
||||||
"IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
|
"IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
|
||||||
"IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
|
"IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
|
||||||
"IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
|
"IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
|
||||||
@@ -16333,7 +16334,7 @@ COM_INTERFACES: dict[str, list[str]] = {
|
|||||||
"IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
|
"IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
|
||||||
"IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
|
"IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
|
||||||
"IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
|
"IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
|
||||||
"IRcsServicetuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
|
"IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
|
||||||
"IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
|
"IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
|
||||||
"IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
|
"IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
|
||||||
"IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
|
"IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
|
||||||
|
|||||||
@@ -9,9 +9,10 @@
|
|||||||
import re
|
import re
|
||||||
import abc
|
import abc
|
||||||
import codecs
|
import codecs
|
||||||
|
import typing
|
||||||
import logging
|
import logging
|
||||||
import collections
|
import collections
|
||||||
from typing import TYPE_CHECKING, Union, Optional
|
from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
# circular import, otherwise
|
# circular import, otherwise
|
||||||
@@ -78,8 +79,8 @@ class Result:
|
|||||||
self,
|
self,
|
||||||
success: bool,
|
success: bool,
|
||||||
statement: Union["capa.engine.Statement", "Feature"],
|
statement: Union["capa.engine.Statement", "Feature"],
|
||||||
children: list["Result"],
|
children: List["Result"],
|
||||||
locations: Optional[set[Address]] = None,
|
locations: Optional[Set[Address]] = None,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.success = success
|
self.success = success
|
||||||
@@ -212,7 +213,7 @@ class Substring(String):
|
|||||||
|
|
||||||
# mapping from string value to list of locations.
|
# mapping from string value to list of locations.
|
||||||
# will unique the locations later on.
|
# will unique the locations later on.
|
||||||
matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)
|
matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
|
||||||
|
|
||||||
assert isinstance(self.value, str)
|
assert isinstance(self.value, str)
|
||||||
for feature, locations in features.items():
|
for feature, locations in features.items():
|
||||||
@@ -260,7 +261,7 @@ class _MatchedSubstring(Substring):
|
|||||||
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, substring: Substring, matches: dict[str, set[Address]]):
|
def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]):
|
||||||
"""
|
"""
|
||||||
args:
|
args:
|
||||||
substring: the substring feature that matches.
|
substring: the substring feature that matches.
|
||||||
@@ -304,7 +305,7 @@ class Regex(String):
|
|||||||
|
|
||||||
# mapping from string value to list of locations.
|
# mapping from string value to list of locations.
|
||||||
# will unique the locations later on.
|
# will unique the locations later on.
|
||||||
matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)
|
matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
|
||||||
|
|
||||||
for feature, locations in features.items():
|
for feature, locations in features.items():
|
||||||
if not isinstance(feature, (String,)):
|
if not isinstance(feature, (String,)):
|
||||||
@@ -352,7 +353,7 @@ class _MatchedRegex(Regex):
|
|||||||
note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
|
note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, regex: Regex, matches: dict[str, set[Address]]):
|
def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]):
|
||||||
"""
|
"""
|
||||||
args:
|
args:
|
||||||
regex: the regex feature that matches.
|
regex: the regex feature that matches.
|
||||||
@@ -466,7 +467,6 @@ FORMAT_VMRAY = "vmray"
|
|||||||
FORMAT_BINEXPORT2 = "binexport2"
|
FORMAT_BINEXPORT2 = "binexport2"
|
||||||
FORMAT_FREEZE = "freeze"
|
FORMAT_FREEZE = "freeze"
|
||||||
FORMAT_RESULT = "result"
|
FORMAT_RESULT = "result"
|
||||||
FORMAT_BINJA_DB = "binja_database"
|
|
||||||
STATIC_FORMATS = {
|
STATIC_FORMATS = {
|
||||||
FORMAT_SC32,
|
FORMAT_SC32,
|
||||||
FORMAT_SC64,
|
FORMAT_SC64,
|
||||||
@@ -476,7 +476,6 @@ STATIC_FORMATS = {
|
|||||||
FORMAT_FREEZE,
|
FORMAT_FREEZE,
|
||||||
FORMAT_RESULT,
|
FORMAT_RESULT,
|
||||||
FORMAT_BINEXPORT2,
|
FORMAT_BINEXPORT2,
|
||||||
FORMAT_BINJA_DB,
|
|
||||||
}
|
}
|
||||||
DYNAMIC_FORMATS = {
|
DYNAMIC_FORMATS = {
|
||||||
FORMAT_CAPE,
|
FORMAT_CAPE,
|
||||||
|
|||||||
@@ -11,9 +11,13 @@ import hashlib
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
from copy import copy
|
from copy import copy
|
||||||
from types import MethodType
|
from types import MethodType
|
||||||
from typing import Any, Union, Iterator, TypeAlias
|
from typing import Any, Set, Dict, Tuple, Union, Iterator
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
|
||||||
|
# https://github.com/mandiant/capa/issues/1699
|
||||||
|
from typing_extensions import TypeAlias
|
||||||
|
|
||||||
import capa.features.address
|
import capa.features.address
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature
|
||||||
from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
||||||
@@ -55,7 +59,7 @@ class FunctionHandle:
|
|||||||
|
|
||||||
address: Address
|
address: Address
|
||||||
inner: Any
|
inner: Any
|
||||||
ctx: dict[str, Any] = dataclasses.field(default_factory=dict)
|
ctx: Dict[str, Any] = dataclasses.field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -131,7 +135,7 @@ class StaticFeatureExtractor:
|
|||||||
return self._sample_hashes
|
return self._sample_hashes
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features found at every scope ("global").
|
extract features found at every scope ("global").
|
||||||
|
|
||||||
@@ -142,12 +146,12 @@ class StaticFeatureExtractor:
|
|||||||
print('0x%x: %s', va, feature)
|
print('0x%x: %s', va, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: feature and its location
|
Tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract file-scope features.
|
extract file-scope features.
|
||||||
|
|
||||||
@@ -158,7 +162,7 @@ class StaticFeatureExtractor:
|
|||||||
print('0x%x: %s', va, feature)
|
print('0x%x: %s', va, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: feature and its location
|
Tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -207,7 +211,7 @@ class StaticFeatureExtractor:
|
|||||||
raise KeyError(addr)
|
raise KeyError(addr)
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract function-scope features.
|
extract function-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -223,7 +227,7 @@ class StaticFeatureExtractor:
|
|||||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: feature and its location
|
Tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -236,7 +240,7 @@ class StaticFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract basic block-scope features.
|
extract basic block-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -254,7 +258,7 @@ class StaticFeatureExtractor:
|
|||||||
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: feature and its location
|
Tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -269,7 +273,7 @@ class StaticFeatureExtractor:
|
|||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_insn_features(
|
def extract_insn_features(
|
||||||
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract instruction-scope features.
|
extract instruction-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -289,12 +293,12 @@ class StaticFeatureExtractor:
|
|||||||
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: feature and its location
|
Tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def FunctionFilter(extractor: StaticFeatureExtractor, functions: set) -> StaticFeatureExtractor:
|
def FunctionFilter(extractor: StaticFeatureExtractor, functions: Set) -> StaticFeatureExtractor:
|
||||||
original_get_functions = extractor.get_functions
|
original_get_functions = extractor.get_functions
|
||||||
|
|
||||||
def filtered_get_functions(self):
|
def filtered_get_functions(self):
|
||||||
@@ -383,7 +387,7 @@ class DynamicFeatureExtractor:
|
|||||||
return self._sample_hashes
|
return self._sample_hashes
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features found at every scope ("global").
|
extract features found at every scope ("global").
|
||||||
|
|
||||||
@@ -394,12 +398,12 @@ class DynamicFeatureExtractor:
|
|||||||
print(addr, feature)
|
print(addr, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: feature and its location
|
Tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract file-scope features.
|
extract file-scope features.
|
||||||
|
|
||||||
@@ -410,7 +414,7 @@ class DynamicFeatureExtractor:
|
|||||||
print(addr, feature)
|
print(addr, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: feature and its location
|
Tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -422,7 +426,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all the features of a process. These include:
|
Yields all the features of a process. These include:
|
||||||
- file features of the process' image
|
- file features of the process' image
|
||||||
@@ -445,7 +449,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all the features of a thread. These include:
|
Yields all the features of a thread. These include:
|
||||||
- sequenced api traces
|
- sequenced api traces
|
||||||
@@ -462,7 +466,7 @@ class DynamicFeatureExtractor:
|
|||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all features of a call. These include:
|
Yields all features of a call. These include:
|
||||||
- api name
|
- api name
|
||||||
@@ -481,7 +485,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> DynamicFeatureExtractor:
|
def ProcessFilter(extractor: DynamicFeatureExtractor, processes: Set) -> DynamicFeatureExtractor:
|
||||||
original_get_processes = extractor.get_processes
|
original_get_processes = extractor.get_processes
|
||||||
|
|
||||||
def filtered_get_processes(self):
|
def filtered_get_processes(self):
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import io
|
|||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Iterator
|
from typing import Set, Dict, List, Tuple, Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -51,13 +51,13 @@ def compute_common_prefix_length(m: str, n: str) -> int:
|
|||||||
return len(m)
|
return len(m)
|
||||||
|
|
||||||
|
|
||||||
def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: list[Path]) -> Path:
|
def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: List[Path]) -> Path:
|
||||||
"""attempt to find the sample file, given a BinExport2 file.
|
"""attempt to find the sample file, given a BinExport2 file.
|
||||||
|
|
||||||
searches in the same directory as the BinExport2 file, and then in search_paths.
|
searches in the same directory as the BinExport2 file, and then in search_paths.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def filename_similarity_key(p: Path) -> tuple[int, str]:
|
def filename_similarity_key(p: Path) -> Tuple[int, str]:
|
||||||
# note closure over input_file.
|
# note closure over input_file.
|
||||||
# sort first by length of common prefix, then by name (for stability)
|
# sort first by length of common prefix, then by name (for stability)
|
||||||
return (compute_common_prefix_length(p.name, input_file.name), p.name)
|
return (compute_common_prefix_length(p.name, input_file.name), p.name)
|
||||||
@@ -65,7 +65,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
|
|||||||
wanted_sha256: str = be2.meta_information.executable_id.lower()
|
wanted_sha256: str = be2.meta_information.executable_id.lower()
|
||||||
|
|
||||||
input_directory: Path = input_file.parent
|
input_directory: Path = input_file.parent
|
||||||
siblings: list[Path] = [p for p in input_directory.iterdir() if p.is_file()]
|
siblings: List[Path] = [p for p in input_directory.iterdir() if p.is_file()]
|
||||||
siblings.sort(key=filename_similarity_key, reverse=True)
|
siblings.sort(key=filename_similarity_key, reverse=True)
|
||||||
for sibling in siblings:
|
for sibling in siblings:
|
||||||
# e.g. with open IDA files in the same directory on Windows
|
# e.g. with open IDA files in the same directory on Windows
|
||||||
@@ -74,7 +74,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
|
|||||||
return sibling
|
return sibling
|
||||||
|
|
||||||
for search_path in search_paths:
|
for search_path in search_paths:
|
||||||
candidates: list[Path] = [p for p in search_path.iterdir() if p.is_file()]
|
candidates: List[Path] = [p for p in search_path.iterdir() if p.is_file()]
|
||||||
candidates.sort(key=filename_similarity_key, reverse=True)
|
candidates.sort(key=filename_similarity_key, reverse=True)
|
||||||
for candidate in candidates:
|
for candidate in candidates:
|
||||||
with contextlib.suppress(PermissionError):
|
with contextlib.suppress(PermissionError):
|
||||||
@@ -88,27 +88,27 @@ class BinExport2Index:
|
|||||||
def __init__(self, be2: BinExport2):
|
def __init__(self, be2: BinExport2):
|
||||||
self.be2: BinExport2 = be2
|
self.be2: BinExport2 = be2
|
||||||
|
|
||||||
self.callers_by_vertex_index: dict[int, list[int]] = defaultdict(list)
|
self.callers_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
|
||||||
self.callees_by_vertex_index: dict[int, list[int]] = defaultdict(list)
|
self.callees_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
|
||||||
|
|
||||||
# note: flow graph != call graph (vertex)
|
# note: flow graph != call graph (vertex)
|
||||||
self.flow_graph_index_by_address: dict[int, int] = {}
|
self.flow_graph_index_by_address: Dict[int, int] = {}
|
||||||
self.flow_graph_address_by_index: dict[int, int] = {}
|
self.flow_graph_address_by_index: Dict[int, int] = {}
|
||||||
|
|
||||||
# edges that come from the given basic block
|
# edges that come from the given basic block
|
||||||
self.source_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
self.source_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||||
# edges that end up at the given basic block
|
# edges that end up at the given basic block
|
||||||
self.target_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
self.target_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||||
|
|
||||||
self.vertex_index_by_address: dict[int, int] = {}
|
self.vertex_index_by_address: Dict[int, int] = {}
|
||||||
|
|
||||||
self.data_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
|
self.data_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
|
||||||
self.data_reference_index_by_target_address: dict[int, list[int]] = defaultdict(list)
|
self.data_reference_index_by_target_address: Dict[int, List[int]] = defaultdict(list)
|
||||||
self.string_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
|
self.string_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
|
||||||
|
|
||||||
self.insn_address_by_index: dict[int, int] = {}
|
self.insn_address_by_index: Dict[int, int] = {}
|
||||||
self.insn_index_by_address: dict[int, int] = {}
|
self.insn_index_by_address: Dict[int, int] = {}
|
||||||
self.insn_by_address: dict[int, BinExport2.Instruction] = {}
|
self.insn_by_address: Dict[int, BinExport2.Instruction] = {}
|
||||||
|
|
||||||
# must index instructions first
|
# must index instructions first
|
||||||
self._index_insn_addresses()
|
self._index_insn_addresses()
|
||||||
@@ -208,7 +208,7 @@ class BinExport2Index:
|
|||||||
|
|
||||||
def basic_block_instructions(
|
def basic_block_instructions(
|
||||||
self, basic_block: BinExport2.BasicBlock
|
self, basic_block: BinExport2.BasicBlock
|
||||||
) -> Iterator[tuple[int, BinExport2.Instruction, int]]:
|
) -> Iterator[Tuple[int, BinExport2.Instruction, int]]:
|
||||||
"""
|
"""
|
||||||
For a given basic block, enumerate the instruction indices,
|
For a given basic block, enumerate the instruction indices,
|
||||||
the instruction instances, and their addresses.
|
the instruction instances, and their addresses.
|
||||||
@@ -253,7 +253,7 @@ class BinExport2Analysis:
|
|||||||
self.idx: BinExport2Index = idx
|
self.idx: BinExport2Index = idx
|
||||||
self.buf: bytes = buf
|
self.buf: bytes = buf
|
||||||
self.base_address: int = 0
|
self.base_address: int = 0
|
||||||
self.thunks: dict[int, int] = {}
|
self.thunks: Dict[int, int] = {}
|
||||||
|
|
||||||
self._find_base_address()
|
self._find_base_address()
|
||||||
self._compute_thunks()
|
self._compute_thunks()
|
||||||
@@ -279,14 +279,12 @@ class BinExport2Analysis:
|
|||||||
|
|
||||||
curr_idx: int = idx
|
curr_idx: int = idx
|
||||||
for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
|
for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
|
||||||
thunk_callees: list[int] = self.idx.callees_by_vertex_index[curr_idx]
|
thunk_callees: List[int] = self.idx.callees_by_vertex_index[curr_idx]
|
||||||
# If this doesn't hold, then it doesn't seem like this is a thunk,
|
# if this doesn't hold, then it doesn't seem like this is a thunk,
|
||||||
# because either, len is:
|
# because either, len is:
|
||||||
# 0 and the thunk doesn't point to anything or is indirect, like `call eax`, or
|
# 0 and the thunk doesn't point to anything, or
|
||||||
# >1 and the thunk may end up at many functions.
|
# >1 and the thunk may end up at many functions.
|
||||||
# In any case, this doesn't appear to be the sort of thunk we're looking for.
|
assert len(thunk_callees) == 1, f"thunk @ {hex(addr)} failed"
|
||||||
if len(thunk_callees) != 1:
|
|
||||||
break
|
|
||||||
|
|
||||||
thunked_idx: int = thunk_callees[0]
|
thunked_idx: int = thunk_callees[0]
|
||||||
thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
|
thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
|
||||||
@@ -326,7 +324,7 @@ class AddressNotMappedError(ReadMemoryError): ...
|
|||||||
@dataclass
|
@dataclass
|
||||||
class AddressSpace:
|
class AddressSpace:
|
||||||
base_address: int
|
base_address: int
|
||||||
memory_regions: tuple[MemoryRegion, ...]
|
memory_regions: Tuple[MemoryRegion, ...]
|
||||||
|
|
||||||
def read_memory(self, address: int, length: int) -> bytes:
|
def read_memory(self, address: int, length: int) -> bytes:
|
||||||
rva: int = address - self.base_address
|
rva: int = address - self.base_address
|
||||||
@@ -339,7 +337,7 @@ class AddressSpace:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_pe(cls, pe: PE, base_address: int):
|
def from_pe(cls, pe: PE, base_address: int):
|
||||||
regions: list[MemoryRegion] = []
|
regions: List[MemoryRegion] = []
|
||||||
for section in pe.sections:
|
for section in pe.sections:
|
||||||
address: int = section.VirtualAddress
|
address: int = section.VirtualAddress
|
||||||
size: int = section.Misc_VirtualSize
|
size: int = section.Misc_VirtualSize
|
||||||
@@ -357,7 +355,7 @@ class AddressSpace:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_elf(cls, elf: ELFFile, base_address: int):
|
def from_elf(cls, elf: ELFFile, base_address: int):
|
||||||
regions: list[MemoryRegion] = []
|
regions: List[MemoryRegion] = []
|
||||||
|
|
||||||
# ELF segments are for runtime data,
|
# ELF segments are for runtime data,
|
||||||
# ELF sections are for link-time data.
|
# ELF sections are for link-time data.
|
||||||
@@ -403,9 +401,9 @@ class AnalysisContext:
|
|||||||
class FunctionContext:
|
class FunctionContext:
|
||||||
ctx: AnalysisContext
|
ctx: AnalysisContext
|
||||||
flow_graph_index: int
|
flow_graph_index: int
|
||||||
format: set[str]
|
format: Set[str]
|
||||||
os: set[str]
|
os: Set[str]
|
||||||
arch: set[str]
|
arch: Set[str]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator, Optional
|
from typing import List, Tuple, Iterator, Optional
|
||||||
|
|
||||||
import capa.features.extractors.binexport2.helpers
|
import capa.features.extractors.binexport2.helpers
|
||||||
from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset
|
from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset
|
||||||
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -91,7 +91,7 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -120,7 +120,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
@@ -131,7 +131,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
||||||
# guaranteed to be simple int/reg operands
|
# guaranteed to be simple int/reg operands
|
||||||
# so we don't have to realize the tree/list.
|
# so we don't have to realize the tree/list.
|
||||||
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||||
|
|
||||||
if operands[1] != operands[2]:
|
if operands[1] != operands[2]:
|
||||||
yield Characteristic("nzxor"), ih.address
|
yield Characteristic("nzxor"), ih.address
|
||||||
@@ -146,7 +146,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Optional
|
from typing import List, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from capa.features.extractors.binexport2.helpers import get_operand_expressions
|
from capa.features.extractors.binexport2.helpers import get_operand_expressions
|
||||||
@@ -32,7 +32,7 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt
|
|||||||
# Base: Any general purpose register
|
# Base: Any general purpose register
|
||||||
# Displacement: An integral offset
|
# Displacement: An integral offset
|
||||||
|
|
||||||
expressions: list[BinExport2.Expression] = get_operand_expressions(be2, operand)
|
expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand)
|
||||||
|
|
||||||
# skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller
|
# skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller
|
||||||
# has checked for BinExport2.Expression.DEREFERENCE
|
# has checked for BinExport2.Expression.DEREFERENCE
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.strings
|
import capa.features.extractors.strings
|
||||||
import capa.features.extractors.binexport2.helpers
|
import capa.features.extractors.binexport2.helpers
|
||||||
@@ -63,7 +63,7 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -123,7 +123,7 @@ OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -161,7 +161,7 @@ def is_security_cookie(
|
|||||||
|
|
||||||
# security cookie check should use SP or BP
|
# security cookie check should use SP or BP
|
||||||
op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]]
|
op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]]
|
||||||
op1_exprs: list[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
|
op1_exprs: List[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
|
||||||
if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs):
|
if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -192,7 +192,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse non-zeroing XOR instruction from the given instruction.
|
parse non-zeroing XOR instruction from the given instruction.
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||||
@@ -209,7 +209,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
|
||||||
# guaranteed to be simple int/reg operands
|
# guaranteed to be simple int/reg operands
|
||||||
# so we don't have to realize the tree/list.
|
# so we don't have to realize the tree/list.
|
||||||
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||||
|
|
||||||
if operands[0] == operands[1]:
|
if operands[0] == operands[1]:
|
||||||
return
|
return
|
||||||
@@ -236,7 +236,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||||
@@ -16,20 +16,20 @@ from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
|||||||
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
bbi: BasicBlockContext = bbh.inner
|
bbi: BasicBlockContext = bbh.inner
|
||||||
|
|
||||||
idx = fhi.ctx.idx
|
idx = fhi.ctx.idx
|
||||||
|
|
||||||
basic_block_index: int = bbi.basic_block_index
|
basic_block_index: int = bbi.basic_block_index
|
||||||
target_edges: list[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
|
target_edges: List[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
|
||||||
if basic_block_index in (e.source_basic_block_index for e in target_edges):
|
if basic_block_index in (e.source_basic_block_index for e in target_edges):
|
||||||
basic_block_address: int = idx.get_basic_block_address(basic_block_index)
|
basic_block_address: int = idx.get_basic_block_address(basic_block_index)
|
||||||
yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)
|
yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract basic block features"""
|
"""extract basic block features"""
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
for feature, addr in bb_handler(fh, bbh):
|
for feature, addr in bb_handler(fh, bbh):
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Set, List, Tuple, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.elf
|
import capa.features.extractors.elf
|
||||||
import capa.features.extractors.common
|
import capa.features.extractors.common
|
||||||
@@ -48,14 +48,14 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address)
|
address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address)
|
||||||
self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space)
|
self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space)
|
||||||
|
|
||||||
self.global_features: list[tuple[Feature, Address]] = []
|
self.global_features: List[Tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf)))
|
self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf)))
|
||||||
self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf)))
|
self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf)))
|
||||||
self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf)))
|
self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf)))
|
||||||
|
|
||||||
self.format: set[str] = set()
|
self.format: Set[str] = set()
|
||||||
self.os: set[str] = set()
|
self.os: Set[str] = set()
|
||||||
self.arch: set[str] = set()
|
self.arch: Set[str] = set()
|
||||||
|
|
||||||
for feature, _ in self.global_features:
|
for feature, _ in self.global_features:
|
||||||
assert isinstance(feature.value, str)
|
assert isinstance(feature.value, str)
|
||||||
@@ -72,10 +72,10 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
def get_base_address(self) -> AbsoluteVirtualAddress:
|
def get_base_address(self) -> AbsoluteVirtualAddress:
|
||||||
return AbsoluteVirtualAddress(self.analysis.base_address)
|
return AbsoluteVirtualAddress(self.analysis.base_address)
|
||||||
|
|
||||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from self.global_features
|
yield from self.global_features
|
||||||
|
|
||||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf)
|
yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf)
|
||||||
|
|
||||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||||
@@ -97,7 +97,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch),
|
inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch),
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binexport2.function.extract_features(fh)
|
yield from capa.features.extractors.binexport2.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
@@ -112,7 +112,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
inner=BasicBlockContext(basic_block_index),
|
inner=BasicBlockContext(basic_block_index),
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
@@ -126,5 +126,5 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
def extract_insn_features(
|
def extract_insn_features(
|
||||||
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih)
|
yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import pefile
|
import pefile
|
||||||
from elftools.elf.elffile import ELFFile
|
from elftools.elf.elffile import ELFFile
|
||||||
@@ -23,7 +23,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||||
pe: pefile.PE = pefile.PE(data=buf)
|
pe: pefile.PE = pefile.PE(data=buf)
|
||||||
yield from capa.features.extractors.pefile.extract_file_export_names(pe)
|
yield from capa.features.extractors.pefile.extract_file_export_names(pe)
|
||||||
@@ -34,7 +34,7 @@ def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Fe
|
|||||||
logger.warning("unsupported format")
|
logger.warning("unsupported format")
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||||
pe: pefile.PE = pefile.PE(data=buf)
|
pe: pefile.PE = pefile.PE(data=buf)
|
||||||
yield from capa.features.extractors.pefile.extract_file_import_names(pe)
|
yield from capa.features.extractors.pefile.extract_file_import_names(pe)
|
||||||
@@ -45,7 +45,7 @@ def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Fe
|
|||||||
logger.warning("unsupported format")
|
logger.warning("unsupported format")
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
if buf.startswith(capa.features.extractors.common.MATCH_PE):
|
||||||
pe: pefile.PE = pefile.PE(data=buf)
|
pe: pefile.PE = pefile.PE(data=buf)
|
||||||
yield from capa.features.extractors.pefile.extract_file_section_names(pe)
|
yield from capa.features.extractors.pefile.extract_file_section_names(pe)
|
||||||
@@ -56,15 +56,15 @@ def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[F
|
|||||||
logger.warning("unsupported format")
|
logger.warning("unsupported format")
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_format(buf)
|
yield from capa.features.extractors.common.extract_format(buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract file features"""
|
"""extract file features"""
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler(be2, buf):
|
for feature, addr in file_handler(be2, buf):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.file import FunctionName
|
from capa.features.file import FunctionName
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
@@ -16,7 +16,7 @@ from capa.features.extractors.base_extractor import FunctionHandle
|
|||||||
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
@@ -32,7 +32,7 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Add
|
|||||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)
|
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
@@ -40,7 +40,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address
|
|||||||
flow_graph_index: int = fhi.flow_graph_index
|
flow_graph_index: int = fhi.flow_graph_index
|
||||||
flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]
|
flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]
|
||||||
|
|
||||||
edges: list[tuple[int, int]] = []
|
edges: List[Tuple[int, int]] = []
|
||||||
for edge in flow_graph.edge:
|
for edge in flow_graph.edge:
|
||||||
edges.append((edge.source_basic_block_index, edge.target_basic_block_index))
|
edges.append((edge.source_basic_block_index, edge.target_basic_block_index))
|
||||||
|
|
||||||
@@ -48,7 +48,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address
|
|||||||
yield Characteristic("loop"), fh.address
|
yield Characteristic("loop"), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
be2: BinExport2 = fhi.ctx.be2
|
be2: BinExport2 = fhi.ctx.be2
|
||||||
@@ -63,7 +63,7 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address
|
|||||||
yield FunctionName(vertex.mangled_name), fh.address
|
yield FunctionName(vertex.mangled_name), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import re
|
import re
|
||||||
from typing import Union, Iterator, Optional
|
from typing import Set, Dict, List, Tuple, Union, Iterator, Optional
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ HAS_ARCH_INTEL = {ARCH_I386, ARCH_AMD64}
|
|||||||
HAS_ARCH_ARM = {ARCH_AARCH64}
|
HAS_ARCH_ARM = {ARCH_AARCH64}
|
||||||
|
|
||||||
|
|
||||||
def mask_immediate(arch: set[str], immediate: int) -> int:
|
def mask_immediate(arch: Set[str], immediate: int) -> int:
|
||||||
if arch & HAS_ARCH64:
|
if arch & HAS_ARCH64:
|
||||||
immediate &= 0xFFFFFFFFFFFFFFFF
|
immediate &= 0xFFFFFFFFFFFFFFFF
|
||||||
elif arch & HAS_ARCH32:
|
elif arch & HAS_ARCH32:
|
||||||
@@ -30,7 +30,7 @@ def mask_immediate(arch: set[str], immediate: int) -> int:
|
|||||||
return immediate
|
return immediate
|
||||||
|
|
||||||
|
|
||||||
def twos_complement(arch: set[str], immediate: int, default: Optional[int] = None) -> int:
|
def twos_complement(arch: Set[str], immediate: int, default: Optional[int] = None) -> int:
|
||||||
if default is not None:
|
if default is not None:
|
||||||
return capa.features.extractors.helpers.twos_complement(immediate, default)
|
return capa.features.extractors.helpers.twos_complement(immediate, default)
|
||||||
elif arch & HAS_ARCH64:
|
elif arch & HAS_ARCH64:
|
||||||
@@ -50,36 +50,17 @@ def is_vertex_type(vertex: BinExport2.CallGraph.Vertex, type_: BinExport2.CallGr
|
|||||||
return vertex.HasField("type") and vertex.type == type_
|
return vertex.HasField("type") and vertex.type == type_
|
||||||
|
|
||||||
|
|
||||||
# internal to `build_expression_tree`
|
|
||||||
# this is unstable: it is subject to change, so don't rely on it!
|
|
||||||
def _prune_expression_tree_references_to_tree_index(
|
|
||||||
expression_tree: list[list[int]],
|
|
||||||
tree_index: int,
|
|
||||||
):
|
|
||||||
# `i` is the index of the tree node that we'll search for `tree_index`
|
|
||||||
# if we remove `tree_index` from it, and it is now empty,
|
|
||||||
# then we'll need to prune references to `i`.
|
|
||||||
for i, tree_node in enumerate(expression_tree):
|
|
||||||
if tree_index in tree_node:
|
|
||||||
tree_node.remove(tree_index)
|
|
||||||
|
|
||||||
if len(tree_node) == 0:
|
|
||||||
# if the parent node is now empty,
|
|
||||||
# remove references to that parent node.
|
|
||||||
_prune_expression_tree_references_to_tree_index(expression_tree, i)
|
|
||||||
|
|
||||||
|
|
||||||
# internal to `build_expression_tree`
|
# internal to `build_expression_tree`
|
||||||
# this is unstable: it is subject to change, so don't rely on it!
|
# this is unstable: it is subject to change, so don't rely on it!
|
||||||
def _prune_expression_tree_empty_shifts(
|
def _prune_expression_tree_empty_shifts(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
expression_tree: list[list[int]],
|
expression_tree: List[List[int]],
|
||||||
tree_index: int,
|
tree_index: int,
|
||||||
):
|
):
|
||||||
expression_index = operand.expression_index[tree_index]
|
expression_index = operand.expression_index[tree_index]
|
||||||
expression = be2.expression[expression_index]
|
expression = be2.expression[expression_index]
|
||||||
children_tree_indexes: list[int] = expression_tree[tree_index]
|
children_tree_indexes: List[int] = expression_tree[tree_index]
|
||||||
|
|
||||||
if expression.type == BinExport2.Expression.OPERATOR:
|
if expression.type == BinExport2.Expression.OPERATOR:
|
||||||
if len(children_tree_indexes) == 0 and expression.symbol in ("lsl", "lsr"):
|
if len(children_tree_indexes) == 0 and expression.symbol in ("lsl", "lsr"):
|
||||||
@@ -89,7 +70,9 @@ def _prune_expression_tree_empty_shifts(
|
|||||||
#
|
#
|
||||||
# Which seems to be as if the shift wasn't there (shift of #0)
|
# Which seems to be as if the shift wasn't there (shift of #0)
|
||||||
# so we want to remove references to this node from any parent nodes.
|
# so we want to remove references to this node from any parent nodes.
|
||||||
_prune_expression_tree_references_to_tree_index(expression_tree, tree_index)
|
for tree_node in expression_tree:
|
||||||
|
if tree_index in tree_node:
|
||||||
|
tree_node.remove(tree_index)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -99,37 +82,38 @@ def _prune_expression_tree_empty_shifts(
|
|||||||
|
|
||||||
# internal to `build_expression_tree`
|
# internal to `build_expression_tree`
|
||||||
# this is unstable: it is subject to change, so don't rely on it!
|
# this is unstable: it is subject to change, so don't rely on it!
|
||||||
def _fixup_expression_tree_references_to_tree_index(
|
def _prune_expression_tree_empty_commas(
|
||||||
expression_tree: list[list[int]],
|
|
||||||
existing_index: int,
|
|
||||||
new_index: int,
|
|
||||||
):
|
|
||||||
for tree_node in expression_tree:
|
|
||||||
for i, index in enumerate(tree_node):
|
|
||||||
if index == existing_index:
|
|
||||||
tree_node[i] = new_index
|
|
||||||
|
|
||||||
|
|
||||||
# internal to `build_expression_tree`
|
|
||||||
# this is unstable: it is subject to change, so don't rely on it!
|
|
||||||
def _fixup_expression_tree_lonely_commas(
|
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
expression_tree: list[list[int]],
|
expression_tree: List[List[int]],
|
||||||
tree_index: int,
|
tree_index: int,
|
||||||
):
|
):
|
||||||
expression_index = operand.expression_index[tree_index]
|
expression_index = operand.expression_index[tree_index]
|
||||||
expression = be2.expression[expression_index]
|
expression = be2.expression[expression_index]
|
||||||
children_tree_indexes: list[int] = expression_tree[tree_index]
|
children_tree_indexes: List[int] = expression_tree[tree_index]
|
||||||
|
|
||||||
if expression.type == BinExport2.Expression.OPERATOR:
|
if expression.type == BinExport2.Expression.OPERATOR:
|
||||||
if len(children_tree_indexes) == 1 and expression.symbol == ",":
|
if len(children_tree_indexes) == 1 and expression.symbol == ",":
|
||||||
existing_index = tree_index
|
# Due to the above pruning of empty LSL or LSR expressions,
|
||||||
new_index = children_tree_indexes[0]
|
# the parents might need to be fixed up.
|
||||||
_fixup_expression_tree_references_to_tree_index(expression_tree, existing_index, new_index)
|
#
|
||||||
|
# Specifically, if the pruned node was part of a comma list with two children,
|
||||||
|
# now there's only a single child, which renders as an extra comma,
|
||||||
|
# so we replace references to the comma node with the immediate child.
|
||||||
|
#
|
||||||
|
# A more correct way of doing this might be to walk up the parents and do fixups,
|
||||||
|
# but I'm not quite sure how to do this yet. Just do two passes right now.
|
||||||
|
child = children_tree_indexes[0]
|
||||||
|
|
||||||
|
for tree_node in expression_tree:
|
||||||
|
tree_node.index
|
||||||
|
if tree_index in tree_node:
|
||||||
|
tree_node[tree_node.index(tree_index)] = child
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
for child_tree_index in children_tree_indexes:
|
for child_tree_index in children_tree_indexes:
|
||||||
_fixup_expression_tree_lonely_commas(be2, operand, expression_tree, child_tree_index)
|
_prune_expression_tree_empty_commas(be2, operand, expression_tree, child_tree_index)
|
||||||
|
|
||||||
|
|
||||||
# internal to `build_expression_tree`
|
# internal to `build_expression_tree`
|
||||||
@@ -137,17 +121,17 @@ def _fixup_expression_tree_lonely_commas(
|
|||||||
def _prune_expression_tree(
|
def _prune_expression_tree(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
expression_tree: list[list[int]],
|
expression_tree: List[List[int]],
|
||||||
):
|
):
|
||||||
_prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0)
|
_prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0)
|
||||||
_fixup_expression_tree_lonely_commas(be2, operand, expression_tree, 0)
|
_prune_expression_tree_empty_commas(be2, operand, expression_tree, 0)
|
||||||
|
|
||||||
|
|
||||||
# this is unstable: it is subject to change, so don't rely on it!
|
# this is unstable: it is subject to change, so don't rely on it!
|
||||||
def _build_expression_tree(
|
def _build_expression_tree(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
) -> list[list[int]]:
|
) -> List[List[int]]:
|
||||||
# The reconstructed expression tree layout, linking parent nodes to their children.
|
# The reconstructed expression tree layout, linking parent nodes to their children.
|
||||||
#
|
#
|
||||||
# There is one list of integers for each expression in the operand.
|
# There is one list of integers for each expression in the operand.
|
||||||
@@ -175,7 +159,7 @@ def _build_expression_tree(
|
|||||||
# exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817)
|
# exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
tree: list[list[int]] = []
|
tree: List[List[int]] = []
|
||||||
for i, expression_index in enumerate(operand.expression_index):
|
for i, expression_index in enumerate(operand.expression_index):
|
||||||
children = []
|
children = []
|
||||||
|
|
||||||
@@ -189,6 +173,7 @@ def _build_expression_tree(
|
|||||||
tree.append(children)
|
tree.append(children)
|
||||||
|
|
||||||
_prune_expression_tree(be2, operand, tree)
|
_prune_expression_tree(be2, operand, tree)
|
||||||
|
_prune_expression_tree(be2, operand, tree)
|
||||||
|
|
||||||
return tree
|
return tree
|
||||||
|
|
||||||
@@ -196,34 +181,21 @@ def _build_expression_tree(
|
|||||||
def _fill_operand_expression_list(
|
def _fill_operand_expression_list(
|
||||||
be2: BinExport2,
|
be2: BinExport2,
|
||||||
operand: BinExport2.Operand,
|
operand: BinExport2.Operand,
|
||||||
expression_tree: list[list[int]],
|
expression_tree: List[List[int]],
|
||||||
tree_index: int,
|
tree_index: int,
|
||||||
expression_list: list[BinExport2.Expression],
|
expression_list: List[BinExport2.Expression],
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Walk the given expression tree and collect the expression nodes in-order.
|
Walk the given expression tree and collect the expression nodes in-order.
|
||||||
"""
|
"""
|
||||||
expression_index = operand.expression_index[tree_index]
|
expression_index = operand.expression_index[tree_index]
|
||||||
expression = be2.expression[expression_index]
|
expression = be2.expression[expression_index]
|
||||||
children_tree_indexes: list[int] = expression_tree[tree_index]
|
children_tree_indexes: List[int] = expression_tree[tree_index]
|
||||||
|
|
||||||
if expression.type == BinExport2.Expression.REGISTER:
|
if expression.type == BinExport2.Expression.REGISTER:
|
||||||
assert len(children_tree_indexes) <= 1
|
assert len(children_tree_indexes) == 0
|
||||||
expression_list.append(expression)
|
expression_list.append(expression)
|
||||||
|
|
||||||
if len(children_tree_indexes) == 0:
|
|
||||||
return
|
return
|
||||||
elif len(children_tree_indexes) == 1:
|
|
||||||
# like for aarch64 with vector instructions, indicating vector data size:
|
|
||||||
#
|
|
||||||
# FADD V0.4S, V1.4S, V2.4S
|
|
||||||
#
|
|
||||||
# see: https://github.com/mandiant/capa/issues/2528
|
|
||||||
child_index = children_tree_indexes[0]
|
|
||||||
_fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(len(children_tree_indexes))
|
|
||||||
|
|
||||||
elif expression.type == BinExport2.Expression.SYMBOL:
|
elif expression.type == BinExport2.Expression.SYMBOL:
|
||||||
assert len(children_tree_indexes) <= 1
|
assert len(children_tree_indexes) <= 1
|
||||||
@@ -246,23 +218,9 @@ def _fill_operand_expression_list(
|
|||||||
raise NotImplementedError(len(children_tree_indexes))
|
raise NotImplementedError(len(children_tree_indexes))
|
||||||
|
|
||||||
elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
|
elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
|
||||||
assert len(children_tree_indexes) <= 1
|
assert len(children_tree_indexes) == 0
|
||||||
expression_list.append(expression)
|
expression_list.append(expression)
|
||||||
|
|
||||||
if len(children_tree_indexes) == 0:
|
|
||||||
return
|
return
|
||||||
elif len(children_tree_indexes) == 1:
|
|
||||||
# the ghidra exporter can produce some weird expressions,
|
|
||||||
# particularly for MSRs, like for:
|
|
||||||
#
|
|
||||||
# sreg(3, 0, c.0, c.4, 4)
|
|
||||||
#
|
|
||||||
# see: https://github.com/mandiant/capa/issues/2530
|
|
||||||
child_index = children_tree_indexes[0]
|
|
||||||
_fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(len(children_tree_indexes))
|
|
||||||
|
|
||||||
elif expression.type == BinExport2.Expression.SIZE_PREFIX:
|
elif expression.type == BinExport2.Expression.SIZE_PREFIX:
|
||||||
# like: b4
|
# like: b4
|
||||||
@@ -324,10 +282,10 @@ def _fill_operand_expression_list(
|
|||||||
raise NotImplementedError(expression.type)
|
raise NotImplementedError(expression.type)
|
||||||
|
|
||||||
|
|
||||||
def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> list[BinExport2.Expression]:
|
def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> List[BinExport2.Expression]:
|
||||||
tree = _build_expression_tree(be2, op)
|
tree = _build_expression_tree(be2, op)
|
||||||
|
|
||||||
expressions: list[BinExport2.Expression] = []
|
expressions: List[BinExport2.Expression] = []
|
||||||
_fill_operand_expression_list(be2, op, tree, 0, expressions)
|
_fill_operand_expression_list(be2, op, tree, 0, expressions)
|
||||||
|
|
||||||
return expressions
|
return expressions
|
||||||
@@ -373,11 +331,11 @@ def get_instruction_mnemonic(be2: BinExport2, instruction: BinExport2.Instructio
|
|||||||
return be2.mnemonic[instruction.mnemonic_index].name.lower()
|
return be2.mnemonic[instruction.mnemonic_index].name.lower()
|
||||||
|
|
||||||
|
|
||||||
def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> list[BinExport2.Operand]:
|
def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> List[BinExport2.Operand]:
|
||||||
return [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
return [be2.operand[operand_index] for operand_index in instruction.operand_index]
|
||||||
|
|
||||||
|
|
||||||
def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]:
|
def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
|
||||||
"""
|
"""
|
||||||
Splits a string by any of the provided delimiter characters,
|
Splits a string by any of the provided delimiter characters,
|
||||||
including the delimiters in the results.
|
including the delimiters in the results.
|
||||||
@@ -397,7 +355,7 @@ def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]:
|
|||||||
yield s[start:]
|
yield s[start:]
|
||||||
|
|
||||||
|
|
||||||
BinExport2OperandPattern = Union[str, tuple[str, ...]]
|
BinExport2OperandPattern = Union[str, Tuple[str, ...]]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -424,8 +382,8 @@ class BinExport2InstructionPattern:
|
|||||||
This matcher uses the BinExport2 data layout under the hood.
|
This matcher uses the BinExport2 data layout under the hood.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
mnemonics: tuple[str, ...]
|
mnemonics: Tuple[str, ...]
|
||||||
operands: tuple[Union[str, BinExport2OperandPattern], ...]
|
operands: Tuple[Union[str, BinExport2OperandPattern], ...]
|
||||||
capture: Optional[str]
|
capture: Optional[str]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -480,7 +438,7 @@ class BinExport2InstructionPattern:
|
|||||||
mnemonic, _, rest = pattern.partition(" ")
|
mnemonic, _, rest = pattern.partition(" ")
|
||||||
mnemonics = mnemonic.split("|")
|
mnemonics = mnemonic.split("|")
|
||||||
|
|
||||||
operands: list[Union[str, tuple[str, ...]]] = []
|
operands: List[Union[str, Tuple[str, ...]]] = []
|
||||||
while rest:
|
while rest:
|
||||||
rest = rest.strip()
|
rest = rest.strip()
|
||||||
if not rest.startswith("["):
|
if not rest.startswith("["):
|
||||||
@@ -551,7 +509,7 @@ class BinExport2InstructionPattern:
|
|||||||
expression: BinExport2.Expression
|
expression: BinExport2.Expression
|
||||||
|
|
||||||
def match(
|
def match(
|
||||||
self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
|
self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
|
||||||
) -> Optional["BinExport2InstructionPattern.MatchResult"]:
|
) -> Optional["BinExport2InstructionPattern.MatchResult"]:
|
||||||
"""
|
"""
|
||||||
Match the given BinExport2 data against this pattern.
|
Match the given BinExport2 data against this pattern.
|
||||||
@@ -644,10 +602,10 @@ class BinExport2InstructionPattern:
|
|||||||
class BinExport2InstructionPatternMatcher:
|
class BinExport2InstructionPatternMatcher:
|
||||||
"""Index and match a collection of instruction patterns."""
|
"""Index and match a collection of instruction patterns."""
|
||||||
|
|
||||||
def __init__(self, queries: list[BinExport2InstructionPattern]):
|
def __init__(self, queries: List[BinExport2InstructionPattern]):
|
||||||
self.queries = queries
|
self.queries = queries
|
||||||
# shard the patterns by (mnemonic, #operands)
|
# shard the patterns by (mnemonic, #operands)
|
||||||
self._index: dict[tuple[str, int], list[BinExport2InstructionPattern]] = defaultdict(list)
|
self._index: Dict[Tuple[str, int], List[BinExport2InstructionPattern]] = defaultdict(list)
|
||||||
|
|
||||||
for query in queries:
|
for query in queries:
|
||||||
for mnemonic in query.mnemonics:
|
for mnemonic in query.mnemonics:
|
||||||
@@ -665,7 +623,7 @@ class BinExport2InstructionPatternMatcher:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def match(
|
def match(
|
||||||
self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
|
self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
|
||||||
) -> Optional[BinExport2InstructionPattern.MatchResult]:
|
) -> Optional[BinExport2InstructionPattern.MatchResult]:
|
||||||
queries = self._index.get((mnemonic.lower(), len(operand_expressions)), [])
|
queries = self._index.get((mnemonic.lower(), len(operand_expressions)), [])
|
||||||
for query in queries:
|
for query in queries:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
import capa.features.extractors.strings
|
import capa.features.extractors.strings
|
||||||
@@ -32,7 +32,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -68,7 +68,7 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
if fhi.arch & HAS_ARCH_INTEL:
|
if fhi.arch & HAS_ARCH_INTEL:
|
||||||
@@ -77,7 +77,7 @@ def extract_insn_number_features(
|
|||||||
yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih)
|
yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih)
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -92,7 +92,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
# disassembler already identified string reference from instruction
|
# disassembler already identified string reference from instruction
|
||||||
return
|
return
|
||||||
|
|
||||||
reference_addresses: list[int] = []
|
reference_addresses: List[int] = []
|
||||||
|
|
||||||
if instruction_index in idx.data_reference_index_by_source_instruction_index:
|
if instruction_index in idx.data_reference_index_by_source_instruction_index:
|
||||||
for data_reference_index in idx.data_reference_index_by_source_instruction_index[instruction_index]:
|
for data_reference_index in idx.data_reference_index_by_source_instruction_index[instruction_index]:
|
||||||
@@ -142,7 +142,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_insn_string_features(
|
def extract_insn_string_features(
|
||||||
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -161,7 +161,7 @@ def extract_insn_string_features(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
if fhi.arch & HAS_ARCH_INTEL:
|
if fhi.arch & HAS_ARCH_INTEL:
|
||||||
@@ -172,7 +172,7 @@ def extract_insn_offset_features(
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
if fhi.arch & HAS_ARCH_INTEL:
|
if fhi.arch & HAS_ARCH_INTEL:
|
||||||
@@ -187,7 +187,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_mnemonic_features(
|
def extract_insn_mnemonic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
ii: InstructionContext = ih.inner
|
ii: InstructionContext = ih.inner
|
||||||
|
|
||||||
@@ -199,7 +199,7 @@ def extract_insn_mnemonic_features(
|
|||||||
yield Mnemonic(mnemonic_name), ih.address
|
yield Mnemonic(mnemonic_name), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract functions calls from features
|
"""extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope;
|
most relevant at the function scope;
|
||||||
@@ -221,7 +221,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
fhi: FunctionContext = fh.inner
|
fhi: FunctionContext = fh.inner
|
||||||
|
|
||||||
if fhi.arch & HAS_ARCH_INTEL:
|
if fhi.arch & HAS_ARCH_INTEL:
|
||||||
@@ -234,7 +234,7 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract instruction features"""
|
"""extract instruction features"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, ea in inst_handler(f, bbh, insn):
|
for feature, ea in inst_handler(f, bbh, insn):
|
||||||
|
|||||||
@@ -5,25 +5,115 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
|
||||||
|
|
||||||
|
import string
|
||||||
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
|
from binaryninja import Function
|
||||||
from binaryninja import BasicBlock as BinjaBasicBlock
|
from binaryninja import BasicBlock as BinjaBasicBlock
|
||||||
|
from binaryninja import (
|
||||||
|
BinaryView,
|
||||||
|
SymbolType,
|
||||||
|
RegisterValueType,
|
||||||
|
VariableSourceType,
|
||||||
|
MediumLevelILOperation,
|
||||||
|
MediumLevelILBasicBlock,
|
||||||
|
MediumLevelILInstruction,
|
||||||
|
)
|
||||||
|
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address
|
from capa.features.address import Address
|
||||||
from capa.features.basicblock import BasicBlock
|
from capa.features.basicblock import BasicBlock
|
||||||
|
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def get_printable_len_ascii(s: bytes) -> int:
|
||||||
|
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
||||||
|
count = 0
|
||||||
|
for c in s:
|
||||||
|
if c == 0:
|
||||||
|
return count
|
||||||
|
if c < 127 and chr(c) in string.printable:
|
||||||
|
count += 1
|
||||||
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
def get_printable_len_wide(s: bytes) -> int:
|
||||||
|
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
||||||
|
if all(c == 0x00 for c in s[1::2]):
|
||||||
|
return get_printable_len_ascii(s[::2])
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
|
||||||
|
bv: BinaryView = f.view
|
||||||
|
|
||||||
|
if il.operation != MediumLevelILOperation.MLIL_CALL:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
target = il.dest
|
||||||
|
if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
addr = target.value.value
|
||||||
|
sym = bv.get_symbol_at(addr)
|
||||||
|
if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if len(il.params) < 2:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
dest = il.params[0]
|
||||||
|
if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
|
||||||
|
var = dest.src
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if var.source_type != VariableSourceType.StackVariableSourceType:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
src = il.params[1]
|
||||||
|
if src.value.type != RegisterValueType.ConstantDataAggregateValue:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
|
||||||
|
return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
|
||||||
|
|
||||||
|
|
||||||
|
def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
|
||||||
|
"""check basic block for stackstring indicators
|
||||||
|
|
||||||
|
true if basic block contains enough moves of constant bytes to the stack
|
||||||
|
"""
|
||||||
|
count = 0
|
||||||
|
for il in bb:
|
||||||
|
count += get_stack_string_len(f, il)
|
||||||
|
if count > MIN_STACKSTRING_LEN:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
|
"""extract stackstring indicators from basic block"""
|
||||||
|
bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
|
||||||
|
if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]):
|
||||||
|
yield Characteristic("stack string"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
|
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract tight loop indicators from a basic block"""
|
"""extract tight loop indicators from a basic block"""
|
||||||
bb: BinjaBasicBlock = bbh.inner
|
bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
|
||||||
for edge in bb.outgoing_edges:
|
for edge in bb[0].outgoing_edges:
|
||||||
if edge.target.start == bb.start:
|
if edge.target.start == bb[0].start:
|
||||||
yield Characteristic("tight loop"), bbh.address
|
yield Characteristic("tight loop"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract basic block features"""
|
"""extract basic block features"""
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
for feature, addr in bb_handler(fh, bbh):
|
for feature, addr in bb_handler(fh, bbh):
|
||||||
@@ -31,4 +121,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur
|
|||||||
yield BasicBlock(), bbh.address
|
yield BasicBlock(), bbh.address
|
||||||
|
|
||||||
|
|
||||||
BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)
|
BASIC_BLOCK_HANDLERS = (
|
||||||
|
extract_bb_tight_loop,
|
||||||
|
extract_bb_stackstring,
|
||||||
|
)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
import binaryninja as binja
|
import binaryninja as binja
|
||||||
|
|
||||||
@@ -30,7 +30,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
def __init__(self, bv: binja.BinaryView):
|
def __init__(self, bv: binja.BinaryView):
|
||||||
super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
|
super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
|
||||||
self.bv = bv
|
self.bv = bv
|
||||||
self.global_features: list[tuple[Feature, Address]] = []
|
self.global_features: List[Tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
|
self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
|
||||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
|
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
|
||||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
|
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
|
||||||
@@ -48,24 +48,31 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
for f in self.bv.functions:
|
for f in self.bv.functions:
|
||||||
yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)
|
yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binja.function.extract_features(fh)
|
yield from capa.features.extractors.binja.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
f: binja.Function = fh.inner
|
f: binja.Function = fh.inner
|
||||||
for bb in f.basic_blocks:
|
# Set up a MLIL basic block dict look up to associate the disassembly basic block with its MLIL basic block
|
||||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)
|
mlil_lookup = {}
|
||||||
|
for mlil_bb in f.mlil.basic_blocks:
|
||||||
|
mlil_lookup[mlil_bb.source_block.start] = mlil_bb
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
for bb in f.basic_blocks:
|
||||||
|
mlil_bb = mlil_lookup.get(bb.start)
|
||||||
|
|
||||||
|
yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))
|
||||||
|
|
||||||
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
import capa.features.extractors.binja.helpers as binja_helpers
|
import capa.features.extractors.binja.helpers as binja_helpers
|
||||||
|
|
||||||
bb: binja.BasicBlock = bbh.inner
|
bb: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
|
||||||
addr = bb.start
|
addr = bb[0].start
|
||||||
|
|
||||||
for text, length in bb:
|
for text, length in bb[0]:
|
||||||
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
|
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
|
||||||
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
|
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
|
||||||
addr += length
|
addr += length
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding
|
from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding
|
||||||
|
|
||||||
@@ -13,22 +13,12 @@ import capa.features.extractors.common
|
|||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
import capa.features.extractors.strings
|
import capa.features.extractors.strings
|
||||||
from capa.features.file import Export, Import, Section, FunctionName
|
from capa.features.file import Export, Import, Section, FunctionName
|
||||||
from capa.features.common import (
|
from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
|
||||||
FORMAT_PE,
|
|
||||||
FORMAT_ELF,
|
|
||||||
FORMAT_SC32,
|
|
||||||
FORMAT_SC64,
|
|
||||||
FORMAT_BINJA_DB,
|
|
||||||
Format,
|
|
||||||
String,
|
|
||||||
Feature,
|
|
||||||
Characteristic,
|
|
||||||
)
|
|
||||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
||||||
|
|
||||||
|
|
||||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
|
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""check segment for embedded PE"""
|
"""check segment for embedded PE"""
|
||||||
start = 0
|
start = 0
|
||||||
if bv.view_type == "PE" and seg.start == bv.start:
|
if bv.view_type == "PE" and seg.start == bv.start:
|
||||||
@@ -42,13 +32,13 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature
|
|||||||
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract embedded PE features"""
|
"""extract embedded PE features"""
|
||||||
for seg in bv.segments:
|
for seg in bv.segments:
|
||||||
yield from check_segment_for_pe(bv, seg)
|
yield from check_segment_for_pe(bv, seg)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract function exports"""
|
"""extract function exports"""
|
||||||
for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
|
for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
|
||||||
if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
|
if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
|
||||||
@@ -82,7 +72,7 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address
|
|||||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)
|
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract function imports
|
"""extract function imports
|
||||||
|
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
@@ -106,19 +96,19 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address
|
|||||||
yield Import(name), addr
|
yield Import(name), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract section names"""
|
"""extract section names"""
|
||||||
for name, section in bv.sections.items():
|
for name, section in bv.sections.items():
|
||||||
yield Section(name), AbsoluteVirtualAddress(section.start)
|
yield Section(name), AbsoluteVirtualAddress(section.start)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract ASCII and UTF-16 LE strings"""
|
"""extract ASCII and UTF-16 LE strings"""
|
||||||
for s in bv.strings:
|
for s in bv.strings:
|
||||||
yield String(s.value), FileOffsetAddress(s.start)
|
yield String(s.value), FileOffsetAddress(s.start)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract the names of statically-linked library functions.
|
extract the names of statically-linked library functions.
|
||||||
"""
|
"""
|
||||||
@@ -137,22 +127,12 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Addre
|
|||||||
yield FunctionName(name[1:]), sym.address
|
yield FunctionName(name[1:]), sym.address
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if bv.file.database is not None:
|
|
||||||
yield Format(FORMAT_BINJA_DB), NO_ADDRESS
|
|
||||||
|
|
||||||
view_type = bv.view_type
|
view_type = bv.view_type
|
||||||
if view_type in ["PE", "COFF"]:
|
if view_type in ["PE", "COFF"]:
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
elif view_type == "ELF":
|
elif view_type == "ELF":
|
||||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||||
elif view_type == "Mapped":
|
|
||||||
if bv.arch.name == "x86":
|
|
||||||
yield Format(FORMAT_SC32), NO_ADDRESS
|
|
||||||
elif bv.arch.name == "x86_64":
|
|
||||||
yield Format(FORMAT_SC64), NO_ADDRESS
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(f"unexpected raw file with arch: {bv.arch}")
|
|
||||||
elif view_type == "Raw":
|
elif view_type == "Raw":
|
||||||
# no file type to return when processing a binary file, but we want to continue processing
|
# no file type to return when processing a binary file, but we want to continue processing
|
||||||
return
|
return
|
||||||
@@ -160,7 +140,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
|||||||
raise NotImplementedError(f"unexpected file format: {view_type}")
|
raise NotImplementedError(f"unexpected file format: {view_type}")
|
||||||
|
|
||||||
|
|
||||||
def extract_features(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract file features"""
|
"""extract file features"""
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler(bv):
|
for feature, addr in file_handler(bv):
|
||||||
|
|||||||
@@ -105,13 +105,13 @@ def find_binaryninja() -> Optional[Path]:
|
|||||||
logger.debug("detected OS: linux")
|
logger.debug("detected OS: linux")
|
||||||
elif sys.platform == "darwin":
|
elif sys.platform == "darwin":
|
||||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||||
return None
|
return False
|
||||||
elif sys.platform == "win32":
|
elif sys.platform == "win32":
|
||||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||||
return None
|
return False
|
||||||
else:
|
else:
|
||||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||||
return None
|
return False
|
||||||
|
|
||||||
desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
|
desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
|
||||||
if not desktop_entry:
|
if not desktop_entry:
|
||||||
|
|||||||
@@ -5,28 +5,14 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import string
|
from typing import Tuple, Iterator
|
||||||
from typing import Iterator
|
|
||||||
|
|
||||||
from binaryninja import (
|
from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation
|
||||||
Function,
|
|
||||||
BinaryView,
|
|
||||||
SymbolType,
|
|
||||||
ILException,
|
|
||||||
RegisterValueType,
|
|
||||||
VariableSourceType,
|
|
||||||
LowLevelILOperation,
|
|
||||||
MediumLevelILOperation,
|
|
||||||
MediumLevelILBasicBlock,
|
|
||||||
MediumLevelILInstruction,
|
|
||||||
)
|
|
||||||
|
|
||||||
from capa.features.file import FunctionName
|
from capa.features.file import FunctionName
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors import loops
|
from capa.features.extractors import loops
|
||||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
|
||||||
from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
|
|
||||||
from capa.features.extractors.base_extractor import FunctionHandle
|
from capa.features.extractors.base_extractor import FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
@@ -38,7 +24,7 @@ def extract_function_calls_to(fh: FunctionHandle):
|
|||||||
# Everything that is a code reference to the current function is considered a caller, which actually includes
|
# Everything that is a code reference to the current function is considered a caller, which actually includes
|
||||||
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
|
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
|
||||||
# considered a caller to the function
|
# considered a caller to the function
|
||||||
llil = get_llil_instr_at_addr(func.view, caller.address)
|
llil = caller.llil
|
||||||
if (llil is None) or llil.operation not in [
|
if (llil is None) or llil.operation not in [
|
||||||
LowLevelILOperation.LLIL_CALL,
|
LowLevelILOperation.LLIL_CALL,
|
||||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||||
@@ -47,13 +33,14 @@ def extract_function_calls_to(fh: FunctionHandle):
|
|||||||
]:
|
]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if llil.dest.operation not in [
|
if llil.dest.value.type not in [
|
||||||
LowLevelILOperation.LLIL_CONST,
|
RegisterValueType.ImportedAddressValue,
|
||||||
LowLevelILOperation.LLIL_CONST_PTR,
|
RegisterValueType.ConstantValue,
|
||||||
|
RegisterValueType.ConstantPointerValue,
|
||||||
]:
|
]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
address = llil.dest.constant
|
address = llil.dest.value.value
|
||||||
if address != func.start:
|
if address != func.start:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -108,103 +95,10 @@ def extract_function_name(fh: FunctionHandle):
|
|||||||
yield FunctionName(name[1:]), sym.address
|
yield FunctionName(name[1:]), sym.address
|
||||||
|
|
||||||
|
|
||||||
def get_printable_len_ascii(s: bytes) -> int:
|
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
|
||||||
count = 0
|
|
||||||
for c in s:
|
|
||||||
if c == 0:
|
|
||||||
return count
|
|
||||||
if c < 127 and chr(c) in string.printable:
|
|
||||||
count += 1
|
|
||||||
return count
|
|
||||||
|
|
||||||
|
|
||||||
def get_printable_len_wide(s: bytes) -> int:
|
|
||||||
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
|
||||||
if all(c == 0x00 for c in s[1::2]):
|
|
||||||
return get_printable_len_ascii(s[::2])
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
|
|
||||||
bv: BinaryView = f.view
|
|
||||||
|
|
||||||
if il.operation != MediumLevelILOperation.MLIL_CALL:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
target = il.dest
|
|
||||||
if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
addr = target.value.value
|
|
||||||
sym = bv.get_symbol_at(addr)
|
|
||||||
if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if len(il.params) < 2:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
dest = il.params[0]
|
|
||||||
if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
|
|
||||||
var = dest.src
|
|
||||||
else:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if var.source_type != VariableSourceType.StackVariableSourceType:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
src = il.params[1]
|
|
||||||
if src.value.type != RegisterValueType.ConstantDataAggregateValue:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
|
|
||||||
return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
|
|
||||||
|
|
||||||
|
|
||||||
def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
    """Check an MLIL basic block for stack string indicators.

    Adds up `get_stack_string_len` over the instructions of `bb` and returns
    True as soon as the running total exceeds `MIN_STACKSTRING_LEN`.
    Returns False when the block ends without crossing that threshold.
    """
    running_total = 0
    for insn in bb:
        running_total += get_stack_string_len(f, insn)
        # strictly greater than: reaching the minimum exactly is not enough
        if running_total > MIN_STACKSTRING_LEN:
            return True

    return False
|
|
||||||
|
|
||||||
|
|
||||||
def extract_stackstring(fh: FunctionHandle):
    """Yield a "stack string" characteristic for each qualifying MLIL basic block.

    Nothing is yielded when the function has no view or when accessing its
    MLIL raises `ILException`.
    """
    func: Function = fh.inner
    if func.view is None:
        return

    try:
        mlil = func.mlil
    except ILException:
        # MLIL is not available for this function; nothing to inspect
        return

    # NOTE(review): the location yielded here is the raw `source_block.start`
    # value, while `extract_features` is annotated as yielding `Address`
    # objects - confirm that consumers accept it.
    yield from (
        (Characteristic("stack string"), block.source_block.start)
        for block in mlil.basic_blocks
        if bb_contains_stackstring(func, block)
    )
|
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
|
|
||||||
FUNCTION_HANDLERS = (
|
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
|
||||||
extract_function_calls_to,
|
|
||||||
extract_function_loop,
|
|
||||||
extract_recursive_call,
|
|
||||||
extract_function_name,
|
|
||||||
extract_stackstring,
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
from binaryninja import BinaryView
|
from binaryninja import BinaryView
|
||||||
|
|
||||||
@@ -16,7 +16,7 @@ from capa.features.address import NO_ADDRESS, Address
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
name = bv.platform.name
|
name = bv.platform.name
|
||||||
if "-" in name:
|
if "-" in name:
|
||||||
name = name.split("-")[0]
|
name = name.split("-")[0]
|
||||||
@@ -45,7 +45,7 @@ def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||||
arch = bv.arch.name
|
arch = bv.arch.name
|
||||||
if arch == "x86_64":
|
if arch == "x86_64":
|
||||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import re
|
import re
|
||||||
from typing import Callable, Optional
|
from typing import List, Callable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction
|
from binaryninja import BinaryView, LowLevelILInstruction
|
||||||
from binaryninja.architecture import InstructionTextToken
|
from binaryninja.architecture import InstructionTextToken
|
||||||
|
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ from binaryninja.architecture import InstructionTextToken
|
|||||||
class DisassemblyInstruction:
|
class DisassemblyInstruction:
|
||||||
address: int
|
address: int
|
||||||
length: int
|
length: int
|
||||||
text: list[InstructionTextToken]
|
text: List[InstructionTextToken]
|
||||||
|
|
||||||
|
|
||||||
LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
|
LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
|
||||||
@@ -54,7 +54,7 @@ def unmangle_c_name(name: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
|
def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
|
||||||
s: list[str] = []
|
s: List[str] = []
|
||||||
while len(s) < max_len:
|
while len(s) < max_len:
|
||||||
try:
|
try:
|
||||||
c = bv.read(offset + len(s), 1)[0]
|
c = bv.read(offset + len(s), 1)[0]
|
||||||
@@ -67,13 +67,3 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
|
|||||||
s.append(chr(c))
|
s.append(chr(c))
|
||||||
|
|
||||||
return "".join(s)
|
return "".join(s)
|
||||||
|
|
||||||
|
|
||||||
def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]:
    """Lift the instruction at `addr` into a fresh LLIL function and return its first instruction.

    Reads up to `arch.max_instr_length` bytes at `addr` and lifts them into a
    standalone `LowLevelILFunction`. Returns None when
    `get_instruction_low_level_il` returns 0 (presumably meaning nothing was
    lifted - confirm against the Binary Ninja API).
    """
    architecture = bv.arch
    raw = bv.read(addr, architecture.max_instr_length)

    # scratch IL function that only ever holds this one lifted instruction
    scratch = LowLevelILFunction(arch=architecture)
    scratch.current_address = addr

    lifted = architecture.get_instruction_low_level_il(raw, addr, scratch)
    return None if lifted == 0 else scratch[0]
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Any, Iterator, Optional
|
from typing import Any, List, Tuple, Iterator, Optional
|
||||||
|
|
||||||
from binaryninja import Function
|
from binaryninja import Function
|
||||||
from binaryninja import BasicBlock as BinjaBasicBlock
|
from binaryninja import BasicBlock as BinjaBasicBlock
|
||||||
@@ -23,7 +23,7 @@ import capa.features.extractors.helpers
|
|||||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
|
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr
|
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
|
||||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||||
|
|
||||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||||
@@ -36,27 +36,35 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
|||||||
# 2. The function must only make one call/jump to another address
|
# 2. The function must only make one call/jump to another address
|
||||||
# If the function being checked is a stub function, returns the target address. Otherwise, return None.
|
# If the function being checked is a stub function, returns the target address. Otherwise, return None.
|
||||||
def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
|
def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
|
||||||
llil = get_llil_instr_at_addr(bv, addr)
|
funcs = bv.get_functions_at(addr)
|
||||||
if llil is None or llil.operation not in [
|
for func in funcs:
|
||||||
|
if len(func.basic_blocks) != 1:
|
||||||
|
continue
|
||||||
|
|
||||||
|
call_count = 0
|
||||||
|
call_target = None
|
||||||
|
for il in func.llil.instructions:
|
||||||
|
if il.operation in [
|
||||||
LowLevelILOperation.LLIL_CALL,
|
LowLevelILOperation.LLIL_CALL,
|
||||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||||
LowLevelILOperation.LLIL_JUMP,
|
LowLevelILOperation.LLIL_JUMP,
|
||||||
LowLevelILOperation.LLIL_TAILCALL,
|
LowLevelILOperation.LLIL_TAILCALL,
|
||||||
]:
|
]:
|
||||||
return None
|
call_count += 1
|
||||||
|
if il.dest.value.type in [
|
||||||
# The LLIL instruction retrieved by `get_llil_instr_at_addr` did not go through a full analysis, so we cannot check
|
RegisterValueType.ImportedAddressValue,
|
||||||
# `llil.dest.value.type` here
|
RegisterValueType.ConstantValue,
|
||||||
if llil.dest.operation not in [
|
RegisterValueType.ConstantPointerValue,
|
||||||
LowLevelILOperation.LLIL_CONST,
|
|
||||||
LowLevelILOperation.LLIL_CONST_PTR,
|
|
||||||
]:
|
]:
|
||||||
|
call_target = il.dest.value.value
|
||||||
|
|
||||||
|
if call_count == 1 and call_target is not None:
|
||||||
|
return call_target
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return llil.dest.constant
|
|
||||||
|
|
||||||
|
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
|
||||||
"""
|
"""
|
||||||
parse instruction API features
|
parse instruction API features
|
||||||
|
|
||||||
@@ -115,7 +123,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction number features
|
parse instruction number features
|
||||||
example:
|
example:
|
||||||
@@ -123,7 +131,7 @@ def extract_insn_number_features(
|
|||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
|
|
||||||
results: list[tuple[Any[Number, OperandNumber], Address]] = []
|
results: List[Tuple[Any[Number, OperandNumber], Address]] = []
|
||||||
|
|
||||||
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
||||||
if il.operation == LowLevelILOperation.LLIL_LOAD:
|
if il.operation == LowLevelILOperation.LLIL_LOAD:
|
||||||
@@ -154,7 +162,7 @@ def extract_insn_number_features(
|
|||||||
yield from results
|
yield from results
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse referenced byte sequences
|
parse referenced byte sequences
|
||||||
example:
|
example:
|
||||||
@@ -201,7 +209,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_insn_string_features(
|
def extract_insn_string_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction string features
|
parse instruction string features
|
||||||
|
|
||||||
@@ -258,7 +266,7 @@ def extract_insn_string_features(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction structure offset features
|
parse instruction structure offset features
|
||||||
|
|
||||||
@@ -267,7 +275,7 @@ def extract_insn_offset_features(
|
|||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
|
|
||||||
results: list[tuple[Any[Offset, OperandOffset], Address]] = []
|
results: List[Tuple[Any[Offset, OperandOffset], Address]] = []
|
||||||
address_size = func.view.arch.address_size * 8
|
address_size = func.view.arch.address_size * 8
|
||||||
|
|
||||||
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
||||||
@@ -345,7 +353,7 @@ def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInst
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction non-zeroing XOR instruction
|
parse instruction non-zeroing XOR instruction
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies
|
ignore expected non-zeroing XORs, e.g. security cookies
|
||||||
@@ -359,7 +367,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
# e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
|
# e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
|
||||||
if il.operation == LowLevelILOperation.LLIL_XOR:
|
if il.operation == LowLevelILOperation.LLIL_XOR:
|
||||||
# Exclude cases related to the stack cookie
|
# Exclude cases related to the stack cookie
|
||||||
if is_nzxor_stack_cookie(fh.inner, bbh.inner, il):
|
if is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il):
|
||||||
return False
|
return False
|
||||||
results.append((Characteristic("nzxor"), ih.address))
|
results.append((Characteristic("nzxor"), ih.address))
|
||||||
return False
|
return False
|
||||||
@@ -374,7 +382,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_mnemonic_features(
|
def extract_insn_mnemonic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction mnemonic features"""
|
"""parse instruction mnemonic features"""
|
||||||
insn: DisassemblyInstruction = ih.inner
|
insn: DisassemblyInstruction = ih.inner
|
||||||
yield Mnemonic(insn.text[0].text), ih.address
|
yield Mnemonic(insn.text[0].text), ih.address
|
||||||
@@ -382,7 +390,7 @@ def extract_insn_mnemonic_features(
|
|||||||
|
|
||||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse call $+5 instruction from the given instruction.
|
parse call $+5 instruction from the given instruction.
|
||||||
"""
|
"""
|
||||||
@@ -393,7 +401,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(
|
def extract_insn_peb_access_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction peb access
|
"""parse instruction peb access
|
||||||
|
|
||||||
fs:[0x30] on x86, gs:[0x60] on x64
|
fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
@@ -436,7 +444,7 @@ def extract_insn_peb_access_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_segment_access_features(
|
def extract_insn_segment_access_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction fs or gs access"""
|
"""parse instruction fs or gs access"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
|
|
||||||
@@ -463,7 +471,7 @@ def extract_insn_segment_access_features(
|
|||||||
|
|
||||||
def extract_insn_cross_section_cflow(
|
def extract_insn_cross_section_cflow(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
bv: BinaryView = func.view
|
bv: BinaryView = func.view
|
||||||
@@ -483,7 +491,7 @@ def extract_insn_cross_section_cflow(
|
|||||||
yield Characteristic("cross section flow"), ih.address
|
yield Characteristic("cross section flow"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract functions calls from features
|
"""extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||||
@@ -526,7 +534,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
|
|
||||||
@@ -554,7 +562,7 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
yield Characteristic("indirect call"), ih.address
|
yield Characteristic("indirect call"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract instruction features"""
|
"""extract instruction features"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, ea in inst_handler(f, bbh, insn):
|
for feature, ea in inst_handler(f, bbh, insn):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
from capa.helpers import assert_never
|
from capa.helpers import assert_never
|
||||||
@@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
this method extracts the given call's features (such as API name and arguments),
|
this method extracts the given call's features (such as API name and arguments),
|
||||||
and returns them as API, Number, and String features.
|
and returns them as API, Number, and String features.
|
||||||
@@ -55,7 +55,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
|||||||
yield API(name), ch.address
|
yield API(name), ch.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in CALL_HANDLERS:
|
for handler in CALL_HANDLERS:
|
||||||
for feature, addr in handler(ph, th, ch):
|
for feature, addr in handler(ph, th, ch):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Union, Iterator
|
from typing import Dict, Tuple, Union, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.cape.call
|
import capa.features.extractors.cape.call
|
||||||
import capa.features.extractors.cape.file
|
import capa.features.extractors.cape.file
|
||||||
@@ -50,16 +50,16 @@ class CapeExtractor(DynamicFeatureExtractor):
|
|||||||
assert self.report.static is not None and self.report.static.pe is not None
|
assert self.report.static is not None and self.report.static.pe is not None
|
||||||
return AbsoluteVirtualAddress(self.report.static.pe.imagebase)
|
return AbsoluteVirtualAddress(self.report.static.pe.imagebase)
|
||||||
|
|
||||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from self.global_features
|
yield from self.global_features
|
||||||
|
|
||||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.cape.file.extract_features(self.report)
|
yield from capa.features.extractors.cape.file.extract_features(self.report)
|
||||||
|
|
||||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||||
yield from capa.features.extractors.cape.file.get_processes(self.report)
|
yield from capa.features.extractors.cape.file.get_processes(self.report)
|
||||||
|
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.cape.process.extract_features(ph)
|
yield from capa.features.extractors.cape.process.extract_features(ph)
|
||||||
|
|
||||||
def get_process_name(self, ph) -> str:
|
def get_process_name(self, ph) -> str:
|
||||||
@@ -69,7 +69,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
|||||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||||
yield from capa.features.extractors.cape.process.get_threads(ph)
|
yield from capa.features.extractors.cape.process.get_threads(ph)
|
||||||
|
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if False:
|
if False:
|
||||||
# force this routine to be a generator,
|
# force this routine to be a generator,
|
||||||
# but we don't actually have any elements to generate.
|
# but we don't actually have any elements to generate.
|
||||||
@@ -81,7 +81,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
|||||||
|
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)
|
yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)
|
||||||
|
|
||||||
def get_call_name(self, ph, th, ch) -> str:
|
def get_call_name(self, ph, th, ch) -> str:
|
||||||
@@ -122,7 +122,7 @@ class CapeExtractor(DynamicFeatureExtractor):
|
|||||||
return "".join(parts)
|
return "".join(parts)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_report(cls, report: dict) -> "CapeExtractor":
|
def from_report(cls, report: Dict) -> "CapeExtractor":
|
||||||
cr = CapeReport.model_validate(report)
|
cr = CapeReport.model_validate(report)
|
||||||
|
|
||||||
if cr.info.version not in TESTED_VERSIONS:
|
if cr.info.version not in TESTED_VERSIONS:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.file import Export, Import, Section
|
from capa.features.file import Export, Import, Section
|
||||||
from capa.features.common import String, Feature
|
from capa.features.common import String, Feature
|
||||||
@@ -41,7 +41,7 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]:
|
|||||||
seen_processes[addr].append(process)
|
seen_processes[addr].append(process)
|
||||||
|
|
||||||
|
|
||||||
def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract imported function names
|
extract imported function names
|
||||||
"""
|
"""
|
||||||
@@ -62,57 +62,57 @@ def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]
|
|||||||
yield Import(name), AbsoluteVirtualAddress(function.address)
|
yield Import(name), AbsoluteVirtualAddress(function.address)
|
||||||
|
|
||||||
|
|
||||||
def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
assert report.static is not None and report.static.pe is not None
|
assert report.static is not None and report.static.pe is not None
|
||||||
for function in report.static.pe.exports:
|
for function in report.static.pe.exports:
|
||||||
yield Export(function.name), AbsoluteVirtualAddress(function.address)
|
yield Export(function.name), AbsoluteVirtualAddress(function.address)
|
||||||
|
|
||||||
|
|
||||||
def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
assert report.static is not None and report.static.pe is not None
|
assert report.static is not None and report.static.pe is not None
|
||||||
for section in report.static.pe.sections:
|
for section in report.static.pe.sections:
|
||||||
yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)
|
yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if report.strings is not None:
|
if report.strings is not None:
|
||||||
for string in report.strings:
|
for string in report.strings:
|
||||||
yield String(string), NO_ADDRESS
|
yield String(string), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for regkey in report.behavior.summary.keys:
|
for regkey in report.behavior.summary.keys:
|
||||||
yield String(regkey), NO_ADDRESS
|
yield String(regkey), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_files(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for file in report.behavior.summary.files:
|
for file in report.behavior.summary.files:
|
||||||
yield String(file), NO_ADDRESS
|
yield String(file), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for mutex in report.behavior.summary.mutexes:
|
for mutex in report.behavior.summary.mutexes:
|
||||||
yield String(mutex), NO_ADDRESS
|
yield String(mutex), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for cmd in report.behavior.summary.executed_commands:
|
for cmd in report.behavior.summary.executed_commands:
|
||||||
yield String(cmd), NO_ADDRESS
|
yield String(cmd), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for symbol in report.behavior.summary.resolved_apis:
|
for symbol in report.behavior.summary.resolved_apis:
|
||||||
yield String(symbol), NO_ADDRESS
|
yield String(symbol), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for svc in report.behavior.summary.created_services:
|
for svc in report.behavior.summary.created_services:
|
||||||
yield String(svc), NO_ADDRESS
|
yield String(svc), NO_ADDRESS
|
||||||
for svc in report.behavior.summary.started_services:
|
for svc in report.behavior.summary.started_services:
|
||||||
yield String(svc), NO_ADDRESS
|
yield String(svc), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in FILE_HANDLERS:
|
for handler in FILE_HANDLERS:
|
||||||
for feature, addr in handler(report):
|
for feature, addr in handler(report):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.common import (
|
from capa.features.common import (
|
||||||
OS,
|
OS,
|
||||||
@@ -28,7 +28,7 @@ from capa.features.extractors.cape.models import CapeReport
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if "Intel 80386" in report.target.file.type:
|
if "Intel 80386" in report.target.file.type:
|
||||||
yield Arch(ARCH_I386), NO_ADDRESS
|
yield Arch(ARCH_I386), NO_ADDRESS
|
||||||
elif "x86-64" in report.target.file.type:
|
elif "x86-64" in report.target.file.type:
|
||||||
@@ -40,7 +40,7 @@ def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if "PE" in report.target.file.type:
|
if "PE" in report.target.file.type:
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
elif "ELF" in report.target.file.type:
|
elif "ELF" in report.target.file.type:
|
||||||
@@ -52,7 +52,7 @@ def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
# this variable contains the output of the file command
|
# this variable contains the output of the file command
|
||||||
file_output = report.target.file.type
|
file_output = report.target.file.type
|
||||||
|
|
||||||
@@ -80,7 +80,7 @@ def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield OS(OS_ANY), NO_ADDRESS
|
yield OS(OS_ANY), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for global_handler in GLOBAL_HANDLER:
|
for global_handler in GLOBAL_HANDLER:
|
||||||
for feature, addr in global_handler(report):
|
for feature, addr in global_handler(report):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -6,12 +6,12 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
from capa.features.extractors.base_extractor import ProcessHandle
|
from capa.features.extractors.base_extractor import ProcessHandle
|
||||||
|
|
||||||
|
|
||||||
def find_process(processes: list[dict[str, Any]], ph: ProcessHandle) -> dict[str, Any]:
|
def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
find a specific process identified by a process handler.
|
find a specific process identified by a process handler.
|
||||||
|
|
||||||
|
|||||||
@@ -6,9 +6,10 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import binascii
|
import binascii
|
||||||
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
|
from typing import Any, Dict, List, Union, Literal, Optional
|
||||||
|
|
||||||
from pydantic import Field, BaseModel, ConfigDict
|
from pydantic import Field, BaseModel, ConfigDict
|
||||||
|
from typing_extensions import Annotated, TypeAlias
|
||||||
from pydantic.functional_validators import BeforeValidator
|
from pydantic.functional_validators import BeforeValidator
|
||||||
|
|
||||||
|
|
||||||
@@ -58,11 +59,11 @@ Skip: TypeAlias = Optional[Any]
|
|||||||
# in a field with this type.
|
# in a field with this type.
|
||||||
# then we can update the model with the discovered shape.
|
# then we can update the model with the discovered shape.
|
||||||
TODO: TypeAlias = None
|
TODO: TypeAlias = None
|
||||||
ListTODO: TypeAlias = list[None]
|
ListTODO: TypeAlias = List[None]
|
||||||
DictTODO: TypeAlias = ExactModel
|
DictTODO: TypeAlias = ExactModel
|
||||||
|
|
||||||
Emptydict: TypeAlias = BaseModel
|
EmptyDict: TypeAlias = BaseModel
|
||||||
EmptyList: TypeAlias = list[Any]
|
EmptyList: TypeAlias = List[Any]
|
||||||
|
|
||||||
|
|
||||||
class Info(FlexibleModel):
|
class Info(FlexibleModel):
|
||||||
@@ -76,7 +77,7 @@ class ImportedSymbol(ExactModel):
|
|||||||
|
|
||||||
class ImportedDll(ExactModel):
|
class ImportedDll(ExactModel):
|
||||||
dll: str
|
dll: str
|
||||||
imports: list[ImportedSymbol]
|
imports: List[ImportedSymbol]
|
||||||
|
|
||||||
|
|
||||||
class DirectoryEntry(ExactModel):
|
class DirectoryEntry(ExactModel):
|
||||||
@@ -148,7 +149,7 @@ class Signer(ExactModel):
|
|||||||
aux_valid: Optional[bool] = None
|
aux_valid: Optional[bool] = None
|
||||||
aux_error: Optional[bool] = None
|
aux_error: Optional[bool] = None
|
||||||
aux_error_desc: Optional[str] = None
|
aux_error_desc: Optional[str] = None
|
||||||
aux_signers: Optional[list[AuxSigner]] = None
|
aux_signers: Optional[List[AuxSigner]] = None
|
||||||
|
|
||||||
|
|
||||||
class Overlay(ExactModel):
|
class Overlay(ExactModel):
|
||||||
@@ -177,22 +178,22 @@ class PE(ExactModel):
|
|||||||
pdbpath: Optional[str] = None
|
pdbpath: Optional[str] = None
|
||||||
timestamp: str
|
timestamp: str
|
||||||
|
|
||||||
# list[ImportedDll], or dict[basename(dll), ImportedDll]
|
# List[ImportedDll], or Dict[basename(dll), ImportedDll]
|
||||||
imports: Union[list[ImportedDll], dict[str, ImportedDll]]
|
imports: Union[List[ImportedDll], Dict[str, ImportedDll]]
|
||||||
imported_dll_count: Optional[int] = None
|
imported_dll_count: Optional[int] = None
|
||||||
imphash: str
|
imphash: str
|
||||||
|
|
||||||
exported_dll_name: Optional[str] = None
|
exported_dll_name: Optional[str] = None
|
||||||
exports: list[ExportedSymbol]
|
exports: List[ExportedSymbol]
|
||||||
|
|
||||||
dirents: list[DirectoryEntry]
|
dirents: List[DirectoryEntry]
|
||||||
sections: list[Section]
|
sections: List[Section]
|
||||||
|
|
||||||
ep_bytes: Optional[HexBytes] = None
|
ep_bytes: Optional[HexBytes] = None
|
||||||
|
|
||||||
overlay: Optional[Overlay] = None
|
overlay: Optional[Overlay] = None
|
||||||
resources: list[Resource]
|
resources: List[Resource]
|
||||||
versioninfo: list[KV]
|
versioninfo: List[KV]
|
||||||
|
|
||||||
# base64 encoded data
|
# base64 encoded data
|
||||||
icon: Optional[str] = None
|
icon: Optional[str] = None
|
||||||
@@ -203,7 +204,7 @@ class PE(ExactModel):
|
|||||||
# short hex string
|
# short hex string
|
||||||
icon_dhash: Optional[str] = None
|
icon_dhash: Optional[str] = None
|
||||||
|
|
||||||
digital_signers: list[DigitalSigner]
|
digital_signers: List[DigitalSigner]
|
||||||
guest_signers: Signer
|
guest_signers: Signer
|
||||||
|
|
||||||
|
|
||||||
@@ -216,9 +217,9 @@ class File(FlexibleModel):
|
|||||||
cape_type: Optional[str] = None
|
cape_type: Optional[str] = None
|
||||||
|
|
||||||
pid: Optional[Union[int, Literal[""]]] = None
|
pid: Optional[Union[int, Literal[""]]] = None
|
||||||
name: Union[list[str], str]
|
name: Union[List[str], str]
|
||||||
path: str
|
path: str
|
||||||
guest_paths: Union[list[str], str, None]
|
guest_paths: Union[List[str], str, None]
|
||||||
timestamp: Optional[str] = None
|
timestamp: Optional[str] = None
|
||||||
|
|
||||||
#
|
#
|
||||||
@@ -243,7 +244,7 @@ class File(FlexibleModel):
|
|||||||
ep_bytes: Optional[HexBytes] = None
|
ep_bytes: Optional[HexBytes] = None
|
||||||
entrypoint: Optional[int] = None
|
entrypoint: Optional[int] = None
|
||||||
data: Optional[str] = None
|
data: Optional[str] = None
|
||||||
strings: Optional[list[str]] = None
|
strings: Optional[List[str]] = None
|
||||||
|
|
||||||
#
|
#
|
||||||
# detections (skip)
|
# detections (skip)
|
||||||
@@ -282,7 +283,7 @@ class Call(ExactModel):
|
|||||||
|
|
||||||
api: str
|
api: str
|
||||||
|
|
||||||
arguments: list[Argument]
|
arguments: List[Argument]
|
||||||
status: bool
|
status: bool
|
||||||
return_: HexInt = Field(alias="return")
|
return_: HexInt = Field(alias="return")
|
||||||
pretty_return: Optional[str] = None
|
pretty_return: Optional[str] = None
|
||||||
@@ -297,18 +298,15 @@ class Call(ExactModel):
|
|||||||
id: int
|
id: int
|
||||||
|
|
||||||
|
|
||||||
# FlexibleModel to account for extended fields
|
class Process(ExactModel):
|
||||||
# refs: https://github.com/mandiant/capa/issues/2466
|
|
||||||
# https://github.com/kevoreilly/CAPEv2/pull/2199
|
|
||||||
class Process(FlexibleModel):
|
|
||||||
process_id: int
|
process_id: int
|
||||||
process_name: str
|
process_name: str
|
||||||
parent_id: int
|
parent_id: int
|
||||||
module_path: str
|
module_path: str
|
||||||
first_seen: str
|
first_seen: str
|
||||||
calls: list[Call]
|
calls: List[Call]
|
||||||
threads: list[int]
|
threads: List[int]
|
||||||
environ: dict[str, str]
|
environ: Dict[str, str]
|
||||||
|
|
||||||
|
|
||||||
class ProcessTree(ExactModel):
|
class ProcessTree(ExactModel):
|
||||||
@@ -316,25 +314,25 @@ class ProcessTree(ExactModel):
|
|||||||
pid: int
|
pid: int
|
||||||
parent_id: int
|
parent_id: int
|
||||||
module_path: str
|
module_path: str
|
||||||
threads: list[int]
|
threads: List[int]
|
||||||
environ: dict[str, str]
|
environ: Dict[str, str]
|
||||||
children: list["ProcessTree"]
|
children: List["ProcessTree"]
|
||||||
|
|
||||||
|
|
||||||
class Summary(ExactModel):
|
class Summary(ExactModel):
|
||||||
files: list[str]
|
files: List[str]
|
||||||
read_files: list[str]
|
read_files: List[str]
|
||||||
write_files: list[str]
|
write_files: List[str]
|
||||||
delete_files: list[str]
|
delete_files: List[str]
|
||||||
keys: list[str]
|
keys: List[str]
|
||||||
read_keys: list[str]
|
read_keys: List[str]
|
||||||
write_keys: list[str]
|
write_keys: List[str]
|
||||||
delete_keys: list[str]
|
delete_keys: List[str]
|
||||||
executed_commands: list[str]
|
executed_commands: List[str]
|
||||||
resolved_apis: list[str]
|
resolved_apis: List[str]
|
||||||
mutexes: list[str]
|
mutexes: List[str]
|
||||||
created_services: list[str]
|
created_services: List[str]
|
||||||
started_services: list[str]
|
started_services: List[str]
|
||||||
|
|
||||||
|
|
||||||
class EncryptedBuffer(ExactModel):
|
class EncryptedBuffer(ExactModel):
|
||||||
@@ -351,12 +349,12 @@ class Behavior(ExactModel):
|
|||||||
summary: Summary
|
summary: Summary
|
||||||
|
|
||||||
# list of processes, of threads, of calls
|
# list of processes, of threads, of calls
|
||||||
processes: list[Process]
|
processes: List[Process]
|
||||||
# tree of processes
|
# tree of processes
|
||||||
processtree: list[ProcessTree]
|
processtree: List[ProcessTree]
|
||||||
|
|
||||||
anomaly: list[str]
|
anomaly: List[str]
|
||||||
encryptedbuffers: list[EncryptedBuffer]
|
encryptedbuffers: List[EncryptedBuffer]
|
||||||
# these are small objects that describe atomic events,
|
# these are small objects that describe atomic events,
|
||||||
# like file move, registry access.
|
# like file move, registry access.
|
||||||
# we'll detect the same with our API call analysis.
|
# we'll detect the same with our API call analysis.
|
||||||
@@ -375,7 +373,7 @@ class Static(ExactModel):
|
|||||||
|
|
||||||
|
|
||||||
class Cape(ExactModel):
|
class Cape(ExactModel):
|
||||||
payloads: list[ProcessFile]
|
payloads: List[ProcessFile]
|
||||||
configs: Skip = None
|
configs: Skip = None
|
||||||
|
|
||||||
|
|
||||||
@@ -391,7 +389,7 @@ class CapeReport(FlexibleModel):
|
|||||||
# static analysis results
|
# static analysis results
|
||||||
#
|
#
|
||||||
static: Optional[Static] = None
|
static: Optional[Static] = None
|
||||||
strings: Optional[list[str]] = None
|
strings: Optional[List[str]] = None
|
||||||
|
|
||||||
#
|
#
|
||||||
# dynamic analysis results
|
# dynamic analysis results
|
||||||
@@ -400,10 +398,10 @@ class CapeReport(FlexibleModel):
|
|||||||
behavior: Behavior
|
behavior: Behavior
|
||||||
|
|
||||||
# post-processed results: payloads and extracted configs
|
# post-processed results: payloads and extracted configs
|
||||||
CAPE: Optional[Union[Cape, list]] = None
|
CAPE: Optional[Union[Cape, List]] = None
|
||||||
dropped: Optional[list[File]] = None
|
dropped: Optional[List[File]] = None
|
||||||
procdump: Optional[list[ProcessFile]] = None
|
procdump: Optional[List[ProcessFile]] = None
|
||||||
procmemory: Optional[ListTODO] = None
|
procmemory: ListTODO
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# information we won't use in capa
|
# information we won't use in capa
|
||||||
@@ -439,7 +437,7 @@ class CapeReport(FlexibleModel):
|
|||||||
malfamily_tag: Optional[str] = None
|
malfamily_tag: Optional[str] = None
|
||||||
malscore: float
|
malscore: float
|
||||||
detections: Skip = None
|
detections: Skip = None
|
||||||
detections2pid: Optional[dict[int, list[str]]] = None
|
detections2pid: Optional[Dict[int, List[str]]] = None
|
||||||
# AV detections for the sample.
|
# AV detections for the sample.
|
||||||
virustotal: Skip = None
|
virustotal: Skip = None
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.common import String, Feature
|
from capa.features.common import String, Feature
|
||||||
from capa.features.address import Address, ThreadAddress
|
from capa.features.address import Address, ThreadAddress
|
||||||
@@ -22,14 +22,14 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
|||||||
get the threads associated with a given process
|
get the threads associated with a given process
|
||||||
"""
|
"""
|
||||||
process: Process = ph.inner
|
process: Process = ph.inner
|
||||||
threads: list[int] = process.threads
|
threads: List[int] = process.threads
|
||||||
|
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
|
address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
|
||||||
yield ThreadHandle(address=address, inner={})
|
yield ThreadHandle(address=address, inner={})
|
||||||
|
|
||||||
|
|
||||||
def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract strings from a process' provided environment variables.
|
extract strings from a process' provided environment variables.
|
||||||
"""
|
"""
|
||||||
@@ -39,7 +39,7 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Addres
|
|||||||
yield String(value), ph.address
|
yield String(value), ph.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in PROCESS_HANDLERS:
|
for handler in PROCESS_HANDLERS:
|
||||||
for feature, addr in handler(ph):
|
for feature, addr in handler(ph):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import re
|
|||||||
import logging
|
import logging
|
||||||
import binascii
|
import binascii
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import pefile
|
import pefile
|
||||||
|
|
||||||
@@ -45,7 +45,7 @@ MATCH_RESULT = b'{"meta":'
|
|||||||
MATCH_JSON_OBJECT = b'{"'
|
MATCH_JSON_OBJECT = b'{"'
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address]]:
|
def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]:
|
||||||
"""
|
"""
|
||||||
extract ASCII and UTF-16 LE strings from file
|
extract ASCII and UTF-16 LE strings from file
|
||||||
"""
|
"""
|
||||||
@@ -56,7 +56,7 @@ def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address
|
|||||||
yield String(s.s), FileOffsetAddress(s.offset)
|
yield String(s.s), FileOffsetAddress(s.offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if buf.startswith(MATCH_PE):
|
if buf.startswith(MATCH_PE):
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
elif buf.startswith(MATCH_ELF):
|
elif buf.startswith(MATCH_ELF):
|
||||||
@@ -79,7 +79,7 @@ def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
|
def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if buf.startswith(MATCH_PE):
|
if buf.startswith(MATCH_PE):
|
||||||
yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))
|
yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))
|
||||||
|
|
||||||
@@ -111,7 +111,7 @@ def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
|
def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if os != OS_AUTO:
|
if os != OS_AUTO:
|
||||||
yield OS(os), NO_ADDRESS
|
yield OS(os), NO_ADDRESS
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Union, Iterator, Optional
|
from typing import Dict, List, Tuple, Union, Iterator, Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
@@ -41,11 +41,11 @@ from capa.features.extractors.dnfile.helpers import (
|
|||||||
|
|
||||||
class DnFileFeatureExtractorCache:
|
class DnFileFeatureExtractorCache:
|
||||||
def __init__(self, pe: dnfile.dnPE):
|
def __init__(self, pe: dnfile.dnPE):
|
||||||
self.imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
self.native_imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
self.methods: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
self.fields: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
self.types: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
|
||||||
|
|
||||||
for import_ in get_dotnet_managed_imports(pe):
|
for import_ in get_dotnet_managed_imports(pe):
|
||||||
self.imports[import_.token] = import_
|
self.imports[import_.token] = import_
|
||||||
@@ -84,7 +84,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe)
|
self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe)
|
||||||
|
|
||||||
# pre-compute these because we'll yield them at *every* scope.
|
# pre-compute these because we'll yield them at *every* scope.
|
||||||
self.global_features: list[tuple[Feature, Address]] = []
|
self.global_features: List[Tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
|
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
|
||||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
|
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
|
||||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
|
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
|
||||||
@@ -100,7 +100,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||||
# create a method lookup table
|
# create a method lookup table
|
||||||
methods: dict[Address, FunctionHandle] = {}
|
methods: Dict[Address, FunctionHandle] = {}
|
||||||
for token, method in get_dotnet_managed_method_bodies(self.pe):
|
for token, method in get_dotnet_managed_method_bodies(self.pe):
|
||||||
fh: FunctionHandle = FunctionHandle(
|
fh: FunctionHandle = FunctionHandle(
|
||||||
address=DNTokenAddress(token),
|
address=DNTokenAddress(token),
|
||||||
@@ -136,7 +136,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
yield from methods.values()
|
yield from methods.values()
|
||||||
|
|
||||||
def extract_function_features(self, fh) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.dnfile.function.extract_features(fh)
|
yield from capa.features.extractors.dnfile.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, f) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, f) -> Iterator[BBHandle]:
|
||||||
@@ -157,5 +157,5 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
inner=insn,
|
inner=insn,
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_insn_features(self, fh, bbh, ih) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)
|
yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
|
|
||||||
@@ -18,35 +18,35 @@ from capa.features.common import Class, Format, String, Feature, Namespace, Char
|
|||||||
from capa.features.address import Address
|
from capa.features.address import Address
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[tuple[Import, Address]]:
|
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(pe: dnfile.dnPE) -> Iterator[tuple[Format, Address]]:
|
def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[tuple[FunctionName, Address]]:
|
def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(pe: dnfile.dnPE) -> Iterator[tuple[String, Address]]:
|
def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[tuple[Characteristic, Address]]:
|
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[tuple[Namespace, Address]]:
|
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[tuple[Class, Address]]:
|
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]:
|
||||||
yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)
|
yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, address in file_handler(pe):
|
for feature, address in file_handler(pe):
|
||||||
yield feature, address
|
yield feature, address
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address
|
from capa.features.address import Address
|
||||||
@@ -18,30 +18,30 @@ from capa.features.extractors.base_extractor import FunctionHandle
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
||||||
"""extract callers to a function"""
|
"""extract callers to a function"""
|
||||||
for dest in fh.ctx["calls_to"]:
|
for dest in fh.ctx["calls_to"]:
|
||||||
yield Characteristic("calls to"), dest
|
yield Characteristic("calls to"), dest
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
||||||
"""extract callers from a function"""
|
"""extract callers from a function"""
|
||||||
for src in fh.ctx["calls_from"]:
|
for src in fh.ctx["calls_from"]:
|
||||||
yield Characteristic("calls from"), src
|
yield Characteristic("calls from"), src
|
||||||
|
|
||||||
|
|
||||||
def extract_recursive_call(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
||||||
"""extract recursive function call"""
|
"""extract recursive function call"""
|
||||||
if fh.address in fh.ctx["calls_to"]:
|
if fh.address in fh.ctx["calls_to"]:
|
||||||
yield Characteristic("recursive call"), fh.address
|
yield Characteristic("recursive call"), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
|
def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
|
||||||
"""extract loop indicators from a function"""
|
"""extract loop indicators from a function"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Union, Iterator, Optional
|
from typing import Dict, Tuple, Union, Iterator, Optional
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
from dncil.cil.body import CilMethodBody
|
from dncil.cil.body import CilMethodBody
|
||||||
@@ -144,7 +144,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
|
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
|
||||||
"""get MethodDef methods used to access properties
|
"""get MethodDef methods used to access properties
|
||||||
|
|
||||||
see https://www.ntcore.com/files/dotnetformat.htm
|
see https://www.ntcore.com/files/dotnetformat.htm
|
||||||
@@ -194,7 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
|
|||||||
"""
|
"""
|
||||||
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
||||||
|
|
||||||
accessor_map: dict[int, str] = {}
|
accessor_map: Dict[int, str] = {}
|
||||||
for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
|
for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
|
||||||
accessor_map[methoddef] = methoddef_access
|
accessor_map[methoddef] = methoddef_access
|
||||||
|
|
||||||
@@ -252,7 +252,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
|
|||||||
yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)
|
yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)
|
||||||
|
|
||||||
|
|
||||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
|
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
|
||||||
"""get managed methods from MethodDef table"""
|
"""get managed methods from MethodDef table"""
|
||||||
for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
|
for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
|
||||||
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
|
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
|
||||||
@@ -332,7 +332,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
|
|||||||
|
|
||||||
def resolve_nested_typedef_name(
|
def resolve_nested_typedef_name(
|
||||||
nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
|
nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
|
||||||
) -> tuple[str, tuple[str, ...]]:
|
) -> Tuple[str, Tuple[str, ...]]:
|
||||||
"""Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
"""Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
||||||
|
|
||||||
if index in nested_class_table:
|
if index in nested_class_table:
|
||||||
@@ -368,7 +368,7 @@ def resolve_nested_typedef_name(
|
|||||||
|
|
||||||
def resolve_nested_typeref_name(
|
def resolve_nested_typeref_name(
|
||||||
index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
|
index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
|
||||||
) -> tuple[str, tuple[str, ...]]:
|
) -> Tuple[str, Tuple[str, ...]]:
|
||||||
"""Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
"""Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
||||||
# If the ResolutionScope decodes to a typeRef type then it is nested
|
# If the ResolutionScope decodes to a typeRef type then it is nested
|
||||||
if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
|
if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
|
||||||
@@ -398,7 +398,7 @@ def resolve_nested_typeref_name(
|
|||||||
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
|
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
|
||||||
|
|
||||||
|
|
||||||
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict[int, int]:
|
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
|
||||||
"""Build index for EnclosingClass based off the NestedClass row index in the nestedclass table"""
|
"""Build index for EnclosingClass based off the NestedClass row index in the nestedclass table"""
|
||||||
nested_class_table = {}
|
nested_class_table = {}
|
||||||
|
|
||||||
@@ -442,7 +442,7 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
|
|||||||
return not bool(pe.net.Flags.CLR_ILONLY)
|
return not bool(pe.net.Flags.CLR_ILONLY)
|
||||||
|
|
||||||
|
|
||||||
def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[tuple[int, dnfile.base.MDTableRow]]:
|
def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
|
||||||
assert pe.net is not None
|
assert pe.net is not None
|
||||||
assert pe.net.mdtables is not None
|
assert pe.net.mdtables is not None
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import TYPE_CHECKING, Union, Iterator, Optional
|
from typing import TYPE_CHECKING, Tuple, Union, Iterator, Optional
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
|
from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
|
||||||
@@ -61,7 +61,7 @@ def get_callee(
|
|||||||
return callee
|
return callee
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction API features"""
|
"""parse instruction API features"""
|
||||||
if ih.inner.opcode not in (
|
if ih.inner.opcode not in (
|
||||||
OpCodes.Call,
|
OpCodes.Call,
|
||||||
@@ -83,7 +83,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato
|
|||||||
yield API(name), ih.address
|
yield API(name), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction property features"""
|
"""parse instruction property features"""
|
||||||
name: Optional[str] = None
|
name: Optional[str] = None
|
||||||
access: Optional[str] = None
|
access: Optional[str] = None
|
||||||
@@ -118,7 +118,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It
|
|||||||
|
|
||||||
def extract_insn_namespace_class_features(
|
def extract_insn_namespace_class_features(
|
||||||
fh: FunctionHandle, bh, ih: InsnHandle
|
fh: FunctionHandle, bh, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Union[Namespace, Class], Address]]:
|
) -> Iterator[Tuple[Union[Namespace, Class], Address]]:
|
||||||
"""parse instruction namespace and class features"""
|
"""parse instruction namespace and class features"""
|
||||||
type_: Optional[Union[DnType, DnUnmanagedMethod]] = None
|
type_: Optional[Union[DnType, DnUnmanagedMethod]] = None
|
||||||
|
|
||||||
@@ -173,13 +173,13 @@ def extract_insn_namespace_class_features(
|
|||||||
yield Namespace(type_.namespace), ih.address
|
yield Namespace(type_.namespace), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction number features"""
|
"""parse instruction number features"""
|
||||||
if ih.inner.is_ldc():
|
if ih.inner.is_ldc():
|
||||||
yield Number(ih.inner.get_ldc()), ih.address
|
yield Number(ih.inner.get_ldc()), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction string features"""
|
"""parse instruction string features"""
|
||||||
if not ih.inner.is_ldstr():
|
if not ih.inner.is_ldstr():
|
||||||
return
|
return
|
||||||
@@ -197,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter
|
|||||||
|
|
||||||
def extract_unmanaged_call_characteristic_features(
|
def extract_unmanaged_call_characteristic_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Characteristic, Address]]:
|
) -> Iterator[Tuple[Characteristic, Address]]:
|
||||||
if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp):
|
if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp):
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -209,7 +209,7 @@ def extract_unmanaged_call_characteristic_features(
|
|||||||
yield Characteristic("unmanaged call"), ih.address
|
yield Characteristic("unmanaged call"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract instruction features"""
|
"""extract instruction features"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, addr in inst_handler(fh, bbh, ih):
|
for feature, addr in inst_handler(fh, bbh, ih):
|
||||||
|
|||||||
@@ -6,17 +6,17 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Tuple, Optional
|
||||||
|
|
||||||
|
|
||||||
class DnType:
|
class DnType:
|
||||||
def __init__(
|
def __init__(
|
||||||
self, token: int, class_: tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
|
self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
|
||||||
):
|
):
|
||||||
self.token: int = token
|
self.token: int = token
|
||||||
self.access: Optional[str] = access
|
self.access: Optional[str] = access
|
||||||
self.namespace: str = namespace
|
self.namespace: str = namespace
|
||||||
self.class_: tuple[str, ...] = class_
|
self.class_: Tuple[str, ...] = class_
|
||||||
|
|
||||||
if member == ".ctor":
|
if member == ".ctor":
|
||||||
member = "ctor"
|
member = "ctor"
|
||||||
@@ -44,7 +44,7 @@ class DnType:
|
|||||||
return str(self)
|
return str(self)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_name(class_: tuple[str, ...], namespace: str = "", member: str = ""):
|
def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""):
|
||||||
if len(class_) > 1:
|
if len(class_) > 1:
|
||||||
class_str = "/".join(class_) # Concat items in tuple, separated by a "/"
|
class_str = "/".join(class_) # Concat items in tuple, separated by a "/"
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
@@ -48,12 +48,12 @@ from capa.features.extractors.dnfile.helpers import (
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(**kwargs) -> Iterator[tuple[Format, Address]]:
|
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
|
||||||
yield Format(FORMAT_DOTNET), NO_ADDRESS
|
yield Format(FORMAT_DOTNET), NO_ADDRESS
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Import, Address]]:
|
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
|
||||||
for method in get_dotnet_managed_imports(pe):
|
for method in get_dotnet_managed_imports(pe):
|
||||||
# like System.IO.File::OpenRead
|
# like System.IO.File::OpenRead
|
||||||
yield Import(str(method)), DNTokenAddress(method.token)
|
yield Import(str(method)), DNTokenAddress(method.token)
|
||||||
@@ -64,12 +64,12 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Impor
|
|||||||
yield Import(name), DNTokenAddress(imp.token)
|
yield Import(name), DNTokenAddress(imp.token)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[FunctionName, Address]]:
|
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]:
|
||||||
for method in get_dotnet_managed_methods(pe):
|
for method in get_dotnet_managed_methods(pe):
|
||||||
yield FunctionName(str(method)), DNTokenAddress(method.token)
|
yield FunctionName(str(method)), DNTokenAddress(method.token)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Namespace, Address]]:
|
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]:
|
||||||
"""emit namespace features from TypeRef and TypeDef tables"""
|
"""emit namespace features from TypeRef and TypeDef tables"""
|
||||||
|
|
||||||
# namespaces may be referenced multiple times, so we need to filter
|
# namespaces may be referenced multiple times, so we need to filter
|
||||||
@@ -93,7 +93,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple
|
|||||||
yield Namespace(namespace), NO_ADDRESS
|
yield Namespace(namespace), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Class, Address]]:
|
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
|
||||||
"""emit class features from TypeRef and TypeDef tables"""
|
"""emit class features from TypeRef and TypeDef tables"""
|
||||||
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
nested_class_table = get_dotnet_nested_class_table_index(pe)
|
||||||
|
|
||||||
@@ -116,11 +116,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Cla
|
|||||||
yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)
|
yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_os(**kwargs) -> Iterator[tuple[OS, Address]]:
|
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
|
||||||
yield OS(OS_ANY), NO_ADDRESS
|
yield OS(OS_ANY), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address]]:
|
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
|
||||||
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
|
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
|
||||||
# .NET 4.5 added option: any CPU, 32-bit preferred
|
# .NET 4.5 added option: any CPU, 32-bit preferred
|
||||||
assert pe.net is not None
|
assert pe.net is not None
|
||||||
@@ -134,18 +134,18 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address
|
|||||||
yield Arch(ARCH_ANY), NO_ADDRESS
|
yield Arch(ARCH_ANY), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[String, Address]]:
|
def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_file_strings(pe.__data__)
|
yield from capa.features.extractors.common.extract_file_strings(pe.__data__)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_mixed_mode_characteristic_features(
|
def extract_file_mixed_mode_characteristic_features(
|
||||||
pe: dnfile.dnPE, **kwargs
|
pe: dnfile.dnPE, **kwargs
|
||||||
) -> Iterator[tuple[Characteristic, Address]]:
|
) -> Iterator[Tuple[Characteristic, Address]]:
|
||||||
if is_dotnet_mixed_mode(pe):
|
if is_dotnet_mixed_mode(pe):
|
||||||
yield Characteristic("mixed mode"), NO_ADDRESS
|
yield Characteristic("mixed mode"), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler(pe=pe): # type: ignore
|
for feature, addr in file_handler(pe=pe): # type: ignore
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
@@ -162,7 +162,7 @@ FILE_HANDLERS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
|
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in GLOBAL_HANDLERS:
|
for handler in GLOBAL_HANDLERS:
|
||||||
for feature, va in handler(pe=pe): # type: ignore
|
for feature, va in handler(pe=pe): # type: ignore
|
||||||
yield feature, va
|
yield feature, va
|
||||||
@@ -204,7 +204,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
|
|||||||
def is_mixed_mode(self) -> bool:
|
def is_mixed_mode(self) -> bool:
|
||||||
return is_dotnet_mixed_mode(self.pe)
|
return is_dotnet_mixed_mode(self.pe)
|
||||||
|
|
||||||
def get_runtime_version(self) -> tuple[int, int]:
|
def get_runtime_version(self) -> Tuple[int, int]:
|
||||||
assert self.pe.net is not None
|
assert self.pe.net is not None
|
||||||
assert self.pe.net.struct is not None
|
assert self.pe.net.struct is not None
|
||||||
assert self.pe.net.struct.MajorRuntimeVersion is not None
|
assert self.pe.net.struct.MajorRuntimeVersion is not None
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
from capa.features.insn import API, Number
|
from capa.features.insn import API, Number
|
||||||
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
This method extracts the given call's features (such as API name and arguments),
|
This method extracts the given call's features (such as API name and arguments),
|
||||||
and returns them as API, Number, and String features.
|
and returns them as API, Number, and String features.
|
||||||
@@ -49,7 +49,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
|||||||
yield API(name), ch.address
|
yield API(name), ch.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in CALL_HANDLERS:
|
for handler in CALL_HANDLERS:
|
||||||
for feature, addr in handler(ph, th, ch):
|
for feature, addr in handler(ph, th, ch):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Union, Iterator
|
from typing import Dict, List, Tuple, Union, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.drakvuf.call
|
import capa.features.extractors.drakvuf.call
|
||||||
import capa.features.extractors.drakvuf.file
|
import capa.features.extractors.drakvuf.file
|
||||||
@@ -39,7 +39,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
self.report: DrakvufReport = report
|
self.report: DrakvufReport = report
|
||||||
|
|
||||||
# sort the api calls to prevent going through the entire list each time
|
# sort the api calls to prevent going through the entire list each time
|
||||||
self.sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = index_calls(report)
|
self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report)
|
||||||
|
|
||||||
# pre-compute these because we'll yield them at *every* scope.
|
# pre-compute these because we'll yield them at *every* scope.
|
||||||
self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report))
|
self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report))
|
||||||
@@ -48,16 +48,16 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
# DRAKVUF currently does not yield information about the PE's address
|
# DRAKVUF currently does not yield information about the PE's address
|
||||||
return NO_ADDRESS
|
return NO_ADDRESS
|
||||||
|
|
||||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from self.global_features
|
yield from self.global_features
|
||||||
|
|
||||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.drakvuf.file.extract_features(self.report)
|
yield from capa.features.extractors.drakvuf.file.extract_features(self.report)
|
||||||
|
|
||||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||||
yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls)
|
yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls)
|
||||||
|
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.drakvuf.process.extract_features(ph)
|
yield from capa.features.extractors.drakvuf.process.extract_features(ph)
|
||||||
|
|
||||||
def get_process_name(self, ph: ProcessHandle) -> str:
|
def get_process_name(self, ph: ProcessHandle) -> str:
|
||||||
@@ -66,7 +66,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||||
yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph)
|
yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph)
|
||||||
|
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if False:
|
if False:
|
||||||
# force this routine to be a generator,
|
# force this routine to be a generator,
|
||||||
# but we don't actually have any elements to generate.
|
# but we don't actually have any elements to generate.
|
||||||
@@ -87,10 +87,10 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
|
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch)
|
yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_report(cls, report: Iterator[dict]) -> "DrakvufExtractor":
|
def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor":
|
||||||
dr = DrakvufReport.from_raw_report(report)
|
dr = DrakvufReport.from_raw_report(report)
|
||||||
return DrakvufExtractor(report=dr)
|
return DrakvufExtractor(report=dr)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Dict, List, Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.file import Import
|
from capa.features.file import Import
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature
|
||||||
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call, DrakvufReport
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]]) -> Iterator[ProcessHandle]:
|
def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]:
|
||||||
"""
|
"""
|
||||||
Get all the created processes for a sample.
|
Get all the created processes for a sample.
|
||||||
"""
|
"""
|
||||||
@@ -28,7 +28,7 @@ def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]])
|
|||||||
yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name})
|
yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name})
|
||||||
|
|
||||||
|
|
||||||
def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Extract imported function names.
|
Extract imported function names.
|
||||||
"""
|
"""
|
||||||
@@ -43,7 +43,7 @@ def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Addre
|
|||||||
yield Import(name), AbsoluteVirtualAddress(function_address)
|
yield Import(name), AbsoluteVirtualAddress(function_address)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in FILE_HANDLERS:
|
for handler in FILE_HANDLERS:
|
||||||
for feature, addr in handler(report):
|
for feature, addr in handler(report):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
|
from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
|
||||||
from capa.features.address import NO_ADDRESS, Address
|
from capa.features.address import NO_ADDRESS, Address
|
||||||
@@ -16,22 +16,22 @@ from capa.features.extractors.drakvuf.models import DrakvufReport
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_format(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
# DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
# DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_os(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
# DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
# DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||||
yield OS(OS_WINDOWS), NO_ADDRESS
|
yield OS(OS_WINDOWS), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
# DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
# DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
|
||||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for global_handler in GLOBAL_HANDLER:
|
for global_handler in GLOBAL_HANDLER:
|
||||||
for feature, addr in global_handler(report):
|
for feature, addr in global_handler(report):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,15 +7,16 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
from capa.features.address import ThreadAddress, ProcessAddress
|
from capa.features.address import ThreadAddress, ProcessAddress
|
||||||
from capa.features.extractors.drakvuf.models import Call, DrakvufReport
|
from capa.features.extractors.drakvuf.models import Call, DrakvufReport
|
||||||
|
|
||||||
|
|
||||||
def index_calls(report: DrakvufReport) -> dict[ProcessAddress, dict[ThreadAddress, list[Call]]]:
|
def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]:
|
||||||
# this method organizes calls into processes and threads, and then sorts them based on
|
# this method organizes calls into processes and threads, and then sorts them based on
|
||||||
# timestamp so that we can address individual calls per index (CallAddress requires call index)
|
# timestamp so that we can address individual calls per index (CallAddress requires call index)
|
||||||
result: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = {}
|
result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {}
|
||||||
for call in itertools.chain(report.syscalls, report.apicalls):
|
for call in itertools.chain(report.syscalls, report.apicalls):
|
||||||
if call.pid == 0:
|
if call.pid == 0:
|
||||||
# DRAKVUF captures api/native calls from all processes running on the system.
|
# DRAKVUF captures api/native calls from all processes running on the system.
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Iterator
|
from typing import Any, Dict, List, Iterator
|
||||||
|
|
||||||
from pydantic import Field, BaseModel, ConfigDict, model_validator
|
from pydantic import Field, BaseModel, ConfigDict, model_validator
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ class LoadedDLL(ConciseModel):
|
|||||||
plugin_name: str = Field(alias="Plugin")
|
plugin_name: str = Field(alias="Plugin")
|
||||||
event: str = Field(alias="Event")
|
event: str = Field(alias="Event")
|
||||||
name: str = Field(alias="DllName")
|
name: str = Field(alias="DllName")
|
||||||
imports: dict[str, int] = Field(alias="Rva")
|
imports: Dict[str, int] = Field(alias="Rva")
|
||||||
|
|
||||||
|
|
||||||
class Call(ConciseModel):
|
class Call(ConciseModel):
|
||||||
@@ -58,18 +58,18 @@ class Call(ConciseModel):
|
|||||||
pid: int = Field(alias="PID")
|
pid: int = Field(alias="PID")
|
||||||
tid: int = Field(alias="TID")
|
tid: int = Field(alias="TID")
|
||||||
name: str = Field(alias="Method")
|
name: str = Field(alias="Method")
|
||||||
arguments: dict[str, str]
|
arguments: Dict[str, str]
|
||||||
|
|
||||||
|
|
||||||
class WinApiCall(Call):
|
class WinApiCall(Call):
|
||||||
# This class models Windows API calls captured by DRAKVUF (DLLs, etc.).
|
# This class models Windows API calls captured by DRAKVUF (DLLs, etc.).
|
||||||
arguments: dict[str, str] = Field(alias="Arguments")
|
arguments: Dict[str, str] = Field(alias="Arguments")
|
||||||
event: str = Field(alias="Event")
|
event: str = Field(alias="Event")
|
||||||
return_value: str = Field(alias="ReturnValue")
|
return_value: str = Field(alias="ReturnValue")
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]:
|
def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
args = values["Arguments"]
|
args = values["Arguments"]
|
||||||
values["Arguments"] = dict(arg.split("=", 1) for arg in args)
|
values["Arguments"] = dict(arg.split("=", 1) for arg in args)
|
||||||
return values
|
return values
|
||||||
@@ -100,7 +100,7 @@ class SystemCall(Call):
|
|||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:
|
def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
# DRAKVUF stores argument names and values as entries in the syscall's entry.
|
# DRAKVUF stores argument names and values as entries in the syscall's entry.
|
||||||
# This model validator collects those arguments into a list in the model.
|
# This model validator collects those arguments into a list in the model.
|
||||||
values["arguments"] = {
|
values["arguments"] = {
|
||||||
@@ -110,13 +110,13 @@ class SystemCall(Call):
|
|||||||
|
|
||||||
|
|
||||||
class DrakvufReport(ConciseModel):
|
class DrakvufReport(ConciseModel):
|
||||||
syscalls: list[SystemCall] = []
|
syscalls: List[SystemCall] = []
|
||||||
apicalls: list[WinApiCall] = []
|
apicalls: List[WinApiCall] = []
|
||||||
discovered_dlls: list[DiscoveredDLL] = []
|
discovered_dlls: List[DiscoveredDLL] = []
|
||||||
loaded_dlls: list[LoadedDLL] = []
|
loaded_dlls: List[LoadedDLL] = []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_raw_report(cls, entries: Iterator[dict]) -> "DrakvufReport":
|
def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport":
|
||||||
report = cls()
|
report = cls()
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Dict, List, Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.common import String, Feature
|
from capa.features.common import String, Feature
|
||||||
from capa.features.address import Address, ThreadAddress, ProcessAddress
|
from capa.features.address import Address, ThreadAddress, ProcessAddress
|
||||||
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def get_threads(
|
def get_threads(
|
||||||
calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle
|
calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle
|
||||||
) -> Iterator[ThreadHandle]:
|
) -> Iterator[ThreadHandle]:
|
||||||
"""
|
"""
|
||||||
Get the threads associated with a given process.
|
Get the threads associated with a given process.
|
||||||
@@ -27,11 +27,11 @@ def get_threads(
|
|||||||
yield ThreadHandle(address=thread_addr, inner={})
|
yield ThreadHandle(address=thread_addr, inner={})
|
||||||
|
|
||||||
|
|
||||||
def extract_process_name(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield String(ph.inner["process_name"]), ph.address
|
yield String(ph.inner["process_name"]), ph.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in PROCESS_HANDLERS:
|
for handler in PROCESS_HANDLERS:
|
||||||
for feature, addr in handler(ph):
|
for feature, addr in handler(ph):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Dict, List, Iterator
|
||||||
|
|
||||||
from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress
|
from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress
|
||||||
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
|
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
|
||||||
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def get_calls(
|
def get_calls(
|
||||||
sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle, th: ThreadHandle
|
sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle
|
||||||
) -> Iterator[CallHandle]:
|
) -> Iterator[CallHandle]:
|
||||||
for i, call in enumerate(sorted_calls[ph.address][th.address]):
|
for i, call in enumerate(sorted_calls[ph.address][th.address]):
|
||||||
call_addr = DynamicCallAddress(thread=th.address, id=i)
|
call_addr = DynamicCallAddress(thread=th.address, id=i)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import logging
|
|||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional
|
from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@@ -394,7 +394,7 @@ class ELF:
|
|||||||
return read_cstr(phdr.buf, 0)
|
return read_cstr(phdr.buf, 0)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def versions_needed(self) -> dict[str, set[str]]:
|
def versions_needed(self) -> Dict[str, Set[str]]:
|
||||||
# symbol version requirements are stored in the .gnu.version_r section,
|
# symbol version requirements are stored in the .gnu.version_r section,
|
||||||
# which has type SHT_GNU_verneed (0x6ffffffe).
|
# which has type SHT_GNU_verneed (0x6ffffffe).
|
||||||
#
|
#
|
||||||
@@ -452,7 +452,7 @@ class ELF:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dynamic_entries(self) -> Iterator[tuple[int, int]]:
|
def dynamic_entries(self) -> Iterator[Tuple[int, int]]:
|
||||||
"""
|
"""
|
||||||
read the entries from the dynamic section,
|
read the entries from the dynamic section,
|
||||||
yielding the tag and value for each entry.
|
yielding the tag and value for each entry.
|
||||||
@@ -547,7 +547,7 @@ class ELF:
|
|||||||
logger.warning("failed to read DT_NEEDED entry: %s", str(e))
|
logger.warning("failed to read DT_NEEDED entry: %s", str(e))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def symtab(self) -> Optional[tuple[Shdr, Shdr]]:
|
def symtab(self) -> Optional[Tuple[Shdr, Shdr]]:
|
||||||
"""
|
"""
|
||||||
fetch the Shdr for the symtab and the associated strtab.
|
fetch the Shdr for the symtab and the associated strtab.
|
||||||
"""
|
"""
|
||||||
@@ -682,7 +682,7 @@ class SymTab:
|
|||||||
symtab: Shdr,
|
symtab: Shdr,
|
||||||
strtab: Shdr,
|
strtab: Shdr,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.symbols: list[Symbol] = []
|
self.symbols: List[Symbol] = []
|
||||||
|
|
||||||
self.symtab = symtab
|
self.symtab = symtab
|
||||||
self.strtab = strtab
|
self.strtab = strtab
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
|
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
|
||||||
@@ -166,7 +166,7 @@ def extract_file_arch(elf: ELFFile, **kwargs):
|
|||||||
logger.warning("unsupported architecture: %s", arch)
|
logger.warning("unsupported architecture: %s", arch)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
|
def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler(elf=elf, buf=buf): # type: ignore
|
for feature, addr in file_handler(elf=elf, buf=buf): # type: ignore
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
@@ -182,7 +182,7 @@ FILE_HANDLERS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
|
def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
|
||||||
for global_handler in GLOBAL_HANDLERS:
|
for global_handler in GLOBAL_HANDLERS:
|
||||||
for feature, addr in global_handler(elf=elf, buf=buf): # type: ignore
|
for feature, addr in global_handler(elf=elf, buf=buf): # type: ignore
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import ghidra
|
import ghidra
|
||||||
from ghidra.program.model.lang import OperandType
|
from ghidra.program.model.lang import OperandType
|
||||||
@@ -97,7 +97,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract stackstring indicators from basic block"""
|
"""extract stackstring indicators from basic block"""
|
||||||
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
||||||
|
|
||||||
@@ -105,7 +105,7 @@ def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[
|
|||||||
yield Characteristic("stack string"), bbh.address
|
yield Characteristic("stack string"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""check basic block for tight loop indicators"""
|
"""check basic block for tight loop indicators"""
|
||||||
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
bb: ghidra.program.model.block.CodeBlock = bbh.inner
|
||||||
|
|
||||||
@@ -119,7 +119,7 @@ BASIC_BLOCK_HANDLERS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features from the given basic block.
|
extract features from the given basic block.
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur
|
|||||||
bb: the basic block to process.
|
bb: the basic block to process.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, int]: the features and their location found in this basic block.
|
Tuple[Feature, int]: the features and their location found in this basic block.
|
||||||
"""
|
"""
|
||||||
yield BasicBlock(), bbh.address
|
yield BasicBlock(), bbh.address
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.ghidra.file
|
import capa.features.extractors.ghidra.file
|
||||||
import capa.features.extractors.ghidra.insn
|
import capa.features.extractors.ghidra.insn
|
||||||
@@ -40,7 +40,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
self.global_features: list[tuple[Feature, Address]] = []
|
self.global_features: List[Tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
|
self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
|
||||||
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
|
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
|
||||||
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())
|
self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())
|
||||||
@@ -73,7 +73,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
|||||||
func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821
|
func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821
|
||||||
return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)
|
return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.ghidra.function.extract_features(fh)
|
yield from capa.features.extractors.ghidra.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
@@ -81,7 +81,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
yield from ghidra_helpers.get_function_blocks(fh)
|
yield from ghidra_helpers.get_function_blocks(fh)
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
from ghidra.program.model.symbol import SourceType, SymbolType
|
from ghidra.program.model.symbol import SourceType, SymbolType
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
|
|||||||
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
||||||
|
|
||||||
|
|
||||||
def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]]) -> Iterator[tuple[int, int]]:
|
def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) -> Iterator[Tuple[int, int]]:
|
||||||
"""check segment for embedded PE
|
"""check segment for embedded PE
|
||||||
|
|
||||||
adapted for Ghidra from:
|
adapted for Ghidra from:
|
||||||
@@ -60,11 +60,11 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]])
|
|||||||
yield off, i
|
yield off, i
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract embedded PE features"""
|
"""extract embedded PE features"""
|
||||||
|
|
||||||
# pre-compute XOR pairs
|
# pre-compute XOR pairs
|
||||||
mz_xor: list[tuple[bytes, bytes, int]] = [
|
mz_xor: List[Tuple[bytes, bytes, int]] = [
|
||||||
(
|
(
|
||||||
capa.features.extractors.helpers.xor_static(b"MZ", i),
|
capa.features.extractors.helpers.xor_static(b"MZ", i),
|
||||||
capa.features.extractors.helpers.xor_static(b"PE", i),
|
capa.features.extractors.helpers.xor_static(b"PE", i),
|
||||||
@@ -84,14 +84,14 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract function exports"""
|
"""extract function exports"""
|
||||||
st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
|
st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
|
||||||
for addr in st.getExternalEntryPointIterator():
|
for addr in st.getExternalEntryPointIterator():
|
||||||
yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())
|
yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract function imports
|
"""extract function imports
|
||||||
|
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
@@ -116,14 +116,14 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield Import(name), AbsoluteVirtualAddress(addr)
|
yield Import(name), AbsoluteVirtualAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract section names"""
|
"""extract section names"""
|
||||||
|
|
||||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||||
yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())
|
yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract ASCII and UTF-16 LE strings"""
|
"""extract ASCII and UTF-16 LE strings"""
|
||||||
|
|
||||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||||
@@ -141,7 +141,7 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield String(s.s), FileOffsetAddress(offset)
|
yield String(s.s), FileOffsetAddress(offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract the names of statically-linked library functions.
|
extract the names of statically-linked library functions.
|
||||||
"""
|
"""
|
||||||
@@ -162,7 +162,7 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield FunctionName(name[1:]), addr
|
yield FunctionName(name[1:]), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
||||||
ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||||
if "PE" in ef:
|
if "PE" in ef:
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
@@ -175,7 +175,7 @@ def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
|||||||
raise NotImplementedError(f"unexpected file format: {ef}")
|
raise NotImplementedError(f"unexpected file format: {ef}")
|
||||||
|
|
||||||
|
|
||||||
def extract_features() -> Iterator[tuple[Feature, Address]]:
|
def extract_features() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract file features"""
|
"""extract file features"""
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler():
|
for feature, addr in file_handler():
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import ghidra
|
import ghidra
|
||||||
from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator
|
from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator
|
||||||
@@ -49,7 +49,7 @@ def extract_recursive_call(fh: FunctionHandle):
|
|||||||
yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
|
yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import capa.ghidra.helpers
|
import capa.ghidra.helpers
|
||||||
import capa.features.extractors.elf
|
import capa.features.extractors.elf
|
||||||
@@ -18,7 +18,7 @@ from capa.features.address import NO_ADDRESS, Address
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_os() -> Iterator[tuple[Feature, Address]]:
|
def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
||||||
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||||
|
|
||||||
if "PE" in format_name:
|
if "PE" in format_name:
|
||||||
@@ -45,7 +45,7 @@ def extract_os() -> Iterator[tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_arch() -> Iterator[tuple[Feature, Address]]:
|
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
|
||||||
lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821
|
lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821
|
||||||
|
|
||||||
if "x86" in lang_id and "64" in lang_id:
|
if "x86" in lang_id and "64" in lang_id:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import Dict, List, Iterator
|
||||||
|
|
||||||
import ghidra
|
import ghidra
|
||||||
import java.lang
|
import java.lang
|
||||||
@@ -20,7 +20,7 @@ from capa.features.address import AbsoluteVirtualAddress
|
|||||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
def ints_to_bytes(bytez: list[int]) -> bytes:
|
def ints_to_bytes(bytez: List[int]) -> bytes:
|
||||||
"""convert Java signed ints to Python bytes
|
"""convert Java signed ints to Python bytes
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -83,10 +83,10 @@ def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
|
|||||||
yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
|
yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
|
||||||
|
|
||||||
|
|
||||||
def get_file_imports() -> dict[int, list[str]]:
|
def get_file_imports() -> Dict[int, List[str]]:
|
||||||
"""get all import names & addrs"""
|
"""get all import names & addrs"""
|
||||||
|
|
||||||
import_dict: dict[int, list[str]] = {}
|
import_dict: Dict[int, List[str]] = {}
|
||||||
|
|
||||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||||
for r in f.getSymbol().getReferences():
|
for r in f.getSymbol().getReferences():
|
||||||
@@ -110,7 +110,7 @@ def get_file_imports() -> dict[int, list[str]]:
|
|||||||
return import_dict
|
return import_dict
|
||||||
|
|
||||||
|
|
||||||
def get_file_externs() -> dict[int, list[str]]:
|
def get_file_externs() -> Dict[int, List[str]]:
|
||||||
"""
|
"""
|
||||||
Gets function names & addresses of statically-linked library functions
|
Gets function names & addresses of statically-linked library functions
|
||||||
|
|
||||||
@@ -124,7 +124,7 @@ def get_file_externs() -> dict[int, list[str]]:
|
|||||||
- Note: See Symbol Table labels
|
- Note: See Symbol Table labels
|
||||||
"""
|
"""
|
||||||
|
|
||||||
extern_dict: dict[int, list[str]] = {}
|
extern_dict: Dict[int, List[str]] = {}
|
||||||
|
|
||||||
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
|
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
|
||||||
# .isExternal() misses more than this config for the function symbols
|
# .isExternal() misses more than this config for the function symbols
|
||||||
@@ -143,7 +143,7 @@ def get_file_externs() -> dict[int, list[str]]:
|
|||||||
return extern_dict
|
return extern_dict
|
||||||
|
|
||||||
|
|
||||||
def map_fake_import_addrs() -> dict[int, list[int]]:
|
def map_fake_import_addrs() -> Dict[int, List[int]]:
|
||||||
"""
|
"""
|
||||||
Map ghidra's fake import entrypoints to their
|
Map ghidra's fake import entrypoints to their
|
||||||
real addresses
|
real addresses
|
||||||
@@ -162,7 +162,7 @@ def map_fake_import_addrs() -> dict[int, list[int]]:
|
|||||||
- 0x473090 -> PTR_CreateServiceW_00473090
|
- 0x473090 -> PTR_CreateServiceW_00473090
|
||||||
- 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress)
|
- 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress)
|
||||||
"""
|
"""
|
||||||
fake_dict: dict[int, list[int]] = {}
|
fake_dict: Dict[int, List[int]] = {}
|
||||||
|
|
||||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||||
for r in f.getSymbol().getReferences():
|
for r in f.getSymbol().getReferences():
|
||||||
@@ -174,9 +174,9 @@ def map_fake_import_addrs() -> dict[int, list[int]]:
|
|||||||
|
|
||||||
def check_addr_for_api(
|
def check_addr_for_api(
|
||||||
addr: ghidra.program.model.address.Address,
|
addr: ghidra.program.model.address.Address,
|
||||||
fakes: dict[int, list[int]],
|
fakes: Dict[int, List[int]],
|
||||||
imports: dict[int, list[str]],
|
imports: Dict[int, List[str]],
|
||||||
externs: dict[int, list[str]],
|
externs: Dict[int, List[str]],
|
||||||
) -> bool:
|
) -> bool:
|
||||||
offset = addr.getOffset()
|
offset = addr.getOffset()
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Any, Iterator
|
from typing import Any, Dict, Tuple, Iterator
|
||||||
|
|
||||||
import ghidra
|
import ghidra
|
||||||
from ghidra.program.model.lang import OperandType
|
from ghidra.program.model.lang import OperandType
|
||||||
@@ -26,21 +26,21 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
|||||||
OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS
|
OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
|
def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||||
"""Populate the import cache for this context"""
|
"""Populate the import cache for this context"""
|
||||||
if "imports_cache" not in ctx:
|
if "imports_cache" not in ctx:
|
||||||
ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports()
|
ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports()
|
||||||
return ctx["imports_cache"]
|
return ctx["imports_cache"]
|
||||||
|
|
||||||
|
|
||||||
def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
|
def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||||
"""Populate the externs cache for this context"""
|
"""Populate the externs cache for this context"""
|
||||||
if "externs_cache" not in ctx:
|
if "externs_cache" not in ctx:
|
||||||
ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs()
|
ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs()
|
||||||
return ctx["externs_cache"]
|
return ctx["externs_cache"]
|
||||||
|
|
||||||
|
|
||||||
def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]:
|
def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||||
"""Populate the fake import addrs cache for this context"""
|
"""Populate the fake import addrs cache for this context"""
|
||||||
if "fakes_cache" not in ctx:
|
if "fakes_cache" not in ctx:
|
||||||
ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs()
|
ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs()
|
||||||
@@ -48,7 +48,7 @@ def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def check_for_api_call(
|
def check_for_api_call(
|
||||||
insn, externs: dict[int, Any], fakes: dict[int, Any], imports: dict[int, Any], imp_or_ex: bool
|
insn, externs: Dict[int, Any], fakes: Dict[int, Any], imports: Dict[int, Any], imp_or_ex: bool
|
||||||
) -> Iterator[Any]:
|
) -> Iterator[Any]:
|
||||||
"""check instruction for API call
|
"""check instruction for API call
|
||||||
|
|
||||||
@@ -110,7 +110,7 @@ def check_for_api_call(
|
|||||||
yield info
|
yield info
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
|
if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
|
||||||
@@ -131,7 +131,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle)
|
|||||||
yield API(ext), ih.address
|
yield API(ext), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction number features
|
parse instruction number features
|
||||||
example:
|
example:
|
||||||
@@ -186,7 +186,7 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
|||||||
yield OperandOffset(i, const), addr
|
yield OperandOffset(i, const), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction structure offset features
|
parse instruction structure offset features
|
||||||
|
|
||||||
@@ -219,7 +219,7 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
|||||||
yield OperandOffset(i, op_off), ih.address
|
yield OperandOffset(i, op_off), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse referenced byte sequences
|
parse referenced byte sequences
|
||||||
|
|
||||||
@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
|||||||
yield Bytes(extracted_bytes), ih.address
|
yield Bytes(extracted_bytes), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction string features
|
parse instruction string features
|
||||||
|
|
||||||
@@ -249,7 +249,7 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_insn_mnemonic_features(
|
def extract_insn_mnemonic_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction mnemonic features"""
|
"""parse instruction mnemonic features"""
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
@@ -258,7 +258,7 @@ def extract_insn_mnemonic_features(
|
|||||||
|
|
||||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse call $+5 instruction from the given instruction.
|
parse call $+5 instruction from the given instruction.
|
||||||
"""
|
"""
|
||||||
@@ -279,7 +279,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_segment_access_features(
|
def extract_insn_segment_access_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction fs or gs access"""
|
"""parse instruction fs or gs access"""
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
@@ -294,7 +294,7 @@ def extract_insn_segment_access_features(
|
|||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(
|
def extract_insn_peb_access_characteristic_features(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction peb access
|
"""parse instruction peb access
|
||||||
|
|
||||||
fs:[0x30] on x86, gs:[0x60] on x64
|
fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
@@ -310,7 +310,7 @@ def extract_insn_peb_access_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_cross_section_cflow(
|
def extract_insn_cross_section_cflow(
|
||||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
@@ -364,7 +364,7 @@ def extract_function_calls_from(
|
|||||||
fh: FunctionHandle,
|
fh: FunctionHandle,
|
||||||
bb: BBHandle,
|
bb: BBHandle,
|
||||||
ih: InsnHandle,
|
ih: InsnHandle,
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract functions calls from features
|
"""extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||||
@@ -393,7 +393,7 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
fh: FunctionHandle,
|
fh: FunctionHandle,
|
||||||
bb: BBHandle,
|
bb: BBHandle,
|
||||||
ih: InsnHandle,
|
ih: InsnHandle,
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
|
|
||||||
@@ -442,7 +442,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
fh: FunctionHandle,
|
fh: FunctionHandle,
|
||||||
bb: BBHandle,
|
bb: BBHandle,
|
||||||
ih: InsnHandle,
|
ih: InsnHandle,
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
f: ghidra.program.database.function.FunctionDB = fh.inner
|
f: ghidra.program.database.function.FunctionDB = fh.inner
|
||||||
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
insn: ghidra.program.database.code.InstructionDB = ih.inner
|
||||||
|
|
||||||
@@ -461,7 +461,7 @@ def extract_features(
|
|||||||
fh: FunctionHandle,
|
fh: FunctionHandle,
|
||||||
bb: BBHandle,
|
bb: BBHandle,
|
||||||
insn: InsnHandle,
|
insn: InsnHandle,
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for insn_handler in INSTRUCTION_HANDLERS:
|
for insn_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, addr in insn_handler(fh, bb, insn):
|
for feature, addr in insn_handler(fh, bb, insn):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import struct
|
import struct
|
||||||
import builtins
|
import builtins
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
MIN_STACKSTRING_LEN = 8
|
MIN_STACKSTRING_LEN = 8
|
||||||
|
|
||||||
@@ -119,7 +119,7 @@ def twos_complement(val: int, bits: int) -> int:
|
|||||||
return val
|
return val
|
||||||
|
|
||||||
|
|
||||||
def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
|
def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]:
|
||||||
"""
|
"""
|
||||||
Generate (offset, key) tuples of embedded PEs
|
Generate (offset, key) tuples of embedded PEs
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import idaapi
|
import idaapi
|
||||||
|
|
||||||
@@ -80,19 +80,19 @@ def bb_contains_stackstring(f: idaapi.func_t, bb: idaapi.BasicBlock) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract stackstring indicators from basic block"""
|
"""extract stackstring indicators from basic block"""
|
||||||
if bb_contains_stackstring(fh.inner, bbh.inner):
|
if bb_contains_stackstring(fh.inner, bbh.inner):
|
||||||
yield Characteristic("stack string"), bbh.address
|
yield Characteristic("stack string"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract tight loop indicators from a basic block"""
|
"""extract tight loop indicators from a basic block"""
|
||||||
if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner):
|
if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner):
|
||||||
yield Characteristic("tight loop"), bbh.address
|
yield Characteristic("tight loop"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract basic block features"""
|
"""extract basic block features"""
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
for feature, addr in bb_handler(fh, bbh):
|
for feature, addr in bb_handler(fh, bbh):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
|
||||||
import idaapi
|
import idaapi
|
||||||
|
|
||||||
@@ -36,7 +36,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
|
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.global_features: list[tuple[Feature, Address]] = []
|
self.global_features: List[Tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
|
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
|
||||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
|
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
|
||||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
|
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
|
||||||
@@ -61,7 +61,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
f = idaapi.get_func(ea)
|
f = idaapi.get_func(ea)
|
||||||
return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f)
|
return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.ida.function.extract_features(fh)
|
yield from capa.features.extractors.ida.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
@@ -70,7 +70,7 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
for bb in ida_helpers.get_function_blocks(fh.inner):
|
for bb in ida_helpers.get_function_blocks(fh.inner):
|
||||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb)
|
yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb)
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import struct
|
import struct
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import idc
|
import idc
|
||||||
import idaapi
|
import idaapi
|
||||||
@@ -26,7 +26,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
|
|||||||
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
||||||
|
|
||||||
|
|
||||||
def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
|
def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]:
|
||||||
"""check segment for embedded PE
|
"""check segment for embedded PE
|
||||||
|
|
||||||
adapted for IDA from:
|
adapted for IDA from:
|
||||||
@@ -71,7 +71,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
|
|||||||
yield off, i
|
yield off, i
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract embedded PE features
|
"""extract embedded PE features
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
@@ -83,7 +83,7 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract function exports"""
|
"""extract function exports"""
|
||||||
for _, ordinal, ea, name in idautils.Entries():
|
for _, ordinal, ea, name in idautils.Entries():
|
||||||
forwarded_name = ida_entry.get_entry_forwarder(ordinal)
|
forwarded_name = ida_entry.get_entry_forwarder(ordinal)
|
||||||
@@ -95,7 +95,7 @@ def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea)
|
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract function imports
|
"""extract function imports
|
||||||
|
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
@@ -131,7 +131,7 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield Import(info[1]), AbsoluteVirtualAddress(ea)
|
yield Import(info[1]), AbsoluteVirtualAddress(ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract section names
|
"""extract section names
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
@@ -142,7 +142,7 @@ def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea)
|
yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract ASCII and UTF-16 LE strings
|
"""extract ASCII and UTF-16 LE strings
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
@@ -160,7 +160,7 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract the names of statically-linked library functions.
|
extract the names of statically-linked library functions.
|
||||||
"""
|
"""
|
||||||
@@ -177,7 +177,7 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield FunctionName(name[1:]), addr
|
yield FunctionName(name[1:]), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
||||||
filetype = capa.ida.helpers.get_filetype()
|
filetype = capa.ida.helpers.get_filetype()
|
||||||
|
|
||||||
if filetype in (idaapi.f_PE, idaapi.f_COFF):
|
if filetype in (idaapi.f_PE, idaapi.f_COFF):
|
||||||
@@ -191,7 +191,7 @@ def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
|||||||
raise NotImplementedError(f"unexpected file format: {filetype}")
|
raise NotImplementedError(f"unexpected file format: {filetype}")
|
||||||
|
|
||||||
|
|
||||||
def extract_features() -> Iterator[tuple[Feature, Address]]:
|
def extract_features() -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract file features"""
|
"""extract file features"""
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, addr in file_handler():
|
for feature, addr in file_handler():
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import idaapi
|
import idaapi
|
||||||
import idautils
|
import idautils
|
||||||
@@ -43,7 +43,7 @@ def extract_recursive_call(fh: FunctionHandle):
|
|||||||
yield Characteristic("recursive call"), fh.address
|
yield Characteristic("recursive call"), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import ida_loader
|
import ida_loader
|
||||||
|
|
||||||
@@ -19,7 +19,7 @@ from capa.features.address import NO_ADDRESS, Address
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_os() -> Iterator[tuple[Feature, Address]]:
|
def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
||||||
format_name: str = ida_loader.get_file_type_name()
|
format_name: str = ida_loader.get_file_type_name()
|
||||||
|
|
||||||
if "PE" in format_name:
|
if "PE" in format_name:
|
||||||
@@ -46,7 +46,7 @@ def extract_os() -> Iterator[tuple[Feature, Address]]:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_arch() -> Iterator[tuple[Feature, Address]]:
|
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
|
||||||
procname = capa.ida.helpers.get_processor_name()
|
procname = capa.ida.helpers.get_processor_name()
|
||||||
if procname == "metapc" and capa.ida.helpers.is_64bit():
|
if procname == "metapc" and capa.ida.helpers.is_64bit():
|
||||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import functools
|
import functools
|
||||||
from typing import Any, Iterator, Optional
|
from typing import Any, Dict, Tuple, Iterator, Optional
|
||||||
|
|
||||||
import idc
|
import idc
|
||||||
import idaapi
|
import idaapi
|
||||||
@@ -41,15 +41,7 @@ if hasattr(ida_bytes, "parse_binpat_str"):
|
|||||||
return
|
return
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
ea = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
|
ea, _ = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
|
||||||
if isinstance(ea, int):
|
|
||||||
# "ea_t" in IDA 8.4, 8.3
|
|
||||||
pass
|
|
||||||
elif isinstance(ea, tuple):
|
|
||||||
# "drc_t" in IDA 9
|
|
||||||
ea = ea[0]
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(f"bin_search returned unhandled type: {type(ea)}")
|
|
||||||
if ea == idaapi.BADADDR:
|
if ea == idaapi.BADADDR:
|
||||||
break
|
break
|
||||||
start = ea + 1
|
start = ea + 1
|
||||||
@@ -132,9 +124,9 @@ def inspect_import(imports, library, ea, function, ordinal):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def get_file_imports() -> dict[int, tuple[str, str, int]]:
|
def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
|
||||||
"""get file imports"""
|
"""get file imports"""
|
||||||
imports: dict[int, tuple[str, str, int]] = {}
|
imports: Dict[int, Tuple[str, str, int]] = {}
|
||||||
|
|
||||||
for idx in range(idaapi.get_import_module_qty()):
|
for idx in range(idaapi.get_import_module_qty()):
|
||||||
library = idaapi.get_import_module_name(idx)
|
library = idaapi.get_import_module_name(idx)
|
||||||
@@ -155,7 +147,7 @@ def get_file_imports() -> dict[int, tuple[str, str, int]]:
|
|||||||
return imports
|
return imports
|
||||||
|
|
||||||
|
|
||||||
def get_file_externs() -> dict[int, tuple[str, str, int]]:
|
def get_file_externs() -> Dict[int, Tuple[str, str, int]]:
|
||||||
externs = {}
|
externs = {}
|
||||||
|
|
||||||
for seg in get_segments(skip_header_segments=True):
|
for seg in get_segments(skip_header_segments=True):
|
||||||
@@ -256,7 +248,7 @@ def find_string_at(ea: int, min_: int = 4) -> str:
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def get_op_phrase_info(op: idaapi.op_t) -> dict:
|
def get_op_phrase_info(op: idaapi.op_t) -> Dict:
|
||||||
"""parse phrase features from operand
|
"""parse phrase features from operand
|
||||||
|
|
||||||
Pretty much dup of sark's implementation:
|
Pretty much dup of sark's implementation:
|
||||||
@@ -331,7 +323,7 @@ def is_frame_register(reg: int) -> bool:
|
|||||||
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
|
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
|
||||||
|
|
||||||
|
|
||||||
def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[tuple[Any]] = None) -> idaapi.op_t:
|
def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[Tuple[Any]] = None) -> idaapi.op_t:
|
||||||
"""yield op_t for instruction, filter on type if specified"""
|
"""yield op_t for instruction, filter on type if specified"""
|
||||||
for op in insn.ops:
|
for op in insn.ops:
|
||||||
if op.type == idaapi.o_void:
|
if op.type == idaapi.o_void:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import re
|
import re
|
||||||
from typing import Any, Iterator, Optional
|
from typing import Any, Dict, Tuple, Iterator, Optional
|
||||||
|
|
||||||
import idc
|
import idc
|
||||||
import ida_ua
|
import ida_ua
|
||||||
@@ -25,19 +25,19 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi
|
|||||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||||
|
|
||||||
|
|
||||||
def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
|
def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||||
if "imports_cache" not in ctx:
|
if "imports_cache" not in ctx:
|
||||||
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
|
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
|
||||||
return ctx["imports_cache"]
|
return ctx["imports_cache"]
|
||||||
|
|
||||||
|
|
||||||
def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
|
def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
|
||||||
if "externs_cache" not in ctx:
|
if "externs_cache" not in ctx:
|
||||||
ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs()
|
ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs()
|
||||||
return ctx["externs_cache"]
|
return ctx["externs_cache"]
|
||||||
|
|
||||||
|
|
||||||
def check_for_api_call(insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[tuple[str, str]]:
|
def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[Tuple[str, str]]:
|
||||||
"""check instruction for API call"""
|
"""check instruction for API call"""
|
||||||
info = None
|
info = None
|
||||||
ref = insn.ea
|
ref = insn.ea
|
||||||
@@ -65,7 +65,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[t
|
|||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction API features
|
parse instruction API features
|
||||||
|
|
||||||
@@ -135,7 +135,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
|||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction number features
|
parse instruction number features
|
||||||
example:
|
example:
|
||||||
@@ -181,7 +181,7 @@ def extract_insn_number_features(
|
|||||||
yield OperandOffset(i, const), ih.address
|
yield OperandOffset(i, const), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse referenced byte sequences
|
parse referenced byte sequences
|
||||||
example:
|
example:
|
||||||
@@ -203,7 +203,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_insn_string_features(
|
def extract_insn_string_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction string features
|
parse instruction string features
|
||||||
|
|
||||||
@@ -221,7 +221,7 @@ def extract_insn_string_features(
|
|||||||
|
|
||||||
def extract_insn_offset_features(
|
def extract_insn_offset_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction structure offset features
|
parse instruction structure offset features
|
||||||
|
|
||||||
@@ -369,7 +369,7 @@ def is_nzxor_stack_cookie(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse instruction non-zeroing XOR instruction
|
parse instruction non-zeroing XOR instruction
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies
|
ignore expected non-zeroing XORs, e.g. security cookies
|
||||||
@@ -387,14 +387,14 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_mnemonic_features(
|
def extract_insn_mnemonic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction mnemonic features"""
|
"""parse instruction mnemonic features"""
|
||||||
yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address
|
yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse call $+5 instruction from the given instruction.
|
parse call $+5 instruction from the given instruction.
|
||||||
"""
|
"""
|
||||||
@@ -409,7 +409,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(
|
def extract_insn_peb_access_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction peb access
|
"""parse instruction peb access
|
||||||
|
|
||||||
fs:[0x30] on x86, gs:[0x60] on x64
|
fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
@@ -437,7 +437,7 @@ def extract_insn_peb_access_characteristic_features(
|
|||||||
|
|
||||||
def extract_insn_segment_access_features(
|
def extract_insn_segment_access_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse instruction fs or gs access
|
"""parse instruction fs or gs access
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
@@ -466,7 +466,7 @@ def extract_insn_segment_access_features(
|
|||||||
|
|
||||||
def extract_insn_cross_section_cflow(
|
def extract_insn_cross_section_cflow(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||||
insn: idaapi.insn_t = ih.inner
|
insn: idaapi.insn_t = ih.inner
|
||||||
|
|
||||||
@@ -482,7 +482,7 @@ def extract_insn_cross_section_cflow(
|
|||||||
yield Characteristic("cross section flow"), ih.address
|
yield Characteristic("cross section flow"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract functions calls from features
|
"""extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||||
@@ -496,7 +496,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
|
|
||||||
@@ -509,7 +509,7 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
yield Characteristic("indirect call"), ih.address
|
yield Characteristic("indirect call"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""extract instruction features"""
|
"""extract instruction features"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, ea in inst_handler(f, bbh, insn):
|
for feature, ea in inst_handler(f, bbh, insn):
|
||||||
|
|||||||
@@ -5,9 +5,11 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Union, TypeAlias
|
from typing import Dict, List, Tuple, Union
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from typing_extensions import TypeAlias
|
||||||
|
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature
|
||||||
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, DynamicCallAddress
|
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, DynamicCallAddress
|
||||||
from capa.features.extractors.base_extractor import (
|
from capa.features.extractors.base_extractor import (
|
||||||
@@ -25,19 +27,19 @@ from capa.features.extractors.base_extractor import (
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class InstructionFeatures:
|
class InstructionFeatures:
|
||||||
features: list[tuple[Address, Feature]]
|
features: List[Tuple[Address, Feature]]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BasicBlockFeatures:
|
class BasicBlockFeatures:
|
||||||
features: list[tuple[Address, Feature]]
|
features: List[Tuple[Address, Feature]]
|
||||||
instructions: dict[Address, InstructionFeatures]
|
instructions: Dict[Address, InstructionFeatures]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class FunctionFeatures:
|
class FunctionFeatures:
|
||||||
features: list[tuple[Address, Feature]]
|
features: List[Tuple[Address, Feature]]
|
||||||
basic_blocks: dict[Address, BasicBlockFeatures]
|
basic_blocks: Dict[Address, BasicBlockFeatures]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -50,9 +52,9 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
base_address: Address
|
base_address: Address
|
||||||
sample_hashes: SampleHashes
|
sample_hashes: SampleHashes
|
||||||
global_features: list[Feature]
|
global_features: List[Feature]
|
||||||
file_features: list[tuple[Address, Feature]]
|
file_features: List[Tuple[Address, Feature]]
|
||||||
functions: dict[Address, FunctionFeatures]
|
functions: Dict[Address, FunctionFeatures]
|
||||||
|
|
||||||
def get_base_address(self):
|
def get_base_address(self):
|
||||||
return self.base_address
|
return self.base_address
|
||||||
@@ -96,19 +98,19 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor):
|
|||||||
@dataclass
|
@dataclass
|
||||||
class CallFeatures:
|
class CallFeatures:
|
||||||
name: str
|
name: str
|
||||||
features: list[tuple[Address, Feature]]
|
features: List[Tuple[Address, Feature]]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ThreadFeatures:
|
class ThreadFeatures:
|
||||||
features: list[tuple[Address, Feature]]
|
features: List[Tuple[Address, Feature]]
|
||||||
calls: dict[Address, CallFeatures]
|
calls: Dict[Address, CallFeatures]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ProcessFeatures:
|
class ProcessFeatures:
|
||||||
features: list[tuple[Address, Feature]]
|
features: List[Tuple[Address, Feature]]
|
||||||
threads: dict[Address, ThreadFeatures]
|
threads: Dict[Address, ThreadFeatures]
|
||||||
name: str
|
name: str
|
||||||
|
|
||||||
|
|
||||||
@@ -116,9 +118,9 @@ class ProcessFeatures:
|
|||||||
class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
|
class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
|
||||||
base_address: Address
|
base_address: Address
|
||||||
sample_hashes: SampleHashes
|
sample_hashes: SampleHashes
|
||||||
global_features: list[Feature]
|
global_features: List[Feature]
|
||||||
file_features: list[tuple[Address, Feature]]
|
file_features: List[Tuple[Address, Feature]]
|
||||||
processes: dict[Address, ProcessFeatures]
|
processes: Dict[Address, ProcessFeatures]
|
||||||
|
|
||||||
def extract_global_features(self):
|
def extract_global_features(self):
|
||||||
for feature in self.global_features:
|
for feature in self.global_features:
|
||||||
|
|||||||
@@ -148,11 +148,11 @@ def extract_file_features(pe, buf):
|
|||||||
buf: the raw sample bytes
|
buf: the raw sample bytes
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, VA]: a feature and its location.
|
Tuple[Feature, VA]: a feature and its location.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
# file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]]
|
# file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]]
|
||||||
for feature, va in file_handler(pe=pe, buf=buf): # type: ignore
|
for feature, va in file_handler(pe=pe, buf=buf): # type: ignore
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
@@ -177,10 +177,10 @@ def extract_global_features(pe, buf):
|
|||||||
buf: the raw sample bytes
|
buf: the raw sample bytes
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, VA]: a feature and its location.
|
Tuple[Feature, VA]: a feature and its location.
|
||||||
"""
|
"""
|
||||||
for handler in GLOBAL_HANDLERS:
|
for handler in GLOBAL_HANDLERS:
|
||||||
# file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]]
|
# file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]]
|
||||||
for feature, va in handler(pe=pe, buf=buf): # type: ignore
|
for feature, va in handler(pe=pe, buf=buf): # type: ignore
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,6 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
import contextlib
|
import contextlib
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
@@ -20,7 +19,6 @@ ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4))
|
|||||||
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
|
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
|
||||||
REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
|
REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
|
||||||
SLICE_SIZE = 4096
|
SLICE_SIZE = 4096
|
||||||
PRINTABLE_CHAR_SET = set(string.printable)
|
|
||||||
|
|
||||||
String = namedtuple("String", ["s", "offset"])
|
String = namedtuple("String", ["s", "offset"])
|
||||||
|
|
||||||
@@ -86,7 +84,3 @@ def extract_unicode_strings(buf, n=4):
|
|||||||
for match in r.finditer(buf):
|
for match in r.finditer(buf):
|
||||||
with contextlib.suppress(UnicodeDecodeError):
|
with contextlib.suppress(UnicodeDecodeError):
|
||||||
yield String(match.group().decode("utf-16"), match.start())
|
yield String(match.group().decode("utf-16"), match.start())
|
||||||
|
|
||||||
|
|
||||||
def is_printable_str(s: str) -> bool:
|
|
||||||
return set(s).issubset(PRINTABLE_CHAR_SET)
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import envi
|
import envi
|
||||||
import envi.archs.i386.disasm
|
import envi.archs.i386.disasm
|
||||||
@@ -20,7 +20,7 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
|||||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse features from the given basic block.
|
parse features from the given basic block.
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ def _bb_has_tight_loop(f, bb):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""check basic block for tight loop indicators"""
|
"""check basic block for tight loop indicators"""
|
||||||
if _bb_has_tight_loop(f, bb.inner):
|
if _bb_has_tight_loop(f, bb.inner):
|
||||||
yield Characteristic("tight loop"), bb.address
|
yield Characteristic("tight loop"), bb.address
|
||||||
@@ -70,7 +70,7 @@ def _bb_has_stackstring(f, bb):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""check basic block for stackstring indicators"""
|
"""check basic block for stackstring indicators"""
|
||||||
if _bb_has_stackstring(f, bb.inner):
|
if _bb_has_stackstring(f, bb.inner):
|
||||||
yield Characteristic("stack string"), bb.address
|
yield Characteristic("stack string"), bb.address
|
||||||
@@ -145,7 +145,7 @@ def is_printable_utf16le(chars: bytes) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features from the given basic block.
|
extract features from the given basic block.
|
||||||
|
|
||||||
@@ -154,7 +154,7 @@ def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature,
|
|||||||
bb (viv_utils.BasicBlock): the basic block to process.
|
bb (viv_utils.BasicBlock): the basic block to process.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, int]: the features and their location found in this basic block.
|
Tuple[Feature, int]: the features and their location found in this basic block.
|
||||||
"""
|
"""
|
||||||
yield BasicBlock(), AbsoluteVirtualAddress(bb.inner.va)
|
yield BasicBlock(), AbsoluteVirtualAddress(bb.inner.va)
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Iterator
|
from typing import Any, Dict, List, Tuple, Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import viv_utils
|
import viv_utils
|
||||||
@@ -39,7 +39,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
|||||||
super().__init__(hashes=SampleHashes.from_bytes(self.buf))
|
super().__init__(hashes=SampleHashes.from_bytes(self.buf))
|
||||||
|
|
||||||
# pre-compute these because we'll yield them at *every* scope.
|
# pre-compute these because we'll yield them at *every* scope.
|
||||||
self.global_features: list[tuple[Feature, Address]] = []
|
self.global_features: List[Tuple[Feature, Address]] = []
|
||||||
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
|
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
|
||||||
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os))
|
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os))
|
||||||
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
|
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
|
||||||
@@ -55,13 +55,13 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
|||||||
yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
|
yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
|
||||||
|
|
||||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||||
cache: dict[str, Any] = {}
|
cache: Dict[str, Any] = {}
|
||||||
for va in sorted(self.vw.getFunctions()):
|
for va in sorted(self.vw.getFunctions()):
|
||||||
yield FunctionHandle(
|
yield FunctionHandle(
|
||||||
address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}
|
address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.viv.function.extract_features(fh)
|
yield from capa.features.extractors.viv.function.extract_features(fh)
|
||||||
|
|
||||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||||
@@ -69,7 +69,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
|||||||
for bb in f.basic_blocks:
|
for bb in f.basic_blocks:
|
||||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb)
|
yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb)
|
||||||
|
|
||||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[tuple[Feature, Address]]:
|
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.viv.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.viv.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
@@ -79,7 +79,7 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
|||||||
|
|
||||||
def extract_insn_features(
|
def extract_insn_features(
|
||||||
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih)
|
yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih)
|
||||||
|
|
||||||
def is_library_function(self, addr):
|
def is_library_function(self, addr):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import PE.carve as pe_carve # vivisect PE
|
import PE.carve as pe_carve # vivisect PE
|
||||||
import vivisect
|
import vivisect
|
||||||
@@ -21,7 +21,7 @@ from capa.features.common import Feature, Characteristic
|
|||||||
from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress
|
from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_embedded_pe(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for offset, _ in pe_carve.carve(buf, 1):
|
for offset, _ in pe_carve.carve(buf, 1):
|
||||||
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ def get_first_vw_filename(vw: vivisect.VivWorkspace):
|
|||||||
return next(iter(vw.filemeta.keys()))
|
return next(iter(vw.filemeta.keys()))
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for va, _, name, _ in vw.getExports():
|
for va, _, name, _ in vw.getExports():
|
||||||
yield Export(name), AbsoluteVirtualAddress(va)
|
yield Export(name), AbsoluteVirtualAddress(va)
|
||||||
|
|
||||||
@@ -56,7 +56,7 @@ def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[t
|
|||||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va)
|
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract imported function names
|
extract imported function names
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
@@ -91,16 +91,16 @@ def is_viv_ord_impname(impname: str) -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_section_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for va, _, segname, _ in vw.getSegments():
|
for va, _, segname, _ in vw.getSegments():
|
||||||
yield Section(segname), AbsoluteVirtualAddress(va)
|
yield Section(segname), AbsoluteVirtualAddress(va)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_file_function_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract the names of statically-linked library functions.
|
extract the names of statically-linked library functions.
|
||||||
"""
|
"""
|
||||||
@@ -117,11 +117,11 @@ def extract_file_function_names(vw, **kwargs) -> Iterator[tuple[Feature, Address
|
|||||||
yield FunctionName(name[1:]), addr
|
yield FunctionName(name[1:]), addr
|
||||||
|
|
||||||
|
|
||||||
def extract_file_format(buf, **kwargs) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_format(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_format(buf)
|
yield from capa.features.extractors.common.extract_format(buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(vw, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract file features from given workspace
|
extract file features from given workspace
|
||||||
|
|
||||||
@@ -130,7 +130,7 @@ def extract_features(vw, buf: bytes) -> Iterator[tuple[Feature, Address]]:
|
|||||||
buf: the raw input file bytes
|
buf: the raw input file bytes
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: a feature and its location.
|
Tuple[Feature, Address]: a feature and its location.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import envi
|
import envi
|
||||||
import viv_utils
|
import viv_utils
|
||||||
@@ -19,7 +19,7 @@ from capa.features.extractors.elf import SymTab
|
|||||||
from capa.features.extractors.base_extractor import FunctionHandle
|
from capa.features.extractors.base_extractor import FunctionHandle
|
||||||
|
|
||||||
|
|
||||||
def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse features from the given function.
|
parse features from the given function.
|
||||||
|
|
||||||
@@ -32,7 +32,7 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[tuple[Feature
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if fh.inner.vw.metadata["Format"] == "elf":
|
if fh.inner.vw.metadata["Format"] == "elf":
|
||||||
# the file's symbol table gets added to the metadata of the vivisect workspace.
|
# the file's symbol table gets added to the metadata of the vivisect workspace.
|
||||||
# this is in order to eliminate the computational overhead of refetching symtab each time.
|
# this is in order to eliminate the computational overhead of refetching symtab each time.
|
||||||
@@ -54,13 +54,13 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[tuple[Feature,
|
|||||||
yield FunctionName(sym_name), fh.address
|
yield FunctionName(sym_name), fh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
f: viv_utils.Function = fhandle.inner
|
f: viv_utils.Function = fhandle.inner
|
||||||
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
||||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(src)
|
yield Characteristic("calls to"), AbsoluteVirtualAddress(src)
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse if a function has a loop
|
parse if a function has a loop
|
||||||
"""
|
"""
|
||||||
@@ -88,7 +88,7 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Ad
|
|||||||
yield Characteristic("loop"), fhandle.address
|
yield Characteristic("loop"), fhandle.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features from the given function.
|
extract features from the given function.
|
||||||
|
|
||||||
@@ -96,7 +96,7 @@ def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
|||||||
fh: the function handle from which to extract features
|
fh: the function handle from which to extract features
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, int]: the features and their location found in this function.
|
Tuple[Feature, int]: the features and their location found in this function.
|
||||||
"""
|
"""
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature
|
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature
|
||||||
from capa.features.address import NO_ADDRESS, Address
|
from capa.features.address import NO_ADDRESS, Address
|
||||||
@@ -14,7 +14,7 @@ from capa.features.address import NO_ADDRESS, Address
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(vw) -> Iterator[tuple[Feature, Address]]:
|
def extract_arch(vw) -> Iterator[Tuple[Feature, Address]]:
|
||||||
arch = vw.getMeta("Architecture")
|
arch = vw.getMeta("Architecture")
|
||||||
if arch == "amd64":
|
if arch == "amd64":
|
||||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
from typing import Deque, Optional
|
from typing import Set, List, Deque, Tuple, Optional
|
||||||
|
|
||||||
import envi
|
import envi
|
||||||
import vivisect.const
|
import vivisect.const
|
||||||
@@ -28,7 +28,7 @@ FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH
|
|||||||
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
|
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
|
||||||
|
|
||||||
|
|
||||||
def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]:
|
def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]:
|
||||||
"""
|
"""
|
||||||
collect the instructions that flow to the given address, local to the current function.
|
collect the instructions that flow to the given address, local to the current function.
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]:
|
|||||||
va (int): the virtual address to inspect
|
va (int): the virtual address to inspect
|
||||||
|
|
||||||
returns:
|
returns:
|
||||||
list[int]: the prior instructions, which may fallthrough and/or jump here
|
List[int]: the prior instructions, which may fallthrough and/or jump here
|
||||||
"""
|
"""
|
||||||
ret = []
|
ret = []
|
||||||
|
|
||||||
@@ -71,7 +71,7 @@ class NotFoundError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def find_definition(vw: VivWorkspace, va: int, reg: int) -> tuple[int, Optional[int]]:
|
def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[int]]:
|
||||||
"""
|
"""
|
||||||
scan backwards from the given address looking for assignments to the given register.
|
scan backwards from the given address looking for assignments to the given register.
|
||||||
if a constant, return that value.
|
if a constant, return that value.
|
||||||
@@ -88,7 +88,7 @@ def find_definition(vw: VivWorkspace, va: int, reg: int) -> tuple[int, Optional[
|
|||||||
NotFoundError: when the definition cannot be found.
|
NotFoundError: when the definition cannot be found.
|
||||||
"""
|
"""
|
||||||
q: Deque[int] = collections.deque()
|
q: Deque[int] = collections.deque()
|
||||||
seen: set[int] = set()
|
seen: Set[int] = set()
|
||||||
|
|
||||||
q.extend(get_previous_instructions(vw, va))
|
q.extend(get_previous_instructions(vw, va))
|
||||||
while q:
|
while q:
|
||||||
@@ -139,7 +139,7 @@ def is_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> bool:
|
|||||||
return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
|
return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
|
||||||
|
|
||||||
|
|
||||||
def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> tuple[int, Optional[int]]:
|
def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> Tuple[int, Optional[int]]:
|
||||||
"""
|
"""
|
||||||
inspect the given indirect call instruction and attempt to resolve the target address.
|
inspect the given indirect call instruction and attempt to resolve the target address.
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Callable, Iterator
|
from typing import List, Tuple, Callable, Iterator
|
||||||
|
|
||||||
import envi
|
import envi
|
||||||
import envi.exc
|
import envi.exc
|
||||||
@@ -33,7 +33,7 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
|
|||||||
|
|
||||||
def interface_extract_instruction_XXX(
|
def interface_extract_instruction_XXX(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse features from the given instruction.
|
parse features from the given instruction.
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ def get_imports(vw):
|
|||||||
caching accessor to vivisect workspace imports
|
caching accessor to vivisect workspace imports
|
||||||
avoids performance issues in vivisect when collecting locations
|
avoids performance issues in vivisect when collecting locations
|
||||||
|
|
||||||
returns: dict[int, tuple[str, str]]
|
returns: Dict[int, Tuple[str, str]]
|
||||||
"""
|
"""
|
||||||
if "imports" in vw.metadata:
|
if "imports" in vw.metadata:
|
||||||
return vw.metadata["imports"]
|
return vw.metadata["imports"]
|
||||||
@@ -65,7 +65,7 @@ def get_imports(vw):
|
|||||||
return imports
|
return imports
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse API features from the given instruction.
|
parse API features from the given instruction.
|
||||||
|
|
||||||
@@ -260,7 +260,7 @@ def read_bytes(vw, va: int) -> bytes:
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse byte sequence features from the given instruction.
|
parse byte sequence features from the given instruction.
|
||||||
example:
|
example:
|
||||||
@@ -371,7 +371,7 @@ def is_security_cookie(f, bb, insn) -> bool:
|
|||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(
|
def extract_insn_nzxor_characteristic_features(
|
||||||
fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse non-zeroing XOR instruction from the given instruction.
|
parse non-zeroing XOR instruction from the given instruction.
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||||
@@ -392,12 +392,12 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
yield Characteristic("nzxor"), ih.address
|
yield Characteristic("nzxor"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse mnemonic features from the given instruction."""
|
"""parse mnemonic features from the given instruction."""
|
||||||
yield Mnemonic(ih.inner.mnem), ih.address
|
yield Mnemonic(ih.inner.mnem), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse call $+5 instruction from the given instruction.
|
parse call $+5 instruction from the given instruction.
|
||||||
"""
|
"""
|
||||||
@@ -415,7 +415,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle)
|
|||||||
yield Characteristic("call $+5"), ih.address
|
yield Characteristic("call $+5"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
"""
|
"""
|
||||||
@@ -451,7 +451,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> It
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse the instruction for access to fs or gs"""
|
"""parse the instruction for access to fs or gs"""
|
||||||
insn: envi.Opcode = ih.inner
|
insn: envi.Opcode = ih.inner
|
||||||
|
|
||||||
@@ -472,7 +472,7 @@ def get_section(vw, va: int):
|
|||||||
raise KeyError(va)
|
raise KeyError(va)
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
||||||
"""
|
"""
|
||||||
@@ -513,7 +513,7 @@ def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) ->
|
|||||||
|
|
||||||
# this is a feature that's most relevant at the function scope,
|
# this is a feature that's most relevant at the function scope,
|
||||||
# however, its most efficient to extract at the instruction scope.
|
# however, its most efficient to extract at the instruction scope.
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
insn: envi.Opcode = ih.inner
|
insn: envi.Opcode = ih.inner
|
||||||
f: viv_utils.Function = fh.inner
|
f: viv_utils.Function = fh.inner
|
||||||
|
|
||||||
@@ -554,7 +554,7 @@ def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera
|
|||||||
|
|
||||||
# this is a feature that's most relevant at the function or basic block scope,
|
# this is a feature that's most relevant at the function or basic block scope,
|
||||||
# however, its most efficient to extract at the instruction scope.
|
# however, its most efficient to extract at the instruction scope.
|
||||||
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
@@ -578,7 +578,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle
|
|||||||
|
|
||||||
def extract_op_number_features(
|
def extract_op_number_features(
|
||||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse number features from the given operand.
|
"""parse number features from the given operand.
|
||||||
|
|
||||||
example:
|
example:
|
||||||
@@ -623,7 +623,7 @@ def extract_op_number_features(
|
|||||||
|
|
||||||
def extract_op_offset_features(
|
def extract_op_offset_features(
|
||||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse structure offset features from the given operand."""
|
"""parse structure offset features from the given operand."""
|
||||||
# example:
|
# example:
|
||||||
#
|
#
|
||||||
@@ -674,7 +674,7 @@ def extract_op_offset_features(
|
|||||||
|
|
||||||
def extract_op_string_features(
|
def extract_op_string_features(
|
||||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""parse string features from the given operand."""
|
"""parse string features from the given operand."""
|
||||||
# example:
|
# example:
|
||||||
#
|
#
|
||||||
@@ -705,15 +705,15 @@ def extract_op_string_features(
|
|||||||
yield String(s), ih.address
|
yield String(s), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for i, oper in enumerate(insn.inner.opers):
|
for i, oper in enumerate(insn.inner.opers):
|
||||||
for op_handler in OPERAND_HANDLERS:
|
for op_handler in OPERAND_HANDLERS:
|
||||||
for feature, addr in op_handler(f, bb, insn, i, oper):
|
for feature, addr in op_handler(f, bb, insn, i, oper):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
|
|
||||||
OPERAND_HANDLERS: list[
|
OPERAND_HANDLERS: List[
|
||||||
Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[tuple[Feature, Address]]]
|
Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[Tuple[Feature, Address]]]
|
||||||
] = [
|
] = [
|
||||||
extract_op_number_features,
|
extract_op_number_features,
|
||||||
extract_op_offset_features,
|
extract_op_offset_features,
|
||||||
@@ -721,7 +721,7 @@ OPERAND_HANDLERS: list[
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f, bb, insn) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features from the given insn.
|
extract features from the given insn.
|
||||||
|
|
||||||
@@ -731,14 +731,14 @@ def extract_features(f, bb, insn) -> Iterator[tuple[Feature, Address]]:
|
|||||||
insn (vivisect...Instruction): the instruction to process.
|
insn (vivisect...Instruction): the instruction to process.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
tuple[Feature, Address]: the features and their location found in this insn.
|
Tuple[Feature, Address]: the features and their location found in this insn.
|
||||||
"""
|
"""
|
||||||
for insn_handler in INSTRUCTION_HANDLERS:
|
for insn_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, addr in insn_handler(f, bb, insn):
|
for feature, addr in insn_handler(f, bb, insn):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
|
|
||||||
INSTRUCTION_HANDLERS: list[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[tuple[Feature, Address]]]] = [
|
INSTRUCTION_HANDLERS: List[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[Tuple[Feature, Address]]]] = [
|
||||||
extract_insn_api_features,
|
extract_insn_api_features,
|
||||||
extract_insn_bytes_features,
|
extract_insn_bytes_features,
|
||||||
extract_insn_nzxor_characteristic_features,
|
extract_insn_nzxor_characteristic_features,
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Dict, List, Tuple, Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@@ -34,10 +34,7 @@ class VMRayMonitorProcess:
|
|||||||
pid: int # process ID assigned by OS
|
pid: int # process ID assigned by OS
|
||||||
ppid: int # parent process ID assigned by OS
|
ppid: int # parent process ID assigned by OS
|
||||||
monitor_id: int # unique ID assigned to process by VMRay
|
monitor_id: int # unique ID assigned to process by VMRay
|
||||||
origin_monitor_id: int # unique VMRay ID of parent process
|
|
||||||
image_name: str
|
image_name: str
|
||||||
filename: Optional[str] = ""
|
|
||||||
cmd_line: Optional[str] = ""
|
|
||||||
|
|
||||||
|
|
||||||
class VMRayAnalysis:
|
class VMRayAnalysis:
|
||||||
@@ -61,17 +58,17 @@ class VMRayAnalysis:
|
|||||||
"VMRay feature extractor does not support flog version %s" % self.flog.analysis.log_version
|
"VMRay feature extractor does not support flog version %s" % self.flog.analysis.log_version
|
||||||
)
|
)
|
||||||
|
|
||||||
self.exports: dict[int, str] = {}
|
self.exports: Dict[int, str] = {}
|
||||||
self.imports: dict[int, tuple[str, str]] = {}
|
self.imports: Dict[int, Tuple[str, str]] = {}
|
||||||
self.sections: dict[int, str] = {}
|
self.sections: Dict[int, str] = {}
|
||||||
self.monitor_processes: dict[int, VMRayMonitorProcess] = {}
|
self.monitor_processes: Dict[int, VMRayMonitorProcess] = {}
|
||||||
self.monitor_threads: dict[int, VMRayMonitorThread] = {}
|
self.monitor_threads: Dict[int, VMRayMonitorThread] = {}
|
||||||
|
|
||||||
# map monitor thread IDs to their associated monitor process ID
|
# map monitor thread IDs to their associated monitor process ID
|
||||||
self.monitor_threads_by_monitor_process: dict[int, list[int]] = defaultdict(list)
|
self.monitor_threads_by_monitor_process: Dict[int, List[int]] = defaultdict(list)
|
||||||
|
|
||||||
# map function calls to their associated monitor thread ID mapped to its associated monitor process ID
|
# map function calls to their associated monitor thread ID mapped to its associated monitor process ID
|
||||||
self.monitor_process_calls: dict[int, dict[int, list[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
|
self.monitor_process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
|
||||||
|
|
||||||
self.base_address: int
|
self.base_address: int
|
||||||
|
|
||||||
@@ -163,13 +160,7 @@ class VMRayAnalysis:
|
|||||||
self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
|
self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
|
||||||
)
|
)
|
||||||
self.monitor_processes[process.monitor_id] = VMRayMonitorProcess(
|
self.monitor_processes[process.monitor_id] = VMRayMonitorProcess(
|
||||||
process.os_pid,
|
process.os_pid, ppid, process.monitor_id, process.image_name
|
||||||
ppid,
|
|
||||||
process.monitor_id,
|
|
||||||
process.origin_monitor_id,
|
|
||||||
process.image_name,
|
|
||||||
process.filename,
|
|
||||||
process.cmd_line,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394
|
# not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394
|
||||||
@@ -178,28 +169,14 @@ class VMRayAnalysis:
|
|||||||
monitor_process.os_pid,
|
monitor_process.os_pid,
|
||||||
monitor_process.os_parent_pid,
|
monitor_process.os_parent_pid,
|
||||||
monitor_process.process_id,
|
monitor_process.process_id,
|
||||||
monitor_process.parent_id,
|
|
||||||
monitor_process.image_name,
|
monitor_process.image_name,
|
||||||
monitor_process.filename,
|
|
||||||
monitor_process.cmd_line,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if monitor_process.process_id not in self.monitor_processes:
|
if monitor_process.process_id not in self.monitor_processes:
|
||||||
self.monitor_processes[monitor_process.process_id] = vmray_monitor_process
|
self.monitor_processes[monitor_process.process_id] = vmray_monitor_process
|
||||||
else:
|
else:
|
||||||
# we expect monitor processes recorded in both SummaryV2.json and flog.xml to equal
|
# we expect monitor processes recorded in both SummaryV2.json and flog.xml to equal
|
||||||
# to ensure this, we compare the pid, monitor_id, and origin_monitor_id
|
assert self.monitor_processes[monitor_process.process_id] == vmray_monitor_process
|
||||||
# for the other fields we've observed cases with slight deviations, e.g.,
|
|
||||||
# the ppid for a process in flog.xml is not set correctly, all other data is equal
|
|
||||||
sv2p = self.monitor_processes[monitor_process.process_id]
|
|
||||||
if self.monitor_processes[monitor_process.process_id] != vmray_monitor_process:
|
|
||||||
logger.debug("processes differ: %s (sv2) vs. %s (flog)", sv2p, vmray_monitor_process)
|
|
||||||
|
|
||||||
assert (sv2p.pid, sv2p.monitor_id, sv2p.origin_monitor_id) == (
|
|
||||||
vmray_monitor_process.pid,
|
|
||||||
vmray_monitor_process.monitor_id,
|
|
||||||
vmray_monitor_process.origin_monitor_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
def _compute_monitor_threads(self):
|
def _compute_monitor_threads(self):
|
||||||
for monitor_thread in self.flog.analysis.monitor_threads:
|
for monitor_thread in self.flog.analysis.monitor_threads:
|
||||||
|
|||||||
@@ -6,20 +6,19 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
from capa.features.insn import API, Number
|
from capa.features.insn import API, Number
|
||||||
from capa.features.common import String, Feature
|
from capa.features.common import String, Feature
|
||||||
from capa.features.address import Address
|
from capa.features.address import Address
|
||||||
from capa.features.extractors.strings import is_printable_str
|
|
||||||
from capa.features.extractors.vmray.models import PARAM_TYPE_INT, PARAM_TYPE_STR, Param, FunctionCall, hexint
|
from capa.features.extractors.vmray.models import PARAM_TYPE_INT, PARAM_TYPE_STR, Param, FunctionCall, hexint
|
||||||
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
|
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if param.deref is not None:
|
if param.deref is not None:
|
||||||
# pointer types contain a special "deref" member that stores the deref'd value
|
# pointer types contain a special "deref" member that stores the deref'd value
|
||||||
# so we check for this first and ignore Param.value as this always contains the
|
# so we check for this first and ignore Param.value as this always contains the
|
||||||
@@ -28,7 +27,9 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feat
|
|||||||
if param.deref.type_ in PARAM_TYPE_INT:
|
if param.deref.type_ in PARAM_TYPE_INT:
|
||||||
yield Number(hexint(param.deref.value)), ch.address
|
yield Number(hexint(param.deref.value)), ch.address
|
||||||
elif param.deref.type_ in PARAM_TYPE_STR:
|
elif param.deref.type_ in PARAM_TYPE_STR:
|
||||||
if is_printable_str(param.deref.value):
|
# TODO(mr-tz): remove FPS like " \\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\..."
|
||||||
|
# https://github.com/mandiant/capa/issues/2432
|
||||||
|
|
||||||
# parsing the data up to here results in double-escaped backslashes, remove those here
|
# parsing the data up to here results in double-escaped backslashes, remove those here
|
||||||
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
|
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
|
||||||
else:
|
else:
|
||||||
@@ -38,7 +39,7 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feat
|
|||||||
yield Number(hexint(param.value)), ch.address
|
yield Number(hexint(param.value)), ch.address
|
||||||
|
|
||||||
|
|
||||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
call: FunctionCall = ch.inner
|
call: FunctionCall = ch.inner
|
||||||
|
|
||||||
if call.params_in:
|
if call.params_in:
|
||||||
@@ -49,7 +50,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
|||||||
yield API(name), ch.address
|
yield API(name), ch.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in CALL_HANDLERS:
|
for handler in CALL_HANDLERS:
|
||||||
for feature, addr in handler(ph, th, ch):
|
for feature, addr in handler(ph, th, ch):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
from typing import Iterator
|
from typing import List, Tuple, Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import capa.helpers
|
import capa.helpers
|
||||||
@@ -34,8 +34,8 @@ from capa.features.extractors.base_extractor import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_formatted_params(params: ParamList) -> list[str]:
|
def get_formatted_params(params: ParamList) -> List[str]:
|
||||||
params_list: list[str] = []
|
params_list: List[str] = []
|
||||||
|
|
||||||
for param in params:
|
for param in params:
|
||||||
if param.deref and param.deref.value is not None:
|
if param.deref and param.deref.value is not None:
|
||||||
@@ -69,10 +69,10 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
# value according to the PE header, the actual trace may use a different imagebase
|
# value according to the PE header, the actual trace may use a different imagebase
|
||||||
return AbsoluteVirtualAddress(self.analysis.base_address)
|
return AbsoluteVirtualAddress(self.analysis.base_address)
|
||||||
|
|
||||||
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.vmray.file.extract_features(self.analysis)
|
yield from capa.features.extractors.vmray.file.extract_features(self.analysis)
|
||||||
|
|
||||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from self.global_features
|
yield from self.global_features
|
||||||
|
|
||||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||||
@@ -80,13 +80,13 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
|
address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
|
||||||
yield ProcessHandle(address, inner=monitor_process)
|
yield ProcessHandle(address, inner=monitor_process)
|
||||||
|
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
# we have not identified process-specific features for VMRay yet
|
# we have not identified process-specific features for VMRay yet
|
||||||
yield from []
|
yield from []
|
||||||
|
|
||||||
def get_process_name(self, ph) -> str:
|
def get_process_name(self, ph) -> str:
|
||||||
monitor_process: VMRayMonitorProcess = ph.inner
|
monitor_process: VMRayMonitorProcess = ph.inner
|
||||||
return f"{monitor_process.image_name} ({monitor_process.cmd_line})"
|
return monitor_process.image_name
|
||||||
|
|
||||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||||
for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]:
|
for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]:
|
||||||
@@ -95,7 +95,7 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid)
|
address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid)
|
||||||
yield ThreadHandle(address=address, inner=monitor_thread)
|
yield ThreadHandle(address=address, inner=monitor_thread)
|
||||||
|
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
if False:
|
if False:
|
||||||
# force this routine to be a generator,
|
# force this routine to be a generator,
|
||||||
# but we don't actually have any elements to generate.
|
# but we don't actually have any elements to generate.
|
||||||
@@ -109,7 +109,7 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
|
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
|
yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
|
||||||
|
|
||||||
def get_call_name(self, ph, th, ch) -> str:
|
def get_call_name(self, ph, th, ch) -> str:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.common
|
import capa.features.extractors.common
|
||||||
from capa.features.file import Export, Import, Section
|
from capa.features.file import Export, Import, Section
|
||||||
@@ -18,52 +18,52 @@ from capa.features.extractors.helpers import generate_symbols
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_export_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_export_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for addr, name in analysis.exports.items():
|
for addr, name in analysis.exports.items():
|
||||||
yield Export(name), AbsoluteVirtualAddress(addr)
|
yield Export(name), AbsoluteVirtualAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
def extract_import_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_import_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for addr, (module, api) in analysis.imports.items():
|
for addr, (module, api) in analysis.imports.items():
|
||||||
for symbol in generate_symbols(module, api, include_dll=True):
|
for symbol in generate_symbols(module, api, include_dll=True):
|
||||||
yield Import(symbol), AbsoluteVirtualAddress(addr)
|
yield Import(symbol), AbsoluteVirtualAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
def extract_section_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_section_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for addr, name in analysis.sections.items():
|
for addr, name in analysis.sections.items():
|
||||||
yield Section(name), AbsoluteVirtualAddress(addr)
|
yield Section(name), AbsoluteVirtualAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for filename in analysis.sv2.filenames.values():
|
for filename in analysis.sv2.filenames.values():
|
||||||
yield String(filename.filename), NO_ADDRESS
|
yield String(filename.filename), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for mutex in analysis.sv2.mutexes.values():
|
for mutex in analysis.sv2.mutexes.values():
|
||||||
yield String(mutex.name), NO_ADDRESS
|
yield String(mutex.name), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for domain in analysis.sv2.domains.values():
|
for domain in analysis.sv2.domains.values():
|
||||||
yield String(domain.domain), NO_ADDRESS
|
yield String(domain.domain), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for ip_address in analysis.sv2.ip_addresses.values():
|
for ip_address in analysis.sv2.ip_addresses.values():
|
||||||
yield String(ip_address.ip_address), NO_ADDRESS
|
yield String(ip_address.ip_address), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for registry_record in analysis.sv2.registry_records.values():
|
for registry_record in analysis.sv2.registry_records.values():
|
||||||
yield String(registry_record.reg_key_name), NO_ADDRESS
|
yield String(registry_record.reg_key_name), NO_ADDRESS
|
||||||
|
|
||||||
|
|
||||||
def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.common.extract_file_strings(analysis.sample_file_buf)
|
yield from capa.features.extractors.common.extract_file_strings(analysis.sample_file_buf)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for handler in FILE_HANDLERS:
|
for handler in FILE_HANDLERS:
|
||||||
for feature, addr in handler(analysis):
|
for feature, addr in handler(analysis):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterator
|
from typing import Tuple, Iterator
|
||||||
|
|
||||||
from capa.features.common import (
|
from capa.features.common import (
|
||||||
OS,
|
OS,
|
||||||
@@ -27,7 +27,7 @@ from capa.features.extractors.vmray import VMRayAnalysis
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_arch(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
file_type: str = analysis.file_type
|
file_type: str = analysis.file_type
|
||||||
|
|
||||||
if "x86-32" in file_type:
|
if "x86-32" in file_type:
|
||||||
@@ -38,7 +38,7 @@ def extract_arch(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
|||||||
raise ValueError("unrecognized arch from the VMRay report: %s" % file_type)
|
raise ValueError("unrecognized arch from the VMRay report: %s" % file_type)
|
||||||
|
|
||||||
|
|
||||||
def extract_format(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
assert analysis.sample_file_static_data is not None
|
assert analysis.sample_file_static_data is not None
|
||||||
if analysis.sample_file_static_data.pe:
|
if analysis.sample_file_static_data.pe:
|
||||||
yield Format(FORMAT_PE), NO_ADDRESS
|
yield Format(FORMAT_PE), NO_ADDRESS
|
||||||
@@ -48,7 +48,7 @@ def extract_format(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]
|
|||||||
raise ValueError("unrecognized file format from the VMRay report: %s" % analysis.file_type)
|
raise ValueError("unrecognized file format from the VMRay report: %s" % analysis.file_type)
|
||||||
|
|
||||||
|
|
||||||
def extract_os(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
file_type: str = analysis.file_type
|
file_type: str = analysis.file_type
|
||||||
|
|
||||||
if "windows" in file_type.lower():
|
if "windows" in file_type.lower():
|
||||||
@@ -59,7 +59,7 @@ def extract_os(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
|||||||
raise ValueError("unrecognized OS from the VMRay report: %s" % file_type)
|
raise ValueError("unrecognized OS from the VMRay report: %s" % file_type)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
|
||||||
for global_handler in GLOBAL_HANDLER:
|
for global_handler in GLOBAL_HANDLER:
|
||||||
for feature, addr in global_handler(analysis):
|
for feature, addr in global_handler(analysis):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user