mirror of
https://github.com/mandiant/capa.git
synced 2026-03-16 06:48:58 -07:00
Compare commits
74 Commits
ida-domain
...
mapa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
346931df5a | ||
|
|
2456559bc0 | ||
|
|
5dd1f49023 | ||
|
|
7b23834d8e | ||
|
|
f1800b5eb4 | ||
|
|
43f556caf9 | ||
|
|
5f8c06c650 | ||
|
|
ceaa3b6d03 | ||
|
|
c03d833a84 | ||
|
|
1f4a16cbcc | ||
|
|
2c9e30c3e1 | ||
|
|
8c138e3d22 | ||
|
|
a11a03bc30 | ||
|
|
1173dc5fa5 | ||
|
|
e53f6abc1e | ||
|
|
038c46da16 | ||
|
|
563071349f | ||
|
|
517dfe154a | ||
|
|
2e36f67e11 | ||
|
|
7bd04fe297 | ||
|
|
9f781ec21b | ||
|
|
da1abed3f8 | ||
|
|
3bce2a9b62 | ||
|
|
d97b61551d | ||
|
|
e1ffa1dd09 | ||
|
|
10dfd287b4 | ||
|
|
e9b3311338 | ||
|
|
54cc4ee7a3 | ||
|
|
12863ab4f2 | ||
|
|
e41b5fb150 | ||
|
|
4697902310 | ||
|
|
ed0783c31e | ||
|
|
f03ee75d69 | ||
|
|
f526357def | ||
|
|
c1ec826a9f | ||
|
|
5ef4ad96ee | ||
|
|
8aef630a7f | ||
|
|
d1c9d20668 | ||
|
|
8ccd35d0cf | ||
|
|
3f72b43f48 | ||
|
|
f7bb889f30 | ||
|
|
e0bd6d5ea6 | ||
|
|
239bafd285 | ||
|
|
2033c4ab83 | ||
|
|
cbe005ae0f | ||
|
|
26aba8067f | ||
|
|
3582bce6fd | ||
|
|
535faf281d | ||
|
|
fe27335136 | ||
|
|
a40ae162ef | ||
|
|
1500a34984 | ||
|
|
77440c03f5 | ||
|
|
26fd6b8569 | ||
|
|
2540dd688b | ||
|
|
ff8e7ef52f | ||
|
|
6f078734c3 | ||
|
|
93c11d2d4e | ||
|
|
89c71f4d81 | ||
|
|
9599fbac02 | ||
|
|
b4c0f1369e | ||
|
|
37f2a897ff | ||
|
|
e39e610f66 | ||
|
|
073760f279 | ||
|
|
52a761ebb3 | ||
|
|
2a44482076 | ||
|
|
a359745765 | ||
|
|
203cc0aa0c | ||
|
|
3642ca94a6 | ||
|
|
8e233ca69d | ||
|
|
d5c23486e3 | ||
|
|
7600dd077b | ||
|
|
3de84eff1b | ||
|
|
7e16ed741c | ||
|
|
5a5545aa14 |
7
.github/dependabot.yml
vendored
7
.github/dependabot.yml
vendored
@@ -4,6 +4,13 @@ updates:
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
groups:
|
||||
vivisect:
|
||||
patterns:
|
||||
- "vivisect"
|
||||
- "pyasn1"
|
||||
- "pyasn1-modules"
|
||||
- "msgpack"
|
||||
ignore:
|
||||
- dependency-name: "*"
|
||||
update-types: ["version-update:semver-patch"]
|
||||
|
||||
2
.github/flake8.ini
vendored
2
.github/flake8.ini
vendored
@@ -33,8 +33,6 @@ per-file-ignores =
|
||||
scripts/*: T201
|
||||
# capa.exe is meant to print output
|
||||
capa/main.py: T201
|
||||
# IDA tests emit results to output window so need to print
|
||||
tests/test_ida_features.py: T201
|
||||
# utility used to find the Binary Ninja API via invoking python.exe
|
||||
capa/features/extractors/binja/find_binja_api.py: T201
|
||||
|
||||
|
||||
3
.github/mypy/mypy.ini
vendored
3
.github/mypy/mypy.ini
vendored
@@ -63,6 +63,9 @@ ignore_missing_imports = True
|
||||
[mypy-PyQt5.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-binaryninja]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-binaryninja.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
|
||||
2
.github/pull_request_template.md
vendored
2
.github/pull_request_template.md
vendored
@@ -20,3 +20,5 @@ closes #issue_number
|
||||
- [ ] No new tests needed
|
||||
<!-- Please help us keeping capa documentation up-to-date -->
|
||||
- [ ] No documentation update needed
|
||||
<!-- Please indicate if and how you have used AI to generate (parts of) your code submission. Include your prompt, model, tool, etc. -->
|
||||
- [ ] This submission includes AI-generated code and I have provided details in the description.
|
||||
|
||||
3
.github/pyinstaller/pyinstaller.spec
vendored
3
.github/pyinstaller/pyinstaller.spec
vendored
@@ -17,6 +17,8 @@ import sys
|
||||
|
||||
import capa.rules.cache
|
||||
|
||||
from PyInstaller.utils.hooks import collect_submodules
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# SPECPATH is a global variable which points to .spec file path
|
||||
@@ -34,6 +36,7 @@ a = Analysis(
|
||||
["../../capa/main.py"],
|
||||
pathex=["capa"],
|
||||
binaries=None,
|
||||
hiddenimports=collect_submodules('rich'),
|
||||
datas=[
|
||||
# when invoking pyinstaller from the project root,
|
||||
# this gets invoked from the directory of the spec file,
|
||||
|
||||
62
.github/workflows/black-format.yml
vendored
Normal file
62
.github/workflows/black-format.yml
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
name: black auto-format
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
paths-ignore:
|
||||
- 'web/**'
|
||||
- 'doc/**'
|
||||
- '**.md'
|
||||
workflow_dispatch: # allow manual trigger
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
black-format:
|
||||
# only run on dependabot PRs or manual trigger
|
||||
if: github.actor == 'dependabot[bot]' || github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
with:
|
||||
ref: ${{ github.head_ref }}
|
||||
# need a token with write access to push the commit
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set up Python 3.13
|
||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||
with:
|
||||
python-version: "3.13"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install -e .[dev,scripts]
|
||||
|
||||
- name: Run isort
|
||||
run: pre-commit run isort --all-files
|
||||
|
||||
- name: Run black/continue
|
||||
# black returns non-zero error code after formatting, which is what we expect
|
||||
continue-on-error: true
|
||||
run: pre-commit run black --all-files
|
||||
|
||||
- name: Check for changes
|
||||
id: changes
|
||||
run: |
|
||||
if git diff --quiet; then
|
||||
echo "has_changes=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "has_changes=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Commit and push formatting changes
|
||||
if: steps.changes.outputs.has_changes == 'true'
|
||||
run: |
|
||||
git config user.name "${GITHUB_ACTOR}"
|
||||
git config user.email "${GITHUB_ACTOR_ID}+${GITHUB_ACTOR}@users.noreply.github.com"
|
||||
git add -A
|
||||
git commit -m "style: auto-format with black and isort"
|
||||
git push
|
||||
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@@ -167,7 +167,7 @@ jobs:
|
||||
- name: Set zip name
|
||||
run: echo "zip_name=capa-${GITHUB_REF#refs/tags/}-${{ matrix.asset_name }}.zip" >> $GITHUB_ENV
|
||||
- name: Zip ${{ matrix.artifact_name }} into ${{ env.zip_name }}
|
||||
run: zip ${{ env.zip_name }} ${{ matrix.artifact_name }}
|
||||
run: zip ${ZIP_NAME} ${{ matrix.artifact_name }}
|
||||
- name: Upload ${{ env.zip_name }} to GH Release
|
||||
uses: svenstaro/upload-release-action@2728235f7dc9ff598bd86ce3c274b74f802d2208 # v2
|
||||
with:
|
||||
|
||||
4
.github/workflows/pip-audit.yml
vendored
4
.github/workflows/pip-audit.yml
vendored
@@ -14,8 +14,8 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
|
||||
- uses: pypa/gh-action-pip-audit@v1.0.8
|
||||
- uses: pypa/gh-action-pip-audit@1220774d901786e6f652ae159f7b6bc8fea6d266 # v1.1.0
|
||||
with:
|
||||
inputs: .
|
||||
|
||||
4
.github/workflows/tag.yml
vendored
4
.github/workflows/tag.yml
vendored
@@ -21,8 +21,10 @@ jobs:
|
||||
# user information is needed to create annotated tags (with a message)
|
||||
git config user.email 'capa-dev@mandiant.com'
|
||||
git config user.name 'Capa Bot'
|
||||
name=${{ github.event.release.tag_name }}
|
||||
name=${GITHUB_EVENT_RELEASE_TAG_NAME}
|
||||
git tag $name -m "https://github.com/mandiant/capa/releases/$name"
|
||||
env:
|
||||
GITHUB_EVENT_RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
|
||||
# TODO update branch name-major=${name%%.*}
|
||||
- name: Push tag to capa-rules
|
||||
uses: ad-m/github-push-action@d91a481090679876dfc4178fef17f286781251df # v0.8.0
|
||||
|
||||
7
.github/workflows/tests.yml
vendored
7
.github/workflows/tests.yml
vendored
@@ -113,6 +113,11 @@ jobs:
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install -e .[dev,scripts]
|
||||
- name: Cache vivisect workspaces
|
||||
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
|
||||
with:
|
||||
path: tests/data/**/*.viv
|
||||
key: viv-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }}
|
||||
- name: Run tests (fast)
|
||||
# this set of tests runs about 80% of the cases in 20% of the time,
|
||||
# and should catch most errors quickly.
|
||||
@@ -155,7 +160,7 @@ jobs:
|
||||
run: |
|
||||
mkdir ./.github/binja
|
||||
curl "https://raw.githubusercontent.com/Vector35/binaryninja-api/6812c97/scripts/download_headless.py" -o ./.github/binja/download_headless.py
|
||||
python ./.github/binja/download_headless.py --serial ${{ env.BN_SERIAL }} --output .github/binja/BinaryNinja-headless.zip
|
||||
python ./.github/binja/download_headless.py --serial ${BN_SERIAL} --output .github/binja/BinaryNinja-headless.zip
|
||||
unzip .github/binja/BinaryNinja-headless.zip -d .github/binja/
|
||||
python .github/binja/binaryninja/scripts/install_api.py --install-on-root --silent
|
||||
- name: Run tests
|
||||
|
||||
22
.github/workflows/web-release.yml
vendored
22
.github/workflows/web-release.yml
vendored
@@ -18,14 +18,18 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set release name
|
||||
run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV
|
||||
run: echo "RELEASE_NAME=capa-explorer-web-v${GITHUB_EVENT_INPUTS_VERSION}-${GITHUB_SHA::7}" >> $GITHUB_ENV
|
||||
env:
|
||||
GITHUB_EVENT_INPUTS_VERSION: ${{ github.event.inputs.version }}
|
||||
|
||||
- name: Check if release already exists
|
||||
run: |
|
||||
if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
|
||||
echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
|
||||
if ls web/explorer/releases/capa-explorer-web-v${GITHUB_EVENT_INPUTS_VERSION}-* 1> /dev/null 2>&1; then
|
||||
echo "::error:: A release with version ${GITHUB_EVENT_INPUTS_VERSION} already exists"
|
||||
exit 1
|
||||
fi
|
||||
env:
|
||||
GITHUB_EVENT_INPUTS_VERSION: ${{ github.event.inputs.version }}
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
|
||||
@@ -43,24 +47,24 @@ jobs:
|
||||
working-directory: web/explorer
|
||||
|
||||
- name: Compress bundle
|
||||
run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
|
||||
run: zip -r ${RELEASE_NAME}.zip capa-explorer-web
|
||||
working-directory: web/explorer
|
||||
|
||||
- name: Create releases directory
|
||||
run: mkdir -vp web/explorer/releases
|
||||
|
||||
- name: Move release to releases folder
|
||||
run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases
|
||||
run: mv web/explorer/${RELEASE_NAME}.zip web/explorer/releases
|
||||
|
||||
- name: Compute release SHA256 hash
|
||||
run: |
|
||||
echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV
|
||||
echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${RELEASE_NAME}.zip | awk '{print $1}')" >> $GITHUB_ENV
|
||||
|
||||
- name: Update CHANGELOG.md
|
||||
run: |
|
||||
echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
|
||||
echo "## ${RELEASE_NAME}" >> web/explorer/releases/CHANGELOG.md
|
||||
echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
|
||||
echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
|
||||
echo "- SHA256: ${RELEASE_SHA256}" >> web/explorer/releases/CHANGELOG.md
|
||||
echo "" >> web/explorer/releases/CHANGELOG.md
|
||||
cat web/explorer/releases/CHANGELOG.md
|
||||
|
||||
@@ -73,7 +77,7 @@ jobs:
|
||||
run: |
|
||||
git config --local user.email "capa-dev@mandiant.com"
|
||||
git config --local user.name "Capa Bot"
|
||||
git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
|
||||
git add -f web/explorer/releases/${RELEASE_NAME}.zip web/explorer/releases/CHANGELOG.md
|
||||
git add -u web/explorer/releases/
|
||||
|
||||
- name: Create Pull Request
|
||||
|
||||
@@ -136,7 +136,6 @@ repos:
|
||||
- "tests/"
|
||||
- "--ignore=tests/test_binja_features.py"
|
||||
- "--ignore=tests/test_ghidra_features.py"
|
||||
- "--ignore=tests/test_ida_features.py"
|
||||
- "--ignore=tests/test_viv_features.py"
|
||||
- "--ignore=tests/test_idalib_features.py"
|
||||
- "--ignore=tests/test_main.py"
|
||||
|
||||
44
CHANGELOG.md
44
CHANGELOG.md
@@ -5,29 +5,71 @@
|
||||
### New Features
|
||||
|
||||
- ghidra: support PyGhidra @mike-hunhoff #2788
|
||||
- vmray: extract number features from whitelisted void_ptr parameters (hKey, hKeyRoot) @adeboyedn #2835
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
### New Rules (5)
|
||||
### New Rules (23)
|
||||
|
||||
- nursery/run-as-nodejs-native-module mehunhoff@google.com
|
||||
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_io still@teamt5.org
|
||||
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_timer still@teamt5.org
|
||||
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_work still@teamt5.org
|
||||
- data-manipulation/encryption/hc-256/encrypt-data-using-hc-256 wballenthin@hex-rays.com
|
||||
- anti-analysis/anti-llm/terminate-anthropic-session-via-magic-strings wballenthin@hex-rays.com
|
||||
- nursery/access-aws-credentials maximemorin@google.com
|
||||
- nursery/access-cloudflare-credentials maximemorin@google.com
|
||||
- nursery/access-docker-credentials maximemorin@google.com
|
||||
- nursery/access-gcp-credentials maximemorin@google.com
|
||||
- nursery/access-kubernetes-credentials maximemorin@google.com
|
||||
- nursery/enumerate-aws-cloudformation maximemorin@google.com
|
||||
- nursery/enumerate-aws-cloudtrail maximemorin@google.com
|
||||
- nursery/enumerate-aws-direct-connect maximemorin@google.com
|
||||
- nursery/enumerate-aws-ec2 maximemorin@google.com
|
||||
- nursery/enumerate-aws-iam maximemorin@google.com
|
||||
- nursery/enumerate-aws-s3 maximemorin@google.com
|
||||
- nursery/enumerate-aws-support-cases maximemorin@google.com
|
||||
- persistence/registry/persist-via-shellserviceobjectdelayload-registry-key xpzhxhm@gmail.com
|
||||
- nursery/get-http-response-date @cosmoworker
|
||||
- host-interaction/process/create/create-process-in-dotnet moritz.raabe@mandiant.com social.tarang@gmail.com
|
||||
- nursery/read-file-in-dotnet moritz.raabe@mandiant.com anushka.virgaonkar@mandiant.com
|
||||
- nursery/write-file-in-dotnet william.ballenthin@mandiant.com anushka.virgaonkar@mandiant.com
|
||||
-
|
||||
|
||||
### Bug Fixes
|
||||
- main: suggest --os flag in unsupported OS error message to help users override ELF OS detection @devs6186 #2577
|
||||
- render: escape sample-controlled strings before passing to Rich to prevent MarkupError @devs6186 #2699
|
||||
- rules: handle empty or invalid YAML documents gracefully in `Rule.from_yaml` and `get_rules` @devs6186 #2900
|
||||
- Fixed insecure deserialization vulnerability in YAML loading @0x1622 (#2770)
|
||||
- loader: gracefully handle ELF files with unsupported architectures kamranulhaq2002@gmail.com #2800
|
||||
- loader: handle SegmentationViolation for malformed ELF files @kami922 #2799
|
||||
- lint: disable rule caching during linting @Maijin #2817
|
||||
- vmray: skip processes with invalid PID or missing filename @EclipseAditya #2807
|
||||
- features: fix Regex.get_value_str() returning escaped pattern instead of raw regex @EclipseAditya #1909
|
||||
- render: use default styling for dynamic -vv API/call details so they are easier to see @devs6186 #1865
|
||||
- loader: handle struct.error from dnfile and show clear CorruptFile message @devs6186 #2442
|
||||
- address: fix TypeError when sorting locations containing mixed address types @devs6186 #2195
|
||||
- loader: skip PE files with unrealistically large section virtual sizes to prevent resource exhaustion @devs6186 #1989
|
||||
|
||||
### capa Explorer Web
|
||||
- webui: fix 404 for "View rule in capa-rules" by using encodeURIComponent for rule name in URL @devs6186 #2482
|
||||
- webui: show error when JSON does not follow expected result document schema; suggest reanalyzing for VT URLs @devs6186 #2363
|
||||
- webui: fix global search to match feature types (match, regex, api, …) @devs6186 #2349
|
||||
|
||||
### capa Explorer IDA Pro plugin
|
||||
|
||||
### Performance
|
||||
|
||||
- perf: eliminate O(n²) tuple growth and reduce per-match overhead @devs6186 #2890
|
||||
|
||||
### Development
|
||||
|
||||
- doc: document that default output shows top-level matches only; -v/-vv show nested matches @devs6186 #1410
|
||||
- doc: fix typo in usage.md, add documentation links to README @devs6186 #2274
|
||||
- doc: add table comparing ways to consume capa output (CLI, IDA, Ghidra, dynamic sandbox, web) @devs6186 #2273
|
||||
- binja: add mypy config for top-level binaryninja module to fix mypy issues @devs6186 #2399
|
||||
- ci: deprecate macos-13 runner and use Python v3.13 for testing @mike-hunhoff #2777
|
||||
- ci: pin pip-audit action SHAs and update to v1.1.0 @kami922 #1131
|
||||
|
||||
### Raw diffs
|
||||
- [capa v9.3.1...master](https://github.com/mandiant/capa/compare/v9.3.1...master)
|
||||
|
||||
@@ -87,6 +87,8 @@ Download stable releases of the standalone capa binaries [here](https://github.c
|
||||
|
||||
To use capa as a library or integrate with another tool, see [doc/installation.md](https://github.com/mandiant/capa/blob/master/doc/installation.md) for further setup instructions.
|
||||
|
||||
**Documentation:** [Usage and tips](doc/usage.md) · [Installation](doc/installation.md) · [Limitations](doc/limitations.md) · [FAQ](doc/faq.md)
|
||||
|
||||
# capa Explorer Web
|
||||
The [capa Explorer Web](https://mandiant.github.io/capa/explorer/) enables you to interactively explore capa results in your web browser. Besides the online version you can download a standalone HTML file for local offline usage.
|
||||
|
||||
|
||||
@@ -277,7 +277,9 @@ def find_dynamic_capabilities(
|
||||
all_span_matches: MatchResults = collections.defaultdict(list)
|
||||
all_call_matches: MatchResults = collections.defaultdict(list)
|
||||
|
||||
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
|
||||
# Accumulate into a list to avoid O(n²) tuple concatenation.
|
||||
# Tuples are immutable, so `t += (x,)` copies the entire tuple each time.
|
||||
process_feature_counts: list[rdoc.ProcessFeatureCount] = []
|
||||
|
||||
assert isinstance(extractor, DynamicFeatureExtractor)
|
||||
processes: list[ProcessHandle] = list(extractor.get_processes())
|
||||
@@ -289,10 +291,10 @@ def find_dynamic_capabilities(
|
||||
task = pbar.add_task("matching", total=n_processes, unit="processes")
|
||||
for p in processes:
|
||||
process_capabilities = find_process_capabilities(ruleset, extractor, p)
|
||||
feature_counts.processes += (
|
||||
process_feature_counts.append(
|
||||
rdoc.ProcessFeatureCount(
|
||||
address=frz.Address.from_capa(p.address), count=process_capabilities.feature_count
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
for rule_name, res in process_capabilities.process_matches.items():
|
||||
@@ -317,7 +319,11 @@ def find_dynamic_capabilities(
|
||||
capa.engine.index_rule_matches(process_and_lower_features, rule, locations)
|
||||
|
||||
all_file_capabilities = find_file_capabilities(ruleset, extractor, process_and_lower_features)
|
||||
feature_counts.file = all_file_capabilities.feature_count
|
||||
|
||||
feature_counts = rdoc.DynamicFeatureCounts(
|
||||
file=all_file_capabilities.feature_count,
|
||||
processes=tuple(process_feature_counts),
|
||||
)
|
||||
|
||||
matches = dict(
|
||||
itertools.chain(
|
||||
|
||||
@@ -156,8 +156,11 @@ def find_static_capabilities(
|
||||
all_bb_matches: MatchResults = collections.defaultdict(list)
|
||||
all_insn_matches: MatchResults = collections.defaultdict(list)
|
||||
|
||||
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
|
||||
library_functions: tuple[rdoc.LibraryFunction, ...] = ()
|
||||
# Accumulate into lists to avoid O(n²) tuple concatenation.
|
||||
# Tuples are immutable, so `t += (x,)` copies the entire tuple each time.
|
||||
# For binaries with thousands of functions this becomes quadratic in memory work.
|
||||
function_feature_counts: list[rdoc.FunctionFeatureCount] = []
|
||||
library_functions_list: list[rdoc.LibraryFunction] = []
|
||||
|
||||
assert isinstance(extractor, StaticFeatureExtractor)
|
||||
functions: list[FunctionHandle] = list(extractor.get_functions())
|
||||
@@ -176,20 +179,20 @@ def find_static_capabilities(
|
||||
if extractor.is_library_function(f.address):
|
||||
function_name = extractor.get_function_name(f.address)
|
||||
logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
|
||||
library_functions += (
|
||||
rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
|
||||
library_functions_list.append(
|
||||
rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name)
|
||||
)
|
||||
n_libs = len(library_functions)
|
||||
n_libs = len(library_functions_list)
|
||||
percentage = round(100 * (n_libs / n_funcs))
|
||||
pbar.update(task, postfix=f"skipped {n_libs} library functions, {percentage}%")
|
||||
pbar.advance(task)
|
||||
continue
|
||||
|
||||
code_capabilities = find_code_capabilities(ruleset, extractor, f)
|
||||
feature_counts.functions += (
|
||||
function_feature_counts.append(
|
||||
rdoc.FunctionFeatureCount(
|
||||
address=frz.Address.from_capa(f.address), count=code_capabilities.feature_count
|
||||
),
|
||||
)
|
||||
)
|
||||
t1 = time.time()
|
||||
|
||||
@@ -230,7 +233,11 @@ def find_static_capabilities(
|
||||
capa.engine.index_rule_matches(function_and_lower_features, rule, locations)
|
||||
|
||||
all_file_capabilities = find_file_capabilities(ruleset, extractor, function_and_lower_features)
|
||||
feature_counts.file = all_file_capabilities.feature_count
|
||||
|
||||
feature_counts = rdoc.StaticFeatureCounts(
|
||||
file=all_file_capabilities.feature_count,
|
||||
functions=tuple(function_feature_counts),
|
||||
)
|
||||
|
||||
matches: MatchResults = dict(
|
||||
itertools.chain(
|
||||
@@ -244,4 +251,4 @@ def find_static_capabilities(
|
||||
)
|
||||
)
|
||||
|
||||
return Capabilities(matches, feature_counts, library_functions)
|
||||
return Capabilities(matches, feature_counts, tuple(library_functions_list))
|
||||
|
||||
@@ -189,6 +189,11 @@ class _NoAddress(Address):
|
||||
def __lt__(self, other):
|
||||
return False
|
||||
|
||||
def __gt__(self, other):
|
||||
# Mixed-type comparison: (real_address < NO_ADDRESS) invokes this so sort works.
|
||||
# NoAddress sorts last.
|
||||
return other is not self
|
||||
|
||||
def __hash__(self):
|
||||
return hash(0)
|
||||
|
||||
|
||||
@@ -369,6 +369,12 @@ class Regex(String):
|
||||
else:
|
||||
return Result(False, _MatchedRegex(self, {}), [])
|
||||
|
||||
def get_value_str(self) -> str:
|
||||
# return the raw regex pattern, not the escaped version from String.get_value_str().
|
||||
# see #1909.
|
||||
assert isinstance(self.value, str)
|
||||
return self.value
|
||||
|
||||
def __str__(self):
|
||||
assert isinstance(self.value, str)
|
||||
return f"regex(string =~ {self.value})"
|
||||
|
||||
@@ -20,6 +20,7 @@ Proto files generated via protobuf v24.4:
|
||||
from BinExport2 at 6916731d5f6693c4a4f0a052501fd3bd92cfd08b
|
||||
https://github.com/google/binexport/blob/6916731/binexport2.proto
|
||||
"""
|
||||
|
||||
import io
|
||||
import hashlib
|
||||
import logging
|
||||
|
||||
@@ -84,16 +84,14 @@ def extract_insn_number_features(
|
||||
yield OperandOffset(i, value), ih.address
|
||||
|
||||
|
||||
OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack), #int] ; capture #int
|
||||
ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack), #int]! ; capture #int
|
||||
ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack)], #int ; capture #int
|
||||
ldp|ldpd|stp|stpd reg, reg, [reg(not-stack), #int] ; capture #int
|
||||
ldp|ldpd|stp|stpd reg, reg, [reg(not-stack), #int]! ; capture #int
|
||||
ldp|ldpd|stp|stpd reg, reg, [reg(not-stack)], #int ; capture #int
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def extract_insn_offset_features(
|
||||
@@ -117,12 +115,10 @@ def extract_insn_offset_features(
|
||||
yield OperandOffset(match.operand_index, value), ih.address
|
||||
|
||||
|
||||
NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
eor reg, reg, reg
|
||||
eor reg, reg, #int
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
@@ -144,11 +140,9 @@ def extract_insn_nzxor_characteristic_features(
|
||||
yield Characteristic("nzxor"), ih.address
|
||||
|
||||
|
||||
INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
blx|bx|blr reg
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(
|
||||
|
||||
@@ -34,17 +34,14 @@ from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOK
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
IGNORE_NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
IGNORE_NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
ret #int
|
||||
retn #int
|
||||
add reg(stack), #int
|
||||
sub reg(stack), #int
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
push #int0 ; capture #int0
|
||||
|
||||
# its a little tedious to enumerate all the address forms
|
||||
@@ -64,8 +61,7 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
# imagine reg is zero'd out, then this is like `mov reg, #int`
|
||||
# which is not uncommon.
|
||||
lea reg, [reg + #int] ; capture #int
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def extract_insn_number_features(
|
||||
@@ -100,8 +96,7 @@ def extract_insn_number_features(
|
||||
yield OperandOffset(match.operand_index, value), ih.address
|
||||
|
||||
|
||||
OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
mov|movzx|movsb|cmp [reg + reg * #int + #int0], #int ; capture #int0
|
||||
mov|movzx|movsb|cmp [reg * #int + #int0], #int ; capture #int0
|
||||
mov|movzx|movsb|cmp [reg + reg + #int0], #int ; capture #int0
|
||||
@@ -114,18 +109,15 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
mov|movzx|movsb|cmp|lea reg, [reg * #int + #int0] ; capture #int0
|
||||
mov|movzx|movsb|cmp|lea reg, [reg + reg + #int0] ; capture #int0
|
||||
mov|movzx|movsb|cmp|lea reg, [reg(not-stack) + #int0] ; capture #int0
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
# these are patterns that access offset 0 from some pointer
|
||||
# (pointer is not the stack pointer).
|
||||
OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
mov|movzx|movsb [reg(not-stack)], reg
|
||||
mov|movzx|movsb [reg(not-stack)], #int
|
||||
lea reg, [reg(not-stack)]
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def extract_insn_offset_features(
|
||||
@@ -189,12 +181,10 @@ def is_security_cookie(
|
||||
return False
|
||||
|
||||
|
||||
NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
xor|xorpd|xorps|pxor reg, reg
|
||||
xor|xorpd|xorps|pxor reg, #int
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
@@ -228,8 +218,7 @@ def extract_insn_nzxor_characteristic_features(
|
||||
yield Characteristic("nzxor"), ih.address
|
||||
|
||||
|
||||
INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
|
||||
call|jmp reg0
|
||||
call|jmp [reg + reg * #int + #int]
|
||||
call|jmp [reg + reg * #int]
|
||||
@@ -237,8 +226,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
|
||||
call|jmp [reg + reg + #int]
|
||||
call|jmp [reg + #int]
|
||||
call|jmp [reg]
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(
|
||||
|
||||
@@ -35,7 +35,7 @@ from capa.features.extractors.base_extractor import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE"}
|
||||
TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE", "2.5-CAPE"}
|
||||
|
||||
|
||||
class CapeExtractor(DynamicFeatureExtractor):
|
||||
|
||||
@@ -27,7 +27,12 @@ import capa.features.extractors.dnfile.file
|
||||
import capa.features.extractors.dnfile.insn
|
||||
import capa.features.extractors.dnfile.function
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress
|
||||
from capa.features.address import (
|
||||
NO_ADDRESS,
|
||||
Address,
|
||||
DNTokenAddress,
|
||||
DNTokenOffsetAddress,
|
||||
)
|
||||
from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
|
||||
from capa.features.extractors.base_extractor import (
|
||||
BBHandle,
|
||||
@@ -39,6 +44,7 @@ from capa.features.extractors.base_extractor import (
|
||||
from capa.features.extractors.dnfile.helpers import (
|
||||
get_dotnet_types,
|
||||
get_dotnet_fields,
|
||||
load_dotnet_image,
|
||||
get_dotnet_managed_imports,
|
||||
get_dotnet_managed_methods,
|
||||
get_dotnet_unmanaged_imports,
|
||||
@@ -83,7 +89,7 @@ class DnFileFeatureExtractorCache:
|
||||
|
||||
class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: Path):
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||
self.pe = load_dotnet_image(path)
|
||||
super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
|
||||
|
||||
# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
|
||||
@@ -112,7 +118,12 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
fh: FunctionHandle = FunctionHandle(
|
||||
address=DNTokenAddress(token),
|
||||
inner=method,
|
||||
ctx={"pe": self.pe, "calls_from": set(), "calls_to": set(), "cache": self.token_cache},
|
||||
ctx={
|
||||
"pe": self.pe,
|
||||
"calls_from": set(),
|
||||
"calls_to": set(),
|
||||
"cache": self.token_cache,
|
||||
},
|
||||
)
|
||||
|
||||
# method tokens should be unique
|
||||
|
||||
@@ -15,8 +15,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import struct
|
||||
import logging
|
||||
from typing import Union, Iterator, Optional
|
||||
from pathlib import Path
|
||||
|
||||
import dnfile
|
||||
from dncil.cil.body import CilMethodBody
|
||||
@@ -30,6 +32,16 @@ from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def load_dotnet_image(path: Path) -> dnfile.dnPE:
|
||||
"""load a .NET PE file, raising CorruptFile on struct.error with the original error message."""
|
||||
try:
|
||||
return dnfile.dnPE(str(path))
|
||||
except struct.error as e:
|
||||
from capa.loader import CorruptFile
|
||||
|
||||
raise CorruptFile(f"Invalid or truncated .NET metadata: {e}") from e
|
||||
|
||||
|
||||
class DnfileMethodBodyReader(CilMethodBodyReaderBase):
|
||||
def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow):
|
||||
self.pe: dnfile.dnPE = pe
|
||||
@@ -151,7 +163,9 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
)
|
||||
|
||||
|
||||
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
|
||||
def get_dotnet_methoddef_property_accessors(
|
||||
pe: dnfile.dnPE,
|
||||
) -> Iterator[tuple[int, str]]:
|
||||
"""get MethodDef methods used to access properties
|
||||
|
||||
see https://www.ntcore.com/files/dotnetformat.htm
|
||||
@@ -226,7 +240,13 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
|
||||
typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)
|
||||
|
||||
yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access)
|
||||
yield DnType(
|
||||
token,
|
||||
typedefname,
|
||||
namespace=typedefnamespace,
|
||||
member=method_name,
|
||||
access=access,
|
||||
)
|
||||
|
||||
|
||||
def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
@@ -259,7 +279,9 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)
|
||||
|
||||
|
||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
|
||||
def get_dotnet_managed_method_bodies(
|
||||
pe: dnfile.dnPE,
|
||||
) -> Iterator[tuple[int, CilMethodBody]]:
|
||||
"""get managed methods from MethodDef table"""
|
||||
for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
|
||||
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
|
||||
@@ -338,7 +360,10 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
|
||||
|
||||
|
||||
def resolve_nested_typedef_name(
|
||||
nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
|
||||
nested_class_table: dict,
|
||||
index: int,
|
||||
typedef: dnfile.mdtable.TypeDefRow,
|
||||
pe: dnfile.dnPE,
|
||||
) -> tuple[str, tuple[str, ...]]:
|
||||
"""Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ from capa.features.extractors.dnfile.types import DnType
|
||||
from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import (
|
||||
iter_dotnet_table,
|
||||
load_dotnet_image,
|
||||
is_dotnet_mixed_mode,
|
||||
get_dotnet_managed_imports,
|
||||
get_dotnet_managed_methods,
|
||||
@@ -184,8 +185,8 @@ GLOBAL_HANDLERS = (
|
||||
class DotnetFileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
|
||||
self.path: Path = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||
self.path = path
|
||||
self.pe = load_dotnet_image(path)
|
||||
|
||||
def get_base_address(self):
|
||||
return NO_ADDRESS
|
||||
@@ -217,7 +218,10 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
|
||||
assert self.pe.net.struct.MajorRuntimeVersion is not None
|
||||
assert self.pe.net.struct.MinorRuntimeVersion is not None
|
||||
|
||||
return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion
|
||||
return (
|
||||
self.pe.net.struct.MajorRuntimeVersion,
|
||||
self.pe.net.struct.MinorRuntimeVersion,
|
||||
)
|
||||
|
||||
def get_meta_version_string(self) -> str:
|
||||
assert self.pe.net is not None
|
||||
|
||||
@@ -56,7 +56,7 @@ def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]:
|
||||
if ploc is not None:
|
||||
# from vivisect.const:
|
||||
# location: (L_VA, L_SIZE, L_LTYPE, L_TINFO)
|
||||
(pva, _, ptype, pinfo) = ploc
|
||||
pva, _, ptype, pinfo = ploc
|
||||
|
||||
if ptype == LOC_OP and not (pinfo & IF_NOFALL):
|
||||
ret.append(pva)
|
||||
|
||||
@@ -176,7 +176,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
|
||||
|
||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
|
||||
try:
|
||||
(_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
|
||||
_, target = resolve_indirect_call(f.vw, insn.va, insn=insn)
|
||||
except NotFoundError:
|
||||
# not able to resolve the indirect call, sorry
|
||||
return
|
||||
|
||||
@@ -26,6 +26,16 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
VOID_PTR_NUMBER_PARAMS = frozenset(
|
||||
{
|
||||
"hKey",
|
||||
"hKeyRoot",
|
||||
"hkResult",
|
||||
"samDesired",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
if param.deref is not None:
|
||||
# pointer types contain a special "deref" member that stores the deref'd value
|
||||
@@ -39,10 +49,31 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feat
|
||||
# parsing the data up to here results in double-escaped backslashes, remove those here
|
||||
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
|
||||
else:
|
||||
logger.debug("skipping deref param type %s", param.deref.type_)
|
||||
if param.name in VOID_PTR_NUMBER_PARAMS:
|
||||
try:
|
||||
yield Number(hexint(param.deref.value)), ch.address
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.debug(
|
||||
"failed to parse whitelisted void_ptr param %s value %s: %s",
|
||||
param.name,
|
||||
param.deref.value,
|
||||
e,
|
||||
)
|
||||
else:
|
||||
logger.debug("skipping deref param type %s", param.deref.type_)
|
||||
elif param.value is not None:
|
||||
if param.type_ in PARAM_TYPE_INT:
|
||||
yield Number(hexint(param.value)), ch.address
|
||||
elif param.type_ == "void_ptr" and param.name in VOID_PTR_NUMBER_PARAMS:
|
||||
try:
|
||||
yield Number(hexint(param.value)), ch.address
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.debug(
|
||||
"failed to parse whitelisted void_ptr param %s value %s: %s",
|
||||
param.name,
|
||||
param.value,
|
||||
e,
|
||||
)
|
||||
|
||||
|
||||
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import logging
|
||||
from typing import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
@@ -39,6 +39,8 @@ from capa.features.extractors.base_extractor import (
|
||||
DynamicFeatureExtractor,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_formatted_params(params: ParamList) -> list[str]:
|
||||
params_list: list[str] = []
|
||||
@@ -87,6 +89,16 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
||||
|
||||
def get_processes(self) -> Iterator[ProcessHandle]:
|
||||
for monitor_process in self.analysis.monitor_processes.values():
|
||||
# skip invalid/incomplete monitor process entries, see #2807
|
||||
if monitor_process.pid == 0 or not monitor_process.filename:
|
||||
logger.debug(
|
||||
"skipping incomplete process entry: pid=%d, filename=%s, monitor_id=%d",
|
||||
monitor_process.pid,
|
||||
monitor_process.filename,
|
||||
monitor_process.monitor_id,
|
||||
)
|
||||
continue
|
||||
|
||||
address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
|
||||
yield ProcessHandle(address, inner=monitor_process)
|
||||
|
||||
|
||||
@@ -327,6 +327,9 @@ def log_unsupported_os_error():
|
||||
logger.error(" ")
|
||||
logger.error(" capa currently only analyzes executables for some operating systems")
|
||||
logger.error(" (including Windows, Linux, and Android).")
|
||||
logger.error(" ")
|
||||
logger.error(" If you know the target OS, you can specify it explicitly, for example:")
|
||||
logger.error(" capa --os linux <sample>")
|
||||
logger.error("-" * 80)
|
||||
|
||||
|
||||
|
||||
@@ -126,6 +126,57 @@ def get_meta_str(vw):
|
||||
return f"{', '.join(meta)}, number of functions: {len(vw.getFunctions())}"
|
||||
|
||||
|
||||
def _is_probably_corrupt_pe(path: Path) -> bool:
|
||||
"""
|
||||
Heuristic check for obviously malformed PE samples that provoke
|
||||
pathological behavior in vivisect (see GH-1989).
|
||||
|
||||
We treat a PE as "probably corrupt" when any section declares an
|
||||
unrealistically large virtual size compared to the file size, e.g.
|
||||
hundreds of megabytes in a tiny file. Such cases lead vivisect to
|
||||
try to map enormous regions and can exhaust CPU/memory.
|
||||
"""
|
||||
try:
|
||||
import pefile
|
||||
except Exception:
|
||||
# If pefile is unavailable, fall back to existing behavior.
|
||||
return False
|
||||
|
||||
try:
|
||||
pe = pefile.PE(str(path), fast_load=True)
|
||||
except pefile.PEFormatError:
|
||||
# Not a PE file (or badly formed); let existing checks handle it.
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
try:
|
||||
file_size = path.stat().st_size
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
if file_size <= 0:
|
||||
return False
|
||||
|
||||
# Flag sections whose declared virtual size is wildly disproportionate
|
||||
# to the file size (e.g. 900MB section in a ~400KB sample).
|
||||
_VSIZE_FILE_RATIO = 128
|
||||
_MAX_REASONABLE_VSIZE = 512 * 1024 * 1024 # 512 MB
|
||||
max_reasonable = max(file_size * _VSIZE_FILE_RATIO, _MAX_REASONABLE_VSIZE)
|
||||
|
||||
for section in getattr(pe, "sections", []):
|
||||
vsize = getattr(section, "Misc_VirtualSize", 0) or 0
|
||||
if vsize > max_reasonable:
|
||||
logger.debug(
|
||||
"detected unrealistic PE section virtual size: 0x%x (file size: 0x%x), treating as corrupt",
|
||||
vsize,
|
||||
file_size,
|
||||
)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
|
||||
"""
|
||||
load the program at the given path into a vivisect workspace using the given format.
|
||||
@@ -143,11 +194,18 @@ def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
|
||||
"""
|
||||
|
||||
# lazy import enables us to not require viv if user wants another backend.
|
||||
import envi.exc
|
||||
import viv_utils
|
||||
import viv_utils.flirt
|
||||
|
||||
logger.debug("generating vivisect workspace for: %s", path)
|
||||
|
||||
if input_format in (FORMAT_PE, FORMAT_AUTO) and _is_probably_corrupt_pe(path):
|
||||
raise CorruptFile(
|
||||
"PE file appears to contain unrealistically large sections and is likely corrupt"
|
||||
+ " - skipping analysis to avoid excessive resource usage."
|
||||
)
|
||||
|
||||
try:
|
||||
if input_format == FORMAT_AUTO:
|
||||
if not is_supported_format(path):
|
||||
@@ -164,6 +222,8 @@ def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
|
||||
else:
|
||||
raise ValueError("unexpected format: " + input_format)
|
||||
except envi.exc.SegmentationViolation as e:
|
||||
raise CorruptFile(f"Invalid memory access during binary parsing: {e}") from e
|
||||
except Exception as e:
|
||||
# vivisect raises raw Exception instances, and we don't want
|
||||
# to do a subclass check via isinstance.
|
||||
|
||||
10
capa/main.py
10
capa/main.py
@@ -661,7 +661,9 @@ def get_rules_from_cli(args) -> RuleSet:
|
||||
raises:
|
||||
ShouldExitError: if the program is invoked incorrectly and should exit.
|
||||
"""
|
||||
enable_cache: bool = True
|
||||
enable_cache: bool = getattr(args, "enable_cache", True)
|
||||
# this allows calling functions to easily disable rule caching, e.g., used by the rule linter to avoid
|
||||
|
||||
try:
|
||||
if capa.helpers.is_running_standalone() and args.is_default_rules:
|
||||
cache_dir = get_default_root() / "cache"
|
||||
@@ -943,8 +945,7 @@ def main(argv: Optional[list[str]] = None):
|
||||
argv = sys.argv[1:]
|
||||
|
||||
desc = "The FLARE team's open-source tool to identify capabilities in executable files."
|
||||
epilog = textwrap.dedent(
|
||||
"""
|
||||
epilog = textwrap.dedent("""
|
||||
By default, capa uses a default set of embedded rules.
|
||||
You can see the rule set here:
|
||||
https://github.com/mandiant/capa-rules
|
||||
@@ -971,8 +972,7 @@ def main(argv: Optional[list[str]] = None):
|
||||
|
||||
filter rules by meta fields, e.g. rule name or namespace
|
||||
capa -t "create TCP socket" suspicious.exe
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
|
||||
|
||||
@@ -31,6 +31,7 @@ $ protoc.exe --python_out=. --mypy_out=. <path_to_proto> (e.g. capa/render/proto
|
||||
|
||||
Alternatively, --pyi_out=. can be used to generate a Python Interface file that supports development
|
||||
"""
|
||||
|
||||
import datetime
|
||||
from typing import Any, Union
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ import io
|
||||
from typing import Union, Iterator, Optional
|
||||
|
||||
import rich.console
|
||||
from rich.markup import escape
|
||||
from rich.progress import Text
|
||||
|
||||
import capa.render.result_document as rd
|
||||
@@ -24,21 +25,21 @@ import capa.render.result_document as rd
|
||||
|
||||
def bold(s: str) -> Text:
|
||||
"""draw attention to the given string"""
|
||||
return Text.from_markup(f"[cyan]{s}")
|
||||
return Text.from_markup(f"[cyan]{escape(s)}")
|
||||
|
||||
|
||||
def bold2(s: str) -> Text:
|
||||
"""draw attention to the given string, within a `bold` section"""
|
||||
return Text.from_markup(f"[green]{s}")
|
||||
return Text.from_markup(f"[green]{escape(s)}")
|
||||
|
||||
|
||||
def mute(s: str) -> Text:
|
||||
"""draw attention away from the given string"""
|
||||
return Text.from_markup(f"[dim]{s}")
|
||||
return Text.from_markup(f"[dim]{escape(s)}")
|
||||
|
||||
|
||||
def warn(s: str) -> Text:
|
||||
return Text.from_markup(f"[yellow]{s}")
|
||||
return Text.from_markup(f"[yellow]{escape(s)}")
|
||||
|
||||
|
||||
def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]):
|
||||
|
||||
@@ -159,9 +159,8 @@ def render_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
|
||||
s.append(f"){rest}")
|
||||
|
||||
newline = "\n"
|
||||
return (
|
||||
f"{pname}{{pid:{call.thread.process.pid},tid:{call.thread.tid},call:{call.id}}}\n{rutils.mute(newline.join(s))}"
|
||||
)
|
||||
# Use default (non-dim) styling for API details so they remain readable in -vv output
|
||||
return f"{pname}{{pid:{call.thread.process.pid},tid:{call.thread.tid},call:{call.id}}}\n{newline.join(s)}"
|
||||
|
||||
|
||||
def render_short_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
|
||||
@@ -180,7 +179,8 @@ def render_short_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
|
||||
s.append(f"){rest}")
|
||||
|
||||
newline = "\n"
|
||||
return f"call:{call.id}\n{rutils.mute(newline.join(s))}"
|
||||
# Use default (non-dim) styling for API details so they remain readable in -vv output
|
||||
return f"call:{call.id}\n{newline.join(s)}"
|
||||
|
||||
|
||||
def render_static_meta(console: Console, meta: rd.StaticMetadata):
|
||||
|
||||
@@ -1143,6 +1143,8 @@ class Rule:
|
||||
else:
|
||||
# use pyyaml because it can be much faster than ruamel (pure python)
|
||||
doc = yaml.load(s, Loader=cls._get_yaml_loader())
|
||||
if doc is None or not isinstance(doc, dict) or "rule" not in doc:
|
||||
raise InvalidRule("empty or invalid YAML document")
|
||||
return cls.from_dict(doc, s)
|
||||
|
||||
@classmethod
|
||||
@@ -1447,6 +1449,13 @@ class RuleSet:
|
||||
scope: self._index_rules_by_feature(scope, self.rules_by_scope[scope], scores_by_rule) for scope in scopes
|
||||
}
|
||||
|
||||
# Pre-compute the topological index mapping for each scope.
|
||||
# This avoids rebuilding the dict on every call to _match (which runs once per
|
||||
# instruction/basic-block/function/file scope, i.e. potentially millions of times).
|
||||
self._rule_index_by_scope: dict[Scope, dict[str, int]] = {
|
||||
scope: {rule.name: i for i, rule in enumerate(self.rules_by_scope[scope])} for scope in scopes
|
||||
}
|
||||
|
||||
@property
|
||||
def file_rules(self):
|
||||
return self.rules_by_scope[Scope.FILE]
|
||||
@@ -1876,11 +1885,13 @@ class RuleSet:
|
||||
"""
|
||||
done = []
|
||||
|
||||
# use a queue of rules, because we'll be modifying the list (appending new items) as we go.
|
||||
while rules:
|
||||
rule = rules.pop(0)
|
||||
# use a list as a stack: append new items and pop() from the end, both O(1).
|
||||
# order doesn't matter here since every rule in the queue is processed eventually.
|
||||
rules_stack = list(rules)
|
||||
while rules_stack:
|
||||
rule = rules_stack.pop()
|
||||
for subscope_rule in rule.extract_subscope_rules():
|
||||
rules.append(subscope_rule)
|
||||
rules_stack.append(subscope_rule)
|
||||
done.append(rule)
|
||||
|
||||
return done
|
||||
@@ -1929,11 +1940,11 @@ class RuleSet:
|
||||
"""
|
||||
|
||||
feature_index: RuleSet._RuleFeatureIndex = self._feature_indexes_by_scopes[scope]
|
||||
rules: list[Rule] = self.rules_by_scope[scope]
|
||||
# Topologic location of rule given its name.
|
||||
# That is, rules with a lower index should be evaluated first, since their dependencies
|
||||
# will be evaluated later.
|
||||
rule_index_by_rule_name = {rule.name: i for i, rule in enumerate(rules)}
|
||||
# Pre-computed in __init__ to avoid rebuilding on every _match call.
|
||||
rule_index_by_rule_name = self._rule_index_by_scope[scope]
|
||||
|
||||
# This algorithm is optimized to evaluate as few rules as possible,
|
||||
# because the less work we do, the faster capa can run.
|
||||
@@ -2029,7 +2040,9 @@ class RuleSet:
|
||||
candidate_rules = [self.rules[name] for name in candidate_rule_names]
|
||||
|
||||
# Order rules topologically, so that rules with dependencies work correctly.
|
||||
# Sort descending so pop() from the end yields the topologically-first rule in O(1).
|
||||
RuleSet._sort_rules_by_index(rule_index_by_rule_name, candidate_rules)
|
||||
candidate_rules.reverse()
|
||||
|
||||
#
|
||||
# The following is derived from ceng.match
|
||||
@@ -2044,7 +2057,7 @@ class RuleSet:
|
||||
augmented_features = features
|
||||
|
||||
while candidate_rules:
|
||||
rule = candidate_rules.pop(0)
|
||||
rule = candidate_rules.pop()
|
||||
res = rule.evaluate(augmented_features, short_circuit=True)
|
||||
if res:
|
||||
# we first matched the rule with short circuiting enabled.
|
||||
@@ -2083,6 +2096,7 @@ class RuleSet:
|
||||
candidate_rule_names.update(new_candidates)
|
||||
candidate_rules.extend([self.rules[rule_name] for rule_name in new_candidates])
|
||||
RuleSet._sort_rules_by_index(rule_index_by_rule_name, candidate_rules)
|
||||
candidate_rules.reverse()
|
||||
|
||||
return (augmented_features, results)
|
||||
|
||||
@@ -2219,7 +2233,10 @@ def get_rules(
|
||||
|
||||
try:
|
||||
rule = Rule.from_yaml(content.decode("utf-8"))
|
||||
except InvalidRule:
|
||||
except InvalidRule as e:
|
||||
if e.args and e.args[0] == "empty or invalid YAML document":
|
||||
logger.warning("skipping %s: %s", path, e)
|
||||
continue
|
||||
raise
|
||||
else:
|
||||
rule.meta["capa/path"] = path.as_posix()
|
||||
|
||||
52
doc/plans/design.md
Normal file
52
doc/plans/design.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# MAPA design
|
||||
|
||||
## Architecture
|
||||
|
||||
Four layers, each in its own module under the `mapa/` package:
|
||||
|
||||
| Module | Responsibility |
|
||||
|---|---|
|
||||
| `mapa/model.py` | Backend-neutral dataclasses: `MapaReport`, `MapaMeta`, `MapaSection`, `MapaLibrary`, `MapaFunction`, `MapaCall`, `MapaCaller`, `MapaString`, `AssemblageLocation` |
|
||||
| `mapa/ida_db.py` | IDA database lifecycle: `resolve_database()`, `open_database_session()`, SHA-256 caching, flock-based concurrency guard |
|
||||
| `mapa/collector.py` | Populates `MapaReport` from an open `ida_domain.Database`. All IDA queries live here. |
|
||||
| `mapa/renderer.py` | Rich-based text rendering from `MapaReport`. No IDA dependency. |
|
||||
| `mapa/cli.py` | Argument parsing, capa/assemblage loading, orchestration |
|
||||
|
||||
`scripts/mapa.py` is a thin entry point that delegates to `mapa.cli.main()`.
|
||||
|
||||
## Database lifecycle
|
||||
|
||||
Modeled on `idals.py` from idawilli:
|
||||
|
||||
1. If input is `.i64`/`.idb`, use directly.
|
||||
2. Otherwise, hash the file (MD5 + SHA-256), check `~/.cache/mandiant/mapa/<sha256>.i64`.
|
||||
3. On cache miss: acquire advisory flock, create database via `Database.open()` with `IdaCommandOptions(auto_analysis=True, new_database=True, output_database=..., load_resources=True)`, wait for `ida_auto.auto_wait()`.
|
||||
4. On cache hit or after creation: open read-only with `new_database=False, save_on_close=False`.
|
||||
5. Concurrency guard: poll for `.nam` file disappearance + `fcntl.flock` on `<db>.lock` + TOCTOU re-check.
|
||||
|
||||
## Collector design
|
||||
|
||||
The collector builds several indexes before the main function loop:
|
||||
|
||||
- **import_index**: `dict[int, (module, name)]` from `db.imports.get_all_imports()`
|
||||
- **extern_addrs**: `set[int]` from functions in XTRN segments
|
||||
- **thunk_targets**: `dict[int, int]` via `_resolve_thunk_target()` — follows code refs then data refs, max depth 5, single-target chains only
|
||||
- **resolved_callers/callees**: built by walking all non-thunk function flowcharts, resolving call targets through thunk chains, classifying as internal vs API
|
||||
|
||||
String extraction follows single data-reference chains from each instruction up to depth 10, checking `db.strings.get_at()` at each hop.
|
||||
|
||||
## ida-domain API usage
|
||||
|
||||
Primary queries used:
|
||||
|
||||
- `db.functions` — iteration, `get_at()`, `get_name()`, `get_flags()`, `get_flowchart()`
|
||||
- `db.segments.get_all()` — section enumeration
|
||||
- `db.imports.get_all_modules()`, `get_all_imports()` — library/import enumeration
|
||||
- `db.xrefs.code_refs_from_ea()`, `data_refs_from_ea()`, `calls_from_ea()` — call/thunk resolution
|
||||
- `db.strings.get_at()` — string lookup
|
||||
- `db.instructions.is_call_instruction()`, `get_mnemonic()` — instruction classification
|
||||
- `db.heads.size()` — instruction byte size
|
||||
- `FlowChart` with `FlowChartFlags.NOEXT | FlowChartFlags.PREDS` — CFG traversal
|
||||
- `FunctionFlags.THUNK`, `FunctionFlags.LIB` — function classification
|
||||
|
||||
No legacy `ida_*` module calls are used. All queries go through `ida-domain`.
|
||||
502
doc/plans/mapa-idalib-port.md
Normal file
502
doc/plans/mapa-idalib-port.md
Normal file
@@ -0,0 +1,502 @@
|
||||
# MAPA IDA/IDALib port plan
|
||||
|
||||
Goal: preserve the current `scripts/mapa.py` report while replacing the Lancelot/BinExport2 backend with IDALib. Use the `ida-domain` API for normal analysis where it cleanly maps to the needed data. Reuse the existing capa IDA backend as the semantic reference for imports, thunks, string resolution, function naming, and database bootstrap.
|
||||
|
||||
This revision adds concrete guidance from capa's existing IDA backend.
|
||||
|
||||
## Sources used
|
||||
|
||||
Primary sources:
|
||||
- `scripts/mapa.py`
|
||||
- https://github.com/HexRaysSA/ida-claude-plugins/blob/main/plugins/ida-plugin-development/skills/ida-domain-api/SKILL.md
|
||||
- `capa/features/extractors/ida/idalib.py`
|
||||
- `capa/features/extractors/ida/extractor.py`
|
||||
- `capa/features/extractors/ida/helpers.py`
|
||||
- `capa/features/extractors/ida/file.py`
|
||||
- `capa/features/extractors/ida/function.py`
|
||||
- `capa/features/extractors/ida/insn.py`
|
||||
- `capa/features/extractors/ida/basicblock.py`
|
||||
- `capa/ida/helpers.py`
|
||||
- `capa/loader.py`
|
||||
- `tests/fixtures.py`
|
||||
- `tests/test_idalib_features.py`
|
||||
- `capa/features/common.py`
|
||||
- `idals.py` from https://github.com/williballenthin/idawilli/tree/master/idals
|
||||
|
||||
Domain API references:
|
||||
- Overview: https://ida-domain.docs.hex-rays.com/llms.txt
|
||||
- Getting started: https://ida-domain.docs.hex-rays.com/getting_started/index.md
|
||||
- Examples: https://ida-domain.docs.hex-rays.com/examples/index.md
|
||||
- Database: https://ida-domain.docs.hex-rays.com/ref/database/index.md
|
||||
- Functions: https://ida-domain.docs.hex-rays.com/ref/functions/index.md
|
||||
- Flowchart: https://ida-domain.docs.hex-rays.com/ref/flowchart/index.md
|
||||
- Instructions: https://ida-domain.docs.hex-rays.com/ref/instructions/index.md
|
||||
- Xrefs: https://ida-domain.docs.hex-rays.com/ref/xrefs/index.md
|
||||
- Strings: https://ida-domain.docs.hex-rays.com/ref/strings/index.md
|
||||
- Segments: https://ida-domain.docs.hex-rays.com/ref/segments/index.md
|
||||
- Names: https://ida-domain.docs.hex-rays.com/ref/names/index.md
|
||||
- Entries: https://ida-domain.docs.hex-rays.com/ref/entries/index.md
|
||||
|
||||
## Key correction after reviewing capa
|
||||
|
||||
The current `capa/features/extractors/ida/` backend is IDALib-capable, but it is not written against `ida-domain` today. It uses the classic IDA Python surface: `idapro`, `idaapi`, `idautils`, `idc`, `ida_bytes`, `ida_funcs`, `ida_segment`, and related modules.
|
||||
|
||||
That means the correct migration strategy is not "invent a fresh IDA collector from scratch". The correct strategy is:
|
||||
- use capa's existing IDA backend as the behavioral spec and a source of proven heuristics
|
||||
- implement the new collector against `ida-domain` wherever the needed API exists cleanly
|
||||
- treat the existing legacy helpers as reference material, not as the default implementation path
|
||||
- only introduce lower-level `ida_*` calls if the implementer can point to a concrete `ida-domain` gap and document it
|
||||
|
||||
This is especially important for:
|
||||
- IDALib database bootstrap
|
||||
- import and extern enumeration
|
||||
- thunk-chain resolution
|
||||
- string/data-reference chasing
|
||||
- alternative function names from comments
|
||||
- known IDA version caveats
|
||||
|
||||
## Current MAPA output that must remain stable
|
||||
|
||||
The current script renders these sections, in this order:
|
||||
- `meta`
|
||||
- `modules`
|
||||
- `sections`
|
||||
- `libraries`
|
||||
- `functions`
|
||||
|
||||
Accepted intentional change for the port: remove `modules` entirely.
|
||||
|
||||
Inside `functions`, it currently:
|
||||
- iterates functions in address order
|
||||
- prints `thunk ...` for thunk functions
|
||||
- prints `function ...` for normal functions
|
||||
- groups adjacent functions by Assemblage source path when available
|
||||
- forwards callers through thunk targets so callers of a thunk appear on the real target
|
||||
- prints `B/E/I` as basic blocks / CFG edges / instructions plus total instruction bytes
|
||||
- prints capa rule names attached to the function
|
||||
- prints `calls:` for internal non-library callees
|
||||
- prints `api:` for import/external/library callees
|
||||
- prints `string:` for referenced strings
|
||||
|
||||
That output contract should stay stable unless a deliberate change is accepted and documented.
|
||||
|
||||
## What capa already gives you
|
||||
|
||||
There are three reusable assets.
|
||||
|
||||
The first is a proven `ida-domain` database-resolution and session-opening path in `../idawilli/idals/idals.py`. That code already does the part MAPA needs most: accept either a raw sample or an existing `.i64` / `.idb`, hash raw inputs, cache analyzed databases by SHA-256, and guard concurrent access.
|
||||
|
||||
The second is capa's proven IDALib bootstrap path:
|
||||
- `capa/features/extractors/ida/idalib.py`
|
||||
- `capa/loader.py`
|
||||
- `tests/fixtures.py`
|
||||
|
||||
The third is capa's proven semantic definition of the data MAPA cares about:
|
||||
- function enumeration: `capa/features/extractors/ida/extractor.py`
|
||||
- segments, imports, externs: `capa/features/extractors/ida/file.py`, `helpers.py`
|
||||
- callers and function names: `capa/features/extractors/ida/function.py`
|
||||
- API calls, call targets, strings, mnemonics, offsets: `capa/features/extractors/ida/insn.py`
|
||||
- CFG/basic blocks: `capa/features/extractors/ida/basicblock.py`, `helpers.py`
|
||||
- hashes, architecture, imagebase, file-type helpers: `capa/ida/helpers.py`
|
||||
|
||||
The practical split is simple. Use `idals.py` as the model for database resolution, caching, and guarded open/close. Use capa's IDA backend as the model for analysis semantics and parity behavior. Use `ida-domain` as the primary query surface inside the collector. Do not depend on Lancelot anywhere in the new implementation.
|
||||
|
||||
## Important behavioral facts from capa's backend
|
||||
|
||||
1. IDALib bootstrap in capa is not a bare `Database.open(...)` call.
|
||||
- It uses `capa.features.extractors.ida.idalib.has_idalib()` and `load_idalib()`.
|
||||
- It then calls `idapro.open_database(..., run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R")`.
|
||||
- It disables console chatter with `idapro.enable_console_messages(False)`.
|
||||
- It waits for analysis completion with `ida_auto.auto_wait()`.
|
||||
|
||||
2. Capa explicitly disables Lumina during IDALib analysis.
|
||||
- Reason documented in `capa/loader.py`: Lumina can inject bad names or overwrite debug-info names.
|
||||
- MAPA should do the same unless there is a deliberate decision to trust Lumina.
|
||||
|
||||
3. Capa requests resource loading with `-R`.
|
||||
- This matters for some file-scope extraction.
|
||||
- `tests/test_idalib_features.py` notes that IDA 9.0 had resource-loading limitations under IDALib.
|
||||
|
||||
4. The existing `IdaFeatureExtractor.get_functions()` is not a direct drop-in for MAPA.
|
||||
- It calls `helpers.get_functions(skip_thunks=True, skip_libs=True)`.
|
||||
- MAPA must render thunk functions, so MAPA needs its own full function inventory.
|
||||
|
||||
5. Capa already encodes the thunk semantics MAPA needs.
|
||||
- `THUNK_CHAIN_DEPTH_DELTA` is defined in `capa/features/common.py` as `5`.
|
||||
- `capa/features/extractors/ida/insn.py:check_for_api_call()` follows code refs, then data refs, through thunk chains to resolve imports/externs.
|
||||
- `capa/features/extractors/binexport2/__init__.py:BinExport2Analysis._compute_thunks()` shows the intended "single-target thunk chain" rule: only resolve through chains with exactly one callee per thunk hop.
|
||||
|
||||
6. Capa already encodes MAPA-relevant string semantics.
|
||||
- `helpers.find_data_reference_from_insn(insn, max_depth=10)` follows single data-reference chains.
|
||||
- `helpers.find_string_at(ea)` looks for C strings and works around an IDA Unicode-decoding quirk.
|
||||
- `insn.extract_insn_string_features()` and `extract_insn_bytes_features()` use that behavior.
|
||||
|
||||
7. Capa already has the import and extern logic MAPA needs.
|
||||
- `helpers.get_file_imports()` enumerates import modules and normalizes names.
|
||||
- `helpers.get_file_externs()` enumerates functions from `SEG_XTRN` segments.
|
||||
- `file.extract_file_import_names()` shows how capa treats name-vs-ordinal imports.
|
||||
|
||||
8. Capa already has alternative-name logic.
|
||||
- `helpers.get_function_alternative_names()` parses comments that look like `Alternative name is 'foo'`.
|
||||
- `function.extract_function_alternative_names()` exposes them as `FunctionName` features.
|
||||
|
||||
9. Capa already has the CFG behavior MAPA should match.
|
||||
- `helpers.get_function_blocks()` uses `idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT))`.
|
||||
- The `NOEXT` part matters: it avoids useless external blocks contaminating B/E/I counts.
|
||||
|
||||
10. The test suite documents real version caveats.
|
||||
- IDA 9.0 and 9.1 had some ELF symbol issues.
|
||||
- IDA 9.0 under IDALib had resource-loading limitations.
|
||||
- MAPA validation should account for those when comparing outputs.
|
||||
|
||||
## Database resolution and caching pattern to copy from idals
|
||||
|
||||
`../idawilli/idals/idals.py` is the best starting point for the "raw file or existing database" problem. It already implements the user-visible behavior MAPA needs.
|
||||
|
||||
Its pattern is:
|
||||
- if the input suffix is `.i64` or `.idb`, use that database directly
|
||||
- otherwise compute hashes for the raw file with `compute_file_hashes()` and use the SHA-256 as the cache key
|
||||
- store the generated database in a common cache directory, currently `~/.cache/hex-rays/idals/<sha256>.i64`
|
||||
- serialize access with `database_access_guard()`
|
||||
- detect an already-open or unpacked database by watching for the companion `.nam` file
|
||||
- use an advisory `flock` on `<db>.lock` to avoid concurrent writers
|
||||
- after acquiring the lock, re-check `.nam` to close the TOCTOU hole
|
||||
- on a cache miss, analyze the raw sample with `Database.open(..., IdaCommandOptions(auto_analysis=True, new_database=True, output_database=..., load_resources=True), save_on_close=True)`
|
||||
- after the cached database exists, open it read-only with `open_database_session(..., auto_analysis=False)` and `save_on_close=False`
|
||||
|
||||
MAPA should adopt that pattern with only minor changes:
|
||||
- use the same SHA-256-keyed cache strategy
|
||||
- keep the same locking protocol
|
||||
- put the cache in a MAPA-specific directory, or intentionally share the idals directory if reuse is desired
|
||||
- expose the cache location as a small helper or constant so it can be documented and tested
|
||||
- reuse the computed SHA-256 for the `meta` section instead of hashing the sample twice
|
||||
|
||||
There is one deliberate integration check to make here. `idals.py` uses `ida-domain`'s `Database.open(...)`, while capa's bootstrap path uses `idapro.open_database(...)` and disables Lumina explicitly. For MAPA, prefer the `idals.py` open-and-cache pattern because it already handles the database lifecycle correctly. Then verify whether the `ida-domain` open path offers an equivalent way to suppress Lumina. If it does, use it. If it does not, decide whether that matters for MAPA output or whether database creation should fall back to capa's `idapro.open_database(...)` path while cached-session opens keep the `idals.py` pattern.
|
||||
|
||||
## Recommended architecture
|
||||
|
||||
Do not port `scripts/mapa.py` by replacing each Lancelot query inline. Split it into four layers:
|
||||
- CLI and argument parsing
|
||||
- IDA bootstrap and environment setup
|
||||
- report collection
|
||||
- rendering
|
||||
|
||||
Use backend-neutral dataclasses for the report model:
|
||||
- `MapaReport`
|
||||
- `MapaMeta`
|
||||
- `MapaSection`
|
||||
- `MapaLibrary`
|
||||
- `MapaFunction`
|
||||
- `MapaCall`
|
||||
- `MapaString`
|
||||
- `AssemblageLocation`
|
||||
|
||||
The collector should have one primary data-access layer: `ida-domain` for functions, flowcharts, instructions, strings, names, segments, xrefs, and database lifecycle. Existing capa helpers remain useful as semantic references and regression oracles.
|
||||
|
||||
## Best practical strategy
|
||||
|
||||
The implementation target is an IDALib-only collector with `ida-domain` as the primary API surface.
|
||||
|
||||
Concretely:
|
||||
- use `ida-domain` for function inventory, instruction iteration, CFG stats, name lookup, segment listing, xref walking, and cached database open/create
|
||||
- use the existing capa IDA code to understand the intended semantics for imports, externs, thunk resolution, data-reference chasing, and alternative names
|
||||
- if the implementer discovers a real `ida-domain` gap, document the gap explicitly before introducing lower-level `ida_*` calls
|
||||
|
||||
That gives the next implementer a clear target: no Lancelot, no default hybrid backend, and no legacy helper dependency unless a concrete gap forces it.
|
||||
|
||||
## Concrete mapping from MAPA fields to capa/backend logic
|
||||
|
||||
| MAPA field/behavior | First source to consult | Recommended implementation |
|
||||
|---|---|---|
|
||||
| IDALib discovery | `capa/features/extractors/ida/idalib.py` | Reuse `has_idalib()` / `load_idalib()` logic if MAPA needs to bootstrap `idapro` availability itself. |
|
||||
| resolve/open DB | `../idawilli/idals/idals.py` | Use `resolve_database()` and `open_database_session()` as the primary pattern. |
|
||||
| cache key and cache DB path | `../idawilli/idals/idals.py` | Hash raw inputs once and key cached databases by SHA-256. |
|
||||
| Lumina suppression policy | `capa/loader.py`, `tests/fixtures.py` | Carry forward capa's disable-Lumina behavior if the chosen open path supports it. |
|
||||
| sample hashes | `../idawilli/idals/idals.py`, `capa/ida/helpers.py`, `extractor.py` | Reuse the SHA-256 computed for cache lookup; prefer IDA-provided hashes when opening an existing database. |
|
||||
| image base | `capa/ida/helpers.py` | Prefer IDA imagebase helper; use Domain API only if it exposes the same value clearly. |
|
||||
| sections | `helpers.get_segments()`, `file.extract_file_section_names()` | Use `db.segments`; match capa's header-segment filtering rules if needed. |
|
||||
| import modules/functions | `helpers.get_file_imports()` | Implement with `ida-domain` if the needed import data is exposed cleanly; otherwise use this helper as the semantic reference for normalization. |
|
||||
| externs | `helpers.get_file_externs()` | Match this behavior with `ida-domain` if possible; if not, document the missing API and then fall back deliberately. |
|
||||
| function inventory | `extractor.py`, `helpers.get_functions()` | Do not use extractor's default function list because it skips thunks/libs. Build a MAPA-specific inventory with `ida-domain`. |
|
||||
| callers | `function.extract_function_calls_to()` | Reproduce the same behavior with domain xrefs and compare against this helper during validation. |
|
||||
| call targets | `insn.extract_function_calls_from()` | Reproduce the same behavior with domain xrefs and compare against this helper during validation. |
|
||||
| API calls | `insn.extract_insn_api_features()` | Match the import/extern/thunk resolution semantics exposed by this function. |
|
||||
| string refs | `helpers.find_data_reference_from_insn()`, `find_string_at()` | Match the same single-ref-chain behavior and max depth `10`. |
|
||||
| function names | `function.extract_function_name()`, alternative-name helpers | Use normal name, demangled name, alternative names, Assemblage overlay. |
|
||||
| B/E/I stats | `helpers.get_function_blocks()` | Match `PREDS | NOEXT` semantics; use domain flowchart if possible. |
|
||||
| function ordering | current `scripts/mapa.py` | Keep address order for deltas and rendering stability. |
|
||||
|
||||
## Step-by-step implementation plan
|
||||
|
||||
### 1. Freeze the current MAPA output
|
||||
|
||||
Before editing code, save golden outputs from the current `scripts/mapa.py` for:
|
||||
- a sample with normal internal calls and imports
|
||||
- a sample with thunk-heavy call patterns
|
||||
- a sample with capa and Assemblage overlays
|
||||
|
||||
These are the parity targets.
|
||||
|
||||
### 2. Add `resolve_database()` and `open_database_session()` helpers
|
||||
|
||||
Base these directly on `../idawilli/idals/idals.py`.
|
||||
|
||||
`resolve_database()` should:
|
||||
- accept either a raw sample or an existing `.i64` / `.idb`
|
||||
- return existing databases unchanged
|
||||
- hash raw inputs once and use SHA-256 as the cache key
|
||||
- place cached databases under the XDG cache root in `mandiant/mapa/`, i.e. `$XDG_CACHE_HOME/mandiant/mapa/` when set, else `~/.cache/mandiant/mapa/`
|
||||
- guard cache creation with the same `.nam` + `flock` protocol from `database_access_guard()`
|
||||
- analyze cache misses with `Database.open(..., IdaCommandOptions(auto_analysis=True, new_database=True, output_database=..., load_resources=True), save_on_close=True)`
|
||||
- keep cache creation transparent in normal mode and only log cache details in verbose/debug mode
|
||||
|
||||
`open_database_session()` should:
|
||||
- use the same guard before opening the database
|
||||
- open cached or user-supplied databases with `new_database=False`
|
||||
- default to `save_on_close=False`
|
||||
- optionally run `ida_auto.auto_wait()` when `auto_analysis=True`
|
||||
|
||||
This should become MAPA's primary database lifecycle.
|
||||
|
||||
Then add one capa-derived check on top: if the chosen open path can suppress Lumina, do so. If the `ida-domain` path cannot, verify whether that difference affects naming enough to justify a fallback to capa's `idapro.open_database(...)` path during cache creation.
|
||||
|
||||
### 3. Introduce a backend-neutral report model
|
||||
|
||||
Before touching the collector logic, split `scripts/mapa.py` into:
|
||||
- CLI
|
||||
- collector
|
||||
- renderer
|
||||
- input-overlay parsing for capa JSON and Assemblage JSONL
|
||||
|
||||
Keep the renderer stable. The collector should return value objects only.
|
||||
|
||||
### 4. Build a MAPA-specific function inventory
|
||||
|
||||
Do not use `IdaFeatureExtractor.get_functions()` as-is, because it skips thunks and library functions.
|
||||
|
||||
Instead:
|
||||
- enumerate all functions in address order with `ida-domain` if possible
|
||||
- keep flags for `is_thunk`, `is_library`, and `is_external`
|
||||
- retain enough metadata to render thunks, skip imports from the function list, and compute deltas
|
||||
|
||||
For parity, compare your inventory against:
|
||||
- `helpers.get_functions(skip_thunks=False, skip_libs=False)`
|
||||
- IDA function flags such as `FUNC_THUNK` and `FUNC_LIB`
|
||||
|
||||
### 5. Recreate import and extern logic using capa's semantics
|
||||
|
||||
For the `libraries` section and for `api:` classification, start from the behavior encoded in:
|
||||
- `helpers.get_file_imports()`
|
||||
- `helpers.get_file_externs()`
|
||||
|
||||
That behavior already handles:
|
||||
- PE imports with `__imp_` prefixes
|
||||
- ELF imports with `@@version` suffixes
|
||||
- ordinal imports
|
||||
- extern functions in `SEG_XTRN`
|
||||
|
||||
The implementation target remains `ida-domain`. The next implementer should reproduce this behavior there if the API surface is available. If a real gap appears, document the gap before introducing any fallback.
|
||||
|
||||
### 6. Implement thunk resolution with capa's exact semantics
|
||||
|
||||
Build one cached helper, for example `resolve_thunk_target(ea)`, and use it everywhere.
|
||||
|
||||
Behavior should match capa's existing semantics:
|
||||
- maximum thunk-chain depth: `THUNK_CHAIN_DEPTH_DELTA == 5`
|
||||
- follow code refs first, then data refs if needed
|
||||
- only resolve through single-target chains
|
||||
- stop on cycles, zero-target, or multi-target cases
|
||||
- allow the final resolved target to be an import or extern
|
||||
|
||||
Use two existing code paths as references:
|
||||
- `capa/features/extractors/ida/insn.py:check_for_api_call()`
|
||||
- `capa/features/extractors/binexport2/__init__.py:BinExport2Analysis._compute_thunks()`
|
||||
|
||||
This helper must drive:
|
||||
- caller forwarding
|
||||
- `calls:` lines
|
||||
- `api:` lines
|
||||
- capa match attachment when a match lands in a thunk
|
||||
|
||||
### 7. Use capa features as references, not as the collector
|
||||
|
||||
Do not build MAPA by instantiating `IdaFeatureExtractor()` and aggregating capa features into the final report. That would create a hidden second backend and blur the migration target.
|
||||
|
||||
Instead, query IDA directly through `ida-domain` and use the capa feature-extraction code as a reference when the intended semantics are unclear. The implementer should compare specific results against:
|
||||
- `Characteristic("calls to")`
|
||||
- `Characteristic("calls from")`
|
||||
- `API`
|
||||
- `String`
|
||||
- `FunctionName`
|
||||
- `Mnemonic`
|
||||
|
||||
This keeps the delivered collector IDALib-only while still giving the implementer a precise oracle for parity checks.
|
||||
|
||||
### 8. Recreate callers and callees
|
||||
|
||||
Use a precomputed normalized call graph. Do not compute callers ad hoc during rendering.
|
||||
|
||||
For each non-import function:
|
||||
- walk its instructions
|
||||
- identify call or jump-to-import patterns using the same logic as `extract_insn_api_features()`
|
||||
- resolve thunk chains
|
||||
- classify the resolved target as internal or API/import/extern
|
||||
- record caller and callee relationships on resolved targets
|
||||
|
||||
For parity, verify against these capa semantics:
|
||||
- function callers: `function.extract_function_calls_to()`
|
||||
- outgoing calls: `insn.extract_function_calls_from()`
|
||||
- API calls: `insn.extract_insn_api_features()`
|
||||
|
||||
Important detail: the existing helper treats both `call` and `jmp` as API-bearing instructions in some thunk/import cases. Do not assume `call` only.
|
||||
|
||||
### 9. Recreate B/E/I with capa's CFG semantics
|
||||
|
||||
For each rendered function:
|
||||
- basic blocks: count basic blocks using the equivalent of `helpers.get_function_blocks()`
|
||||
- edges: sum successors across those blocks
|
||||
- instructions: count instructions across those blocks
|
||||
- bytes: sum instruction sizes
|
||||
|
||||
The important parity rule is the CFG construction mode:
|
||||
- match `idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT))`
|
||||
|
||||
If the Domain API flowchart differs, use it only if it can match the no-external-block behavior. Otherwise use a tiny legacy helper for block enumeration and keep everything else in the Domain API.
|
||||
|
||||
### 10. Recreate string extraction with capa's data-ref chasing
|
||||
|
||||
Do not just test `db.strings.get_at(xref.to_ea)` and stop. That will miss the semantics capa already uses.
|
||||
|
||||
Start from capa's behavior:
|
||||
- follow a single data-reference chain from the instruction, up to depth `10`
|
||||
- if the final target is a string, emit it
|
||||
- otherwise it may be bytes, not a string
|
||||
|
||||
For MAPA specifically:
|
||||
- only render strings, not raw bytes
|
||||
- deduplicate by rendered string value, matching the current script
|
||||
- trim trailing whitespace the same way the current script does
|
||||
|
||||
Reference implementation:
|
||||
- `helpers.find_data_reference_from_insn()`
|
||||
- `helpers.find_string_at()`
|
||||
- `insn.extract_insn_string_features()`
|
||||
|
||||
### 11. Reuse capa's name and alternative-name semantics
|
||||
|
||||
For the function display name, use this order:
|
||||
- Assemblage override if present
|
||||
- demangled name
|
||||
- IDA function name
|
||||
- alternative names from comments if they help and the main name is poor
|
||||
- final fallback such as `sub_{ea:x}`
|
||||
|
||||
Do not mutate the database just to apply Assemblage names.
|
||||
|
||||
Reference points:
|
||||
- `function.extract_function_name()`
|
||||
- `helpers.get_function_alternative_names()`
|
||||
|
||||
### 12. Reattach capa matches by containing function
|
||||
|
||||
Keep the current capa JSON input format, but simplify the mapping logic.
|
||||
|
||||
Recommended algorithm:
|
||||
- parse the capa JSON as today
|
||||
- for each absolute match address, ask IDA for the containing function
|
||||
- if that function is a thunk, resolve it through the thunk resolver
|
||||
- attach the rule name to the resolved function start EA
|
||||
- warn when no containing function exists
|
||||
|
||||
This is simpler than the current BinExport-specific mapping and aligns better with IDA's data model.
|
||||
|
||||
### 13. Rebuild top-level sections using capa-backed semantics
|
||||
|
||||
For `meta`:
|
||||
- sample name: input path or IDA metadata
|
||||
- hashes: prefer IDA-provided hash helpers in `capa/ida/helpers.py`
|
||||
- architecture: reuse the logic in `capa/features/extractors/ida/global_.py`
|
||||
- timestamp: define explicitly, because BinExport's old field is gone
|
||||
|
||||
For `sections`:
|
||||
- use `ida-domain` segments if possible
|
||||
- match capa's `skip_header_segments` behavior if needed
|
||||
|
||||
For `libraries`:
|
||||
- use `helpers.get_file_imports()` and group/display import modules accordingly
|
||||
|
||||
For `modules`:
|
||||
- remove the section entirely as an intentional interface change
|
||||
- document the removal in the spec so future ports do not try to reintroduce BinExport-specific `module` semantics accidentally
|
||||
|
||||
### 14. Add tests using capa's existing IDALib pattern
|
||||
|
||||
Pure tests should cover:
|
||||
- Assemblage parsing and RVA-to-VA mapping
|
||||
- thunk-chain resolution
|
||||
- import/extern normalization
|
||||
- string de-duplication and trimming
|
||||
- final rendering from a prebuilt `MapaReport`
|
||||
|
||||
Integration tests should reuse the same lifecycle MAPA will use in production:
|
||||
- resolve the input to an existing or cached database
|
||||
- open it through the guarded session helper
|
||||
- collect the MAPA report
|
||||
- compare key functions and sections against golden outputs
|
||||
|
||||
Use `tests/test_idalib_features.py` as the reference for version-specific skips and expectations, and use `../idawilli/idals/idals.py` as the reference for database resolution and guarded open/close behavior.
|
||||
|
||||
### 15. Validate parity and document deliberate differences
|
||||
|
||||
Compare the new output against the frozen Lancelot output on the supplied samples.
|
||||
|
||||
Verify specifically:
|
||||
- function ordering
|
||||
- thunk rendering
|
||||
- thunk-forwarded callers
|
||||
- internal vs API call classification
|
||||
- libraries/imports section contents
|
||||
- string extraction
|
||||
- B/E/I counts
|
||||
- Assemblage path grouping
|
||||
- capa attachment
|
||||
|
||||
Document every known delta. The likely ones are:
|
||||
- function discovery differences between IDA and Lancelot
|
||||
- the intentional removal of the `modules` section
|
||||
- symbol differences across IDA versions, especially ELF on older 9.x
|
||||
- resource-dependent differences on older IDALib versions
|
||||
|
||||
## Minimal implementation checklist
|
||||
|
||||
A good order of work is:
|
||||
1. freeze current MAPA outputs
|
||||
2. add backend-neutral report dataclasses
|
||||
3. add `resolve_database()` and `open_database_session()` helpers modeled on `idals.py`
|
||||
4. implement the XDG cache path and quiet-by-default cache creation behavior
|
||||
5. build a full MAPA function inventory that includes thunks
|
||||
6. port sections and metadata
|
||||
7. implement import/extern classification to match capa semantics
|
||||
8. implement the thunk resolver using capa's existing semantics
|
||||
9. build normalized caller/callee/API indexes
|
||||
10. port B/E/I using `PREDS | NOEXT`-equivalent CFG traversal
|
||||
11. port string extraction using capa's data-ref-chain semantics
|
||||
12. port Assemblage overlay handling
|
||||
13. port capa JSON address-to-function attachment
|
||||
14. remove the `modules` section and document the interface change
|
||||
15. compare outputs against golden references
|
||||
16. document any proven `ida-domain` gaps and any intentional differences in spec/design during implementation
|
||||
|
||||
## Resolved decisions for the implementation handoff
|
||||
|
||||
Record these in `spec.md` or `design.md` during implementation so the behavior stays stable.
|
||||
|
||||
- accepted inputs: raw binary and existing IDA databases
|
||||
- cached databases live under the XDG cache root in `mandiant/mapa/`
|
||||
- MAPA may create and persist cached IDA databases automatically
|
||||
- cache creation stays quiet in normal mode and only surfaces in verbose/debug logging
|
||||
- Lumina stays disabled for now
|
||||
- `meta.ts` becomes `datetime.now(UTC).isoformat()`
|
||||
- remove the `modules` section from the report
|
||||
- the implementation target is IDALib only and all Lancelot dependencies should be removed
|
||||
- assume `ida-domain` is sufficient unless the implementer can demonstrate a specific missing API; any lower-level fallback must be justified and documented
|
||||
52
doc/plans/spec.md
Normal file
52
doc/plans/spec.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# MAPA specification
|
||||
|
||||
MAPA renders a structured text report of a binary's function map: metadata, sections, import libraries, and a per-function breakdown of callers, callees, API calls, strings, CFG stats, and capa rule matches.
|
||||
|
||||
## Invocation
|
||||
|
||||
```
|
||||
python scripts/mapa.py <input_file> [--capa <capa.json>] [--assemblage <locations.jsonl>] [--verbose] [--quiet]
|
||||
```
|
||||
|
||||
`input_file` accepts raw binaries (PE, ELF), existing IDA databases (`.i64`, `.idb`), or any file IDA can analyze. For raw files, MAPA automatically creates and caches an analyzed IDA database under the XDG cache root (`$XDG_CACHE_HOME/mandiant/mapa/` or `~/.cache/mandiant/mapa/`) keyed by the file's SHA-256 hash.
|
||||
|
||||
## Backend
|
||||
|
||||
IDALib only. All analysis uses `ida-domain` as the primary query API. The Lancelot/BinExport2 backend has been removed.
|
||||
|
||||
## Report sections
|
||||
|
||||
The report renders these sections in order:
|
||||
|
||||
1. **meta** — file name, SHA-256, architecture, timestamp
|
||||
2. **sections** — memory segments with address, permissions (rwx), and size
|
||||
3. **libraries** — import modules
|
||||
4. **functions** — per-function detail in address order
|
||||
|
||||
### Functions section
|
||||
|
||||
Each function renders as either `thunk <name> @ <address>` or `function <name> @ <address>` followed by:
|
||||
|
||||
- `xref:` — callers with direction arrow and function-order delta
|
||||
- `B/E/I:` — basic blocks / CFG edges / instructions (total bytes)
|
||||
- `capa:` — matched capa rule names
|
||||
- `calls:` — internal non-library callees with direction and delta
|
||||
- `api:` — import/external/library callees
|
||||
- `string:` — referenced strings (deduplicated, whitespace-trimmed)
|
||||
|
||||
Thunk functions show only the header, no body.
|
||||
|
||||
When Assemblage data is provided, adjacent functions are grouped by source file path, and function names are overridden with Assemblage names.
|
||||
|
||||
## Deliberate interface changes from the Lancelot/BinExport2 version
|
||||
|
||||
- The `modules` section has been removed. BinExport2's module concept has no IDA equivalent.
|
||||
|
||||
## Decisions
|
||||
|
||||
- **2026-03-16**: Lumina disabled during database creation via `IdaCommandOptions(plugin_options="lumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0")`, matching capa's `loader.py`. The `plugin_options` field maps to IDA's `-O` switch; embedding `-O` in the value for the second option works because `build_args()` concatenates it verbatim. Resource loading enabled via `load_resources=True` (maps to `-R`).
|
||||
- **2026-03-16**: Cache directory is `$XDG_CACHE_HOME/mandiant/mapa/` (or `~/.cache/mandiant/mapa/`). Separate from idals cache.
|
||||
- **2026-03-16**: `meta.ts` is `datetime.now(UTC).isoformat()` — no longer sourced from BinExport2.
|
||||
- **2026-03-16**: Thunk chain depth limit is 5 (matches capa's `THUNK_CHAIN_DEPTH_DELTA`).
|
||||
- **2026-03-16**: CFG stats use `FlowChartFlags.NOEXT | FlowChartFlags.PREDS` to match capa's block enumeration semantics.
|
||||
- **2026-03-16**: String extraction follows single data-reference chains up to depth 10, matching capa's `find_data_reference_from_insn`.
|
||||
17
doc/usage.md
17
doc/usage.md
@@ -2,6 +2,21 @@
|
||||
|
||||
See `capa -h` for all supported arguments and usage examples.
|
||||
|
||||
## Ways to consume capa output
|
||||
|
||||
| Method | Output / interface | Typical use |
|
||||
|--------|--------------------|-------------|
|
||||
| **CLI** | Text (default, `-v`, `-vv`), JSON (`-j`), or other formats | Scripting, CI, one-off analysis |
|
||||
| [**IDA Pro**](https://github.com/mandiant/capa/tree/master/capa/ida/plugin) | capa Explorer plugin inside IDA | Interactive analysis with jump-to-address |
|
||||
| [**Ghidra**](https://github.com/mandiant/capa/tree/master/capa/ghidra/plugin) | capa Explorer plugin inside Ghidra | Interactive analysis with Ghidra integration |
|
||||
| [**Binary Ninja**](https://github.com/mandiant/capa/tree/master/capa/features/extractors/binja) | capa run using Binary Ninja as the analysis backend | Interactive analysis with Binary Ninja integration |
|
||||
| [**Dynamic (Sandbox)**](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox) | capa run on dynamic sandbox report (CAPE, VMRay, etc.) | Dynamic analysis of sandbox output |
|
||||
| [**Web (capa Explorer)**](https://mandiant.github.io/capa/explorer/) | Web UI (upload JSON or load from URL) | Sharing results, viewing from VirusTotal or similar |
|
||||
|
||||
## Default vs verbose output
|
||||
|
||||
By default, capa shows only *top-level* rule matches: capabilities that are not already implied by another displayed rule. For example, if a rule "persist via Run registry key" matches and it *contains* a match for "set registry value", the default output lists only "persist via Run registry key". This keeps the default output short while still reflecting all detected capabilities at the top level. Use **`-v`** to see all rule matches, including nested ones. Use **`-vv`** for an even more detailed view that shows how each rule matched.
|
||||
|
||||
## tips and tricks
|
||||
|
||||
### only run selected rules
|
||||
@@ -11,7 +26,7 @@ For example, `capa -t william.ballenthin@mandiant.com` runs rules that reference
|
||||
|
||||
### only analyze selected functions
|
||||
Use the `--restrict-to-functions` option to extract capabilities from only a selected set of functions. This is useful for analyzing
|
||||
large functions and figuring out their capabilities and their address of occurance; for example: PEB access, RC4 encryption, etc.
|
||||
large functions and figuring out their capabilities and their address of occurrence; for example: PEB access, RC4 encryption, etc.
|
||||
|
||||
To use this, you can copy the virtual addresses from your favorite disassembler and pass them to capa as follows:
|
||||
`capa sample.exe --restrict-to-functions 0x4019C0,0x401CD0`. If you add the `-v` option then capa will extract the interesting parts of a function for you.
|
||||
|
||||
0
mapa/__init__.py
Normal file
0
mapa/__init__.py
Normal file
5
mapa/__main__.py
Normal file
5
mapa/__main__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
import sys
|
||||
|
||||
from mapa.cli import main
|
||||
|
||||
sys.exit(main())
|
||||
169
mapa/cli.py
Normal file
169
mapa/cli.py
Normal file
@@ -0,0 +1,169 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
from rich.theme import Theme
|
||||
from rich.console import Console
|
||||
from rich.logging import RichHandler
|
||||
|
||||
from mapa.model import AssemblageLocation
|
||||
|
||||
logger = logging.getLogger("mapa")
|
||||
|
||||
|
||||
def _load_capa_matches(
    capa_path: Path,
    thunk_targets: dict[int, int],
    get_containing_function: object,
) -> dict[int, set[str]]:
    """Load capa JSON and map matches to function addresses.

    Args:
        capa_path: path to a capa `-j` results document.
        thunk_targets: thunk start address -> resolved target address; matches
            landing on a thunk are forwarded to the target.
        get_containing_function: callable(address) -> int | None returning the
            function start address containing `address`, used when the match
            address is not in capa's basic-block layout. May return None
            (e.g. the CLI passes `lambda addr: None`), in which case the raw
            match address is used as-is.

    Returns:
        function start address -> set of matched capa rule names.
    """
    doc = json.loads(capa_path.read_text())

    # capa's layout section maps matched basic blocks back to their functions.
    functions_by_basic_block: dict[int, int] = {}
    for function in doc["meta"]["analysis"]["layout"]["functions"]:
        for basic_block in function["matched_basic_blocks"]:
            functions_by_basic_block[basic_block["address"]["value"]] = function["address"]["value"]

    # Collect rule matches keyed by their absolute match address.
    matches_by_address: defaultdict[int, set[str]] = defaultdict(set)
    for rule_name, results in doc["rules"].items():
        for location, _ in results["matches"]:
            if location["type"] != "absolute":
                continue
            matches_by_address[location["value"]].add(rule_name)

    matches_by_function: defaultdict[int, set[str]] = defaultdict(set)
    for address, matches in matches_by_address.items():
        # Prefer capa's own layout; otherwise ask the caller-provided resolver,
        # and finally fall back to the raw match address.
        func_addr = functions_by_basic_block.get(address)
        if func_addr is None:
            containing = get_containing_function(address)
            func_addr = containing if containing is not None else address

        if func_addr in thunk_targets:
            logger.debug("forwarding capa matches from thunk 0x%x to 0x%x", func_addr, thunk_targets[func_addr])
            func_addr = thunk_targets[func_addr]

        matches_by_function[func_addr].update(matches)
        for match in matches:
            logger.info("capa: 0x%x: %s", func_addr, match)

    return dict(matches_by_function)
|
||||
|
||||
|
||||
def _load_assemblage(assemblage_path: Path, base_address: int) -> dict[int, AssemblageLocation]:
    """Parse an Assemblage JSONL file into absolute address -> location.

    Blank lines are skipped; every other line is one JSON document.
    RVAs from the file are rebased onto `base_address`.
    """
    by_address: dict[int, AssemblageLocation] = {}
    with assemblage_path.open("rt", encoding="utf-8") as fh:
        for raw in fh:
            doc = raw.strip()
            if not doc:
                continue
            loc = AssemblageLocation.from_json(doc)
            by_address[base_address + loc.rva] = loc
    return by_address
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: analyze the input, collect a MAPA report, render it.

    Returns 0 on success; argparse exits on bad arguments.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="MAPA: binary function map")
    parser.add_argument("input_file", type=Path, help="path to input file (binary, .i64, or .idb)")
    parser.add_argument("--capa", type=Path, help="path to capa JSON results file")
    parser.add_argument("--assemblage", type=Path, help="path to Assemblage JSONL file")
    parser.add_argument("--verbose", action="store_true", help="enable verbose logging")
    parser.add_argument("--quiet", action="store_true", help="disable all output but errors")
    args = parser.parse_args(args=argv)

    # Logs go to stderr so stdout stays clean for the rendered report.
    stderr_console = Console(stderr=True)
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else (logging.ERROR if args.quiet else logging.INFO),
        format="%(message)s",
        handlers=[RichHandler(console=stderr_console, show_path=False, rich_tracebacks=args.verbose)],
    )

    # Deferred imports: these pull in the IDA runtime, which is expensive and
    # should not happen before argument parsing/logging are set up.
    from mapa.ida_db import resolve_database, open_database_session
    from mapa.collector import collect_report
    from mapa.renderer import render_report

    t0 = time.time()
    db_path, md5, sha256 = resolve_database(args.input_file)
    logger.debug("perf: resolve_database: %0.2fs", time.time() - t0)

    theme = Theme(
        {
            "decoration": "grey54",
            "title": "yellow",
            "key": "blue",
            "value": "blue",
            "default": "blue",
        },
        inherit=False,
    )
    # markup/emoji disabled: report content (function names, strings) is
    # untrusted and must not be interpreted by rich.
    console = Console(theme=theme, markup=False, emoji=False)

    t0 = time.time()
    with open_database_session(db_path) as db:
        logger.debug("perf: open_database: %0.2fs", time.time() - t0)

        base_address = db.base_address or 0

        assemblage_locations: dict[int, AssemblageLocation] = {}
        if args.assemblage:
            assemblage_locations = _load_assemblage(args.assemblage, base_address)

        # We need thunk targets for capa match forwarding.
        # The collector will compute them too, but we need them before collection
        # for capa loading. We'll pass capa matches into the collector.
        matches_by_function: dict[int, set[str]] = {}
        if args.capa:
            # For now, pass an empty thunk_targets; capa matches that land
            # on thunks will be forwarded during collection.
            # The collector handles thunk forwarding internally via
            # resolve_thunk_target, which the capa loader needs too.
            # To avoid duplicating thunk logic, we do a simple pass here
            # and let the collector handle the rest.
            from mapa.collector import _resolve_thunk_target, _build_import_index, _build_extern_index
            from ida_domain.functions import FunctionFlags

            import_index = _build_import_index(db)
            extern_addrs = _build_extern_index(db)

            # Resolve every thunk so matches on thunks land on real targets.
            thunk_targets: dict[int, int] = {}
            for func in db.functions:
                flags = db.functions.get_flags(func)
                if flags and FunctionFlags.THUNK in flags:
                    target = _resolve_thunk_target(db, int(func.start_ea), import_index, extern_addrs)
                    if target is not None:
                        thunk_targets[int(func.start_ea)] = target

            matches_by_function = _load_capa_matches(
                args.capa,
                thunk_targets,
                # No containing-function resolver wired up here yet; raw
                # addresses fall through unchanged.
                lambda addr: None,
            )

        t0 = time.time()
        report = collect_report(
            db,
            md5=md5,
            sha256=sha256,
            matches_by_function=matches_by_function,
            assemblage_locations=assemblage_locations,
        )
        logger.debug("perf: collect_report: %0.2fs", time.time() - t0)

        t0 = time.time()
        render_report(report, console)
        logger.debug("perf: render_report: %0.2fs", time.time() - t0)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Support direct execution (`python mapa/cli.py`); mapa/__main__.py covers `python -m mapa`.
    sys.exit(main())
|
||||
409
mapa/collector.py
Normal file
409
mapa/collector.py
Normal file
@@ -0,0 +1,409 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from ida_domain.database import Database
|
||||
from ida_domain.flowchart import FlowChartFlags
|
||||
from ida_domain.functions import FunctionFlags
|
||||
|
||||
from mapa.model import (
|
||||
MapaCall,
|
||||
MapaCaller,
|
||||
MapaFunction,
|
||||
MapaLibrary,
|
||||
MapaMeta,
|
||||
MapaReport,
|
||||
MapaSection,
|
||||
MapaString,
|
||||
)
|
||||
from mapa.strings import extract_ascii_strings, extract_unicode_strings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
THUNK_CHAIN_DEPTH_DELTA = 5
|
||||
|
||||
|
||||
def _get_permissions_string(perm: int) -> str:
|
||||
return (
|
||||
("r" if perm & 4 else "-")
|
||||
+ ("w" if perm & 2 else "-")
|
||||
+ ("x" if perm & 1 else "-")
|
||||
)
|
||||
|
||||
|
||||
def _collect_meta(db: Database, md5: str, sha256: str) -> MapaMeta:
    """Build report metadata from the open database.

    Caller-supplied md5/sha256 win; when empty, fall back to the hashes the
    IDA database itself recorded. The timestamp is the current UTC time.
    """
    resolved_md5 = md5 or db.md5 or ""
    resolved_sha256 = sha256 or db.sha256 or ""
    return MapaMeta(
        name=db.path or "",
        sha256=resolved_sha256,
        md5=resolved_md5,
        arch=db.architecture or "",
        timestamp=datetime.now(timezone.utc).isoformat(),
        base_address=db.base_address or 0,
    )
|
||||
|
||||
|
||||
def _collect_sections(db: Database) -> list[MapaSection]:
    """Enumerate all segments as MapaSection records (address, size, perms, name)."""
    return [
        MapaSection(
            address=int(seg.start_ea),
            size=int(seg.end_ea) - int(seg.start_ea),
            perms=_get_permissions_string(int(seg.perm)),
            name=db.segments.get_name(seg) or "",
        )
        for seg in db.segments.get_all()
    ]
|
||||
|
||||
|
||||
def _normalize_module_name(name: str) -> str:
|
||||
"""Normalize an import module name to include extension.
|
||||
|
||||
IDA strips .dll from PE import module names (e.g. 'KERNEL32' instead of
|
||||
'KERNEL32.dll'). Add it back when the name has no extension.
|
||||
"""
|
||||
if "." not in name:
|
||||
return f"{name}.dll".lower()
|
||||
return name.lower()
|
||||
|
||||
|
||||
def _collect_libraries(db: Database) -> list[MapaLibrary]:
    """List imported modules as MapaLibrary records with normalized names."""
    return [
        MapaLibrary(name=_normalize_module_name(module.name))
        for module in db.imports.get_all_modules()
    ]
|
||||
|
||||
|
||||
def _build_import_index(db: Database) -> dict[int, tuple[str, str]]:
    """Map each import address to (normalized module name, import name).

    Nameless (ordinal-only) imports are rendered as "ord<N>".
    """
    index: dict[int, tuple[str, str]] = {}
    for entry in db.imports.get_all_imports():
        symbol = entry.name if entry.name else f"ord{entry.ordinal}"
        index[int(entry.address)] = (_normalize_module_name(entry.module_name), symbol)
    return index
|
||||
|
||||
|
||||
def _build_extern_index(db: Database) -> set[int]:
    """Collect start addresses of functions inside XTRN (extern) segments."""
    extern_addrs: set[int] = set()
    for seg in db.segments.get_all():
        seg_class = db.segments.get_class(seg)
        if not seg_class or seg_class.upper() != "XTRN":
            continue
        for func in db.functions.get_between(int(seg.start_ea), int(seg.end_ea)):
            extern_addrs.add(int(func.start_ea))
    return extern_addrs
|
||||
|
||||
|
||||
def _resolve_thunk_target(
    db: Database,
    ea: int,
    import_index: dict[int, tuple[str, str]],
    extern_addrs: set[int],
) -> int | None:
    """Follow thunk chains up to THUNK_CHAIN_DEPTH_DELTA hops.

    At each hop: prefer a single non-flow code reference, falling back to a
    single data reference (the two cases previously duplicated identical
    handling). A target that is an import/extern resolves immediately; a
    target that is itself a thunk continues the chain; any other function
    target resolves there.

    Returns the final resolved address, or None if resolution fails
    (no unambiguous reference, or the chain exceeds the depth limit).
    """
    current = ea
    for _ in range(THUNK_CHAIN_DEPTH_DELTA):
        # Prefer exactly one non-flow code ref; otherwise exactly one data ref.
        refs = list(db.xrefs.code_refs_from_ea(current, flow=False))
        if len(refs) != 1:
            refs = list(db.xrefs.data_refs_from_ea(current))
        if len(refs) != 1:
            # Ambiguous or missing reference: cannot resolve.
            break

        target = int(refs[0])
        if target in import_index or target in extern_addrs:
            return target

        target_func = db.functions.get_at(target)
        if target_func:
            flags = db.functions.get_flags(target_func)
            if flags and FunctionFlags.THUNK in flags:
                # Another thunk: keep following the chain.
                current = target
                continue
        return target

    return None
|
||||
|
||||
|
||||
def _build_string_index(db: Database) -> dict[int, str]:
    """Scan every segment for ASCII and UTF-16 LE strings, keyed by address.

    Uses regex-based extraction (from FLOSS/capa) on raw segment bytes rather
    than IDA's built-in string list, which misses short strings. When an ASCII
    and a unicode string start at the same address, the longer one wins.
    """
    index: dict[int, str] = {}
    for seg in db.segments.get_all():
        begin = int(seg.start_ea)
        length = int(seg.end_ea) - begin
        if length <= 0:
            continue
        data = db.bytes.get_bytes_at(begin, length)
        if not data:
            continue
        for hit in extract_ascii_strings(data):
            index[begin + hit.offset] = hit.s
        for hit in extract_unicode_strings(data):
            addr = begin + hit.offset
            existing = index.get(addr)
            if existing is None or len(hit.s) > len(existing):
                index[addr] = hit.s
    return index
|
||||
|
||||
|
||||
def _find_data_reference_string(
    db: Database,
    insn_ea: int,
    string_index: dict[int, str],
    max_depth: int = 10,
) -> str | None:
    """Follow a chain of single data references from an instruction to a string.

    Mirrors capa's find_data_reference_from_insn: each hop requires exactly one
    outgoing data reference pointing at a valid address. Returns the first
    string found in `string_index`, else None.
    """
    ea = insn_ea
    for _hop in range(max_depth):
        try:
            refs = list(db.xrefs.data_refs_from_ea(ea))
        except Exception:
            # Best effort: treat xref lookup failures as "no reference".
            break
        if len(refs) != 1:
            break
        nxt = int(refs[0])
        if not db.is_valid_ea(nxt):
            break
        found = string_index.get(nxt)
        if found is not None:
            return found
        ea = nxt
    return None
|
||||
|
||||
|
||||
def collect_report(
    db: Database,
    md5: str = "",
    sha256: str = "",
    matches_by_function: dict[int, set[str]] | None = None,
    assemblage_locations: dict[int, Any] | None = None,
) -> MapaReport:
    """Collect a complete MAPA report from an open IDA database.

    Args:
        db: an open ida_domain Database.
        md5/sha256: hashes of the original input; empty falls back to db metadata.
        matches_by_function: function address -> capa rule names to attach.
        assemblage_locations: address -> AssemblageLocation; when present,
            Assemblage names override IDA names.
    """
    if matches_by_function is None:
        matches_by_function = {}
    if assemblage_locations is None:
        assemblage_locations = {}

    meta = _collect_meta(db, md5, sha256)
    sections = _collect_sections(db)
    libraries = _collect_libraries(db)
    import_index = _build_import_index(db)
    extern_addrs = _build_extern_index(db)
    string_index = _build_string_index(db)

    # Snapshot all functions as (start_ea, func, is_thunk, is_lib).
    all_functions: list[tuple[int, Any, bool, bool]] = []
    for func in db.functions:
        ea = int(func.start_ea)
        flags = db.functions.get_flags(func)
        is_thunk = flags is not None and FunctionFlags.THUNK in flags
        is_lib = flags is not None and FunctionFlags.LIB in flags
        all_functions.append((ea, func, is_thunk, is_lib))

    all_functions.sort(key=lambda x: x[0])

    # Function order index, used to compute caller/callee deltas.
    func_address_to_order: dict[int, int] = {}
    for i, (ea, _, _, _) in enumerate(all_functions):
        func_address_to_order[ea] = i

    # Resolve every thunk so calls through thunks land on real targets.
    thunk_targets: dict[int, int] = {}
    for ea, func, is_thunk, _ in all_functions:
        if is_thunk:
            target = _resolve_thunk_target(db, ea, import_index, extern_addrs)
            if target is not None:
                thunk_targets[ea] = target

    # First pass: build the (thunk-resolved) call graph.
    # resolved_callers: callee ea -> set of caller eas
    # resolved_callees: caller ea -> [(callee ea, is_api)], deduplicated, in discovery order
    resolved_callers: dict[int, set[int]] = {}
    resolved_callees: dict[int, list[tuple[int, bool]]] = {}

    for ea, func, is_thunk, is_lib in all_functions:
        # Thunks and import/extern stubs don't get their own call-graph entry.
        if is_thunk or ea in import_index or ea in extern_addrs:
            continue

        fc = db.functions.get_flowchart(func, flags=FlowChartFlags.NOEXT | FlowChartFlags.PREDS)
        if fc is None:
            continue

        seen_callees: set[int] = set()
        callees: list[tuple[int, bool]] = []

        for block in fc:
            insns = block.get_instructions()
            if insns is None:
                continue
            for insn in insns:
                if not db.instructions.is_call_instruction(insn):
                    # also check for jumps to imports (thunk pattern)
                    mnem = db.instructions.get_mnemonic(insn)
                    if mnem and mnem.lower().startswith("jmp"):
                        call_targets = list(db.xrefs.code_refs_from_ea(int(insn.ea), flow=False))
                    else:
                        continue
                else:
                    call_targets = list(db.xrefs.calls_from_ea(int(insn.ea)))
                    if not call_targets:
                        # Fall back to non-flow code refs when calls_from_ea is empty.
                        call_targets = list(db.xrefs.code_refs_from_ea(int(insn.ea), flow=False))

                for target_ea in call_targets:
                    target_ea = int(target_ea)
                    resolved_target = target_ea

                    # Calls through thunks are credited to the thunk's target.
                    if target_ea in thunk_targets:
                        resolved_target = thunk_targets[target_ea]

                    if resolved_target in seen_callees:
                        continue
                    seen_callees.add(resolved_target)

                    is_api = resolved_target in import_index or resolved_target in extern_addrs
                    callees.append((resolved_target, is_api))

                    if resolved_target not in resolved_callers:
                        resolved_callers[resolved_target] = set()
                    resolved_callers[resolved_target].add(ea)

        resolved_callees[ea] = callees

    # Second pass: materialize MapaFunction records.
    mapa_functions: list[MapaFunction] = []
    for ea, func, is_thunk, is_lib in all_functions:
        if ea in import_index or ea in extern_addrs:
            continue

        name = db.functions.get_name(func) or f"sub_{ea:x}"
        # Assemblage names (when provided) override IDA names.
        if ea in assemblage_locations and assemblage_locations[ea].name:
            name = assemblage_locations[ea].name

        order = func_address_to_order[ea]

        mf = MapaFunction(
            address=ea,
            name=name,
            is_thunk=is_thunk,
            is_library=is_lib,
        )

        # Thunks render header-only: no stats, callers, calls, or strings.
        if is_thunk:
            mapa_functions.append(mf)
            continue

        # CFG statistics: blocks / edges / instructions / total instruction bytes.
        fc = db.functions.get_flowchart(func, flags=FlowChartFlags.NOEXT | FlowChartFlags.PREDS)
        if fc is not None:
            num_blocks = 0
            num_edges = 0
            num_insns = 0
            total_bytes = 0

            for block in fc:
                num_blocks += 1
                num_edges += block.count_successors()
                insns = block.get_instructions()
                if insns is None:
                    continue
                for insn in insns:
                    num_insns += 1
                    insn_size = db.heads.size(int(insn.ea))
                    total_bytes += insn_size

            mf.num_basic_blocks = num_blocks
            mf.num_edges = num_edges
            mf.num_instructions = num_insns
            mf.total_instruction_bytes = total_bytes

        # Callers (xref: line), with direction arrow and function-order delta.
        for caller_ea in sorted(resolved_callers.get(ea, set())):
            if caller_ea not in func_address_to_order:
                continue
            caller_order = func_address_to_order[caller_ea]
            delta = caller_order - order
            direction = "↑" if delta < 0 else "↓"
            caller_func = db.functions.get_at(caller_ea)
            caller_name = db.functions.get_name(caller_func) if caller_func else f"sub_{caller_ea:x}"
            if caller_ea in assemblage_locations and assemblage_locations[caller_ea].name:
                caller_name = assemblage_locations[caller_ea].name
            mf.callers.append(MapaCaller(
                name=caller_name or f"sub_{caller_ea:x}",
                address=caller_ea,
                delta=delta,
                direction=direction,
            ))

        # Callees: APIs (import/extern targets) vs. internal calls.
        for target_ea, is_api in resolved_callees.get(ea, []):
            if is_api:
                if target_ea in import_index:
                    module_name, func_name = import_index[target_ea]
                    api_name = f"{module_name}!{func_name}"
                else:
                    target_func = db.functions.get_at(target_ea)
                    api_name = db.functions.get_name(target_func) if target_func else f"sub_{target_ea:x}"
                    api_name = api_name or f"sub_{target_ea:x}"
                mf.apis.append(MapaCall(
                    name=api_name,
                    address=target_ea,
                    is_api=True,
                ))
            else:
                if target_ea not in func_address_to_order:
                    continue
                target_order = func_address_to_order[target_ea]
                delta = target_order - order
                direction = "↑" if delta < 0 else "↓"
                target_func = db.functions.get_at(target_ea)
                target_name = db.functions.get_name(target_func) if target_func else f"sub_{target_ea:x}"
                if target_ea in assemblage_locations and assemblage_locations[target_ea].name:
                    target_name = assemblage_locations[target_ea].name
                mf.calls.append(MapaCall(
                    name=target_name or f"sub_{target_ea:x}",
                    address=target_ea,
                    is_api=False,
                    delta=delta,
                    direction=direction,
                ))

        # Referenced strings: deduplicated on the trimmed value.
        # NOTE(review): a fresh flowchart (fc2) is built here — presumably
        # because the earlier fc iterator cannot be re-iterated; confirm
        # against ida_domain flowchart semantics before consolidating.
        if fc is not None:
            seen_strings: set[str] = set()
            fc2 = db.functions.get_flowchart(func, flags=FlowChartFlags.NOEXT | FlowChartFlags.PREDS)
            if fc2 is not None:
                for block in fc2:
                    insns = block.get_instructions()
                    if insns is None:
                        continue
                    for insn in insns:
                        s = _find_data_reference_string(db, int(insn.ea), string_index)
                        if s is not None:
                            stripped = s.rstrip()
                            if stripped and stripped not in seen_strings:
                                seen_strings.add(stripped)
                                mf.strings.append(MapaString(value=stripped, address=int(insn.ea)))

        mf.capa_matches = sorted(matches_by_function.get(ea, set()))
        mapa_functions.append(mf)

    return MapaReport(
        meta=meta,
        sections=sections,
        libraries=libraries,
        functions=mapa_functions,
        assemblage_locations=assemblage_locations,
    )
|
||||
165
mapa/ida_db.py
Normal file
165
mapa/ida_db.py
Normal file
@@ -0,0 +1,165 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import fcntl
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
import idapro # must be first: mutates sys.path so ida_auto and ida_domain are importable
|
||||
import ida_auto
|
||||
from ida_domain.database import Database, IdaCommandOptions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DATABASE_ACCESS_TIMEOUT = 5.0
|
||||
DATABASE_ANALYSIS_TIMEOUT = 120.0
|
||||
DATABASE_POLL_INTERVAL = 0.25
|
||||
|
||||
|
||||
def get_cache_dir() -> Path:
    """Return the mapa cache directory: $XDG_CACHE_HOME/mandiant/mapa (or ~/.cache/mandiant/mapa)."""
    xdg_cache = os.environ.get("XDG_CACHE_HOME")
    base = Path(xdg_cache) if xdg_cache else Path.home() / ".cache"
    return base / "mandiant" / "mapa"
|
||||
|
||||
|
||||
def compute_file_hashes(file_path: Path) -> tuple[str, str]:
    """Compute (md5, sha256) hex digests for a file, streaming 64 KiB chunks.

    Raises:
        OSError: If the file cannot be read.
    """
    md5 = hashlib.md5()
    sha256 = hashlib.sha256()
    with file_path.open("rb") as fh:
        while True:
            chunk = fh.read(65536)
            if not chunk:
                break
            md5.update(chunk)
            sha256.update(chunk)
    return md5.hexdigest(), sha256.hexdigest()
|
||||
|
||||
|
||||
def _wait_for_repack(db_path: Path, timeout: float) -> None:
|
||||
nam_path = db_path.with_suffix(".nam")
|
||||
deadline = time.monotonic() + timeout
|
||||
while nam_path.exists():
|
||||
if time.monotonic() >= deadline:
|
||||
raise RuntimeError(
|
||||
f"Database {db_path} appears to be open in another program "
|
||||
f"({nam_path} still exists after {timeout:.0f}s)."
|
||||
)
|
||||
time.sleep(DATABASE_POLL_INTERVAL)
|
||||
|
||||
|
||||
@contextlib.contextmanager
def database_access_guard(db_path: Path, timeout: float) -> Iterator[None]:
    """Advisory guard that serialises access to an IDA database.

    Uses .nam polling + flock on <db>.lock with TOCTOU re-check.

    Sequence: wait for the .nam marker to clear, acquire an exclusive
    non-blocking flock on the sidecar lock file (polling until `deadline`),
    then re-check the .nam marker with the remaining budget before yielding.
    The lock file itself is left on disk; only the flock is released.

    Raises:
        RuntimeError: On timeout waiting for the database.
    """
    _wait_for_repack(db_path, timeout)

    lock_path = Path(str(db_path) + ".lock")
    lock_fd = lock_path.open("w")
    # One deadline covers both the flock acquisition and the re-check below.
    deadline = time.monotonic() + timeout
    try:
        while True:
            try:
                # Non-blocking so we can enforce our own deadline.
                fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                break
            except OSError:
                if time.monotonic() >= deadline:
                    raise RuntimeError(
                        f"Timed out waiting for lock on {db_path} after {timeout:.0f}s."
                    )
                time.sleep(DATABASE_POLL_INTERVAL)

        # TOCTOU re-check: the .nam marker may have reappeared while we
        # waited for the flock; spend whatever budget remains.
        _wait_for_repack(db_path, max(0, deadline - time.monotonic()))
        yield
    finally:
        fcntl.flock(lock_fd, fcntl.LOCK_UN)
        lock_fd.close()
|
||||
|
||||
|
||||
def resolve_database(file_path: Path) -> tuple[Path, str, str]:
    """Resolve an input path to an .i64/.idb database path.

    Returns (db_path, md5, sha256). For existing databases, hashes are empty
    strings (they'll be read from IDA metadata instead). For binaries, the
    database is cached under get_cache_dir() keyed by the input's sha256, and
    analysis runs only on a cache miss.

    Raises:
        RuntimeError: If analysis or caching fails.
    """
    suffix = file_path.suffix.lower()
    if suffix in {".i64", ".idb"}:
        # Already a database: use it directly, no hashing, no cache.
        logger.debug("Using existing database: %s", file_path)
        return file_path, "", ""

    cache_dir = get_cache_dir()
    cache_dir.mkdir(parents=True, exist_ok=True)

    md5, sha256 = compute_file_hashes(file_path)
    # Cache key is the input's sha256; content-addressed, so collisions
    # across differently-named identical inputs share one database.
    cache_path = cache_dir / f"{sha256}.i64"

    if cache_path.exists():
        logger.debug("Cache hit for %s -> %s", file_path, cache_path)
        return cache_path, md5, sha256

    logger.debug("Cache miss for %s; analyzing to %s", file_path, cache_path)
    with database_access_guard(cache_path, timeout=DATABASE_ANALYSIS_TIMEOUT):
        # Another process may have populated the cache while we waited.
        if cache_path.exists():
            logger.debug("Cache populated while waiting for lock: %s", cache_path)
            return cache_path, md5, sha256

        logger.info("Analyzing %s (this may take a moment)...", file_path.name)
        idapro.enable_console_messages(False)
        # plugin_options disables Lumina during database creation (both
        # primary and secondary hosts zeroed), matching capa's loader.py;
        # the embedded "-O" works because IDA concatenates the value verbatim.
        # load_resources maps to IDA's -R switch.
        ida_options = IdaCommandOptions(
            auto_analysis=True,
            new_database=True,
            output_database=str(cache_path),
            load_resources=True,
            plugin_options="lumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0",
        )
        try:
            with Database.open(str(file_path), ida_options, save_on_close=True):
                # Block until IDA's auto-analysis queue drains.
                ida_auto.auto_wait()
        except Exception as exc:
            raise RuntimeError(f"Analysis failed for {file_path}: {exc}") from exc

        if not cache_path.exists():
            raise RuntimeError(f"Analysis produced no database for {file_path}")

        logger.debug("Analysis completed: %s", cache_path)
        return cache_path, md5, sha256
|
||||
|
||||
|
||||
@contextlib.contextmanager
def open_database_session(db_path: Path, auto_analysis: bool = False) -> Iterator[Database]:
    """Open a database session with advisory locking.

    The advisory guard is held for the lifetime of the session; the database
    is opened read-style (save_on_close=False) and closed on exit.

    Raises:
        RuntimeError: If opening fails or the database is locked.
    """
    with database_access_guard(db_path, timeout=DATABASE_ACCESS_TIMEOUT):
        ida_options = IdaCommandOptions(auto_analysis=auto_analysis, new_database=False)
        logger.debug("Opening database session: %s (auto_analysis=%s)", db_path, auto_analysis)
        idapro.enable_console_messages(False)
        try:
            database = Database.open(str(db_path), ida_options, save_on_close=False)
        except Exception as exc:
            raise RuntimeError(f"Failed to open {db_path}: {exc}") from exc

        with database:
            if auto_analysis:
                # Let IDA finish any pending auto-analysis before handing out the session.
                ida_auto.auto_wait()
            yield database

    logger.debug("Closed database session: %s", db_path)
|
||||
106
mapa/model.py
Normal file
106
mapa/model.py
Normal file
@@ -0,0 +1,106 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
from dataclasses import field, dataclass
|
||||
|
||||
|
||||
@dataclass
class AssemblageLocation:
    """One function record parsed from an Assemblage JSONL dataset."""

    name: str       # function name from Assemblage
    file: str       # source file, possibly suffixed with a trailing " (...)" annotation
    prototype: str  # function prototype text
    rva: int        # function start RVA (from the "function_start" field)

    @property
    def path(self) -> str:
        """The source file path with any trailing " (...)" annotation removed."""
        if self.file.endswith(")"):
            return self.file.rpartition(" (")[0]
        return self.file

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> AssemblageLocation:
        """Build a location from one parsed Assemblage record."""
        return cls(data["name"], data["file"], data["prototype"], data["function_start"])

    @classmethod
    def from_json(cls, doc: str) -> AssemblageLocation:
        """Build a location from a single JSON document string."""
        return cls.from_dict(json.loads(doc))
|
||||
|
||||
|
||||
@dataclass
class MapaString:
    """A string referenced from within a function."""

    value: str    # whitespace-trimmed string content
    address: int  # address of the referencing instruction
|
||||
|
||||
|
||||
@dataclass
class MapaCall:
    """An outgoing call edge: either an internal callee or an API (import/extern)."""

    name: str
    address: int
    is_api: bool        # True for import/extern targets; False for internal functions
    delta: int = 0      # function-order distance to the target (internal calls only)
    direction: str = "" # "↑"/"↓" arrow derived from delta (internal calls only)
|
||||
|
||||
|
||||
@dataclass
class MapaCaller:
    """An incoming call edge (xref line in the rendered output)."""

    name: str
    address: int
    delta: int = 0      # function-order distance from the caller
    direction: str = "" # "↑"/"↓" arrow derived from delta
|
||||
|
||||
|
||||
@dataclass
class MapaFunction:
    """A single analyzed function and everything rendered under its header."""

    address: int
    name: str
    is_thunk: bool = False    # thunks render header-only (no stats/callers/calls/strings)
    is_library: bool = False  # flagged as library code by IDA (FunctionFlags.LIB)
    # CFG statistics (the B/E/I line).
    num_basic_blocks: int = 0
    num_edges: int = 0
    num_instructions: int = 0
    total_instruction_bytes: int = 0
    callers: list[MapaCaller] = field(default_factory=list)   # incoming edges
    calls: list[MapaCall] = field(default_factory=list)       # internal callees
    apis: list[MapaCall] = field(default_factory=list)        # import/extern callees
    strings: list[MapaString] = field(default_factory=list)   # referenced strings (deduped)
    capa_matches: list[str] = field(default_factory=list)     # sorted matched capa rule names
|
||||
|
||||
|
||||
@dataclass
class MapaSection:
    """One segment/section of the binary."""

    address: int  # segment start address
    size: int     # segment size in bytes
    perms: str    # "rwx"-style permission string
    name: str = ""
|
||||
|
||||
|
||||
@dataclass
class MapaLibrary:
    """An imported module (e.g. "kernel32.dll")."""

    name: str
    # NOTE(review): is_static and load_address are not populated by the
    # collector in this file — confirm whether other producers set them.
    is_static: bool = False
    load_address: int | None = None
|
||||
|
||||
|
||||
@dataclass
class MapaMeta:
    """Report-level metadata about the analyzed binary."""

    name: str        # input path as recorded by the database
    sha256: str
    md5: str = ""
    arch: str = ""
    timestamp: str = ""  # ISO-8601 UTC time when the report was collected
    base_address: int = 0
|
||||
|
||||
|
||||
@dataclass
class MapaReport:
    """The complete collected report: metadata plus all rendered sections."""

    meta: MapaMeta
    sections: list[MapaSection] = field(default_factory=list)
    libraries: list[MapaLibrary] = field(default_factory=list)
    functions: list[MapaFunction] = field(default_factory=list)
    # address -> AssemblageLocation; used by the renderer to group functions
    # by source file when Assemblage data was provided.
    assemblage_locations: dict[int, AssemblageLocation] = field(default_factory=dict)
|
||||
148
mapa/renderer.py
Normal file
148
mapa/renderer.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
|
||||
import rich.padding
|
||||
from rich.text import Text
|
||||
from rich.markup import escape
|
||||
from rich.console import Console
|
||||
|
||||
from mapa.model import MapaReport
|
||||
|
||||
|
||||
class Renderer:
    """Writes indented, styled lines to a rich console.

    Indentation is tracked as a level counter; each level adds two columns of
    left padding to everything printed.
    """

    def __init__(self, console: Console):
        self.console: Console = console
        self.indent: int = 0

    @contextlib.contextmanager
    def indenting(self):
        """Context manager: one level deeper for the duration of the block."""
        self.indent += 1
        try:
            yield
        finally:
            self.indent -= 1

    @staticmethod
    def markup(s: str, **kwargs) -> Text:
        """Format `s` with kwargs, escaping string values so user-controlled
        data is not interpreted as rich markup."""
        safe_args = {}
        for key, value in kwargs.items():
            safe_args[key] = escape(value) if isinstance(value, str) else value
        return Text.from_markup(s.format(**safe_args))

    def print(self, renderable, **kwargs):
        """Print with the current indentation.

        With kwargs, `renderable` must be a markup template string which is
        formatted (and escaped) via `markup()` first.
        """
        if kwargs:
            assert isinstance(renderable, str)
            return self.print(self.markup(renderable, **kwargs))
        padded = rich.padding.Padding(renderable, (0, 0, 0, self.indent * 2))
        return self.console.print(padded)

    def writeln(self, s: str):
        """Print a plain line at the current indentation."""
        self.print(s)

    @contextlib.contextmanager
    def section(self, name):
        """Print a title line, then indent the enclosed output one level.

        `name` may be a plain string or a pre-built rich Text; anything else
        is an error.
        """
        if isinstance(name, str):
            self.print("[title]{name}", name=name)
        elif isinstance(name, Text):
            name = name.copy()
            name.stylize_before(self.console.get_style("title"))
            self.print(name)
        else:
            raise ValueError("unexpected section name")
        with self.indenting():
            yield
||||
def render_report(report: MapaReport, console: Console) -> None:
    """Pretty-print a MapaReport to the given rich console.

    Renders the meta, sections, and libraries summaries, then one section per
    function with its xrefs, CFG stats, capa matches, calls, APIs, and strings.

    Cleanups versus the previous revision (no behavior change):
      - removed an unreachable `if func.is_thunk:` branch inside the non-thunk
        section (thunks are handled and skipped before it could run);
      - removed `func_address_to_order`, which was built but never read.
    """
    o = Renderer(console)

    with o.section("meta"):
        o.writeln(f"name: {report.meta.name}")
        o.writeln(f"sha256: {report.meta.sha256}")
        o.writeln(f"arch: {report.meta.arch}")
        o.writeln(f"ts: {report.meta.timestamp}")

    with o.section("sections"):
        for section in report.sections:
            o.writeln(f"- {hex(section.address)} {section.perms} {hex(section.size)}")

    with o.section("libraries"):
        for lib in report.libraries:
            static = " (static)" if lib.is_static else ""
            addr = f" at {hex(lib.load_address)}" if lib.load_address is not None else ""
            o.writeln(f"- {lib.name:<12s}{static}{addr}")
        if not report.libraries:
            o.writeln("(none)")

    with o.section("functions"):
        last_address: int | None = None
        for func in report.functions:
            # Emit a "file" divider whenever two consecutive functions come
            # from different source files, per the assemblage ground truth.
            # The first function never gets a divider (nothing to compare to).
            if last_address is not None:
                try:
                    last_path = report.assemblage_locations[last_address].path
                    path = report.assemblage_locations[func.address].path
                    if last_path != path:
                        o.print(o.markup("[blue]~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[/] [title]file[/] {path}\n", path=path))
                except KeyError:
                    # no location info for one of the two functions: skip divider
                    pass
            last_address = func.address

            if func.is_thunk:
                # thunks get only a one-line header, no details
                with o.section(
                    o.markup(
                        "thunk [default]{function_name}[/] [decoration]@ {function_address}[/]",
                        function_name=func.name,
                        function_address=hex(func.address),
                    )
                ):
                    continue

            with o.section(
                o.markup(
                    "function [default]{function_name}[/] [decoration]@ {function_address}[/]",
                    function_name=func.name,
                    function_address=hex(func.address),
                )
            ):
                for caller in func.callers:
                    o.print(
                        "xref: [decoration]{direction}[/] {name} [decoration]({delta:+})[/]",
                        direction=caller.direction,
                        name=caller.name,
                        delta=caller.delta,
                    )

                # basic blocks / edges / instructions summary
                o.writeln(
                    f"B/E/I: {func.num_basic_blocks} / {func.num_edges} / {func.num_instructions} ({func.total_instruction_bytes} bytes)"
                )

                for match in func.capa_matches:
                    o.writeln(f"capa: {match}")

                for call in func.calls:
                    o.print(
                        "calls: [decoration]{direction}[/] {name} [decoration]({delta:+})[/]",
                        direction=call.direction,
                        name=call.name,
                        delta=call.delta,
                    )

                for api in func.apis:
                    o.print(
                        "api: {name}",
                        name=api.name,
                    )

                for s in func.strings:
                    o.print(
                        'string: [decoration]"[/]{string}[decoration]"[/]',
                        string=s.value,
                    )

                # blank line separating functions
                o.print("")
76
mapa/strings.py
Normal file
76
mapa/strings.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# String extraction routines from FLOSS via capa.
|
||||
# https://github.com/mandiant/flare-floss
|
||||
#
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from collections.abc import Iterator
|
||||
|
||||
# The set of bytes considered "printable ASCII" for string extraction,
# escaped so it can be embedded directly in a regex character class.
ASCII_BYTE = r" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t".encode(
    "ascii"
)
# Pre-compiled patterns for the common minimum length of 4 characters.
ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4))
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
# Filler bytes that commonly pad large regions; a buffer consisting entirely
# of one of these is skipped by the extractors.
REPEATS = {ord("A"), 0x00, 0xFE, 0xFF}
# Chunk size used when checking large buffers for a single repeated byte.
SLICE_SIZE = 4096
|
||||
@dataclass
class StaticString:
    """A string recovered from a raw byte buffer."""

    s: str  # the decoded string
    offset: int  # byte offset within the scanned buffer
||||
def _buf_filled_with(buf: bytes, character: int) -> bool:
    """Return True if every byte of `buf` equals `character`.

    Buffers of at least SLICE_SIZE bytes are compared chunk-by-chunk so the
    common all-identical case reduces to fast bytes equality checks.
    An empty buffer returns False.
    """
    if not buf:
        return False
    if len(buf) < SLICE_SIZE:
        return all(b == character for b in buf)
    # BUG FIX: the previous `bytes(character)` builds `character` zero-filled
    # bytes (e.g. bytes(0x41) is 65 NULs), so the chunk comparison below could
    # never match a full SLICE_SIZE chunk and large filler buffers were never
    # detected. bytes([character]) yields the intended single byte.
    dupe_chunk = bytes([character]) * SLICE_SIZE
    for offset in range(0, len(buf), SLICE_SIZE):
        current_chunk = buf[offset : offset + SLICE_SIZE]
        if len(current_chunk) == SLICE_SIZE:
            # full-sized chunk: one bytes comparison
            if dupe_chunk != current_chunk:
                return False
        else:
            # trailing partial chunk: compare byte-by-byte
            if not all(b == character for b in current_chunk):
                return False
    return True
||||
def extract_ascii_strings(buf: bytes, n: int = 4) -> Iterator[StaticString]:
    """Yield runs of at least `n` printable-ASCII bytes found in `buf`."""
    if not buf:
        return
    # skip buffers that consist entirely of a single common filler byte
    if (buf[0] in REPEATS) and _buf_filled_with(buf, buf[0]):
        return
    if n == 4:
        pattern = ASCII_RE_4  # reuse the pre-compiled default pattern
    else:
        pattern = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, n))
    for m in pattern.finditer(buf):
        yield StaticString(m.group().decode("ascii"), m.start())
||||
def extract_unicode_strings(buf: bytes, n: int = 4) -> Iterator[StaticString]:
    """Yield UTF-16LE strings of at least `n` characters found in `buf`.

    Matches ASCII characters interleaved with NUL bytes; sequences that fail
    to decode as UTF-16LE are silently skipped.
    """
    if not buf:
        return
    # skip buffers that consist entirely of a single common filler byte
    if (buf[0] in REPEATS) and _buf_filled_with(buf, buf[0]):
        return
    if n == 4:
        pattern = UNICODE_RE_4  # reuse the pre-compiled default pattern
    else:
        pattern = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n))
    for m in pattern.finditer(buf):
        try:
            yield StaticString(m.group().decode("utf-16le"), m.start())
        except UnicodeDecodeError:
            pass
||||
@@ -144,11 +144,11 @@ dev = [
|
||||
"flake8-simplify==0.30.0",
|
||||
"flake8-use-pathlib==0.3.0",
|
||||
"flake8-copyright==0.2.4",
|
||||
"ruff==0.14.7",
|
||||
"black==25.12.0",
|
||||
"isort==7.0.0",
|
||||
"ruff==0.15.0",
|
||||
"black==26.3.0",
|
||||
"isort==8.0.0",
|
||||
"mypy==1.19.1",
|
||||
"mypy-protobuf==4.0.0",
|
||||
"mypy-protobuf==5.0.0",
|
||||
"PyGithub==2.8.1",
|
||||
"bump-my-version==1.2.4",
|
||||
# type stubs for mypy
|
||||
@@ -165,8 +165,8 @@ build = [
|
||||
# we want all developer environments to be consistent.
|
||||
# These dependencies are not used in production environments
|
||||
# and should not conflict with other libraries/tooling.
|
||||
"pyinstaller==6.17.0",
|
||||
"setuptools==80.9.0",
|
||||
"pyinstaller==6.19.0",
|
||||
"setuptools==80.10.1",
|
||||
"build==1.4.0"
|
||||
]
|
||||
scripts = [
|
||||
|
||||
@@ -10,7 +10,7 @@ annotated-types==0.7.0
|
||||
colorama==0.4.6
|
||||
cxxfilt==0.3.0
|
||||
dncil==1.0.2
|
||||
dnfile==0.17.0
|
||||
dnfile==0.18.0
|
||||
funcy==2.0
|
||||
humanize==4.15.0
|
||||
ida-netnode==3.0
|
||||
@@ -21,11 +21,11 @@ mdurl==0.1.2
|
||||
msgpack==1.0.8
|
||||
networkx==3.4.2
|
||||
pefile==2024.8.26
|
||||
pip==25.3
|
||||
protobuf==6.33.1
|
||||
pip==26.0
|
||||
protobuf==7.34.0
|
||||
pyasn1==0.5.1
|
||||
pyasn1-modules==0.3.0
|
||||
pycparser==2.23
|
||||
pycparser==3.0
|
||||
pydantic==2.12.4
|
||||
# pydantic pins pydantic-core,
|
||||
# but dependabot updates these separately (which is broken) and is annoying,
|
||||
@@ -37,13 +37,13 @@ pygments==2.19.1
|
||||
pyghidra==3.0.0
|
||||
python-flirt==0.9.2
|
||||
pyyaml==6.0.2
|
||||
rich==14.2.0
|
||||
rich==14.3.2
|
||||
ruamel-yaml==0.19.1
|
||||
ruamel-yaml-clib==0.2.14
|
||||
setuptools==80.9.0
|
||||
setuptools==80.10.1
|
||||
six==1.17.0
|
||||
sortedcontainers==2.4.0
|
||||
viv-utils==0.8.0
|
||||
vivisect==1.2.1
|
||||
vivisect==1.3.0
|
||||
msgspec==0.20.0
|
||||
bump-my-version==1.2.4
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: 6a0d506713...03a20f69ae
@@ -61,6 +61,7 @@ usage:
|
||||
parallelism factor
|
||||
--no-mp disable subprocesses
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
|
||||
@@ -28,6 +28,7 @@ Requires:
|
||||
- sarif_om 1.0.4
|
||||
- jschema_to_python 1.2.3
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
|
||||
@@ -32,6 +32,7 @@ Example:
|
||||
│00000070│ 39 31 37 36 61 64 36 38 ┊ 32 66 66 64 64 36 35 66 │9176ad68┊2ffdd65f│
|
||||
│00000080│ 30 61 36 36 39 12 28 61 ┊ 34 62 33 35 64 65 37 31 │0a669•(a┊4b35de71│
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
@@ -18,6 +18,7 @@ detect-elf-os
|
||||
|
||||
Attempt to detect the underlying OS that the given ELF file targets.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
@@ -36,6 +36,7 @@ Check the log window for any errors, and/or the summary of changes.
|
||||
|
||||
Derived from: https://github.com/mandiant/capa/blob/master/scripts/import-to-ida.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
@@ -1229,6 +1229,7 @@ def main(argv=None):
|
||||
|
||||
time0 = time.time()
|
||||
|
||||
args.enable_cache = False
|
||||
try:
|
||||
rules = capa.main.get_rules_from_cli(args)
|
||||
except capa.main.ShouldExitError as e:
|
||||
|
||||
15
scripts/mapa.py
Normal file
15
scripts/mapa.py
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env python
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "idapro",
#     "ida-domain",
#     "rich",
# ]
# ///
# The block above is PEP 723 inline script metadata: runners like `uv run`
# use it to provision the dependencies automatically. Do not reformat it.
import sys

from mapa.cli import main

# Thin launcher: delegate to the CLI entry point and propagate its exit code.
if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -54,6 +54,7 @@ Example::
|
||||
0x44cb60: ?
|
||||
0x44cba0: __guard_icall_checks_enforced
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
"""
|
||||
Extract files relevant to capa analysis from VMRay Analysis Archive and create a new ZIP file.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import zipfile
|
||||
|
||||
@@ -43,6 +43,7 @@ example:
|
||||
|
||||
^^^ --label or git hash
|
||||
"""
|
||||
|
||||
import sys
|
||||
import timeit
|
||||
import logging
|
||||
|
||||
@@ -34,6 +34,7 @@ Example:
|
||||
│00000080│ 30 61 36 36 39 12 28 61 ┊ 34 62 33 35 64 65 37 31 │0a669•(a┊4b35de71│
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
@@ -37,6 +37,7 @@ Example:
|
||||
────┴────────────────────────────────────────────────────
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
@@ -46,6 +46,7 @@ Example:
|
||||
2022-01-24 22:35:39,839 [INFO] Starting extraction...
|
||||
2022-01-24 22:35:42,632 [INFO] Writing results to linter-data.json
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
@@ -54,6 +54,7 @@ Example::
|
||||
- connect TCP socket
|
||||
...
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
@@ -70,6 +70,7 @@ Example::
|
||||
insn: 0x10001027: mnemonic(shl)
|
||||
...
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
Submodule tests/data updated: 689960a966...413fd2803e
@@ -20,6 +20,7 @@ from functools import lru_cache
|
||||
|
||||
import pytest
|
||||
|
||||
import capa.loader
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
|
||||
@@ -458,9 +458,7 @@ def test_pattern_parsing():
|
||||
capture="#int",
|
||||
)
|
||||
|
||||
assert (
|
||||
BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
assert BinExport2InstructionPatternMatcher.from_str("""
|
||||
# comment
|
||||
br reg
|
||||
br reg(not-stack)
|
||||
@@ -481,10 +479,7 @@ def test_pattern_parsing():
|
||||
call [reg * #int + #int]
|
||||
call [reg + reg + #int]
|
||||
call [reg + #int]
|
||||
"""
|
||||
).queries
|
||||
is not None
|
||||
)
|
||||
""").queries is not None
|
||||
|
||||
|
||||
def match_address(extractor: BinExport2FeatureExtractor, queries: BinExport2InstructionPatternMatcher, address: int):
|
||||
@@ -507,8 +502,7 @@ def match_address_with_be2(
|
||||
|
||||
|
||||
def test_pattern_matching():
|
||||
queries = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
queries = BinExport2InstructionPatternMatcher.from_str("""
|
||||
br reg(stack) ; capture reg
|
||||
br reg(not-stack) ; capture reg
|
||||
mov reg0, reg1 ; capture reg0
|
||||
@@ -522,8 +516,7 @@ def test_pattern_matching():
|
||||
ldp|stp reg, reg, [reg, #int]! ; capture #int
|
||||
ldp|stp reg, reg, [reg], #int ; capture #int
|
||||
ldrb reg0, [reg1(not-stack), reg2] ; capture reg2
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
# 0x210184: ldrb w2, [x0, x1]
|
||||
# query: ldrb reg0, [reg1(not-stack), reg2] ; capture reg2"
|
||||
@@ -550,11 +543,9 @@ BE2_EXTRACTOR_687 = fixtures.get_binexport_extractor(
|
||||
|
||||
|
||||
def test_pattern_matching_exclamation():
|
||||
queries = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
queries = BinExport2InstructionPatternMatcher.from_str("""
|
||||
stp reg, reg, [reg, #int]! ; capture #int
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
# note this captures the sp
|
||||
# 0x107918: stp x20, x19, [sp,0xFFFFFFFFFFFFFFE0]!
|
||||
@@ -564,11 +555,9 @@ def test_pattern_matching_exclamation():
|
||||
|
||||
|
||||
def test_pattern_matching_stack():
|
||||
queries = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
queries = BinExport2InstructionPatternMatcher.from_str("""
|
||||
stp reg, reg, [reg(stack), #int]! ; capture #int
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
# note this does capture the sp
|
||||
# compare this with the test above (exclamation)
|
||||
@@ -579,11 +568,9 @@ def test_pattern_matching_stack():
|
||||
|
||||
|
||||
def test_pattern_matching_not_stack():
|
||||
queries = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
queries = BinExport2InstructionPatternMatcher.from_str("""
|
||||
stp reg, reg, [reg(not-stack), #int]! ; capture #int
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
# note this does not capture the sp
|
||||
# compare this with the test above (exclamation)
|
||||
@@ -597,11 +584,9 @@ BE2_EXTRACTOR_MIMI = fixtures.get_binexport_extractor(CD / "data" / "binexport2"
|
||||
|
||||
|
||||
def test_pattern_matching_x86():
|
||||
queries = BinExport2InstructionPatternMatcher.from_str(
|
||||
"""
|
||||
queries = BinExport2InstructionPatternMatcher.from_str("""
|
||||
cmp|lea reg, [reg(not-stack) + #int0] ; capture #int0
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
# 0x4018c0: LEA ECX, [EBX+0x2]
|
||||
# query: cmp|lea reg, [reg(not-stack) + #int0] ; capture #int0
|
||||
|
||||
@@ -23,9 +23,7 @@ def test_match_across_scopes_file_function(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
# this rule should match on a function (0x4073F0)
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: install service
|
||||
@@ -39,13 +37,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
|
||||
- api: advapi32.OpenSCManagerA
|
||||
- api: advapi32.CreateServiceA
|
||||
- api: advapi32.StartServiceA
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
# this rule should match on a file feature
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: .text section
|
||||
@@ -56,15 +50,11 @@ def test_match_across_scopes_file_function(z9324d_extractor):
|
||||
- 9324d1a8ae37a36ae560c37448c9705a
|
||||
features:
|
||||
- section: .text
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
# this rule should match on earlier rule matches:
|
||||
# - install service, with function scope
|
||||
# - .text section, with file scope
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: .text section and install service
|
||||
@@ -77,9 +67,7 @@ def test_match_across_scopes_file_function(z9324d_extractor):
|
||||
- and:
|
||||
- match: install service
|
||||
- match: .text section
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
)
|
||||
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
|
||||
@@ -92,9 +80,7 @@ def test_match_across_scopes(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
# this rule should match on a basic block (including at least 0x403685)
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: tight loop
|
||||
@@ -105,14 +91,10 @@ def test_match_across_scopes(z9324d_extractor):
|
||||
- 9324d1a8ae37a36ae560c37448c9705a:0x403685
|
||||
features:
|
||||
- characteristic: tight loop
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
# this rule should match on a function (0x403660)
|
||||
# based on API, as well as prior basic block rule match
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: kill thread loop
|
||||
@@ -126,13 +108,9 @@ def test_match_across_scopes(z9324d_extractor):
|
||||
- api: kernel32.TerminateThread
|
||||
- api: kernel32.CloseHandle
|
||||
- match: tight loop
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
# this rule should match on a file feature and a prior function rule match
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: kill thread program
|
||||
@@ -145,9 +123,7 @@ def test_match_across_scopes(z9324d_extractor):
|
||||
- and:
|
||||
- section: .text
|
||||
- match: kill thread loop
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
)
|
||||
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
|
||||
@@ -157,11 +133,7 @@ def test_match_across_scopes(z9324d_extractor):
|
||||
|
||||
|
||||
def test_subscope_bb_rules(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -172,22 +144,14 @@ def test_subscope_bb_rules(z9324d_extractor):
|
||||
- and:
|
||||
- basic block:
|
||||
- characteristic: tight loop
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
"""))])
|
||||
# tight loop at 0x403685
|
||||
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
|
||||
assert "test rule" in capabilities.matches
|
||||
|
||||
|
||||
def test_match_specific_functions(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: receive data
|
||||
@@ -199,11 +163,7 @@ def test_match_specific_functions(z9324d_extractor):
|
||||
features:
|
||||
- or:
|
||||
- api: recv
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
"""))])
|
||||
extractor = FunctionFilter(z9324d_extractor, {0x4019C0})
|
||||
capabilities = capa.capabilities.common.find_capabilities(rules, extractor)
|
||||
matches = capabilities.matches["receive data"]
|
||||
@@ -214,11 +174,7 @@ def test_match_specific_functions(z9324d_extractor):
|
||||
|
||||
|
||||
def test_byte_matching(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: byte match test
|
||||
@@ -228,21 +184,13 @@ def test_byte_matching(z9324d_extractor):
|
||||
features:
|
||||
- and:
|
||||
- bytes: ED 24 9E F4 52 A9 07 47 55 8E E1 AB 30 8E 23 61
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
"""))])
|
||||
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
|
||||
assert "byte match test" in capabilities.matches
|
||||
|
||||
|
||||
def test_com_feature_matching(z395eb_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: initialize IWebBrowser2
|
||||
@@ -254,21 +202,13 @@ def test_com_feature_matching(z395eb_extractor):
|
||||
- api: ole32.CoCreateInstance
|
||||
- com/class: InternetExplorer #bytes: 01 DF 02 00 00 00 00 00 C0 00 00 00 00 00 00 46 = CLSID_InternetExplorer
|
||||
- com/interface: IWebBrowser2 #bytes: 61 16 0C D3 AF CD D0 11 8A 3E 00 C0 4F C9 E2 6E = IID_IWebBrowser2
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
"""))])
|
||||
capabilities = capa.main.find_capabilities(rules, z395eb_extractor)
|
||||
assert "initialize IWebBrowser2" in capabilities.matches
|
||||
|
||||
|
||||
def test_count_bb(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: count bb
|
||||
@@ -279,22 +219,14 @@ def test_count_bb(z9324d_extractor):
|
||||
features:
|
||||
- and:
|
||||
- count(basic blocks): 1 or more
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
"""))])
|
||||
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
|
||||
assert "count bb" in capabilities.matches
|
||||
|
||||
|
||||
def test_instruction_scope(z9324d_extractor):
|
||||
# .text:004071A4 68 E8 03 00 00 push 3E8h
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: push 1000
|
||||
@@ -306,11 +238,7 @@ def test_instruction_scope(z9324d_extractor):
|
||||
- and:
|
||||
- mnemonic: push
|
||||
- number: 1000
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
"""))])
|
||||
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
|
||||
assert "push 1000" in capabilities.matches
|
||||
assert 0x4071A4 in {result[0] for result in capabilities.matches["push 1000"]}
|
||||
@@ -320,11 +248,7 @@ def test_instruction_subscope(z9324d_extractor):
|
||||
# .text:00406F60 sub_406F60 proc near
|
||||
# [...]
|
||||
# .text:004071A4 68 E8 03 00 00 push 3E8h
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: push 1000 on i386
|
||||
@@ -338,11 +262,7 @@ def test_instruction_subscope(z9324d_extractor):
|
||||
- instruction:
|
||||
- mnemonic: push
|
||||
- number: 1000
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
"""))])
|
||||
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
|
||||
assert "push 1000 on i386" in capabilities.matches
|
||||
assert 0x406F60 in {result[0] for result in capabilities.matches["push 1000 on i386"]}
|
||||
|
||||
@@ -81,8 +81,7 @@ def test_cape_extractor(version: str, filename: str, exception: Type[BaseExcepti
|
||||
|
||||
|
||||
def test_cape_model_argument():
|
||||
call = Call.model_validate_json(
|
||||
"""
|
||||
call = Call.model_validate_json("""
|
||||
{
|
||||
"timestamp": "2023-10-20 12:30:14,015",
|
||||
"thread_id": "2380",
|
||||
@@ -105,7 +104,6 @@ def test_cape_model_argument():
|
||||
"repeated": 19,
|
||||
"id": 0
|
||||
}
|
||||
"""
|
||||
)
|
||||
""")
|
||||
assert call.arguments[0].value == 30
|
||||
assert call.arguments[1].value == 0x30
|
||||
|
||||
@@ -18,8 +18,7 @@ from capa.features.extractors.drakvuf.models import SystemCall
|
||||
|
||||
|
||||
def test_syscall_argument_construction():
|
||||
call_dictionary = json.loads(
|
||||
r"""
|
||||
call_dictionary = json.loads(r"""
|
||||
{
|
||||
"Plugin": "syscall",
|
||||
"TimeStamp": "1716999134.581449",
|
||||
@@ -43,8 +42,7 @@ def test_syscall_argument_construction():
|
||||
"Timeout": "0xfffff506a02846d8",
|
||||
"Alertable": "0x0"
|
||||
}
|
||||
"""
|
||||
)
|
||||
""")
|
||||
call = SystemCall(**call_dictionary)
|
||||
assert len(call.arguments) == call.nargs
|
||||
assert call.arguments["IoCompletionHandle"] == "0xffffffff80001ac0"
|
||||
|
||||
@@ -83,8 +83,7 @@ def get_call_ids(matches) -> Iterator[int]:
|
||||
def test_dynamic_call_scope():
|
||||
extractor = get_0000a657_thread3064()
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -93,8 +92,7 @@ def test_dynamic_call_scope():
|
||||
dynamic: call
|
||||
features:
|
||||
- api: GetSystemTimeAsFileTime
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
ruleset = capa.rules.RuleSet([r])
|
||||
@@ -116,8 +114,7 @@ def test_dynamic_call_scope():
|
||||
def test_dynamic_span_scope():
|
||||
extractor = get_0000a657_thread3064()
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -131,8 +128,7 @@ def test_dynamic_span_scope():
|
||||
- api: LdrGetDllHandle
|
||||
- api: LdrGetProcedureAddress
|
||||
- count(api(LdrGetDllHandle)): 2
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
ruleset = capa.rules.RuleSet([r])
|
||||
@@ -158,8 +154,7 @@ def test_dynamic_span_scope():
|
||||
def test_dynamic_span_scope_length():
|
||||
extractor = get_0000a657_thread3064()
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -170,8 +165,7 @@ def test_dynamic_span_scope_length():
|
||||
- and:
|
||||
- api: GetSystemTimeAsFileTime
|
||||
- api: RtlAddVectoredExceptionHandler
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
ruleset = capa.rules.RuleSet([r])
|
||||
@@ -196,8 +190,7 @@ def test_dynamic_span_scope_length():
|
||||
def test_dynamic_span_call_subscope():
|
||||
extractor = get_0000a657_thread3064()
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -210,8 +203,7 @@ def test_dynamic_span_call_subscope():
|
||||
- and:
|
||||
- api: LdrGetProcedureAddress
|
||||
- string: AddVectoredExceptionHandler
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
ruleset = capa.rules.RuleSet([r])
|
||||
@@ -234,8 +226,7 @@ def test_dynamic_span_call_subscope():
|
||||
def test_dynamic_span_scope_span_subscope():
|
||||
extractor = get_0000a657_thread3064()
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -256,8 +247,7 @@ def test_dynamic_span_scope_span_subscope():
|
||||
- api: LdrGetDllHandle
|
||||
- api: LdrGetProcedureAddress
|
||||
- string: RemoveVectoredExceptionHandler
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
ruleset = capa.rules.RuleSet([r])
|
||||
@@ -269,8 +259,7 @@ def test_dynamic_span_scope_span_subscope():
|
||||
|
||||
# show that you can't use thread subscope in span rules.
|
||||
def test_dynamic_span_scope_thread_subscope():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -281,8 +270,7 @@ def test_dynamic_span_scope_thread_subscope():
|
||||
- and:
|
||||
- thread:
|
||||
- string: "foo"
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
with pytest.raises(capa.rules.InvalidRule):
|
||||
capa.rules.Rule.from_yaml(rule)
|
||||
@@ -300,8 +288,7 @@ def test_dynamic_span_scope_thread_subscope():
|
||||
def test_dynamic_span_example():
|
||||
extractor = get_0000a657_thread3064()
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -319,8 +306,7 @@ def test_dynamic_span_example():
|
||||
- api: LdrGetProcedureAddress
|
||||
- string: "AddVectoredExceptionHandler"
|
||||
- api: RtlAddVectoredExceptionHandler
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
ruleset = capa.rules.RuleSet([r])
|
||||
@@ -345,8 +331,7 @@ def test_dynamic_span_example():
|
||||
def test_dynamic_span_multiple_spans_overlapping_single_event():
|
||||
extractor = get_0000a657_thread3064()
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -359,8 +344,7 @@ def test_dynamic_span_multiple_spans_overlapping_single_event():
|
||||
- and:
|
||||
- api: LdrGetProcedureAddress
|
||||
- string: "AddVectoredExceptionHandler"
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
ruleset = capa.rules.RuleSet([r])
|
||||
@@ -386,9 +370,7 @@ def test_dynamic_span_scope_match_statements():
|
||||
|
||||
ruleset = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: resolve add VEH
|
||||
@@ -401,12 +383,8 @@ def test_dynamic_span_scope_match_statements():
|
||||
- api: LdrGetDllHandle
|
||||
- api: LdrGetProcedureAddress
|
||||
- string: AddVectoredExceptionHandler
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: resolve remove VEH
|
||||
@@ -419,12 +397,8 @@ def test_dynamic_span_scope_match_statements():
|
||||
- api: LdrGetDllHandle
|
||||
- api: LdrGetProcedureAddress
|
||||
- string: RemoveVectoredExceptionHandler
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: resolve add and remove VEH
|
||||
@@ -435,12 +409,8 @@ def test_dynamic_span_scope_match_statements():
|
||||
- and:
|
||||
- match: resolve add VEH
|
||||
- match: resolve remove VEH
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: has VEH runtime linking
|
||||
@@ -450,9 +420,7 @@ def test_dynamic_span_scope_match_statements():
|
||||
features:
|
||||
- and:
|
||||
- match: linking/runtime-linking/veh
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -17,8 +17,7 @@ import textwrap
|
||||
|
||||
import capa.rules
|
||||
|
||||
EXPECTED = textwrap.dedent(
|
||||
"""\
|
||||
EXPECTED = textwrap.dedent("""\
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -34,13 +33,11 @@ EXPECTED = textwrap.dedent(
|
||||
- and:
|
||||
- number: 1
|
||||
- number: 2
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def test_rule_reformat_top_level_elements():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
features:
|
||||
- and:
|
||||
@@ -56,15 +53,13 @@ def test_rule_reformat_top_level_elements():
|
||||
examples:
|
||||
- foo1234
|
||||
- bar5678
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
|
||||
|
||||
|
||||
def test_rule_reformat_indentation():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -80,15 +75,13 @@ def test_rule_reformat_indentation():
|
||||
- and:
|
||||
- number: 1
|
||||
- number: 2
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
|
||||
|
||||
|
||||
def test_rule_reformat_order():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
authors:
|
||||
@@ -104,8 +97,7 @@ def test_rule_reformat_order():
|
||||
- and:
|
||||
- number: 1
|
||||
- number: 2
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
|
||||
|
||||
@@ -113,8 +105,7 @@ def test_rule_reformat_order():
|
||||
def test_rule_reformat_meta_update():
|
||||
# test updating the rule content after parsing
|
||||
|
||||
src = textwrap.dedent(
|
||||
"""
|
||||
src = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
authors:
|
||||
@@ -130,8 +121,7 @@ def test_rule_reformat_meta_update():
|
||||
- and:
|
||||
- number: 1
|
||||
- number: 2
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
rule = capa.rules.Rule.from_yaml(src)
|
||||
rule.name = "test rule"
|
||||
@@ -141,8 +131,7 @@ def test_rule_reformat_meta_update():
|
||||
def test_rule_reformat_string_description():
|
||||
# the `description` should be aligned with the preceding feature name.
|
||||
# see #263
|
||||
src = textwrap.dedent(
|
||||
"""
|
||||
src = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -155,8 +144,7 @@ def test_rule_reformat_string_description():
|
||||
- and:
|
||||
- string: foo
|
||||
description: bar
|
||||
"""
|
||||
).lstrip()
|
||||
""").lstrip()
|
||||
|
||||
rule = capa.rules.Rule.from_yaml(src)
|
||||
assert rule.to_yaml() == src
|
||||
|
||||
@@ -108,9 +108,7 @@ def test_null_feature_extractor():
|
||||
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: create file
|
||||
@@ -120,9 +118,7 @@ def test_null_feature_extractor():
|
||||
features:
|
||||
- and:
|
||||
- api: CreateFile
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
)
|
||||
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
|
||||
|
||||
@@ -88,9 +88,7 @@ def test_null_feature_extractor():
|
||||
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: xor loop
|
||||
@@ -102,9 +100,7 @@ def test_null_feature_extractor():
|
||||
- characteristic: tight loop
|
||||
- mnemonic: xor
|
||||
- characteristic: nzxor
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
)
|
||||
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
|
||||
|
||||
@@ -1,187 +0,0 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
run this script from within IDA to test the IDA feature extractor.
|
||||
you must have loaded a file referenced by a test case in order
|
||||
for this to do anything meaningful. for example, mimikatz.exe from testfiles.
|
||||
|
||||
you can invoke from the command line like this:
|
||||
|
||||
& 'C:\\Program Files\\IDA Pro 8.2\\idat.exe' \
|
||||
-S"C:\\Exclusions\\code\\capa\\tests\\test_ida_features.py --CAPA_AUTOEXIT=true" \
|
||||
-A \
|
||||
-Lidalog \
|
||||
'C:\\Exclusions\\code\\capa\\tests\\data\\mimikatz.exe_'
|
||||
|
||||
if you invoke from the command line, and provide the script argument `--CAPA_AUTOEXIT=true`,
|
||||
then the script will exit IDA after running the tests.
|
||||
|
||||
the output (in idalog) will look like this:
|
||||
|
||||
```
|
||||
Loading processor module C:\\Program Files\\IDA Pro 8.2\\procs\\pc.dll for metapc...Initializing processor module metapc...OK
|
||||
Loading type libraries...
|
||||
Autoanalysis subsystem has been initialized.
|
||||
Database for file 'mimikatz.exe_' has been loaded.
|
||||
--------------------------------------------------------------------------------
|
||||
PASS: test_ida_feature_counts/mimikatz-function=0x40E5C2-basic block-7
|
||||
PASS: test_ida_feature_counts/mimikatz-function=0x4702FD-characteristic(calls from)-0
|
||||
SKIP: test_ida_features/294b8d...-function=0x404970,bb=0x404970,insn=0x40499F-string(\r\n\x00:ht)-False
|
||||
SKIP: test_ida_features/64d9f-function=0x10001510,bb=0x100015B0-offset(0x4000)-True
|
||||
...
|
||||
SKIP: test_ida_features/pma16-01-function=0x404356,bb=0x4043B9-arch(i386)-True
|
||||
PASS: test_ida_features/mimikatz-file-import(cabinet.FCIAddFile)-True
|
||||
DONE
|
||||
C:\\Exclusions\\code\\capa\\tests\\test_ida_features.py: Traceback (most recent call last):
|
||||
File "C:\\Program Files\\IDA Pro 8.2\\python\\3\\ida_idaapi.py", line 588, in IDAPython_ExecScript
|
||||
exec(code, g)
|
||||
File "C:/Exclusions/code/capa/tests/test_ida_features.py", line 120, in <module>
|
||||
sys.exit(0)
|
||||
SystemExit: 0
|
||||
-> OK
|
||||
Flushing buffers, please wait...ok
|
||||
```
|
||||
|
||||
Look for lines that start with "FAIL" to identify test failures.
|
||||
"""
|
||||
import io
|
||||
import sys
|
||||
import inspect
|
||||
import logging
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
sys.path.append(str(Path(__file__).parent))
|
||||
import fixtures
|
||||
finally:
|
||||
sys.path.pop()
|
||||
|
||||
|
||||
logger = logging.getLogger("test_ida_features")
|
||||
|
||||
|
||||
def check_input_file(wanted):
|
||||
import idautils
|
||||
|
||||
# some versions (7.4) of IDA return a truncated version of the MD5.
|
||||
# https://github.com/idapython/bin/issues/11
|
||||
try:
|
||||
found = idautils.GetInputFileMD5()[:31].decode("ascii").lower()
|
||||
except UnicodeDecodeError:
|
||||
# in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
|
||||
# rather than the hex digest
|
||||
found = bytes.hex(idautils.GetInputFileMD5()[:15]).lower()
|
||||
|
||||
if not wanted.startswith(found):
|
||||
raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`")
|
||||
|
||||
|
||||
def get_ida_extractor(_path):
|
||||
# have to import this inline so pytest doesn't bail outside of IDA
|
||||
import capa.features.extractors.ida.extractor
|
||||
|
||||
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
|
||||
|
||||
|
||||
def nocollect(f):
|
||||
"don't collect the decorated function as a pytest test"
|
||||
f.__test__ = False
|
||||
return f
|
||||
|
||||
|
||||
# although these look like pytest tests, they're not, because they don't run within pytest
|
||||
# (the runner is below) and they use `yield`, which is deprecated.
|
||||
@nocollect
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_ida_features():
|
||||
# we're guaranteed to be in a function here, so there's a stack frame
|
||||
this_name = inspect.currentframe().f_code.co_name # type: ignore
|
||||
for sample, scope, feature, expected in fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_PRESENCE_TESTS_IDA:
|
||||
id = fixtures.make_test_id((sample, scope, feature, expected))
|
||||
|
||||
try:
|
||||
check_input_file(fixtures.get_sample_md5_by_name(sample))
|
||||
except RuntimeError:
|
||||
yield this_name, id, "skip", None
|
||||
continue
|
||||
|
||||
scope = fixtures.resolve_scope(scope)
|
||||
sample = fixtures.resolve_sample(sample)
|
||||
|
||||
try:
|
||||
fixtures.do_test_feature_presence(get_ida_extractor, sample, scope, feature, expected)
|
||||
except Exception:
|
||||
f = io.StringIO()
|
||||
traceback.print_exc(file=f)
|
||||
yield this_name, id, "fail", f.getvalue()
|
||||
else:
|
||||
yield this_name, id, "pass", None
|
||||
|
||||
|
||||
@nocollect
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_ida_feature_counts():
|
||||
# we're guaranteed to be in a function here, so there's a stack frame
|
||||
this_name = inspect.currentframe().f_code.co_name # type: ignore
|
||||
for sample, scope, feature, expected in fixtures.FEATURE_COUNT_TESTS:
|
||||
id = fixtures.make_test_id((sample, scope, feature, expected))
|
||||
|
||||
try:
|
||||
check_input_file(fixtures.get_sample_md5_by_name(sample))
|
||||
except RuntimeError:
|
||||
yield this_name, id, "skip", None
|
||||
continue
|
||||
|
||||
scope = fixtures.resolve_scope(scope)
|
||||
sample = fixtures.resolve_sample(sample)
|
||||
|
||||
try:
|
||||
fixtures.do_test_feature_count(get_ida_extractor, sample, scope, feature, expected)
|
||||
except Exception:
|
||||
f = io.StringIO()
|
||||
traceback.print_exc(file=f)
|
||||
yield this_name, id, "fail", f.getvalue()
|
||||
else:
|
||||
yield this_name, id, "pass", None
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import idc
|
||||
import ida_auto
|
||||
|
||||
ida_auto.auto_wait()
|
||||
|
||||
print("-" * 80)
|
||||
|
||||
# invoke all functions in this module that start with `test_`
|
||||
for name in dir(sys.modules[__name__]):
|
||||
if not name.startswith("test_"):
|
||||
continue
|
||||
|
||||
test = getattr(sys.modules[__name__], name)
|
||||
logger.debug("invoking test: %s", name)
|
||||
sys.stderr.flush()
|
||||
for name, id, state, info in test():
|
||||
print(f"{state.upper()}: {name}/{id}")
|
||||
if info:
|
||||
print(info)
|
||||
|
||||
print("DONE")
|
||||
|
||||
if "--CAPA_AUTOEXIT=true" in idc.ARGV:
|
||||
sys.exit(0)
|
||||
60
tests/test_loader_segfault.py
Normal file
60
tests/test_loader_segfault.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# Copyright 2025 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import envi.exc
|
||||
|
||||
from capa.loader import CorruptFile, get_workspace
|
||||
from capa.features.common import FORMAT_PE, FORMAT_ELF
|
||||
|
||||
|
||||
def test_segmentation_violation_handling():
|
||||
"""
|
||||
Test that SegmentationViolation from vivisect is caught and
|
||||
converted to a CorruptFile exception.
|
||||
|
||||
See #2794.
|
||||
"""
|
||||
fake_path = Path("/tmp/fake_malformed.elf")
|
||||
|
||||
with patch("viv_utils.getWorkspace") as mock_workspace:
|
||||
mock_workspace.side_effect = envi.exc.SegmentationViolation(
|
||||
0x30A4B8BD60,
|
||||
)
|
||||
|
||||
with pytest.raises(CorruptFile, match="Invalid memory access"):
|
||||
get_workspace(fake_path, FORMAT_ELF, [])
|
||||
|
||||
|
||||
def test_corrupt_pe_with_unrealistic_section_size_short_circuits():
|
||||
"""
|
||||
Test that a PE with an unrealistically large section virtual size
|
||||
is caught early and raises CorruptFile before vivisect is invoked.
|
||||
|
||||
See #1989.
|
||||
"""
|
||||
fake_path = Path("/tmp/fake_corrupt.exe")
|
||||
|
||||
with (
|
||||
patch("capa.loader._is_probably_corrupt_pe", return_value=True),
|
||||
patch("viv_utils.getWorkspace") as mock_workspace,
|
||||
):
|
||||
with pytest.raises(CorruptFile, match="unrealistically large sections"):
|
||||
get_workspace(fake_path, FORMAT_PE, [])
|
||||
|
||||
# vivisect should never have been called
|
||||
mock_workspace.assert_not_called()
|
||||
@@ -38,8 +38,7 @@ def test_main(z9324d_extractor):
|
||||
|
||||
def test_main_single_rule(z9324d_extractor, tmpdir):
|
||||
# tests a single rule can be loaded successfully
|
||||
RULE_CONTENT = textwrap.dedent(
|
||||
"""
|
||||
RULE_CONTENT = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -50,8 +49,7 @@ def test_main_single_rule(z9324d_extractor, tmpdir):
|
||||
- test
|
||||
features:
|
||||
- string: test
|
||||
"""
|
||||
)
|
||||
""")
|
||||
path = z9324d_extractor.path
|
||||
rule_file = tmpdir.mkdir("capa").join("rule.yml")
|
||||
rule_file.write(RULE_CONTENT)
|
||||
@@ -100,9 +98,7 @@ def test_main_shellcode(z499c2_extractor):
|
||||
def test_ruleset():
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: file rule
|
||||
@@ -111,12 +107,8 @@ def test_ruleset():
|
||||
dynamic: process
|
||||
features:
|
||||
- characteristic: embedded pe
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: function rule
|
||||
@@ -125,12 +117,8 @@ def test_ruleset():
|
||||
dynamic: process
|
||||
features:
|
||||
- characteristic: tight loop
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: basic block rule
|
||||
@@ -139,12 +127,8 @@ def test_ruleset():
|
||||
dynamic: process
|
||||
features:
|
||||
- characteristic: nzxor
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: process rule
|
||||
@@ -153,12 +137,8 @@ def test_ruleset():
|
||||
dynamic: process
|
||||
features:
|
||||
- string: "explorer.exe"
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: thread rule
|
||||
@@ -167,12 +147,8 @@ def test_ruleset():
|
||||
dynamic: thread
|
||||
features:
|
||||
- api: RegDeleteKey
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test call subscope
|
||||
@@ -184,12 +160,8 @@ def test_ruleset():
|
||||
- string: "explorer.exe"
|
||||
- call:
|
||||
- api: HttpOpenRequestW
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -207,9 +179,7 @@ def test_ruleset():
|
||||
- number: 6 = IPPROTO_TCP
|
||||
- number: 1 = SOCK_STREAM
|
||||
- number: 2 = AF_INET
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
)
|
||||
assert len(rules.file_rules) == 2
|
||||
@@ -322,9 +292,7 @@ def test_main_cape1(tmp_path):
|
||||
# https://github.com/mandiant/capa/pull/1696
|
||||
rules = tmp_path / "rules"
|
||||
rules.mkdir()
|
||||
(rules / "create-or-open-registry-key.yml").write_text(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
(rules / "create-or-open-registry-key.yml").write_text(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: create or open registry key
|
||||
@@ -354,9 +322,7 @@ def test_main_cape1(tmp_path):
|
||||
- api: SHRegOpenUSKey
|
||||
- api: SHRegCreateUSKey
|
||||
- api: RtlCreateRegistryKey
|
||||
"""
|
||||
)
|
||||
)
|
||||
"""))
|
||||
|
||||
assert capa.main.main([str(path), "-r", str(rules)]) == 0
|
||||
assert capa.main.main([str(path), "-q", "-r", str(rules)]) == 0
|
||||
|
||||
188
tests/test_mapa.py
Normal file
188
tests/test_mapa.py
Normal file
@@ -0,0 +1,188 @@
|
||||
import json
|
||||
from io import StringIO
|
||||
|
||||
from rich.theme import Theme
|
||||
from rich.console import Console
|
||||
|
||||
from mapa.model import (
|
||||
AssemblageLocation,
|
||||
MapaCall,
|
||||
MapaCaller,
|
||||
MapaFunction,
|
||||
MapaLibrary,
|
||||
MapaMeta,
|
||||
MapaReport,
|
||||
MapaSection,
|
||||
MapaString,
|
||||
)
|
||||
from mapa.renderer import render_report
|
||||
|
||||
|
||||
class TestAssemblageLocation:
|
||||
def test_from_dict(self):
|
||||
data = {
|
||||
"name": "foo",
|
||||
"file": "src/main.c (line 42)",
|
||||
"prototype": "int foo(void)",
|
||||
"function_start": 0x1000,
|
||||
}
|
||||
loc = AssemblageLocation.from_dict(data)
|
||||
assert loc.name == "foo"
|
||||
assert loc.rva == 0x1000
|
||||
assert loc.path == "src/main.c"
|
||||
|
||||
def test_path_no_parens(self):
|
||||
loc = AssemblageLocation(name="bar", file="src/bar.c", prototype="", rva=0)
|
||||
assert loc.path == "src/bar.c"
|
||||
|
||||
def test_from_json(self):
|
||||
line = json.dumps({
|
||||
"name": "baz",
|
||||
"file": "lib.c",
|
||||
"prototype": "void baz()",
|
||||
"function_start": 0x2000,
|
||||
})
|
||||
loc = AssemblageLocation.from_json(line)
|
||||
assert loc.name == "baz"
|
||||
assert loc.rva == 0x2000
|
||||
|
||||
|
||||
class TestRenderer:
|
||||
@staticmethod
|
||||
def _make_console() -> tuple[Console, StringIO]:
|
||||
buf = StringIO()
|
||||
theme = Theme(
|
||||
{
|
||||
"decoration": "grey54",
|
||||
"title": "yellow",
|
||||
"key": "black",
|
||||
"value": "blue",
|
||||
"default": "black",
|
||||
},
|
||||
inherit=False,
|
||||
)
|
||||
console = Console(
|
||||
theme=theme,
|
||||
markup=False,
|
||||
emoji=False,
|
||||
file=buf,
|
||||
force_terminal=False,
|
||||
width=120,
|
||||
no_color=True,
|
||||
)
|
||||
return console, buf
|
||||
|
||||
def test_meta_section(self):
|
||||
report = MapaReport(
|
||||
meta=MapaMeta(
|
||||
name="test.exe",
|
||||
sha256="abc123",
|
||||
arch="x86_64",
|
||||
timestamp="2025-01-01T00:00:00",
|
||||
),
|
||||
)
|
||||
console, buf = self._make_console()
|
||||
render_report(report, console)
|
||||
output = buf.getvalue()
|
||||
assert "test.exe" in output
|
||||
assert "abc123" in output
|
||||
assert "x86_64" in output
|
||||
|
||||
def test_sections_rendered(self):
|
||||
report = MapaReport(
|
||||
meta=MapaMeta(name="t", sha256="s"),
|
||||
sections=[
|
||||
MapaSection(address=0x1000, size=0x2000, perms="r-x"),
|
||||
],
|
||||
)
|
||||
console, buf = self._make_console()
|
||||
render_report(report, console)
|
||||
output = buf.getvalue()
|
||||
assert "0x1000" in output
|
||||
assert "r-x" in output
|
||||
assert "0x2000" in output
|
||||
|
||||
def test_libraries_rendered(self):
|
||||
report = MapaReport(
|
||||
meta=MapaMeta(name="t", sha256="s"),
|
||||
libraries=[MapaLibrary(name="KERNEL32.dll")],
|
||||
)
|
||||
console, buf = self._make_console()
|
||||
render_report(report, console)
|
||||
output = buf.getvalue()
|
||||
assert "KERNEL32.dll" in output
|
||||
|
||||
def test_empty_libraries(self):
|
||||
report = MapaReport(
|
||||
meta=MapaMeta(name="t", sha256="s"),
|
||||
)
|
||||
console, buf = self._make_console()
|
||||
render_report(report, console)
|
||||
output = buf.getvalue()
|
||||
assert "(none)" in output
|
||||
|
||||
def test_thunk_function(self):
|
||||
report = MapaReport(
|
||||
meta=MapaMeta(name="t", sha256="s"),
|
||||
functions=[
|
||||
MapaFunction(address=0x1000, name="jmp_CreateFile", is_thunk=True),
|
||||
],
|
||||
)
|
||||
console, buf = self._make_console()
|
||||
render_report(report, console)
|
||||
output = buf.getvalue()
|
||||
assert "thunk" in output
|
||||
assert "jmp_CreateFile" in output
|
||||
|
||||
def test_function_with_calls_and_strings(self):
|
||||
report = MapaReport(
|
||||
meta=MapaMeta(name="t", sha256="s"),
|
||||
functions=[
|
||||
MapaFunction(
|
||||
address=0x1000,
|
||||
name="main",
|
||||
num_basic_blocks=3,
|
||||
num_edges=4,
|
||||
num_instructions=10,
|
||||
total_instruction_bytes=42,
|
||||
callers=[MapaCaller(name="start", address=0x500, delta=-1, direction="↑")],
|
||||
calls=[MapaCall(name="helper", address=0x2000, is_api=False, delta=1, direction="↓")],
|
||||
apis=[MapaCall(name="CreateFileW", address=0x3000, is_api=True)],
|
||||
strings=[MapaString(value="Hello World", address=0x4000)],
|
||||
capa_matches=["write file"],
|
||||
),
|
||||
],
|
||||
)
|
||||
console, buf = self._make_console()
|
||||
render_report(report, console)
|
||||
output = buf.getvalue()
|
||||
assert "function" in output
|
||||
assert "main" in output
|
||||
assert "3 / 4 / 10 (42 bytes)" in output
|
||||
assert "xref:" in output
|
||||
assert "start" in output
|
||||
assert "calls:" in output
|
||||
assert "helper" in output
|
||||
assert "api:" in output
|
||||
assert "CreateFileW" in output
|
||||
assert 'string:' in output
|
||||
assert "Hello World" in output
|
||||
assert "capa:" in output
|
||||
assert "write file" in output
|
||||
|
||||
|
||||
class TestStringDedup:
|
||||
def test_strings_deduped_in_model(self):
|
||||
seen: set[str] = set()
|
||||
strings = ["hello", "hello", "world", "hello"]
|
||||
result = []
|
||||
for s in strings:
|
||||
stripped = s.rstrip()
|
||||
if stripped and stripped not in seen:
|
||||
seen.add(stripped)
|
||||
result.append(stripped)
|
||||
assert result == ["hello", "world"]
|
||||
|
||||
def test_string_rstrip(self):
|
||||
s = "hello \n\t"
|
||||
assert s.rstrip() == "hello"
|
||||
@@ -46,8 +46,7 @@ def match(rules, features, va, scope=Scope.FUNCTION):
|
||||
|
||||
|
||||
def test_match_simple():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -57,8 +56,7 @@ def test_match_simple():
|
||||
namespace: testns1/testns2
|
||||
features:
|
||||
- number: 100
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
features, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
|
||||
@@ -69,8 +67,7 @@ def test_match_simple():
|
||||
|
||||
|
||||
def test_match_range_exact():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -79,8 +76,7 @@ def test_match_range_exact():
|
||||
dynamic: process
|
||||
features:
|
||||
- count(number(100)): 2
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# just enough matches
|
||||
@@ -97,8 +93,7 @@ def test_match_range_exact():
|
||||
|
||||
|
||||
def test_match_range_range():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -107,8 +102,7 @@ def test_match_range_range():
|
||||
dynamic: process
|
||||
features:
|
||||
- count(number(100)): (2, 3)
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# just enough matches
|
||||
@@ -129,8 +123,7 @@ def test_match_range_range():
|
||||
|
||||
|
||||
def test_match_range_exact_zero():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -146,8 +139,7 @@ def test_match_range_exact_zero():
|
||||
# so we have this additional trivial feature.
|
||||
- mnemonic: mov
|
||||
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# feature isn't indexed - good.
|
||||
@@ -165,8 +157,7 @@ def test_match_range_exact_zero():
|
||||
|
||||
|
||||
def test_match_range_with_zero():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -181,8 +172,7 @@ def test_match_range_with_zero():
|
||||
# since we don't support top level NOT statements.
|
||||
# so we have this additional trivial feature.
|
||||
- mnemonic: mov
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# ok
|
||||
@@ -200,8 +190,7 @@ def test_match_range_with_zero():
|
||||
|
||||
def test_match_adds_matched_rule_feature():
|
||||
"""show that using `match` adds a feature for matched rules."""
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -210,8 +199,7 @@ def test_match_adds_matched_rule_feature():
|
||||
dynamic: process
|
||||
features:
|
||||
- number: 100
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
features, _ = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
|
||||
assert capa.features.common.MatchedRule("test rule") in features
|
||||
@@ -220,9 +208,7 @@ def test_match_adds_matched_rule_feature():
|
||||
def test_match_matched_rules():
|
||||
"""show that using `match` adds a feature for matched rules."""
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule1
|
||||
@@ -231,12 +217,8 @@ def test_match_matched_rules():
|
||||
dynamic: process
|
||||
features:
|
||||
- number: 100
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule2
|
||||
@@ -245,9 +227,7 @@ def test_match_matched_rules():
|
||||
dynamic: process
|
||||
features:
|
||||
- match: test rule1
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
|
||||
features, _ = match(
|
||||
@@ -271,9 +251,7 @@ def test_match_matched_rules():
|
||||
|
||||
def test_match_namespace():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: CreateFile API
|
||||
@@ -283,12 +261,8 @@ def test_match_namespace():
|
||||
namespace: file/create/CreateFile
|
||||
features:
|
||||
- api: CreateFile
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: WriteFile API
|
||||
@@ -298,12 +272,8 @@ def test_match_namespace():
|
||||
namespace: file/write
|
||||
features:
|
||||
- api: WriteFile
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: file-create
|
||||
@@ -312,12 +282,8 @@ def test_match_namespace():
|
||||
dynamic: process
|
||||
features:
|
||||
- match: file/create
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: filesystem-any
|
||||
@@ -326,9 +292,7 @@ def test_match_namespace():
|
||||
dynamic: process
|
||||
features:
|
||||
- match: file
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
|
||||
features, matches = match(
|
||||
@@ -355,9 +319,7 @@ def test_match_namespace():
|
||||
|
||||
def test_match_substring():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -367,9 +329,7 @@ def test_match_substring():
|
||||
features:
|
||||
- and:
|
||||
- substring: abc
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
features, _ = match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
@@ -409,9 +369,7 @@ def test_match_substring():
|
||||
|
||||
def test_match_regex():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -421,12 +379,8 @@ def test_match_regex():
|
||||
features:
|
||||
- and:
|
||||
- string: /.*bbbb.*/
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: rule with implied wildcards
|
||||
@@ -436,12 +390,8 @@ def test_match_regex():
|
||||
features:
|
||||
- and:
|
||||
- string: /bbbb/
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
""")),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: rule with anchor
|
||||
@@ -451,9 +401,7 @@ def test_match_regex():
|
||||
features:
|
||||
- and:
|
||||
- string: /^bbbb/
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
features, _ = match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
@@ -488,9 +436,7 @@ def test_match_regex():
|
||||
|
||||
def test_match_regex_ignorecase():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -500,9 +446,7 @@ def test_match_regex_ignorecase():
|
||||
features:
|
||||
- and:
|
||||
- string: /.*bbbb.*/i
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
features, _ = match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
@@ -514,9 +458,7 @@ def test_match_regex_ignorecase():
|
||||
|
||||
def test_match_regex_complex():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
r"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent(r"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -526,9 +468,7 @@ def test_match_regex_complex():
|
||||
features:
|
||||
- or:
|
||||
- string: /.*HARDWARE\\Key\\key with spaces\\.*/i
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
features, _ = match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
@@ -540,9 +480,7 @@ def test_match_regex_complex():
|
||||
|
||||
def test_match_regex_values_always_string():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -553,9 +491,7 @@ def test_match_regex_values_always_string():
|
||||
- or:
|
||||
- string: /123/
|
||||
- string: /0x123/
|
||||
"""
|
||||
)
|
||||
),
|
||||
""")),
|
||||
]
|
||||
features, _ = match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
@@ -572,10 +508,22 @@ def test_match_regex_values_always_string():
|
||||
assert capa.features.common.MatchedRule("test rule") in features
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"pattern",
|
||||
[
|
||||
"/test\\.exe/",
|
||||
"/hello/i",
|
||||
"/foo\\\\bar/",
|
||||
],
|
||||
)
|
||||
def test_regex_get_value_str(pattern):
|
||||
# Regex.get_value_str() must return the raw pattern without escaping, see #1909.
|
||||
assert capa.features.common.Regex(pattern).get_value_str() == pattern
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="can't have top level NOT")
|
||||
def test_match_only_not():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -586,8 +534,7 @@ def test_match_only_not():
|
||||
features:
|
||||
- not:
|
||||
- number: 99
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
|
||||
@@ -595,8 +542,7 @@ def test_match_only_not():
|
||||
|
||||
|
||||
def test_match_not():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -609,8 +555,7 @@ def test_match_not():
|
||||
- mnemonic: mov
|
||||
- not:
|
||||
- number: 99
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}, capa.features.insn.Mnemonic("mov"): {1, 2}}, 0x0)
|
||||
@@ -619,8 +564,7 @@ def test_match_not():
|
||||
|
||||
@pytest.mark.xfail(reason="can't have nested NOT")
|
||||
def test_match_not_not():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -632,8 +576,7 @@ def test_match_not_not():
|
||||
- not:
|
||||
- not:
|
||||
- number: 100
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
|
||||
@@ -641,8 +584,7 @@ def test_match_not_not():
|
||||
|
||||
|
||||
def test_match_operand_number():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -652,8 +594,7 @@ def test_match_operand_number():
|
||||
features:
|
||||
- and:
|
||||
- operand[0].number: 0x10
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
assert capa.features.insn.OperandNumber(0, 0x10) in {capa.features.insn.OperandNumber(0, 0x10)}
|
||||
@@ -671,8 +612,7 @@ def test_match_operand_number():
|
||||
|
||||
|
||||
def test_match_operand_offset():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -682,8 +622,7 @@ def test_match_operand_offset():
|
||||
features:
|
||||
- and:
|
||||
- operand[0].offset: 0x10
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
assert capa.features.insn.OperandOffset(0, 0x10) in {capa.features.insn.OperandOffset(0, 0x10)}
|
||||
@@ -701,8 +640,7 @@ def test_match_operand_offset():
|
||||
|
||||
|
||||
def test_match_property_access():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -712,8 +650,7 @@ def test_match_property_access():
|
||||
features:
|
||||
- and:
|
||||
- property/read: System.IO.FileInfo::Length
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
assert capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.READ) in {
|
||||
@@ -745,8 +682,7 @@ def test_match_property_access():
|
||||
|
||||
|
||||
def test_match_os_any():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -764,8 +700,7 @@ def test_match_os_any():
|
||||
- and:
|
||||
- os: any
|
||||
- string: "Goodbye world"
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
_, matches = match(
|
||||
@@ -799,8 +734,7 @@ def test_match_os_any():
|
||||
|
||||
# this test demonstrates the behavior of unstable features that may change before the next major release.
|
||||
def test_index_features_and_unstable():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -811,8 +745,7 @@ def test_index_features_and_unstable():
|
||||
- and:
|
||||
- mnemonic: mov
|
||||
- api: CreateFileW
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
rr = capa.rules.RuleSet([r])
|
||||
index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]
|
||||
@@ -828,8 +761,7 @@ def test_index_features_and_unstable():
|
||||
|
||||
# this test demonstrates the behavior of unstable features that may change before the next major release.
|
||||
def test_index_features_or_unstable():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -840,8 +772,7 @@ def test_index_features_or_unstable():
|
||||
- or:
|
||||
- mnemonic: mov
|
||||
- api: CreateFileW
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
rr = capa.rules.RuleSet([r])
|
||||
index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]
|
||||
@@ -858,8 +789,7 @@ def test_index_features_or_unstable():
|
||||
|
||||
# this test demonstrates the behavior of unstable features that may change before the next major release.
|
||||
def test_index_features_nested_unstable():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -872,8 +802,7 @@ def test_index_features_nested_unstable():
|
||||
- or:
|
||||
- api: CreateFileW
|
||||
- string: foo
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
rr = capa.rules.RuleSet([r])
|
||||
index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]
|
||||
|
||||
@@ -25,8 +25,7 @@ from capa.features.common import Arch, Substring
|
||||
|
||||
|
||||
def test_optimizer_order():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -44,8 +43,7 @@ def test_optimizer_order():
|
||||
- or:
|
||||
- number: 1
|
||||
- offset: 4
|
||||
"""
|
||||
)
|
||||
""")
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# before optimization
|
||||
|
||||
@@ -56,8 +56,7 @@ def test_render_meta_attack():
|
||||
subtechnique = "Windows Service"
|
||||
canonical = "{:s}::{:s}::{:s} [{:s}]".format(tactic, technique, subtechnique, id)
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -70,10 +69,7 @@ def test_render_meta_attack():
|
||||
- {:s}
|
||||
features:
|
||||
- number: 1
|
||||
""".format(
|
||||
canonical
|
||||
)
|
||||
)
|
||||
""".format(canonical))
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
rule_meta = capa.render.result_document.RuleMetadata.from_capa(r)
|
||||
attack = rule_meta.attack[0]
|
||||
@@ -94,8 +90,7 @@ def test_render_meta_mbc():
|
||||
method = "Heavens Gate"
|
||||
canonical = "{:s}::{:s}::{:s} [{:s}]".format(objective, behavior, method, id)
|
||||
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -108,10 +103,7 @@ def test_render_meta_mbc():
|
||||
- {:s}
|
||||
features:
|
||||
- number: 1
|
||||
""".format(
|
||||
canonical
|
||||
)
|
||||
)
|
||||
""".format(canonical))
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
rule_meta = capa.render.result_document.RuleMetadata.from_capa(r)
|
||||
mbc = rule_meta.mbc[0]
|
||||
@@ -129,8 +121,7 @@ def test_render_meta_maec():
|
||||
malware_category = "downloader"
|
||||
analysis_conclusion = "malicious"
|
||||
|
||||
rule_yaml = textwrap.dedent(
|
||||
"""
|
||||
rule_yaml = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -144,10 +135,7 @@ def test_render_meta_maec():
|
||||
maec/analysis-conclusion: {:s}
|
||||
features:
|
||||
- number: 1
|
||||
""".format(
|
||||
malware_family, malware_category, analysis_conclusion
|
||||
)
|
||||
)
|
||||
""".format(malware_family, malware_category, analysis_conclusion))
|
||||
rule = capa.rules.Rule.from_yaml(rule_yaml)
|
||||
rm = capa.render.result_document.RuleMatches(
|
||||
meta=capa.render.result_document.RuleMetadata.from_capa(rule),
|
||||
@@ -220,8 +208,7 @@ def test_render_vverbose_feature(feature, expected):
|
||||
|
||||
layout = capa.render.result_document.StaticLayout(functions=())
|
||||
|
||||
src = textwrap.dedent(
|
||||
"""
|
||||
src = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -237,8 +224,7 @@ def test_render_vverbose_feature(feature, expected):
|
||||
- and:
|
||||
- number: 1
|
||||
- number: 2
|
||||
"""
|
||||
)
|
||||
""")
|
||||
rule = capa.rules.Rule.from_yaml(src)
|
||||
|
||||
rm = capa.render.result_document.RuleMatches(
|
||||
|
||||
@@ -22,9 +22,7 @@ import capa.rules
|
||||
import capa.helpers
|
||||
import capa.rules.cache
|
||||
|
||||
R1 = capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
R1 = capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -40,13 +38,9 @@ R1 = capa.rules.Rule.from_yaml(
|
||||
- and:
|
||||
- number: 1
|
||||
- number: 2
|
||||
"""
|
||||
)
|
||||
)
|
||||
"""))
|
||||
|
||||
R2 = capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
R2 = capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule 2
|
||||
@@ -62,9 +56,7 @@ R2 = capa.rules.Rule.from_yaml(
|
||||
- and:
|
||||
- number: 3
|
||||
- number: 4
|
||||
"""
|
||||
)
|
||||
)
|
||||
"""))
|
||||
|
||||
|
||||
def test_ruleset_cache_ids():
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -21,9 +21,7 @@ import capa.rules
|
||||
|
||||
|
||||
def test_rule_scope_instruction():
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -35,14 +33,10 @@ def test_rule_scope_instruction():
|
||||
- mnemonic: mov
|
||||
- arch: i386
|
||||
- os: windows
|
||||
"""
|
||||
)
|
||||
)
|
||||
"""))
|
||||
|
||||
with pytest.raises(capa.rules.InvalidRule):
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -51,17 +45,11 @@ def test_rule_scope_instruction():
|
||||
dynamic: unsupported
|
||||
features:
|
||||
- characteristic: embedded pe
|
||||
"""
|
||||
)
|
||||
)
|
||||
"""))
|
||||
|
||||
|
||||
def test_rule_subscope_instruction():
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -75,11 +63,7 @@ def test_rule_subscope_instruction():
|
||||
- mnemonic: mov
|
||||
- arch: i386
|
||||
- os: windows
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
"""))])
|
||||
# the function rule scope will have one rules:
|
||||
# - `test rule`
|
||||
assert len(rules.function_rules) == 1
|
||||
@@ -90,9 +74,7 @@ def test_rule_subscope_instruction():
|
||||
|
||||
|
||||
def test_scope_instruction_implied_and():
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -105,15 +87,11 @@ def test_scope_instruction_implied_and():
|
||||
- mnemonic: mov
|
||||
- arch: i386
|
||||
- os: windows
|
||||
"""
|
||||
)
|
||||
)
|
||||
"""))
|
||||
|
||||
|
||||
def test_scope_instruction_description():
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -127,13 +105,9 @@ def test_scope_instruction_description():
|
||||
- mnemonic: mov
|
||||
- arch: i386
|
||||
- os: windows
|
||||
"""
|
||||
)
|
||||
)
|
||||
"""))
|
||||
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -147,6 +121,4 @@ def test_scope_instruction_description():
|
||||
- mnemonic: mov
|
||||
- arch: i386
|
||||
- os: windows
|
||||
"""
|
||||
)
|
||||
)
|
||||
"""))
|
||||
|
||||
@@ -142,8 +142,7 @@ def test_proto_conversion(tmp_path):
|
||||
|
||||
|
||||
def test_detect_duplicate_features(tmpdir):
|
||||
TEST_RULE_0 = textwrap.dedent(
|
||||
"""
|
||||
TEST_RULE_0 = textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: Test Rule 0
|
||||
@@ -155,12 +154,10 @@ def test_detect_duplicate_features(tmpdir):
|
||||
- number: 1
|
||||
- not:
|
||||
- string: process
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
TEST_RULESET = {
|
||||
"rule_1": textwrap.dedent(
|
||||
"""
|
||||
"rule_1": textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: Test Rule 1
|
||||
@@ -179,10 +176,8 @@ def test_detect_duplicate_features(tmpdir):
|
||||
- count(mnemonic(xor)): 5
|
||||
- not:
|
||||
- os: linux
|
||||
"""
|
||||
),
|
||||
"rule_2": textwrap.dedent(
|
||||
"""
|
||||
"""),
|
||||
"rule_2": textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: Test Rule 2
|
||||
@@ -196,10 +191,8 @@ def test_detect_duplicate_features(tmpdir):
|
||||
- and:
|
||||
- api: CreateFile
|
||||
- mnemonic: xor
|
||||
"""
|
||||
),
|
||||
"rule_3": textwrap.dedent(
|
||||
"""
|
||||
"""),
|
||||
"rule_3": textwrap.dedent("""
|
||||
rule:
|
||||
meta:
|
||||
name: Test Rule 3
|
||||
@@ -214,8 +207,7 @@ def test_detect_duplicate_features(tmpdir):
|
||||
- and:
|
||||
- api: bind
|
||||
- number: 2
|
||||
"""
|
||||
),
|
||||
"""),
|
||||
}
|
||||
|
||||
"""
|
||||
|
||||
@@ -72,6 +72,15 @@ DYNAMIC_VMRAY_FEATURE_PRESENCE_TESTS = sorted(
|
||||
# VirtualAlloc(4096, 4)
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4096), True),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4), True),
|
||||
# call/number argument - registry API parameters (issue #2)
|
||||
# RegOpenKeyExW(Software\Microsoft\Windows\CurrentVersion\Policies\System, 0, 131078)
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(2147483649), True),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(0), True),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(131078), True),
|
||||
# RegOpenKeyExW call 2397 (same parameters)
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(2147483649), True),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(0), True),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(131078), True),
|
||||
],
|
||||
# order tests by (file, item)
|
||||
# so that our LRU cache is most effective.
|
||||
@@ -101,6 +110,11 @@ DYNAMIC_VMRAY_FEATURE_COUNT_TESTS = sorted(
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4096), 1),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4), 1),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(404), 0),
|
||||
# call/number argument - registry API parameters (issue #2)
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(2147483649), 1),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(0), 1),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(131078), 1),
|
||||
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(999999), 0),
|
||||
],
|
||||
# order tests by (file, item)
|
||||
# so that our LRU cache is most effective.
|
||||
|
||||
@@ -26,11 +26,9 @@ from capa.features.extractors.vmray.models import (
|
||||
|
||||
|
||||
def test_vmray_model_param():
|
||||
param_str = textwrap.dedent(
|
||||
"""
|
||||
param_str = textwrap.dedent("""
|
||||
<param name="addrlen" type="signed_32bit" value="16"/>
|
||||
"""
|
||||
)
|
||||
""")
|
||||
param: Param = Param.model_validate(xml_to_dict(param_str)["param"])
|
||||
|
||||
assert param.value is not None
|
||||
@@ -38,13 +36,11 @@ def test_vmray_model_param():
|
||||
|
||||
|
||||
def test_vmray_model_param_deref():
|
||||
param_str = textwrap.dedent(
|
||||
"""
|
||||
param_str = textwrap.dedent("""
|
||||
<param name="buf" type="ptr" value="0xaaaaaaaa">
|
||||
<deref type="str" value="Hello world"/>
|
||||
</param>
|
||||
"""
|
||||
)
|
||||
""")
|
||||
param: Param = Param.model_validate(xml_to_dict(param_str)["param"])
|
||||
|
||||
assert param.deref is not None
|
||||
@@ -52,8 +48,7 @@ def test_vmray_model_param_deref():
|
||||
|
||||
|
||||
def test_vmray_model_function_call():
|
||||
function_call_str = textwrap.dedent(
|
||||
"""
|
||||
function_call_str = textwrap.dedent("""
|
||||
<fncall fncall_id="18" process_id="1" thread_id="1" name="sys_time">
|
||||
<in>
|
||||
<param name="tloc" type="unknown" value="0x0"/>
|
||||
@@ -62,8 +57,7 @@ def test_vmray_model_function_call():
|
||||
<param name="ret_val" type="unknown" value="0xaaaaaaaa"/>
|
||||
</out>
|
||||
</fncall>
|
||||
"""
|
||||
)
|
||||
""")
|
||||
function_call: FunctionCall = FunctionCall.model_validate(xml_to_dict(function_call_str)["fncall"])
|
||||
|
||||
assert function_call.fncall_id == 18
|
||||
@@ -81,22 +75,19 @@ def test_vmray_model_function_call():
|
||||
|
||||
|
||||
def test_vmray_model_analysis_metadata():
|
||||
analysis_metadata: AnalysisMetadata = AnalysisMetadata.model_validate_json(
|
||||
"""
|
||||
analysis_metadata: AnalysisMetadata = AnalysisMetadata.model_validate_json("""
|
||||
{
|
||||
"sample_type": "Linux ELF Executable (x86-64)",
|
||||
"submission_filename": "abcd1234"
|
||||
}
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
assert analysis_metadata.sample_type == "Linux ELF Executable (x86-64)"
|
||||
assert analysis_metadata.submission_filename == "abcd1234"
|
||||
|
||||
|
||||
def test_vmray_model_elffile():
|
||||
elffile: ElfFile = ElfFile.model_validate_json(
|
||||
"""
|
||||
elffile: ElfFile = ElfFile.model_validate_json("""
|
||||
{
|
||||
"sections": [
|
||||
{
|
||||
@@ -107,16 +98,14 @@ def test_vmray_model_elffile():
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
assert elffile.sections[0].header.sh_name == "abcd1234"
|
||||
assert elffile.sections[0].header.sh_addr == 2863311530
|
||||
|
||||
|
||||
def test_vmray_model_pefile():
|
||||
pefile: PEFile = PEFile.model_validate_json(
|
||||
"""
|
||||
pefile: PEFile = PEFile.model_validate_json("""
|
||||
{
|
||||
"basic_info": {
|
||||
"image_base": 2863311530
|
||||
@@ -150,8 +139,7 @@ def test_vmray_model_pefile():
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
assert pefile.basic_info.image_base == 2863311530
|
||||
|
||||
|
||||
317
web/explorer/package-lock.json
generated
317
web/explorer/package-lock.json
generated
@@ -726,9 +726,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rollup/rollup-android-arm-eabi": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.36.0.tgz",
|
||||
"integrity": "sha512-jgrXjjcEwN6XpZXL0HUeOVGfjXhPyxAbbhD0BlXUB+abTOpbPiN5Wb3kOT7yb+uEtATNYF5x5gIfwutmuBA26w==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz",
|
||||
"integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
@@ -740,9 +740,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-android-arm64": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.36.0.tgz",
|
||||
"integrity": "sha512-NyfuLvdPdNUfUNeYKUwPwKsE5SXa2J6bCt2LdB/N+AxShnkpiczi3tcLJrm5mA+eqpy0HmaIY9F6XCa32N5yzg==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz",
|
||||
"integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -754,9 +754,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-darwin-arm64": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.36.0.tgz",
|
||||
"integrity": "sha512-JQ1Jk5G4bGrD4pWJQzWsD8I1n1mgPXq33+/vP4sk8j/z/C2siRuxZtaUA7yMTf71TCZTZl/4e1bfzwUmFb3+rw==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz",
|
||||
"integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -768,9 +768,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-darwin-x64": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.36.0.tgz",
|
||||
"integrity": "sha512-6c6wMZa1lrtiRsbDziCmjE53YbTkxMYhhnWnSW8R/yqsM7a6mSJ3uAVT0t8Y/DGt7gxUWYuFM4bwWk9XCJrFKA==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz",
|
||||
"integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -782,9 +782,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-freebsd-arm64": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.36.0.tgz",
|
||||
"integrity": "sha512-KXVsijKeJXOl8QzXTsA+sHVDsFOmMCdBRgFmBb+mfEb/7geR7+C8ypAml4fquUt14ZyVXaw2o1FWhqAfOvA4sg==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz",
|
||||
"integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -796,9 +796,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-freebsd-x64": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.36.0.tgz",
|
||||
"integrity": "sha512-dVeWq1ebbvByI+ndz4IJcD4a09RJgRYmLccwlQ8bPd4olz3Y213uf1iwvc7ZaxNn2ab7bjc08PrtBgMu6nb4pQ==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz",
|
||||
"integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -810,9 +810,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-arm-gnueabihf": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.36.0.tgz",
|
||||
"integrity": "sha512-bvXVU42mOVcF4le6XSjscdXjqx8okv4n5vmwgzcmtvFdifQ5U4dXFYaCB87namDRKlUL9ybVtLQ9ztnawaSzvg==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz",
|
||||
"integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
@@ -824,9 +824,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-arm-musleabihf": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.36.0.tgz",
|
||||
"integrity": "sha512-JFIQrDJYrxOnyDQGYkqnNBtjDwTgbasdbUiQvcU8JmGDfValfH1lNpng+4FWlhaVIR4KPkeddYjsVVbmJYvDcg==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz",
|
||||
"integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
@@ -838,9 +838,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-arm64-gnu": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.36.0.tgz",
|
||||
"integrity": "sha512-KqjYVh3oM1bj//5X7k79PSCZ6CvaVzb7Qs7VMWS+SlWB5M8p3FqufLP9VNp4CazJ0CsPDLwVD9r3vX7Ci4J56A==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz",
|
||||
"integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -852,9 +852,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-arm64-musl": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.36.0.tgz",
|
||||
"integrity": "sha512-QiGnhScND+mAAtfHqeT+cB1S9yFnNQ/EwCg5yE3MzoaZZnIV0RV9O5alJAoJKX/sBONVKeZdMfO8QSaWEygMhw==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz",
|
||||
"integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -865,10 +865,10 @@
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-loongarch64-gnu": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.36.0.tgz",
|
||||
"integrity": "sha512-1ZPyEDWF8phd4FQtTzMh8FQwqzvIjLsl6/84gzUxnMNFBtExBtpL51H67mV9xipuxl1AEAerRBgBwFNpkw8+Lg==",
|
||||
"node_modules/@rollup/rollup-linux-loong64-gnu": {
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz",
|
||||
"integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
@@ -879,10 +879,38 @@
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-powerpc64le-gnu": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.36.0.tgz",
|
||||
"integrity": "sha512-VMPMEIUpPFKpPI9GZMhJrtu8rxnp6mJR3ZzQPykq4xc2GmdHj3Q4cA+7avMyegXy4n1v+Qynr9fR88BmyO74tg==",
|
||||
"node_modules/@rollup/rollup-linux-loong64-musl": {
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz",
|
||||
"integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-ppc64-gnu": {
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz",
|
||||
"integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-ppc64-musl": {
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz",
|
||||
"integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
@@ -894,9 +922,23 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-riscv64-gnu": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.36.0.tgz",
|
||||
"integrity": "sha512-ttE6ayb/kHwNRJGYLpuAvB7SMtOeQnVXEIpMtAvx3kepFQeowVED0n1K9nAdraHUPJ5hydEMxBpIR7o4nrm8uA==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz",
|
||||
"integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-riscv64-musl": {
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz",
|
||||
"integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
@@ -908,9 +950,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-s390x-gnu": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.36.0.tgz",
|
||||
"integrity": "sha512-4a5gf2jpS0AIe7uBjxDeUMNcFmaRTbNv7NxI5xOCs4lhzsVyGR/0qBXduPnoWf6dGC365saTiwag8hP1imTgag==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz",
|
||||
"integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
@@ -922,9 +964,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-x64-gnu": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.36.0.tgz",
|
||||
"integrity": "sha512-5KtoW8UWmwFKQ96aQL3LlRXX16IMwyzMq/jSSVIIyAANiE1doaQsx/KRyhAvpHlPjPiSU/AYX/8m+lQ9VToxFQ==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz",
|
||||
"integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -936,9 +978,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-linux-x64-musl": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.36.0.tgz",
|
||||
"integrity": "sha512-sycrYZPrv2ag4OCvaN5js+f01eoZ2U+RmT5as8vhxiFz+kxwlHrsxOwKPSA8WyS+Wc6Epid9QeI/IkQ9NkgYyQ==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz",
|
||||
"integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -949,10 +991,38 @@
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-openbsd-x64": {
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz",
|
||||
"integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-openharmony-arm64": {
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz",
|
||||
"integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openharmony"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-win32-arm64-msvc": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.36.0.tgz",
|
||||
"integrity": "sha512-qbqt4N7tokFwwSVlWDsjfoHgviS3n/vZ8LK0h1uLG9TYIRuUTJC88E1xb3LM2iqZ/WTqNQjYrtmtGmrmmawB6A==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz",
|
||||
"integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -964,9 +1034,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-win32-ia32-msvc": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.36.0.tgz",
|
||||
"integrity": "sha512-t+RY0JuRamIocMuQcfwYSOkmdX9dtkr1PbhKW42AMvaDQa+jOdpUYysroTF/nuPpAaQMWp7ye+ndlmmthieJrQ==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz",
|
||||
"integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
@@ -977,10 +1047,24 @@
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-win32-x64-gnu": {
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz",
|
||||
"integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@rollup/rollup-win32-x64-msvc": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.36.0.tgz",
|
||||
"integrity": "sha512-aRXd7tRZkWLqGbChgcMMDEHjOKudo1kChb1Jt1IfR8cY/KIpgNviLeJy5FUb9IpSuQj8dU2fAYNMPW/hLKOSTw==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz",
|
||||
"integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -998,9 +1082,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/estree": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz",
|
||||
"integrity": "sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==",
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
|
||||
"integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
@@ -1682,14 +1766,15 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/editorconfig": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/editorconfig/-/editorconfig-1.0.4.tgz",
|
||||
"integrity": "sha512-L9Qe08KWTlqYMVvMcTIvMAdl1cDUubzRNYL+WfA4bLDMHe4nemKkpmYzkznE1FwLKu0EEmy6obgQKzMJrg4x9Q==",
|
||||
"version": "1.0.7",
|
||||
"resolved": "https://registry.npmjs.org/editorconfig/-/editorconfig-1.0.7.tgz",
|
||||
"integrity": "sha512-e0GOtq/aTQhVdNyDU9e02+wz9oDDM+SIOQxWME2QRjzRX5yyLAuHDE+0aE8vHb9XRC8XD37eO2u57+F09JqFhw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@one-ini/wasm": "0.1.1",
|
||||
"commander": "^10.0.0",
|
||||
"minimatch": "9.0.1",
|
||||
"minimatch": "^9.0.1",
|
||||
"semver": "^7.5.3"
|
||||
},
|
||||
"bin": {
|
||||
@@ -1700,21 +1785,23 @@
|
||||
}
|
||||
},
|
||||
"node_modules/editorconfig/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/editorconfig/node_modules/minimatch": {
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.1.tgz",
|
||||
"integrity": "sha512-0jWhJpD/MdhPXwPuiRkCbfYfSKp2qnn2eOc279qI7f+osl/l+prKSrvhg157zSYvx/1nmgn2NqdT6k2Z7zSH9w==",
|
||||
"version": "9.0.9",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz",
|
||||
"integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
"brace-expansion": "^2.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16 || 14 >=14.17"
|
||||
@@ -2305,21 +2392,23 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/minimatch": {
|
||||
"version": "9.0.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
|
||||
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
|
||||
"version": "9.0.9",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz",
|
||||
"integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
"brace-expansion": "^2.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16 || 14 >=14.17"
|
||||
@@ -2756,10 +2845,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/lodash": {
|
||||
"version": "4.17.21",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
|
||||
"dev": true
|
||||
"version": "4.17.23",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz",
|
||||
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.merge": {
|
||||
"version": "4.6.2",
|
||||
@@ -2838,10 +2928,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
|
||||
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
|
||||
"version": "3.1.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
|
||||
"integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
},
|
||||
@@ -3305,13 +3396,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/rollup": {
|
||||
"version": "4.36.0",
|
||||
"resolved": "https://registry.npmjs.org/rollup/-/rollup-4.36.0.tgz",
|
||||
"integrity": "sha512-zwATAXNQxUcd40zgtQG0ZafcRK4g004WtEl7kbuhTWPvf07PsfohXl39jVUvPF7jvNAIkKPQ2XrsDlWuxBd++Q==",
|
||||
"version": "4.59.0",
|
||||
"resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz",
|
||||
"integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/estree": "1.0.6"
|
||||
"@types/estree": "1.0.8"
|
||||
},
|
||||
"bin": {
|
||||
"rollup": "dist/bin/rollup"
|
||||
@@ -3321,25 +3412,31 @@
|
||||
"npm": ">=8.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@rollup/rollup-android-arm-eabi": "4.36.0",
|
||||
"@rollup/rollup-android-arm64": "4.36.0",
|
||||
"@rollup/rollup-darwin-arm64": "4.36.0",
|
||||
"@rollup/rollup-darwin-x64": "4.36.0",
|
||||
"@rollup/rollup-freebsd-arm64": "4.36.0",
|
||||
"@rollup/rollup-freebsd-x64": "4.36.0",
|
||||
"@rollup/rollup-linux-arm-gnueabihf": "4.36.0",
|
||||
"@rollup/rollup-linux-arm-musleabihf": "4.36.0",
|
||||
"@rollup/rollup-linux-arm64-gnu": "4.36.0",
|
||||
"@rollup/rollup-linux-arm64-musl": "4.36.0",
|
||||
"@rollup/rollup-linux-loongarch64-gnu": "4.36.0",
|
||||
"@rollup/rollup-linux-powerpc64le-gnu": "4.36.0",
|
||||
"@rollup/rollup-linux-riscv64-gnu": "4.36.0",
|
||||
"@rollup/rollup-linux-s390x-gnu": "4.36.0",
|
||||
"@rollup/rollup-linux-x64-gnu": "4.36.0",
|
||||
"@rollup/rollup-linux-x64-musl": "4.36.0",
|
||||
"@rollup/rollup-win32-arm64-msvc": "4.36.0",
|
||||
"@rollup/rollup-win32-ia32-msvc": "4.36.0",
|
||||
"@rollup/rollup-win32-x64-msvc": "4.36.0",
|
||||
"@rollup/rollup-android-arm-eabi": "4.59.0",
|
||||
"@rollup/rollup-android-arm64": "4.59.0",
|
||||
"@rollup/rollup-darwin-arm64": "4.59.0",
|
||||
"@rollup/rollup-darwin-x64": "4.59.0",
|
||||
"@rollup/rollup-freebsd-arm64": "4.59.0",
|
||||
"@rollup/rollup-freebsd-x64": "4.59.0",
|
||||
"@rollup/rollup-linux-arm-gnueabihf": "4.59.0",
|
||||
"@rollup/rollup-linux-arm-musleabihf": "4.59.0",
|
||||
"@rollup/rollup-linux-arm64-gnu": "4.59.0",
|
||||
"@rollup/rollup-linux-arm64-musl": "4.59.0",
|
||||
"@rollup/rollup-linux-loong64-gnu": "4.59.0",
|
||||
"@rollup/rollup-linux-loong64-musl": "4.59.0",
|
||||
"@rollup/rollup-linux-ppc64-gnu": "4.59.0",
|
||||
"@rollup/rollup-linux-ppc64-musl": "4.59.0",
|
||||
"@rollup/rollup-linux-riscv64-gnu": "4.59.0",
|
||||
"@rollup/rollup-linux-riscv64-musl": "4.59.0",
|
||||
"@rollup/rollup-linux-s390x-gnu": "4.59.0",
|
||||
"@rollup/rollup-linux-x64-gnu": "4.59.0",
|
||||
"@rollup/rollup-linux-x64-musl": "4.59.0",
|
||||
"@rollup/rollup-openbsd-x64": "4.59.0",
|
||||
"@rollup/rollup-openharmony-arm64": "4.59.0",
|
||||
"@rollup/rollup-win32-arm64-msvc": "4.59.0",
|
||||
"@rollup/rollup-win32-ia32-msvc": "4.59.0",
|
||||
"@rollup/rollup-win32-x64-gnu": "4.59.0",
|
||||
"@rollup/rollup-win32-x64-msvc": "4.59.0",
|
||||
"fsevents": "~2.3.2"
|
||||
}
|
||||
},
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
:scrollable="true"
|
||||
:filters="filters"
|
||||
:filterMode="filterMode"
|
||||
:globalFilterFields="['name', 'typeValue']"
|
||||
sortField="namespace"
|
||||
:sortOrder="1"
|
||||
removableSort
|
||||
|
||||
@@ -17,6 +17,9 @@
|
||||
import { useToast } from "primevue/usetoast";
|
||||
import { isGzipped, decompressGzip, readFileAsText } from "@/utils/fileUtils";
|
||||
|
||||
const VT_REANALYZE_SUGGESTION =
|
||||
"If this is a VirusTotal or similar link, the file may need to be reanalyzed. Try again later.";
|
||||
|
||||
export function useRdocLoader() {
|
||||
const toast = useToast();
|
||||
const MIN_SUPPORTED_VERSION = "7.0.0";
|
||||
@@ -32,6 +35,58 @@ export function useRdocLoader() {
|
||||
toast.add({ severity, summary, detail, life: 3000, group: "bc" }); // bc: bottom-center
|
||||
};
|
||||
|
||||
/**
|
||||
* Validates that the parsed object has the expected result document schema.
|
||||
* @param {Object} rdoc - The parsed JSON data.
|
||||
* @returns {{ valid: boolean, message?: string }} Validation result with an optional error message.
|
||||
*/
|
||||
const validateRdocSchema = (rdoc) => {
|
||||
const isInvalidObject = (v) => !v || typeof v !== "object" || Array.isArray(v);
|
||||
|
||||
if (isInvalidObject(rdoc)) {
|
||||
return { valid: false, message: "Invalid JSON: expected an object." };
|
||||
}
|
||||
if (isInvalidObject(rdoc.meta)) {
|
||||
return { valid: false, message: "Invalid result document: missing or invalid 'meta' field." };
|
||||
}
|
||||
if (rdoc.meta.version === undefined) {
|
||||
return { valid: false, message: "Invalid result document: missing 'meta.version'." };
|
||||
}
|
||||
if (isInvalidObject(rdoc.meta.analysis)) {
|
||||
return { valid: false, message: "Invalid result document: missing or invalid 'meta.analysis'." };
|
||||
}
|
||||
if (isInvalidObject(rdoc.meta.analysis.layout)) {
|
||||
return { valid: false, message: "Invalid result document: missing or invalid 'meta.analysis.layout'." };
|
||||
}
|
||||
if (isInvalidObject(rdoc.meta.analysis.feature_counts)) {
|
||||
return {
|
||||
valid: false,
|
||||
message: "Invalid result document: missing or invalid 'meta.analysis.feature_counts'."
|
||||
};
|
||||
}
|
||||
const fc = rdoc.meta.analysis.feature_counts;
|
||||
// Allow file-scoped-only documents (no functions/processes arrays).
|
||||
// If present, functions and processes must be arrays.
|
||||
if (fc.functions !== undefined && !Array.isArray(fc.functions)) {
|
||||
return {
|
||||
valid: false,
|
||||
message:
|
||||
"Invalid result document: 'meta.analysis.feature_counts.functions' must be an array when present."
|
||||
};
|
||||
}
|
||||
if (fc.processes !== undefined && !Array.isArray(fc.processes)) {
|
||||
return {
|
||||
valid: false,
|
||||
message:
|
||||
"Invalid result document: 'meta.analysis.feature_counts.processes' must be an array when present."
|
||||
};
|
||||
}
|
||||
if (isInvalidObject(rdoc.rules)) {
|
||||
return { valid: false, message: "Invalid result document: missing or invalid 'rules' field." };
|
||||
}
|
||||
return { valid: true };
|
||||
};
|
||||
|
||||
/**
|
||||
* Checks if the version of the loaded data is supported.
|
||||
* @param {Object} rdoc - The loaded JSON data containing version information.
|
||||
@@ -81,27 +136,41 @@ export function useRdocLoader() {
|
||||
* @returns {Promise<Object|null>} A promise that resolves to the processed RDOC data, or null if processing fails.
|
||||
*/
|
||||
const loadRdoc = async (source) => {
|
||||
const isUrl = typeof source === "string";
|
||||
|
||||
try {
|
||||
let data;
|
||||
|
||||
if (typeof source === "string") {
|
||||
// Load from URL
|
||||
if (isUrl) {
|
||||
const blob = await fetchFromUrl(source);
|
||||
data = await processBlob(blob);
|
||||
} else if (source instanceof File) {
|
||||
// Load from local
|
||||
data = await processBlob(source);
|
||||
} else {
|
||||
throw new Error("Invalid source type");
|
||||
}
|
||||
|
||||
const validation = validateRdocSchema(data);
|
||||
if (!validation.valid) {
|
||||
let detail = validation.message;
|
||||
if (isUrl) {
|
||||
detail += VT_REANALYZE_SUGGESTION;
|
||||
}
|
||||
showToast("error", "Invalid result document", detail);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (checkVersion(data)) {
|
||||
showToast("success", "Success", "JSON data loaded successfully");
|
||||
return data;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error loading JSON:", error);
|
||||
showToast("error", "Failed to process the file", error.message);
|
||||
let detail = error.message;
|
||||
if (isUrl && (error instanceof SyntaxError || error.message.includes("JSON"))) {
|
||||
detail += VT_REANALYZE_SUGGESTION;
|
||||
}
|
||||
showToast("error", "Failed to process the file", detail);
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
@@ -322,8 +322,8 @@ export function parseFunctionCapabilities(doc) {
|
||||
});
|
||||
}
|
||||
|
||||
// Iterate through all functions in the document
|
||||
for (const f of doc.meta.analysis.feature_counts.functions) {
|
||||
// Iterate through all functions in the document (empty for file-scoped-only)
|
||||
for (const f of doc.meta.analysis.feature_counts.functions ?? []) {
|
||||
const addr = formatAddress(f.address);
|
||||
const matches = matchesByFunction.get(addr);
|
||||
// Skip functions with no matches (unlikely)
|
||||
|
||||
@@ -77,8 +77,8 @@ export function createATTACKHref(attack) {
|
||||
*/
|
||||
export function createCapaRulesUrl(node) {
|
||||
if (!node || !node.data) return null;
|
||||
const ruleName = node.data.name.toLowerCase().replace(/\s+/g, "-");
|
||||
return `https://mandiant.github.io/capa/rules/${ruleName}/`;
|
||||
const baseUrl = "https://mandiant.github.io/capa/rules/";
|
||||
return `${baseUrl}${encodeURIComponent(node.data.name)}/`;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user