Compare commits

..

2 Commits

Author SHA1 Message Date
Willi Ballenthin
036e157474 ida: use ida-domain api 2026-01-14 12:04:13 +01:00
Moritz
3919475728 Merge branch 'master' into idalib-tests 2026-01-14 12:04:13 +01:00
90 changed files with 1774 additions and 1511 deletions

View File

@@ -4,13 +4,6 @@ updates:
directory: "/"
schedule:
interval: "weekly"
groups:
vivisect:
patterns:
- "vivisect"
- "pyasn1"
- "pyasn1-modules"
- "msgpack"
ignore:
- dependency-name: "*"
update-types: ["version-update:semver-patch"]

2
.github/flake8.ini vendored
View File

@@ -33,6 +33,8 @@ per-file-ignores =
scripts/*: T201
# capa.exe is meant to print output
capa/main.py: T201
# IDA tests emit results to output window so need to print
tests/test_ida_features.py: T201
# utility used to find the Binary Ninja API via invoking python.exe
capa/features/extractors/binja/find_binja_api.py: T201

View File

@@ -63,9 +63,6 @@ ignore_missing_imports = True
[mypy-PyQt5.*]
ignore_missing_imports = True
[mypy-binaryninja]
ignore_missing_imports = True
[mypy-binaryninja.*]
ignore_missing_imports = True

View File

@@ -20,5 +20,3 @@ closes #issue_number
- [ ] No new tests needed
<!-- Please help us keep capa documentation up-to-date -->
- [ ] No documentation update needed
<!-- Please indicate if and how you have used AI to generate (parts of) your code submission. Include your prompt, model, tool, etc. -->
- [ ] This submission includes AI-generated code and I have provided details in the description.

View File

@@ -17,8 +17,6 @@ import sys
import capa.rules.cache
from PyInstaller.utils.hooks import collect_submodules
from pathlib import Path
# SPECPATH is a global variable which points to .spec file path
@@ -36,7 +34,6 @@ a = Analysis(
["../../capa/main.py"],
pathex=["capa"],
binaries=None,
hiddenimports=collect_submodules('rich'),
datas=[
# when invoking pyinstaller from the project root,
# this gets invoked from the directory of the spec file,

View File

@@ -1,62 +0,0 @@
name: black auto-format
on:
pull_request:
branches: [ master ]
paths-ignore:
- 'web/**'
- 'doc/**'
- '**.md'
workflow_dispatch: # allow manual trigger
permissions:
contents: write
jobs:
black-format:
# only run on dependabot PRs or manual trigger
if: github.actor == 'dependabot[bot]' || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-22.04
steps:
- name: Checkout repository
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
ref: ${{ github.head_ref }}
# need a token with write access to push the commit
token: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Python 3.13
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: "3.13"
- name: Install dependencies
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Run isort
run: pre-commit run isort --all-files
- name: Run black/continue
# black returns non-zero error code after formatting, which is what we expect
continue-on-error: true
run: pre-commit run black --all-files
- name: Check for changes
id: changes
run: |
if git diff --quiet; then
echo "has_changes=false" >> "$GITHUB_OUTPUT"
else
echo "has_changes=true" >> "$GITHUB_OUTPUT"
fi
- name: Commit and push formatting changes
if: steps.changes.outputs.has_changes == 'true'
run: |
git config user.name "${GITHUB_ACTOR}"
git config user.email "${GITHUB_ACTOR_ID}+${GITHUB_ACTOR}@users.noreply.github.com"
git add -A
git commit -m "style: auto-format with black and isort"
git push

View File

@@ -167,7 +167,7 @@ jobs:
- name: Set zip name
run: echo "zip_name=capa-${GITHUB_REF#refs/tags/}-${{ matrix.asset_name }}.zip" >> $GITHUB_ENV
- name: Zip ${{ matrix.artifact_name }} into ${{ env.zip_name }}
run: zip ${ZIP_NAME} ${{ matrix.artifact_name }}
run: zip ${{ env.zip_name }} ${{ matrix.artifact_name }}
- name: Upload ${{ env.zip_name }} to GH Release
uses: svenstaro/upload-release-action@2728235f7dc9ff598bd86ce3c274b74f802d2208 # v2
with:

View File

@@ -14,8 +14,8 @@ jobs:
steps:
- name: Check out repository code
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
uses: actions/checkout@v4
- uses: pypa/gh-action-pip-audit@1220774d901786e6f652ae159f7b6bc8fea6d266 # v1.1.0
- uses: pypa/gh-action-pip-audit@v1.0.8
with:
inputs: .

View File

@@ -21,10 +21,8 @@ jobs:
# user information is needed to create annotated tags (with a message)
git config user.email 'capa-dev@mandiant.com'
git config user.name 'Capa Bot'
name=${GITHUB_EVENT_RELEASE_TAG_NAME}
name=${{ github.event.release.tag_name }}
git tag $name -m "https://github.com/mandiant/capa/releases/$name"
env:
GITHUB_EVENT_RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
# TODO update branch name-major=${name%%.*}
- name: Push tag to capa-rules
uses: ad-m/github-push-action@d91a481090679876dfc4178fef17f286781251df # v0.8.0

View File

@@ -113,11 +113,6 @@ jobs:
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Cache vivisect workspaces
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: tests/data/**/*.viv
key: viv-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }}
- name: Run tests (fast)
# this set of tests runs about 80% of the cases in 20% of the time,
# and should catch most errors quickly.
@@ -160,7 +155,7 @@ jobs:
run: |
mkdir ./.github/binja
curl "https://raw.githubusercontent.com/Vector35/binaryninja-api/6812c97/scripts/download_headless.py" -o ./.github/binja/download_headless.py
python ./.github/binja/download_headless.py --serial ${BN_SERIAL} --output .github/binja/BinaryNinja-headless.zip
python ./.github/binja/download_headless.py --serial ${{ env.BN_SERIAL }} --output .github/binja/BinaryNinja-headless.zip
unzip .github/binja/BinaryNinja-headless.zip -d .github/binja/
python .github/binja/binaryninja/scripts/install_api.py --install-on-root --silent
- name: Run tests

View File

@@ -18,18 +18,14 @@ jobs:
- uses: actions/checkout@v4
- name: Set release name
run: echo "RELEASE_NAME=capa-explorer-web-v${GITHUB_EVENT_INPUTS_VERSION}-${GITHUB_SHA::7}" >> $GITHUB_ENV
env:
GITHUB_EVENT_INPUTS_VERSION: ${{ github.event.inputs.version }}
run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV
- name: Check if release already exists
run: |
if ls web/explorer/releases/capa-explorer-web-v${GITHUB_EVENT_INPUTS_VERSION}-* 1> /dev/null 2>&1; then
echo "::error:: A release with version ${GITHUB_EVENT_INPUTS_VERSION} already exists"
if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
exit 1
fi
env:
GITHUB_EVENT_INPUTS_VERSION: ${{ github.event.inputs.version }}
- name: Set up Node.js
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
@@ -47,24 +43,24 @@ jobs:
working-directory: web/explorer
- name: Compress bundle
run: zip -r ${RELEASE_NAME}.zip capa-explorer-web
run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
working-directory: web/explorer
- name: Create releases directory
run: mkdir -vp web/explorer/releases
- name: Move release to releases folder
run: mv web/explorer/${RELEASE_NAME}.zip web/explorer/releases
run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases
- name: Compute release SHA256 hash
run: |
echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${RELEASE_NAME}.zip | awk '{print $1}')" >> $GITHUB_ENV
echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV
- name: Update CHANGELOG.md
run: |
echo "## ${RELEASE_NAME}" >> web/explorer/releases/CHANGELOG.md
echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
echo "- SHA256: ${RELEASE_SHA256}" >> web/explorer/releases/CHANGELOG.md
echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
echo "" >> web/explorer/releases/CHANGELOG.md
cat web/explorer/releases/CHANGELOG.md
@@ -77,7 +73,7 @@ jobs:
run: |
git config --local user.email "capa-dev@mandiant.com"
git config --local user.name "Capa Bot"
git add -f web/explorer/releases/${RELEASE_NAME}.zip web/explorer/releases/CHANGELOG.md
git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
git add -u web/explorer/releases/
- name: Create Pull Request

View File

@@ -136,6 +136,7 @@ repos:
- "tests/"
- "--ignore=tests/test_binja_features.py"
- "--ignore=tests/test_ghidra_features.py"
- "--ignore=tests/test_ida_features.py"
- "--ignore=tests/test_viv_features.py"
- "--ignore=tests/test_idalib_features.py"
- "--ignore=tests/test_main.py"

View File

@@ -5,71 +5,29 @@
### New Features
- ghidra: support PyGhidra @mike-hunhoff #2788
- vmray: extract number features from whitelisted void_ptr parameters (hKey, hKeyRoot) @adeboyedn #2835
### Breaking Changes
### New Rules (23)
### New Rules (5)
- nursery/run-as-nodejs-native-module mehunhoff@google.com
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_io still@teamt5.org
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_timer still@teamt5.org
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_work still@teamt5.org
- data-manipulation/encryption/hc-256/encrypt-data-using-hc-256 wballenthin@hex-rays.com
- anti-analysis/anti-llm/terminate-anthropic-session-via-magic-strings wballenthin@hex-rays.com
- nursery/access-aws-credentials maximemorin@google.com
- nursery/access-cloudflare-credentials maximemorin@google.com
- nursery/access-docker-credentials maximemorin@google.com
- nursery/access-gcp-credentials maximemorin@google.com
- nursery/access-kubernetes-credentials maximemorin@google.com
- nursery/enumerate-aws-cloudformation maximemorin@google.com
- nursery/enumerate-aws-cloudtrail maximemorin@google.com
- nursery/enumerate-aws-direct-connect maximemorin@google.com
- nursery/enumerate-aws-ec2 maximemorin@google.com
- nursery/enumerate-aws-iam maximemorin@google.com
- nursery/enumerate-aws-s3 maximemorin@google.com
- nursery/enumerate-aws-support-cases maximemorin@google.com
- persistence/registry/persist-via-shellserviceobjectdelayload-registry-key xpzhxhm@gmail.com
- nursery/get-http-response-date @cosmoworker
- host-interaction/process/create/create-process-in-dotnet moritz.raabe@mandiant.com social.tarang@gmail.com
- nursery/read-file-in-dotnet moritz.raabe@mandiant.com anushka.virgaonkar@mandiant.com
- nursery/write-file-in-dotnet william.ballenthin@mandiant.com anushka.virgaonkar@mandiant.com
-
### Bug Fixes
- main: suggest --os flag in unsupported OS error message to help users override ELF OS detection @devs6186 #2577
- render: escape sample-controlled strings before passing to Rich to prevent MarkupError @devs6186 #2699
- rules: handle empty or invalid YAML documents gracefully in `Rule.from_yaml` and `get_rules` @devs6186 #2900
- Fixed insecure deserialization vulnerability in YAML loading @0x1622 (#2770)
- loader: gracefully handle ELF files with unsupported architectures kamranulhaq2002@gmail.com #2800
- loader: handle SegmentationViolation for malformed ELF files @kami922 #2799
- lint: disable rule caching during linting @Maijin #2817
- vmray: skip processes with invalid PID or missing filename @EclipseAditya #2807
- features: fix Regex.get_value_str() returning escaped pattern instead of raw regex @EclipseAditya #1909
- render: use default styling for dynamic -vv API/call details so they are easier to see @devs6186 #1865
- loader: handle struct.error from dnfile and show clear CorruptFile message @devs6186 #2442
- address: fix TypeError when sorting locations containing mixed address types @devs6186 #2195
- loader: skip PE files with unrealistically large section virtual sizes to prevent resource exhaustion @devs6186 #1989
### capa Explorer Web
- webui: fix 404 for "View rule in capa-rules" by using encodeURIComponent for rule name in URL @devs6186 #2482
- webui: show error when JSON does not follow expected result document schema; suggest reanalyzing for VT URLs @devs6186 #2363
- webui: fix global search to match feature types (match, regex, api, …) @devs6186 #2349
### capa Explorer IDA Pro plugin
### Performance
- perf: eliminate O(n²) tuple growth and reduce per-match overhead @devs6186 #2890
### Development
- doc: document that default output shows top-level matches only; -v/-vv show nested matches @devs6186 #1410
- doc: fix typo in usage.md, add documentation links to README @devs6186 #2274
- doc: add table comparing ways to consume capa output (CLI, IDA, Ghidra, dynamic sandbox, web) @devs6186 #2273
- binja: add mypy config for top-level binaryninja module to fix mypy issues @devs6186 #2399
- ci: deprecate macos-13 runner and use Python v3.13 for testing @mike-hunhoff #2777
- ci: pin pip-audit action SHAs and update to v1.1.0 @kami922 #1131
### Raw diffs
- [capa v9.3.1...master](https://github.com/mandiant/capa/compare/v9.3.1...master)

View File

@@ -87,8 +87,6 @@ Download stable releases of the standalone capa binaries [here](https://github.c
To use capa as a library or integrate with another tool, see [doc/installation.md](https://github.com/mandiant/capa/blob/master/doc/installation.md) for further setup instructions.
**Documentation:** [Usage and tips](doc/usage.md) · [Installation](doc/installation.md) · [Limitations](doc/limitations.md) · [FAQ](doc/faq.md)
# capa Explorer Web
The [capa Explorer Web](https://mandiant.github.io/capa/explorer/) enables you to interactively explore capa results in your web browser. Besides the online version you can download a standalone HTML file for local offline usage.

View File

@@ -277,9 +277,7 @@ def find_dynamic_capabilities(
all_span_matches: MatchResults = collections.defaultdict(list)
all_call_matches: MatchResults = collections.defaultdict(list)
# Accumulate into a list to avoid O(n²) tuple concatenation.
# Tuples are immutable, so `t += (x,)` copies the entire tuple each time.
process_feature_counts: list[rdoc.ProcessFeatureCount] = []
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
assert isinstance(extractor, DynamicFeatureExtractor)
processes: list[ProcessHandle] = list(extractor.get_processes())
@@ -291,10 +289,10 @@ def find_dynamic_capabilities(
task = pbar.add_task("matching", total=n_processes, unit="processes")
for p in processes:
process_capabilities = find_process_capabilities(ruleset, extractor, p)
process_feature_counts.append(
feature_counts.processes += (
rdoc.ProcessFeatureCount(
address=frz.Address.from_capa(p.address), count=process_capabilities.feature_count
)
),
)
for rule_name, res in process_capabilities.process_matches.items():
@@ -319,11 +317,7 @@ def find_dynamic_capabilities(
capa.engine.index_rule_matches(process_and_lower_features, rule, locations)
all_file_capabilities = find_file_capabilities(ruleset, extractor, process_and_lower_features)
feature_counts = rdoc.DynamicFeatureCounts(
file=all_file_capabilities.feature_count,
processes=tuple(process_feature_counts),
)
feature_counts.file = all_file_capabilities.feature_count
matches = dict(
itertools.chain(

View File

@@ -156,11 +156,8 @@ def find_static_capabilities(
all_bb_matches: MatchResults = collections.defaultdict(list)
all_insn_matches: MatchResults = collections.defaultdict(list)
# Accumulate into lists to avoid O(n²) tuple concatenation.
# Tuples are immutable, so `t += (x,)` copies the entire tuple each time.
# For binaries with thousands of functions this becomes quadratic in memory work.
function_feature_counts: list[rdoc.FunctionFeatureCount] = []
library_functions_list: list[rdoc.LibraryFunction] = []
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
library_functions: tuple[rdoc.LibraryFunction, ...] = ()
assert isinstance(extractor, StaticFeatureExtractor)
functions: list[FunctionHandle] = list(extractor.get_functions())
@@ -179,20 +176,20 @@ def find_static_capabilities(
if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
library_functions_list.append(
rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name)
library_functions += (
rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
)
n_libs = len(library_functions_list)
n_libs = len(library_functions)
percentage = round(100 * (n_libs / n_funcs))
pbar.update(task, postfix=f"skipped {n_libs} library functions, {percentage}%")
pbar.advance(task)
continue
code_capabilities = find_code_capabilities(ruleset, extractor, f)
function_feature_counts.append(
feature_counts.functions += (
rdoc.FunctionFeatureCount(
address=frz.Address.from_capa(f.address), count=code_capabilities.feature_count
)
),
)
t1 = time.time()
@@ -233,11 +230,7 @@ def find_static_capabilities(
capa.engine.index_rule_matches(function_and_lower_features, rule, locations)
all_file_capabilities = find_file_capabilities(ruleset, extractor, function_and_lower_features)
feature_counts = rdoc.StaticFeatureCounts(
file=all_file_capabilities.feature_count,
functions=tuple(function_feature_counts),
)
feature_counts.file = all_file_capabilities.feature_count
matches: MatchResults = dict(
itertools.chain(
@@ -251,4 +244,4 @@ def find_static_capabilities(
)
)
return Capabilities(matches, feature_counts, tuple(library_functions_list))
return Capabilities(matches, feature_counts, library_functions)

View File

@@ -189,11 +189,6 @@ class _NoAddress(Address):
def __lt__(self, other):
return False
def __gt__(self, other):
# Mixed-type comparison: (real_address < NO_ADDRESS) invokes this so sort works.
# NoAddress sorts last.
return other is not self
def __hash__(self):
return hash(0)

View File

@@ -369,12 +369,6 @@ class Regex(String):
else:
return Result(False, _MatchedRegex(self, {}), [])
def get_value_str(self) -> str:
# return the raw regex pattern, not the escaped version from String.get_value_str().
# see #1909.
assert isinstance(self.value, str)
return self.value
def __str__(self):
assert isinstance(self.value, str)
return f"regex(string =~ {self.value})"

View File

@@ -20,7 +20,6 @@ Proto files generated via protobuf v24.4:
from BinExport2 at 6916731d5f6693c4a4f0a052501fd3bd92cfd08b
https://github.com/google/binexport/blob/6916731/binexport2.proto
"""
import io
import hashlib
import logging

View File

@@ -84,14 +84,16 @@ def extract_insn_number_features(
yield OperandOffset(i, value), ih.address
OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack), #int] ; capture #int
ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack), #int]! ; capture #int
ldr|ldrb|ldrh|ldrsb|ldrsh|ldrex|ldrd|str|strb|strh|strex|strd reg, [reg(not-stack)], #int ; capture #int
ldp|ldpd|stp|stpd reg, reg, [reg(not-stack), #int] ; capture #int
ldp|ldpd|stp|stpd reg, reg, [reg(not-stack), #int]! ; capture #int
ldp|ldpd|stp|stpd reg, reg, [reg(not-stack)], #int ; capture #int
""")
"""
)
def extract_insn_offset_features(
@@ -115,10 +117,12 @@ def extract_insn_offset_features(
yield OperandOffset(match.operand_index, value), ih.address
NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
eor reg, reg, reg
eor reg, reg, #int
""")
"""
)
def extract_insn_nzxor_characteristic_features(
@@ -140,9 +144,11 @@ def extract_insn_nzxor_characteristic_features(
yield Characteristic("nzxor"), ih.address
INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
blx|bx|blr reg
""")
"""
)
def extract_function_indirect_call_characteristic_features(

View File

@@ -34,14 +34,17 @@ from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOK
logger = logging.getLogger(__name__)
IGNORE_NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
IGNORE_NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
ret #int
retn #int
add reg(stack), #int
sub reg(stack), #int
""")
"""
)
NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
push #int0 ; capture #int0
# its a little tedious to enumerate all the address forms
@@ -61,7 +64,8 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
# imagine reg is zero'd out, then this is like `mov reg, #int`
# which is not uncommon.
lea reg, [reg + #int] ; capture #int
""")
"""
)
def extract_insn_number_features(
@@ -96,7 +100,8 @@ def extract_insn_number_features(
yield OperandOffset(match.operand_index, value), ih.address
OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
mov|movzx|movsb|cmp [reg + reg * #int + #int0], #int ; capture #int0
mov|movzx|movsb|cmp [reg * #int + #int0], #int ; capture #int0
mov|movzx|movsb|cmp [reg + reg + #int0], #int ; capture #int0
@@ -109,15 +114,18 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
mov|movzx|movsb|cmp|lea reg, [reg * #int + #int0] ; capture #int0
mov|movzx|movsb|cmp|lea reg, [reg + reg + #int0] ; capture #int0
mov|movzx|movsb|cmp|lea reg, [reg(not-stack) + #int0] ; capture #int0
""")
"""
)
# these are patterns that access offset 0 from some pointer
# (pointer is not the stack pointer).
OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
mov|movzx|movsb [reg(not-stack)], reg
mov|movzx|movsb [reg(not-stack)], #int
lea reg, [reg(not-stack)]
""")
"""
)
def extract_insn_offset_features(
@@ -181,10 +189,12 @@ def is_security_cookie(
return False
NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
xor|xorpd|xorps|pxor reg, reg
xor|xorpd|xorps|pxor reg, #int
""")
"""
)
def extract_insn_nzxor_characteristic_features(
@@ -218,7 +228,8 @@ def extract_insn_nzxor_characteristic_features(
yield Characteristic("nzxor"), ih.address
INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(
"""
call|jmp reg0
call|jmp [reg + reg * #int + #int]
call|jmp [reg + reg * #int]
@@ -226,7 +237,8 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str("""
call|jmp [reg + reg + #int]
call|jmp [reg + #int]
call|jmp [reg]
""")
"""
)
def extract_function_indirect_call_characteristic_features(

View File

@@ -35,7 +35,7 @@ from capa.features.extractors.base_extractor import (
logger = logging.getLogger(__name__)
TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE", "2.5-CAPE"}
TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE"}
class CapeExtractor(DynamicFeatureExtractor):

View File

@@ -27,12 +27,7 @@ import capa.features.extractors.dnfile.file
import capa.features.extractors.dnfile.insn
import capa.features.extractors.dnfile.function
from capa.features.common import Feature
from capa.features.address import (
NO_ADDRESS,
Address,
DNTokenAddress,
DNTokenOffsetAddress,
)
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress
from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
from capa.features.extractors.base_extractor import (
BBHandle,
@@ -44,7 +39,6 @@ from capa.features.extractors.base_extractor import (
from capa.features.extractors.dnfile.helpers import (
get_dotnet_types,
get_dotnet_fields,
load_dotnet_image,
get_dotnet_managed_imports,
get_dotnet_managed_methods,
get_dotnet_unmanaged_imports,
@@ -89,7 +83,7 @@ class DnFileFeatureExtractorCache:
class DnfileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: Path):
self.pe = load_dotnet_image(path)
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
@@ -118,12 +112,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
fh: FunctionHandle = FunctionHandle(
address=DNTokenAddress(token),
inner=method,
ctx={
"pe": self.pe,
"calls_from": set(),
"calls_to": set(),
"cache": self.token_cache,
},
ctx={"pe": self.pe, "calls_from": set(), "calls_to": set(), "cache": self.token_cache},
)
# method tokens should be unique

View File

@@ -15,10 +15,8 @@
from __future__ import annotations
import struct
import logging
from typing import Union, Iterator, Optional
from pathlib import Path
import dnfile
from dncil.cil.body import CilMethodBody
@@ -32,16 +30,6 @@ from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
logger = logging.getLogger(__name__)
def load_dotnet_image(path: Path) -> dnfile.dnPE:
"""load a .NET PE file, raising CorruptFile on struct.error with the original error message."""
try:
return dnfile.dnPE(str(path))
except struct.error as e:
from capa.loader import CorruptFile
raise CorruptFile(f"Invalid or truncated .NET metadata: {e}") from e
class DnfileMethodBodyReader(CilMethodBodyReaderBase):
def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow):
self.pe: dnfile.dnPE = pe
@@ -163,9 +151,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
)
def get_dotnet_methoddef_property_accessors(
pe: dnfile.dnPE,
) -> Iterator[tuple[int, str]]:
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
"""get MethodDef methods used to access properties
see https://www.ntcore.com/files/dotnetformat.htm
@@ -240,13 +226,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)
yield DnType(
token,
typedefname,
namespace=typedefnamespace,
member=method_name,
access=access,
)
yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access)
def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
@@ -279,9 +259,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)
def get_dotnet_managed_method_bodies(
pe: dnfile.dnPE,
) -> Iterator[tuple[int, CilMethodBody]]:
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
"""get managed methods from MethodDef table"""
for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
@@ -360,10 +338,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
def resolve_nested_typedef_name(
nested_class_table: dict,
index: int,
typedef: dnfile.mdtable.TypeDefRow,
pe: dnfile.dnPE,
nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
) -> tuple[str, tuple[str, ...]]:
"""Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""

View File

@@ -42,7 +42,6 @@ from capa.features.extractors.dnfile.types import DnType
from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
from capa.features.extractors.dnfile.helpers import (
iter_dotnet_table,
load_dotnet_image,
is_dotnet_mixed_mode,
get_dotnet_managed_imports,
get_dotnet_managed_methods,
@@ -185,8 +184,8 @@ GLOBAL_HANDLERS = (
class DotnetFileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: Path):
super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
self.path = path
self.pe = load_dotnet_image(path)
self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
def get_base_address(self):
return NO_ADDRESS
@@ -218,10 +217,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
assert self.pe.net.struct.MajorRuntimeVersion is not None
assert self.pe.net.struct.MinorRuntimeVersion is not None
return (
self.pe.net.struct.MajorRuntimeVersion,
self.pe.net.struct.MinorRuntimeVersion,
)
return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion
def get_meta_version_string(self) -> str:
assert self.pe.net is not None

View File

@@ -18,6 +18,7 @@ import struct
from typing import Iterator
import idaapi
from ida_domain import Database
import capa.features.extractors.ida.helpers
from capa.features.common import Feature, Characteristic
@@ -59,7 +60,7 @@ def get_printable_len(op: idaapi.op_t) -> int:
return 0
def is_mov_imm_to_stack(insn: idaapi.insn_t) -> bool:
def is_mov_imm_to_stack(db: Database, insn: idaapi.insn_t) -> bool:
"""verify instruction moves immediate onto stack"""
if insn.Op2.type != idaapi.o_imm:
return False
@@ -67,42 +68,43 @@ def is_mov_imm_to_stack(insn: idaapi.insn_t) -> bool:
if not helpers.is_op_stack_var(insn.ea, 0):
return False
if not insn.get_canon_mnem().startswith("mov"):
mnem = db.instructions.get_mnemonic(insn)
if not mnem.startswith("mov"):
return False
return True
def bb_contains_stackstring(f: idaapi.func_t, bb: idaapi.BasicBlock) -> bool:
def bb_contains_stackstring(db: Database, f: idaapi.func_t, bb: idaapi.BasicBlock) -> bool:
"""check basic block for stackstring indicators
true if basic block contains enough moves of constant bytes to the stack
"""
count = 0
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
if is_mov_imm_to_stack(insn):
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(db, bb.start_ea, bb.end_ea):
if is_mov_imm_to_stack(db, insn):
count += get_printable_len(insn.Op2)
if count > MIN_STACKSTRING_LEN:
return True
return False
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
def extract_bb_stackstring(db: Database, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
"""extract stackstring indicators from basic block"""
if bb_contains_stackstring(fh.inner, bbh.inner):
if bb_contains_stackstring(db, fh.inner, bbh.inner):
yield Characteristic("stack string"), bbh.address
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
def extract_bb_tight_loop(db: Database, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
"""extract tight loop indicators from a basic block"""
if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner):
if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(db, bbh.inner):
yield Characteristic("tight loop"), bbh.address
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
def extract_features(db: Database, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
"""extract basic block features"""
for bb_handler in BASIC_BLOCK_HANDLERS:
for feature, addr in bb_handler(fh, bbh):
for feature, addr in bb_handler(db, fh, bbh):
yield feature, addr
yield BasicBlock(), bbh.address

View File

@@ -13,8 +13,9 @@
# limitations under the License.
from typing import Iterator
from pathlib import Path
import idaapi
from ida_domain import Database
import capa.ida.helpers
import capa.features.extractors.elf
@@ -35,56 +36,68 @@ from capa.features.extractors.base_extractor import (
class IdaFeatureExtractor(StaticFeatureExtractor):
def __init__(self):
def __init__(self, db: Database):
self.db = db
super().__init__(
hashes=SampleHashes(
md5=capa.ida.helpers.retrieve_input_file_md5(),
md5=db.md5,
sha1="(unknown)",
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
sha256=db.sha256,
)
)
self.global_features: list[tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format(self.db))
self.global_features.extend(capa.features.extractors.ida.global_.extract_os(self.db))
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch(self.db))
@classmethod
def from_current_database(cls) -> "IdaFeatureExtractor":
"""Create extractor for interactive IDA GUI use."""
db = Database.open()
return cls(db)
@classmethod
def from_file(cls, path: Path) -> "IdaFeatureExtractor":
"""Create extractor for idalib/headless use."""
db = Database.open(str(path))
return cls(db)
def get_base_address(self):
return AbsoluteVirtualAddress(idaapi.get_imagebase())
return AbsoluteVirtualAddress(self.db.base_address)
def extract_global_features(self):
yield from self.global_features
def extract_file_features(self):
yield from capa.features.extractors.ida.file.extract_features()
yield from capa.features.extractors.ida.file.extract_features(self.db)
def get_functions(self) -> Iterator[FunctionHandle]:
import capa.features.extractors.ida.helpers as ida_helpers
# ignore library functions and thunk functions as identified by IDA
yield from ida_helpers.get_functions(skip_thunks=True, skip_libs=True)
yield from ida_helpers.get_functions(self.db, skip_thunks=True, skip_libs=True)
@staticmethod
def get_function(ea: int) -> FunctionHandle:
f = idaapi.get_func(ea)
def get_function(self, ea: int) -> FunctionHandle:
f = self.db.functions.get_at(ea)
return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f)
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
yield from capa.features.extractors.ida.function.extract_features(fh)
yield from capa.features.extractors.ida.function.extract_features(self.db, fh)
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
import capa.features.extractors.ida.helpers as ida_helpers
for bb in ida_helpers.get_function_blocks(fh.inner):
for bb in ida_helpers.get_function_blocks(self.db, fh.inner):
yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb)
def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh)
yield from capa.features.extractors.ida.basicblock.extract_features(self.db, fh, bbh)
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
import capa.features.extractors.ida.helpers as ida_helpers
for insn in ida_helpers.get_instructions_in_range(bbh.inner.start_ea, bbh.inner.end_ea):
for insn in ida_helpers.get_instructions_in_range(self.db, bbh.inner.start_ea, bbh.inner.end_ea):
yield InsnHandle(address=AbsoluteVirtualAddress(insn.ea), inner=insn)
def extract_insn_features(self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle):
yield from capa.features.extractors.ida.insn.extract_features(fh, bbh, ih)
yield from capa.features.extractors.ida.insn.extract_features(self.db, fh, bbh, ih)

View File

@@ -16,10 +16,9 @@
import struct
from typing import Iterator
import idc
import idaapi
import idautils
import ida_entry
from ida_domain import Database
from ida_domain.functions import FunctionFlags
import capa.ida.helpers
import capa.features.extractors.common
@@ -33,7 +32,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
MAX_OFFSET_PE_AFTER_MZ = 0x200
def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
def check_segment_for_pe(db: Database, seg) -> Iterator[tuple[int, int]]:
"""check segment for embedded PE
adapted for IDA from:
@@ -51,8 +50,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
todo = []
for mzx, pex, i in mz_xor:
# find all segment offsets containing XOR'd "MZ" bytes
for off in capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx):
for off in capa.features.extractors.ida.helpers.find_byte_sequence(db, seg.start_ea, seg.end_ea, mzx):
todo.append((off, mzx, pex, i))
while len(todo):
@@ -64,9 +62,11 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
if seg_max < (e_lfanew + 4):
continue
newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
raw_bytes = db.bytes.get_bytes_at(e_lfanew, 4)
if not raw_bytes:
continue
newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(raw_bytes, i))[0]
# assume XOR'd "PE" bytes exist within threshold
if newoff > MAX_OFFSET_PE_AFTER_MZ:
continue
@@ -74,35 +74,35 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
if seg_max < (peoff + 2):
continue
if idc.get_bytes(peoff, 2) == pex:
pe_bytes = db.bytes.get_bytes_at(peoff, 2)
if pe_bytes == pex:
yield off, i
def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
def extract_file_embedded_pe(db: Database) -> Iterator[tuple[Feature, Address]]:
"""extract embedded PE features
IDA must load resource sections for this to be complete
- '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually
"""
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
for ea, _ in check_segment_for_pe(seg):
for seg in capa.features.extractors.ida.helpers.get_segments(db, skip_header_segments=True):
for ea, _ in check_segment_for_pe(db, seg):
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
def extract_file_export_names(db: Database) -> Iterator[tuple[Feature, Address]]:
"""extract function exports"""
for _, ordinal, ea, name in idautils.Entries():
forwarded_name = ida_entry.get_entry_forwarder(ordinal)
if forwarded_name is None:
yield Export(name), AbsoluteVirtualAddress(ea)
for entry in db.entries.get_all():
if entry.has_forwarder():
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(entry.forwarder_name)
yield Export(forwarded_name), AbsoluteVirtualAddress(entry.address)
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(entry.address)
else:
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
yield Export(forwarded_name), AbsoluteVirtualAddress(ea)
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea)
yield Export(entry.name), AbsoluteVirtualAddress(entry.address)
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
def extract_file_import_names(db: Database) -> Iterator[tuple[Feature, Address]]:
"""extract function imports
1. imports by ordinal:
@@ -113,7 +113,7 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
- modulename.importname
- importname
"""
for ea, info in capa.features.extractors.ida.helpers.get_file_imports().items():
for ea, info in capa.features.extractors.ida.helpers.get_file_imports(db).items():
addr = AbsoluteVirtualAddress(ea)
if info[1] and info[2]:
# e.g. in mimikatz: ('cabinet', 'FCIAddFile', 11L)
@@ -134,30 +134,31 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol, include_dll=True):
yield Import(name), addr
for ea, info in capa.features.extractors.ida.helpers.get_file_externs().items():
for ea, info in capa.features.extractors.ida.helpers.get_file_externs(db).items():
yield Import(info[1]), AbsoluteVirtualAddress(ea)
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
def extract_file_section_names(db: Database) -> Iterator[tuple[Feature, Address]]:
"""extract section names
IDA must load resource sections for this to be complete
- '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually
"""
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea)
for seg in capa.features.extractors.ida.helpers.get_segments(db, skip_header_segments=True):
name = db.segments.get_name(seg)
yield Section(name), AbsoluteVirtualAddress(seg.start_ea)
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
def extract_file_strings(db: Database) -> Iterator[tuple[Feature, Address]]:
"""extract ASCII and UTF-16 LE strings
IDA must load resource sections for this to be complete
- '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually
"""
for seg in capa.features.extractors.ida.helpers.get_segments():
seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)
for seg in capa.features.extractors.ida.helpers.get_segments(db):
seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(db, seg)
# differing to common string extractor factor in segment offset here
for s in capa.features.extractors.strings.extract_ascii_strings(seg_buff):
@@ -167,41 +168,40 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
"""
extract the names of statically-linked library functions.
"""
for ea in idautils.Functions():
addr = AbsoluteVirtualAddress(ea)
if idaapi.get_func(ea).flags & idaapi.FUNC_LIB:
name = idaapi.get_name(ea)
yield FunctionName(name), addr
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), addr
def extract_file_function_names(db: Database) -> Iterator[tuple[Feature, Address]]:
"""extract the names of statically-linked library functions."""
for f in db.functions.get_all():
flags = db.functions.get_flags(f)
if flags & FunctionFlags.LIB:
addr = AbsoluteVirtualAddress(f.start_ea)
name = db.names.get_at(f.start_ea)
if name:
yield FunctionName(name), addr
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), addr
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
filetype = capa.ida.helpers.get_filetype()
def extract_file_format(db: Database) -> Iterator[tuple[Feature, Address]]:
format_name = db.format
if filetype in (idaapi.f_PE, idaapi.f_COFF):
if "PE" in format_name or "COFF" in format_name:
yield Format(FORMAT_PE), NO_ADDRESS
elif filetype == idaapi.f_ELF:
elif "ELF" in format_name:
yield Format(FORMAT_ELF), NO_ADDRESS
elif filetype == idaapi.f_BIN:
# no file type to return when processing a binary file, but we want to continue processing
elif "Binary" in format_name:
return
else:
raise NotImplementedError(f"unexpected file format: {filetype}")
raise NotImplementedError(f"unexpected file format: {format_name}")
def extract_features() -> Iterator[tuple[Feature, Address]]:
def extract_features(db: Database) -> Iterator[tuple[Feature, Address]]:
"""extract file features"""
for file_handler in FILE_HANDLERS:
for feature, addr in file_handler():
for feature, addr in file_handler(db):
yield feature, addr

View File

@@ -15,7 +15,7 @@
from typing import Iterator
import idaapi
import idautils
from ida_domain import Database
import capa.features.extractors.ida.helpers
from capa.features.file import FunctionName
@@ -25,19 +25,20 @@ from capa.features.extractors import loops
from capa.features.extractors.base_extractor import FunctionHandle
def extract_function_calls_to(fh: FunctionHandle):
def extract_function_calls_to(db: Database, fh: FunctionHandle):
"""extract callers to a function"""
for ea in idautils.CodeRefsTo(fh.inner.start_ea, True):
for ea in db.xrefs.code_refs_to_ea(fh.inner.start_ea):
yield Characteristic("calls to"), AbsoluteVirtualAddress(ea)
def extract_function_loop(fh: FunctionHandle):
def extract_function_loop(db: Database, fh: FunctionHandle):
"""extract loop indicators from a function"""
f: idaapi.func_t = fh.inner
edges = []
# construct control flow graph
for bb in idaapi.FlowChart(f):
flowchart = db.functions.get_flowchart(f)
for bb in flowchart:
for succ in bb.succs():
edges.append((bb.start_ea, succ.start_ea))
@@ -45,16 +46,16 @@ def extract_function_loop(fh: FunctionHandle):
yield Characteristic("loop"), fh.address
def extract_recursive_call(fh: FunctionHandle):
def extract_recursive_call(db: Database, fh: FunctionHandle):
"""extract recursive function call"""
if capa.features.extractors.ida.helpers.is_function_recursive(fh.inner):
if capa.features.extractors.ida.helpers.is_function_recursive(db, fh.inner):
yield Characteristic("recursive call"), fh.address
def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
def extract_function_name(db: Database, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
ea = fh.inner.start_ea
name = idaapi.get_name(ea)
if name.startswith("sub_"):
name = db.names.get_at(ea)
if not name or name.startswith("sub_"):
# skip default names, like "sub_401000"
return
@@ -67,16 +68,15 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address
yield FunctionName(name[1:]), fh.address
def extract_function_alternative_names(fh: FunctionHandle):
def extract_function_alternative_names(db: Database, fh: FunctionHandle):
"""Get all alternative names for an address."""
for aname in capa.features.extractors.ida.helpers.get_function_alternative_names(fh.inner.start_ea):
for aname in capa.features.extractors.ida.helpers.get_function_alternative_names(db, fh.inner.start_ea):
yield FunctionName(aname), fh.address
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
def extract_features(db: Database, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh):
for feature, addr in func_handler(db, fh):
yield feature, addr

View File

@@ -16,7 +16,7 @@ import logging
import contextlib
from typing import Iterator
import ida_loader
from ida_domain import Database
import capa.ida.helpers
import capa.features.extractors.elf
@@ -26,8 +26,8 @@ from capa.features.address import NO_ADDRESS, Address
logger = logging.getLogger(__name__)
def extract_os() -> Iterator[tuple[Feature, Address]]:
format_name: str = ida_loader.get_file_type_name()
def extract_os(db: Database) -> Iterator[tuple[Feature, Address]]:
format_name: str = db.format
if "PE" in format_name:
yield OS(OS_WINDOWS), NO_ADDRESS
@@ -53,13 +53,14 @@ def extract_os() -> Iterator[tuple[Feature, Address]]:
return
def extract_arch() -> Iterator[tuple[Feature, Address]]:
procname = capa.ida.helpers.get_processor_name()
if procname == "metapc" and capa.ida.helpers.is_64bit():
def extract_arch(db: Database) -> Iterator[tuple[Feature, Address]]:
bitness = db.bitness
arch = db.architecture
if arch == "metapc" and bitness == 64:
yield Arch(ARCH_AMD64), NO_ADDRESS
elif procname == "metapc" and capa.ida.helpers.is_32bit():
elif arch == "metapc" and bitness == 32:
yield Arch(ARCH_I386), NO_ADDRESS
elif procname == "metapc":
elif arch == "metapc":
logger.debug("unsupported architecture: non-32-bit nor non-64-bit intel")
return
else:
@@ -67,5 +68,5 @@ def extract_arch() -> Iterator[tuple[Feature, Address]]:
# 1. handling a new architecture (e.g. aarch64)
#
# for (1), this logic will need to be updated as the format is implemented.
logger.debug("unsupported architecture: %s", procname)
logger.debug("unsupported architecture: %s", arch)
return

View File

@@ -22,109 +22,86 @@ import idautils
import ida_bytes
import ida_funcs
import ida_segment
from ida_domain import Database
from ida_domain.functions import FunctionFlags
from capa.features.address import AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FunctionHandle
IDA_NALT_ENCODING = ida_nalt.get_default_encoding_idx(ida_nalt.BPU_1B) # use one byte-per-character encoding
def find_byte_sequence(db: Database, start: int, end: int, seq: bytes) -> Iterator[int]:
"""yield all ea of a given byte sequence
if hasattr(ida_bytes, "parse_binpat_str"):
# TODO (mr): use find_bytes
# https://github.com/mandiant/capa/issues/2339
def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]:
"""yield all ea of a given byte sequence
args:
start: min virtual address
end: max virtual address
seq: bytes to search e.g. b"\x01\x03"
"""
patterns = ida_bytes.compiled_binpat_vec_t()
seqstr = " ".join([f"{b:02x}" for b in seq])
err = ida_bytes.parse_binpat_str(patterns, 0, seqstr, 16, IDA_NALT_ENCODING)
if err:
return
while True:
ea = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
if isinstance(ea, int):
# "ea_t" in IDA 8.4, 8.3
pass
elif isinstance(ea, tuple):
# "drc_t" in IDA 9
ea = ea[0]
else:
raise NotImplementedError(f"bin_search returned unhandled type: {type(ea)}")
if ea == idaapi.BADADDR:
break
start = ea + 1
yield ea
else:
# for IDA 7.5 and older; using deprecated find_binary instead of bin_search
def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]:
"""yield all ea of a given byte sequence
args:
start: min virtual address
end: max virtual address
seq: bytes to search e.g. b"\x01\x03"
"""
seqstr = " ".join([f"{b:02x}" for b in seq])
while True:
ea = idaapi.find_binary(start, end, seqstr, 0, idaapi.SEARCH_DOWN)
if ea == idaapi.BADADDR:
break
start = ea + 1
yield ea
args:
db: IDA Domain Database handle
start: min virtual address
end: max virtual address
seq: bytes to search e.g. b"\x01\x03"
"""
for match in db.bytes.find_binary_sequence(seq, start, end):
yield match
def get_functions(
start: Optional[int] = None, end: Optional[int] = None, skip_thunks: bool = False, skip_libs: bool = False
db: Database,
start: Optional[int] = None,
end: Optional[int] = None,
skip_thunks: bool = False,
skip_libs: bool = False,
) -> Iterator[FunctionHandle]:
"""get functions, range optional
args:
db: IDA Domain Database handle
start: min virtual address
end: max virtual address
skip_thunks: skip thunk functions
skip_libs: skip library functions
"""
for ea in idautils.Functions(start=start, end=end):
f = idaapi.get_func(ea)
if not (skip_thunks and (f.flags & idaapi.FUNC_THUNK) or skip_libs and (f.flags & idaapi.FUNC_LIB)):
yield FunctionHandle(address=AbsoluteVirtualAddress(ea), inner=f)
if start is not None and end is not None:
funcs = db.functions.get_between(start, end)
else:
funcs = db.functions.get_all()
for f in funcs:
flags = db.functions.get_flags(f)
if skip_thunks and (flags & FunctionFlags.THUNK):
continue
if skip_libs and (flags & FunctionFlags.LIB):
continue
yield FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f)
def get_segments(skip_header_segments=False) -> Iterator[idaapi.segment_t]:
def get_segments(db: Database, skip_header_segments: bool = False):
"""get list of segments (sections) in the binary image
args:
db: IDA Domain Database handle
skip_header_segments: IDA may load header segments - skip if set
"""
for n in range(idaapi.get_segm_qty()):
seg = idaapi.getnseg(n)
if seg and not (skip_header_segments and seg.is_header_segm()):
yield seg
for seg in db.segments.get_all():
if skip_header_segments and seg.is_header_segm():
continue
yield seg
def get_segment_buffer(seg: idaapi.segment_t) -> bytes:
def get_segment_buffer(db: Database, seg) -> bytes:
"""return bytes stored in a given segment
decrease buffer size until IDA is able to read bytes from the segment
args:
db: IDA Domain Database handle
seg: segment object
"""
buff = b""
sz = seg.end_ea - seg.start_ea
# decrease buffer size until IDA is able to read bytes from the segment
while sz > 0:
buff = idaapi.get_bytes(seg.start_ea, sz)
buff = db.bytes.get_bytes_at(seg.start_ea, sz)
if buff:
break
return buff
sz -= 0x1000
# IDA returns None if get_bytes fails, so convert for consistent return type
return buff if buff else b""
return b""
def inspect_import(imports, library, ea, function, ordinal):
@@ -140,8 +117,14 @@ def inspect_import(imports, library, ea, function, ordinal):
return True
def get_file_imports() -> dict[int, tuple[str, str, int]]:
"""get file imports"""
def get_file_imports(db: Database) -> dict[int, tuple[str, str, int]]:
"""get file imports
Note: import enumeration has no Domain API equivalent, using SDK fallback.
args:
db: IDA Domain Database handle (unused, kept for API consistency)
"""
imports: dict[int, tuple[str, str, int]] = {}
for idx in range(idaapi.get_import_module_qty()):
@@ -163,28 +146,35 @@ def get_file_imports() -> dict[int, tuple[str, str, int]]:
return imports
def get_file_externs() -> dict[int, tuple[str, str, int]]:
def get_file_externs(db: Database) -> dict[int, tuple[str, str, int]]:
"""get extern functions
args:
db: IDA Domain Database handle
"""
externs = {}
for seg in get_segments(skip_header_segments=True):
for seg in get_segments(db, skip_header_segments=True):
if seg.type != ida_segment.SEG_XTRN:
continue
for ea in idautils.Functions(seg.start_ea, seg.end_ea):
externs[ea] = ("", idaapi.get_func_name(ea), -1)
for f in db.functions.get_between(seg.start_ea, seg.end_ea):
name = db.functions.get_name(f)
externs[f.start_ea] = ("", name, -1)
return externs
def get_instructions_in_range(start: int, end: int) -> Iterator[idaapi.insn_t]:
def get_instructions_in_range(db: Database, start: int, end: int) -> Iterator[idaapi.insn_t]:
"""yield instructions in range
args:
db: IDA Domain Database handle
start: virtual address (inclusive)
end: virtual address (exclusive)
"""
for head in idautils.Heads(start, end):
insn = idautils.DecodeInstruction(head)
for head in db.heads.get_between(start, end):
insn = db.instructions.get_at(head)
if insn:
yield insn
@@ -234,21 +224,38 @@ def basic_block_size(bb: idaapi.BasicBlock) -> int:
return bb.end_ea - bb.start_ea
def read_bytes_at(ea: int, count: int) -> bytes:
""" """
# check if byte has a value, see get_wide_byte doc
if not idc.is_loaded(ea):
def read_bytes_at(db: Database, ea: int, count: int) -> bytes:
"""read bytes at address
args:
db: IDA Domain Database handle
ea: effective address
count: number of bytes to read
"""
if not db.bytes.is_value_initialized_at(ea):
return b""
segm_end = idc.get_segm_end(ea)
if ea + count > segm_end:
return idc.get_bytes(ea, segm_end - ea)
seg = db.segments.get_at(ea)
if seg is None:
return b""
if ea + count > seg.end_ea:
return db.bytes.get_bytes_at(ea, seg.end_ea - ea) or b""
else:
return idc.get_bytes(ea, count)
return db.bytes.get_bytes_at(ea, count) or b""
def find_string_at(ea: int, min_: int = 4) -> str:
"""check if ASCII string exists at a given virtual address"""
def find_string_at(db: Database, ea: int, min_: int = 4) -> str:
"""check if string exists at a given virtual address
Note: Uses SDK fallback as Domain API get_string_at only works for
addresses where IDA has already identified a string.
args:
db: IDA Domain Database handle (unused, kept for API consistency)
ea: effective address
min_: minimum string length
"""
found = idaapi.get_strlit_contents(ea, -1, idaapi.STRTYPE_C)
if found and len(found) >= min_:
try:
@@ -375,31 +382,51 @@ def mask_op_val(op: idaapi.op_t) -> int:
return masks.get(op.dtype, op.value) & op.value
def is_function_recursive(f: idaapi.func_t) -> bool:
"""check if function is recursive"""
return any(f.contains(ref) for ref in idautils.CodeRefsTo(f.start_ea, True))
def is_function_recursive(db: Database, f: idaapi.func_t) -> bool:
"""check if function is recursive
args:
db: IDA Domain Database handle
f: function object
"""
for ref in db.xrefs.code_refs_to_ea(f.start_ea):
if f.contains(ref):
return True
return False
def is_basic_block_tight_loop(bb: idaapi.BasicBlock) -> bool:
def is_basic_block_tight_loop(db: Database, bb: idaapi.BasicBlock) -> bool:
"""check basic block loops to self
args:
db: IDA Domain Database handle
bb: basic block object
true if last instruction in basic block branches to basic block start
"""
bb_end = idc.prev_head(bb.end_ea)
bb_end = db.heads.get_previous(bb.end_ea)
if bb_end is None:
return False
if bb.start_ea < bb_end:
for ref in idautils.CodeRefsFrom(bb_end, True):
for ref in db.xrefs.code_refs_from_ea(bb_end):
if ref == bb.start_ea:
return True
return False
def find_data_reference_from_insn(insn: idaapi.insn_t, max_depth: int = 10) -> int:
"""search for data reference from instruction, return address of instruction if no reference exists"""
def find_data_reference_from_insn(db: Database, insn: idaapi.insn_t, max_depth: int = 10) -> int:
"""search for data reference from instruction, return address of instruction if no reference exists
args:
db: IDA Domain Database handle
insn: instruction object
max_depth: maximum depth to follow references
"""
depth = 0
ea = insn.ea
while True:
data_refs = list(idautils.DataRefsFrom(ea))
data_refs = list(db.xrefs.data_refs_from_ea(ea))
if len(data_refs) != 1:
# break if no refs or more than one ref (assume nested pointers only have one data reference)
@@ -409,7 +436,7 @@ def find_data_reference_from_insn(insn: idaapi.insn_t, max_depth: int = 10) -> i
# break if circular reference
break
if not idaapi.is_mapped(data_refs[0]):
if not db.is_valid_ea(data_refs[0]):
# break if address is not mapped
break
@@ -423,10 +450,16 @@ def find_data_reference_from_insn(insn: idaapi.insn_t, max_depth: int = 10) -> i
return ea
def get_function_blocks(f: idaapi.func_t) -> Iterator[idaapi.BasicBlock]:
"""yield basic blocks contained in specified function"""
def get_function_blocks(db: Database, f: idaapi.func_t) -> Iterator[idaapi.BasicBlock]:
"""yield basic blocks contained in specified function
args:
db: IDA Domain Database handle
f: function object
"""
# leverage idaapi.FC_NOEXT flag to ignore useless external blocks referenced by the function
yield from idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT))
flowchart = db.functions.get_flowchart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT))
yield from flowchart
def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
@@ -446,7 +479,17 @@ def find_alternative_names(cmt: str):
yield name
def get_function_alternative_names(fva: int):
"""Get all alternative names for an address."""
yield from find_alternative_names(ida_bytes.get_cmt(fva, False) or "")
yield from find_alternative_names(ida_funcs.get_func_cmt(idaapi.get_func(fva), False) or "")
def get_function_alternative_names(db: Database, fva: int):
"""Get all alternative names for an address.
args:
db: IDA Domain Database handle
fva: function virtual address
"""
cmt_info = db.comments.get_at(fva)
cmt = cmt_info.comment if cmt_info else ""
yield from find_alternative_names(cmt)
f = db.functions.get_at(fva)
if f:
func_cmt = db.functions.get_comment(f, False)
yield from find_alternative_names(func_cmt or "")

View File

@@ -18,7 +18,8 @@ from typing import Any, Iterator, Optional
import idc
import ida_ua
import idaapi
import idautils
from ida_domain import Database
from ida_domain.functions import FunctionFlags
import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
@@ -33,19 +34,19 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi
SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
def get_imports(db: Database, ctx: dict[str, Any]) -> dict[int, Any]:
if "imports_cache" not in ctx:
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports(db)
return ctx["imports_cache"]
def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
def get_externs(db: Database, ctx: dict[str, Any]) -> dict[int, Any]:
if "externs_cache" not in ctx:
ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs()
ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs(db)
return ctx["externs_cache"]
def check_for_api_call(insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[tuple[str, str]]:
def check_for_api_call(db: Database, insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[tuple[str, str]]:
"""check instruction for API call"""
info = None
ref = insn.ea
@@ -53,27 +54,32 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[t
# attempt to resolve API calls by following chained thunks to a reasonable depth
for _ in range(THUNK_CHAIN_DEPTH_DELTA):
# assume only one code/data ref when resolving "call" or "jmp"
try:
ref = tuple(idautils.CodeRefsFrom(ref, False))[0]
except IndexError:
try:
# thunks may be marked as data refs
ref = tuple(idautils.DataRefsFrom(ref))[0]
except IndexError:
code_refs = list(db.xrefs.code_refs_from_ea(ref, flow=False))
if code_refs:
ref = code_refs[0]
else:
# thunks may be marked as data refs
data_refs = list(db.xrefs.data_refs_from_ea(ref))
if data_refs:
ref = data_refs[0]
else:
break
info = funcs.get(ref)
if info:
break
f = idaapi.get_func(ref)
if not f or not (f.flags & idaapi.FUNC_THUNK):
f = db.functions.get_at(ref)
if f is None:
break
flags = db.functions.get_flags(f)
if not (flags & FunctionFlags.THUNK):
break
return info
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
def extract_insn_api_features(db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
"""
parse instruction API features
@@ -82,35 +88,30 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
"""
insn: idaapi.insn_t = ih.inner
if insn.get_canon_mnem() not in ("call", "jmp"):
mnem = db.instructions.get_mnemonic(insn)
if mnem not in ("call", "jmp"):
return
# check call to imported functions
api = check_for_api_call(insn, get_imports(fh.ctx))
api = check_for_api_call(db, insn, get_imports(db, fh.ctx))
if api:
# tuple (<module>, <function>, <ordinal>)
for name in capa.features.extractors.helpers.generate_symbols(api[0], api[1]):
yield API(name), ih.address
# a call instruction should only call one function, stop if a call to an import is extracted
return
# check call to extern functions
api = check_for_api_call(insn, get_externs(fh.ctx))
api = check_for_api_call(db, insn, get_externs(db, fh.ctx))
if api:
# tuple (<module>, <function>, <ordinal>)
yield API(api[1]), ih.address
# a call instruction should only call one function, stop if a call to an extern is extracted
return
# extract dynamically resolved APIs stored in renamed globals (renamed for example using `renimp.idc`)
# examples: `CreateProcessA`, `HttpSendRequestA`
if insn.Op1.type == ida_ua.o_mem:
op_addr = insn.Op1.addr
op_name = idaapi.get_name(op_addr)
op_name = db.names.get_at(op_addr)
# when renaming a global using an API name, IDA assigns it the function type
# ensure we do not extract something wrong by checking that the address has a name and a type
# we could check that the type is a function definition, but that complicates the code
if (not op_name.startswith("off_")) and idc.get_type(op_addr):
if op_name and (not op_name.startswith("off_")) and idc.get_type(op_addr):
# Remove suffix used in repeated names, for example _0 in VirtualFree_0
match = re.match(r"(.+)_\d+", op_name)
if match:
@@ -119,19 +120,21 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
for name in capa.features.extractors.helpers.generate_symbols("", op_name):
yield API(name), ih.address
# extract IDA/FLIRT recognized API functions
targets = tuple(idautils.CodeRefsFrom(insn.ea, False))
targets = list(db.xrefs.code_refs_from_ea(insn.ea, flow=False))
if not targets:
return
target = targets[0]
target_func = idaapi.get_func(target)
target_func = db.functions.get_at(target)
if not target_func or target_func.start_ea != target:
# not a function (start)
return
name = idaapi.get_name(target_func.start_ea)
if target_func.flags & idaapi.FUNC_LIB or not name.startswith("sub_"):
name = db.names.get_at(target_func.start_ea)
if not name:
return
flags = db.functions.get_flags(target_func)
if flags & FunctionFlags.LIB or not name.startswith("sub_"):
yield API(name), ih.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
@@ -140,13 +143,13 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
# see: https://stackoverflow.com/a/2628384/87207
yield API(name[1:]), ih.address
for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(target_func.start_ea):
for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(db, target_func.start_ea):
yield FunctionName(altname), ih.address
yield API(altname), ih.address
def extract_insn_number_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""
parse instruction number features
@@ -155,7 +158,7 @@ def extract_insn_number_features(
"""
insn: idaapi.insn_t = ih.inner
if idaapi.is_ret_insn(insn):
if db.instructions.breaks_sequential_flow(insn):
# skip things like:
# .text:0042250E retn 8
return
@@ -183,7 +186,8 @@ def extract_insn_number_features(
yield Number(const), ih.address
yield OperandNumber(i, const), ih.address
if insn.itype == idaapi.NN_add and 0 < const < MAX_STRUCTURE_SIZE and op.type == idaapi.o_imm:
mnem = db.instructions.get_mnemonic(insn)
if mnem == "add" and 0 < const < MAX_STRUCTURE_SIZE and op.type == idaapi.o_imm:
# for pattern like:
#
# add eax, 0x10
@@ -193,7 +197,7 @@ def extract_insn_number_features(
yield OperandOffset(i, const), ih.address
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
def extract_insn_bytes_features(db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
"""
parse referenced byte sequences
example:
@@ -201,20 +205,20 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
"""
insn: idaapi.insn_t = ih.inner
if idaapi.is_call_insn(insn):
if db.instructions.is_call_instruction(insn):
return
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(db, insn)
if ref != insn.ea:
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(db, ref, MAX_BYTES_FEATURE_SIZE)
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
if not capa.features.extractors.ida.helpers.find_string_at(ref):
if not capa.features.extractors.ida.helpers.find_string_at(db, ref):
# don't extract byte features for obvious strings
yield Bytes(extracted_bytes), ih.address
def extract_insn_string_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""
parse instruction string features
@@ -224,15 +228,15 @@ def extract_insn_string_features(
"""
insn: idaapi.insn_t = ih.inner
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(db, insn)
if ref != insn.ea:
found = capa.features.extractors.ida.helpers.find_string_at(ref)
found = capa.features.extractors.ida.helpers.find_string_at(db, ref)
if found:
yield String(found), ih.address
def extract_insn_offset_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""
parse instruction structure offset features
@@ -256,7 +260,7 @@ def extract_insn_offset_features(
if op_off is None:
continue
if idaapi.is_mapped(op_off):
if db.is_valid_ea(op_off):
# Ignore:
# mov esi, dword_1005B148[esi]
continue
@@ -269,8 +273,9 @@ def extract_insn_offset_features(
yield Offset(op_off), ih.address
yield OperandOffset(i, op_off), ih.address
mnem = db.instructions.get_mnemonic(insn)
if (
insn.itype == idaapi.NN_lea
mnem == "lea"
and i == 1
# o_displ is used for both:
# [eax+1]
@@ -305,7 +310,7 @@ def contains_stack_cookie_keywords(s: str) -> bool:
return any(keyword in s for keyword in ("stack", "security"))
def bb_stack_cookie_registers(bb: idaapi.BasicBlock) -> Iterator[int]:
def bb_stack_cookie_registers(db: Database, bb: idaapi.BasicBlock) -> Iterator[int]:
"""scan basic block for stack cookie operations
yield registers ids that may have been used for stack cookie operations
@@ -331,21 +336,22 @@ def bb_stack_cookie_registers(bb: idaapi.BasicBlock) -> Iterator[int]:
TODO: this is expensive, but necessary?...
"""
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
if contains_stack_cookie_keywords(idc.GetDisasm(insn.ea)):
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(db, bb.start_ea, bb.end_ea):
disasm = db.instructions.get_disassembly(insn)
if contains_stack_cookie_keywords(disasm):
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_reg,)):
if capa.features.extractors.ida.helpers.is_op_write(insn, op):
# only include modified registers
yield op.reg
def is_nzxor_stack_cookie_delta(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.insn_t) -> bool:
def is_nzxor_stack_cookie_delta(db: Database, f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.insn_t) -> bool:
"""check if nzxor exists within stack cookie delta"""
# security cookie check should use SP or BP
if not capa.features.extractors.ida.helpers.is_frame_register(insn.Op2.reg):
return False
f_bbs = tuple(capa.features.extractors.ida.helpers.get_function_blocks(f))
f_bbs = tuple(capa.features.extractors.ida.helpers.get_function_blocks(db, f))
# expect security cookie init in first basic block within first bytes (instructions)
if capa.features.extractors.ida.helpers.is_basic_block_equal(bb, f_bbs[0]) and insn.ea < (
@@ -362,15 +368,17 @@ def is_nzxor_stack_cookie_delta(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: i
return False
def is_nzxor_stack_cookie(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.insn_t) -> bool:
def is_nzxor_stack_cookie(db: Database, f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.insn_t) -> bool:
"""check if nzxor is related to stack cookie"""
if contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)):
cmt_info = db.comments.get_at(insn.ea)
cmt = cmt_info.comment if cmt_info else ""
if contains_stack_cookie_keywords(cmt):
# Example:
# xor ecx, ebp ; StackCookie
return True
if is_nzxor_stack_cookie_delta(f, bb, insn):
if is_nzxor_stack_cookie_delta(db, f, bb, insn):
return True
stack_cookie_regs = tuple(bb_stack_cookie_registers(bb))
stack_cookie_regs = tuple(bb_stack_cookie_registers(db, bb))
if any(op_reg in stack_cookie_regs for op_reg in (insn.Op1.reg, insn.Op2.reg)):
# Example:
# mov eax, ___security_cookie
@@ -380,7 +388,7 @@ def is_nzxor_stack_cookie(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.
def extract_insn_nzxor_characteristic_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""
parse instruction non-zeroing XOR instruction
@@ -388,31 +396,33 @@ def extract_insn_nzxor_characteristic_features(
"""
insn: idaapi.insn_t = ih.inner
if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor):
mnem = db.instructions.get_mnemonic(insn)
if mnem not in ("xor", "xorpd", "xorps", "pxor"):
return
if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2):
return
if is_nzxor_stack_cookie(fh.inner, bbh.inner, insn):
if is_nzxor_stack_cookie(db, fh.inner, bbh.inner, insn):
return
yield Characteristic("nzxor"), ih.address
def extract_insn_mnemonic_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""parse instruction mnemonic features"""
yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address
mnem = db.instructions.get_mnemonic(ih.inner)
yield Mnemonic(mnem), ih.address
def extract_insn_obfs_call_plus_5_characteristic_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""
parse call $+5 instruction from the given instruction.
"""
insn: idaapi.insn_t = ih.inner
if not idaapi.is_call_insn(insn):
if not db.instructions.is_call_instruction(insn):
return
if insn.ea + 5 == idc.get_operand_value(insn.ea, 0):
@@ -420,7 +430,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(
def extract_insn_peb_access_characteristic_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""parse instruction peb access
@@ -431,14 +441,15 @@ def extract_insn_peb_access_characteristic_features(
"""
insn: idaapi.insn_t = ih.inner
if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
mnem = db.instructions.get_mnemonic(insn)
if mnem not in ("push", "mov"):
return
if all(op.type != idaapi.o_mem for op in insn.ops):
# try to optimize for only memory references
return
disasm = idc.GetDisasm(insn.ea)
disasm = db.instructions.get_disassembly(insn)
if " fs:30h" in disasm or " gs:60h" in disasm:
# TODO(mike-hunhoff): use proper IDA API for fetching segment access
@@ -448,7 +459,7 @@ def extract_insn_peb_access_characteristic_features(
def extract_insn_segment_access_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""parse instruction fs or gs access
@@ -461,7 +472,7 @@ def extract_insn_segment_access_features(
# try to optimize for only memory references
return
disasm = idc.GetDisasm(insn.ea)
disasm = db.instructions.get_disassembly(insn)
if " fs:" in disasm:
# TODO(mike-hunhoff): use proper IDA API for fetching segment access
@@ -477,37 +488,39 @@ def extract_insn_segment_access_features(
def extract_insn_cross_section_cflow(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
insn: idaapi.insn_t = ih.inner
for ref in idautils.CodeRefsFrom(insn.ea, False):
if ref in get_imports(fh.ctx):
for ref in db.xrefs.code_refs_from_ea(insn.ea, flow=False):
if ref in get_imports(db, fh.ctx):
# ignore API calls
continue
if not idaapi.getseg(ref):
ref_seg = db.segments.get_at(ref)
if ref_seg is None:
# handle IDA API bug
continue
if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
insn_seg = db.segments.get_at(insn.ea)
if ref_seg == insn_seg:
continue
yield Characteristic("cross section flow"), ih.address
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
def extract_function_calls_from(db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
"""extract functions calls from features
most relevant at the function scope, however, its most efficient to extract at the instruction scope
"""
insn: idaapi.insn_t = ih.inner
if idaapi.is_call_insn(insn):
for ref in idautils.CodeRefsFrom(insn.ea, False):
if db.instructions.is_call_instruction(insn):
for ref in db.xrefs.code_refs_from_ea(insn.ea, flow=False):
yield Characteristic("calls from"), AbsoluteVirtualAddress(ref)
def extract_function_indirect_call_characteristic_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
db: Database, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
does not include calls like => call ds:dword_ABD4974
@@ -517,14 +530,14 @@ def extract_function_indirect_call_characteristic_features(
"""
insn: idaapi.insn_t = ih.inner
if idaapi.is_call_insn(insn) and idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
if db.instructions.is_call_instruction(insn) and idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
yield Characteristic("indirect call"), ih.address
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
def extract_features(db: Database, f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
"""extract instruction features"""
for inst_handler in INSTRUCTION_HANDLERS:
for feature, ea in inst_handler(f, bbh, insn):
for feature, ea in inst_handler(db, f, bbh, insn):
yield feature, ea

View File

@@ -56,7 +56,7 @@ def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]:
if ploc is not None:
# from vivisect.const:
# location: (L_VA, L_SIZE, L_LTYPE, L_TINFO)
pva, _, ptype, pinfo = ploc
(pva, _, ptype, pinfo) = ploc
if ptype == LOC_OP and not (pinfo & IF_NOFALL):
ret.append(pva)

View File

@@ -176,7 +176,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
try:
_, target = resolve_indirect_call(f.vw, insn.va, insn=insn)
(_, target) = resolve_indirect_call(f.vw, insn.va, insn=insn)
except NotFoundError:
# not able to resolve the indirect call, sorry
return

View File

@@ -26,16 +26,6 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
logger = logging.getLogger(__name__)
VOID_PTR_NUMBER_PARAMS = frozenset(
{
"hKey",
"hKeyRoot",
"hkResult",
"samDesired",
}
)
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
if param.deref is not None:
# pointer types contain a special "deref" member that stores the deref'd value
@@ -49,31 +39,10 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feat
# parsing the data up to here results in double-escaped backslashes, remove those here
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
else:
if param.name in VOID_PTR_NUMBER_PARAMS:
try:
yield Number(hexint(param.deref.value)), ch.address
except (ValueError, TypeError) as e:
logger.debug(
"failed to parse whitelisted void_ptr param %s value %s: %s",
param.name,
param.deref.value,
e,
)
else:
logger.debug("skipping deref param type %s", param.deref.type_)
logger.debug("skipping deref param type %s", param.deref.type_)
elif param.value is not None:
if param.type_ in PARAM_TYPE_INT:
yield Number(hexint(param.value)), ch.address
elif param.type_ == "void_ptr" and param.name in VOID_PTR_NUMBER_PARAMS:
try:
yield Number(hexint(param.value)), ch.address
except (ValueError, TypeError) as e:
logger.debug(
"failed to parse whitelisted void_ptr param %s value %s: %s",
param.name,
param.value,
e,
)
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Iterator
from pathlib import Path
@@ -39,8 +39,6 @@ from capa.features.extractors.base_extractor import (
DynamicFeatureExtractor,
)
logger = logging.getLogger(__name__)
def get_formatted_params(params: ParamList) -> list[str]:
params_list: list[str] = []
@@ -89,16 +87,6 @@ class VMRayExtractor(DynamicFeatureExtractor):
def get_processes(self) -> Iterator[ProcessHandle]:
for monitor_process in self.analysis.monitor_processes.values():
# skip invalid/incomplete monitor process entries, see #2807
if monitor_process.pid == 0 or not monitor_process.filename:
logger.debug(
"skipping incomplete process entry: pid=%d, filename=%s, monitor_id=%d",
monitor_process.pid,
monitor_process.filename,
monitor_process.monitor_id,
)
continue
address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
yield ProcessHandle(address, inner=monitor_process)

View File

@@ -327,9 +327,6 @@ def log_unsupported_os_error():
logger.error(" ")
logger.error(" capa currently only analyzes executables for some operating systems")
logger.error(" (including Windows, Linux, and Android).")
logger.error(" ")
logger.error(" If you know the target OS, you can specify it explicitly, for example:")
logger.error(" capa --os linux <sample>")
logger.error("-" * 80)

View File

@@ -14,6 +14,7 @@
import ida_kernwin
from ida_domain import Database
from capa.ida.plugin.error import UserCancelledError
from capa.ida.plugin.qt_compat import QtCore, Signal
@@ -43,7 +44,8 @@ class CapaExplorerFeatureExtractor(IdaFeatureExtractor):
"""
def __init__(self):
super().__init__()
db = Database.open()
super().__init__(db)
self.indicator = CapaExplorerProgressIndicator()
def extract_function_features(self, fh: FunctionHandle):

View File

@@ -126,57 +126,6 @@ def get_meta_str(vw):
return f"{', '.join(meta)}, number of functions: {len(vw.getFunctions())}"
def _is_probably_corrupt_pe(path: Path) -> bool:
"""
Heuristic check for obviously malformed PE samples that provoke
pathological behavior in vivisect (see GH-1989).
We treat a PE as "probably corrupt" when any section declares an
unrealistically large virtual size compared to the file size, e.g.
hundreds of megabytes in a tiny file. Such cases lead vivisect to
try to map enormous regions and can exhaust CPU/memory.
"""
try:
import pefile
except Exception:
# If pefile is unavailable, fall back to existing behavior.
return False
try:
pe = pefile.PE(str(path), fast_load=True)
except pefile.PEFormatError:
# Not a PE file (or badly formed); let existing checks handle it.
return False
except Exception:
return False
try:
file_size = path.stat().st_size
except OSError:
return False
if file_size <= 0:
return False
# Flag sections whose declared virtual size is wildly disproportionate
# to the file size (e.g. 900MB section in a ~400KB sample).
_VSIZE_FILE_RATIO = 128
_MAX_REASONABLE_VSIZE = 512 * 1024 * 1024 # 512 MB
max_reasonable = max(file_size * _VSIZE_FILE_RATIO, _MAX_REASONABLE_VSIZE)
for section in getattr(pe, "sections", []):
vsize = getattr(section, "Misc_VirtualSize", 0) or 0
if vsize > max_reasonable:
logger.debug(
"detected unrealistic PE section virtual size: 0x%x (file size: 0x%x), treating as corrupt",
vsize,
file_size,
)
return True
return False
def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
"""
load the program at the given path into a vivisect workspace using the given format.
@@ -194,18 +143,11 @@ def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
"""
# lazy import enables us to not require viv if user wants another backend.
import envi.exc
import viv_utils
import viv_utils.flirt
logger.debug("generating vivisect workspace for: %s", path)
if input_format in (FORMAT_PE, FORMAT_AUTO) and _is_probably_corrupt_pe(path):
raise CorruptFile(
"PE file appears to contain unrealistically large sections and is likely corrupt"
+ " - skipping analysis to avoid excessive resource usage."
)
try:
if input_format == FORMAT_AUTO:
if not is_supported_format(path):
@@ -222,8 +164,6 @@ def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
else:
raise ValueError("unexpected format: " + input_format)
except envi.exc.SegmentationViolation as e:
raise CorruptFile(f"Invalid memory access during binary parsing: {e}") from e
except Exception as e:
# vivisect raises raw Exception instances, and we don't want
# to do a subclass check via isinstance.
@@ -417,7 +357,7 @@ def get_extractor(
ida_auto.auto_wait()
logger.debug("idalib: opened database.")
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
return capa.features.extractors.ida.extractor.IdaFeatureExtractor.from_current_database()
elif backend == BACKEND_GHIDRA:
import pyghidra

View File

@@ -661,9 +661,7 @@ def get_rules_from_cli(args) -> RuleSet:
raises:
ShouldExitError: if the program is invoked incorrectly and should exit.
"""
enable_cache: bool = getattr(args, "enable_cache", True)
# this allows calling functions to easily disable rule caching, e.g., used by the rule linter to avoid
enable_cache: bool = True
try:
if capa.helpers.is_running_standalone() and args.is_default_rules:
cache_dir = get_default_root() / "cache"
@@ -945,7 +943,8 @@ def main(argv: Optional[list[str]] = None):
argv = sys.argv[1:]
desc = "The FLARE team's open-source tool to identify capabilities in executable files."
epilog = textwrap.dedent("""
epilog = textwrap.dedent(
"""
By default, capa uses a default set of embedded rules.
You can see the rule set here:
https://github.com/mandiant/capa-rules
@@ -972,7 +971,8 @@ def main(argv: Optional[list[str]] = None):
filter rules by meta fields, e.g. rule name or namespace
capa -t "create TCP socket" suspicious.exe
""")
"""
)
parser = argparse.ArgumentParser(
description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
@@ -1094,7 +1094,7 @@ def ida_main():
meta = capa.ida.helpers.collect_metadata([rules_path])
capabilities = find_capabilities(rules, capa.features.extractors.ida.extractor.IdaFeatureExtractor())
capabilities = find_capabilities(rules, capa.features.extractors.ida.extractor.IdaFeatureExtractor.from_current_database())
meta.analysis.feature_counts = capabilities.feature_counts
meta.analysis.library_functions = capabilities.library_functions

View File

@@ -31,7 +31,6 @@ $ protoc.exe --python_out=. --mypy_out=. <path_to_proto> (e.g. capa/render/proto
Alternatively, --pyi_out=. can be used to generate a Python Interface file that supports development
"""
import datetime
from typing import Any, Union

View File

@@ -17,7 +17,6 @@ import io
from typing import Union, Iterator, Optional
import rich.console
from rich.markup import escape
from rich.progress import Text
import capa.render.result_document as rd
@@ -25,21 +24,21 @@ import capa.render.result_document as rd
def bold(s: str) -> Text:
"""draw attention to the given string"""
return Text.from_markup(f"[cyan]{escape(s)}")
return Text.from_markup(f"[cyan]{s}")
def bold2(s: str) -> Text:
"""draw attention to the given string, within a `bold` section"""
return Text.from_markup(f"[green]{escape(s)}")
return Text.from_markup(f"[green]{s}")
def mute(s: str) -> Text:
"""draw attention away from the given string"""
return Text.from_markup(f"[dim]{escape(s)}")
return Text.from_markup(f"[dim]{s}")
def warn(s: str) -> Text:
return Text.from_markup(f"[yellow]{escape(s)}")
return Text.from_markup(f"[yellow]{s}")
def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]):

View File

@@ -159,8 +159,9 @@ def render_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
s.append(f"){rest}")
newline = "\n"
# Use default (non-dim) styling for API details so they remain readable in -vv output
return f"{pname}{{pid:{call.thread.process.pid},tid:{call.thread.tid},call:{call.id}}}\n{newline.join(s)}"
return (
f"{pname}{{pid:{call.thread.process.pid},tid:{call.thread.tid},call:{call.id}}}\n{rutils.mute(newline.join(s))}"
)
def render_short_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
@@ -179,8 +180,7 @@ def render_short_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
s.append(f"){rest}")
newline = "\n"
# Use default (non-dim) styling for API details so they remain readable in -vv output
return f"call:{call.id}\n{newline.join(s)}"
return f"call:{call.id}\n{rutils.mute(newline.join(s))}"
def render_static_meta(console: Console, meta: rd.StaticMetadata):

View File

@@ -1143,8 +1143,6 @@ class Rule:
else:
# use pyyaml because it can be much faster than ruamel (pure python)
doc = yaml.load(s, Loader=cls._get_yaml_loader())
if doc is None or not isinstance(doc, dict) or "rule" not in doc:
raise InvalidRule("empty or invalid YAML document")
return cls.from_dict(doc, s)
@classmethod
@@ -1449,13 +1447,6 @@ class RuleSet:
scope: self._index_rules_by_feature(scope, self.rules_by_scope[scope], scores_by_rule) for scope in scopes
}
# Pre-compute the topological index mapping for each scope.
# This avoids rebuilding the dict on every call to _match (which runs once per
# instruction/basic-block/function/file scope, i.e. potentially millions of times).
self._rule_index_by_scope: dict[Scope, dict[str, int]] = {
scope: {rule.name: i for i, rule in enumerate(self.rules_by_scope[scope])} for scope in scopes
}
@property
def file_rules(self):
return self.rules_by_scope[Scope.FILE]
@@ -1885,13 +1876,11 @@ class RuleSet:
"""
done = []
# use a list as a stack: append new items and pop() from the end, both O(1).
# order doesn't matter here since every rule in the queue is processed eventually.
rules_stack = list(rules)
while rules_stack:
rule = rules_stack.pop()
# use a queue of rules, because we'll be modifying the list (appending new items) as we go.
while rules:
rule = rules.pop(0)
for subscope_rule in rule.extract_subscope_rules():
rules_stack.append(subscope_rule)
rules.append(subscope_rule)
done.append(rule)
return done
@@ -1940,11 +1929,11 @@ class RuleSet:
"""
feature_index: RuleSet._RuleFeatureIndex = self._feature_indexes_by_scopes[scope]
rules: list[Rule] = self.rules_by_scope[scope]
# Topologic location of rule given its name.
# That is, rules with a lower index should be evaluated first, since their dependencies
# will be evaluated later.
# Pre-computed in __init__ to avoid rebuilding on every _match call.
rule_index_by_rule_name = self._rule_index_by_scope[scope]
rule_index_by_rule_name = {rule.name: i for i, rule in enumerate(rules)}
# This algorithm is optimized to evaluate as few rules as possible,
# because the less work we do, the faster capa can run.
@@ -2040,9 +2029,7 @@ class RuleSet:
candidate_rules = [self.rules[name] for name in candidate_rule_names]
# Order rules topologically, so that rules with dependencies work correctly.
# Sort descending so pop() from the end yields the topologically-first rule in O(1).
RuleSet._sort_rules_by_index(rule_index_by_rule_name, candidate_rules)
candidate_rules.reverse()
#
# The following is derived from ceng.match
@@ -2057,7 +2044,7 @@ class RuleSet:
augmented_features = features
while candidate_rules:
rule = candidate_rules.pop()
rule = candidate_rules.pop(0)
res = rule.evaluate(augmented_features, short_circuit=True)
if res:
# we first matched the rule with short circuiting enabled.
@@ -2096,7 +2083,6 @@ class RuleSet:
candidate_rule_names.update(new_candidates)
candidate_rules.extend([self.rules[rule_name] for rule_name in new_candidates])
RuleSet._sort_rules_by_index(rule_index_by_rule_name, candidate_rules)
candidate_rules.reverse()
return (augmented_features, results)
@@ -2233,10 +2219,7 @@ def get_rules(
try:
rule = Rule.from_yaml(content.decode("utf-8"))
except InvalidRule as e:
if e.args and e.args[0] == "empty or invalid YAML document":
logger.warning("skipping %s: %s", path, e)
continue
except InvalidRule:
raise
else:
rule.meta["capa/path"] = path.as_posix()

View File

@@ -2,21 +2,6 @@
See `capa -h` for all supported arguments and usage examples.
## Ways to consume capa output
| Method | Output / interface | Typical use |
|--------|--------------------|-------------|
| **CLI** | Text (default, `-v`, `-vv`), JSON (`-j`), or other formats | Scripting, CI, one-off analysis |
| [**IDA Pro**](https://github.com/mandiant/capa/tree/master/capa/ida/plugin) | capa Explorer plugin inside IDA | Interactive analysis with jump-to-address |
| [**Ghidra**](https://github.com/mandiant/capa/tree/master/capa/ghidra/plugin) | capa Explorer plugin inside Ghidra | Interactive analysis with Ghidra integration |
| [**Binary Ninja**](https://github.com/mandiant/capa/tree/master/capa/features/extractors/binja) | capa run using Binary Ninja as the analysis backend | Interactive analysis with Binary Ninja integration |
| [**Dynamic (Sandbox)**](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox) | capa run on dynamic sandbox report (CAPE, VMRay, etc.) | Dynamic analysis of sandbox output |
| [**Web (capa Explorer)**](https://mandiant.github.io/capa/explorer/) | Web UI (upload JSON or load from URL) | Sharing results, viewing from VirusTotal or similar |
## Default vs verbose output
By default, capa shows only *top-level* rule matches: capabilities that are not already implied by another displayed rule. For example, if a rule "persist via Run registry key" matches and it *contains* a match for "set registry value", the default output lists only "persist via Run registry key". This keeps the default output short while still reflecting all detected capabilities at the top level. Use **`-v`** to see all rule matches, including nested ones. Use **`-vv`** for an even more detailed view that shows how each rule matched.
## tips and tricks
### only run selected rules
@@ -26,7 +11,7 @@ For example, `capa -t william.ballenthin@mandiant.com` runs rules that reference
### only analyze selected functions
Use the `--restrict-to-functions` option to extract capabilities from only a selected set of functions. This is useful for analyzing
large functions and figuring out their capabilities and their address of occurrence; for example: PEB access, RC4 encryption, etc.
large functions and figuring out their capabilities and their address of occurance; for example: PEB access, RC4 encryption, etc.
To use this, you can copy the virtual addresses from your favorite disassembler and pass them to capa as follows:
`capa sample.exe --restrict-to-functions 0x4019C0,0x401CD0`. If you add the `-v` option then capa will extract the interesting parts of a function for you.

View File

@@ -144,11 +144,11 @@ dev = [
"flake8-simplify==0.30.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.15.0",
"black==26.3.0",
"isort==8.0.0",
"ruff==0.14.7",
"black==25.12.0",
"isort==7.0.0",
"mypy==1.19.1",
"mypy-protobuf==5.0.0",
"mypy-protobuf==4.0.0",
"PyGithub==2.8.1",
"bump-my-version==1.2.4",
# type stubs for mypy
@@ -165,8 +165,8 @@ build = [
# we want all developer environments to be consistent.
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pyinstaller==6.19.0",
"setuptools==80.10.1",
"pyinstaller==6.17.0",
"setuptools==80.9.0",
"build==1.4.0"
]
scripts = [

View File

@@ -10,7 +10,7 @@ annotated-types==0.7.0
colorama==0.4.6
cxxfilt==0.3.0
dncil==1.0.2
dnfile==0.18.0
dnfile==0.17.0
funcy==2.0
humanize==4.15.0
ida-netnode==3.0
@@ -21,11 +21,11 @@ mdurl==0.1.2
msgpack==1.0.8
networkx==3.4.2
pefile==2024.8.26
pip==26.0
protobuf==7.34.0
pip==25.3
protobuf==6.33.1
pyasn1==0.5.1
pyasn1-modules==0.3.0
pycparser==3.0
pycparser==2.23
pydantic==2.12.4
# pydantic pins pydantic-core,
# but dependabot updates these separately (which is broken) and is annoying,
@@ -37,13 +37,13 @@ pygments==2.19.1
pyghidra==3.0.0
python-flirt==0.9.2
pyyaml==6.0.2
rich==14.3.2
rich==14.2.0
ruamel-yaml==0.19.1
ruamel-yaml-clib==0.2.14
setuptools==80.10.1
setuptools==80.9.0
six==1.17.0
sortedcontainers==2.4.0
viv-utils==0.8.0
vivisect==1.3.0
vivisect==1.2.1
msgspec==0.20.0
bump-my-version==1.2.4

2
rules

Submodule rules updated: 03a20f69ae...6a0d506713

View File

@@ -61,7 +61,6 @@ usage:
parallelism factor
--no-mp disable subprocesses
"""
import sys
import json
import logging

View File

@@ -28,7 +28,6 @@ Requires:
- sarif_om 1.0.4
- jschema_to_python 1.2.3
"""
import sys
import json
import logging

View File

@@ -32,7 +32,6 @@ Example:
│00000070│ 39 31 37 36 61 64 36 38 ┊ 32 66 66 64 64 36 35 66 │9176ad68┊2ffdd65f│
│00000080│ 30 61 36 36 39 12 28 61 ┊ 34 62 33 35 64 65 37 31 │0a669•(a┊4b35de71│
"""
import sys
import logging
import argparse

View File

@@ -18,7 +18,6 @@ detect-elf-os
Attempt to detect the underlying OS that the given ELF file targets.
"""
import sys
import logging
import argparse

View File

@@ -36,7 +36,6 @@ Check the log window for any errors, and/or the summary of changes.
Derived from: https://github.com/mandiant/capa/blob/master/scripts/import-to-ida.py
"""
import os
import json
from pathlib import Path

View File

@@ -1229,7 +1229,6 @@ def main(argv=None):
time0 = time.time()
args.enable_cache = False
try:
rules = capa.main.get_rules_from_cli(args)
except capa.main.ShouldExitError as e:

View File

@@ -54,7 +54,6 @@ Example::
0x44cb60: ?
0x44cba0: __guard_icall_checks_enforced
"""
import sys
import logging
import argparse

View File

@@ -16,7 +16,6 @@
"""
Extract files relevant to capa analysis from VMRay Analysis Archive and create a new ZIP file.
"""
import sys
import logging
import zipfile

View File

@@ -43,7 +43,6 @@ example:
^^^ --label or git hash
"""
import sys
import timeit
import logging

View File

@@ -34,7 +34,6 @@ Example:
│00000080│ 30 61 36 36 39 12 28 61 ┊ 34 62 33 35 64 65 37 31 │0a669•(a┊4b35de71│
"""
import sys
import logging
import argparse

View File

@@ -37,7 +37,6 @@ Example:
────┴────────────────────────────────────────────────────
"""
import sys
import logging
import argparse

View File

@@ -46,7 +46,6 @@ Example:
2022-01-24 22:35:39,839 [INFO] Starting extraction...
2022-01-24 22:35:42,632 [INFO] Writing results to linter-data.json
"""
import json
import logging
import argparse

View File

@@ -54,7 +54,6 @@ Example::
- connect TCP socket
...
"""
import sys
import logging
import argparse

View File

@@ -70,7 +70,6 @@ Example::
insn: 0x10001027: mnemonic(shl)
...
"""
import sys
import logging
import argparse
@@ -276,7 +275,7 @@ def ida_main():
function = idc.get_func_attr(idc.here(), idc.FUNCATTR_START)
print(f"getting features for current function {hex(function)}")
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor.from_current_database()
if not function:
for feature, addr in extractor.extract_file_features():

View File

@@ -175,7 +175,7 @@ def ida_main():
function = idc.get_func_attr(idc.here(), idc.FUNCATTR_START)
print(f"getting features for current function {hex(function)}")
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor.from_current_database()
feature_map: Counter[Feature] = Counter()
feature_map.update([feature for feature, _ in extractor.extract_file_features()])

View File

@@ -20,7 +20,6 @@ from functools import lru_cache
import pytest
import capa.loader
import capa.features.file
import capa.features.insn
import capa.features.common
@@ -242,7 +241,7 @@ def get_idalib_extractor(path: Path):
ida_auto.auto_wait()
logger.debug("idalib: opened database.")
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor.from_current_database()
fixup_idalib(path, extractor)
return extractor

View File

@@ -458,7 +458,9 @@ def test_pattern_parsing():
capture="#int",
)
assert BinExport2InstructionPatternMatcher.from_str("""
assert (
BinExport2InstructionPatternMatcher.from_str(
"""
# comment
br reg
br reg(not-stack)
@@ -479,7 +481,10 @@ def test_pattern_parsing():
call [reg * #int + #int]
call [reg + reg + #int]
call [reg + #int]
""").queries is not None
"""
).queries
is not None
)
def match_address(extractor: BinExport2FeatureExtractor, queries: BinExport2InstructionPatternMatcher, address: int):
@@ -502,7 +507,8 @@ def match_address_with_be2(
def test_pattern_matching():
queries = BinExport2InstructionPatternMatcher.from_str("""
queries = BinExport2InstructionPatternMatcher.from_str(
"""
br reg(stack) ; capture reg
br reg(not-stack) ; capture reg
mov reg0, reg1 ; capture reg0
@@ -516,7 +522,8 @@ def test_pattern_matching():
ldp|stp reg, reg, [reg, #int]! ; capture #int
ldp|stp reg, reg, [reg], #int ; capture #int
ldrb reg0, [reg1(not-stack), reg2] ; capture reg2
""")
"""
)
# 0x210184: ldrb w2, [x0, x1]
# query: ldrb reg0, [reg1(not-stack), reg2] ; capture reg2"
@@ -543,9 +550,11 @@ BE2_EXTRACTOR_687 = fixtures.get_binexport_extractor(
def test_pattern_matching_exclamation():
queries = BinExport2InstructionPatternMatcher.from_str("""
queries = BinExport2InstructionPatternMatcher.from_str(
"""
stp reg, reg, [reg, #int]! ; capture #int
""")
"""
)
# note this captures the sp
# 0x107918: stp x20, x19, [sp,0xFFFFFFFFFFFFFFE0]!
@@ -555,9 +564,11 @@ def test_pattern_matching_exclamation():
def test_pattern_matching_stack():
queries = BinExport2InstructionPatternMatcher.from_str("""
queries = BinExport2InstructionPatternMatcher.from_str(
"""
stp reg, reg, [reg(stack), #int]! ; capture #int
""")
"""
)
# note this does capture the sp
# compare this with the test above (exclamation)
@@ -568,9 +579,11 @@ def test_pattern_matching_stack():
def test_pattern_matching_not_stack():
queries = BinExport2InstructionPatternMatcher.from_str("""
queries = BinExport2InstructionPatternMatcher.from_str(
"""
stp reg, reg, [reg(not-stack), #int]! ; capture #int
""")
"""
)
# note this does not capture the sp
# compare this with the test above (exclamation)
@@ -584,9 +597,11 @@ BE2_EXTRACTOR_MIMI = fixtures.get_binexport_extractor(CD / "data" / "binexport2"
def test_pattern_matching_x86():
queries = BinExport2InstructionPatternMatcher.from_str("""
queries = BinExport2InstructionPatternMatcher.from_str(
"""
cmp|lea reg, [reg(not-stack) + #int0] ; capture #int0
""")
"""
)
# 0x4018c0: LEA ECX, [EBX+0x2]
# query: cmp|lea reg, [reg(not-stack) + #int0] ; capture #int0

View File

@@ -23,7 +23,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
rules = capa.rules.RuleSet(
[
# this rule should match on a function (0x4073F0)
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: install service
@@ -37,9 +39,13 @@ def test_match_across_scopes_file_function(z9324d_extractor):
- api: advapi32.OpenSCManagerA
- api: advapi32.CreateServiceA
- api: advapi32.StartServiceA
""")),
"""
)
),
# this rule should match on a file feature
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: .text section
@@ -50,11 +56,15 @@ def test_match_across_scopes_file_function(z9324d_extractor):
- 9324d1a8ae37a36ae560c37448c9705a
features:
- section: .text
""")),
"""
)
),
# this rule should match on earlier rule matches:
# - install service, with function scope
# - .text section, with file scope
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: .text section and install service
@@ -67,7 +77,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
- and:
- match: install service
- match: .text section
""")),
"""
)
),
]
)
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
@@ -80,7 +92,9 @@ def test_match_across_scopes(z9324d_extractor):
rules = capa.rules.RuleSet(
[
# this rule should match on a basic block (including at least 0x403685)
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: tight loop
@@ -91,10 +105,14 @@ def test_match_across_scopes(z9324d_extractor):
- 9324d1a8ae37a36ae560c37448c9705a:0x403685
features:
- characteristic: tight loop
""")),
"""
)
),
# this rule should match on a function (0x403660)
# based on API, as well as prior basic block rule match
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: kill thread loop
@@ -108,9 +126,13 @@ def test_match_across_scopes(z9324d_extractor):
- api: kernel32.TerminateThread
- api: kernel32.CloseHandle
- match: tight loop
""")),
"""
)
),
# this rule should match on a file feature and a prior function rule match
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: kill thread program
@@ -123,7 +145,9 @@ def test_match_across_scopes(z9324d_extractor):
- and:
- section: .text
- match: kill thread loop
""")),
"""
)
),
]
)
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
@@ -133,7 +157,11 @@ def test_match_across_scopes(z9324d_extractor):
def test_subscope_bb_rules(z9324d_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -144,14 +172,22 @@ def test_subscope_bb_rules(z9324d_extractor):
- and:
- basic block:
- characteristic: tight loop
"""))])
"""
)
)
]
)
# tight loop at 0x403685
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "test rule" in capabilities.matches
def test_match_specific_functions(z9324d_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: receive data
@@ -163,7 +199,11 @@ def test_match_specific_functions(z9324d_extractor):
features:
- or:
- api: recv
"""))])
"""
)
)
]
)
extractor = FunctionFilter(z9324d_extractor, {0x4019C0})
capabilities = capa.capabilities.common.find_capabilities(rules, extractor)
matches = capabilities.matches["receive data"]
@@ -174,7 +214,11 @@ def test_match_specific_functions(z9324d_extractor):
def test_byte_matching(z9324d_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: byte match test
@@ -184,13 +228,21 @@ def test_byte_matching(z9324d_extractor):
features:
- and:
- bytes: ED 24 9E F4 52 A9 07 47 55 8E E1 AB 30 8E 23 61
"""))])
"""
)
)
]
)
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "byte match test" in capabilities.matches
def test_com_feature_matching(z395eb_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: initialize IWebBrowser2
@@ -202,13 +254,21 @@ def test_com_feature_matching(z395eb_extractor):
- api: ole32.CoCreateInstance
- com/class: InternetExplorer #bytes: 01 DF 02 00 00 00 00 00 C0 00 00 00 00 00 00 46 = CLSID_InternetExplorer
- com/interface: IWebBrowser2 #bytes: 61 16 0C D3 AF CD D0 11 8A 3E 00 C0 4F C9 E2 6E = IID_IWebBrowser2
"""))])
"""
)
)
]
)
capabilities = capa.main.find_capabilities(rules, z395eb_extractor)
assert "initialize IWebBrowser2" in capabilities.matches
def test_count_bb(z9324d_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: count bb
@@ -219,14 +279,22 @@ def test_count_bb(z9324d_extractor):
features:
- and:
- count(basic blocks): 1 or more
"""))])
"""
)
)
]
)
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "count bb" in capabilities.matches
def test_instruction_scope(z9324d_extractor):
# .text:004071A4 68 E8 03 00 00 push 3E8h
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: push 1000
@@ -238,7 +306,11 @@ def test_instruction_scope(z9324d_extractor):
- and:
- mnemonic: push
- number: 1000
"""))])
"""
)
)
]
)
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "push 1000" in capabilities.matches
assert 0x4071A4 in {result[0] for result in capabilities.matches["push 1000"]}
@@ -248,7 +320,11 @@ def test_instruction_subscope(z9324d_extractor):
# .text:00406F60 sub_406F60 proc near
# [...]
# .text:004071A4 68 E8 03 00 00 push 3E8h
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: push 1000 on i386
@@ -262,7 +338,11 @@ def test_instruction_subscope(z9324d_extractor):
- instruction:
- mnemonic: push
- number: 1000
"""))])
"""
)
)
]
)
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "push 1000 on i386" in capabilities.matches
assert 0x406F60 in {result[0] for result in capabilities.matches["push 1000 on i386"]}

View File

@@ -81,7 +81,8 @@ def test_cape_extractor(version: str, filename: str, exception: Type[BaseExcepti
def test_cape_model_argument():
call = Call.model_validate_json("""
call = Call.model_validate_json(
"""
{
"timestamp": "2023-10-20 12:30:14,015",
"thread_id": "2380",
@@ -104,6 +105,7 @@ def test_cape_model_argument():
"repeated": 19,
"id": 0
}
""")
"""
)
assert call.arguments[0].value == 30
assert call.arguments[1].value == 0x30

View File

@@ -18,7 +18,8 @@ from capa.features.extractors.drakvuf.models import SystemCall
def test_syscall_argument_construction():
call_dictionary = json.loads(r"""
call_dictionary = json.loads(
r"""
{
"Plugin": "syscall",
"TimeStamp": "1716999134.581449",
@@ -42,7 +43,8 @@ def test_syscall_argument_construction():
"Timeout": "0xfffff506a02846d8",
"Alertable": "0x0"
}
""")
"""
)
call = SystemCall(**call_dictionary)
assert len(call.arguments) == call.nargs
assert call.arguments["IoCompletionHandle"] == "0xffffffff80001ac0"

View File

@@ -83,7 +83,8 @@ def get_call_ids(matches) -> Iterator[int]:
def test_dynamic_call_scope():
extractor = get_0000a657_thread3064()
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -92,7 +93,8 @@ def test_dynamic_call_scope():
dynamic: call
features:
- api: GetSystemTimeAsFileTime
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
ruleset = capa.rules.RuleSet([r])
@@ -114,7 +116,8 @@ def test_dynamic_call_scope():
def test_dynamic_span_scope():
extractor = get_0000a657_thread3064()
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -128,7 +131,8 @@ def test_dynamic_span_scope():
- api: LdrGetDllHandle
- api: LdrGetProcedureAddress
- count(api(LdrGetDllHandle)): 2
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
ruleset = capa.rules.RuleSet([r])
@@ -154,7 +158,8 @@ def test_dynamic_span_scope():
def test_dynamic_span_scope_length():
extractor = get_0000a657_thread3064()
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -165,7 +170,8 @@ def test_dynamic_span_scope_length():
- and:
- api: GetSystemTimeAsFileTime
- api: RtlAddVectoredExceptionHandler
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
ruleset = capa.rules.RuleSet([r])
@@ -190,7 +196,8 @@ def test_dynamic_span_scope_length():
def test_dynamic_span_call_subscope():
extractor = get_0000a657_thread3064()
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -203,7 +210,8 @@ def test_dynamic_span_call_subscope():
- and:
- api: LdrGetProcedureAddress
- string: AddVectoredExceptionHandler
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
ruleset = capa.rules.RuleSet([r])
@@ -226,7 +234,8 @@ def test_dynamic_span_call_subscope():
def test_dynamic_span_scope_span_subscope():
extractor = get_0000a657_thread3064()
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -247,7 +256,8 @@ def test_dynamic_span_scope_span_subscope():
- api: LdrGetDllHandle
- api: LdrGetProcedureAddress
- string: RemoveVectoredExceptionHandler
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
ruleset = capa.rules.RuleSet([r])
@@ -259,7 +269,8 @@ def test_dynamic_span_scope_span_subscope():
# show that you can't use thread subscope in span rules.
def test_dynamic_span_scope_thread_subscope():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -270,7 +281,8 @@ def test_dynamic_span_scope_thread_subscope():
- and:
- thread:
- string: "foo"
""")
"""
)
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(rule)
@@ -288,7 +300,8 @@ def test_dynamic_span_scope_thread_subscope():
def test_dynamic_span_example():
extractor = get_0000a657_thread3064()
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -306,7 +319,8 @@ def test_dynamic_span_example():
- api: LdrGetProcedureAddress
- string: "AddVectoredExceptionHandler"
- api: RtlAddVectoredExceptionHandler
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
ruleset = capa.rules.RuleSet([r])
@@ -331,7 +345,8 @@ def test_dynamic_span_example():
def test_dynamic_span_multiple_spans_overlapping_single_event():
extractor = get_0000a657_thread3064()
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -344,7 +359,8 @@ def test_dynamic_span_multiple_spans_overlapping_single_event():
- and:
- api: LdrGetProcedureAddress
- string: "AddVectoredExceptionHandler"
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
ruleset = capa.rules.RuleSet([r])
@@ -370,7 +386,9 @@ def test_dynamic_span_scope_match_statements():
ruleset = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: resolve add VEH
@@ -383,8 +401,12 @@ def test_dynamic_span_scope_match_statements():
- api: LdrGetDllHandle
- api: LdrGetProcedureAddress
- string: AddVectoredExceptionHandler
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: resolve remove VEH
@@ -397,8 +419,12 @@ def test_dynamic_span_scope_match_statements():
- api: LdrGetDllHandle
- api: LdrGetProcedureAddress
- string: RemoveVectoredExceptionHandler
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: resolve add and remove VEH
@@ -409,8 +435,12 @@ def test_dynamic_span_scope_match_statements():
- and:
- match: resolve add VEH
- match: resolve remove VEH
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: has VEH runtime linking
@@ -420,7 +450,9 @@ def test_dynamic_span_scope_match_statements():
features:
- and:
- match: linking/runtime-linking/veh
""")),
"""
)
),
]
)

View File

@@ -17,7 +17,8 @@ import textwrap
import capa.rules
EXPECTED = textwrap.dedent("""\
EXPECTED = textwrap.dedent(
"""\
rule:
meta:
name: test rule
@@ -33,11 +34,13 @@ EXPECTED = textwrap.dedent("""\
- and:
- number: 1
- number: 2
""")
"""
)
def test_rule_reformat_top_level_elements():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
features:
- and:
@@ -53,13 +56,15 @@ def test_rule_reformat_top_level_elements():
examples:
- foo1234
- bar5678
""")
"""
)
assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
def test_rule_reformat_indentation():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -75,13 +80,15 @@ def test_rule_reformat_indentation():
- and:
- number: 1
- number: 2
""")
"""
)
assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
def test_rule_reformat_order():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
authors:
@@ -97,7 +104,8 @@ def test_rule_reformat_order():
- and:
- number: 1
- number: 2
""")
"""
)
assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
@@ -105,7 +113,8 @@ def test_rule_reformat_order():
def test_rule_reformat_meta_update():
# test updating the rule content after parsing
src = textwrap.dedent("""
src = textwrap.dedent(
"""
rule:
meta:
authors:
@@ -121,7 +130,8 @@ def test_rule_reformat_meta_update():
- and:
- number: 1
- number: 2
""")
"""
)
rule = capa.rules.Rule.from_yaml(src)
rule.name = "test rule"
@@ -131,7 +141,8 @@ def test_rule_reformat_meta_update():
def test_rule_reformat_string_description():
# the `description` should be aligned with the preceding feature name.
# see #263
src = textwrap.dedent("""
src = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -144,7 +155,8 @@ def test_rule_reformat_string_description():
- and:
- string: foo
description: bar
""").lstrip()
"""
).lstrip()
rule = capa.rules.Rule.from_yaml(src)
assert rule.to_yaml() == src

View File

@@ -108,7 +108,9 @@ def test_null_feature_extractor():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: create file
@@ -118,7 +120,9 @@ def test_null_feature_extractor():
features:
- and:
- api: CreateFile
""")),
"""
)
),
]
)
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)

View File

@@ -88,7 +88,9 @@ def test_null_feature_extractor():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: xor loop
@@ -100,7 +102,9 @@ def test_null_feature_extractor():
- characteristic: tight loop
- mnemonic: xor
- characteristic: nzxor
""")),
"""
)
),
]
)
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)

187
tests/test_ida_features.py Normal file
View File

@@ -0,0 +1,187 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
run this script from within IDA to test the IDA feature extractor.
you must have loaded a file referenced by a test case in order
for this to do anything meaningful. for example, mimikatz.exe from testfiles.
you can invoke from the command line like this:
& 'C:\\Program Files\\IDA Pro 8.2\\idat.exe' \
-S"C:\\Exclusions\\code\\capa\\tests\\test_ida_features.py --CAPA_AUTOEXIT=true" \
-A \
-Lidalog \
'C:\\Exclusions\\code\\capa\\tests\\data\\mimikatz.exe_'
if you invoke from the command line, and provide the script argument `--CAPA_AUTOEXIT=true`,
then the script will exit IDA after running the tests.
the output (in idalog) will look like this:
```
Loading processor module C:\\Program Files\\IDA Pro 8.2\\procs\\pc.dll for metapc...Initializing processor module metapc...OK
Loading type libraries...
Autoanalysis subsystem has been initialized.
Database for file 'mimikatz.exe_' has been loaded.
--------------------------------------------------------------------------------
PASS: test_ida_feature_counts/mimikatz-function=0x40E5C2-basic block-7
PASS: test_ida_feature_counts/mimikatz-function=0x4702FD-characteristic(calls from)-0
SKIP: test_ida_features/294b8d...-function=0x404970,bb=0x404970,insn=0x40499F-string(\r\n\x00:ht)-False
SKIP: test_ida_features/64d9f-function=0x10001510,bb=0x100015B0-offset(0x4000)-True
...
SKIP: test_ida_features/pma16-01-function=0x404356,bb=0x4043B9-arch(i386)-True
PASS: test_ida_features/mimikatz-file-import(cabinet.FCIAddFile)-True
DONE
C:\\Exclusions\\code\\capa\\tests\\test_ida_features.py: Traceback (most recent call last):
File "C:\\Program Files\\IDA Pro 8.2\\python\\3\\ida_idaapi.py", line 588, in IDAPython_ExecScript
exec(code, g)
File "C:/Exclusions/code/capa/tests/test_ida_features.py", line 120, in <module>
sys.exit(0)
SystemExit: 0
-> OK
Flushing buffers, please wait...ok
```
Look for lines that start with "FAIL" to identify test failures.
"""
import io
import sys
import inspect
import logging
import traceback
from pathlib import Path
import pytest
try:
sys.path.append(str(Path(__file__).parent))
import fixtures
finally:
sys.path.pop()
logger = logging.getLogger("test_ida_features")
def check_input_file(wanted):
    """ensure the database currently open in IDA matches the given sample MD5.

    `wanted` is the expected hex digest (lowercase). raises RuntimeError when
    the loaded sample's hash does not prefix-match it.
    """
    import idautils

    raw = idautils.GetInputFileMD5()
    try:
        # some versions (7.4) of IDA return a truncated version of the MD5.
        # https://github.com/idapython/bin/issues/11
        current = raw[:31].decode("ascii").lower()
    except UnicodeDecodeError:
        # around IDA 7.5, GetInputFileMD5 began returning the raw digest bytes
        # rather than the hex string.
        current = raw[:15].hex().lower()

    if not wanted.startswith(current):
        raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`")
def get_ida_extractor(_path):
    """return a capa feature extractor backed by the database currently open in IDA.

    `_path` exists only for signature compatibility with the fixtures helpers;
    extraction always targets the active database.
    """
    # have to import this inline so pytest doesn't bail outside of IDA
    from capa.features.extractors.ida import extractor as ida_extractor

    return ida_extractor.IdaFeatureExtractor.from_current_database()
def nocollect(f):
    """mark the decorated callable so pytest's collector skips it.

    pytest honors a false `__test__` attribute and will not collect the
    function as a test; the callable itself is returned unchanged.
    """
    setattr(f, "__test__", False)
    return f
# although these look like pytest tests, they're not, because they don't run within pytest
# (the runner is below) and they use `yield`, which is deprecated.
@nocollect
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_ida_features():
    """yield (test name, case id, state, detail) for each feature-presence case.

    state is "skip" when the loaded database is not the case's sample,
    "fail" with a formatted traceback on error, and "pass" otherwise.
    """
    # we're guaranteed to be in a function here, so there's a stack frame
    this_name = inspect.currentframe().f_code.co_name  # type: ignore

    cases = fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_PRESENCE_TESTS_IDA
    for sample, scope, feature, expected in cases:
        case_id = fixtures.make_test_id((sample, scope, feature, expected))

        try:
            check_input_file(fixtures.get_sample_md5_by_name(sample))
        except RuntimeError:
            # the currently-loaded database is some other sample: skip this case.
            yield this_name, case_id, "skip", None
            continue

        resolved_scope = fixtures.resolve_scope(scope)
        resolved_sample = fixtures.resolve_sample(sample)

        try:
            fixtures.do_test_feature_presence(get_ida_extractor, resolved_sample, resolved_scope, feature, expected)
        except Exception:
            yield this_name, case_id, "fail", traceback.format_exc()
        else:
            yield this_name, case_id, "pass", None
@nocollect
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_ida_feature_counts():
    """yield (test name, case id, state, detail) for each feature-count case.

    state is "skip" when the loaded database is not the case's sample,
    "fail" with a formatted traceback on error, and "pass" otherwise.
    """
    # we're guaranteed to be in a function here, so there's a stack frame
    this_name = inspect.currentframe().f_code.co_name  # type: ignore

    for sample, scope, feature, expected in fixtures.FEATURE_COUNT_TESTS:
        case_id = fixtures.make_test_id((sample, scope, feature, expected))

        try:
            check_input_file(fixtures.get_sample_md5_by_name(sample))
        except RuntimeError:
            # the currently-loaded database is some other sample: skip this case.
            yield this_name, case_id, "skip", None
            continue

        resolved_scope = fixtures.resolve_scope(scope)
        resolved_sample = fixtures.resolve_sample(sample)

        try:
            fixtures.do_test_feature_count(get_ida_extractor, resolved_sample, resolved_scope, feature, expected)
        except Exception:
            yield this_name, case_id, "fail", traceback.format_exc()
        else:
            yield this_name, case_id, "pass", None
if __name__ == "__main__":
    import idc
    import ida_auto

    # block until IDA's auto-analysis completes, so extraction sees final results.
    ida_auto.auto_wait()

    print("-" * 80)

    # invoke all functions in this module that start with `test_`.
    # note: use distinct names for the inner loop variables — the original
    # rebound the outer loop variable `name` and shadowed the builtin `id`.
    module = sys.modules[__name__]
    for test_name in dir(module):
        if not test_name.startswith("test_"):
            continue

        test = getattr(module, test_name)

        logger.debug("invoking test: %s", test_name)
        sys.stderr.flush()
        # each test is a generator of (test name, case id, state, detail) tuples.
        for case_name, case_id, state, info in test():
            print(f"{state.upper()}: {case_name}/{case_id}")
            if info:
                print(info)

    print("DONE")

    # when driven from the command line (idat -S"... --CAPA_AUTOEXIT=true"),
    # exit IDA so the batch run terminates.
    if "--CAPA_AUTOEXIT=true" in idc.ARGV:
        sys.exit(0)

View File

@@ -1,60 +0,0 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
from unittest.mock import patch
import pytest
import envi.exc
from capa.loader import CorruptFile, get_workspace
from capa.features.common import FORMAT_PE, FORMAT_ELF
def test_segmentation_violation_handling():
    """
    Test that SegmentationViolation from vivisect is caught and
    converted to a CorruptFile exception.

    See #2794.
    """
    # the path never has to exist on disk: the workspace factory is mocked below.
    fake_path = Path("/tmp/fake_malformed.elf")

    with patch("viv_utils.getWorkspace") as mock_workspace:
        # simulate vivisect faulting on a malformed ELF with an invalid access.
        mock_workspace.side_effect = envi.exc.SegmentationViolation(
            0x30A4B8BD60,
        )

        # get_workspace should translate the vivisect fault into capa's CorruptFile.
        with pytest.raises(CorruptFile, match="Invalid memory access"):
            get_workspace(fake_path, FORMAT_ELF, [])
def test_corrupt_pe_with_unrealistic_section_size_short_circuits():
    """
    Test that a PE with an unrealistically large section virtual size
    is caught early and raises CorruptFile before vivisect is invoked.

    See #1989.
    """
    # the path never has to exist on disk: both the corruption probe and
    # the workspace factory are mocked below.
    fake_path = Path("/tmp/fake_corrupt.exe")

    with (
        # force the early corruption check to report the file as corrupt...
        patch("capa.loader._is_probably_corrupt_pe", return_value=True),
        # ...and watch whether the expensive vivisect path is ever reached.
        patch("viv_utils.getWorkspace") as mock_workspace,
    ):
        with pytest.raises(CorruptFile, match="unrealistically large sections"):
            get_workspace(fake_path, FORMAT_PE, [])

        # vivisect should never have been called
        mock_workspace.assert_not_called()

View File

@@ -38,7 +38,8 @@ def test_main(z9324d_extractor):
def test_main_single_rule(z9324d_extractor, tmpdir):
# tests a single rule can be loaded successfully
RULE_CONTENT = textwrap.dedent("""
RULE_CONTENT = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -49,7 +50,8 @@ def test_main_single_rule(z9324d_extractor, tmpdir):
- test
features:
- string: test
""")
"""
)
path = z9324d_extractor.path
rule_file = tmpdir.mkdir("capa").join("rule.yml")
rule_file.write(RULE_CONTENT)
@@ -98,7 +100,9 @@ def test_main_shellcode(z499c2_extractor):
def test_ruleset():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: file rule
@@ -107,8 +111,12 @@ def test_ruleset():
dynamic: process
features:
- characteristic: embedded pe
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: function rule
@@ -117,8 +125,12 @@ def test_ruleset():
dynamic: process
features:
- characteristic: tight loop
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: basic block rule
@@ -127,8 +139,12 @@ def test_ruleset():
dynamic: process
features:
- characteristic: nzxor
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: process rule
@@ -137,8 +153,12 @@ def test_ruleset():
dynamic: process
features:
- string: "explorer.exe"
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: thread rule
@@ -147,8 +167,12 @@ def test_ruleset():
dynamic: thread
features:
- api: RegDeleteKey
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test call subscope
@@ -160,8 +184,12 @@ def test_ruleset():
- string: "explorer.exe"
- call:
- api: HttpOpenRequestW
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -179,7 +207,9 @@ def test_ruleset():
- number: 6 = IPPROTO_TCP
- number: 1 = SOCK_STREAM
- number: 2 = AF_INET
""")),
"""
)
),
]
)
assert len(rules.file_rules) == 2
@@ -292,7 +322,9 @@ def test_main_cape1(tmp_path):
# https://github.com/mandiant/capa/pull/1696
rules = tmp_path / "rules"
rules.mkdir()
(rules / "create-or-open-registry-key.yml").write_text(textwrap.dedent("""
(rules / "create-or-open-registry-key.yml").write_text(
textwrap.dedent(
"""
rule:
meta:
name: create or open registry key
@@ -322,7 +354,9 @@ def test_main_cape1(tmp_path):
- api: SHRegOpenUSKey
- api: SHRegCreateUSKey
- api: RtlCreateRegistryKey
"""))
"""
)
)
assert capa.main.main([str(path), "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-q", "-r", str(rules)]) == 0

View File

@@ -46,7 +46,8 @@ def match(rules, features, va, scope=Scope.FUNCTION):
def test_match_simple():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -56,7 +57,8 @@ def test_match_simple():
namespace: testns1/testns2
features:
- number: 100
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
features, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
@@ -67,7 +69,8 @@ def test_match_simple():
def test_match_range_exact():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -76,7 +79,8 @@ def test_match_range_exact():
dynamic: process
features:
- count(number(100)): 2
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
# just enough matches
@@ -93,7 +97,8 @@ def test_match_range_exact():
def test_match_range_range():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -102,7 +107,8 @@ def test_match_range_range():
dynamic: process
features:
- count(number(100)): (2, 3)
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
# just enough matches
@@ -123,7 +129,8 @@ def test_match_range_range():
def test_match_range_exact_zero():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -139,7 +146,8 @@ def test_match_range_exact_zero():
# so we have this additional trivial feature.
- mnemonic: mov
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
# feature isn't indexed - good.
@@ -157,7 +165,8 @@ def test_match_range_exact_zero():
def test_match_range_with_zero():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -172,7 +181,8 @@ def test_match_range_with_zero():
# since we don't support top level NOT statements.
# so we have this additional trivial feature.
- mnemonic: mov
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
# ok
@@ -190,7 +200,8 @@ def test_match_range_with_zero():
def test_match_adds_matched_rule_feature():
"""show that using `match` adds a feature for matched rules."""
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -199,7 +210,8 @@ def test_match_adds_matched_rule_feature():
dynamic: process
features:
- number: 100
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
features, _ = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert capa.features.common.MatchedRule("test rule") in features
@@ -208,7 +220,9 @@ def test_match_adds_matched_rule_feature():
def test_match_matched_rules():
"""show that using `match` adds a feature for matched rules."""
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule1
@@ -217,8 +231,12 @@ def test_match_matched_rules():
dynamic: process
features:
- number: 100
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule2
@@ -227,7 +245,9 @@ def test_match_matched_rules():
dynamic: process
features:
- match: test rule1
""")),
"""
)
),
]
features, _ = match(
@@ -251,7 +271,9 @@ def test_match_matched_rules():
def test_match_namespace():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: CreateFile API
@@ -261,8 +283,12 @@ def test_match_namespace():
namespace: file/create/CreateFile
features:
- api: CreateFile
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: WriteFile API
@@ -272,8 +298,12 @@ def test_match_namespace():
namespace: file/write
features:
- api: WriteFile
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: file-create
@@ -282,8 +312,12 @@ def test_match_namespace():
dynamic: process
features:
- match: file/create
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: filesystem-any
@@ -292,7 +326,9 @@ def test_match_namespace():
dynamic: process
features:
- match: file
""")),
"""
)
),
]
features, matches = match(
@@ -319,7 +355,9 @@ def test_match_namespace():
def test_match_substring():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -329,7 +367,9 @@ def test_match_substring():
features:
- and:
- substring: abc
""")),
"""
)
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -369,7 +409,9 @@ def test_match_substring():
def test_match_regex():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -379,8 +421,12 @@ def test_match_regex():
features:
- and:
- string: /.*bbbb.*/
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: rule with implied wildcards
@@ -390,8 +436,12 @@ def test_match_regex():
features:
- and:
- string: /bbbb/
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: rule with anchor
@@ -401,7 +451,9 @@ def test_match_regex():
features:
- and:
- string: /^bbbb/
""")),
"""
)
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -436,7 +488,9 @@ def test_match_regex():
def test_match_regex_ignorecase():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -446,7 +500,9 @@ def test_match_regex_ignorecase():
features:
- and:
- string: /.*bbbb.*/i
""")),
"""
)
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -458,7 +514,9 @@ def test_match_regex_ignorecase():
def test_match_regex_complex():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent(r"""
capa.rules.Rule.from_yaml(
textwrap.dedent(
r"""
rule:
meta:
name: test rule
@@ -468,7 +526,9 @@ def test_match_regex_complex():
features:
- or:
- string: /.*HARDWARE\\Key\\key with spaces\\.*/i
""")),
"""
)
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -480,7 +540,9 @@ def test_match_regex_complex():
def test_match_regex_values_always_string():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -491,7 +553,9 @@ def test_match_regex_values_always_string():
- or:
- string: /123/
- string: /0x123/
""")),
"""
)
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -508,22 +572,10 @@ def test_match_regex_values_always_string():
assert capa.features.common.MatchedRule("test rule") in features
@pytest.mark.parametrize(
"pattern",
[
"/test\\.exe/",
"/hello/i",
"/foo\\\\bar/",
],
)
def test_regex_get_value_str(pattern):
# Regex.get_value_str() must return the raw pattern without escaping, see #1909.
assert capa.features.common.Regex(pattern).get_value_str() == pattern
@pytest.mark.xfail(reason="can't have top level NOT")
def test_match_only_not():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -534,7 +586,8 @@ def test_match_only_not():
features:
- not:
- number: 99
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
@@ -542,7 +595,8 @@ def test_match_only_not():
def test_match_not():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -555,7 +609,8 @@ def test_match_not():
- mnemonic: mov
- not:
- number: 99
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}, capa.features.insn.Mnemonic("mov"): {1, 2}}, 0x0)
@@ -564,7 +619,8 @@ def test_match_not():
@pytest.mark.xfail(reason="can't have nested NOT")
def test_match_not_not():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -576,7 +632,8 @@ def test_match_not_not():
- not:
- not:
- number: 100
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
@@ -584,7 +641,8 @@ def test_match_not_not():
def test_match_operand_number():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -594,7 +652,8 @@ def test_match_operand_number():
features:
- and:
- operand[0].number: 0x10
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
assert capa.features.insn.OperandNumber(0, 0x10) in {capa.features.insn.OperandNumber(0, 0x10)}
@@ -612,7 +671,8 @@ def test_match_operand_number():
def test_match_operand_offset():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -622,7 +682,8 @@ def test_match_operand_offset():
features:
- and:
- operand[0].offset: 0x10
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
assert capa.features.insn.OperandOffset(0, 0x10) in {capa.features.insn.OperandOffset(0, 0x10)}
@@ -640,7 +701,8 @@ def test_match_operand_offset():
def test_match_property_access():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -650,7 +712,8 @@ def test_match_property_access():
features:
- and:
- property/read: System.IO.FileInfo::Length
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
assert capa.features.insn.Property("System.IO.FileInfo::Length", capa.features.common.FeatureAccess.READ) in {
@@ -682,7 +745,8 @@ def test_match_property_access():
def test_match_os_any():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -700,7 +764,8 @@ def test_match_os_any():
- and:
- os: any
- string: "Goodbye world"
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
_, matches = match(
@@ -734,7 +799,8 @@ def test_match_os_any():
# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_and_unstable():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -745,7 +811,8 @@ def test_index_features_and_unstable():
- and:
- mnemonic: mov
- api: CreateFileW
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
rr = capa.rules.RuleSet([r])
index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]
@@ -761,7 +828,8 @@ def test_index_features_and_unstable():
# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_or_unstable():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -772,7 +840,8 @@ def test_index_features_or_unstable():
- or:
- mnemonic: mov
- api: CreateFileW
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
rr = capa.rules.RuleSet([r])
index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]
@@ -789,7 +858,8 @@ def test_index_features_or_unstable():
# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_nested_unstable():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -802,7 +872,8 @@ def test_index_features_nested_unstable():
- or:
- api: CreateFileW
- string: foo
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
rr = capa.rules.RuleSet([r])
index: capa.rules.RuleSet._RuleFeatureIndex = rr._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]

View File

@@ -25,7 +25,8 @@ from capa.features.common import Arch, Substring
def test_optimizer_order():
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -43,7 +44,8 @@ def test_optimizer_order():
- or:
- number: 1
- offset: 4
""")
"""
)
r = capa.rules.Rule.from_yaml(rule)
# before optimization

View File

@@ -56,7 +56,8 @@ def test_render_meta_attack():
subtechnique = "Windows Service"
canonical = "{:s}::{:s}::{:s} [{:s}]".format(tactic, technique, subtechnique, id)
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -69,7 +70,10 @@ def test_render_meta_attack():
- {:s}
features:
- number: 1
""".format(canonical))
""".format(
canonical
)
)
r = capa.rules.Rule.from_yaml(rule)
rule_meta = capa.render.result_document.RuleMetadata.from_capa(r)
attack = rule_meta.attack[0]
@@ -90,7 +94,8 @@ def test_render_meta_mbc():
method = "Heavens Gate"
canonical = "{:s}::{:s}::{:s} [{:s}]".format(objective, behavior, method, id)
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -103,7 +108,10 @@ def test_render_meta_mbc():
- {:s}
features:
- number: 1
""".format(canonical))
""".format(
canonical
)
)
r = capa.rules.Rule.from_yaml(rule)
rule_meta = capa.render.result_document.RuleMetadata.from_capa(r)
mbc = rule_meta.mbc[0]
@@ -121,7 +129,8 @@ def test_render_meta_maec():
malware_category = "downloader"
analysis_conclusion = "malicious"
rule_yaml = textwrap.dedent("""
rule_yaml = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -135,7 +144,10 @@ def test_render_meta_maec():
maec/analysis-conclusion: {:s}
features:
- number: 1
""".format(malware_family, malware_category, analysis_conclusion))
""".format(
malware_family, malware_category, analysis_conclusion
)
)
rule = capa.rules.Rule.from_yaml(rule_yaml)
rm = capa.render.result_document.RuleMatches(
meta=capa.render.result_document.RuleMetadata.from_capa(rule),
@@ -208,7 +220,8 @@ def test_render_vverbose_feature(feature, expected):
layout = capa.render.result_document.StaticLayout(functions=())
src = textwrap.dedent("""
src = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -224,7 +237,8 @@ def test_render_vverbose_feature(feature, expected):
- and:
- number: 1
- number: 2
""")
"""
)
rule = capa.rules.Rule.from_yaml(src)
rm = capa.render.result_document.RuleMatches(

View File

@@ -22,7 +22,9 @@ import capa.rules
import capa.helpers
import capa.rules.cache
R1 = capa.rules.Rule.from_yaml(textwrap.dedent("""
R1 = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -38,9 +40,13 @@ R1 = capa.rules.Rule.from_yaml(textwrap.dedent("""
- and:
- number: 1
- number: 2
"""))
"""
)
)
R2 = capa.rules.Rule.from_yaml(textwrap.dedent("""
R2 = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule 2
@@ -56,7 +62,9 @@ R2 = capa.rules.Rule.from_yaml(textwrap.dedent("""
- and:
- number: 3
- number: 4
"""))
"""
)
)
def test_ruleset_cache_ids():

File diff suppressed because it is too large Load Diff

View File

@@ -21,7 +21,9 @@ import capa.rules
def test_rule_scope_instruction():
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -33,10 +35,14 @@ def test_rule_scope_instruction():
- mnemonic: mov
- arch: i386
- os: windows
"""))
"""
)
)
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -45,11 +51,17 @@ def test_rule_scope_instruction():
dynamic: unsupported
features:
- characteristic: embedded pe
"""))
"""
)
)
def test_rule_subscope_instruction():
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -63,7 +75,11 @@ def test_rule_subscope_instruction():
- mnemonic: mov
- arch: i386
- os: windows
"""))])
"""
)
)
]
)
# the function rule scope will have one rules:
# - `test rule`
assert len(rules.function_rules) == 1
@@ -74,7 +90,9 @@ def test_rule_subscope_instruction():
def test_scope_instruction_implied_and():
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -87,11 +105,15 @@ def test_scope_instruction_implied_and():
- mnemonic: mov
- arch: i386
- os: windows
"""))
"""
)
)
def test_scope_instruction_description():
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -105,9 +127,13 @@ def test_scope_instruction_description():
- mnemonic: mov
- arch: i386
- os: windows
"""))
"""
)
)
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -121,4 +147,6 @@ def test_scope_instruction_description():
- mnemonic: mov
- arch: i386
- os: windows
"""))
"""
)
)

View File

@@ -142,7 +142,8 @@ def test_proto_conversion(tmp_path):
def test_detect_duplicate_features(tmpdir):
TEST_RULE_0 = textwrap.dedent("""
TEST_RULE_0 = textwrap.dedent(
"""
rule:
meta:
name: Test Rule 0
@@ -154,10 +155,12 @@ def test_detect_duplicate_features(tmpdir):
- number: 1
- not:
- string: process
""")
"""
)
TEST_RULESET = {
"rule_1": textwrap.dedent("""
"rule_1": textwrap.dedent(
"""
rule:
meta:
name: Test Rule 1
@@ -176,8 +179,10 @@ def test_detect_duplicate_features(tmpdir):
- count(mnemonic(xor)): 5
- not:
- os: linux
"""),
"rule_2": textwrap.dedent("""
"""
),
"rule_2": textwrap.dedent(
"""
rule:
meta:
name: Test Rule 2
@@ -191,8 +196,10 @@ def test_detect_duplicate_features(tmpdir):
- and:
- api: CreateFile
- mnemonic: xor
"""),
"rule_3": textwrap.dedent("""
"""
),
"rule_3": textwrap.dedent(
"""
rule:
meta:
name: Test Rule 3
@@ -207,7 +214,8 @@ def test_detect_duplicate_features(tmpdir):
- and:
- api: bind
- number: 2
"""),
"""
),
}
"""

View File

@@ -72,15 +72,6 @@ DYNAMIC_VMRAY_FEATURE_PRESENCE_TESTS = sorted(
# VirtualAlloc(4096, 4)
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4096), True),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4), True),
# call/number argument - registry API parameters (issue #2)
# RegOpenKeyExW(Software\Microsoft\Windows\CurrentVersion\Policies\System, 0, 131078)
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(2147483649), True),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(0), True),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(131078), True),
# RegOpenKeyExW call 2397 (same parameters)
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(2147483649), True),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(0), True),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(131078), True),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
@@ -110,11 +101,6 @@ DYNAMIC_VMRAY_FEATURE_COUNT_TESTS = sorted(
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4096), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(404), 0),
# call/number argument - registry API parameters (issue #2)
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(2147483649), 1),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(0), 1),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(131078), 1),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(999999), 0),
],
# order tests by (file, item)
# so that our LRU cache is most effective.

View File

@@ -26,9 +26,11 @@ from capa.features.extractors.vmray.models import (
def test_vmray_model_param():
param_str = textwrap.dedent("""
param_str = textwrap.dedent(
"""
<param name="addrlen" type="signed_32bit" value="16"/>
""")
"""
)
param: Param = Param.model_validate(xml_to_dict(param_str)["param"])
assert param.value is not None
@@ -36,11 +38,13 @@ def test_vmray_model_param():
def test_vmray_model_param_deref():
param_str = textwrap.dedent("""
param_str = textwrap.dedent(
"""
<param name="buf" type="ptr" value="0xaaaaaaaa">
<deref type="str" value="Hello world"/>
</param>
""")
"""
)
param: Param = Param.model_validate(xml_to_dict(param_str)["param"])
assert param.deref is not None
@@ -48,7 +52,8 @@ def test_vmray_model_param_deref():
def test_vmray_model_function_call():
function_call_str = textwrap.dedent("""
function_call_str = textwrap.dedent(
"""
<fncall fncall_id="18" process_id="1" thread_id="1" name="sys_time">
<in>
<param name="tloc" type="unknown" value="0x0"/>
@@ -57,7 +62,8 @@ def test_vmray_model_function_call():
<param name="ret_val" type="unknown" value="0xaaaaaaaa"/>
</out>
</fncall>
""")
"""
)
function_call: FunctionCall = FunctionCall.model_validate(xml_to_dict(function_call_str)["fncall"])
assert function_call.fncall_id == 18
@@ -75,19 +81,22 @@ def test_vmray_model_function_call():
def test_vmray_model_analysis_metadata():
analysis_metadata: AnalysisMetadata = AnalysisMetadata.model_validate_json("""
analysis_metadata: AnalysisMetadata = AnalysisMetadata.model_validate_json(
"""
{
"sample_type": "Linux ELF Executable (x86-64)",
"submission_filename": "abcd1234"
}
""")
"""
)
assert analysis_metadata.sample_type == "Linux ELF Executable (x86-64)"
assert analysis_metadata.submission_filename == "abcd1234"
def test_vmray_model_elffile():
elffile: ElfFile = ElfFile.model_validate_json("""
elffile: ElfFile = ElfFile.model_validate_json(
"""
{
"sections": [
{
@@ -98,14 +107,16 @@ def test_vmray_model_elffile():
}
]
}
""")
"""
)
assert elffile.sections[0].header.sh_name == "abcd1234"
assert elffile.sections[0].header.sh_addr == 2863311530
def test_vmray_model_pefile():
pefile: PEFile = PEFile.model_validate_json("""
pefile: PEFile = PEFile.model_validate_json(
"""
{
"basic_info": {
"image_base": 2863311530
@@ -139,7 +150,8 @@ def test_vmray_model_pefile():
}
]
}
""")
"""
)
assert pefile.basic_info.image_base == 2863311530

View File

@@ -726,9 +726,9 @@
}
},
"node_modules/@rollup/rollup-android-arm-eabi": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz",
"integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.36.0.tgz",
"integrity": "sha512-jgrXjjcEwN6XpZXL0HUeOVGfjXhPyxAbbhD0BlXUB+abTOpbPiN5Wb3kOT7yb+uEtATNYF5x5gIfwutmuBA26w==",
"cpu": [
"arm"
],
@@ -740,9 +740,9 @@
]
},
"node_modules/@rollup/rollup-android-arm64": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz",
"integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.36.0.tgz",
"integrity": "sha512-NyfuLvdPdNUfUNeYKUwPwKsE5SXa2J6bCt2LdB/N+AxShnkpiczi3tcLJrm5mA+eqpy0HmaIY9F6XCa32N5yzg==",
"cpu": [
"arm64"
],
@@ -754,9 +754,9 @@
]
},
"node_modules/@rollup/rollup-darwin-arm64": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz",
"integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.36.0.tgz",
"integrity": "sha512-JQ1Jk5G4bGrD4pWJQzWsD8I1n1mgPXq33+/vP4sk8j/z/C2siRuxZtaUA7yMTf71TCZTZl/4e1bfzwUmFb3+rw==",
"cpu": [
"arm64"
],
@@ -768,9 +768,9 @@
]
},
"node_modules/@rollup/rollup-darwin-x64": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz",
"integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.36.0.tgz",
"integrity": "sha512-6c6wMZa1lrtiRsbDziCmjE53YbTkxMYhhnWnSW8R/yqsM7a6mSJ3uAVT0t8Y/DGt7gxUWYuFM4bwWk9XCJrFKA==",
"cpu": [
"x64"
],
@@ -782,9 +782,9 @@
]
},
"node_modules/@rollup/rollup-freebsd-arm64": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz",
"integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.36.0.tgz",
"integrity": "sha512-KXVsijKeJXOl8QzXTsA+sHVDsFOmMCdBRgFmBb+mfEb/7geR7+C8ypAml4fquUt14ZyVXaw2o1FWhqAfOvA4sg==",
"cpu": [
"arm64"
],
@@ -796,9 +796,9 @@
]
},
"node_modules/@rollup/rollup-freebsd-x64": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz",
"integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.36.0.tgz",
"integrity": "sha512-dVeWq1ebbvByI+ndz4IJcD4a09RJgRYmLccwlQ8bPd4olz3Y213uf1iwvc7ZaxNn2ab7bjc08PrtBgMu6nb4pQ==",
"cpu": [
"x64"
],
@@ -810,9 +810,9 @@
]
},
"node_modules/@rollup/rollup-linux-arm-gnueabihf": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz",
"integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.36.0.tgz",
"integrity": "sha512-bvXVU42mOVcF4le6XSjscdXjqx8okv4n5vmwgzcmtvFdifQ5U4dXFYaCB87namDRKlUL9ybVtLQ9ztnawaSzvg==",
"cpu": [
"arm"
],
@@ -824,9 +824,9 @@
]
},
"node_modules/@rollup/rollup-linux-arm-musleabihf": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz",
"integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.36.0.tgz",
"integrity": "sha512-JFIQrDJYrxOnyDQGYkqnNBtjDwTgbasdbUiQvcU8JmGDfValfH1lNpng+4FWlhaVIR4KPkeddYjsVVbmJYvDcg==",
"cpu": [
"arm"
],
@@ -838,9 +838,9 @@
]
},
"node_modules/@rollup/rollup-linux-arm64-gnu": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz",
"integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.36.0.tgz",
"integrity": "sha512-KqjYVh3oM1bj//5X7k79PSCZ6CvaVzb7Qs7VMWS+SlWB5M8p3FqufLP9VNp4CazJ0CsPDLwVD9r3vX7Ci4J56A==",
"cpu": [
"arm64"
],
@@ -852,9 +852,9 @@
]
},
"node_modules/@rollup/rollup-linux-arm64-musl": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz",
"integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.36.0.tgz",
"integrity": "sha512-QiGnhScND+mAAtfHqeT+cB1S9yFnNQ/EwCg5yE3MzoaZZnIV0RV9O5alJAoJKX/sBONVKeZdMfO8QSaWEygMhw==",
"cpu": [
"arm64"
],
@@ -865,10 +865,10 @@
"linux"
]
},
"node_modules/@rollup/rollup-linux-loong64-gnu": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz",
"integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==",
"node_modules/@rollup/rollup-linux-loongarch64-gnu": {
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.36.0.tgz",
"integrity": "sha512-1ZPyEDWF8phd4FQtTzMh8FQwqzvIjLsl6/84gzUxnMNFBtExBtpL51H67mV9xipuxl1AEAerRBgBwFNpkw8+Lg==",
"cpu": [
"loong64"
],
@@ -879,38 +879,10 @@
"linux"
]
},
"node_modules/@rollup/rollup-linux-loong64-musl": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz",
"integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==",
"cpu": [
"loong64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@rollup/rollup-linux-ppc64-gnu": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz",
"integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@rollup/rollup-linux-ppc64-musl": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz",
"integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==",
"node_modules/@rollup/rollup-linux-powerpc64le-gnu": {
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.36.0.tgz",
"integrity": "sha512-VMPMEIUpPFKpPI9GZMhJrtu8rxnp6mJR3ZzQPykq4xc2GmdHj3Q4cA+7avMyegXy4n1v+Qynr9fR88BmyO74tg==",
"cpu": [
"ppc64"
],
@@ -922,23 +894,9 @@
]
},
"node_modules/@rollup/rollup-linux-riscv64-gnu": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz",
"integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==",
"cpu": [
"riscv64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@rollup/rollup-linux-riscv64-musl": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz",
"integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.36.0.tgz",
"integrity": "sha512-ttE6ayb/kHwNRJGYLpuAvB7SMtOeQnVXEIpMtAvx3kepFQeowVED0n1K9nAdraHUPJ5hydEMxBpIR7o4nrm8uA==",
"cpu": [
"riscv64"
],
@@ -950,9 +908,9 @@
]
},
"node_modules/@rollup/rollup-linux-s390x-gnu": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz",
"integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.36.0.tgz",
"integrity": "sha512-4a5gf2jpS0AIe7uBjxDeUMNcFmaRTbNv7NxI5xOCs4lhzsVyGR/0qBXduPnoWf6dGC365saTiwag8hP1imTgag==",
"cpu": [
"s390x"
],
@@ -964,9 +922,9 @@
]
},
"node_modules/@rollup/rollup-linux-x64-gnu": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz",
"integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.36.0.tgz",
"integrity": "sha512-5KtoW8UWmwFKQ96aQL3LlRXX16IMwyzMq/jSSVIIyAANiE1doaQsx/KRyhAvpHlPjPiSU/AYX/8m+lQ9VToxFQ==",
"cpu": [
"x64"
],
@@ -978,9 +936,9 @@
]
},
"node_modules/@rollup/rollup-linux-x64-musl": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz",
"integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.36.0.tgz",
"integrity": "sha512-sycrYZPrv2ag4OCvaN5js+f01eoZ2U+RmT5as8vhxiFz+kxwlHrsxOwKPSA8WyS+Wc6Epid9QeI/IkQ9NkgYyQ==",
"cpu": [
"x64"
],
@@ -991,38 +949,10 @@
"linux"
]
},
"node_modules/@rollup/rollup-openbsd-x64": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz",
"integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
]
},
"node_modules/@rollup/rollup-openharmony-arm64": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz",
"integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openharmony"
]
},
"node_modules/@rollup/rollup-win32-arm64-msvc": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz",
"integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.36.0.tgz",
"integrity": "sha512-qbqt4N7tokFwwSVlWDsjfoHgviS3n/vZ8LK0h1uLG9TYIRuUTJC88E1xb3LM2iqZ/WTqNQjYrtmtGmrmmawB6A==",
"cpu": [
"arm64"
],
@@ -1034,9 +964,9 @@
]
},
"node_modules/@rollup/rollup-win32-ia32-msvc": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz",
"integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.36.0.tgz",
"integrity": "sha512-t+RY0JuRamIocMuQcfwYSOkmdX9dtkr1PbhKW42AMvaDQa+jOdpUYysroTF/nuPpAaQMWp7ye+ndlmmthieJrQ==",
"cpu": [
"ia32"
],
@@ -1047,24 +977,10 @@
"win32"
]
},
"node_modules/@rollup/rollup-win32-x64-gnu": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz",
"integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
]
},
"node_modules/@rollup/rollup-win32-x64-msvc": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz",
"integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.36.0.tgz",
"integrity": "sha512-aRXd7tRZkWLqGbChgcMMDEHjOKudo1kChb1Jt1IfR8cY/KIpgNviLeJy5FUb9IpSuQj8dU2fAYNMPW/hLKOSTw==",
"cpu": [
"x64"
],
@@ -1082,9 +998,9 @@
"dev": true
},
"node_modules/@types/estree": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
"integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==",
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz",
"integrity": "sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==",
"dev": true,
"license": "MIT"
},
@@ -1766,15 +1682,14 @@
"dev": true
},
"node_modules/editorconfig": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/editorconfig/-/editorconfig-1.0.7.tgz",
"integrity": "sha512-e0GOtq/aTQhVdNyDU9e02+wz9oDDM+SIOQxWME2QRjzRX5yyLAuHDE+0aE8vHb9XRC8XD37eO2u57+F09JqFhw==",
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/editorconfig/-/editorconfig-1.0.4.tgz",
"integrity": "sha512-L9Qe08KWTlqYMVvMcTIvMAdl1cDUubzRNYL+WfA4bLDMHe4nemKkpmYzkznE1FwLKu0EEmy6obgQKzMJrg4x9Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"@one-ini/wasm": "0.1.1",
"commander": "^10.0.0",
"minimatch": "^9.0.1",
"minimatch": "9.0.1",
"semver": "^7.5.3"
},
"bin": {
@@ -1785,23 +1700,21 @@
}
},
"node_modules/editorconfig/node_modules/brace-expansion": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"dev": true,
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0"
}
},
"node_modules/editorconfig/node_modules/minimatch": {
"version": "9.0.9",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz",
"integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==",
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.1.tgz",
"integrity": "sha512-0jWhJpD/MdhPXwPuiRkCbfYfSKp2qnn2eOc279qI7f+osl/l+prKSrvhg157zSYvx/1nmgn2NqdT6k2Z7zSH9w==",
"dev": true,
"license": "ISC",
"dependencies": {
"brace-expansion": "^2.0.2"
"brace-expansion": "^2.0.1"
},
"engines": {
"node": ">=16 || 14 >=14.17"
@@ -2392,23 +2305,21 @@
}
},
"node_modules/glob/node_modules/brace-expansion": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"dev": true,
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0"
}
},
"node_modules/glob/node_modules/minimatch": {
"version": "9.0.9",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz",
"integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==",
"version": "9.0.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
"dev": true,
"license": "ISC",
"dependencies": {
"brace-expansion": "^2.0.2"
"brace-expansion": "^2.0.1"
},
"engines": {
"node": ">=16 || 14 >=14.17"
@@ -2845,11 +2756,10 @@
}
},
"node_modules/lodash": {
"version": "4.17.23",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz",
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
"dev": true,
"license": "MIT"
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
"dev": true
},
"node_modules/lodash.merge": {
"version": "4.6.2",
@@ -2928,11 +2838,10 @@
}
},
"node_modules/minimatch": {
"version": "3.1.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
"integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
"dev": true,
"license": "ISC",
"dependencies": {
"brace-expansion": "^1.1.7"
},
@@ -3396,13 +3305,13 @@
}
},
"node_modules/rollup": {
"version": "4.59.0",
"resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz",
"integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==",
"version": "4.36.0",
"resolved": "https://registry.npmjs.org/rollup/-/rollup-4.36.0.tgz",
"integrity": "sha512-zwATAXNQxUcd40zgtQG0ZafcRK4g004WtEl7kbuhTWPvf07PsfohXl39jVUvPF7jvNAIkKPQ2XrsDlWuxBd++Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/estree": "1.0.8"
"@types/estree": "1.0.6"
},
"bin": {
"rollup": "dist/bin/rollup"
@@ -3412,31 +3321,25 @@
"npm": ">=8.0.0"
},
"optionalDependencies": {
"@rollup/rollup-android-arm-eabi": "4.59.0",
"@rollup/rollup-android-arm64": "4.59.0",
"@rollup/rollup-darwin-arm64": "4.59.0",
"@rollup/rollup-darwin-x64": "4.59.0",
"@rollup/rollup-freebsd-arm64": "4.59.0",
"@rollup/rollup-freebsd-x64": "4.59.0",
"@rollup/rollup-linux-arm-gnueabihf": "4.59.0",
"@rollup/rollup-linux-arm-musleabihf": "4.59.0",
"@rollup/rollup-linux-arm64-gnu": "4.59.0",
"@rollup/rollup-linux-arm64-musl": "4.59.0",
"@rollup/rollup-linux-loong64-gnu": "4.59.0",
"@rollup/rollup-linux-loong64-musl": "4.59.0",
"@rollup/rollup-linux-ppc64-gnu": "4.59.0",
"@rollup/rollup-linux-ppc64-musl": "4.59.0",
"@rollup/rollup-linux-riscv64-gnu": "4.59.0",
"@rollup/rollup-linux-riscv64-musl": "4.59.0",
"@rollup/rollup-linux-s390x-gnu": "4.59.0",
"@rollup/rollup-linux-x64-gnu": "4.59.0",
"@rollup/rollup-linux-x64-musl": "4.59.0",
"@rollup/rollup-openbsd-x64": "4.59.0",
"@rollup/rollup-openharmony-arm64": "4.59.0",
"@rollup/rollup-win32-arm64-msvc": "4.59.0",
"@rollup/rollup-win32-ia32-msvc": "4.59.0",
"@rollup/rollup-win32-x64-gnu": "4.59.0",
"@rollup/rollup-win32-x64-msvc": "4.59.0",
"@rollup/rollup-android-arm-eabi": "4.36.0",
"@rollup/rollup-android-arm64": "4.36.0",
"@rollup/rollup-darwin-arm64": "4.36.0",
"@rollup/rollup-darwin-x64": "4.36.0",
"@rollup/rollup-freebsd-arm64": "4.36.0",
"@rollup/rollup-freebsd-x64": "4.36.0",
"@rollup/rollup-linux-arm-gnueabihf": "4.36.0",
"@rollup/rollup-linux-arm-musleabihf": "4.36.0",
"@rollup/rollup-linux-arm64-gnu": "4.36.0",
"@rollup/rollup-linux-arm64-musl": "4.36.0",
"@rollup/rollup-linux-loongarch64-gnu": "4.36.0",
"@rollup/rollup-linux-powerpc64le-gnu": "4.36.0",
"@rollup/rollup-linux-riscv64-gnu": "4.36.0",
"@rollup/rollup-linux-s390x-gnu": "4.36.0",
"@rollup/rollup-linux-x64-gnu": "4.36.0",
"@rollup/rollup-linux-x64-musl": "4.36.0",
"@rollup/rollup-win32-arm64-msvc": "4.36.0",
"@rollup/rollup-win32-ia32-msvc": "4.36.0",
"@rollup/rollup-win32-x64-msvc": "4.36.0",
"fsevents": "~2.3.2"
}
},

View File

@@ -22,7 +22,6 @@
:scrollable="true"
:filters="filters"
:filterMode="filterMode"
:globalFilterFields="['name', 'typeValue']"
sortField="namespace"
:sortOrder="1"
removableSort

View File

@@ -17,9 +17,6 @@
import { useToast } from "primevue/usetoast";
import { isGzipped, decompressGzip, readFileAsText } from "@/utils/fileUtils";
const VT_REANALYZE_SUGGESTION =
"If this is a VirusTotal or similar link, the file may need to be reanalyzed. Try again later.";
export function useRdocLoader() {
const toast = useToast();
const MIN_SUPPORTED_VERSION = "7.0.0";
@@ -35,58 +32,6 @@ export function useRdocLoader() {
toast.add({ severity, summary, detail, life: 3000, group: "bc" }); // bc: bottom-center
};
/**
* Validates that the parsed object has the expected result document schema.
* @param {Object} rdoc - The parsed JSON data.
* @returns {{ valid: boolean, message?: string }} Validation result with an optional error message.
*/
const validateRdocSchema = (rdoc) => {
const isInvalidObject = (v) => !v || typeof v !== "object" || Array.isArray(v);
if (isInvalidObject(rdoc)) {
return { valid: false, message: "Invalid JSON: expected an object." };
}
if (isInvalidObject(rdoc.meta)) {
return { valid: false, message: "Invalid result document: missing or invalid 'meta' field." };
}
if (rdoc.meta.version === undefined) {
return { valid: false, message: "Invalid result document: missing 'meta.version'." };
}
if (isInvalidObject(rdoc.meta.analysis)) {
return { valid: false, message: "Invalid result document: missing or invalid 'meta.analysis'." };
}
if (isInvalidObject(rdoc.meta.analysis.layout)) {
return { valid: false, message: "Invalid result document: missing or invalid 'meta.analysis.layout'." };
}
if (isInvalidObject(rdoc.meta.analysis.feature_counts)) {
return {
valid: false,
message: "Invalid result document: missing or invalid 'meta.analysis.feature_counts'."
};
}
const fc = rdoc.meta.analysis.feature_counts;
// Allow file-scoped-only documents (no functions/processes arrays).
// If present, functions and processes must be arrays.
if (fc.functions !== undefined && !Array.isArray(fc.functions)) {
return {
valid: false,
message:
"Invalid result document: 'meta.analysis.feature_counts.functions' must be an array when present."
};
}
if (fc.processes !== undefined && !Array.isArray(fc.processes)) {
return {
valid: false,
message:
"Invalid result document: 'meta.analysis.feature_counts.processes' must be an array when present."
};
}
if (isInvalidObject(rdoc.rules)) {
return { valid: false, message: "Invalid result document: missing or invalid 'rules' field." };
}
return { valid: true };
};
/**
* Checks if the version of the loaded data is supported.
* @param {Object} rdoc - The loaded JSON data containing version information.
@@ -136,41 +81,27 @@ export function useRdocLoader() {
* @returns {Promise<Object|null>} A promise that resolves to the processed RDOC data, or null if processing fails.
*/
const loadRdoc = async (source) => {
const isUrl = typeof source === "string";
try {
let data;
if (isUrl) {
if (typeof source === "string") {
// Load from URL
const blob = await fetchFromUrl(source);
data = await processBlob(blob);
} else if (source instanceof File) {
// Load from local
data = await processBlob(source);
} else {
throw new Error("Invalid source type");
}
const validation = validateRdocSchema(data);
if (!validation.valid) {
let detail = validation.message;
if (isUrl) {
detail += VT_REANALYZE_SUGGESTION;
}
showToast("error", "Invalid result document", detail);
return null;
}
if (checkVersion(data)) {
showToast("success", "Success", "JSON data loaded successfully");
return data;
}
} catch (error) {
console.error("Error loading JSON:", error);
let detail = error.message;
if (isUrl && (error instanceof SyntaxError || error.message.includes("JSON"))) {
detail += VT_REANALYZE_SUGGESTION;
}
showToast("error", "Failed to process the file", detail);
showToast("error", "Failed to process the file", error.message);
}
return null;
};

View File

@@ -322,8 +322,8 @@ export function parseFunctionCapabilities(doc) {
});
}
// Iterate through all functions in the document (empty for file-scoped-only)
for (const f of doc.meta.analysis.feature_counts.functions ?? []) {
// Iterate through all functions in the document
for (const f of doc.meta.analysis.feature_counts.functions) {
const addr = formatAddress(f.address);
const matches = matchesByFunction.get(addr);
// Skip functions with no matches (unlikely)

View File

@@ -77,8 +77,8 @@ export function createATTACKHref(attack) {
*/
export function createCapaRulesUrl(node) {
if (!node || !node.data) return null;
const baseUrl = "https://mandiant.github.io/capa/rules/";
return `${baseUrl}${encodeURIComponent(node.data.name)}/`;
const ruleName = node.data.name.toLowerCase().replace(/\s+/g, "-");
return `https://mandiant.github.io/capa/rules/${ruleName}/`;
}
/**