Merge remote-tracking branch 'parentrepo/dynamic-feature-extraction' into find-dynamic-capabilities

This commit is contained in:
Yacine Elhamer
2023-07-20 20:02:49 +01:00
141 changed files with 1054 additions and 756 deletions

View File

@@ -159,12 +159,25 @@ The process described here has several goals:
Please follow these steps to have your contribution considered by the maintainers:
0. Sign the [Contributor License Agreement](#contributor-license-agreement)
1. Follow the [styleguides](#styleguides)
2. Update the CHANGELOG and add tests and documentation. In case they are not needed, indicate it in [the PR template](pull_request_template.md).
3. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing <details><summary>What if the status checks are failing? </summary>If a status check is failing, and you believe that the failure is unrelated to your change, please leave a comment on the pull request explaining why you believe the failure is unrelated. A maintainer will re-run the status check for you. If we conclude that the failure was a false positive, then we will open an issue to track that problem with our status check suite.</details>
While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted.
### Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution,
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Styleguides
### Git Commit Messages

18
.github/flake8.ini vendored
View File

@@ -13,8 +13,16 @@ extend-ignore =
# B010 Do not call setattr with a constant attribute value
B010,
# G200 Logging statement uses exception in arguments
G200
G200,
# SIM102 Use a single if-statement instead of nested if-statements
# doesn't provide a space for commenting or logical separation of conditions
SIM102,
# SIM114 Use logical or and a single body
# makes logic trees too complex
SIM114,
# SIM117 Use 'with Foo, Bar:' instead of multiple with statements
# makes lines too long
SIM117
per-file-ignores =
# T201 print found.
@@ -26,4 +34,8 @@ per-file-ignores =
# IDA tests emit results to output window so need to print
tests/test_ida_features.py: T201
# utility used to find the Binary Ninja API via invoking python.exe
capa/features/extractors/binja/find_binja_api.py: T201
capa/features/extractors/binja/find_binja_api.py: T201
copyright-check = True
copyright-min-file-size = 1
copyright-regexp = Copyright \(C\) 2023 Mandiant, Inc. All Rights Reserved.

22
.github/ruff.toml vendored
View File

@@ -41,25 +41,3 @@ exclude = [
"*_pb2.py",
"*_pb2.pyi"
]
[per-file-ignores]
# until we address #1592 and move test fixtures into conftest.py
# then we need to ignore imports done to enable pytest fixtures.
#
# F401: `foo` imported but unused
# F811 Redefinition of unused `foo`
"tests/test_main.py" = ["F401", "F811"]
"tests/test_proto.py" = ["F401", "F811"]
"tests/test_freeze.py" = ["F401", "F811"]
"tests/test_function_id.py" = ["F401", "F811"]
"tests/test_viv_features.py" = ["F401", "F811"]
"tests/test_cape_features.py" = ["F401", "F811"]
"tests/test_binja_features.py" = ["F401", "F811"]
"tests/test_pefile_features.py" = ["F401", "F811"]
"tests/test_dnfile_features.py" = ["F401", "F811"]
"tests/test_dotnet_features.py" = ["F401", "F811"]
"tests/test_result_document.py" = ["F401", "F811"]
"tests/test_dotnetfile_features.py" = ["F401", "F811"]
"tests/test_static_freeze.py" = ["F401", "F811"]
"tests/_test_proto.py" = ["F401", "F811"]
"tests/_test_result_document.py" = ["F401", "F811"]

View File

@@ -6,6 +6,9 @@ on:
release:
types: [edited, published]
permissions:
contents: write
jobs:
build:
name: PyInstaller for ${{ matrix.os }}

View File

@@ -7,6 +7,8 @@ on:
pull_request_target:
types: [opened, edited, synchronize]
permissions: read-all
jobs:
check_changelog:
# no need to check for dependency updates via dependabot

View File

@@ -6,6 +6,9 @@ on:
release:
types: [published]
permissions:
contents: write
jobs:
pypi-publish:
runs-on: ubuntu-latest
@@ -29,14 +32,7 @@ jobs:
- name: upload package artifacts
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
with:
name: ${{ matrix.asset_name }}
path: dist/*
- name: upload package to GH Release
uses: svenstaro/upload-release-action@2728235f7dc9ff598bd86ce3c274b74f802d2208 # v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN}}
file: dist/*
tag: ${{ github.ref }}
- name: publish package
uses: pypa/gh-action-pypi-publish@f5622bde02b04381239da3573277701ceca8f6a0 # release/v1
with:

View File

@@ -4,6 +4,8 @@ on:
release:
types: [published]
permissions: read-all
jobs:
tag:
name: Tag capa rules

View File

@@ -6,6 +6,8 @@ on:
pull_request:
branches: [ master, "dynamic-feature-extraction" ]
permissions: read-all
# save workspaces to speed up testing
env:
CAPA_SAVE_WORKSPACE: "True"
@@ -27,10 +29,11 @@ jobs:
steps:
- name: Checkout capa
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
- name: Set up Python 3.8
# use latest available python to take advantage of best performance
- name: Set up Python 3.11
uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
with:
python-version: "3.8"
python-version: "3.11"
- name: Install dependencies
run: pip install -e .[dev]
- name: Lint with ruff
@@ -51,10 +54,10 @@ jobs:
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
with:
submodules: recursive
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
with:
python-version: "3.8"
python-version: "3.11"
- name: Install capa
run: pip install -e .[dev]
- name: Run rule linter

2
.gitignore vendored
View File

@@ -124,3 +124,5 @@ Pipfile
Pipfile.lock
/cache/
.github/binja/binaryninja
.github/binja/download_headless.py
.github/binja/BinaryNinja-headless.zip

View File

@@ -1,30 +1,54 @@
# Change Log
## master (unreleased)
- extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736
### New Features
- Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- Add a dynamic feature extractor for the CAPE sandbox @yelhamer [#1535](https://github.com/mandiant/capa/issues/1535)
- Add unit tests for the new CAPE extractor #1563 @yelhamer
- Add a CAPE file format and CAPE-based dynamic feature extraction to scripts/show-features.py #1566 @yelhamer
- Add a new process scope for the dynamic analysis flavor #1517 @yelhamer
- Add a new thread scope for the dynamic analysis flavor #1517 @yelhamer
- Add support for flavor-based rule scopes @yelhamer
- use fancy box drawing characters for default output #1586 @williballenthin
- use [pre-commit](https://pre-commit.com/) to invoke linters #1579 @williballenthin
- publish via PyPI trusted publishing #1491 @williballenthin
- migrate to pyproject.toml #1301 @williballenthin
- Add ProcessesAddress and ThreadAddress #1612 @yelhamer
- Add dynamic capability extraction @yelhamer
### Breaking Changes
- Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat
- Python 3.8 is now the minimum supported Python version #1578 @williballenthin
- Change the old FeatureExtractor class' name into StaticFeatureExtractor, and make the former an alias for both the StaticFeatureExtractor and DynamicFeatureExtractor classes @yelhamer [#1567](https://github.com/mandiant/capa/issues/1567)
### New Rules (1)
- executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com
-
### Bug Fixes
### capa explorer IDA Pro plugin
### Development
### Raw diffs
- [capa v6.0.0...master](https://github.com/mandiant/capa/compare/v6.0.0...master)
- [capa-rules v6.0.0...master](https://github.com/mandiant/capa-rules/compare/v6.0.0...master)
## v6.0.0
capa v6.0 brings many bug fixes and quality improvements, including 64 rule updates and 26 new rules. We're now publishing to PyPI via [Trusted Publishing](https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/) and have migrated to using a `pyproject.toml` file. @Aayush-Goel-04 contributed a lot of new code across many files, so please welcome them to the project, along with @anders-v @crowface28 @dkelly2e @RonnieSalomonsen and @ejfocampo as first-time rule contributors!
For those that use capa as a library, we've introduced some limited breaking changes that better represent data types (versus less-structured data like dictionaries and strings). With the recent deprecation, we've also dropped support for Python 3.7.
### New Features
- add script to detect feature overlap between new and existing capa rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- extract forwarded exports from PE files #1624 @williballenthin
- extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736
- use fancy box drawing characters for default output #1586 @williballenthin
### New Rules (23)
### Breaking Changes
- use a class to represent Metadata (not dict) #1411 @Aayush-Goel-04 @manasghandat
- use pathlib.Path to represent file paths #1534 @Aayush-Goel-04
- Python 3.8 is now the minimum supported Python version #1578 @williballenthin
- Require a Contributor License Agreement (CLA) for PRs going forward #1642 @williballenthin
### New Rules (26)
- load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com
- nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com
@@ -48,7 +72,9 @@
- host-interaction/memory/create-new-application-domain-in-dotnet jakub.jozwiak@mandiant.com
- host-interaction/gui/switch-active-desktop jakub.jozwiak@mandiant.com
- host-interaction/service/query-service-configuration @mr-tz
-
- anti-analysis/anti-av/patch-event-tracing-for-windows-function jakub.jozwiak@mandiant.com
- data-manipulation/encoding/xor/covertly-decode-and-write-data-to-windows-directory-using-indirect-calls dan.kelly@mandiant.com
- linking/runtime-linking/resolve-function-by-brute-ratel-badger-hash jakub.jozwiak@mandiant.com
### Bug Fixes
- extractor: add a Binary Ninja test that asserts its version #1487 @xusheng6
@@ -58,22 +84,30 @@
- symtab: fix struct.unpack() format for 64-bit ELF files @yelhamer
- symtab: safeguard against ZeroDivisionError for files containing a symtab with a null entry size @yelhamer
- improve ELF strtab and needed parsing @mr-tz
- better handle exceptional cases when parsing ELF files [#1458](https://github.com/mandiant/capa/issues/1458) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- Improved testing coverage for Binary Ninja Backend [#1446](https://github.com/mandiant/capa/issues/1446) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- Add logging and print redirect to tqdm for capa main [#749](https://github.com/mandiant/capa/issues/749) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- better handle exceptional cases when parsing ELF files #1458 @Aayush-Goel-04
- improved testing coverage for Binary Ninja backend #1446 @Aayush-Goel-04
- add logging and print redirect to tqdm for capa main #749 @Aayush-Goel-04
- extractor: fix binja installation path detection does not work with Python 3.11
- tests: refine the IDA test runner script #1513 @williballenthin
- output: don't leave behind traces of progress bar @williballenthin
- import-to-ida: fix bug introduced with JSON report changes in v5 #1584 @williballenthin
- main: don't show spinner when emitting debug messages #1636 @williballenthin
- rules: add forwarded export characteristics to rule syntax file scope #1653 @RonnieSalomonsen
### capa explorer IDA Pro plugin
### Development
- update ATT&CK/MBC data for linting #1568 @mr-tz
- log time taken to analyze each function #1290 @williballenthin
- tests: make fixture available via conftest.py #1592 @williballenthin
- publish via PyPI trusted publishing #1491 @williballenthin
- migrate to pyproject.toml #1301 @williballenthin
- use [pre-commit](https://pre-commit.com/) to invoke linters #1579 @williballenthin
### Raw diffs
- [capa v5.1.0...master](https://github.com/mandiant/capa/compare/v5.1.0...master)
- [capa-rules v5.1.0...master](https://github.com/mandiant/capa-rules/compare/v5.1.0...master)
- [capa v5.1.0...v6.0.0](https://github.com/mandiant/capa/compare/v5.1.0...v6.0.0)
- [capa-rules v5.1.0...v6.0.0](https://github.com/mandiant/capa-rules/compare/v5.1.0...v6.0.0)
## v5.1.0
capa version 5.1.0 adds a Protocol Buffers (protobuf) format for result documents. Additionally, the [Vector35](https://vector35.com/) team contributed a new feature extractor using Binary Ninja. Other new features are a new CLI flag to override the detected operating system, functionality to read and render existing result documents, and a output color format that's easier to read.

View File

@@ -187,7 +187,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright (C) 2020 Mandiant, Inc.
Copyright (C) 2023 Mandiant, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -2,7 +2,7 @@
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-811-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-824-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
class UnsupportedRuntimeError(RuntimeError):
pass

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import abc

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -130,7 +130,7 @@ def is_mov_imm_to_stack(il: MediumLevelILInstruction) -> bool:
if il.src.operation != MediumLevelILOperation.MLIL_CONST:
return False
if not il.dest.source_type == VariableSourceType.StackVariableSourceType:
if il.dest.source_type != VariableSourceType.StackVariableSourceType:
return False
return True

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -34,7 +34,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
with open(self.bv, "rb") as f:
with open(self.bv.name, "rb") as f:
self.sample_hashes = SampleHashes.from_sample(f.read())
def get_base_address(self):
@@ -64,9 +64,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
mlil_lookup[mlil_bb.source_block.start] = mlil_bb
for bb in f.basic_blocks:
mlil_bb = None
if bb.start in mlil_lookup:
mlil_bb = mlil_lookup[bb.start]
mlil_bb = mlil_lookup.get(bb.start)
yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -6,6 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import subprocess
from pathlib import Path
# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
@@ -25,9 +26,9 @@ if spec is not None:
"""
def find_binja_path() -> str:
def find_binja_path() -> Path:
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
return bytes.fromhex(raw_output).decode("utf8")
return Path(bytes.fromhex(raw_output).decode("utf8"))
if __name__ == "__main__":

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Tuple, Iterator

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -155,8 +155,7 @@ def extract_insn_number_features(
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)
for result in results:
yield result
yield from results
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
@@ -318,8 +317,7 @@ def extract_insn_offset_features(
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)
for result in results:
yield result
yield from results
def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInstruction) -> bool:
@@ -375,8 +373,7 @@ def extract_insn_nzxor_characteristic_features(
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)
for result in results:
yield result
yield from results
def extract_insn_mnemonic_features(
@@ -438,8 +435,7 @@ def extract_insn_peb_access_characteristic_features(
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)
for result in results:
yield result
yield from results
def extract_insn_segment_access_features(
@@ -466,8 +462,7 @@ def extract_insn_segment_access_features(
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)
for result in results:
yield result
yield from results
def extract_insn_cross_section_cflow(

View File

@@ -1,10 +1,11 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Dict, Tuple, Union, Iterator

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,10 +1,11 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Any, Dict, List
from capa.features.extractors.base_extractor import ProcessHandle

View File

@@ -1,10 +1,11 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Dict, List, Tuple, Iterator

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import logging
import binascii

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -9,6 +9,7 @@
from __future__ import annotations
from typing import Dict, List, Tuple, Union, Iterator, Optional
from pathlib import Path
import dnfile
from dncil.cil.opcode import OpCodes
@@ -58,27 +59,26 @@ class DnFileFeatureExtractorCache:
self.types[type_.token] = type_
def get_import(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.imports.get(token, None)
return self.imports.get(token)
def get_native_import(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.native_imports.get(token, None)
return self.native_imports.get(token)
def get_method(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.methods.get(token, None)
return self.methods.get(token)
def get_field(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.fields.get(token, None)
return self.fields.get(token)
def get_type(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.types.get(token, None)
return self.types.get(token)
class DnfileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.pe: dnfile.dnPE = dnfile.dnPE(path)
with open(path, "rb") as f:
self.sample_hashes = SampleHashes.from_sample(f.read())
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
self.sample_hashes = SampleHashes.from_sample(path.read_bytes())
# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
# most relevant at instruction scope
@@ -130,7 +130,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
address: DNTokenAddress = DNTokenAddress(insn.operand.value)
# record call to destination method; note: we only consider MethodDef methods for destinations
dest: Optional[FunctionHandle] = methods.get(address, None)
dest: Optional[FunctionHandle] = methods.get(address)
if dest is not None:
dest.ctx["calls_to"].add(fh.address)

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -52,7 +52,7 @@ def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Union[dnfile.base.MDT
return InvalidToken(token.value)
return user_string
table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table, None)
table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table)
if table is None:
# table index is not valid
return InvalidToken(token.value)
@@ -204,7 +204,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
continue
token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
access: Optional[str] = accessor_map.get(token, None)
access: Optional[str] = accessor_map.get(token)
method_name: str = method.row.Name
if method_name.startswith(("get_", "set_")):

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -9,7 +9,7 @@
from typing import Optional
class DnType(object):
class DnType:
def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None):
self.token: int = token
self.access: Optional[str] = access

View File

@@ -1,5 +1,13 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Tuple, Iterator
from pathlib import Path
import dnfile
import pefile
@@ -74,10 +82,10 @@ GLOBAL_HANDLERS = (
class DnfileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path: str = path
self.pe: dnfile.dnPE = dnfile.dnPE(path)
self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
def get_base_address(self) -> AbsoluteVirtualAddress:
return AbsoluteVirtualAddress(0x0)

View File

@@ -1,5 +1,13 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Tuple, Iterator
from pathlib import Path
import dnfile
import pefile
@@ -158,10 +166,10 @@ GLOBAL_HANDLERS = (
class DotnetFileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path: str = path
self.pe: dnfile.dnPE = dnfile.dnPE(path)
self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
def get_base_address(self):
return NO_ADDRESS

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -706,8 +706,7 @@ class SymTab:
return a tuple: (name, value, size, info, other, shndx)
for each symbol contained in the symbol table
"""
for symbol in self.symbols:
yield symbol
yield from self.symbols
@classmethod
def from_Elf(cls, ElfBinary) -> Optional["SymTab"]:

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -8,6 +8,7 @@
import io
import logging
from typing import Tuple, Iterator
from pathlib import Path
from elftools.elf.elffile import ELFFile, SymbolTableSection
@@ -107,11 +108,10 @@ GLOBAL_HANDLERS = (
class ElfFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path = path
with open(self.path, "rb") as f:
self.elf = ELFFile(io.BytesIO(f.read()))
self.path: Path = path
self.elf = ELFFile(io.BytesIO(path.read_bytes()))
def get_base_address(self):
# virtual address of the first segment with type LOAD
@@ -120,15 +120,13 @@ class ElfFeatureExtractor(StaticFeatureExtractor):
return AbsoluteVirtualAddress(segment.header.p_vaddr)
def extract_global_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = self.path.read_bytes()
for feature, addr in extract_global_features(self.elf, buf):
yield feature, addr
def extract_file_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = self.path.read_bytes()
for feature, addr in extract_file_features(self.elf, buf):
yield feature, addr

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -74,6 +74,23 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
yield symbol[:-1]
def reformat_forwarded_export_name(forwarded_name: str) -> str:
"""
a forwarded export has a DLL name/path an symbol name.
we want the former to be lowercase, and the latter to be verbatim.
"""
# use rpartition so we can split on separator between dll and name.
# the dll name can be a full path, like in the case of
# ef64d6d7c34250af8e21a10feb931c9b
# which i assume means the path can have embedded periods.
# so we don't want the first period, we want the last.
forwarded_dll, _, forwarded_symbol = forwarded_name.rpartition(".")
forwarded_dll = forwarded_dll.lower()
return f"{forwarded_dll}.{forwarded_symbol}"
def all_zeros(bytez: bytes) -> bool:
return all(b == 0 for b in builtins.bytes(bytez))

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -12,6 +12,7 @@ from typing import Tuple, Iterator
import idc
import idaapi
import idautils
import ida_entry
import capa.features.extractors.common
import capa.features.extractors.helpers
@@ -83,8 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
"""extract function exports"""
for _, _, ea, name in idautils.Entries():
yield Export(name), AbsoluteVirtualAddress(ea)
for _, ordinal, ea, name in idautils.Entries():
forwarded_name = ida_entry.get_entry_forwarder(ordinal)
if forwarded_name is None:
yield Export(name), AbsoluteVirtualAddress(ea)
else:
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
yield Export(forwarded_name), AbsoluteVirtualAddress(ea)
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea)
def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
import contextlib
from typing import Tuple, Iterator

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -122,7 +122,7 @@ def get_file_externs() -> Dict[int, Tuple[str, str, int]]:
externs = {}
for seg in get_segments(skip_header_segments=True):
if not (seg.type == ida_segment.SEG_XTRN):
if seg.type != ida_segment.SEG_XTRN:
continue
for ea in idautils.Functions(seg.start_ea, seg.end_ea):
@@ -275,20 +275,18 @@ def is_op_offset(insn: idaapi.insn_t, op: idaapi.op_t) -> bool:
def is_sp_modified(insn: idaapi.insn_t) -> bool:
"""determine if instruction modifies SP, ESP, RSP"""
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,)):
if op.reg == idautils.procregs.sp.reg and is_op_write(insn, op):
# register is stack and written
return True
return False
return any(
op.reg == idautils.procregs.sp.reg and is_op_write(insn, op)
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,))
)
def is_bp_modified(insn: idaapi.insn_t) -> bool:
"""check if instruction modifies BP, EBP, RBP"""
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,)):
if op.reg == idautils.procregs.bp.reg and is_op_write(insn, op):
# register is base and written
return True
return False
return any(
op.reg == idautils.procregs.bp.reg and is_op_write(insn, op)
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,))
)
def is_frame_register(reg: int) -> bool:
@@ -334,10 +332,7 @@ def mask_op_val(op: idaapi.op_t) -> int:
def is_function_recursive(f: idaapi.func_t) -> bool:
"""check if function is recursive"""
for ref in idautils.CodeRefsTo(f.start_ea, True):
if f.contains(ref):
return True
return False
return any(f.contains(ref) for ref in idautils.CodeRefsTo(f.start_ea, True))
def is_basic_block_tight_loop(bb: idaapi.BasicBlock) -> bool:
@@ -386,8 +381,7 @@ def find_data_reference_from_insn(insn: idaapi.insn_t, max_depth: int = 10) -> i
def get_function_blocks(f: idaapi.func_t) -> Iterator[idaapi.BasicBlock]:
"""yield basic blocks contained in specified function"""
# leverage idaapi.FC_NOEXT flag to ignore useless external blocks referenced by the function
for block in idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT)):
yield block
yield from idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT))
def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -216,7 +216,7 @@ def extract_insn_offset_features(
p_info = capa.features.extractors.ida.helpers.get_op_phrase_info(op)
op_off = p_info.get("offset", None)
op_off = p_info.get("offset")
if op_off is None:
continue
@@ -447,7 +447,7 @@ def extract_insn_cross_section_cflow(
insn: idaapi.insn_t = ih.inner
for ref in idautils.CodeRefsFrom(insn.ea, False):
if ref in get_imports(fh.ctx).keys():
if ref in get_imports(fh.ctx):
# ignore API calls
continue
if not idaapi.getseg(ref):

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Dict, List, Tuple, Union
from dataclasses import dataclass

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import logging
from pathlib import Path
import pefile
@@ -39,8 +40,20 @@ def extract_file_export_names(pe, **kwargs):
name = export.name.partition(b"\x00")[0].decode("ascii")
except UnicodeDecodeError:
continue
va = base_address + export.address
yield Export(name), AbsoluteVirtualAddress(va)
if export.forwarder is None:
va = base_address + export.address
yield Export(name), AbsoluteVirtualAddress(va)
else:
try:
forwarded_name = export.forwarder.partition(b"\x00")[0].decode("ascii")
except UnicodeDecodeError:
continue
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
va = base_address + export.address
yield Export(forwarded_name), AbsoluteVirtualAddress(va)
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va)
def extract_file_import_names(pe, **kwargs):
@@ -173,23 +186,21 @@ GLOBAL_HANDLERS = (
class PefileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path = path
self.pe = pefile.PE(path)
self.path: Path = path
self.pe = pefile.PE(str(path))
def get_base_address(self):
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)
def extract_global_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = Path(self.path).read_bytes()
yield from extract_global_features(self.pe, buf)
def extract_file_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = Path(self.path).read_bytes()
yield from extract_file_features(self.pe, buf)

View File

@@ -1,6 +1,6 @@
# strings code from FLOSS, https://github.com/mandiant/flare-floss
#
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -9,6 +9,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import re
import contextlib
from collections import namedtuple
ASCII_BYTE = r" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t".encode(
@@ -81,7 +82,5 @@ def extract_unicode_strings(buf, n=4):
reg = b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n)
r = re.compile(reg)
for match in r.finditer(buf):
try:
with contextlib.suppress(UnicodeDecodeError):
yield String(match.group().decode("utf-16"), match.start())
except UnicodeDecodeError:
pass

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Any, Dict, List, Tuple, Iterator
from pathlib import Path
import viv_utils
import viv_utils.flirt
@@ -31,13 +32,12 @@ logger = logging.getLogger(__name__)
class VivisectFeatureExtractor(StaticFeatureExtractor):
def __init__(self, vw, path, os):
def __init__(self, vw, path: Path, os):
super().__init__()
self.vw = vw
self.path = path
with open(path, "rb") as f:
self.buf = f.read()
self.sample_hashes = SampleHashes.from_sample(self.buf)
self.buf = path.read_bytes()
self.sample_hashes = SampleHashes.from_sample(self.buf)
# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -8,6 +8,7 @@
from typing import Tuple, Iterator
import PE.carve as pe_carve # vivisect PE
import vivisect
import viv_utils
import viv_utils.flirt
@@ -25,10 +26,35 @@ def extract_file_embedded_pe(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
def extract_file_export_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
def get_first_vw_filename(vw: vivisect.VivWorkspace):
# vivisect associates metadata with each file that its loaded into the workspace.
# capa only loads a single file into each workspace.
# so to access the metadata for the file in question, we can just take the first one.
# otherwise, we'd have to pass around the module name of the file we're analyzing,
# which is a pain.
#
# so this is a simplifying assumption.
return next(iter(vw.filemeta.keys()))
def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[Tuple[Feature, Address]]:
for va, _, name, _ in vw.getExports():
yield Export(name), AbsoluteVirtualAddress(va)
if vw.getMeta("Format") == "pe":
pe = vw.parsedbin
baseaddr = pe.IMAGE_NT_HEADERS.OptionalHeader.ImageBase
for rva, _, forwarded_name in vw.getFileMeta(get_first_vw_filename(vw), "forwarders"):
try:
forwarded_name = forwarded_name.partition(b"\x00")[0].decode("ascii")
except UnicodeDecodeError:
continue
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
va = baseaddr + rva
yield Export(forwarded_name), AbsoluteVirtualAddress(va)
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va)
def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
"""

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Tuple, Iterator

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -7,7 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import collections
from typing import Set, List, Deque, Tuple, Union, Optional
from typing import Set, List, Deque, Tuple, Optional
import envi
import vivisect.const
@@ -71,7 +71,7 @@ class NotFoundError(Exception):
pass
def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Union[int, None]]:
def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[int]]:
"""
scan backwards from the given address looking for assignments to the given register.
if a constant, return that value.

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -410,9 +410,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle)
if insn.va + 5 == insn.opers[0].getOperValue(insn):
yield Characteristic("call $+5"), ih.address
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper) or isinstance(
insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper
):
if isinstance(insn.opers[0], (envi.archs.i386.disasm.i386ImmMemOper, envi.archs.amd64.disasm.Amd64RipRelOper)):
if insn.va + 5 == insn.opers[0].getOperAddr(insn):
yield Characteristic("call $+5"), ih.address

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,7 +1,7 @@
"""
capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`
Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -592,6 +592,7 @@ def load(buf: bytes):
def main(argv=None):
import sys
import argparse
from pathlib import Path
import capa.main
@@ -608,8 +609,7 @@ def main(argv=None):
extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False)
with open(args.output, "wb") as f:
f.write(dump(extractor))
Path(args.output).write_bytes(dump(extractor))
return 0

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import binascii
from typing import Union, Optional

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,17 +1,17 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import json
import inspect
import logging
import contextlib
import importlib.util
from typing import NoReturn
from pathlib import Path
import tqdm
@@ -34,11 +34,10 @@ def hex(n: int) -> str:
return f"0x{(n):X}"
def get_file_taste(sample_path: str) -> bytes:
if not os.path.exists(sample_path):
def get_file_taste(sample_path: Path) -> bytes:
if not sample_path.exists():
raise IOError(f"sample path {sample_path} does not exist or cannot be accessed")
with open(sample_path, "rb") as f:
taste = f.read(8)
taste = sample_path.open("rb").read(8)
return taste
@@ -52,26 +51,25 @@ def assert_never(value) -> NoReturn:
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
def get_format_from_report(sample: str) -> str:
with open(sample, "rb") as f:
report = json.load(f)
if "CAPE" in report.keys():
def get_format_from_report(sample: Path) -> str:
report = json.load(sample.open())
if "CAPE" in report:
return FORMAT_CAPE
return FORMAT_UNKNOWN
def get_format_from_extension(sample: str) -> str:
def get_format_from_extension(sample: Path) -> str:
format_ = FORMAT_UNKNOWN
if sample.endswith(EXTENSIONS_SHELLCODE_32):
if sample.name.endswith(EXTENSIONS_SHELLCODE_32):
format_ = FORMAT_SC32
elif sample.endswith(EXTENSIONS_SHELLCODE_64):
elif sample.name.endswith(EXTENSIONS_SHELLCODE_64):
format_ = FORMAT_SC64
elif sample.endswith(EXTENSIONS_DYNAMIC):
elif sample.name.endswith(EXTENSIONS_DYNAMIC):
format_ = get_format_from_report(sample)
return format_
def get_auto_format(path: str) -> str:
def get_auto_format(path: Path) -> str:
format_ = get_format(path)
if format_ == FORMAT_UNKNOWN:
format_ = get_format_from_extension(path)
@@ -80,13 +78,12 @@ def get_auto_format(path: str) -> str:
return format_
def get_format(sample: str) -> str:
def get_format(sample: Path) -> str:
# imported locally to avoid import cycle
from capa.features.extractors.common import extract_format
from capa.features.extractors.dnfile_ import DnfileFeatureExtractor
with open(sample, "rb") as f:
buf = f.read()
buf = sample.read_bytes()
for feature, _ in extract_format(buf):
if feature == Format(FORMAT_PE):

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -9,7 +9,8 @@ import json
import logging
import datetime
import contextlib
from typing import Optional
from typing import List, Optional
from pathlib import Path
import idc
import idaapi
@@ -120,7 +121,7 @@ def get_file_sha256():
return sha256
def collect_metadata(rules):
def collect_metadata(rules: List[Path]):
""" """
md5 = get_file_md5()
sha256 = get_file_sha256()
@@ -157,7 +158,7 @@ def collect_metadata(rules):
arch=arch,
os=os,
extractor="ida",
rules=rules,
rules=tuple(r.resolve().absolute().as_posix() for r in rules),
base_address=capa.features.freeze.Address.from_capa(idaapi.get_imagebase()),
layout=rdoc.Layout(
functions=(),

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -197,11 +197,11 @@ class CapaRuleGenFeatureCache:
return features, matches
def _get_cached_func_node(self, fh: FunctionHandle) -> Optional[CapaRuleGenFeatureCacheNode]:
f_node: Optional[CapaRuleGenFeatureCacheNode] = self.func_nodes.get(fh.address, None)
f_node: Optional[CapaRuleGenFeatureCacheNode] = self.func_nodes.get(fh.address)
if f_node is None:
# function is not in our cache, do extraction now
self._find_function_and_below_features(fh)
f_node = self.func_nodes.get(fh.address, None)
f_node = self.func_nodes.get(fh.address)
return f_node
def get_all_function_features(self, fh: FunctionHandle) -> FeatureSet:

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,17 +1,17 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import copy
import logging
import itertools
import collections
from enum import IntFlag
from typing import Any, List, Optional
from pathlib import Path
import idaapi
import ida_kernwin
@@ -71,10 +71,9 @@ AnalyzeOptionsText = {
}
def write_file(path, data):
def write_file(path: Path, data):
""" """
with open(path, "wb") as save_file:
save_file.write(data)
path.write_bytes(data)
def trim_function_name(f, max_length=25):
@@ -574,10 +573,10 @@ class CapaExplorerForm(idaapi.PluginForm):
def ensure_capa_settings_rule_path(self):
try:
path: str = settings.user.get(CAPA_SETTINGS_RULE_PATH, "")
path: Path = Path(settings.user.get(CAPA_SETTINGS_RULE_PATH, ""))
# resolve rules directory - check self and settings first, then ask user
if not os.path.exists(path):
if not path.exists():
# configure rules selection messagebox
rules_message = QtWidgets.QMessageBox()
rules_message.setIcon(QtWidgets.QMessageBox.Information)
@@ -595,15 +594,15 @@ class CapaExplorerForm(idaapi.PluginForm):
if pressed == QtWidgets.QMessageBox.Cancel:
raise UserCancelledError()
path = self.ask_user_directory()
path = Path(self.ask_user_directory())
if not path:
raise UserCancelledError()
if not os.path.exists(path):
if not path.exists():
logger.error("rule path %s does not exist or cannot be accessed", path)
return False
settings.user[CAPA_SETTINGS_RULE_PATH] = path
settings.user[CAPA_SETTINGS_RULE_PATH] = str(path)
except UserCancelledError:
capa.ida.helpers.inform_user_ida_ui("Analysis requires capa rules")
logger.warning(
@@ -627,7 +626,7 @@ class CapaExplorerForm(idaapi.PluginForm):
if not self.ensure_capa_settings_rule_path():
return False
rule_path: str = settings.user.get(CAPA_SETTINGS_RULE_PATH, "")
rule_path: Path = Path(settings.user.get(CAPA_SETTINGS_RULE_PATH, ""))
try:
def on_load_rule(_, i, total):
@@ -767,7 +766,7 @@ class CapaExplorerForm(idaapi.PluginForm):
update_wait_box("extracting features")
try:
meta = capa.ida.helpers.collect_metadata([settings.user[CAPA_SETTINGS_RULE_PATH]])
meta = capa.ida.helpers.collect_metadata([Path(settings.user[CAPA_SETTINGS_RULE_PATH])])
capabilities, counts = capa.main.find_capabilities(
ruleset, self.feature_extractor, disable_progress=True
)
@@ -1208,11 +1207,11 @@ class CapaExplorerForm(idaapi.PluginForm):
self.set_rulegen_status(f"Failed to create function rule matches from rule set ({e})")
return
if capa.rules.Scope.FUNCTION in rule.scopes and rule.name in func_matches.keys():
if capa.rules.Scope.FUNCTION in rule.scopes and rule.name in func_matches:
is_match = True
elif capa.rules.Scope.BASIC_BLOCK in rule.scopes and rule.name in bb_matches.keys():
elif capa.rules.Scope.BASIC_BLOCK in rule.scopes and rule.name in bb_matches:
is_match = True
elif capa.rules.Scope.INSTRUCTION in rule.scopes and rule.name in insn_matches.keys():
elif capa.rules.Scope.INSTRUCTION in rule.scopes and rule.name in insn_matches:
is_match = True
elif capa.rules.Scope.FILE in rule.scopes:
try:
@@ -1220,7 +1219,7 @@ class CapaExplorerForm(idaapi.PluginForm):
except Exception as e:
self.set_rulegen_status(f"Failed to create file rule matches from rule set ({e})")
return
if rule.name in file_matches.keys():
if rule.name in file_matches:
is_match = True
else:
is_match = False
@@ -1310,8 +1309,8 @@ class CapaExplorerForm(idaapi.PluginForm):
s = self.resdoc_cache.json().encode("utf-8")
path = self.ask_user_capa_json_file()
if not path:
path = Path(self.ask_user_capa_json_file())
if not path.exists():
return
write_file(path, s)
@@ -1323,8 +1322,8 @@ class CapaExplorerForm(idaapi.PluginForm):
idaapi.info("No rule to save.")
return
path = self.ask_user_capa_rule_file()
if not path:
path = Path(self.ask_user_capa_rule_file())
if not path.exists():
return
write_file(path, s)

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -30,7 +30,7 @@ class CapaExplorerIdaHooks(idaapi.UI_Hooks):
@retval must be 0
"""
self.process_action_handle = self.process_action_hooks.get(name, None)
self.process_action_handle = self.process_action_hooks.get(name)
if self.process_action_handle:
self.process_action_handle(self.process_action_meta)

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import base64
# this is just `capa/.github/icon.png`.

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -130,8 +130,7 @@ class CapaExplorerDataItem:
def children(self) -> Iterator["CapaExplorerDataItem"]:
"""yield children"""
for child in self._children:
yield child
yield from self._children
def removeChildren(self):
"""remove children"""

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -628,7 +628,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
matched_rule_source = ""
# check if match is a matched rule
matched_rule = doc.rules.get(feature.match, None)
matched_rule = doc.rules.get(feature.match)
if matched_rule is not None:
matched_rule_source = matched_rule.source

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -1224,8 +1224,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
yield self.new_action(*action)
# add default actions
for action in self.load_default_context_menu_actions(data):
yield action
yield from self.load_default_context_menu_actions(data)
def load_default_context_menu(self, pos, item, model_index):
"""create default custom context menu

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -14,7 +14,6 @@ import sys
import json
import time
import logging
import os.path
import argparse
import datetime
import textwrap
@@ -22,6 +21,7 @@ import itertools
import contextlib
import collections
from typing import Any, Dict, List, Tuple, Callable
from pathlib import Path
import halo
import tqdm
@@ -96,6 +96,7 @@ SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
BACKEND_VIV = "vivisect"
BACKEND_DOTNET = "dotnet"
BACKEND_BINJA = "binja"
BACKEND_PEFILE = "pefile"
E_MISSING_RULES = 10
E_MISSING_FILE = 11
@@ -280,6 +281,7 @@ def find_static_capabilities(
pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions", leave=False)
for f in pb:
t0 = time.time()
if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
@@ -298,7 +300,18 @@ def find_static_capabilities(
feature_counts.functions += (
rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
)
logger.debug("analyzed function 0x%x and extracted %d features", f.address, feature_count)
t1 = time.time()
match_count = sum(len(res) for res in function_matches.values())
match_count += sum(len(res) for res in bb_matches.values())
match_count += sum(len(res) for res in insn_matches.values())
logger.debug(
"analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
f.address,
feature_count,
match_count,
t1 - t0,
)
for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res)
@@ -470,10 +483,9 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, **kwargs) -
def has_rule_with_namespace(rules: RuleSet, capabilities: MatchResults, namespace: str) -> bool:
for rule_name in capabilities.keys():
if rules.rules[rule_name].meta.get("namespace", "").startswith(namespace):
return True
return False
return any(
rules.rules[rule_name].meta.get("namespace", "").startswith(namespace) for rule_name in capabilities.keys()
)
def is_internal_rule(rule: Rule) -> bool:
@@ -506,26 +518,23 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
return False
def is_supported_format(sample: str) -> bool:
def is_supported_format(sample: Path) -> bool:
"""
Return if this is a supported file based on magic header values
"""
with open(sample, "rb") as f:
taste = f.read(0x100)
taste = sample.open("rb").read(0x100)
return len(list(capa.features.extractors.common.extract_format(taste))) == 1
def is_supported_arch(sample: str) -> bool:
with open(sample, "rb") as f:
buf = f.read()
def is_supported_arch(sample: Path) -> bool:
buf = sample.read_bytes()
return len(list(capa.features.extractors.common.extract_arch(buf))) == 1
def get_arch(sample: str) -> str:
with open(sample, "rb") as f:
buf = f.read()
def get_arch(sample: Path) -> str:
buf = sample.read_bytes()
for feature, _ in capa.features.extractors.common.extract_arch(buf):
assert isinstance(feature.value, str)
@@ -534,16 +543,14 @@ def get_arch(sample: str) -> str:
return "unknown"
def is_supported_os(sample: str) -> bool:
with open(sample, "rb") as f:
buf = f.read()
def is_supported_os(sample: Path) -> bool:
buf = sample.read_bytes()
return len(list(capa.features.extractors.common.extract_os(buf))) == 1
def get_os(sample: str) -> str:
with open(sample, "rb") as f:
buf = f.read()
def get_os(sample: Path) -> str:
buf = sample.read_bytes()
for feature, _ in capa.features.extractors.common.extract_os(buf):
assert isinstance(feature.value, str)
@@ -571,7 +578,7 @@ def is_running_standalone() -> bool:
return hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS")
def get_default_root() -> str:
def get_default_root() -> Path:
"""
get the file system path to the default resources directory.
under PyInstaller, this comes from _MEIPASS.
@@ -582,30 +589,27 @@ def get_default_root() -> str:
# its injected by pyinstaller.
# so we'll fetch this attribute dynamically.
assert hasattr(sys, "_MEIPASS")
return sys._MEIPASS
return Path(sys._MEIPASS)
else:
return os.path.join(os.path.dirname(__file__), "..")
return Path(__file__).resolve().parent.parent
def get_default_signatures() -> List[str]:
def get_default_signatures() -> List[Path]:
"""
compute a list of file system paths to the default FLIRT signatures.
"""
sigs_path = os.path.join(get_default_root(), "sigs")
sigs_path = get_default_root() / "sigs"
logger.debug("signatures path: %s", sigs_path)
ret = []
for root, _, files in os.walk(sigs_path):
for file in files:
if not (file.endswith(".pat") or file.endswith(".pat.gz") or file.endswith(".sig")):
continue
ret.append(os.path.join(root, file))
for file in sigs_path.rglob("*"):
if file.is_file() and file.suffix.lower() in (".pat", ".pat.gz", ".sig"):
ret.append(file)
return ret
def get_workspace(path, format_, sigpaths):
def get_workspace(path: Path, format_: str, sigpaths: List[Path]):
"""
load the program at the given path into a vivisect workspace using the given format.
also apply the given FLIRT signatures.
@@ -631,18 +635,18 @@ def get_workspace(path, format_, sigpaths):
raise UnsupportedFormatError()
# don't analyze, so that we can add our Flirt function analyzer first.
vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif format_ in {FORMAT_PE, FORMAT_ELF}:
vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif format_ == FORMAT_SC32:
# these are not analyzed nor saved.
vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False)
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
elif format_ == FORMAT_SC64:
vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False)
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
else:
raise ValueError("unexpected format: " + format_)
viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
vw.analyze()
@@ -651,11 +655,11 @@ def get_workspace(path, format_, sigpaths):
def get_extractor(
path: str,
path: Path,
format_: str,
os_: str,
backend: str,
sigpaths: List[str],
sigpaths: List[Path],
should_save_workspace=False,
disable_progress=False,
) -> FeatureExtractor:
@@ -679,8 +683,7 @@ def get_extractor(
if format_ == FORMAT_CAPE:
import capa.features.extractors.cape.extractor
with open(path, "rb") as f:
report = json.load(f)
report = json.load(Path(path).open())
return capa.features.extractors.cape.extractor.CapeExtractor.from_report(report)
elif format_ == FORMAT_DOTNET:
@@ -695,8 +698,8 @@ def get_extractor(
# We need to fist find the binja API installation path and add it into sys.path
if is_running_standalone():
bn_api = find_binja_path()
if os.path.exists(bn_api):
sys.path.append(bn_api)
if bn_api.exists():
sys.path.append(str(bn_api))
try:
from binaryninja import BinaryView, BinaryViewType
@@ -709,14 +712,18 @@ def get_extractor(
import capa.features.extractors.binja.extractor
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
bv: BinaryView = BinaryViewType.get_view_of_file(path)
bv: BinaryView = BinaryViewType.get_view_of_file(str(path))
if bv is None:
raise RuntimeError(f"Binary Ninja cannot open file {path}")
return capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)
# default to use vivisect backend
else:
elif backend == BACKEND_PEFILE:
import capa.features.extractors.pefile
return capa.features.extractors.pefile.PefileFeatureExtractor(path)
elif backend == BACKEND_VIV:
import capa.features.extractors.viv.extractor
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
@@ -734,8 +741,11 @@ def get_extractor(
return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os_)
else:
raise ValueError("unexpected backend: " + backend)
def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:
file_extractors: List[FeatureExtractor] = []
if format_ == FORMAT_PE:
@@ -749,14 +759,13 @@ def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
elif format_ == FORMAT_CAPE:
with open(sample, "rb") as f:
report = json.load(f)
report = json.load(Path(sample).open())
file_extractors.append(capa.features.extractors.cape.extractor.CapeExtractor.from_report(report))
return file_extractors
def is_nursery_rule_path(path: str) -> bool:
def is_nursery_rule_path(path: Path) -> bool:
"""
The nursery is a spot for rules that have not yet been fully polished.
For example, they may not have references to public example of a technique.
@@ -766,21 +775,21 @@ def is_nursery_rule_path(path: str) -> bool:
When nursery rules are loaded, their metadata section should be updated with:
`nursery=True`.
"""
return "nursery" in path
return "nursery" in path.parts
def collect_rule_file_paths(rule_paths: List[str]) -> List[str]:
def collect_rule_file_paths(rule_paths: List[Path]) -> List[Path]:
"""
collect all rule file paths, including those in subdirectories.
"""
rule_file_paths = []
for rule_path in rule_paths:
if not os.path.exists(rule_path):
if not rule_path.exists():
raise IOError(f"rule path {rule_path} does not exist or cannot be accessed")
if os.path.isfile(rule_path):
if rule_path.is_file():
rule_file_paths.append(rule_path)
elif os.path.isdir(rule_path):
elif rule_path.is_dir():
logger.debug("reading rules from directory %s", rule_path)
for root, _, files in os.walk(rule_path):
if ".git" in root:
@@ -797,14 +806,12 @@ def collect_rule_file_paths(rule_paths: List[str]) -> List[str]:
# other things maybe are rules, but are mis-named.
logger.warning("skipping non-.yml file: %s", file)
continue
rule_path = os.path.join(root, file)
rule_file_paths.append(rule_path)
rule_file_paths.append(Path(root) / file)
return rule_file_paths
# TypeAlias. note: using `foo: TypeAlias = bar` is Python 3.10+
RulePath = str
RulePath = Path
def on_load_rule_default(_path: RulePath, i: int, _total: int) -> None:
@@ -824,17 +831,13 @@ def get_rules(
"""
if cache_dir is None:
cache_dir = capa.rules.cache.get_default_cache_directory()
# rule_paths may contain directory paths,
# so search for file paths recursively.
rule_file_paths = collect_rule_file_paths(rule_paths)
# this list is parallel to `rule_file_paths`:
# rule_file_paths[i] corresponds to rule_contents[i].
rule_contents = []
for file_path in rule_file_paths:
with open(file_path, "rb") as f:
rule_contents.append(f.read())
rule_contents = [file_path.read_bytes() for file_path in rule_file_paths]
ruleset = capa.rules.cache.load_cached_ruleset(cache_dir, rule_contents)
if ruleset is not None:
@@ -851,9 +854,8 @@ def get_rules(
except capa.rules.InvalidRule:
raise
else:
rule.meta["capa/path"] = path
if is_nursery_rule_path(path):
rule.meta["capa/nursery"] = True
rule.meta["capa/path"] = path.as_posix()
rule.meta["capa/nursery"] = is_nursery_rule_path(path)
rules.append(rule)
logger.debug("loaded rule: '%s' with scope: %s", rule.name, rule.scopes)
@@ -865,27 +867,25 @@ def get_rules(
return ruleset
def get_signatures(sigs_path):
if not os.path.exists(sigs_path):
def get_signatures(sigs_path: Path) -> List[Path]:
if not sigs_path.exists():
raise IOError(f"signatures path {sigs_path} does not exist or cannot be accessed")
paths = []
if os.path.isfile(sigs_path):
paths: List[Path] = []
if sigs_path.is_file():
paths.append(sigs_path)
elif os.path.isdir(sigs_path):
logger.debug("reading signatures from directory %s", os.path.abspath(os.path.normpath(sigs_path)))
for root, _, files in os.walk(sigs_path):
for file in files:
if file.endswith((".pat", ".pat.gz", ".sig")):
sig_path = os.path.join(root, file)
paths.append(sig_path)
elif sigs_path.is_dir():
logger.debug("reading signatures from directory %s", sigs_path.resolve())
for file in sigs_path.rglob("*"):
if file.is_file() and file.suffix.lower() in (".pat", ".pat.gz", ".sig"):
paths.append(file)
# nicely normalize and format path so that debugging messages are clearer
paths = [os.path.abspath(os.path.normpath(path)) for path in paths]
# Convert paths to their absolute and normalized forms
paths = [path.resolve().absolute() for path in paths]
# load signatures in deterministic order: the alphabetic sorting of filename.
# this means that `0_sigs.pat` loads before `1_sigs.pat`.
paths = sorted(paths, key=os.path.basename)
paths = sorted(paths, key=lambda path: path.name)
for path in paths:
logger.debug("found signature file: %s", path)
@@ -930,10 +930,10 @@ def get_sample_analysis(format_, arch, os_, extractor, rules_path, counts):
def collect_metadata(
argv: List[str],
sample_path: str,
sample_path: Path,
format_: str,
os_: str,
rules_path: List[str],
rules_path: List[Path],
extractor: FeatureExtractor,
counts: dict,
) -> rdoc.Metadata:
@@ -941,9 +941,7 @@ def collect_metadata(
# we fetch the hashes from the report
md5, sha1, sha256 = extractor.get_sample_hashes()
if rules_path != [RULES_PATH_DEFAULT_STRING]:
rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path]
rules = tuple(r.resolve().absolute().as_posix() for r in rules_path)
format_ = get_format(sample_path) if format_ == FORMAT_AUTO else format_
arch = get_arch(sample_path)
os_ = get_os(sample_path) if os_ == OS_AUTO else os_
@@ -1150,7 +1148,7 @@ def install_common_args(parser, wanted=None):
"--backend",
type=str,
help="select the backend to use",
choices=(BACKEND_VIV, BACKEND_BINJA),
choices=(BACKEND_VIV, BACKEND_BINJA, BACKEND_PEFILE),
default=BACKEND_VIV,
)
@@ -1247,8 +1245,11 @@ def handle_common_args(args):
else:
raise RuntimeError("unexpected --color value: " + args.color)
if hasattr(args, "sample"):
args.sample = Path(args.sample)
if hasattr(args, "rules"):
rules_paths: List[str] = []
rules_paths: List[Path] = []
if args.rules == [RULES_PATH_DEFAULT_STRING]:
logger.debug("-" * 80)
@@ -1258,9 +1259,9 @@ def handle_common_args(args):
logger.debug(" https://github.com/mandiant/capa-rules")
logger.debug("-" * 80)
default_rule_path = os.path.join(get_default_root(), "rules")
default_rule_path = get_default_root() / "rules"
if not os.path.exists(default_rule_path):
if not default_rule_path.exists():
# when a users installs capa via pip,
# this pulls down just the source code - not the default rules.
# i'm not sure the default rules should even be written to the library directory,
@@ -1272,10 +1273,9 @@ def handle_common_args(args):
rules_paths.append(default_rule_path)
args.is_default_rules = True
else:
rules_paths = args.rules
if RULES_PATH_DEFAULT_STRING in rules_paths:
rules_paths.remove(RULES_PATH_DEFAULT_STRING)
for rule in args.rules:
if RULES_PATH_DEFAULT_STRING != rule:
rules_paths.append(Path(rule))
for rule_path in rules_paths:
logger.debug("using rules path: %s", rule_path)
@@ -1293,8 +1293,9 @@ def handle_common_args(args):
)
logger.debug("-" * 80)
sigs_path = os.path.join(get_default_root(), "sigs")
if not os.path.exists(sigs_path):
sigs_path = get_default_root() / "sigs"
if not sigs_path.exists():
logger.error(
"Using default signature path, but it doesn't exist. " # noqa: G003 [logging statement uses +]
+ "Please install the signatures first: "
@@ -1302,13 +1303,13 @@ def handle_common_args(args):
)
raise IOError(f"signatures path {sigs_path} does not exist or cannot be accessed")
else:
sigs_path = args.signatures
sigs_path = Path(args.signatures)
logger.debug("using signatures path: %s", sigs_path)
args.signatures = sigs_path
def main(argv=None):
def main(argv: Optional[List[str]] = None):
if sys.version_info < (3, 8):
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
@@ -1375,7 +1376,7 @@ def main(argv=None):
try:
if is_running_standalone() and args.is_default_rules:
cache_dir = os.path.join(get_default_root(), "cache")
cache_dir = get_default_root() / "cache"
else:
cache_dir = capa.rules.cache.get_default_cache_directory()
@@ -1460,8 +1461,7 @@ def main(argv=None):
if format_ == FORMAT_FREEZE:
# freeze format deserializes directly into an extractor
with open(args.sample, "rb") as f:
extractor: FeatureExtractor = frz.load(f.read())
extractor: FeatureExtractor = frz.load(Path(args.sample).read_bytes())
else:
# all other formats we must create an extractor,
# such as viv, binary ninja, etc. workspaces
@@ -1487,7 +1487,7 @@ def main(argv=None):
args.backend,
sig_paths,
should_save_workspace,
disable_progress=args.quiet,
disable_progress=args.quiet or args.debug,
)
except UnsupportedFormatError:
log_unsupported_format_error()
@@ -1546,7 +1546,7 @@ def ida_main():
logger.debug(" https://github.com/mandiant/capa-rules")
logger.debug("-" * 80)
rules_path = os.path.join(get_default_root(), "rules")
rules_path = get_default_root() / "rules"
logger.debug("rule path: %s", rules_path)
rules = get_rules([rules_path])

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
import capa.engine as ceng

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import typing
import collections

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -133,7 +133,7 @@ def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
arch=meta.analysis.arch,
os=meta.analysis.os,
extractor=meta.analysis.extractor,
rules=meta.analysis.rules,
rules=list(meta.analysis.rules),
base_address=addr_to_pb2(meta.analysis.base_address),
layout=capa_pb2.Layout(
functions=[

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -14,7 +14,7 @@ example::
0x10003415
0x10003797
Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at: [package root]/LICENSE.txt

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -6,7 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Dict, Iterable
from typing import Dict, Iterable, Optional
import tabulate
@@ -129,6 +129,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0):
ostream.write(" " * indent)
key = feature.type
value: Optional[str]
if isinstance(feature, frzf.BasicBlockFeature):
# i don't think it makes sense to have standalone basic block features.
# we don't parse them from rules, only things like: `count(basic block) > 1`
@@ -140,7 +141,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0):
value = feature.class_
else:
# convert attributes to dictionary using aliased names, if applicable
value = feature.dict(by_alias=True).get(key, None)
value = feature.dict(by_alias=True).get(key)
if value is None:
raise ValueError(f"{key} contains None")

View File

@@ -1,4 +1,4 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -14,6 +14,7 @@ import logging
import binascii
import collections
from enum import Enum
from pathlib import Path
from capa.helpers import assert_never
@@ -117,7 +118,7 @@ class Scopes:
dynamic: str
def __contains__(self, scope: Union[Scope, str]) -> bool:
assert isinstance(scope, Scope) or isinstance(scope, str)
assert isinstance(scope, (Scope, str))
return (scope == self.static) or (scope == self.dynamic)
@classmethod
@@ -150,6 +151,7 @@ SUPPORTED_FEATURES: Dict[str, Set] = {
capa.features.common.Class,
capa.features.common.Namespace,
capa.features.common.Characteristic("mixed mode"),
capa.features.common.Characteristic("forwarded export"),
},
PROCESS_SCOPE: {
capa.features.common.MatchedRule,
@@ -800,8 +802,7 @@ class Rule:
# note: we cannot recurse into the subscope sub-tree,
# because its been replaced by a `match` statement.
for child in statement.get_children():
for new_rule in self._extract_subscope_rules_rec(child):
yield new_rule
yield from self._extract_subscope_rules_rec(child)
def is_subscope_rule(self):
return bool(self.meta.get("capa/subscope-rule", False))
@@ -827,8 +828,7 @@ class Rule:
# replace old node with reference to new rule
# yield new rule
for new_rule in self._extract_subscope_rules_rec(self.statement):
yield new_rule
yield from self._extract_subscope_rules_rec(self.statement)
def evaluate(self, features: FeatureSet, short_circuit=True):
capa.perf.counters["evaluate.feature"] += 1
@@ -923,7 +923,7 @@ class Rule:
@classmethod
def from_yaml_file(cls, path, use_ruamel=False) -> "Rule":
with open(path, "rb") as f:
with Path(path).open("rb") as f:
try:
rule = cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel)
# import here to avoid circular dependency

View File

@@ -1,10 +1,18 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import sys
import zlib
import pickle
import hashlib
import logging
import os.path
from typing import List, Optional
from pathlib import Path
from dataclasses import dataclass
import capa.rules
@@ -36,7 +44,7 @@ def compute_cache_identifier(rule_content: List[bytes]) -> CacheIdentifier:
return hash.hexdigest()
def get_default_cache_directory() -> str:
def get_default_cache_directory() -> Path:
# ref: https://github.com/mandiant/capa/issues/1212#issuecomment-1361259813
#
# Linux: $XDG_CACHE_HOME/capa/
@@ -45,22 +53,22 @@ def get_default_cache_directory() -> str:
# ref: https://stackoverflow.com/a/8220141/87207
if sys.platform == "linux" or sys.platform == "linux2":
directory = os.environ.get("XDG_CACHE_HOME", os.path.join(os.environ["HOME"], ".cache", "capa"))
directory = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache" / "capa"))
elif sys.platform == "darwin":
directory = os.path.join(os.environ["HOME"], "Library", "Caches", "capa")
directory = Path.home() / "Library" / "Caches" / "capa"
elif sys.platform == "win32":
directory = os.path.join(os.environ["LOCALAPPDATA"], "flare", "capa", "cache")
directory = Path(os.environ["LOCALAPPDATA"]) / "flare" / "capa" / "cache"
else:
raise NotImplementedError(f"unsupported platform: {sys.platform}")
os.makedirs(directory, exist_ok=True)
directory.mkdir(parents=True, exist_ok=True)
return directory
def get_cache_path(cache_dir: str, id: CacheIdentifier) -> str:
def get_cache_path(cache_dir: Path, id: CacheIdentifier) -> Path:
filename = "capa-" + id[:8] + ".cache"
return os.path.join(cache_dir, filename)
return cache_dir / filename
MAGIC = b"capa"
@@ -102,7 +110,7 @@ def compute_ruleset_cache_identifier(ruleset: capa.rules.RuleSet) -> CacheIdenti
return compute_cache_identifier(rule_contents)
def cache_ruleset(cache_dir: str, ruleset: capa.rules.RuleSet):
def cache_ruleset(cache_dir: Path, ruleset: capa.rules.RuleSet):
"""
cache the given ruleset to disk, using the given cache directory.
this can subsequently be reloaded via `load_cached_ruleset`,
@@ -113,19 +121,18 @@ def cache_ruleset(cache_dir: str, ruleset: capa.rules.RuleSet):
"""
id = compute_ruleset_cache_identifier(ruleset)
path = get_cache_path(cache_dir, id)
if os.path.exists(path):
if path.exists():
logger.debug("rule set already cached to %s", path)
return
cache = RuleCache(id, ruleset)
with open(path, "wb") as f:
f.write(cache.dump())
path.write_bytes(cache.dump())
logger.debug("rule set cached to %s", path)
return
def load_cached_ruleset(cache_dir: str, rule_contents: List[bytes]) -> Optional[capa.rules.RuleSet]:
def load_cached_ruleset(cache_dir: Path, rule_contents: List[bytes]) -> Optional[capa.rules.RuleSet]:
"""
load a cached ruleset from disk, using the given cache directory.
the raw rule contents are required here to prove that the rules haven't changed
@@ -136,20 +143,19 @@ def load_cached_ruleset(cache_dir: str, rule_contents: List[bytes]) -> Optional[
"""
id = compute_cache_identifier(rule_contents)
path = get_cache_path(cache_dir, id)
if not os.path.exists(path):
if not path.exists():
logger.debug("rule set cache does not exist: %s", path)
return None
logger.debug("loading rule set from cache: %s", path)
with open(path, "rb") as f:
buf = f.read()
buf = path.read_bytes()
try:
cache = RuleCache.load(buf)
except AssertionError:
logger.debug("rule set cache is invalid: %s", path)
# delete the cache that seems to be invalid.
os.remove(path)
path.unlink()
return None
else:
return cache.ruleset

View File

@@ -1,4 +1,11 @@
__version__ = "5.1.0"
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
__version__ = "6.0.0"
def get_major_version():

View File

@@ -18,6 +18,7 @@ authors = [
{name = "Mike Hunhoff", email = "michael.hunhoff@mandiant.com"},
]
description = "The FLARE team's open-source tool to identify capabilities in executable files."
readme = {file = "README.md", content-type = "text/markdown"}
license = {file = "LICENSE.txt"}
requires-python = ">=3.8"
keywords = ["malware analysis", "reverse engineering", "capability detection", "software behaviors", "capa", "FLARE"]
@@ -48,13 +49,12 @@ dependencies = [
"dnfile==0.13.0",
"dncil==1.0.2",
"pydantic==1.10.9",
"protobuf==4.23.2",
"protobuf==4.23.4",
]
dynamic = ["version", "readme"]
dynamic = ["version"]
[tool.setuptools.dynamic]
version = {attr = "capa.version.__version__"}
readme = {file = "README.md"}
[tool.setuptools]
packages = ["capa"]
@@ -67,15 +67,18 @@ dev = [
"pytest-instafail==0.5.0",
"pytest-cov==4.1.0",
"flake8==6.0.0",
"flake8-bugbear==23.6.5",
"flake8-bugbear==23.7.10",
"flake8-encodings==0.5.0.post1",
"flake8-comprehensions==3.13.0",
"flake8-comprehensions==3.14.0",
"flake8-logging-format==0.9.0",
"flake8-no-implicit-concat==0.3.4",
"flake8-print==5.0.0",
"flake8-todos==0.3.0",
"ruff==0.0.275",
"black==23.3.0",
"flake8-simplify==0.20.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.0.278",
"black==23.7.0",
"isort==5.11.4",
"mypy==1.4.1",
"psutil==5.9.2",

2
rules

Submodule rules updated: f934f44f71...a49c174fee

View File

@@ -1,4 +1,11 @@
#!/usr/bin/env python
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
"""
bulk-process
@@ -47,7 +54,7 @@ usage:
parallelism factor
--no-mp disable subprocesses
Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at: [package root]/LICENSE.txt
@@ -59,10 +66,10 @@ import os
import sys
import json
import logging
import os.path
import argparse
import multiprocessing
import multiprocessing.pool
from pathlib import Path
import capa
import capa.main
@@ -167,9 +174,8 @@ def main(argv=None):
return -1
samples = []
for base, _, files in os.walk(args.input):
for file in files:
samples.append(os.path.join(base, file))
for file in Path(args.input).rglob("*"):
samples.append(file)
cpu_count = multiprocessing.cpu_count()
@@ -206,7 +212,7 @@ def main(argv=None):
if result["status"] == "error":
logger.warning(result["error"])
elif result["status"] == "ok":
results[result["path"]] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True)
results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True)
else:
raise ValueError(f"unexpected status: {result['status']}")

View File

@@ -15,10 +15,10 @@ Unless required by applicable law or agreed to in writing, software distributed
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""
import os
import sys
import logging
import argparse
from pathlib import Path
import capa.main
import capa.rules
@@ -47,8 +47,9 @@ def main(argv=None):
logging.getLogger("capa").setLevel(logging.ERROR)
try:
os.makedirs(args.cache, exist_ok=True)
rules = capa.main.get_rules(args.rules, cache_dir=args.cache)
cache_dir = Path(args.cache)
cache_dir.mkdir(parents=True, exist_ok=True)
rules = capa.main.get_rules(args.rules, cache_dir)
logger.info("successfully loaded %s rules", len(rules))
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))
@@ -56,9 +57,9 @@ def main(argv=None):
content = capa.rules.cache.get_ruleset_content(rules)
id = capa.rules.cache.compute_cache_identifier(content)
path = capa.rules.cache.get_cache_path(args.cache, id)
path = capa.rules.cache.get_cache_path(cache_dir, id)
assert os.path.exists(path)
assert path.exists()
logger.info("cached to: %s", path)

View File

@@ -1,3 +1,10 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
"""
Convert capa rules to YARA rules (where this is possible)
@@ -37,6 +44,7 @@ import logging
import argparse
import datetime
import itertools
from pathlib import Path
import capa.main
import capa.rules
@@ -71,8 +79,8 @@ default_tags = "CAPA "
# minimum number of rounds to do be able to convert rules which depend on referenced rules in several levels of depth
min_rounds = 5
unsupported_capa_rules = open("unsupported_capa_rules.yml", "wb")
unsupported_capa_rules_names = open("unsupported_capa_rules.txt", "wb")
unsupported_capa_rules = Path("unsupported_capa_rules.yml").open("wb")
unsupported_capa_rules_names = Path("unsupported_capa_rules.txt").open("wb")
unsupported_capa_rules_list = []
condition_header = """
@@ -396,7 +404,7 @@ def convert_rule(rule, rulename, cround, depth):
# this is "x or more". could be coded for strings TODO
return "BREAK", "Some aka x or more (TODO)", rule_comment, incomplete
if s_type == "And" or s_type == "Or" or s_type == "Not" and not kid.name == "Some":
if s_type == "And" or s_type == "Or" or s_type == "Not" and kid.name != "Some":
logger.info("doing bool with recursion: %r", kid)
logger.info("kid coming: %r", kid.name)
# logger.info("grandchildren: " + repr(kid.children))
@@ -714,7 +722,7 @@ def main(argv=None):
logging.getLogger("capa2yara").setLevel(level)
try:
rules = capa.main.get_rules([args.rules])
rules = capa.main.get_rules([Path(args.rules)])
namespaces = capa.rules.index_rules_by_namespace(list(rules.rules.values()))
logger.info("successfully loaded %d rules (including subscope rules which will be ignored)", len(rules))
if args.tag:

View File

@@ -1,8 +1,16 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import json
import collections
from typing import Any, Set, Dict
from pathlib import Path
import capa.main
import capa.rules
@@ -159,7 +167,7 @@ def render_dictionary(doc: rd.ResultDocument) -> Dict[str, Any]:
# ==== render dictionary helpers
def capa_details(rules_path, file_path, output_format="dictionary"):
def capa_details(rules_path: Path, file_path: Path, output_format="dictionary"):
# load rules from disk
rules = capa.main.get_rules([rules_path])
@@ -170,7 +178,7 @@ def capa_details(rules_path, file_path, output_format="dictionary"):
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
# collect metadata (used only to make rendering more complete)
meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, rules_path, extractor, counts)
meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, [rules_path], extractor, counts)
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
capa_output: Any = False
@@ -192,18 +200,18 @@ def capa_details(rules_path, file_path, output_format="dictionary"):
if __name__ == "__main__":
import sys
import os.path
import argparse
RULES_PATH = os.path.join(os.path.dirname(__file__), "..", "rules")
RULES_PATH = capa.main.get_default_root() / "rules"
parser = argparse.ArgumentParser(description="Extract capabilities from a file")
parser.add_argument("file", help="file to extract capabilities from")
parser.add_argument("--rules", help="path to rules directory", default=os.path.abspath(RULES_PATH))
parser.add_argument("--rules", help="path to rules directory", default=RULES_PATH)
parser.add_argument(
"--output", help="output format", choices=["dictionary", "json", "texttable"], default="dictionary"
)
args = parser.parse_args()
print(capa_details(args.rules, args.file, args.output))
if args.rules != RULES_PATH:
args.rules = Path(args.rules)
print(capa_details(args.rules, Path(args.file), args.output))
sys.exit(0)

Some files were not shown because too many files have changed in this diff Show More