mirror of
https://github.com/mandiant/capa.git
synced 2025-12-06 21:00:57 -08:00
Compare commits
129 Commits
v1.1.0
...
feature/un
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
216e288ade | ||
|
|
ec55a9c482 | ||
|
|
e5136683e6 | ||
|
|
63561b73c1 | ||
|
|
b700ad1655 | ||
|
|
52eef09c8b | ||
|
|
de3f223bb5 | ||
|
|
6a2e5ad530 | ||
|
|
5575166b7a | ||
|
|
8a2ba9cd71 | ||
|
|
e523c6cb50 | ||
|
|
8be1c84fd2 | ||
|
|
739100d481 | ||
|
|
fd7d9aafe9 | ||
|
|
a39e3cca79 | ||
|
|
ad011b08f6 | ||
|
|
854e3d7774 | ||
|
|
b4fa6fc954 | ||
|
|
585a9c167f | ||
|
|
5f731f72ed | ||
|
|
385c956184 | ||
|
|
d8f2b7b4df | ||
|
|
b49ed276a9 | ||
|
|
a2da55fb6f | ||
|
|
1b6ac29053 | ||
|
|
d3dad3a66a | ||
|
|
b084f7cb9b | ||
|
|
89edaf4c5c | ||
|
|
6cd2931645 | ||
|
|
295d3fee5d | ||
|
|
0af6386693 | ||
|
|
1873d0b7c5 | ||
|
|
c032d556fb | ||
|
|
d7f1c23f4d | ||
|
|
df4c75882d | ||
|
|
0cfbed05b4 | ||
|
|
ca95512811 | ||
|
|
bb9803fcc0 | ||
|
|
1fe945e3ed | ||
|
|
6ba4798822 | ||
|
|
f424dd126f | ||
|
|
9fa128b27d | ||
|
|
09bca1e5f7 | ||
|
|
c623791a84 | ||
|
|
980a34adca | ||
|
|
8721eb05eb | ||
|
|
512ea89662 | ||
|
|
15259d455c | ||
|
|
1e097ef759 | ||
|
|
f7925c2990 | ||
|
|
b94f665d4b | ||
|
|
68f27dfea4 | ||
|
|
35226e1e4e | ||
|
|
9c40befdd3 | ||
|
|
c1b7176e36 | ||
|
|
259a0a2007 | ||
|
|
eee565b596 | ||
|
|
26061c25a5 | ||
|
|
897da4237d | ||
|
|
1923d479d8 | ||
|
|
6b8bce4f42 | ||
|
|
107a68628b | ||
|
|
26c9811ba1 | ||
|
|
b784f086b4 | ||
|
|
d161c094a6 | ||
|
|
8cbe3f8546 | ||
|
|
0e049ef56d | ||
|
|
f193ceb91a | ||
|
|
ac7f079af8 | ||
|
|
5f47280e0d | ||
|
|
b7d39cf4c9 | ||
|
|
de2c3c9800 | ||
|
|
6e525a93d7 | ||
|
|
90cdef5232 | ||
|
|
fb19841997 | ||
|
|
40d16c925f | ||
|
|
d5f73b47a4 | ||
|
|
caf738ee4e | ||
|
|
c6f27200fe | ||
|
|
e3e13cdb11 | ||
|
|
5c967cd6ef | ||
|
|
db3369fd09 | ||
|
|
35086d4a69 | ||
|
|
adaac03d1d | ||
|
|
199cccaef9 | ||
|
|
e64277ed41 | ||
|
|
744b4915c9 | ||
|
|
5d9ccf1f76 | ||
|
|
15607d63ab | ||
|
|
362db6898a | ||
|
|
70b4546c33 | ||
|
|
791afd7ac8 | ||
|
|
6f352283e6 | ||
|
|
db85fbab4f | ||
|
|
20cc23adc5 | ||
|
|
828819e13f | ||
|
|
79d94144c6 | ||
|
|
c46a1d2b44 | ||
|
|
7a18fbf9d4 | ||
|
|
7d62156a29 | ||
|
|
a59e1054fe | ||
|
|
db45a06ba7 | ||
|
|
c739caee40 | ||
|
|
bdf6b69be6 | ||
|
|
fdd6f7434b | ||
|
|
def8130a24 | ||
|
|
21adb2b9d1 | ||
|
|
5929c0652c | ||
|
|
e7bf5bfceb | ||
|
|
c2f55fad12 | ||
|
|
7ac4cf47f7 | ||
|
|
3f49a224f5 | ||
|
|
695f1bf55a | ||
|
|
10f5a54e1d | ||
|
|
042654ee97 | ||
|
|
1da450001c | ||
|
|
7996e2efe7 | ||
|
|
5eded3c5cc | ||
|
|
cdae840519 | ||
|
|
fcb8c4a293 | ||
|
|
4e6b475ff6 | ||
|
|
02a8ad1ea4 | ||
|
|
393b332f9c | ||
|
|
bf4a8dcd3e | ||
|
|
9bde11fa6f | ||
|
|
43c6eec30b | ||
|
|
f7cd52826e | ||
|
|
23d31c3c2c | ||
|
|
732b47e845 |
86
.github/workflows/build.yml
vendored
86
.github/workflows/build.yml
vendored
@@ -14,38 +14,64 @@ jobs:
|
||||
- os: ubuntu-16.04
|
||||
# use old linux so that the shared library versioning is more portable
|
||||
artifact_name: capa
|
||||
asset_name: capa-linux
|
||||
asset_name: linux
|
||||
- os: windows-latest
|
||||
artifact_name: capa.exe
|
||||
asset_name: capa-windows.exe
|
||||
asset_name: windows
|
||||
- os: macos-latest
|
||||
artifact_name: capa
|
||||
asset_name: capa-macos
|
||||
asset_name: macos
|
||||
steps:
|
||||
- name: Checkout capa
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- name: Set up Python 2.7
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 2.7
|
||||
- name: Install PyInstaller
|
||||
run: pip install pyinstaller
|
||||
- name: Install capa
|
||||
run: pip install -e .
|
||||
- name: Build standalone executable
|
||||
run: pyinstaller .github/pyinstaller/pyinstaller.spec
|
||||
- name: Does it run?
|
||||
run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_"
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ matrix.asset_name }}
|
||||
path: dist/${{ matrix.artifact_name }}
|
||||
- name: Upload binaries to GH Release
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
repo_token: ${{ secrets.CAPA_TOKEN }}
|
||||
file: dist/${{ matrix.artifact_name }}
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
- name: Checkout capa
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- name: Set up Python 2.7
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 2.7
|
||||
- name: Install PyInstaller
|
||||
# pyinstaller 4 doesn't support Python 2.7
|
||||
run: pip install 'pyinstaller==3.*'
|
||||
- name: Install capa
|
||||
run: pip install -e .
|
||||
- name: Build standalone executable
|
||||
run: pyinstaller .github/pyinstaller/pyinstaller.spec
|
||||
- name: Does it run?
|
||||
run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_"
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ matrix.asset_name }}
|
||||
path: dist/${{ matrix.artifact_name }}
|
||||
|
||||
zip:
|
||||
name: zip ${{ matrix.asset_name }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- asset_name: linux
|
||||
artifact_name: capa
|
||||
- asset_name: windows
|
||||
artifact_name: capa.exe
|
||||
- asset_name: macos
|
||||
artifact_name: capa
|
||||
steps:
|
||||
- name: Download ${{ matrix.asset_name }}
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: ${{ matrix.asset_name }}
|
||||
- name: Set executable flag
|
||||
run: chmod +x ${{ matrix.artifact_name }}
|
||||
- name: Set zip name
|
||||
run: echo ::set-env name=zip_name::capa-${GITHUB_REF#refs/tags/}-${{ matrix.asset_name }}.zip
|
||||
- name: Zip ${{ matrix.artifact_name }} into ${{ env.zip_name }}
|
||||
run: zip ${{ env.zip_name }} ${{ matrix.artifact_name }}
|
||||
- name: Upload ${{ env.zip_name }} to GH Release
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
repo_token: ${{ secrets.GITHUB_TOKEN}}
|
||||
file: ${{ env.zip_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
|
||||
16
.github/workflows/tests.yml
vendored
16
.github/workflows/tests.yml
vendored
@@ -41,19 +41,29 @@ jobs:
|
||||
run: python scripts/lint.py rules/
|
||||
|
||||
tests:
|
||||
name: Tests in ${{ matrix.python }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: [code_style, rule_linter]
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- python: 2.7
|
||||
- python: 3.6
|
||||
- python: 3.7
|
||||
- python: 3.8
|
||||
- python: '3.9.0-alpha - 3.9.x' # Python latest
|
||||
steps:
|
||||
- name: Checkout capa with submodules
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- name: Set up Python 2.7
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 2.7
|
||||
python-version: ${{ matrix.python }}
|
||||
- name: Install capa
|
||||
run: pip install -e .[dev]
|
||||
# TODO: remove `pefile` when we bump lancelot >= 0.3.7
|
||||
run: pip install -e .[dev] pefile
|
||||
- name: Run tests
|
||||
run: pytest tests/
|
||||
|
||||
|
||||
18
CHANGELOG.md
18
CHANGELOG.md
@@ -38,15 +38,15 @@ Download a standalone binary below and checkout the readme [here on GitHub](http
|
||||
- hash data using sha1 via x86 extensions @re-fox
|
||||
- hash data using sha256 via x86 extensions @re-fox
|
||||
- capture network configuration via ipconfig @re-fox
|
||||
- hash data via WinCrypt @michael-hunhoff
|
||||
- get file attributes @michael-hunhoff
|
||||
- allocate thread local storage @michael-hunhoff
|
||||
- get thread local storage value @michael-hunhoff
|
||||
- set thread local storage @michael-hunhoff
|
||||
- get session integrity level @michael-hunhoff
|
||||
- add file to cabinet file @michael-hunhoff
|
||||
- flush cabinet file @michael-hunhoff
|
||||
- open cabinet file @michael-hunhoff
|
||||
- hash data via WinCrypt @mike-hunhoff
|
||||
- get file attributes @mike-hunhoff
|
||||
- allocate thread local storage @mike-hunhoff
|
||||
- get thread local storage value @mike-hunhoff
|
||||
- set thread local storage @mike-hunhoff
|
||||
- get session integrity level @mike-hunhoff
|
||||
- add file to cabinet file @mike-hunhoff
|
||||
- flush cabinet file @mike-hunhoff
|
||||
- open cabinet file @mike-hunhoff
|
||||
- gather firefox profile information @re-fox
|
||||
- encrypt data using skipjack @re-fox
|
||||
- encrypt data using camellia @re-fox
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||

|
||||
|
||||
[](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
|
||||
[](https://github.com/fireeye/capa-rules)
|
||||
[](https://github.com/fireeye/capa-rules)
|
||||
[](LICENSE.txt)
|
||||
|
||||
capa detects capabilities in executable files.
|
||||
|
||||
@@ -161,7 +161,7 @@ class Regex(String):
|
||||
|
||||
|
||||
class StringFactory(object):
|
||||
def __new__(self, value, description):
|
||||
def __new__(self, value, description=None):
|
||||
if value.startswith("/") and (value.endswith("/") or value.endswith("/i")):
|
||||
return Regex(value, description=description)
|
||||
return String(value, description=description)
|
||||
|
||||
@@ -196,7 +196,7 @@ class NullFeatureExtractor(FeatureExtractor):
|
||||
'functions': {
|
||||
0x401000: {
|
||||
'features': [
|
||||
(0x401000, capa.features.Characteristic('switch')),
|
||||
(0x401000, capa.features.Characteristic('nzxor')),
|
||||
],
|
||||
'basic blocks': {
|
||||
0x401000: {
|
||||
|
||||
@@ -14,16 +14,6 @@ from capa.features import Characteristic
|
||||
from capa.features.extractors import loops
|
||||
|
||||
|
||||
def extract_function_switch(f):
|
||||
""" extract switch indicators from a function
|
||||
|
||||
arg:
|
||||
f (IDA func_t)
|
||||
"""
|
||||
if capa.features.extractors.ida.helpers.is_function_switch_statement(f):
|
||||
yield Characteristic("switch"), f.start_ea
|
||||
|
||||
|
||||
def extract_function_calls_to(f):
|
||||
""" extract callers to a function
|
||||
|
||||
@@ -72,7 +62,7 @@ def extract_features(f):
|
||||
yield feature, ea
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_switch, extract_function_loop, extract_recursive_call)
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
@@ -300,22 +300,6 @@ def is_function_recursive(f):
|
||||
return False
|
||||
|
||||
|
||||
def is_function_switch_statement(f):
|
||||
""" check a function for switch statement indicators
|
||||
|
||||
adapted from:
|
||||
https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1
|
||||
|
||||
arg:
|
||||
f (IDA func_t)
|
||||
"""
|
||||
for (start, end) in idautils.Chunks(f.start_ea):
|
||||
for head in idautils.Heads(start, end):
|
||||
if idaapi.get_switch_info(head):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def is_basic_block_tight_loop(bb):
|
||||
""" check basic block loops to self
|
||||
|
||||
@@ -331,3 +315,29 @@ def is_basic_block_tight_loop(bb):
|
||||
if ref == bb.start_ea:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def find_data_reference_from_insn(insn, max_depth=10):
|
||||
""" search for data reference from instruction, return address of instruction if no reference exists """
|
||||
depth = 0
|
||||
ea = insn.ea
|
||||
|
||||
while True:
|
||||
data_refs = list(idautils.DataRefsFrom(ea))
|
||||
|
||||
if len(data_refs) != 1:
|
||||
# break if no refs or more than one ref (assume nested pointers only have one data reference)
|
||||
break
|
||||
|
||||
if ea == data_refs[0]:
|
||||
# break if circular reference
|
||||
break
|
||||
|
||||
depth += 1
|
||||
if depth > max_depth:
|
||||
# break if max depth
|
||||
break
|
||||
|
||||
ea = data_refs[0]
|
||||
|
||||
return ea
|
||||
|
||||
@@ -98,8 +98,11 @@ def extract_insn_number_features(f, bb, insn):
|
||||
# .text:00401145 add esp, 0Ch
|
||||
return
|
||||
|
||||
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm,)):
|
||||
const = capa.features.extractors.ida.helpers.mask_op_val(op)
|
||||
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm, idaapi.o_mem)):
|
||||
if op.type == idaapi.o_imm:
|
||||
const = capa.features.extractors.ida.helpers.mask_op_val(op)
|
||||
else:
|
||||
const = op.addr
|
||||
if not idaapi.is_mapped(const):
|
||||
yield Number(const), insn.ea
|
||||
yield Number(const, arch=get_arch(f.ctx)), insn.ea
|
||||
@@ -116,11 +119,8 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
example:
|
||||
push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
if idaapi.is_call_insn(insn):
|
||||
# ignore call instructions
|
||||
return
|
||||
|
||||
for ref in idautils.DataRefsFrom(insn.ea):
|
||||
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||
if ref != insn.ea:
|
||||
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
||||
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||
yield Bytes(extracted_bytes), insn.ea
|
||||
@@ -137,7 +137,8 @@ def extract_insn_string_features(f, bb, insn):
|
||||
example:
|
||||
push offset aAcr ; "ACR > "
|
||||
"""
|
||||
for ref in idautils.DataRefsFrom(insn.ea):
|
||||
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||
if ref != insn.ea:
|
||||
found = capa.features.extractors.ida.helpers.find_string_at(ref)
|
||||
if found:
|
||||
yield String(found), insn.ea
|
||||
|
||||
92
capa/features/extractors/lancelot/__init__.py
Normal file
92
capa/features/extractors/lancelot/__init__.py
Normal file
@@ -0,0 +1,92 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
|
||||
import lancelot
|
||||
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.lancelot.file
|
||||
import capa.features.extractors.lancelot.insn
|
||||
import capa.features.extractors.lancelot.function
|
||||
import capa.features.extractors.lancelot.basicblock
|
||||
|
||||
__all__ = ["file", "function", "basicblock", "insn"]
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BB(object):
|
||||
"""extend the lancelot.BasicBlock with an __int__ method to access the address"""
|
||||
|
||||
def __init__(self, ws, bb):
|
||||
super(BB, self).__init__()
|
||||
self.ws = ws
|
||||
self.address = bb.address
|
||||
self.length = bb.length
|
||||
self.predecessors = bb.predecessors
|
||||
self.successors = bb.successors
|
||||
|
||||
def __int__(self):
|
||||
return self.address
|
||||
|
||||
@property
|
||||
def instructions(self):
|
||||
va = self.address
|
||||
while va < self.address + self.length:
|
||||
try:
|
||||
insn = self.ws.read_insn(va)
|
||||
except ValueError:
|
||||
logger.warning("failed to read instruction at 0x%x", va)
|
||||
return
|
||||
|
||||
yield insn
|
||||
va += insn.length
|
||||
|
||||
|
||||
class LancelotFeatureExtractor(capa.features.extractors.FeatureExtractor):
|
||||
def __init__(self, buf):
|
||||
super(LancelotFeatureExtractor, self).__init__()
|
||||
self.buf = buf
|
||||
self.ws = lancelot.from_bytes(buf)
|
||||
self.ctx = {}
|
||||
|
||||
def get_base_address(self):
|
||||
return self.ws.base_address
|
||||
|
||||
def extract_file_features(self):
|
||||
for feature, va in capa.features.extractors.lancelot.file.extract_file_features(self.buf):
|
||||
yield feature, va
|
||||
|
||||
def get_functions(self):
|
||||
for va in self.ws.get_functions():
|
||||
# this is just the address of the function
|
||||
yield va
|
||||
|
||||
def extract_function_features(self, f):
|
||||
for feature, va in capa.features.extractors.lancelot.function.extract_function_features(self.ws, f):
|
||||
yield feature, va
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
try:
|
||||
cfg = self.ws.build_cfg(f)
|
||||
except:
|
||||
logger.warning("failed to build CFG for 0x%x", f)
|
||||
return
|
||||
else:
|
||||
for bb in cfg.basic_blocks.values():
|
||||
yield BB(self.ws, bb)
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
for feature, va in capa.features.extractors.lancelot.basicblock.extract_basic_block_features(self.ws, bb):
|
||||
yield feature, va
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
return bb.instructions
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
for feature, va in capa.features.extractors.lancelot.insn.extract_insn_features(self, f, bb, insn):
|
||||
yield feature, va
|
||||
120
capa/features/extractors/lancelot/basicblock.py
Normal file
120
capa/features/extractors/lancelot/basicblock.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import string
|
||||
import struct
|
||||
import logging
|
||||
|
||||
from lancelot import (
|
||||
FLOW_VA,
|
||||
OPERAND_SIZE,
|
||||
OPERAND_TYPE,
|
||||
MEMORY_OPERAND_BASE,
|
||||
OPERAND_TYPE_MEMORY,
|
||||
OPERAND_TYPE_IMMEDIATE,
|
||||
IMMEDIATE_OPERAND_VALUE,
|
||||
)
|
||||
|
||||
from capa.features import Characteristic
|
||||
from capa.features.basicblock import BasicBlock
|
||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_bb_tight_loop(ws, bb):
|
||||
""" check basic block for tight loop indicators """
|
||||
if bb.address in map(lambda flow: flow[FLOW_VA], bb.successors):
|
||||
yield Characteristic("tight loop"), bb.address
|
||||
|
||||
|
||||
def is_mov_imm_to_stack(insn):
|
||||
if not insn.mnemonic.startswith("mov"):
|
||||
return False
|
||||
|
||||
try:
|
||||
dst, src = insn.operands
|
||||
except ValueError:
|
||||
# not two operands
|
||||
return False
|
||||
|
||||
if src[OPERAND_TYPE] != OPERAND_TYPE_IMMEDIATE:
|
||||
return False
|
||||
|
||||
if src[IMMEDIATE_OPERAND_VALUE] < 0:
|
||||
return False
|
||||
|
||||
if dst[OPERAND_TYPE] != OPERAND_TYPE_MEMORY:
|
||||
return False
|
||||
|
||||
if dst[MEMORY_OPERAND_BASE] not in ("ebp", "rbp", "esp", "rsp"):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def is_printable_ascii(chars):
|
||||
return all(c < 127 and chr(c) in string.printable for c in chars)
|
||||
|
||||
|
||||
def is_printable_utf16le(chars):
|
||||
if all(c == b"\x00" for c in chars[1::2]):
|
||||
return is_printable_ascii(chars[::2])
|
||||
|
||||
|
||||
def get_printable_len(operand):
|
||||
"""
|
||||
Return string length if all operand bytes are ascii or utf16-le printable
|
||||
"""
|
||||
operand_size = operand[OPERAND_SIZE]
|
||||
if operand_size == 8:
|
||||
chars = struct.pack("<B", operand[IMMEDIATE_OPERAND_VALUE])
|
||||
elif operand_size == 16:
|
||||
chars = struct.pack("<H", operand[IMMEDIATE_OPERAND_VALUE])
|
||||
elif operand_size == 32:
|
||||
chars = struct.pack("<I", operand[IMMEDIATE_OPERAND_VALUE])
|
||||
elif operand_size == 64:
|
||||
chars = struct.pack("<Q", operand[IMMEDIATE_OPERAND_VALUE])
|
||||
else:
|
||||
raise ValueError("unexpected operand size: " + str(operand_size))
|
||||
|
||||
if is_printable_ascii(chars):
|
||||
return operand_size / 8
|
||||
if is_printable_utf16le(chars):
|
||||
return operand_size / 16
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _bb_has_stackstring(ws, bb):
|
||||
"""
|
||||
extract potential stackstring creation, using the following heuristics:
|
||||
- basic block contains enough moves of constant bytes to the stack
|
||||
"""
|
||||
count = 0
|
||||
for insn in bb.instructions:
|
||||
if is_mov_imm_to_stack(insn):
|
||||
# add number of operand bytes
|
||||
src = insn.operands[1]
|
||||
count += get_printable_len(src)
|
||||
|
||||
if count > MIN_STACKSTRING_LEN:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def extract_stackstring(ws, bb):
|
||||
""" check basic block for stackstring indicators """
|
||||
if _bb_has_stackstring(ws, bb):
|
||||
yield Characteristic("stack string"), bb.address
|
||||
|
||||
|
||||
def extract_basic_block_features(ws, bb):
|
||||
yield BasicBlock(), bb.address
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
for feature, va in bb_handler(ws, bb):
|
||||
yield feature, va
|
||||
|
||||
|
||||
BASIC_BLOCK_HANDLERS = (
|
||||
extract_bb_tight_loop,
|
||||
extract_stackstring,
|
||||
)
|
||||
81
capa/features/extractors/lancelot/file.py
Normal file
81
capa/features/extractors/lancelot/file.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import pefile
|
||||
|
||||
import capa.features.extractors.strings
|
||||
from capa.features import String, Characteristic
|
||||
from capa.features.file import Export, Import, Section
|
||||
|
||||
|
||||
def extract_file_embedded_pe(buf, pe):
|
||||
buf = buf[2:]
|
||||
|
||||
total_offset = 2
|
||||
while True:
|
||||
try:
|
||||
offset = buf.index(b"MZ")
|
||||
except ValueError:
|
||||
return
|
||||
else:
|
||||
rest = buf[offset:]
|
||||
total_offset += offset
|
||||
|
||||
try:
|
||||
_ = pefile.PE(data=rest)
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
yield Characteristic("embedded pe"), total_offset
|
||||
|
||||
buf = rest[2:]
|
||||
total_offset += 2
|
||||
|
||||
|
||||
def extract_file_export_names(buf, pe):
|
||||
if not hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
|
||||
return
|
||||
|
||||
base_address = pe.OPTIONAL_HEADER.ImageBase
|
||||
for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
|
||||
yield Export(exp.name.decode("ascii")), base_address + exp.address
|
||||
|
||||
|
||||
def extract_file_import_names(buf, pe):
|
||||
base_address = pe.OPTIONAL_HEADER.ImageBase
|
||||
for entry in pe.DIRECTORY_ENTRY_IMPORT:
|
||||
libname = entry.dll.decode("ascii").lower().partition(".")[0]
|
||||
for imp in entry.imports:
|
||||
if imp.ordinal:
|
||||
yield Import("%s.#%s" % (libname, imp.ordinal)), imp.address
|
||||
else:
|
||||
impname = imp.name.decode("ascii")
|
||||
yield Import("%s.%s" % (libname, impname)), imp.address
|
||||
yield Import("%s" % (impname)), imp.address
|
||||
|
||||
|
||||
def extract_file_section_names(buf, pe):
|
||||
base_address = pe.OPTIONAL_HEADER.ImageBase
|
||||
for section in pe.sections:
|
||||
yield Section(section.Name.partition(b"\x00")[0].decode("ascii")), base_address + section.VirtualAddress
|
||||
|
||||
|
||||
def extract_file_strings(buf, pe):
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
|
||||
def extract_file_features(buf):
|
||||
pe = pefile.PE(data=buf)
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(buf, pe):
|
||||
yield feature, va
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
extract_file_embedded_pe,
|
||||
extract_file_export_names,
|
||||
extract_file_import_names,
|
||||
extract_file_section_names,
|
||||
extract_file_strings,
|
||||
)
|
||||
64
capa/features/extractors/lancelot/function.py
Normal file
64
capa/features/extractors/lancelot/function.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import logging
|
||||
|
||||
try:
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from backports.functools_lru_cache import lru_cache
|
||||
|
||||
from lancelot import (
|
||||
FLOW_VA,
|
||||
FLOW_TYPE,
|
||||
FLOW_TYPE_CONDITIONAL_JUMP,
|
||||
FLOW_TYPE_CONDITIONAL_MOVE,
|
||||
FLOW_TYPE_UNCONDITIONAL_JUMP,
|
||||
)
|
||||
|
||||
from capa.features import Characteristic
|
||||
from capa.features.extractors import loops
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_call_graph(ws):
|
||||
return ws.build_call_graph()
|
||||
|
||||
|
||||
def extract_function_calls_to(ws, f):
|
||||
cg = get_call_graph(ws)
|
||||
|
||||
for caller in cg.calls_to.get(f, []):
|
||||
yield Characteristic("calls to"), caller
|
||||
|
||||
|
||||
def extract_function_loop(ws, f):
|
||||
edges = []
|
||||
for bb in ws.build_cfg(f).basic_blocks.values():
|
||||
for flow in bb.successors:
|
||||
if flow[FLOW_TYPE] in (
|
||||
FLOW_TYPE_UNCONDITIONAL_JUMP,
|
||||
FLOW_TYPE_CONDITIONAL_JUMP,
|
||||
FLOW_TYPE_CONDITIONAL_MOVE,
|
||||
):
|
||||
edges.append((bb.address, flow[FLOW_VA]))
|
||||
continue
|
||||
|
||||
if edges and loops.has_loop(edges):
|
||||
yield Characteristic("loop"), f
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
|
||||
|
||||
|
||||
_not_implemented = set([])
|
||||
|
||||
|
||||
def extract_function_features(ws, f):
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
try:
|
||||
for feature, va in func_handler(ws, f):
|
||||
yield feature, va
|
||||
except NotImplementedError:
|
||||
if func_handler.__name__ not in _not_implemented:
|
||||
logger.warning("not implemented: %s", func_handler.__name__)
|
||||
_not_implemented.add(func_handler.__name__)
|
||||
33
capa/features/extractors/lancelot/helpers.py
Normal file
33
capa/features/extractors/lancelot/helpers.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from lancelot import (
|
||||
OPERAND_TYPE,
|
||||
MEMORY_OPERAND_BASE,
|
||||
MEMORY_OPERAND_DISP,
|
||||
OPERAND_TYPE_MEMORY,
|
||||
OPERAND_TYPE_IMMEDIATE,
|
||||
IMMEDIATE_OPERAND_VALUE,
|
||||
IMMEDIATE_OPERAND_IS_RELATIVE,
|
||||
)
|
||||
|
||||
|
||||
def get_operand_target(insn, op):
|
||||
if op[OPERAND_TYPE] == OPERAND_TYPE_MEMORY:
|
||||
# call direct, x64
|
||||
# rip relative
|
||||
# kernel32-64:180001041 call cs:__imp_RtlVirtualUnwind_0
|
||||
if op[MEMORY_OPERAND_BASE] == "rip":
|
||||
return op[MEMORY_OPERAND_DISP] + insn.address + insn.length
|
||||
|
||||
# call direct, x32
|
||||
# mimikatz:0x403BD3 call ds:CryptAcquireContextW
|
||||
elif op[MEMORY_OPERAND_BASE] == None:
|
||||
return op[MEMORY_OPERAND_DISP]
|
||||
|
||||
# call via thunk
|
||||
# mimikatz:0x455A41 call LsaQueryInformationPolicy
|
||||
elif op[OPERAND_TYPE] == OPERAND_TYPE_IMMEDIATE and op[IMMEDIATE_OPERAND_IS_RELATIVE]:
|
||||
return op[IMMEDIATE_OPERAND_VALUE] + insn.address + insn.length
|
||||
|
||||
elif op[OPERAND_TYPE] == OPERAND_TYPE_IMMEDIATE:
|
||||
return op[IMMEDIATE_OPERAND_VALUE]
|
||||
|
||||
raise ValueError("memory operand has no target")
|
||||
149
capa/features/extractors/lancelot/indirect_calls.py
Normal file
149
capa/features/extractors/lancelot/indirect_calls.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import collections
|
||||
|
||||
from lancelot import (
|
||||
FLOW_VA,
|
||||
OPERAND_TYPE,
|
||||
PERMISSION_READ,
|
||||
MEMORY_OPERAND_BASE,
|
||||
MEMORY_OPERAND_DISP,
|
||||
OPERAND_TYPE_MEMORY,
|
||||
MEMORY_OPERAND_INDEX,
|
||||
OPERAND_TYPE_REGISTER,
|
||||
MEMORY_OPERAND_SEGMENT,
|
||||
OPERAND_TYPE_IMMEDIATE,
|
||||
IMMEDIATE_OPERAND_VALUE,
|
||||
REGISTER_OPERAND_REGISTER,
|
||||
IMMEDIATE_OPERAND_IS_RELATIVE,
|
||||
)
|
||||
|
||||
from capa.features.extractors.lancelot.helpers import get_operand_target
|
||||
|
||||
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
|
||||
|
||||
|
||||
class NotFoundError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def read_instructions(ws, bb):
|
||||
va = bb.address
|
||||
while va < bb.address + bb.length:
|
||||
try:
|
||||
insn = ws.read_insn(va)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
yield insn
|
||||
va += insn.length
|
||||
|
||||
|
||||
def build_instruction_predecessors(ws, cfg):
|
||||
preds = collections.defaultdict(set)
|
||||
|
||||
for bb in cfg.basic_blocks.values():
|
||||
insns = list(read_instructions(ws, bb))
|
||||
|
||||
for i, insn in enumerate(insns):
|
||||
if i == 0:
|
||||
for pred in bb.predecessors:
|
||||
pred_bb = cfg.basic_blocks[pred[FLOW_VA]]
|
||||
preds[insn.address].add(list(read_instructions(ws, pred_bb))[-1].address)
|
||||
else:
|
||||
preds[insn.address].add(insns[i - 1].address)
|
||||
|
||||
return preds
|
||||
|
||||
|
||||
def find_definition(ws, f, insn):
|
||||
"""
|
||||
scan backwards from the given address looking for assignments to the given register.
|
||||
if a constant, return that value.
|
||||
args:
|
||||
ws (lancelot.PE)
|
||||
f (int): the function start address
|
||||
insn (lancelot.Instruction): call instruction to resolve
|
||||
returns:
|
||||
(va: int, value?: int|None): the address of the assignment and the value, if a constant.
|
||||
raises:
|
||||
NotFoundError: when the definition cannot be found.
|
||||
"""
|
||||
assert insn.mnemonic == "call"
|
||||
op0 = insn.operands[0]
|
||||
assert op0[OPERAND_TYPE] == OPERAND_TYPE_REGISTER
|
||||
reg = op0[REGISTER_OPERAND_REGISTER]
|
||||
|
||||
cfg = ws.build_cfg(f)
|
||||
preds = build_instruction_predecessors(ws, cfg)
|
||||
|
||||
q = collections.deque()
|
||||
seen = set([])
|
||||
q.extend(preds[insn.address])
|
||||
while q:
|
||||
cur = q.popleft()
|
||||
|
||||
# skip if we've already processed this location
|
||||
if cur in seen:
|
||||
continue
|
||||
seen.add(cur)
|
||||
|
||||
insn = ws.read_insn(cur)
|
||||
operands = insn.operands
|
||||
|
||||
if len(operands) == 0:
|
||||
q.extend(preds[cur])
|
||||
continue
|
||||
|
||||
op0 = operands[0]
|
||||
if not (
|
||||
op0[OPERAND_TYPE] == OPERAND_TYPE_REGISTER
|
||||
and op0[REGISTER_OPERAND_REGISTER] == reg
|
||||
and insn.mnemonic in DESTRUCTIVE_MNEMONICS
|
||||
):
|
||||
q.extend(preds[cur])
|
||||
continue
|
||||
|
||||
# if we reach here, the instruction is destructive to our target register.
|
||||
|
||||
# we currently only support extracting the constant from something like: `mov $reg, IAT`
|
||||
# so, any other pattern results in an unknown value, represented by None.
|
||||
# this is a good place to extend in the future, if we need more robust support.
|
||||
if insn.mnemonic != "mov":
|
||||
return (cur, None)
|
||||
else:
|
||||
op1 = operands[1]
|
||||
try:
|
||||
target = get_operand_target(insn, op1)
|
||||
except ValueError:
|
||||
return (cur, None)
|
||||
else:
|
||||
return (cur, target)
|
||||
|
||||
raise NotFoundError()
|
||||
|
||||
|
||||
def is_indirect_call(insn):
|
||||
return insn.mnemonic == "call" and insn.operands[0][OPERAND_TYPE] == OPERAND_TYPE_REGISTER
|
||||
|
||||
|
||||
def resolve_indirect_call(ws, f, insn):
|
||||
"""
|
||||
inspect the given indirect call instruction and attempt to resolve the target address.
|
||||
args:
|
||||
ws (lancelot.PE): the analysis workspace
|
||||
f (int): the address of the function to analyze
|
||||
insn (lancelot.Instruction): the instruction at which to start analysis
|
||||
returns:
|
||||
(va: int, value?: int|None): the address of the assignment and the value, if a constant.
|
||||
raises:
|
||||
NotFoundError: when the definition cannot be found.
|
||||
"""
|
||||
assert is_indirect_call(insn)
|
||||
return find_definition(ws, f, insn)
|
||||
487
capa/features/extractors/lancelot/insn.py
Normal file
487
capa/features/extractors/lancelot/insn.py
Normal file
@@ -0,0 +1,487 @@
|
||||
import logging
|
||||
import itertools
|
||||
|
||||
import pefile
|
||||
|
||||
try:
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from backports.functools_lru_cache import lru_cache
|
||||
|
||||
from lancelot import (
|
||||
OPERAND_TYPE,
|
||||
PERMISSION_READ,
|
||||
MEMORY_OPERAND_BASE,
|
||||
MEMORY_OPERAND_DISP,
|
||||
OPERAND_TYPE_MEMORY,
|
||||
MEMORY_OPERAND_INDEX,
|
||||
OPERAND_TYPE_REGISTER,
|
||||
MEMORY_OPERAND_SEGMENT,
|
||||
OPERAND_TYPE_IMMEDIATE,
|
||||
IMMEDIATE_OPERAND_VALUE,
|
||||
REGISTER_OPERAND_REGISTER,
|
||||
IMMEDIATE_OPERAND_IS_RELATIVE,
|
||||
)
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
|
||||
from capa.features.insn import Number, Offset, Mnemonic
|
||||
from capa.features.extractors.lancelot.helpers import get_operand_target
|
||||
from capa.features.extractors.lancelot.function import get_call_graph
|
||||
from capa.features.extractors.lancelot.indirect_calls import NotFoundError, resolve_indirect_call
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
# byte range within the first and returning basic blocks, this helps to reduce FP features
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
|
||||
def get_arch(ws):
|
||||
if ws.arch == "x32":
|
||||
return ARCH_X32
|
||||
elif ws.arch == "x64":
|
||||
return ARCH_X64
|
||||
else:
|
||||
raise ValueError("unexpected architecture")
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_pefile(xtor):
|
||||
return pefile.PE(data=xtor.buf)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_imports(xtor):
|
||||
pe = get_pefile(xtor)
|
||||
|
||||
imports = {}
|
||||
for entry in pe.DIRECTORY_ENTRY_IMPORT:
|
||||
libname = entry.dll.decode("ascii").lower().partition(".")[0]
|
||||
for imp in entry.imports:
|
||||
if imp.ordinal:
|
||||
imports[imp.address] = "%s.#%s" % (libname, imp.ordinal)
|
||||
else:
|
||||
impname = imp.name.decode("ascii")
|
||||
imports[imp.address] = "%s.%s" % (libname, impname)
|
||||
return imports
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_thunks(xtor):
|
||||
thunks = {}
|
||||
for va in xtor.ws.get_functions():
|
||||
try:
|
||||
insn = xtor.ws.read_insn(va)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if insn.mnemonic != "jmp":
|
||||
continue
|
||||
|
||||
op0 = insn.operands[0]
|
||||
|
||||
try:
|
||||
target = get_operand_target(insn, op0)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
imports = get_imports(xtor)
|
||||
if target not in imports:
|
||||
continue
|
||||
|
||||
thunks[va] = imports[target]
|
||||
|
||||
return thunks
|
||||
|
||||
|
||||
def extract_insn_api_features(xtor, f, bb, insn):
|
||||
"""parse API features from the given instruction."""
|
||||
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
|
||||
op0 = insn.operands[0]
|
||||
|
||||
if op0[OPERAND_TYPE] == OPERAND_TYPE_REGISTER:
|
||||
try:
|
||||
(_, target) = resolve_indirect_call(xtor.ws, f, insn)
|
||||
except NotFoundError:
|
||||
return
|
||||
if target is None:
|
||||
return
|
||||
else:
|
||||
try:
|
||||
target = get_operand_target(insn, op0)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
imports = get_imports(xtor)
|
||||
if target in imports:
|
||||
for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.address):
|
||||
yield feature, va
|
||||
return
|
||||
|
||||
thunks = get_thunks(xtor)
|
||||
if target in thunks:
|
||||
for feature, va in capa.features.extractors.helpers.generate_api_features(thunks[target], insn.address):
|
||||
yield feature, va
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(xtor, f, bb, insn):
|
||||
"""parse mnemonic features from the given instruction."""
|
||||
yield Mnemonic(insn.mnemonic), insn.address
|
||||
|
||||
|
||||
def extract_insn_number_features(xtor, f, bb, insn):
|
||||
"""parse number features from the given instruction."""
|
||||
operands = insn.operands
|
||||
|
||||
for operand in operands:
|
||||
if operand[OPERAND_TYPE] != OPERAND_TYPE_IMMEDIATE:
|
||||
continue
|
||||
|
||||
v = operand[IMMEDIATE_OPERAND_VALUE]
|
||||
|
||||
if xtor.ws.probe(v) & PERMISSION_READ:
|
||||
# v is a valid address
|
||||
# therefore, assume its not also a constant.
|
||||
continue
|
||||
|
||||
if (
|
||||
insn.mnemonic == "add"
|
||||
and operands[0][OPERAND_TYPE] == OPERAND_TYPE_REGISTER
|
||||
and operands[0][REGISTER_OPERAND_REGISTER] == "esp"
|
||||
):
|
||||
# skip things like:
|
||||
#
|
||||
# .text:00401140 call sub_407E2B
|
||||
# .text:00401145 add esp, 0Ch
|
||||
return
|
||||
|
||||
yield Number(v), insn.address
|
||||
yield Number(v, arch=get_arch(xtor.ws)), insn.address
|
||||
|
||||
|
||||
def extract_insn_offset_features(xtor, f, bb, insn):
|
||||
"""parse structure offset features from the given instruction."""
|
||||
operands = insn.operands
|
||||
|
||||
for operand in operands:
|
||||
if operand[OPERAND_TYPE] != OPERAND_TYPE_MEMORY:
|
||||
continue
|
||||
|
||||
if operand[MEMORY_OPERAND_BASE] in ("esp", "ebp", "rbp"):
|
||||
continue
|
||||
|
||||
# lancelot provides `None` when the displacement is not present.
|
||||
v = operand[MEMORY_OPERAND_DISP] or 0
|
||||
|
||||
yield Offset(v), insn.address
|
||||
yield Offset(v, arch=get_arch(xtor.ws)), insn.address
|
||||
|
||||
|
||||
def derefs(xtor, p):
|
||||
"""
|
||||
recursively follow the given pointer, yielding the valid memory addresses along the way.
|
||||
useful when you may have a pointer to string, or pointer to pointer to string, etc.
|
||||
this is a "do what i mean" type of helper function.
|
||||
"""
|
||||
|
||||
depth = 0
|
||||
while True:
|
||||
if not xtor.ws.probe(p) & PERMISSION_READ:
|
||||
return
|
||||
yield p
|
||||
|
||||
next = xtor.ws.read_pointer(p)
|
||||
|
||||
# sanity: pointer points to self
|
||||
if next == p:
|
||||
return
|
||||
|
||||
# sanity: avoid chains of pointers that are unreasonably deep
|
||||
depth += 1
|
||||
if depth > 10:
|
||||
return
|
||||
|
||||
p = next
|
||||
|
||||
|
||||
def read_bytes(xtor, va):
|
||||
"""
|
||||
read up to MAX_BYTES_FEATURE_SIZE from the given address.
|
||||
|
||||
raises:
|
||||
ValueError: if the given address is not valid.
|
||||
"""
|
||||
start = va
|
||||
end = va + MAX_BYTES_FEATURE_SIZE
|
||||
pe = get_pefile(xtor)
|
||||
|
||||
for section in pe.sections:
|
||||
section_start = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress
|
||||
section_end = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress + section.Misc_VirtualSize
|
||||
|
||||
if section_start <= start < section_end:
|
||||
end = min(end, section_end)
|
||||
return xtor.ws.read_bytes(start, end - start)
|
||||
|
||||
raise ValueError("invalid address")
|
||||
|
||||
|
||||
# these are mnemonics that may flow (jump) elsewhere
|
||||
FLOW_MNEMONICS = set(
|
||||
[
|
||||
"call",
|
||||
"jb",
|
||||
"jbe",
|
||||
"jcxz",
|
||||
"jecxz",
|
||||
"jknzd",
|
||||
"jkzd",
|
||||
"jl",
|
||||
"jle",
|
||||
"jmp",
|
||||
"jnb",
|
||||
"jnbe",
|
||||
"jnl",
|
||||
"jnle",
|
||||
"jno",
|
||||
"jnp",
|
||||
"jns",
|
||||
"jnz",
|
||||
"jo",
|
||||
"jp",
|
||||
"jrcxz",
|
||||
"js",
|
||||
"jz",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def extract_insn_bytes_features(xtor, f, bb, insn):
|
||||
"""
|
||||
parse byte sequence features from the given instruction.
|
||||
"""
|
||||
if insn.mnemonic in FLOW_MNEMONICS:
|
||||
return
|
||||
|
||||
for operand in insn.operands:
|
||||
try:
|
||||
target = get_operand_target(insn, operand)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
for ptr in derefs(xtor, target):
|
||||
try:
|
||||
buf = read_bytes(xtor, ptr)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if capa.features.extractors.helpers.all_zeros(buf):
|
||||
continue
|
||||
|
||||
yield Bytes(buf), insn.address
|
||||
|
||||
|
||||
def first(s):
|
||||
"""enumerate the first element in the sequence"""
|
||||
for i in s:
|
||||
yield i
|
||||
break
|
||||
|
||||
|
||||
def extract_insn_string_features(xtor, f, bb, insn):
|
||||
"""parse string features from the given instruction."""
|
||||
for bytez, va in extract_insn_bytes_features(xtor, f, bb, insn):
|
||||
buf = bytez.value
|
||||
|
||||
for s in itertools.chain(
|
||||
first(capa.features.extractors.strings.extract_ascii_strings(buf)),
|
||||
first(capa.features.extractors.strings.extract_unicode_strings(buf)),
|
||||
):
|
||||
if s.offset == 0:
|
||||
yield String(s.s), va
|
||||
|
||||
|
||||
def is_security_cookie(xtor, f, bb, insn):
|
||||
"""
|
||||
check if an instruction is related to security cookie checks
|
||||
"""
|
||||
op1 = insn.operands[1]
|
||||
if op1[OPERAND_TYPE] == OPERAND_TYPE_REGISTER and op1[REGISTER_OPERAND_REGISTER] not in (
|
||||
"esp",
|
||||
"ebp",
|
||||
"rbp",
|
||||
"rsp",
|
||||
):
|
||||
return False
|
||||
|
||||
# expect security cookie init in first basic block within first bytes (instructions)
|
||||
if f == bb.address and insn.address < (bb.address + SECURITY_COOKIE_BYTES_DELTA):
|
||||
return True
|
||||
|
||||
# ... or within last bytes (instructions) before a return
|
||||
insns = list(xtor.get_instructions(f, bb))
|
||||
if insns[-1].mnemonic in ("ret", "retn") and insn.address > (bb.address + bb.length - SECURITY_COOKIE_BYTES_DELTA):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(xtor, f, bb, insn):
|
||||
"""
|
||||
parse non-zeroing XOR instruction from the given instruction.
|
||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||
"""
|
||||
if insn.mnemonic != "xor":
|
||||
return
|
||||
|
||||
operands = insn.operands
|
||||
if operands[0] == operands[1]:
|
||||
return
|
||||
|
||||
if is_security_cookie(xtor, f, bb, insn):
|
||||
return
|
||||
|
||||
yield Characteristic("nzxor"), insn.address
|
||||
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(xtor, f, bb, insn):
|
||||
"""
|
||||
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
||||
"""
|
||||
for operand in insn.operands:
|
||||
if (
|
||||
operand[OPERAND_TYPE] == OPERAND_TYPE_MEMORY
|
||||
and operand[MEMORY_OPERAND_SEGMENT] == "gs"
|
||||
and operand[MEMORY_OPERAND_DISP] == 0x60
|
||||
):
|
||||
yield Characteristic("peb access"), insn.address
|
||||
|
||||
if (
|
||||
operand[OPERAND_TYPE] == OPERAND_TYPE_MEMORY
|
||||
and operand[MEMORY_OPERAND_SEGMENT] == "fs"
|
||||
and operand[MEMORY_OPERAND_DISP] == 0x30
|
||||
):
|
||||
yield Characteristic("peb access"), insn.address
|
||||
|
||||
|
||||
def extract_insn_segment_access_features(xtor, f, bb, insn):
|
||||
""" parse the instruction for access to fs or gs """
|
||||
for operand in insn.operands:
|
||||
if operand[OPERAND_TYPE] == OPERAND_TYPE_MEMORY and operand[MEMORY_OPERAND_SEGMENT] == "gs":
|
||||
yield Characteristic("gs access"), insn.address
|
||||
|
||||
if operand[OPERAND_TYPE] == OPERAND_TYPE_MEMORY and operand[MEMORY_OPERAND_SEGMENT] == "fs":
|
||||
yield Characteristic("fs access"), insn.address
|
||||
|
||||
|
||||
def get_section(xtor, va):
|
||||
pe = get_pefile(xtor)
|
||||
|
||||
for i, section in enumerate(pe.sections):
|
||||
section_start = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress
|
||||
section_end = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress + section.Misc_VirtualSize
|
||||
|
||||
if section_start <= va < section_end:
|
||||
return i
|
||||
|
||||
raise ValueError("invalid address")
|
||||
|
||||
|
||||
def extract_insn_cross_section_cflow(xtor, f, bb, insn):
|
||||
"""
|
||||
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
||||
"""
|
||||
if insn.mnemonic not in FLOW_MNEMONICS:
|
||||
return
|
||||
|
||||
try:
|
||||
target = get_operand_target(insn, insn.operands[0])
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
if target in get_imports(xtor):
|
||||
return
|
||||
|
||||
try:
|
||||
if get_section(xtor, insn.address) != get_section(xtor, target):
|
||||
yield Characteristic("cross section flow"), insn.address
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
|
||||
def extract_function_calls_from(xtor, f, bb, insn):
|
||||
cg = get_call_graph(xtor.ws)
|
||||
|
||||
for callee in cg.calls_from.get(insn.address, []):
|
||||
yield Characteristic("calls from"), callee
|
||||
|
||||
if callee == f:
|
||||
yield Characteristic("recursive call"), insn.address
|
||||
|
||||
# lancelot doesn't count API calls when constructing the call graph
|
||||
# so we still have to scan for calls to an import
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
|
||||
try:
|
||||
target = get_operand_target(insn, insn.operands[0])
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
imports = get_imports(xtor)
|
||||
if target in imports:
|
||||
yield Characteristic("calls from"), target
|
||||
|
||||
|
||||
# this is a feature that's most relevant at the function or basic block scope,
|
||||
# however, its most efficient to extract at the instruction scope.
|
||||
def extract_function_indirect_call_characteristic_features(xtor, f, bb, insn):
|
||||
"""
|
||||
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
||||
does not include calls like => call ds:dword_ABD4974
|
||||
"""
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
|
||||
op0 = insn.operands[0]
|
||||
if op0[OPERAND_TYPE] == OPERAND_TYPE_REGISTER:
|
||||
yield Characteristic("indirect call"), insn.address
|
||||
elif op0[OPERAND_TYPE] == OPERAND_TYPE_MEMORY and op0[MEMORY_OPERAND_BASE] is not None:
|
||||
yield Characteristic("indirect call"), insn.address
|
||||
elif op0[OPERAND_TYPE] == OPERAND_TYPE_MEMORY and op0[MEMORY_OPERAND_INDEX] is not None:
|
||||
yield Characteristic("indirect call"), insn.address
|
||||
|
||||
|
||||
_not_implemented = set([])
|
||||
|
||||
|
||||
def extract_insn_features(xtor, f, bb, insn):
|
||||
for insn_handler in INSTRUCTION_HANDLERS:
|
||||
try:
|
||||
for feature, va in insn_handler(xtor, f, bb, insn):
|
||||
yield feature, va
|
||||
except NotImplementedError:
|
||||
if insn_handler.__name__ not in _not_implemented:
|
||||
logger.warning("not implemented: %s", insn_handler.__name__)
|
||||
_not_implemented.add(insn_handler.__name__)
|
||||
|
||||
|
||||
INSTRUCTION_HANDLERS = (
|
||||
extract_insn_api_features,
|
||||
extract_insn_number_features,
|
||||
extract_insn_string_features,
|
||||
extract_insn_bytes_features,
|
||||
extract_insn_offset_features,
|
||||
extract_insn_nzxor_characteristic_features,
|
||||
extract_insn_mnemonic_features,
|
||||
extract_insn_peb_access_characteristic_features,
|
||||
extract_insn_cross_section_cflow,
|
||||
extract_insn_segment_access_features,
|
||||
extract_function_calls_from,
|
||||
extract_function_indirect_call_characteristic_features,
|
||||
)
|
||||
@@ -8,11 +8,7 @@
|
||||
|
||||
import types
|
||||
|
||||
import file
|
||||
import insn
|
||||
import function
|
||||
import viv_utils
|
||||
import basicblock
|
||||
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.viv.file
|
||||
|
||||
@@ -25,45 +25,6 @@ def interface_extract_function_XXX(f):
|
||||
yield NotImplementedError("feature"), NotImplementedError("virtual address")
|
||||
|
||||
|
||||
def get_switches(vw):
|
||||
"""
|
||||
caching accessor to vivisect workspace switch constructs.
|
||||
"""
|
||||
if "switches" in vw.metadata:
|
||||
return vw.metadata["switches"]
|
||||
else:
|
||||
# addresses of switches in the program
|
||||
switches = set()
|
||||
|
||||
for case_va, _ in filter(lambda t: "case" in t[1], vw.getNames()):
|
||||
# assume that the xref to a case location is a switch construct
|
||||
for switch_va, _, _, _ in vw.getXrefsTo(case_va):
|
||||
switches.add(switch_va)
|
||||
|
||||
vw.metadata["switches"] = switches
|
||||
return switches
|
||||
|
||||
|
||||
def get_functions_with_switch(vw):
|
||||
if "functions_with_switch" in vw.metadata:
|
||||
return vw.metadata["functions_with_switch"]
|
||||
else:
|
||||
functions = set()
|
||||
for switch in get_switches(vw):
|
||||
functions.add(vw.getFunction(switch))
|
||||
vw.metadata["functions_with_switch"] = functions
|
||||
return functions
|
||||
|
||||
|
||||
def extract_function_switch(f):
|
||||
"""
|
||||
parse if a function contains a switch statement based on location names
|
||||
method can be optimized
|
||||
"""
|
||||
if f.va in get_functions_with_switch(f.vw):
|
||||
yield Characteristic("switch"), f.va
|
||||
|
||||
|
||||
def extract_function_calls_to(f):
|
||||
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
||||
yield Characteristic("calls to"), src
|
||||
@@ -106,4 +67,4 @@ def extract_features(f):
|
||||
yield feature, va
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_switch, extract_function_calls_to, extract_function_loop)
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
|
||||
|
||||
@@ -128,10 +128,13 @@ def extract_insn_number_features(f, bb, insn):
|
||||
# push 3136B0h ; dwControlCode
|
||||
for oper in insn.opers:
|
||||
# this is for both x32 and x64
|
||||
if not isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
|
||||
if not isinstance(oper, (envi.archs.i386.disasm.i386ImmOper, envi.archs.i386.disasm.i386ImmMemOper)):
|
||||
continue
|
||||
|
||||
v = oper.getOperValue(oper)
|
||||
if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
|
||||
v = oper.getOperValue(oper)
|
||||
else:
|
||||
v = oper.getOperAddr(oper)
|
||||
|
||||
if f.vw.probeMemory(v, 1, envi.memory.MM_READ):
|
||||
# this is a valid address
|
||||
@@ -162,7 +165,12 @@ def derefs(vw, p):
|
||||
return
|
||||
yield p
|
||||
|
||||
next = vw.readMemoryPtr(p)
|
||||
try:
|
||||
next = vw.readMemoryPtr(p)
|
||||
except Exception:
|
||||
# if not enough bytes can be read, such as end of the section.
|
||||
# unfortunately, viv returns a plain old generic `Exception` for this.
|
||||
return
|
||||
|
||||
# sanity: pointer points to self
|
||||
if next == p:
|
||||
@@ -390,7 +398,9 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
if insn.mnem not in ["push", "mov"]:
|
||||
return
|
||||
|
||||
if "fs" in insn.getPrefixName():
|
||||
prefix = insn.getPrefixName()
|
||||
|
||||
if "fs" in prefix:
|
||||
for oper in insn.opers:
|
||||
# examples
|
||||
#
|
||||
@@ -403,10 +413,12 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
|
||||
):
|
||||
yield Characteristic("peb access"), insn.va
|
||||
elif "gs" in insn.getPrefixName():
|
||||
elif "gs" in prefix:
|
||||
for oper in insn.opers:
|
||||
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
|
||||
isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
|
||||
if (
|
||||
(isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60)
|
||||
or (isinstance(oper, envi.archs.amd64.disasm.i386SibOper) and oper.imm == 0x60)
|
||||
or (isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60)
|
||||
):
|
||||
yield Characteristic("peb access"), insn.va
|
||||
else:
|
||||
|
||||
@@ -101,7 +101,9 @@ def dumps(extractor):
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
ret["scopes"]["basic block"].append(serialize_feature(feature) + (hex(va), (hex(f), hex(bb),)))
|
||||
|
||||
for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]):
|
||||
for insnva, insn in sorted(
|
||||
[(insn.__int__(), insn) for insn in extractor.get_instructions(f, bb)], key=lambda p: p[0]
|
||||
):
|
||||
ret["functions"][hex(f)][hex(bb)].append(hex(insnva))
|
||||
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
@@ -245,12 +247,7 @@ def main(argv=None):
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
vw = capa.main.get_workspace(args.sample, args.format)
|
||||
|
||||
# don't import this at top level to support ida/py3 backend
|
||||
import capa.features.extractors.viv
|
||||
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
|
||||
extractor = capa.main.get_extractor(args.sample, args.format)
|
||||
with open(args.output, "wb") as f:
|
||||
f.write(dump(extractor))
|
||||
|
||||
|
||||
@@ -24,7 +24,10 @@ class Number(Feature):
|
||||
super(Number, self).__init__(value, arch=arch, description=description)
|
||||
|
||||
def get_value_str(self):
|
||||
return "0x%X" % self.value
|
||||
if self.value < 0:
|
||||
return "-0x%X" % (-self.value)
|
||||
else:
|
||||
return "0x%X" % self.value
|
||||
|
||||
|
||||
class Offset(Feature):
|
||||
|
||||
@@ -353,7 +353,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
# TODO: do we display 'not'
|
||||
pass
|
||||
elif statement["type"] == "some":
|
||||
return CapaExplorerDefaultItem(parent, statement["count"] + " or more")
|
||||
return CapaExplorerDefaultItem(parent, str(statement["count"]) + " or more")
|
||||
elif statement["type"] == "range":
|
||||
# `range` is a weird node, its almost a hybrid of statement + feature.
|
||||
# it is a specific feature repeated multiple times.
|
||||
@@ -528,7 +528,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
if feature[feature["type"]] in ("embedded pe",):
|
||||
return CapaExplorerByteViewItem(parent, display, location)
|
||||
|
||||
if feature[feature["type"]] in ("loop", "recursive call", "tight loop", "switch"):
|
||||
if feature[feature["type"]] in ("loop", "recursive call", "tight loop"):
|
||||
return CapaExplorerFeatureItem(parent, display=display)
|
||||
|
||||
# default to instruction view for all other characteristics
|
||||
@@ -546,7 +546,17 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
if feature["type"] == "basicblock":
|
||||
return CapaExplorerBlockItem(parent, location)
|
||||
|
||||
if feature["type"] in ("bytes", "api", "mnemonic", "number", "offset"):
|
||||
if feature["type"] in (
|
||||
"bytes",
|
||||
"api",
|
||||
"mnemonic",
|
||||
"number",
|
||||
"offset",
|
||||
"number/x32",
|
||||
"number/x64",
|
||||
"offset/x32",
|
||||
"offset/x64",
|
||||
):
|
||||
# display instruction preview
|
||||
return CapaExplorerInstructionViewItem(parent, display, location)
|
||||
|
||||
|
||||
56
capa/main.py
56
capa/main.py
@@ -18,6 +18,7 @@ import datetime
|
||||
import textwrap
|
||||
import collections
|
||||
|
||||
import halo
|
||||
import tqdm
|
||||
import colorama
|
||||
|
||||
@@ -31,7 +32,7 @@ import capa.features.extractors
|
||||
from capa.helpers import oint, get_file_taste
|
||||
|
||||
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
|
||||
SUPPORTED_FILE_MAGIC = set(["MZ"])
|
||||
SUPPORTED_FILE_MAGIC = set([b"MZ"])
|
||||
|
||||
|
||||
logger = logging.getLogger("capa")
|
||||
@@ -106,7 +107,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
|
||||
|
||||
meta = {"feature_counts": {"file": 0, "functions": {},}}
|
||||
|
||||
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
|
||||
for f in tqdm.tqdm(list(extractor.get_functions()), disable=disable_progress, desc="matching", unit=" functions"):
|
||||
function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
|
||||
meta["feature_counts"]["functions"][f.__int__()] = feature_count
|
||||
logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
|
||||
@@ -269,16 +270,17 @@ def get_workspace(path, format, should_save=True):
|
||||
return vw
|
||||
|
||||
|
||||
def get_extractor_py2(path, format):
|
||||
def get_extractor_py2(path, format, disable_progress=False):
|
||||
import capa.features.extractors.viv
|
||||
|
||||
vw = get_workspace(path, format, should_save=False)
|
||||
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
|
||||
vw = get_workspace(path, format, should_save=False)
|
||||
|
||||
try:
|
||||
vw.saveWorkspace()
|
||||
except IOError:
|
||||
# see #168 for discussion around how to handle non-writable directories
|
||||
logger.info("source directory is not writable, won't save intermediate workspace")
|
||||
try:
|
||||
vw.saveWorkspace()
|
||||
except IOError:
|
||||
# see #168 for discussion around how to handle non-writable directories
|
||||
logger.info("source directory is not writable, won't save intermediate workspace")
|
||||
|
||||
return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
|
||||
|
||||
@@ -287,19 +289,36 @@ class UnsupportedRuntimeError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
def get_extractor_py3(path, format):
|
||||
raise UnsupportedRuntimeError()
|
||||
def get_extractor_py3(path, format, disable_progress=False):
|
||||
try:
|
||||
import lancelot
|
||||
|
||||
import capa.features.extractors.lancelot
|
||||
except ImportError:
|
||||
logger.warning("lancelot not installed")
|
||||
raise UnsupportedRuntimeError()
|
||||
|
||||
if format not in ("pe", "auto"):
|
||||
raise UnsupportedFormatError(format)
|
||||
|
||||
if not is_supported_file_type(path):
|
||||
raise UnsupportedFormatError()
|
||||
|
||||
with open(path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
return capa.features.extractors.lancelot.LancelotFeatureExtractor(buf)
|
||||
|
||||
|
||||
def get_extractor(path, format):
|
||||
def get_extractor(path, format, disable_progress=False):
|
||||
"""
|
||||
raises:
|
||||
UnsupportedFormatError:
|
||||
"""
|
||||
if sys.version_info >= (3, 0):
|
||||
return get_extractor_py3(path, format)
|
||||
return get_extractor_py3(path, format, disable_progress=disable_progress)
|
||||
else:
|
||||
return get_extractor_py2(path, format)
|
||||
return get_extractor_py2(path, format, disable_progress=disable_progress)
|
||||
|
||||
|
||||
def is_nursery_rule_path(path):
|
||||
@@ -315,7 +334,7 @@ def is_nursery_rule_path(path):
|
||||
return "nursery" in path
|
||||
|
||||
|
||||
def get_rules(rule_path):
|
||||
def get_rules(rule_path, disable_progress=False):
|
||||
if not os.path.exists(rule_path):
|
||||
raise IOError("rule path %s does not exist or cannot be accessed" % rule_path)
|
||||
|
||||
@@ -343,7 +362,8 @@ def get_rules(rule_path):
|
||||
rule_paths.append(rule_path)
|
||||
|
||||
rules = []
|
||||
for rule_path in rule_paths:
|
||||
|
||||
for rule_path in tqdm.tqdm(list(rule_paths), disable=disable_progress, desc="loading ", unit=" rules"):
|
||||
try:
|
||||
rule = capa.rules.Rule.from_yaml_file(rule_path)
|
||||
except capa.rules.InvalidRule:
|
||||
@@ -526,7 +546,7 @@ def main(argv=None):
|
||||
logger.debug("using rules path: %s", rules_path)
|
||||
|
||||
try:
|
||||
rules = get_rules(rules_path)
|
||||
rules = get_rules(rules_path, disable_progress=args.quiet)
|
||||
rules = capa.rules.RuleSet(rules)
|
||||
logger.debug("successfully loaded %s rules", len(rules))
|
||||
if args.tag:
|
||||
@@ -546,7 +566,7 @@ def main(argv=None):
|
||||
else:
|
||||
format = args.format
|
||||
try:
|
||||
extractor = get_extractor(args.sample, args.format)
|
||||
extractor = get_extractor(args.sample, args.format, disable_progress=args.quiet)
|
||||
except UnsupportedFormatError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to be a PE file.")
|
||||
|
||||
@@ -69,7 +69,6 @@ SUPPORTED_FEATURES = {
|
||||
FUNCTION_SCOPE: {
|
||||
# plus basic block scope features, see below
|
||||
capa.features.basicblock.BasicBlock,
|
||||
capa.features.Characteristic("switch"),
|
||||
capa.features.Characteristic("calls from"),
|
||||
capa.features.Characteristic("calls to"),
|
||||
capa.features.Characteristic("loop"),
|
||||
|
||||
47
capa/unpack/__init__.py
Normal file
47
capa/unpack/__init__.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import sys
|
||||
import logging
|
||||
|
||||
try:
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from backports.functools_lru_cache import lru_cache
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NotPackedError(ValueError):
|
||||
def __init__(self):
|
||||
super(NotPackedError, self).__init__("not packed")
|
||||
|
||||
|
||||
def can_unpack():
|
||||
# the unpacking backend is based on Speakeasy, which supports python 3.6+
|
||||
return sys.version_info >= (3, 6)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_unpackers():
|
||||
# break import loop
|
||||
import capa.unpack.aspack
|
||||
|
||||
return {p.name: p for p in [capa.unpack.aspack.AspackUnpacker]}
|
||||
|
||||
|
||||
def detect_packer(buf):
|
||||
for unpacker in get_unpackers().values():
|
||||
if unpacker.is_packed(buf):
|
||||
return unpacker.name
|
||||
|
||||
raise NotPackedError()
|
||||
|
||||
|
||||
def is_packed(buf):
|
||||
try:
|
||||
detect_packer(buf)
|
||||
return True
|
||||
except NotPackedError:
|
||||
return False
|
||||
|
||||
|
||||
def unpack_pe(packer, buf):
|
||||
return get_unpackers()[packer].unpack_pe(buf)
|
||||
459
capa/unpack/aspack.py
Normal file
459
capa/unpack/aspack.py
Normal file
@@ -0,0 +1,459 @@
|
||||
import io
|
||||
import struct
|
||||
import logging
|
||||
import contextlib
|
||||
import collections
|
||||
|
||||
import pefile
|
||||
import speakeasy
|
||||
import speakeasy.common as se_common
|
||||
import speakeasy.profiler
|
||||
import speakeasy.windows.objman
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def pefile_get_section_by_name(pe, section_name):
|
||||
for section in pe.sections:
|
||||
try:
|
||||
if section.Name.partition(b"\x00")[0].decode("ascii") == section_name:
|
||||
return section
|
||||
except:
|
||||
continue
|
||||
raise ValueError("section not found")
|
||||
|
||||
|
||||
def prepare_emu_context(se, module):
|
||||
"""
|
||||
prepare an Speakeasy instance for emulating the given module, without running it.
|
||||
|
||||
this is useful when planning to manually control the emulator,
|
||||
such as via `Speakeasy.emu.emu_eng.start(...)`.
|
||||
typically, Speakeasy expects to do "Run based" analysis,
|
||||
which doesn't give us too much control.
|
||||
|
||||
much of this was derived from win32::Win32Emulator::run_module.
|
||||
hopefully this can eventually be merged into Speakeasy.
|
||||
|
||||
args:
|
||||
se (speakeasy.Speakeasy): the instance to prepare
|
||||
module (speakeasy.Module): the module that will be emulated
|
||||
"""
|
||||
se._init_hooks()
|
||||
|
||||
main_exe = None
|
||||
if not module.is_exe():
|
||||
container = se.emu.init_container_process()
|
||||
if container:
|
||||
se.emu.processes.append(container)
|
||||
se.emu.curr_process = container
|
||||
else:
|
||||
main_exe = module
|
||||
|
||||
if main_exe:
|
||||
se.emu.user_modules = [main_exe] + se.emu.user_modules
|
||||
|
||||
# Create an empty process object for the module if none is supplied
|
||||
if len(se.emu.processes) == 0:
|
||||
p = speakeasy.windows.objman.Process(se.emu, path=module.get_emu_path(), base=module.base, pe=module)
|
||||
se.emu.curr_process = p
|
||||
|
||||
t = speakeasy.windows.objman.Thread(se.emu, stack_base=se.emu.stack_base, stack_commit=module.stack_commit)
|
||||
|
||||
se.emu.om.objects.update({t.address: t})
|
||||
se.emu.curr_process.threads.append(t)
|
||||
se.emu.curr_thread = t
|
||||
|
||||
peb = se.emu.alloc_peb(se.emu.curr_process)
|
||||
se.emu.init_teb(t, peb)
|
||||
|
||||
|
||||
INSN_PUSHA = 0x60
|
||||
INSN_POPA = 0x61
|
||||
|
||||
|
||||
class AspackUnpacker(speakeasy.Speakeasy):
    """
    Speakeasy-based unpacker for ASPack'd PE files.

    drives the emulator manually (rather than via a Speakeasy "run")
    to pause after the unpacking stub completes, then dumps the module
    memory and reconstructs a loadable PE, including its import table.

    typical usage::

        unpacked = AspackUnpacker.unpack_pe(packed_buf)
    """

    # unpacker identifier
    name = "aspack"

    def __init__(self, buf, debug=False):
        """
        args:
          buf (bytes): the contents of an ASPack'd PE file.
          debug (bool): enable Speakeasy debug output.
        """
        super(AspackUnpacker, self).__init__(debug=debug)
        self.module = self.load_module(data=buf)
        # prime the emulator state without starting a Speakeasy "run",
        # so we can drive the emulation engine directly.
        prepare_emu_context(self, self.module)

    @staticmethod
    def detect_aspack(buf):
        """
        return True if the given buffer contains an ASPack'd PE file.
        we detect aspack by looking at the section names for .aspack.
        the unpacking routine contains further validation and will raise an exception if necessary.

        args:
          buf (bytes): the contents of a PE file.

        returns: bool
        """
        try:
            pe = pefile.PE(data=buf, fast_load=True)
        except:
            # not parseable as PE at all, so certainly not ASPack'd.
            return False

        try:
            pefile_get_section_by_name(pe, ".aspack")
        except ValueError:
            # no .aspack section found.
            pass
        else:
            return True

        return False

    @classmethod
    def unpack_pe(cls, buf):
        """
        unpack the given buffer that contains an ASPack'd PE file.
        return the contents of a reconstructed PE file.

        args:
          buf (bytes): the contents of an ASPack'd PE file.

        returns: bytes
        """
        unpacker = cls(buf)
        return unpacker.unpack()

    def stepi(self):
        # execute exactly one instruction at the current program counter.
        self.emu.emu_eng.start(self.emu.get_pc(), count=1)

    def remove_hook(self, hook_type, hook_handle):
        # TODO: this should be part of speakeasy
        self.emu.hooks[hook_type].remove(hook_handle)
        self.emu.emu_eng.hook_remove(hook_handle.handle)

    def remove_mem_read_hook(self, hook_handle):
        # TODO: this should be part of speakeasy
        self.remove_hook(se_common.HOOK_MEM_READ, hook_handle)

    @contextlib.contextmanager
    def mem_read_hook(self, hook):
        """
        context manager for temporarily installing a mem-read hook on the emulator.

        example:

            with self.mem_read_hook(lambda emu, access, addr, size, ctx: emu.stop()):
                self.emu.emu_eng.start(0x401000)

        args:
          hook (speakeasy.common.MemReadHook): the hook to install
        """
        handle = self.add_mem_read_hook(hook)
        # if this fails, then there's still an unfixed bug in Speakeasy
        assert handle.handle != 0
        try:
            yield
        finally:
            self.remove_mem_read_hook(handle)

    def remove_code_hook(self, hook_handle):
        # TODO: this should be part of speakeasy
        self.remove_hook(se_common.HOOK_CODE, hook_handle)

    @contextlib.contextmanager
    def code_hook(self, hook):
        """
        context manager for temporarily installing a code hook on the emulator.

        example:

            with self.code_hook(lambda emu, addr, size, ctx: emu.stop()):
                self.emu.emu_eng.start(0x401000)

        args:
          hook (speakeasy.common.CodeHook): the hook to install
        """
        handle = self.add_code_hook(hook)
        # if this fails, then there's still an unfixed bug in Speakeasy
        assert handle.handle != 0
        try:
            yield
        finally:
            self.remove_code_hook(handle)

    def read_ptr(self, va):
        # read a pointer-sized little-endian integer from emulated memory.
        endian = "little"
        val = self.mem_read(va, self.emu.ptr_size)
        return int.from_bytes(val, endian)

    def dump(self):
        """
        emulate the loaded module, pausing after an appropriate section hop.
        then, dump and return the module's memory and OEP.

        this routine is specific to aspack. it makes the following assumptions:
          - aspack starts with a PUSHA to save off the CPU context
          - aspack then runs its unpacking stub
          - aspack executes POPA to restore the CPU context
          - aspack section hops to the OEP

        we'll emulate in a few phases:
          1. single step over PUSHA at the entrypoint
          2. extract the address of the saved CPU context
          3. emulate until the saved CPU context is read
          4. assert this is a POPA instruction
          5. emulate until a section hop
          6. profit!

        return the module's memory segment and the OEP.

        returns: Tuple[bytes, int]
        """

        # prime the emulator.
        # this is derived from winemu::WindowsEmulator::start()
        self.emu.curr_run = speakeasy.profiler.Run()
        self.emu.curr_mod = self.module
        self.emu.set_hooks()
        self.emu._set_emu_hooks()

        # 0. sanity checking: assert entrypoint is a PUSHA instruction
        entrypoint = self.module.base + self.module.ep
        opcode = self.emu.mem_read(entrypoint, 1)[0]
        if opcode != INSN_PUSHA:
            raise ValueError("not packed with supported ASPack")

        # 1. single step over PUSHA
        self.emu.set_pc(entrypoint)
        self.stepi()

        # 2. extract address of saved CPU context
        # (PUSHA just pushed the registers, so the stack pointer now
        # points at the saved context that POPA will later re-read)
        saved_cpu_context = self.emu.get_stack_ptr()

        # 3. emulate until saved CPU context is accessed
        def until_read(target):
            """return a mem_read hook that stops the emulator when an address is read."""

            def inner(emu, _access, addr, _size, _value, _ctx):
                if addr == target:
                    emu.stop()
                return True

            return inner

        with self.mem_read_hook(until_read(saved_cpu_context)):
            self.emu.emu_eng.start(self.emu.get_pc())

        # 4. assert this is a POPA instruction
        opcode = self.emu.mem_read(self.emu.get_pc(), 1)[0]
        if opcode != INSN_POPA:
            raise ValueError("not packed with supported ASPack")
        logger.debug("POPA: 0x%x", self.emu.get_pc())

        # 5. emulate until a section hop
        # (stop as soon as execution leaves the .aspack stub section)
        aspack_section = self.module.get_section_by_name(".aspack")
        start = self.module.base + aspack_section.VirtualAddress
        end = start + aspack_section.Misc_VirtualSize

        def until_section_hop(start, end):
            def inner(emu, addr, _size, _ctx):
                if addr < start or addr >= end:
                    emu.stop()
                return True

            return inner

        with self.code_hook(until_section_hop(start, end)):
            self.emu.emu_eng.start(self.emu.get_pc())

        # 6. dump and return
        oep = self.emu.get_pc()
        logger.debug("OEP: 0x%x", oep)

        mm = self.get_address_map(self.module.base)
        buf = self.mem_read(mm.base, mm.size)

        return buf, oep

    def fixup(self, buf, oep):
        """
        fixup a PE image that's been dumped from memory after unpacking aspack.

        there are two big fixes that need to happen:
          1. update the section pointers and sizes
          2. rebuild the import table

        for (1) updating the section pointers, we'll just update the
        physical pointers to match the virtual pointers, since this is a loaded image.

        for (2) rebuilding the import table, we'll:
          (a) inspect the emulation results for resolved imports, which tells us dll/symbol names
          (b) scan the dumped image for the unpacked import thunks (Import Address Table/Thunk Table)
          (c) match the import thunks with resolved imports
          (d) build the import table structures
          (e) write the reconstructed table into the .aspack section

        since the .aspack section contains the unpacking stub, which is no longer used,
        then we'll write the reconstructed IAT there. hopefully its big enough.

        args:
          buf (bytes): the dumped module image.
          oep (int): the original entry point (VA) recovered by `dump()`.

        returns: bytes
        """
        pe = pefile.PE(data=buf)

        # point the entrypoint at the recovered OEP (as an RVA).
        pe.OPTIONAL_HEADER.AddressOfEntryPoint = oep - self.module.base

        # 1. update section pointers and sizes.
        for section in pe.sections:
            section.PointerToRawData = section.VirtualAddress
            section.SizeOfRawData = section.Misc_VirtualSize

        # 2. rebuild the import table

        # place the reconstructed import table in the .aspack section (unpacking stub)
        reconstruction_target = pefile_get_section_by_name(pe, ".aspack").VirtualAddress

        # mapping from import pointer to (dll name, symbol name).
        # the import pointer is generated by speakeasy and is not mapped.
        # it often looks something like 0xfeedf008.
        # as we encounter pointers with values like this, we can resolve the symbol.
        imports = {}

        # 2a. find resolved imports
        for addr, (dll, sym) in self.module.import_table.items():
            # these are items in the original import table.
            logger.debug(f"found static import {dll}.{sym}")
            imports[addr] = (dll, sym)
        for (addr, dll, sym) in self.emu.dyn_imps:
            # these are imports that have been resolved at runtime by the unpacking stub.
            logger.debug(f"found dynamic import {dll}.{sym}")
            imports[addr] = (dll, sym)

        # 2b. find the existing thunk tables
        # these are pointer-aligned tables of import pointers.
        # in my test sample, its found at the start of the first section.

        # ordered list of tuples (VA, import pointer)
        # look up the symbol using the import pointer and the `imports` mapping.
        thunks = []

        # scan from the start of the first section
        # until we reach values that don't look like thunk tables.
        for va in range(pe.sections[0].VirtualAddress + self.module.base, 0xFFFFFFFFFFFFFFFF, self.emu.ptr_size):
            ptr = self.read_ptr(va)
            if ptr == 0:
                # probably padding/terminating entry
                continue

            if ptr in imports:
                thunks.append((va, ptr,))
                logger.debug(f"found import thunk at {va:08x} to {ptr:08x} for {imports[ptr][0]}\t{imports[ptr][1]}")
                continue

            # otherwise, at the end of the thunk tables
            break

        # collect the thunk entries into contiguous tables, grouped by dll name.
        #
        # list of thunk tuples that are contiguous and have the same dll name:
        #   (VA, import pointer, dll name, symbol name)
        curr_idt_table = []
        # list of list of thunk tuples, like above
        idt_tables = []
        for thunk in thunks:
            va, imp = thunk
            dll, sym = imports[imp]

            if not curr_idt_table:
                # first entry: start a new table
                curr_idt_table.append((va, imp, dll, sym))
            elif curr_idt_table[0][2] == dll:
                # same dll as the current table: extend it
                curr_idt_table.append((va, imp, dll, sym))
            else:
                # dll changed: close out the current table and start another
                idt_tables.append(curr_idt_table)
                curr_idt_table = [(va, imp, dll, sym)]
        idt_tables.append(curr_idt_table)

        # 2d. build the import table structures

        # mapping from the data identifier to its RVA (which will be found within the reconstruction blob)
        locations = {}
        # the raw bytes of the reconstructed import structures.
        # it will have the following layout:
        #   1. DLL name strings and Hint/Name table entries
        #   2. Import Lookup Tables (points into (1))
        #   3. Import Directory Tables (points into (1), (2), and original Thunk Tables)
        reconstruction = io.BytesIO()

        # list of dll names
        dlls = list(sorted(set(map(lambda pair: pair[0], imports.values()))))
        # mapping from dll name to list of symbols
        symbols = collections.defaultdict(set)
        for dll, sym in imports.values():
            symbols[dll].add(sym)

        # emit strings into the reconstruction blob
        for dll in dlls:
            locations[("dll", dll)] = reconstruction_target + reconstruction.tell()
            reconstruction.write(dll.encode("ascii") + b"\x00")
            if reconstruction.tell() % 2 == 1:
                # padding
                reconstruction.write(b"\x00")

            for sym in sorted(symbols[dll]):
                locations[("hint", dll, sym)] = reconstruction_target + reconstruction.tell()
                # export name pointer table hint == 0
                reconstruction.write(b"\x00\x00")
                # name
                reconstruction.write(sym.encode("ascii") + b"\x00")
                if reconstruction.tell() % 2 == 1:
                    # padding
                    reconstruction.write(b"\x00")

        # emit Import Lookup Tables for each recovered thunk table
        ptr_format = "<I" if self.emu.ptr_size == 4 else "<Q"
        for i, idt_entry in enumerate(idt_tables):
            locations[("import lookup table", i)] = reconstruction_target + reconstruction.tell()
            for (va, imp, dll, sym) in idt_entry:
                reconstruction.write(struct.pack(ptr_format, locations[("hint", dll, sym)]))
            # null terminator for this lookup table.
            # NOTE(review): 8 null bytes is two terminators on 32-bit
            # (ptr_size == 4); harmless extra padding, but confirm intent.
            reconstruction.write(b"\x00" * 8)

        # emit Import Descriptor Tables for each recovered thunk table
        # NOTE(review): IMAGE_IMPORT_DESCRIPTOR is 0x14 bytes; 0x20 here
        # over-sizes the directory entry/Size. loaders stop at the null
        # descriptor, so this looks benign — confirm.
        IDT_ENTRY_SIZE = 0x20
        for i, idt_entry in enumerate(idt_tables):
            va, _, dll, _ = idt_entry[0]
            rva = va - self.module.base
            locations[("import descriptor table", i)] = reconstruction_target + reconstruction.tell()

            # import lookup table rva
            reconstruction.write(struct.pack("<I", locations[("import lookup table", i)]))
            # date stamp
            reconstruction.write(struct.pack("<I", 0x0))
            # forwarder chain
            reconstruction.write(struct.pack("<I", 0x0))
            # name rva
            reconstruction.write(struct.pack("<I", locations[("dll", dll)]))
            # import address table rva
            reconstruction.write(struct.pack("<I", rva))
        # empty last entry (terminates the descriptor table)
        reconstruction.write(b"\x00" * IDT_ENTRY_SIZE)

        # if the reconstructed import structures are larger than the unpacking stub...
        # i'm not sure what we'll do. probably need to add a section.
        assert len(reconstruction.getvalue()) <= pefile_get_section_by_name(pe, ".aspack").Misc_VirtualSize

        pe.set_bytes_at_rva(reconstruction_target, reconstruction.getvalue())
        # data directory index 1 is the import directory.
        pe.OPTIONAL_HEADER.DATA_DIRECTORY[1].VirtualAddress = locations[("import descriptor table", 0)]
        pe.OPTIONAL_HEADER.DATA_DIRECTORY[1].Size = IDT_ENTRY_SIZE * len(idt_tables)

        return pe.write()

    def unpack(self):
        # emulate until the OEP, dump memory, then repair the PE image.
        buf, oep = self.dump()
        buf = self.fixup(buf, oep)
        return buf
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # usage: python <script> <packed.exe> <unpacked.exe>
    #
    # bug fix: `output` was assigned sys.argv[1] (same as the input) and both
    # variables were unused — the file handles re-read argv directly. the
    # variables are now correct and actually used. renamed to avoid shadowing
    # the `input` builtin.
    input_path = sys.argv[1]
    output_path = sys.argv[2]

    with open(input_path, "rb") as f:
        buf = f.read()

    with open(output_path, "wb") as f:
        f.write(AspackUnpacker.unpack_pe(buf))
|
||||
2
rules
2
rules
Submodule rules updated: 8994d9c9f1...7ae5ae215f
@@ -1,112 +1,112 @@
|
||||
"""
|
||||
Binary Ninja plugin that imports a capa report,
|
||||
produced via `capa --json /path/to/sample`,
|
||||
into the current database.
|
||||
|
||||
It will mark up functions with their capa matches, like:
|
||||
|
||||
; capa: print debug messages (host-interaction/log/debug/write-event)
|
||||
; capa: delete service (host-interaction/service/delete)
|
||||
; Attributes: bp-based frame
|
||||
|
||||
public UninstallService
|
||||
UninstallService proc near
|
||||
...
|
||||
|
||||
To use, invoke from the Binary Ninja Tools menu, or from the
|
||||
command-palette.
|
||||
|
||||
Adapted for Binary Ninja by @psifertex
|
||||
|
||||
This script will verify that the report matches the workspace.
|
||||
Check the log window for any errors, and/or the summary of changes.
|
||||
|
||||
Derived from: https://github.com/fireeye/capa/blob/master/scripts/import-to-ida.py
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
|
||||
from binaryninja import *
|
||||
|
||||
|
||||
def append_func_cmt(bv, va, cmt):
    """
    append the given comment to the function starting at va,
    unless its comment already contains it.

    raises ValueError when va is not the start of a function.
    """
    target = bv.get_function_at(va)
    if not target:
        raise ValueError("not a function")

    existing = target.comment
    if cmt not in existing:
        target.comment = existing + "\n" + cmt
|
||||
|
||||
|
||||
def load_analysis(bv):
    """
    load a capa JSON report for the current binary and annotate each
    matched function with a comment.

    looks for <db name>.js / <db name>.json next to the database,
    otherwise prompts the user for a report path.

    returns:
      0 if the user cancels, -1 if the file is not a capa report,
      -2 on sample mismatch; None on completion.
    """
    shortname = os.path.splitext(os.path.basename(bv.file.filename))[0]
    dirname = os.path.dirname(bv.file.filename)
    log_info(f"dirname: {dirname}\nshortname: {shortname}\n")
    # prefer a report sitting next to the database: <shortname>.js, then .json
    if os.access(os.path.join(dirname, shortname + ".js"), os.R_OK):
        path = os.path.join(dirname, shortname + ".js")
    elif os.access(os.path.join(dirname, shortname + ".json"), os.R_OK):
        path = os.path.join(dirname, shortname + ".json")
    else:
        path = interaction.get_open_filename_input("capa report:", "JSON (*.js *.json);;All Files (*)")
        if not path or not os.access(path, os.R_OK):
            log_error("Invalid filename.")
            return 0
    log_info("Using capa file %s" % path)

    with open(path, "rb") as f:
        doc = json.loads(f.read().decode("utf-8"))

    if "meta" not in doc or "rules" not in doc:
        log_error("doesn't appear to be a capa report")
        return -1

    # verify the report describes this sample: compare the report's md5
    # against the md5 of the raw file bytes (computed via binja transforms).
    a = doc["meta"]["sample"]["md5"].lower()
    md5 = Transform["MD5"]
    rawhex = Transform["RawHex"]
    # NOTE(review): BinaryView.read takes (addr, length); passing `end` as the
    # length is only correct when parent_view.start == 0 — confirm.
    b = rawhex.encode(md5.encode(bv.parent_view.read(bv.parent_view.start, bv.parent_view.end))).decode("utf-8")
    if not a == b:
        log_error("sample mismatch")
        return -2

    # collect (namespace, rule name, match address) for each
    # function-scoped, non-library, non-subscope rule match.
    rows = []
    for rule in doc["rules"].values():
        if rule["meta"].get("lib"):
            continue
        if rule["meta"].get("capa/subscope"):
            continue
        if rule["meta"]["scope"] != "function":
            continue

        name = rule["meta"]["name"]
        ns = rule["meta"].get("namespace", "")
        for va in rule["matches"].keys():
            # JSON object keys are strings; match addresses are ints.
            va = int(va)
            rows.append((ns, name, va))

    # order by (namespace, name) so that like things show up together
    rows = sorted(rows)
    for ns, name, va in rows:
        if ns:
            cmt = "%s (%s)" % (name, ns)
        else:
            cmt = "%s" % (name,)

        log_info("0x%x: %s" % (va, cmt))
        try:
            # message will look something like:
            #
            #   capa: delete service (host-interaction/service/delete)
            append_func_cmt(bv, va, "capa: " + cmt)
        except ValueError:
            # match address is not the start of a function; skip it.
            continue

    log_info("ok")
|
||||
|
||||
|
||||
PluginCommand.register("Load capa file", "Loads an analysis file from capa", load_analysis)
|
||||
"""
|
||||
Binary Ninja plugin that imports a capa report,
|
||||
produced via `capa --json /path/to/sample`,
|
||||
into the current database.
|
||||
|
||||
It will mark up functions with their capa matches, like:
|
||||
|
||||
; capa: print debug messages (host-interaction/log/debug/write-event)
|
||||
; capa: delete service (host-interaction/service/delete)
|
||||
; Attributes: bp-based frame
|
||||
|
||||
public UninstallService
|
||||
UninstallService proc near
|
||||
...
|
||||
|
||||
To use, invoke from the Binary Ninja Tools menu, or from the
|
||||
command-palette.
|
||||
|
||||
Adapted for Binary Ninja by @psifertex
|
||||
|
||||
This script will verify that the report matches the workspace.
|
||||
Check the log window for any errors, and/or the summary of changes.
|
||||
|
||||
Derived from: https://github.com/fireeye/capa/blob/master/scripts/import-to-ida.py
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
|
||||
from binaryninja import *
|
||||
|
||||
|
||||
def append_func_cmt(bv, va, cmt):
|
||||
"""
|
||||
add the given comment to the given function,
|
||||
if it doesn't already exist.
|
||||
"""
|
||||
func = bv.get_function_at(va)
|
||||
if not func:
|
||||
raise ValueError("not a function")
|
||||
|
||||
if cmt in func.comment:
|
||||
return
|
||||
|
||||
func.comment = func.comment + "\n" + cmt
|
||||
|
||||
|
||||
def load_analysis(bv):
|
||||
shortname = os.path.splitext(os.path.basename(bv.file.filename))[0]
|
||||
dirname = os.path.dirname(bv.file.filename)
|
||||
log_info(f"dirname: {dirname}\nshortname: {shortname}\n")
|
||||
if os.access(os.path.join(dirname, shortname + ".js"), os.R_OK):
|
||||
path = os.path.join(dirname, shortname + ".js")
|
||||
elif os.access(os.path.join(dirname, shortname + ".json"), os.R_OK):
|
||||
path = os.path.join(dirname, shortname + ".json")
|
||||
else:
|
||||
path = interaction.get_open_filename_input("capa report:", "JSON (*.js *.json);;All Files (*)")
|
||||
if not path or not os.access(path, os.R_OK):
|
||||
log_error("Invalid filename.")
|
||||
return 0
|
||||
log_info("Using capa file %s" % path)
|
||||
|
||||
with open(path, "rb") as f:
|
||||
doc = json.loads(f.read().decode("utf-8"))
|
||||
|
||||
if "meta" not in doc or "rules" not in doc:
|
||||
log_error("doesn't appear to be a capa report")
|
||||
return -1
|
||||
|
||||
a = doc["meta"]["sample"]["md5"].lower()
|
||||
md5 = Transform["MD5"]
|
||||
rawhex = Transform["RawHex"]
|
||||
b = rawhex.encode(md5.encode(bv.parent_view.read(bv.parent_view.start, bv.parent_view.end))).decode("utf-8")
|
||||
if not a == b:
|
||||
log_error("sample mismatch")
|
||||
return -2
|
||||
|
||||
rows = []
|
||||
for rule in doc["rules"].values():
|
||||
if rule["meta"].get("lib"):
|
||||
continue
|
||||
if rule["meta"].get("capa/subscope"):
|
||||
continue
|
||||
if rule["meta"]["scope"] != "function":
|
||||
continue
|
||||
|
||||
name = rule["meta"]["name"]
|
||||
ns = rule["meta"].get("namespace", "")
|
||||
for va in rule["matches"].keys():
|
||||
va = int(va)
|
||||
rows.append((ns, name, va))
|
||||
|
||||
# order by (namespace, name) so that like things show up together
|
||||
rows = sorted(rows)
|
||||
for ns, name, va in rows:
|
||||
if ns:
|
||||
cmt = "%s (%s)" % (name, ns)
|
||||
else:
|
||||
cmt = "%s" % (name,)
|
||||
|
||||
log_info("0x%x: %s" % (va, cmt))
|
||||
try:
|
||||
# message will look something like:
|
||||
#
|
||||
# capa: delete service (host-interaction/service/delete)
|
||||
append_func_cmt(bv, va, "capa: " + cmt)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
log_info("ok")
|
||||
|
||||
|
||||
PluginCommand.register("Load capa file", "Loads an analysis file from capa", load_analysis)
|
||||
|
||||
@@ -1,117 +1,117 @@
|
||||
"""
|
||||
IDA Pro script that imports a capa report,
|
||||
produced via `capa --json /path/to/sample`,
|
||||
into the current database.
|
||||
|
||||
It will mark up functions with their capa matches, like:
|
||||
|
||||
; capa: print debug messages (host-interaction/log/debug/write-event)
|
||||
; capa: delete service (host-interaction/service/delete)
|
||||
; Attributes: bp-based frame
|
||||
|
||||
public UninstallService
|
||||
UninstallService proc near
|
||||
...
|
||||
|
||||
To use, invoke from the IDA Pro scripting dialog,
|
||||
such as via Alt-F9,
|
||||
and then select the existing capa report from the file system.
|
||||
|
||||
This script will verify that the report matches the workspace.
|
||||
Check the output window for any errors, and/or the summary of changes.
|
||||
|
||||
Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
"""
|
||||
import json
import logging

import idc
import idautils
import ida_funcs
import ida_idaapi
import ida_kernwin
|
||||
|
||||
logger = logging.getLogger("capa")
|
||||
|
||||
|
||||
def append_func_cmt(va, cmt, repeatable=False):
    """
    append the given comment to the function containing va,
    unless its comment already contains it.

    raises ValueError when va is not within a function.
    """
    func = ida_funcs.get_func(va)
    if not func:
        raise ValueError("not a function")

    current = ida_funcs.get_func_cmt(func, repeatable) or ""
    if cmt in current:
        # already annotated; nothing to do.
        return

    ida_funcs.set_func_cmt(func, current + "\n" + cmt, repeatable)
|
||||
|
||||
|
||||
def main():
    """
    prompt for a capa JSON report, validate it against the open database,
    and add a comment to each function that matched a function-scoped rule.

    returns:
      0 if the user cancels, -1 if the file is not a capa report,
      -2 on sample mismatch; None on completion.
    """
    path = ida_kernwin.ask_file(False, "*", "capa report")
    if not path:
        # user cancelled the file dialog.
        return 0

    with open(path, "rb") as f:
        doc = json.loads(f.read().decode("utf-8"))

    if "meta" not in doc or "rules" not in doc:
        logger.error("doesn't appear to be a capa report")
        return -1

    # in IDA 7.4, the MD5 hash may be truncated, for example:
    #  wanted: 84882c9d43e23d63b82004fae74ebb61
    #  found:  b'84882C9D43E23D63B82004FAE74EBB6\x00'
    #
    # see: https://github.com/idapython/bin/issues/11
    a = doc["meta"]["sample"]["md5"].lower()
    b = idautils.GetInputFileMD5().decode("ascii").lower().rstrip("\x00")
    if not a.startswith(b):
        logger.error("sample mismatch")
        return -2

    # collect (namespace, rule name, match address) for each
    # function-scoped, non-library, non-subscope rule match.
    rows = []
    for rule in doc["rules"].values():
        if rule["meta"].get("lib"):
            continue
        if rule["meta"].get("capa/subscope"):
            continue
        if rule["meta"]["scope"] != "function":
            continue

        name = rule["meta"]["name"]
        ns = rule["meta"].get("namespace", "")
        for va in rule["matches"].keys():
            # JSON object keys are strings; match addresses are ints.
            va = int(va)
            rows.append((ns, name, va))

    # order by (namespace, name) so that like things show up together
    rows = sorted(rows)
    for ns, name, va in rows:
        if ns:
            cmt = "%s (%s)" % (name, ns)
        else:
            cmt = "%s" % (name,)

        logger.info("0x%x: %s", va, cmt)
        try:
            # message will look something like:
            #
            #   capa: delete service (host-interaction/service/delete)
            append_func_cmt(va, "capa: " + cmt, repeatable=False)
        except ValueError:
            # match address is not within a function; skip it.
            continue

    logger.info("ok")
|
||||
|
||||
|
||||
main()
|
||||
"""
|
||||
IDA Pro script that imports a capa report,
|
||||
produced via `capa --json /path/to/sample`,
|
||||
into the current database.
|
||||
|
||||
It will mark up functions with their capa matches, like:
|
||||
|
||||
; capa: print debug messages (host-interaction/log/debug/write-event)
|
||||
; capa: delete service (host-interaction/service/delete)
|
||||
; Attributes: bp-based frame
|
||||
|
||||
public UninstallService
|
||||
UninstallService proc near
|
||||
...
|
||||
|
||||
To use, invoke from the IDA Pro scripting dialog,
|
||||
such as via Alt-F9,
|
||||
and then select the existing capa report from the file system.
|
||||
|
||||
This script will verify that the report matches the workspace.
|
||||
Check the output window for any errors, and/or the summary of changes.
|
||||
|
||||
Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
|
||||
import idc
|
||||
import idautils
|
||||
import ida_idaapi
|
||||
import ida_kernwin
|
||||
|
||||
logger = logging.getLogger("capa")
|
||||
|
||||
|
||||
def append_func_cmt(va, cmt, repeatable=False):
|
||||
"""
|
||||
add the given comment to the given function,
|
||||
if it doesn't already exist.
|
||||
"""
|
||||
func = ida_funcs.get_func(va)
|
||||
if not func:
|
||||
raise ValueError("not a function")
|
||||
|
||||
existing = ida_funcs.get_func_cmt(func, repeatable) or ""
|
||||
if cmt in existing:
|
||||
return
|
||||
|
||||
new = existing + "\n" + cmt
|
||||
ida_funcs.set_func_cmt(func, new, repeatable)
|
||||
|
||||
|
||||
def main():
|
||||
path = ida_kernwin.ask_file(False, "*", "capa report")
|
||||
if not path:
|
||||
return 0
|
||||
|
||||
with open(path, "rb") as f:
|
||||
doc = json.loads(f.read().decode("utf-8"))
|
||||
|
||||
if "meta" not in doc or "rules" not in doc:
|
||||
logger.error("doesn't appear to be a capa report")
|
||||
return -1
|
||||
|
||||
# in IDA 7.4, the MD5 hash may be truncated, for example:
|
||||
# wanted: 84882c9d43e23d63b82004fae74ebb61
|
||||
# found: b'84882C9D43E23D63B82004FAE74EBB6\x00'
|
||||
#
|
||||
# see: https://github.com/idapython/bin/issues/11
|
||||
a = doc["meta"]["sample"]["md5"].lower()
|
||||
b = idautils.GetInputFileMD5().decode("ascii").lower().rstrip("\x00")
|
||||
if not a.startswith(b):
|
||||
logger.error("sample mismatch")
|
||||
return -2
|
||||
|
||||
rows = []
|
||||
for rule in doc["rules"].values():
|
||||
if rule["meta"].get("lib"):
|
||||
continue
|
||||
if rule["meta"].get("capa/subscope"):
|
||||
continue
|
||||
if rule["meta"]["scope"] != "function":
|
||||
continue
|
||||
|
||||
name = rule["meta"]["name"]
|
||||
ns = rule["meta"].get("namespace", "")
|
||||
for va in rule["matches"].keys():
|
||||
va = int(va)
|
||||
rows.append((ns, name, va))
|
||||
|
||||
# order by (namespace, name) so that like things show up together
|
||||
rows = sorted(rows)
|
||||
for ns, name, va in rows:
|
||||
if ns:
|
||||
cmt = "%s (%s)" % (name, ns)
|
||||
else:
|
||||
cmt = "%s" % (name,)
|
||||
|
||||
logger.info("0x%x: %s", va, cmt)
|
||||
try:
|
||||
# message will look something like:
|
||||
#
|
||||
# capa: delete service (host-interaction/service/delete)
|
||||
append_func_cmt(va, "capa: " + cmt, repeatable=False)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
logger.info("ok")
|
||||
|
||||
|
||||
main()
|
||||
|
||||
7
setup.py
7
setup.py
@@ -11,17 +11,20 @@ import sys
|
||||
|
||||
import setuptools
|
||||
|
||||
requirements = ["six", "tqdm", "pyyaml", "tabulate", "colorama", "termcolor", "ruamel.yaml", "wcwidth"]
|
||||
# halo==0.0.30 is the last version to support py2.7
|
||||
requirements = ["six", "tqdm", "pyyaml", "tabulate", "colorama", "termcolor", "ruamel.yaml", "wcwidth", "halo==0.0.30"]
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
# py3
|
||||
requirements.append("networkx")
|
||||
requirements.append("pylancelot~=0.3.6")
|
||||
else:
|
||||
# py2
|
||||
requirements.append("enum34")
|
||||
requirements.append("vivisect @ https://github.com/williballenthin/vivisect/tarball/v0.0.20200804#egg=vivisect")
|
||||
requirements.append("viv-utils")
|
||||
requirements.append("networkx==2.2") # v2.2 is last version supported by Python 2.7
|
||||
requirements.append("backports.functools-lru-cache")
|
||||
|
||||
# this sets __version__
|
||||
# via: http://stackoverflow.com/a/7071358/87207
|
||||
@@ -52,7 +55,7 @@ setuptools.setup(
|
||||
"pycodestyle",
|
||||
"black ; python_version>'3.0'",
|
||||
"isort",
|
||||
]
|
||||
],
|
||||
},
|
||||
zip_safe=False,
|
||||
keywords="capa",
|
||||
|
||||
Submodule tests/data updated: 23b302f2de...aeb505b914
@@ -7,79 +7,507 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import os.path
|
||||
import contextlib
|
||||
import collections
|
||||
|
||||
import pytest
|
||||
import viv_utils
|
||||
|
||||
import capa.main
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.basicblock
|
||||
from capa.features import ARCH_X32, ARCH_X64
|
||||
|
||||
try:
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from backports.functools_lru_cache import lru_cache
|
||||
|
||||
|
||||
CD = os.path.dirname(__file__)
|
||||
|
||||
|
||||
Sample = collections.namedtuple("Sample", ["vw", "path"])
|
||||
@contextlib.contextmanager
def xfail(condition, reason=None):
    """
    context manager that wraps a block that is expected to fail in some cases.
    when it does fail (and is expected), then mark this as pytest.xfail.
    if its unexpected, raise an exception, so the test fails.

    example::

        # this test:
        #  - passes on py3 if foo() works
        #  - fails on py3 if foo() fails
        #  - xfails on py2 if foo() fails
        #  - fails on py2 if foo() works
        with xfail(sys.version_info < (3, 0), reason="py2 doesn't foo"):
            foo()
    """
    try:
        # do the block
        yield
    except:
        if condition:
            # we expected the test to fail, so raise and register this via pytest
            # (pytest.xfail raises an exception itself, exiting this generator)
            pytest.xfail(reason)
        else:
            # we don't expect an exception, so the test should fail
            raise
    else:
        if not condition:
            # here we expect the block to run successfully,
            # and we've received no exception,
            # so this is good
            pass
        else:
            # we expected an exception, but didn't find one. that's an error.
            raise RuntimeError("expected to fail, but didn't")
|
||||
|
||||
|
||||
@lru_cache()
|
||||
def get_viv_extractor(path):
|
||||
import capa.features.extractors.viv
|
||||
|
||||
if "raw32" in path:
|
||||
vw = capa.main.get_workspace(path, "sc32", should_save=False)
|
||||
elif "raw64" in path:
|
||||
vw = capa.main.get_workspace(path, "sc64", should_save=False)
|
||||
else:
|
||||
vw = capa.main.get_workspace(path, "auto", should_save=True)
|
||||
return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_lancelot_extractor(path):
|
||||
import capa.features.extractors.lancelot
|
||||
|
||||
with open(path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
return capa.features.extractors.lancelot.LancelotFeatureExtractor(buf)
|
||||
|
||||
|
||||
@lru_cache()
|
||||
def extract_file_features(extractor):
|
||||
features = collections.defaultdict(set)
|
||||
for feature, va in extractor.extract_file_features():
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
|
||||
def extract_function_features(extractor, f):
|
||||
features = collections.defaultdict(set)
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_function_features(f):
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
|
||||
def extract_basic_block_features(extractor, f, bb):
|
||||
features = collections.defaultdict(set)
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
def get_data_path_by_name(name):
|
||||
if name == "mimikatz":
|
||||
return os.path.join(CD, "data", "mimikatz.exe_")
|
||||
elif name == "kernel32":
|
||||
return os.path.join(CD, "data", "kernel32.dll_")
|
||||
elif name == "kernel32-64":
|
||||
return os.path.join(CD, "data", "kernel32-64.dll_")
|
||||
elif name == "pma12-04":
|
||||
return os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
|
||||
elif name == "pma21-01":
|
||||
return os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
|
||||
elif name == "al-khaser x86":
|
||||
return os.path.join(CD, "data", "al-khaser_x86.exe_")
|
||||
elif name.startswith("39c05"):
|
||||
return os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_")
|
||||
elif name.startswith("499c2"):
|
||||
return os.path.join(CD, "data", "499c2a85f6e8142c3f48d4251c9c7cd6.raw32")
|
||||
elif name.startswith("9324d"):
|
||||
return os.path.join(CD, "data", "9324d1a8ae37a36ae560c37448c9705a.exe_")
|
||||
elif name.startswith("a1982"):
|
||||
return os.path.join(CD, "data", "a198216798ca38f280dc413f8c57f2c2.exe_")
|
||||
elif name.startswith("a933a"):
|
||||
return os.path.join(CD, "data", "a933a1a402775cfa94b6bee0963f4b46.dll_")
|
||||
elif name.startswith("bfb9b"):
|
||||
return os.path.join(CD, "data", "bfb9b5391a13d0afd787e87ab90f14f5.dll_")
|
||||
elif name.startswith("c9188"):
|
||||
return os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
|
||||
elif name == "aspack":
|
||||
return os.path.join(CD, "data", "2055994ff75b4309eee3a49c5749d306")
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture")
|
||||
|
||||
|
||||
def get_sample_md5_by_name(name):
|
||||
"""used by IDA tests to ensure the correct IDB is loaded"""
|
||||
if name == "mimikatz":
|
||||
return "5f66b82558ca92e54e77f216ef4c066c"
|
||||
elif name == "kernel32":
|
||||
return "e80758cf485db142fca1ee03a34ead05"
|
||||
elif name == "kernel32-64":
|
||||
return "a8565440629ac87f6fef7d588fe3ff0f"
|
||||
elif name == "pma12-04":
|
||||
return "56bed8249e7c2982a90e54e1e55391a2"
|
||||
elif name == "pma21-01":
|
||||
return "c8403fb05244e23a7931c766409b5e22"
|
||||
elif name == "al-khaser x86":
|
||||
return "db648cd247281954344f1d810c6fd590"
|
||||
elif name.startswith("39c05"):
|
||||
return "b7841b9d5dc1f511a93cc7576672ec0c"
|
||||
elif name.startswith("499c2"):
|
||||
return "499c2a85f6e8142c3f48d4251c9c7cd6"
|
||||
elif name.startswith("9324d"):
|
||||
return "9324d1a8ae37a36ae560c37448c9705a"
|
||||
elif name.startswith("a1982"):
|
||||
return "a198216798ca38f280dc413f8c57f2c2"
|
||||
elif name.startswith("a933a"):
|
||||
return "a933a1a402775cfa94b6bee0963f4b46"
|
||||
elif name.startswith("bfb9b"):
|
||||
return "bfb9b5391a13d0afd787e87ab90f14f5"
|
||||
elif name.startswith("c9188"):
|
||||
return "c91887d861d9bd4a5872249b641bc9f9"
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture")
|
||||
|
||||
|
||||
def resolve_sample(sample):
|
||||
return get_data_path_by_name(sample)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mimikatz():
|
||||
path = os.path.join(CD, "data", "mimikatz.exe_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def sample(request):
|
||||
return resolve_sample(request.param)
|
||||
|
||||
|
||||
def get_function(extractor, fva):
|
||||
for f in extractor.get_functions():
|
||||
if f.__int__() == fva:
|
||||
return f
|
||||
raise ValueError("function not found")
|
||||
|
||||
|
||||
def get_basic_block(extractor, f, va):
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
if bb.__int__() == va:
|
||||
return bb
|
||||
raise ValueError("basic block not found")
|
||||
|
||||
|
||||
def resolve_scope(scope):
|
||||
if scope == "file":
|
||||
|
||||
def inner(extractor):
|
||||
return extract_file_features(extractor)
|
||||
|
||||
inner.__name__ = scope
|
||||
return inner
|
||||
elif "bb=" in scope:
|
||||
# like `function=0x401000,bb=0x40100A`
|
||||
fspec, _, bbspec = scope.partition(",")
|
||||
fva = int(fspec.partition("=")[2], 0x10)
|
||||
bbva = int(bbspec.partition("=")[2], 0x10)
|
||||
|
||||
def inner(extractor):
|
||||
f = get_function(extractor, fva)
|
||||
bb = get_basic_block(extractor, f, bbva)
|
||||
return extract_basic_block_features(extractor, f, bb)
|
||||
|
||||
inner.__name__ = scope
|
||||
return inner
|
||||
elif scope.startswith("function"):
|
||||
# like `function=0x401000`
|
||||
va = int(scope.partition("=")[2], 0x10)
|
||||
|
||||
def inner(extractor):
|
||||
f = get_function(extractor, va)
|
||||
return extract_function_features(extractor, f)
|
||||
|
||||
inner.__name__ = scope
|
||||
return inner
|
||||
else:
|
||||
raise ValueError("unexpected scope fixture")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_a933a1a402775cfa94b6bee0963f4b46():
|
||||
path = os.path.join(CD, "data", "a933a1a402775cfa94b6bee0963f4b46.dll_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def scope(request):
|
||||
return resolve_scope(request.param)
|
||||
|
||||
|
||||
def make_test_id(values):
|
||||
return "-".join(map(str, values))
|
||||
|
||||
|
||||
def parametrize(params, values, **kwargs):
|
||||
"""
|
||||
extend `pytest.mark.parametrize` to pretty-print features.
|
||||
by default, it renders objects as an opaque value.
|
||||
ref: https://docs.pytest.org/en/2.9.0/example/parametrize.html#different-options-for-test-ids
|
||||
rendered ID might look something like:
|
||||
mimikatz-function=0x403BAC-api(CryptDestroyKey)-True
|
||||
"""
|
||||
ids = list(map(make_test_id, values))
|
||||
return pytest.mark.parametrize(params, values, ids=ids, **kwargs)
|
||||
|
||||
|
||||
FEATURE_PRESENCE_TESTS = [
|
||||
# file/characteristic("embedded pe")
|
||||
("pma12-04", "file", capa.features.Characteristic("embedded pe"), True),
|
||||
# file/string
|
||||
("mimikatz", "file", capa.features.String("SCardControl"), True),
|
||||
("mimikatz", "file", capa.features.String("SCardTransmit"), True),
|
||||
("mimikatz", "file", capa.features.String("ACR > "), True),
|
||||
("mimikatz", "file", capa.features.String("nope"), False),
|
||||
# file/sections
|
||||
("mimikatz", "file", capa.features.file.Section(".text"), True),
|
||||
("mimikatz", "file", capa.features.file.Section(".nope"), False),
|
||||
# IDA doesn't extract unmapped sections by default
|
||||
# ("mimikatz", "file", capa.features.file.Section(".rsrc"), True),
|
||||
# file/exports
|
||||
("kernel32", "file", capa.features.file.Export("BaseThreadInitThunk"), True),
|
||||
("kernel32", "file", capa.features.file.Export("lstrlenW"), True),
|
||||
("kernel32", "file", capa.features.file.Export("nope"), False),
|
||||
# file/imports
|
||||
("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True),
|
||||
("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True),
|
||||
("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True),
|
||||
("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True),
|
||||
("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True),
|
||||
("mimikatz", "file", capa.features.file.Import("#11"), False),
|
||||
("mimikatz", "file", capa.features.file.Import("#nope"), False),
|
||||
("mimikatz", "file", capa.features.file.Import("nope"), False),
|
||||
# function/characteristic(loop)
|
||||
("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True),
|
||||
("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False),
|
||||
# bb/characteristic(tight loop)
|
||||
("mimikatz", "function=0x402EC4", capa.features.Characteristic("tight loop"), True),
|
||||
("mimikatz", "function=0x401000", capa.features.Characteristic("tight loop"), False),
|
||||
# bb/characteristic(stack string)
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("stack string"), True),
|
||||
("mimikatz", "function=0x401000", capa.features.Characteristic("stack string"), False),
|
||||
# bb/characteristic(tight loop)
|
||||
("mimikatz", "function=0x402EC4,bb=0x402F8E", capa.features.Characteristic("tight loop"), True),
|
||||
("mimikatz", "function=0x401000,bb=0x401000", capa.features.Characteristic("tight loop"), False),
|
||||
# insn/mnemonic
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("push"), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("movzx"), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("xor"), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("in"), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("out"), False),
|
||||
# insn/number
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0x3136B0), True),
|
||||
# insn/number: stack adjustments
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False),
|
||||
# insn/number: arch flavors
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False),
|
||||
# insn/offset
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0xC), True),
|
||||
# insn/offset: stack references
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x8), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x10), False),
|
||||
# insn/offset: negative
|
||||
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
|
||||
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
|
||||
# insn/offset: arch flavors
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False),
|
||||
# insn/api
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptImportKey"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptDestroyKey"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False),
|
||||
# insn/api: thunk
|
||||
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True),
|
||||
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
|
||||
# insn/api: x64
|
||||
("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True,),
|
||||
("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
|
||||
# insn/api: x64 thunk
|
||||
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True,),
|
||||
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
|
||||
# insn/api: resolve indirect calls
|
||||
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
|
||||
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),
|
||||
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True),
|
||||
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True),
|
||||
# insn/string
|
||||
("mimikatz", "function=0x40105D", capa.features.String("SCardControl"), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.String("ACR > "), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.String("nope"), False),
|
||||
# insn/string, pointer to string
|
||||
("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True),
|
||||
# insn/bytes
|
||||
("mimikatz", "function=0x40105D", capa.features.Bytes("SCardControl".encode("utf-16le")), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.Bytes("ACR > ".encode("utf-16le")), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False),
|
||||
# insn/bytes, pointer to bytes
|
||||
("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True),
|
||||
# insn/characteristic(nzxor)
|
||||
("mimikatz", "function=0x410DFC", capa.features.Characteristic("nzxor"), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.Characteristic("nzxor"), False),
|
||||
# insn/characteristic(nzxor): no security cookies
|
||||
("mimikatz", "function=0x46D534", capa.features.Characteristic("nzxor"), False),
|
||||
# insn/characteristic(peb access)
|
||||
("kernel32-64", "function=0x1800017D0", capa.features.Characteristic("peb access"), True),
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("peb access"), False),
|
||||
# insn/characteristic(gs access)
|
||||
("kernel32-64", "function=0x180001068", capa.features.Characteristic("gs access"), True),
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("gs access"), False),
|
||||
# insn/characteristic(cross section flow)
|
||||
("a1982...", "function=0x4014D0", capa.features.Characteristic("cross section flow"), True),
|
||||
# insn/characteristic(cross section flow): imports don't count
|
||||
("kernel32-64", "function=0x180001068", capa.features.Characteristic("cross section flow"), False),
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("cross section flow"), False),
|
||||
# insn/characteristic(recursive call)
|
||||
("39c05...", "function=0x10003100", capa.features.Characteristic("recursive call"), True),
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("recursive call"), False),
|
||||
# insn/characteristic(indirect call)
|
||||
("mimikatz", "function=0x4175FF", capa.features.Characteristic("indirect call"), True),
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("indirect call"), False),
|
||||
# insn/characteristic(calls from)
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls from"), True),
|
||||
("mimikatz", "function=0x4702FD", capa.features.Characteristic("calls from"), False),
|
||||
# function/characteristic(calls to)
|
||||
("mimikatz", "function=0x40105D", capa.features.Characteristic("calls to"), True),
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls to"), False),
|
||||
]
|
||||
|
||||
FEATURE_COUNT_TESTS = [
|
||||
("mimikatz", "function=0x40E5C2", capa.features.basicblock.BasicBlock(), 7),
|
||||
("mimikatz", "function=0x4702FD", capa.features.Characteristic("calls from"), 0),
|
||||
("mimikatz", "function=0x40E5C2", capa.features.Characteristic("calls from"), 3),
|
||||
("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls to"), 0),
|
||||
("mimikatz", "function=0x40B1F1", capa.features.Characteristic("calls to"), 3),
|
||||
]
|
||||
|
||||
|
||||
def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
|
||||
extractor = get_extractor(sample)
|
||||
features = scope(extractor)
|
||||
if expected:
|
||||
msg = "%s should be found in %s" % (str(feature), scope.__name__)
|
||||
else:
|
||||
msg = "%s should not be found in %s" % (str(feature), scope.__name__)
|
||||
assert feature.evaluate(features) == expected, msg
|
||||
|
||||
|
||||
def do_test_feature_count(get_extractor, sample, scope, feature, expected):
|
||||
extractor = get_extractor(sample)
|
||||
features = scope(extractor)
|
||||
msg = "%s should be found %d times in %s, found: %d" % (
|
||||
str(feature),
|
||||
expected,
|
||||
scope.__name__,
|
||||
len(features[feature]),
|
||||
)
|
||||
assert len(features[feature]) == expected, msg
|
||||
|
||||
|
||||
def get_extractor(path):
|
||||
if sys.version_info >= (3, 0):
|
||||
extractor = get_lancelot_extractor(path)
|
||||
else:
|
||||
extractor = get_viv_extractor(path)
|
||||
|
||||
# overload the extractor so that the fixture exposes `extractor.path`
|
||||
setattr(extractor, "path", path)
|
||||
return extractor
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def kernel32():
|
||||
path = os.path.join(CD, "data", "kernel32.dll_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def mimikatz_extractor():
|
||||
return get_extractor(get_data_path_by_name("mimikatz"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_a198216798ca38f280dc413f8c57f2c2():
|
||||
path = os.path.join(CD, "data", "a198216798ca38f280dc413f8c57f2c2.exe_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def a933a_extractor():
|
||||
return get_extractor(get_data_path_by_name("a933a..."))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_9324d1a8ae37a36ae560c37448c9705a():
|
||||
path = os.path.join(CD, "data", "9324d1a8ae37a36ae560c37448c9705a.exe_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def kernel32_extractor():
|
||||
return get_extractor(get_data_path_by_name("kernel32"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pma_lab_12_04():
|
||||
path = os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def a1982_extractor():
|
||||
return get_extractor(get_data_path_by_name("a1982..."))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_bfb9b5391a13d0afd787e87ab90f14f5():
|
||||
path = os.path.join(CD, "data", "bfb9b5391a13d0afd787e87ab90f14f5.dll_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def z9324d_extractor():
|
||||
return get_extractor(get_data_path_by_name("9324d..."))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_lab21_01():
|
||||
path = os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def pma12_04_extractor():
|
||||
return get_extractor(get_data_path_by_name("pma12-04"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_c91887d861d9bd4a5872249b641bc9f9():
|
||||
path = os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def bfb9b_extractor():
|
||||
return get_extractor(get_data_path_by_name("bfb9b..."))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41():
|
||||
path = os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_",)
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
def pma21_01_extractor():
|
||||
return get_extractor(get_data_path_by_name("pma21-01"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32():
|
||||
path = os.path.join(CD, "data", "499c2a85f6e8142c3f48d4251c9c7cd6.raw32")
|
||||
return Sample(viv_utils.getShellcodeWorkspace(path), path)
|
||||
def c9188_extractor():
|
||||
return get_extractor(get_data_path_by_name("c9188..."))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def z39c05_extractor():
|
||||
return get_extractor(get_data_path_by_name("39c05..."))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def z499c2_extractor():
|
||||
return get_extractor(get_data_path_by_name("499c2..."))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def al_khaser_x86_extractor():
|
||||
return get_extractor(get_data_path_by_name("al-khaser x86"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def aspack_extractor():
|
||||
return get_extractor(get_data_path_by_name("aspack"))
|
||||
|
||||
@@ -5,9 +5,10 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import sys
|
||||
import textwrap
|
||||
|
||||
import pytest
|
||||
from fixtures import *
|
||||
|
||||
import capa.main
|
||||
@@ -23,7 +24,7 @@ EXTRACTOR = capa.features.extractors.NullFeatureExtractor(
|
||||
"file features": [(0x402345, capa.features.Characteristic("embedded pe")),],
|
||||
"functions": {
|
||||
0x401000: {
|
||||
"features": [(0x401000, capa.features.Characteristic("switch")),],
|
||||
"features": [(0x401000, capa.features.Characteristic("indirect call")),],
|
||||
"basic blocks": {
|
||||
0x401000: {
|
||||
"features": [(0x401000, capa.features.Characteristic("tight loop")),],
|
||||
@@ -104,17 +105,14 @@ def compare_extractors_viv_null(viv_ext, null_ext):
|
||||
viv_ext (capa.features.extractors.viv.VivisectFeatureExtractor)
|
||||
null_ext (capa.features.extractors.NullFeatureExtractor)
|
||||
"""
|
||||
|
||||
# TODO: ordering of these things probably doesn't work yet
|
||||
|
||||
assert list(viv_ext.extract_file_features()) == list(null_ext.extract_file_features())
|
||||
assert to_int(list(viv_ext.get_functions())) == list(null_ext.get_functions())
|
||||
assert list(map(to_int, viv_ext.get_functions())) == list(null_ext.get_functions())
|
||||
for f in viv_ext.get_functions():
|
||||
assert to_int(list(viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
|
||||
assert list(map(to_int, viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
|
||||
assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(to_int(f)))
|
||||
|
||||
for bb in viv_ext.get_basic_blocks(f):
|
||||
assert to_int(list(viv_ext.get_instructions(f, bb))) == list(
|
||||
assert list(map(to_int, viv_ext.get_instructions(f, bb))) == list(
|
||||
null_ext.get_instructions(to_int(f), to_int(bb))
|
||||
)
|
||||
assert list(viv_ext.extract_basic_block_features(f, bb)) == list(
|
||||
@@ -129,10 +127,7 @@ def compare_extractors_viv_null(viv_ext, null_ext):
|
||||
|
||||
def to_int(o):
|
||||
"""helper to get int value of extractor items"""
|
||||
if isinstance(o, list):
|
||||
return map(lambda x: capa.helpers.oint(x), o)
|
||||
else:
|
||||
return capa.helpers.oint(o)
|
||||
return capa.helpers.oint(o)
|
||||
|
||||
|
||||
def test_freeze_s_roundtrip():
|
||||
@@ -169,18 +164,22 @@ def test_serialize_features():
|
||||
roundtrip_feature(capa.features.file.Import("#11"))
|
||||
|
||||
|
||||
def test_freeze_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_freeze_sample(tmpdir, z9324d_extractor):
|
||||
# tmpdir fixture handles cleanup
|
||||
o = tmpdir.mkdir("capa").join("test.frz").strpath
|
||||
assert capa.features.freeze.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, o, "-v"]) == 0
|
||||
path = z9324d_extractor.path
|
||||
assert capa.features.freeze.main([path, o, "-v"]) == 0
|
||||
|
||||
|
||||
def test_freeze_load_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_freeze_load_sample(tmpdir, z9324d_extractor):
|
||||
o = tmpdir.mkdir("capa").join("test.frz")
|
||||
viv_extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
|
||||
with open(o.strpath, "wb") as f:
|
||||
f.write(capa.features.freeze.dump(viv_extractor))
|
||||
null_extractor = capa.features.freeze.load(o.open("rb").read())
|
||||
compare_extractors_viv_null(viv_extractor, null_extractor)
|
||||
f.write(capa.features.freeze.dump(z9324d_extractor))
|
||||
|
||||
with open(o.strpath, "rb") as f:
|
||||
null_extractor = capa.features.freeze.load(f.read())
|
||||
|
||||
compare_extractors_viv_null(z9324d_extractor, null_extractor)
|
||||
|
||||
@@ -1,288 +1,104 @@
|
||||
# run this script from within IDA with ./tests/data/mimikatz.exe open
|
||||
import logging
|
||||
import binascii
|
||||
import traceback
|
||||
import collections
|
||||
|
||||
import pytest
|
||||
|
||||
import capa.features
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.basicblock
|
||||
from capa.features import ARCH_X32, ARCH_X64
|
||||
|
||||
logger = logging.getLogger("test_ida_features")
|
||||
|
||||
|
||||
def check_input_file():
|
||||
import idautils
|
||||
|
||||
wanted = "5f66b82558ca92e54e77f216ef4c066c"
|
||||
# some versions (7.4) of IDA return a truncated version of the MD5.
|
||||
# https://github.com/idapython/bin/issues/11
|
||||
try:
|
||||
found = idautils.GetInputFileMD5()[:31].decode("ascii").lower()
|
||||
except UnicodeDecodeError:
|
||||
# in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
|
||||
# rather than the hex digest
|
||||
found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower()
|
||||
if not wanted.startswith(found):
|
||||
raise RuntimeError("please run the tests against `mimikatz.exe`")
|
||||
|
||||
|
||||
def get_extractor():
|
||||
check_input_file()
|
||||
|
||||
# have to import import this inline so pytest doesn't bail outside of IDA
|
||||
import capa.features.extractors.ida
|
||||
|
||||
return capa.features.extractors.ida.IdaFeatureExtractor()
|
||||
|
||||
|
||||
def extract_file_features():
|
||||
extractor = get_extractor()
|
||||
features = set([])
|
||||
for feature, va in extractor.extract_file_features():
|
||||
features.add(feature)
|
||||
return features
|
||||
|
||||
|
||||
def extract_function_features(f):
|
||||
extractor = get_extractor()
|
||||
features = collections.defaultdict(set)
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_function_features(f):
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
def extract_basic_block_features(f, bb):
|
||||
extractor = get_extractor()
|
||||
features = collections.defaultdict(set)
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_api_features():
|
||||
f = get_extractor().get_function(0x403BAC)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.insn.API("advapi32.CryptAcquireContextW") in features
|
||||
assert capa.features.insn.API("advapi32.CryptAcquireContext") in features
|
||||
assert capa.features.insn.API("advapi32.CryptGenKey") in features
|
||||
assert capa.features.insn.API("advapi32.CryptImportKey") in features
|
||||
assert capa.features.insn.API("advapi32.CryptDestroyKey") in features
|
||||
assert capa.features.insn.API("CryptAcquireContextW") in features
|
||||
assert capa.features.insn.API("CryptAcquireContext") in features
|
||||
assert capa.features.insn.API("CryptGenKey") in features
|
||||
assert capa.features.insn.API("CryptImportKey") in features
|
||||
assert capa.features.insn.API("CryptDestroyKey") in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_string_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.String("SCardControl") in features
|
||||
assert capa.features.String("SCardTransmit") in features
|
||||
assert capa.features.String("ACR > ") in features
|
||||
# other strings not in this function
|
||||
assert capa.features.String("bcrypt.dll") not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_byte_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
features = extract_function_features(f)
|
||||
wanted = capa.features.Bytes("SCardControl".encode("utf-16le"))
|
||||
# use `==` rather than `is` because the result is not `True` but a truthy value.
|
||||
assert wanted.evaluate(features) == True
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_number_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.insn.Number(0xFF) in features
|
||||
assert capa.features.insn.Number(0x3136B0) in features
|
||||
# the following are stack adjustments
|
||||
assert capa.features.insn.Number(0xC) not in features
|
||||
assert capa.features.insn.Number(0x10) not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_number_arch_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.insn.Number(0xFF) in features
|
||||
assert capa.features.insn.Number(0xFF, arch=ARCH_X32) in features
|
||||
assert capa.features.insn.Number(0xFF, arch=ARCH_X64) not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_offset_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.insn.Offset(0x0) in features
|
||||
assert capa.features.insn.Offset(0x4) in features
|
||||
assert capa.features.insn.Offset(0xC) in features
|
||||
# the following are stack references
|
||||
assert capa.features.insn.Offset(0x8) not in features
|
||||
assert capa.features.insn.Offset(0x10) not in features
|
||||
|
||||
# this function has the following negative offsets
|
||||
# movzx ecx, byte ptr [eax-1]
|
||||
# movzx eax, byte ptr [eax-2]
|
||||
f = get_extractor().get_function(0x4011FB)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.insn.Offset(-0x1) in features
|
||||
assert capa.features.insn.Offset(-0x2) in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_offset_arch_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.insn.Offset(0x0) in features
|
||||
assert capa.features.insn.Offset(0x0, arch=ARCH_X32) in features
|
||||
assert capa.features.insn.Offset(0x0, arch=ARCH_X64) not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_nzxor_features():
|
||||
f = get_extractor().get_function(0x410DFC)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.Characteristic("nzxor") in features # 0x0410F0B
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_mnemonic_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.insn.Mnemonic("push") in features
|
||||
assert capa.features.insn.Mnemonic("movzx") in features
|
||||
assert capa.features.insn.Mnemonic("xor") in features
|
||||
|
||||
assert capa.features.insn.Mnemonic("in") not in features
|
||||
assert capa.features.insn.Mnemonic("out") not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_file_section_name_features():
|
||||
features = extract_file_features()
|
||||
assert capa.features.file.Section(".idata") in features
|
||||
assert capa.features.file.Section(".text") in features
|
||||
assert capa.features.file.Section(".nope") not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_tight_loop_features():
|
||||
extractor = get_extractor()
|
||||
|
||||
f = extractor.get_function(0x402EC4)
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
if bb.__int__() != 0x402F8E:
|
||||
continue
|
||||
features = extract_basic_block_features(f, bb)
|
||||
assert capa.features.Characteristic("tight loop") in features
|
||||
assert capa.features.basicblock.BasicBlock() in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_tight_loop_bb_features():
|
||||
extractor = get_extractor()
|
||||
|
||||
f = extractor.get_function(0x402EC4)
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
if bb.__int__() != 0x402F8E:
|
||||
continue
|
||||
features = extract_basic_block_features(f, bb)
|
||||
assert capa.features.Characteristic("tight loop") in features
|
||||
assert capa.features.basicblock.BasicBlock() in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_file_import_name_features():
|
||||
features = extract_file_features()
|
||||
assert capa.features.file.Import("advapi32.CryptSetHashParam") in features
|
||||
assert capa.features.file.Import("CryptSetHashParam") in features
|
||||
assert capa.features.file.Import("kernel32.IsWow64Process") in features
|
||||
assert capa.features.file.Import("msvcrt.exit") in features
|
||||
assert capa.features.file.Import("cabinet.#11") in features
|
||||
assert capa.features.file.Import("#11") not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_stackstring_features():
|
||||
f = get_extractor().get_function(0x4556E5)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.Characteristic("stack string") in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_switch_features():
|
||||
f = get_extractor().get_function(0x409411)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.Characteristic("switch") in features
|
||||
|
||||
f = get_extractor().get_function(0x409393)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.Characteristic("switch") not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_function_calls_to():
|
||||
# this function is used in a function pointer
|
||||
f = get_extractor().get_function(0x4011FB)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.Characteristic("calls to") not in features
|
||||
|
||||
# __FindPESection is called once
|
||||
f = get_extractor().get_function(0x470360)
|
||||
features = extract_function_features(f)
|
||||
assert len(features[capa.features.Characteristic("calls to")]) == 1
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_function_calls_from():
|
||||
f = get_extractor().get_function(0x4011FB)
|
||||
features = extract_function_features(f)
|
||||
assert capa.features.Characteristic("calls from") in features
|
||||
assert len(features[capa.features.Characteristic("calls from")]) == 3
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_basic_block_count():
|
||||
f = get_extractor().get_function(0x4011FB)
|
||||
features = extract_function_features(f)
|
||||
assert len(features[capa.features.basicblock.BasicBlock()]) == 15
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("-" * 80)
|
||||
|
||||
# invoke all functions in this module that start with `test_`
|
||||
for name in dir(sys.modules[__name__]):
|
||||
if not name.startswith("test_"):
|
||||
continue
|
||||
|
||||
test = getattr(sys.modules[__name__], name)
|
||||
logger.debug("invoking test: %s", name)
|
||||
sys.stderr.flush()
|
||||
try:
|
||||
test()
|
||||
except AssertionError as e:
|
||||
print("FAIL %s" % (name))
|
||||
traceback.print_exc()
|
||||
else:
|
||||
print("OK %s" % (name))
|
||||
# run this script from within IDA with ./tests/data/mimikatz.exe open
|
||||
import sys
|
||||
import logging
|
||||
import os.path
|
||||
import binascii
|
||||
import traceback
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
sys.path.append(os.path.dirname(__file__))
|
||||
from fixtures import *
|
||||
finally:
|
||||
sys.path.pop()
|
||||
|
||||
|
||||
logger = logging.getLogger("test_ida_features")
|
||||
|
||||
|
||||
def check_input_file(wanted):
|
||||
import idautils
|
||||
|
||||
# some versions (7.4) of IDA return a truncated version of the MD5.
|
||||
# https://github.com/idapython/bin/issues/11
|
||||
try:
|
||||
found = idautils.GetInputFileMD5()[:31].decode("ascii").lower()
|
||||
except UnicodeDecodeError:
|
||||
# in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
|
||||
# rather than the hex digest
|
||||
found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower()
|
||||
|
||||
if not wanted.startswith(found):
|
||||
raise RuntimeError("please run the tests against sample with MD5: `%s`" % (wanted))
|
||||
|
||||
|
||||
def get_ida_extractor(_path):
|
||||
check_input_file("5f66b82558ca92e54e77f216ef4c066c")
|
||||
|
||||
# have to import import this inline so pytest doesn't bail outside of IDA
|
||||
import capa.features.extractors.ida
|
||||
|
||||
return capa.features.extractors.ida.IdaFeatureExtractor()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_ida_features():
|
||||
for (sample, scope, feature, expected) in FEATURE_PRESENCE_TESTS:
|
||||
id = make_test_id((sample, scope, feature, expected))
|
||||
|
||||
try:
|
||||
check_input_file(get_sample_md5_by_name(sample))
|
||||
except RuntimeError:
|
||||
print("SKIP %s" % (id))
|
||||
continue
|
||||
|
||||
scope = resolve_scope(scope)
|
||||
sample = resolve_sample(sample)
|
||||
|
||||
try:
|
||||
do_test_feature_presence(get_ida_extractor, sample, scope, feature, expected)
|
||||
except Exception as e:
|
||||
print("FAIL %s" % (id))
|
||||
traceback.print_exc()
|
||||
else:
|
||||
print("OK %s" % (id))
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_ida_feature_counts():
|
||||
for (sample, scope, feature, expected) in FEATURE_COUNT_TESTS:
|
||||
id = make_test_id((sample, scope, feature, expected))
|
||||
|
||||
try:
|
||||
check_input_file(get_sample_md5_by_name(sample))
|
||||
except RuntimeError:
|
||||
print("SKIP %s" % (id))
|
||||
continue
|
||||
|
||||
scope = resolve_scope(scope)
|
||||
sample = resolve_sample(sample)
|
||||
|
||||
try:
|
||||
do_test_feature_count(get_ida_extractor, sample, scope, feature, expected)
|
||||
except Exception as e:
|
||||
print("FAIL %s" % (id))
|
||||
traceback.print_exc()
|
||||
else:
|
||||
print("OK %s" % (id))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("-" * 80)
|
||||
|
||||
# invoke all functions in this module that start with `test_`
|
||||
for name in dir(sys.modules[__name__]):
|
||||
if not name.startswith("test_"):
|
||||
continue
|
||||
|
||||
test = getattr(sys.modules[__name__], name)
|
||||
logger.debug("invoking test: %s", name)
|
||||
sys.stderr.flush()
|
||||
test()
|
||||
|
||||
print("DONE")
|
||||
|
||||
26
tests/test_lancelot_features.py
Normal file
26
tests/test_lancelot_features.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
|
||||
from fixtures import *
|
||||
|
||||
|
||||
@parametrize(
|
||||
"sample,scope,feature,expected", FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"],
|
||||
)
|
||||
def test_lancelot_features(sample, scope, feature, expected):
|
||||
with xfail(sys.version_info < (3, 0), reason="lancelot only works on py3"):
|
||||
do_test_feature_presence(get_lancelot_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@parametrize(
|
||||
"sample,scope,feature,expected", FEATURE_COUNT_TESTS, indirect=["sample", "scope"],
|
||||
)
|
||||
def test_lancelot_feature_counts(sample, scope, feature, expected):
|
||||
with xfail(sys.version_info < (3, 0), reason="lancelot only works on py3"):
|
||||
do_test_feature_count(get_lancelot_extractor, sample, scope, feature, expected)
|
||||
@@ -5,28 +5,31 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import sys
|
||||
import textwrap
|
||||
|
||||
import pytest
|
||||
from fixtures import *
|
||||
|
||||
import capa.main
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
import capa.features
|
||||
import capa.features.extractors.viv
|
||||
from capa.engine import *
|
||||
|
||||
|
||||
def test_main(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_main(z9324d_extractor):
|
||||
# tests rules can be loaded successfully and all output modes
|
||||
assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-vv"]) == 0
|
||||
assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v"]) == 0
|
||||
assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-j"]) == 0
|
||||
assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path]) == 0
|
||||
path = z9324d_extractor.path
|
||||
assert capa.main.main([path, "-vv"]) == 0
|
||||
assert capa.main.main([path, "-v"]) == 0
|
||||
assert capa.main.main([path, "-j"]) == 0
|
||||
assert capa.main.main([path]) == 0
|
||||
|
||||
|
||||
def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_main_single_rule(z9324d_extractor, tmpdir):
|
||||
# tests a single rule can be loaded successfully
|
||||
RULE_CONTENT = textwrap.dedent(
|
||||
"""
|
||||
@@ -38,16 +41,19 @@ def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
|
||||
- string: test
|
||||
"""
|
||||
)
|
||||
path = z9324d_extractor.path
|
||||
rule_file = tmpdir.mkdir("capa").join("rule.yml")
|
||||
rule_file.write(RULE_CONTENT)
|
||||
assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v", "-r", rule_file.strpath,]) == 0
|
||||
assert capa.main.main([path, "-v", "-r", rule_file.strpath,]) == 0
|
||||
|
||||
|
||||
def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32):
|
||||
assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-vv", "-f", "sc32"]) == 0
|
||||
assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-v", "-f", "sc32"]) == 0
|
||||
assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-j", "-f", "sc32"]) == 0
|
||||
assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-f", "sc32"]) == 0
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="lancelot doesn't support shellcode workspaces")
|
||||
def test_main_shellcode(z499c2_extractor):
|
||||
path = z499c2_extractor.path
|
||||
assert capa.main.main([path, "-vv", "-f", "sc32"]) == 0
|
||||
assert capa.main.main([path, "-v", "-f", "sc32"]) == 0
|
||||
assert capa.main.main([path, "-j", "-f", "sc32"]) == 0
|
||||
assert capa.main.main([path, "-f", "sc32"]) == 0
|
||||
|
||||
|
||||
def test_ruleset():
|
||||
@@ -73,7 +79,7 @@ def test_ruleset():
|
||||
name: function rule
|
||||
scope: function
|
||||
features:
|
||||
- characteristic: switch
|
||||
- characteristic: tight loop
|
||||
"""
|
||||
)
|
||||
),
|
||||
@@ -96,7 +102,8 @@ def test_ruleset():
|
||||
assert len(rules.basic_block_rules) == 1
|
||||
|
||||
|
||||
def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_match_across_scopes_file_function(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
# this rule should match on a function (0x4073F0)
|
||||
@@ -153,16 +160,14 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
|
||||
),
|
||||
]
|
||||
)
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
|
||||
assert "install service" in capabilities
|
||||
assert ".text section" in capabilities
|
||||
assert ".text section and install service" in capabilities
|
||||
|
||||
|
||||
def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_match_across_scopes(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
# this rule should match on a basic block (including at least 0x403685)
|
||||
@@ -218,16 +223,14 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
),
|
||||
]
|
||||
)
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
|
||||
)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
|
||||
assert "tight loop" in capabilities
|
||||
assert "kill thread loop" in capabilities
|
||||
assert "kill thread program" in capabilities
|
||||
|
||||
|
||||
def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_subscope_bb_rules(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
@@ -247,14 +250,12 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
]
|
||||
)
|
||||
# tight loop at 0x403685
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
|
||||
assert "test rule" in capabilities
|
||||
|
||||
|
||||
def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_byte_matching(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
@@ -272,15 +273,12 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
|
||||
assert "byte match test" in capabilities
|
||||
|
||||
|
||||
def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
|
||||
def test_count_bb(z9324d_extractor):
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
@@ -299,9 +297,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||
capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
|
||||
assert "count bb" in capabilities
|
||||
|
||||
@@ -162,6 +162,23 @@ def test_rule_yaml_count_range():
|
||||
assert r.evaluate({Number(100): {1, 2, 3}}) == False
|
||||
|
||||
|
||||
def test_rule_yaml_count_string():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
features:
|
||||
- count(string(foo)): 2
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
assert r.evaluate({String("foo"): {}}) == False
|
||||
assert r.evaluate({String("foo"): {1}}) == False
|
||||
assert r.evaluate({String("foo"): {1, 2}}) == True
|
||||
assert r.evaluate({String("foo"): {1, 2, 3}}) == False
|
||||
|
||||
|
||||
def test_invalid_rule_feature():
|
||||
with pytest.raises(capa.rules.InvalidRule):
|
||||
capa.rules.Rule.from_yaml(
|
||||
@@ -267,7 +284,7 @@ def test_subscope_rules():
|
||||
- function:
|
||||
- and:
|
||||
- characteristic: nzxor
|
||||
- characteristic: switch
|
||||
- characteristic: loop
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
62
tests/test_unpack.py
Normal file
62
tests/test_unpack.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import sys
|
||||
|
||||
import pefile
|
||||
import pytest
|
||||
from fixtures import *
|
||||
|
||||
import capa.unpack
|
||||
|
||||
|
||||
@pytest.mark.xfail(sys.version_info <= (3, 5), reason="auto-unpack only works on py3.6+")
|
||||
def test_aspack_is_packed(aspack_extractor):
|
||||
path = aspack_extractor.path
|
||||
|
||||
with open(path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
assert capa.unpack.is_packed(buf) is True
|
||||
|
||||
|
||||
@pytest.mark.xfail(sys.version_info <= (3, 5), reason="auto-unpack only works on py3.6+")
|
||||
def test_aspack_detect(aspack_extractor):
|
||||
path = aspack_extractor.path
|
||||
|
||||
with open(path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
assert capa.unpack.detect_packer(buf) == "aspack"
|
||||
|
||||
|
||||
@pytest.mark.xfail(sys.version_info <= (3, 5), reason="auto-unpack only works on py3.6+")
|
||||
def test_aspack_unpack(aspack_extractor):
|
||||
with open(aspack_extractor.path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
unpacked = capa.unpack.unpack_pe("aspack", buf)
|
||||
|
||||
pe = pefile.PE(data=unpacked)
|
||||
assert pe.OPTIONAL_HEADER.ImageBase == 0x4AD00000
|
||||
assert pe.OPTIONAL_HEADER.AddressOfEntryPoint == 0x1A610
|
||||
assert b"This program cannot be run in DOS mode" in unpacked
|
||||
assert "(C) Copyright 1985-2000 Microsoft Corp.".encode("utf-16le") in unpacked
|
||||
assert "CMD.EXE has halted. %0".encode("utf-16le") in unpacked
|
||||
|
||||
dlls = set([])
|
||||
syms = set([])
|
||||
for entry in pe.DIRECTORY_ENTRY_IMPORT:
|
||||
dlls.add(entry.dll.decode("ascii").lower().partition(".")[0])
|
||||
for imp in entry.imports:
|
||||
syms.add(imp.name.decode("ascii"))
|
||||
|
||||
assert dlls == {"advapi32", "kernel32", "msvcrt", "user32"}
|
||||
assert "RegQueryValueExW" in syms
|
||||
assert "WriteConsoleW" in syms
|
||||
assert "realloc" in syms
|
||||
assert "GetProcessWindowStation" in syms
|
||||
@@ -5,340 +5,22 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import sys
|
||||
|
||||
import viv_utils
|
||||
from fixtures import *
|
||||
|
||||
import capa.features
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.basicblock
|
||||
import capa.features.extractors.viv.file
|
||||
import capa.features.extractors.viv.insn
|
||||
import capa.features.extractors.viv.function
|
||||
import capa.features.extractors.viv.basicblock
|
||||
from capa.features import ARCH_X32, ARCH_X64
|
||||
|
||||
@parametrize(
|
||||
"sample,scope,feature,expected", FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"],
|
||||
)
|
||||
def test_viv_features(sample, scope, feature, expected):
|
||||
with xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2"):
|
||||
do_test_feature_presence(get_viv_extractor, sample, scope, feature, expected)
|
||||
|
||||
def extract_file_features(vw, path):
|
||||
features = set([])
|
||||
for feature, va in capa.features.extractors.viv.file.extract_features(vw, path):
|
||||
features.add(feature)
|
||||
return features
|
||||
|
||||
|
||||
def extract_function_features(f):
|
||||
features = collections.defaultdict(set)
|
||||
for bb in f.basic_blocks:
|
||||
for insn in bb.instructions:
|
||||
for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
|
||||
features[feature].add(va)
|
||||
for feature, va in capa.features.extractors.viv.basicblock.extract_features(f, bb):
|
||||
features[feature].add(va)
|
||||
for feature, va in capa.features.extractors.viv.function.extract_features(f):
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
def extract_basic_block_features(f, bb):
|
||||
features = set({})
|
||||
for insn in bb.instructions:
|
||||
for feature, _ in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
|
||||
features.add(feature)
|
||||
for feature, _ in capa.features.extractors.viv.basicblock.extract_features(f, bb):
|
||||
features.add(feature)
|
||||
return features
|
||||
|
||||
|
||||
def test_api_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x403BAC))
|
||||
assert capa.features.insn.API("advapi32.CryptAcquireContextW") in features
|
||||
assert capa.features.insn.API("advapi32.CryptAcquireContext") in features
|
||||
assert capa.features.insn.API("advapi32.CryptGenKey") in features
|
||||
assert capa.features.insn.API("advapi32.CryptImportKey") in features
|
||||
assert capa.features.insn.API("advapi32.CryptDestroyKey") in features
|
||||
assert capa.features.insn.API("CryptAcquireContextW") in features
|
||||
assert capa.features.insn.API("CryptAcquireContext") in features
|
||||
assert capa.features.insn.API("CryptGenKey") in features
|
||||
assert capa.features.insn.API("CryptImportKey") in features
|
||||
assert capa.features.insn.API("CryptDestroyKey") in features
|
||||
|
||||
|
||||
def test_api_features_64_bit(sample_a198216798ca38f280dc413f8c57f2c2):
|
||||
features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4011B0))
|
||||
assert capa.features.insn.API("kernel32.GetStringTypeA") in features
|
||||
assert capa.features.insn.API("kernel32.GetStringTypeW") not in features
|
||||
assert capa.features.insn.API("kernel32.GetStringType") in features
|
||||
assert capa.features.insn.API("GetStringTypeA") in features
|
||||
assert capa.features.insn.API("GetStringType") in features
|
||||
# call via thunk in IDA Pro
|
||||
features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401CB0))
|
||||
assert capa.features.insn.API("msvcrt.vfprintf") in features
|
||||
assert capa.features.insn.API("vfprintf") in features
|
||||
|
||||
|
||||
def test_string_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
|
||||
assert capa.features.String("SCardControl") in features
|
||||
assert capa.features.String("SCardTransmit") in features
|
||||
assert capa.features.String("ACR > ") in features
|
||||
# other strings not in this function
|
||||
assert capa.features.String("bcrypt.dll") not in features
|
||||
|
||||
|
||||
def test_string_pointer_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF))
|
||||
assert capa.features.String("INPUTEVENT") in features
|
||||
|
||||
|
||||
def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
|
||||
wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61")
|
||||
# use `==` rather than `is` because the result is not `True` but a truthy value.
|
||||
assert wanted.evaluate(features) == True
|
||||
|
||||
|
||||
def test_byte_features64(sample_lab21_01):
|
||||
features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400010C0))
|
||||
wanted = capa.features.Bytes(b"\x32\xA2\xDF\x2D\x99\x2B\x00\x00")
|
||||
# use `==` rather than `is` because the result is not `True` but a truthy value.
|
||||
assert wanted.evaluate(features) == True
|
||||
|
||||
|
||||
def test_bytes_pointer_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF))
|
||||
assert capa.features.Bytes("INPUTEVENT".encode("utf-16le")).evaluate(features) == True
|
||||
|
||||
|
||||
def test_number_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
|
||||
assert capa.features.insn.Number(0xFF) in features
|
||||
assert capa.features.insn.Number(0x3136B0) in features
|
||||
# the following are stack adjustments
|
||||
assert capa.features.insn.Number(0xC) not in features
|
||||
assert capa.features.insn.Number(0x10) not in features
|
||||
|
||||
|
||||
def test_number_arch_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
|
||||
assert capa.features.insn.Number(0xFF) in features
|
||||
assert capa.features.insn.Number(0xFF, arch=ARCH_X32) in features
|
||||
assert capa.features.insn.Number(0xFF, arch=ARCH_X64) not in features
|
||||
|
||||
|
||||
def test_offset_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
|
||||
assert capa.features.insn.Offset(0x0) in features
|
||||
assert capa.features.insn.Offset(0x4) in features
|
||||
assert capa.features.insn.Offset(0xC) in features
|
||||
# the following are stack references
|
||||
assert capa.features.insn.Offset(0x8) not in features
|
||||
assert capa.features.insn.Offset(0x10) not in features
|
||||
|
||||
# this function has the following negative offsets
|
||||
# movzx ecx, byte ptr [eax-1]
|
||||
# movzx eax, byte ptr [eax-2]
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4011FB))
|
||||
assert capa.features.insn.Offset(-0x1) in features
|
||||
assert capa.features.insn.Offset(-0x2) in features
|
||||
|
||||
|
||||
def test_offset_arch_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
|
||||
assert capa.features.insn.Offset(0x0) in features
|
||||
assert capa.features.insn.Offset(0x0, arch=ARCH_X32) in features
|
||||
assert capa.features.insn.Offset(0x0, arch=ARCH_X64) not in features
|
||||
|
||||
|
||||
def test_nzxor_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x410DFC))
|
||||
assert capa.features.Characteristic("nzxor") in features # 0x0410F0B
|
||||
|
||||
|
||||
def get_bb_insn(f, va):
    """Return the (BasicBlock, Instruction) pair for the instruction at `va` in function `f`.

    Raises:
      KeyError: if no instruction in `f` has the given virtual address.
    """
    for block in f.basic_blocks:
        found = next((ins for ins in block.instructions if ins.va == va), None)
        if found is not None:
            return (block, found)
    raise KeyError(va)
|
||||
|
||||
|
||||
def test_is_security_cookie(mimikatz):
    """is_security_cookie() rejects ordinary instructions and accepts the
    security-cookie set/check instructions."""
    # not a security cookie check
    f = viv_utils.Function(mimikatz.vw, 0x410DFC)
    for va in [0x0410F0B]:
        bb, insn = get_bb_insn(f, va)
        # fixed: was `== False`; comparing to booleans with == is unidiomatic (PEP 8)
        assert not capa.features.extractors.viv.insn.is_security_cookie(f, bb, insn)

    # security cookie initial set and final check
    f = viv_utils.Function(mimikatz.vw, 0x46C54A)
    for va in [0x46C557, 0x46C63A]:
        bb, insn = get_bb_insn(f, va)
        # fixed: was `== True`
        assert capa.features.extractors.viv.insn.is_security_cookie(f, bb, insn)
|
||||
|
||||
|
||||
def test_mnemonic_features(mimikatz):
    """Mnemonic features reflect the instructions present in the function."""
    extracted = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
    # mnemonics used by this function
    for present in ("push", "movzx", "xor"):
        assert capa.features.insn.Mnemonic(present) in extracted
    # mnemonics this function never uses
    for absent in ("in", "out"):
        assert capa.features.insn.Mnemonic(absent) not in extracted
|
||||
|
||||
|
||||
def test_peb_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
    """The function at 0xABA6FEC yields the "peb access" characteristic."""
    vw = sample_a933a1a402775cfa94b6bee0963f4b46.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0xABA6FEC))
    assert capa.features.Characteristic("peb access") in extracted
|
||||
|
||||
|
||||
def test_file_section_name_features(mimikatz):
    """PE section names are surfaced as file-scope Section features."""
    extracted = extract_file_features(mimikatz.vw, mimikatz.path)
    for name in (".rsrc", ".text"):
        assert capa.features.file.Section(name) in extracted
    # a section that does not exist in the sample
    assert capa.features.file.Section(".nope") not in extracted
|
||||
|
||||
|
||||
def test_tight_loop_features(mimikatz):
    """The basic block at 0x402F8E yields the "tight loop" characteristic."""
    func = viv_utils.Function(mimikatz.vw, 0x402EC4)
    for block in func.basic_blocks:
        if block.va == 0x402F8E:
            extracted = extract_basic_block_features(func, block)
            assert capa.features.Characteristic("tight loop") in extracted
            assert capa.features.basicblock.BasicBlock() in extracted
|
||||
|
||||
|
||||
def test_tight_loop_bb_features(mimikatz):
    """Basic-block scope yields "tight loop" for the block at 0x402F8E.

    NOTE(review): this body is identical to test_tight_loop_features; consider
    consolidating the two tests.
    """
    func = viv_utils.Function(mimikatz.vw, 0x402EC4)
    for block in func.basic_blocks:
        if block.va == 0x402F8E:
            extracted = extract_basic_block_features(func, block)
            assert capa.features.Characteristic("tight loop") in extracted
            assert capa.features.basicblock.BasicBlock() in extracted
|
||||
|
||||
|
||||
def test_file_export_name_features(kernel32):
    """Exported symbol names become file-scope Export features."""
    extracted = extract_file_features(kernel32.vw, kernel32.path)
    for export in ("BaseThreadInitThunk", "lstrlenW"):
        assert capa.features.file.Export(export) in extracted
|
||||
|
||||
|
||||
def test_file_import_name_features(mimikatz):
    """Imports match both with and without the module prefix; ordinal imports
    match only with the module prefix."""
    extracted = extract_file_features(mimikatz.vw, mimikatz.path)
    expected = (
        "advapi32.CryptSetHashParam",  # module-qualified
        "CryptSetHashParam",  # bare symbol name
        "kernel32.IsWow64Process",
        "msvcrt.exit",
        "cabinet.#11",  # import by ordinal, module-qualified
    )
    for name in expected:
        assert capa.features.file.Import(name) in extracted
    # a bare ordinal without its module is ambiguous and must not match
    assert capa.features.file.Import("#11") not in extracted
|
||||
|
||||
|
||||
def test_cross_section_flow_features(sample_a198216798ca38f280dc413f8c57f2c2):
    """Control flow across PE sections is flagged, but calls to imports are not."""
    vw = sample_a198216798ca38f280dc413f8c57f2c2.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0x4014D0))
    assert capa.features.Characteristic("cross section flow") in extracted

    # this function calls some imports,
    # which should not trigger the cross-section flow characteristic
    extracted = extract_function_features(viv_utils.Function(vw, 0x401563))
    assert capa.features.Characteristic("cross section flow") not in extracted
|
||||
|
||||
|
||||
def test_segment_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
    """The function at 0xABA6FEC yields the "fs access" characteristic."""
    vw = sample_a933a1a402775cfa94b6bee0963f4b46.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0xABA6FEC))
    assert capa.features.Characteristic("fs access") in extracted
|
||||
|
||||
|
||||
def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a):
    """API features are extracted through thunks, with and without the module prefix."""
    vw = sample_9324d1a8ae37a36ae560c37448c9705a.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0x407970))
    assert capa.features.insn.API("kernel32.CreateToolhelp32Snapshot") in extracted
    assert capa.features.insn.API("CreateToolhelp32Snapshot") in extracted
|
||||
|
||||
|
||||
def test_file_embedded_pe(pma_lab_12_04):
    """A PE embedded within the file yields the "embedded pe" characteristic."""
    extracted = extract_file_features(pma_lab_12_04.vw, pma_lab_12_04.path)
    assert capa.features.Characteristic("embedded pe") in extracted
|
||||
|
||||
|
||||
def test_stackstring_features(mimikatz):
    """The function at 0x4556E5 yields the "stack string" characteristic."""
    func = viv_utils.Function(mimikatz.vw, 0x4556E5)
    extracted = extract_function_features(func)
    assert capa.features.Characteristic("stack string") in extracted
|
||||
|
||||
|
||||
def test_switch_features(mimikatz):
    """The "switch" characteristic is extracted only for functions containing one."""
    # this function contains a switch construct
    extracted = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409411))
    assert capa.features.Characteristic("switch") in extracted

    # this one does not
    extracted = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409393))
    assert capa.features.Characteristic("switch") not in extracted
|
||||
|
||||
|
||||
def test_recursive_call_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
    """The "recursive call" characteristic is extracted only for self-calling functions."""
    vw = sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw

    extracted = extract_function_features(viv_utils.Function(vw, 0x10003100))
    assert capa.features.Characteristic("recursive call") in extracted

    extracted = extract_function_features(viv_utils.Function(vw, 0x10007B00))
    assert capa.features.Characteristic("recursive call") not in extracted
|
||||
|
||||
|
||||
def test_loop_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
    """The "loop" characteristic is extracted only for functions containing a loop."""
    vw = sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw

    extracted = extract_function_features(viv_utils.Function(vw, 0x10003D30))
    assert capa.features.Characteristic("loop") in extracted

    extracted = extract_function_features(viv_utils.Function(vw, 0x10007250))
    assert capa.features.Characteristic("loop") not in extracted
|
||||
|
||||
|
||||
def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
    """Both ASCII and UTF-16 LE strings in the file are extracted."""
    sample = sample_bfb9b5391a13d0afd787e87ab90f14f5
    extracted = extract_file_features(sample.vw, sample.path)
    assert capa.features.String("WarStop") in extracted  # ASCII, offset 0x40EC
    assert capa.features.String("cimage/png") in extracted  # UTF-16 LE, offset 0x350E
|
||||
|
||||
|
||||
def test_function_calls_to(sample_9324d1a8ae37a36ae560c37448c9705a):
    """The function at 0x406F60 has exactly one "calls to" location."""
    vw = sample_9324d1a8ae37a36ae560c37448c9705a.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0x406F60))
    calls_to = capa.features.Characteristic("calls to")
    assert calls_to in extracted
    assert len(extracted[calls_to]) == 1
|
||||
|
||||
|
||||
def test_function_calls_to64(sample_lab21_01):
    """The 64-bit memcpy at 0x1400052D0 has exactly eight "calls to" locations."""
    extracted = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400052D0))  # memcpy
    calls_to = capa.features.Characteristic("calls to")
    assert calls_to in extracted
    assert len(extracted[calls_to]) == 8
|
||||
|
||||
|
||||
def test_function_calls_from(sample_9324d1a8ae37a36ae560c37448c9705a):
    """The function at 0x406F60 has exactly 23 "calls from" locations."""
    vw = sample_9324d1a8ae37a36ae560c37448c9705a.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0x406F60))
    calls_from = capa.features.Characteristic("calls from")
    assert calls_from in extracted
    assert len(extracted[calls_from]) == 23
|
||||
|
||||
|
||||
def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a):
    """The function at 0x406F60 contains exactly 26 basic blocks."""
    vw = sample_9324d1a8ae37a36ae560c37448c9705a.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0x406F60))
    assert len(extracted[capa.features.basicblock.BasicBlock()]) == 26
|
||||
|
||||
|
||||
def test_indirect_call_features(sample_a933a1a402775cfa94b6bee0963f4b46):
    """The function at 0xABA68A0 has exactly three "indirect call" locations."""
    vw = sample_a933a1a402775cfa94b6bee0963f4b46.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0xABA68A0))
    indirect = capa.features.Characteristic("indirect call")
    assert indirect in extracted
    assert len(extracted[indirect]) == 3
|
||||
|
||||
|
||||
def test_indirect_calls_resolved(sample_c91887d861d9bd4a5872249b641bc9f9):
    """APIs reached through indirect calls are resolved to API features."""
    vw = sample_c91887d861d9bd4a5872249b641bc9f9.vw
    extracted = extract_function_features(viv_utils.Function(vw, 0x401A77))
    expected_apis = (
        "kernel32.CreatePipe",
        "kernel32.SetHandleInformation",
        "kernel32.CloseHandle",
        "kernel32.WriteFile",
    )
    for api in expected_apis:
        assert capa.features.insn.API(api) in extracted
|
||||
@parametrize(
    "sample,scope,feature,expected",
    FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_viv_feature_counts(sample, scope, feature, expected):
    """Run the shared feature-count checks against the vivisect extractor."""
    # vivisect only supports Python 2, so mark the py3 run as an expected failure
    with xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2"):
        do_test_feature_count(get_viv_extractor, sample, scope, feature, expected)
|
||||
|
||||
Reference in New Issue
Block a user