mirror of
https://github.com/mandiant/capa.git
synced 2025-12-05 20:40:05 -08:00
Merge branch 'master' into wb-proto
This commit is contained in:
1
.github/pyinstaller/pyinstaller.spec
vendored
1
.github/pyinstaller/pyinstaller.spec
vendored
@@ -61,6 +61,7 @@ a = Analysis(
|
||||
"qt5",
|
||||
"pyqtwebengine",
|
||||
"pyasn1",
|
||||
"binaryninja",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
34
.github/workflows/tests.yml
vendored
34
.github/workflows/tests.yml
vendored
@@ -90,3 +90,37 @@ jobs:
|
||||
run: pip install -e .[dev]
|
||||
- name: Run tests
|
||||
run: pytest -v tests/
|
||||
|
||||
binja-tests:
|
||||
name: Binary Ninja tests for ${{ matrix.python-version }} on ubuntu-20.04
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.7", "3.11"]
|
||||
steps:
|
||||
- name: Checkout capa with submodules
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
|
||||
with:
|
||||
submodules: recursive
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install pyyaml
|
||||
run: sudo apt-get install -y libyaml-dev
|
||||
- name: Install capa
|
||||
run: pip install -e .[dev]
|
||||
- name: install Binary Ninja
|
||||
env:
|
||||
BN_SERIAL: ${{ secrets.BN_SERIAL }}
|
||||
run: |
|
||||
mkdir ./.github/binja
|
||||
curl "https://raw.githubusercontent.com/Vector35/binaryninja-api/6812c97/scripts/download_headless.py" -o ./.github/binja/download_headless.py
|
||||
python ./.github/binja/download_headless.py --serial $BN_SERIAL --output .github/binja/BinaryNinja-headless.zip
|
||||
unzip .github/binja/BinaryNinja-headless.zip -d .github/binja/
|
||||
python .github/binja/binaryninja/scripts/install_api.py --install-on-root --silent
|
||||
- name: Run tests
|
||||
env:
|
||||
BN_LICENSE: ${{ secrets.BN_LICENSE }}
|
||||
run: pytest -v tests/test_binja_features.py # explicitly refer to the binja tests for performance. other tests run above.
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -118,8 +118,12 @@ rule-linter-output.log
|
||||
scripts/perf/*.txt
|
||||
scripts/perf/*.svg
|
||||
scripts/perf/*.zip
|
||||
|
||||
.direnv
|
||||
.envrc
|
||||
.DS_Store
|
||||
*/.DS_Store
|
||||
Pipfile
|
||||
Pipfile.lock
|
||||
/cache/
|
||||
.github/binja/binaryninja
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
### New Features
|
||||
- add protobuf format for result documents #1219 @williballenthin @mr-tz
|
||||
|
||||
- extractor: add Binary Ninja feature extractor @xusheng6
|
||||
- new cli flag `--os` to override auto-detected operating system for a sample @captainGeech42
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
0
capa/features/extractors/binja/__init__.py
Normal file
0
capa/features/extractors/binja/__init__.py
Normal file
146
capa/features/extractors/binja/basicblock.py
Normal file
146
capa/features/extractors/binja/basicblock.py
Normal file
@@ -0,0 +1,146 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import sys
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from binaryninja import Function
|
||||
from binaryninja import BasicBlock as BinjaBasicBlock
|
||||
from binaryninja import (
|
||||
BinaryView,
|
||||
VariableSourceType,
|
||||
MediumLevelILSetVar,
|
||||
MediumLevelILOperation,
|
||||
MediumLevelILBasicBlock,
|
||||
MediumLevelILInstruction,
|
||||
)
|
||||
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.basicblock import BasicBlock
|
||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||
|
||||
|
||||
def get_printable_len(il: MediumLevelILSetVar) -> int:
    """Return the number of characters stored by `il` when its constant is printable text.

    The constant is packed little-endian into `il.dest.type.width` bytes (1, 2, 4, or 8).
    If every byte is printable ASCII, the width is returned. Otherwise, if every odd byte
    is zero and every even byte is printable ASCII (UTF-16LE), half the width is returned.
    Any other width or content yields 0.
    """
    width = il.dest.type.width
    value = il.src.value.value

    # width in bytes -> (struct format, mask that truncates the value to that width)
    layouts = {
        1: ("<B", 0xFF),
        2: ("<H", 0xFFFF),
        4: ("<I", 0xFFFFFFFF),
        8: ("<Q", 0xFFFFFFFFFFFFFFFF),
    }
    if width not in layouts:
        return 0

    fmt, mask = layouts[width]
    raw = struct.pack(fmt, value & mask)

    def printable(data: bytes) -> bool:
        return all(b < 127 and chr(b) in string.printable for b in data)

    if printable(raw):
        return width

    # UTF-16LE: high bytes (odd offsets) must all be zero, low bytes must be printable
    if not any(raw[1::2]) and printable(raw[::2]):
        return width // 2

    return 0
|
||||
|
||||
|
||||
def is_mov_imm_to_stack(il: MediumLevelILInstruction) -> bool:
    """Tell whether `il` is an MLIL_SET_VAR that stores an MLIL_CONST into a stack variable."""
    # `and` short-circuits, so `il.src` / `il.dest` are only touched on SET_VAR instructions
    return (
        il.operation == MediumLevelILOperation.MLIL_SET_VAR
        and il.src.operation == MediumLevelILOperation.MLIL_CONST
        and il.dest.source_type == VariableSourceType.StackVariableSourceType
    )
|
||||
|
||||
|
||||
def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
    """Check an MLIL basic block for stackstring indicators.

    Sums the printable lengths of all constant-to-stack moves in `bb` and returns
    True as soon as the running total exceeds MIN_STACKSTRING_LEN. `f` is not used.
    """
    printable_total = 0
    for insn in bb:
        printable_total += get_printable_len(insn) if is_mov_imm_to_stack(insn) else 0

        if printable_total > MIN_STACKSTRING_LEN:
            return True

    return False
|
||||
|
||||
|
||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """Yield the `stack string` characteristic when the block's MLIL looks like a stackstring.

    `bbh.inner` is a (disassembly basic block, MLIL basic block) pair; the MLIL part
    may be None, in which case nothing is yielded.
    """
    pair: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
    mlil_bb = pair[1]
    if mlil_bb is None:
        return

    if bb_contains_stackstring(fh.inner, mlil_bb):
        yield Characteristic("stack string"), bbh.address
|
||||
|
||||
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """Yield the `tight loop` characteristic for each outgoing edge that targets the block itself."""
    pair: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
    disasm_bb = pair[0]
    for out_edge in disasm_bb.outgoing_edges:
        if out_edge.target.start != disasm_bb.start:
            continue
        yield Characteristic("tight loop"), bbh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """Yield every basic block feature: handler output first, then the BasicBlock feature itself."""
    for handler in BASIC_BLOCK_HANDLERS:
        yield from handler(fh, bbh)
    yield BasicBlock(), bbh.address
|
||||
|
||||
|
||||
# Handlers run, in this order, by extract_features(); each is called as
# handler(fh, bbh) and yields (feature, address) pairs.
BASIC_BLOCK_HANDLERS = (
    extract_bb_tight_loop,
    extract_bb_stackstring,
)
|
||||
|
||||
|
||||
def main():
    """Debug entry point: pretty-print the basic block features of the file named in argv[1].

    Returns silently when no path is given or Binary Ninja cannot produce a view for it.
    """
    if len(sys.argv) < 2:
        return

    # imported lazily so that importing this module does not pull these in
    from binaryninja import BinaryViewType

    from capa.features.extractors.binja.extractor import BinjaFeatureExtractor

    bv: BinaryView = BinaryViewType.get_view_of_file(sys.argv[1])
    if bv is None:
        return

    extractor = BinjaFeatureExtractor(bv)
    features = [
        item
        for fh in extractor.get_functions()
        for bbh in extractor.get_basic_blocks(fh)
        for item in extract_features(fh, bbh)
    ]

    import pprint

    pprint.pprint(features)


if __name__ == "__main__":
    main()
|
||||
77
capa/features/extractors/binja/extractor.py
Normal file
77
capa/features/extractors/binja/extractor.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Tuple, Iterator
|
||||
|
||||
import binaryninja as binja
|
||||
|
||||
import capa.features.extractors.elf
|
||||
import capa.features.extractors.binja.file
|
||||
import capa.features.extractors.binja.insn
|
||||
import capa.features.extractors.binja.global_
|
||||
import capa.features.extractors.binja.function
|
||||
import capa.features.extractors.binja.basicblock
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
|
||||
|
||||
class BinjaFeatureExtractor(FeatureExtractor):
    """Feature extractor that reads features from a Binary Ninja BinaryView."""

    def __init__(self, bv: binja.BinaryView):
        super().__init__()
        self.bv = bv
        # file format, OS, and architecture are computed once here and replayed
        # by extract_global_features()
        self.global_features: List[Tuple[Feature, Address]] = [
            *capa.features.extractors.binja.file.extract_file_format(self.bv),
            *capa.features.extractors.binja.global_.extract_os(self.bv),
            *capa.features.extractors.binja.global_.extract_arch(self.bv),
        ]

    def get_base_address(self):
        """Return the start of the view as an absolute virtual address."""
        return AbsoluteVirtualAddress(self.bv.start)

    def extract_global_features(self):
        """Yield the global features collected in the constructor."""
        yield from self.global_features

    def extract_file_features(self):
        """Yield the file-scope features of the view."""
        yield from capa.features.extractors.binja.file.extract_features(self.bv)

    def get_functions(self) -> Iterator[FunctionHandle]:
        """Yield a handle for every function in the view; `inner` is the binja Function."""
        for func in self.bv.functions:
            yield FunctionHandle(address=AbsoluteVirtualAddress(func.start), inner=func)

    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
        """Yield the function-scope features of `fh`."""
        yield from capa.features.extractors.binja.function.extract_features(fh)

    def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
        """Yield a handle for every disassembly basic block of `fh`.

        `inner` is a (disassembly basic block, MLIL basic block or None) pair.
        """
        func: binja.Function = fh.inner

        # Map each disassembly basic block start to an MLIL basic block whose source
        # block starts there. When several MLIL blocks share a source block, the last wins.
        mlil_by_start = {mlil_bb.source_block.start: mlil_bb for mlil_bb in func.mlil.basic_blocks}

        for disasm_bb in func.basic_blocks:
            handle_inner = (disasm_bb, mlil_by_start.get(disasm_bb.start))
            yield BBHandle(address=AbsoluteVirtualAddress(disasm_bb.start), inner=handle_inner)

    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
        """Yield the basic-block-scope features of `bbh`."""
        yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)

    def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
        """Yield a handle for every instruction of `bbh`; `inner` is a DisassemblyInstruction."""
        import capa.features.extractors.binja.helpers as binja_helpers

        pair: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
        disasm_bb = pair[0]

        # iterating the block gives (text tokens, length) per instruction;
        # addresses are rebuilt by accumulating the lengths from the block start
        cursor = disasm_bb.start
        for tokens, size in disasm_bb:
            yield InsnHandle(
                address=AbsoluteVirtualAddress(cursor),
                inner=binja_helpers.DisassemblyInstruction(cursor, size, tokens),
            )
            cursor += size

    def extract_insn_features(self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle):
        """Yield the instruction-scope features of `ih`."""
        yield from capa.features.extractors.binja.insn.extract_features(fh, bbh, ih)
|
||||
188
capa/features/extractors/binja/file.py
Normal file
188
capa/features/extractors/binja/file.py
Normal file
@@ -0,0 +1,188 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import sys
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from binaryninja import Symbol, Segment, BinaryView, SymbolType, SymbolBinding
|
||||
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section, FunctionName
|
||||
from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import unmangle_c_name
|
||||
|
||||
|
||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[int, int]]:
    """Search a segment for embedded PE files, including single-byte-XOR encoded ones.

    adapted for binja from:
    https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19

    yields:
      Tuple[int, int]: (address of the "MZ" match, XOR key) for each candidate whose
        e_lfanew field points at a matching "PE" signature inside the segment.
    """
    xor_static = capa.features.extractors.helpers.xor_static

    # ("MZ" xor key, "PE" xor key, key) for every single-byte key
    signatures = [(xor_static(b"MZ", key), xor_static(b"PE", key), key) for key in range(256)]

    # If this is the first segment of a PE view, skip its first byte. Otherwise the
    # view's own header would always be reported as an embedded PE.
    search_from = seg.start
    if bv.view_type == "PE" and search_from == bv.start:
        search_from += 1

    hits = []
    for mz_sig, pe_sig, key in signatures:
        for found_at, _ in bv.find_all_data(search_from, seg.end, mz_sig):
            hits.append((found_at, pe_sig, key))

    # candidates are validated last-found-first
    for found_at, pe_sig, key in reversed(hits):
        # e_lfanew lives at offset 0x3c of the MZ header and is 4 bytes wide
        lfanew_at = found_at + 0x3C
        if seg.end < (lfanew_at + 4):
            continue

        (lfanew,) = struct.unpack("<I", xor_static(bv.read(lfanew_at, 4), key))

        pe_at = found_at + lfanew
        if seg.end < (pe_at + 2):
            continue

        if bv.read(pe_at, 2) == pe_sig:
            yield found_at, key
|
||||
|
||||
|
||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield an `embedded pe` characteristic for each PE candidate found in any segment."""
    for segment in bv.segments:
        yield from (
            (Characteristic("embedded pe"), FileOffsetAddress(found_at))
            for found_at, _ in check_segment_for_pe(bv, segment)
        )
|
||||
|
||||
|
||||
def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield Export features for function symbols with global or weak binding.

    Each symbol yields its short name and, when it differs, its C-undecorated name.
    """
    exported_bindings = (SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding)

    for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol):
        if sym.binding not in exported_bindings:
            continue

        decorated = sym.short_name
        yield Export(decorated), AbsoluteVirtualAddress(sym.address)

        plain = unmangle_c_name(decorated)
        if decorated != plain:
            yield Export(plain), AbsoluteVirtualAddress(sym.address)
|
||||
|
||||
|
||||
def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield Import features for every import address symbol.

    1. imports by name: the variants produced by `generate_symbols` for the
       symbol's namespace and short name.

    2. imports by ordinal, only when the ordinal is non-zero and the namespace
       is non-empty: the variants produced for the ordinal name `#<ordinal>`.
    """
    generate_symbols = capa.features.extractors.helpers.generate_symbols

    for sym in bv.get_symbols_of_type(SymbolType.ImportAddressSymbol):
        lib_name = str(sym.namespace)
        addr = AbsoluteVirtualAddress(sym.address)

        yield from ((Import(variant), addr) for variant in generate_symbols(lib_name, sym.short_name))

        ordinal = sym.ordinal
        if ordinal == 0 or lib_name == "":
            continue

        yield from ((Import(variant), addr) for variant in generate_symbols(lib_name, "#%d" % ordinal))
|
||||
|
||||
|
||||
def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield a Section feature, located at the section start, for every section in the view."""
    yield from (
        (Section(section_name), AbsoluteVirtualAddress(sec.start)) for section_name, sec in bv.sections.items()
    )
|
||||
|
||||
|
||||
def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield a String feature for every entry of `bv.strings`, located at the entry's start."""
    yield from ((String(found.value), FileOffsetAddress(found.start)) for found in bv.strings)
|
||||
|
||||
|
||||
def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """
    extract the names of statically-linked library functions.

    Every symbol of type LibraryFunctionSymbol yields a FunctionName feature for its
    short name and, when that name starts with `_`, a second one without the prefix.
    Addresses are wrapped in AbsoluteVirtualAddress so that the yielded pairs match
    the declared (Feature, Address) type, like the other extractors in this module.
    """
    # read the property once instead of once per symbol name
    symbols = bv.symbols
    for sym_name in symbols:
        for sym in symbols[sym_name]:
            if sym.type != SymbolType.LibraryFunctionSymbol:
                continue

            name = sym.short_name
            addr = AbsoluteVirtualAddress(sym.address)
            yield FunctionName(name), addr
            if name.startswith("_"):
                # some linkers may prefix linked routines with a `_` to avoid name collisions.
                # extract features for both the mangled and un-mangled representations.
                # e.g. `_fwrite` -> `fwrite`
                # see: https://stackoverflow.com/a/2628384/87207
                yield FunctionName(name[1:]), addr
|
||||
|
||||
|
||||
def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield the Format feature that corresponds to the view type of `bv`.

    "PE" and "COFF" views yield FORMAT_PE, "ELF" views yield FORMAT_ELF,
    and "Raw" views yield nothing.

    raises:
      NotImplementedError: when the view type is none of the above.
    """
    view_type = bv.view_type
    if view_type in ["PE", "COFF"]:
        yield Format(FORMAT_PE), NO_ADDRESS
    elif view_type == "ELF":
        yield Format(FORMAT_ELF), NO_ADDRESS
    elif view_type == "Raw":
        # no file type to return when processing a binary file, but we want to continue processing
        return
    else:
        # view_type is compared against strings above, so it must be formatted with %s;
        # %d raised TypeError here and hid the intended NotImplementedError.
        raise NotImplementedError("unexpected file format: %s" % view_type)
|
||||
|
||||
|
||||
def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield every file feature by running each handler in FILE_HANDLERS over `bv`."""
    for handler in FILE_HANDLERS:
        yield from handler(bv)
|
||||
|
||||
|
||||
# Handlers run, in this order, by extract_features(); each is called as
# handler(bv) and yields (feature, address) pairs.
FILE_HANDLERS = (
    extract_file_export_names,
    extract_file_import_names,
    extract_file_strings,
    extract_file_section_names,
    extract_file_embedded_pe,
    extract_file_function_names,
    extract_file_format,
)
|
||||
|
||||
|
||||
def main():
    """Debug entry point: pretty-print the file features of the file named in argv[1].

    Returns silently when no path is given or Binary Ninja cannot produce a view for it.
    """
    if len(sys.argv) < 2:
        return

    from binaryninja import BinaryViewType

    bv: BinaryView = BinaryViewType.get_view_of_file(sys.argv[1])
    if bv is None:
        return

    import pprint

    features = list(extract_features(bv))
    pprint.pprint(features)


if __name__ == "__main__":
    main()
|
||||
34
capa/features/extractors/binja/find_binja_api.py
Normal file
34
capa/features/extractors/binja/find_binja_api.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import subprocess
|
||||
|
||||
# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
|
||||
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
|
||||
# to find out the path of the binaryninja module that has been installed.
|
||||
# Note, including the binaryninja module in the `pyinstaller.spec` would not work, since the binaryninja module tries to
|
||||
# find the binaryninja core e.g., `libbinaryninjacore.dylib`, using a relative path. And this does not work when the
|
||||
# binaryninja module is extracted by the PyInstaller.
|
||||
# Script run by the system Python to locate the installed binaryninja package.
# It prints the hex-encoded UTF-8 path of the directory containing the package,
# or nothing when the package cannot be found.
# `importlib.util` is imported explicitly: a bare `import importlib` does not
# guarantee that the `util` submodule is loaded.
code = r"""
from pathlib import Path
import importlib.util
spec = importlib.util.find_spec('binaryninja')
if spec is not None:
    locations = spec.submodule_search_locations
    if locations is not None and len(locations) > 0:
        path = Path(locations[0])
        # encode the path with utf8 then convert to hex, make sure it can be read and restored properly
        print(str(path.parent).encode('utf8').hex())
"""
|
||||
|
||||
|
||||
def find_binja_path() -> str:
    """Ask the `python` found on PATH where the binaryninja package is installed.

    Runs the module-level `code` script in a child interpreter and decodes its
    hex-encoded output. An empty string is returned when the child prints nothing.
    """
    child_stdout = subprocess.check_output(["python", "-c", code])
    hex_path = child_stdout.decode("ascii").strip()
    return bytes.fromhex(hex_path).decode("utf8")


if __name__ == "__main__":
    print(find_binja_path())
|
||||
97
capa/features/extractors/binja/function.py
Normal file
97
capa/features/extractors/binja/function.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import sys
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from binaryninja import Function, BinaryView, LowLevelILOperation
|
||||
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors import loops
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
|
||||
def extract_function_calls_to(fh: FunctionHandle):
    """extract callers to a function

    Yields a `calls to` characteristic, located at the referencing address, for each
    caller site whose LLIL instruction is a call, stack-adjusting call, jump, or tail call.
    """
    func: Function = fh.inner

    call_like_ops = (
        LowLevelILOperation.LLIL_CALL,
        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
        LowLevelILOperation.LLIL_JUMP,
        LowLevelILOperation.LLIL_TAILCALL,
    )

    for caller in func.caller_sites:
        # Everything that is a code reference to the current function is considered a caller, which actually includes
        # many other references that are NOT a caller. For example, an instruction `push function_start` will also be
        # considered a caller to the function
        llil = caller.llil
        if llil is None:
            # no LLIL instruction is available at this reference site, so it cannot be classified as a call
            continue

        if llil.operation in call_like_ops:
            yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address)
|
||||
|
||||
|
||||
def extract_function_loop(fh: FunctionHandle):
    """Yield the `loop` characteristic when the function's control flow graph has a loop."""
    func: Function = fh.inner

    # control flow graph as (source block start, target block start) pairs
    cfg_edges = [(block.start, out_edge.target.start) for block in func.basic_blocks for out_edge in block.outgoing_edges]

    if loops.has_loop(cfg_edges):
        yield Characteristic("loop"), fh.address
|
||||
|
||||
|
||||
def extract_recursive_call(fh: FunctionHandle):
    """Yield `recursive call` for each code reference to the function start made from the function itself."""
    func: Function = fh.inner
    view: BinaryView = func.view
    if view is None:
        return

    self_refs = (ref for ref in view.get_code_refs(func.start) if ref.function == func)
    for _ in self_refs:
        yield Characteristic("recursive call"), fh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
    """Yield every function feature by running each handler in FUNCTION_HANDLERS over `fh`."""
    for handler in FUNCTION_HANDLERS:
        yield from handler(fh)
|
||||
|
||||
|
||||
# Handlers run, in this order, by extract_features(); each is called as
# handler(fh) and yields (feature, address) pairs.
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
|
||||
|
||||
|
||||
def main():
    """Debug entry point: pretty-print the function features of the file named in argv[1].

    Returns silently when no path is given or Binary Ninja cannot produce a view for it.
    """
    if len(sys.argv) < 2:
        return

    from binaryninja import BinaryViewType

    from capa.features.extractors.binja.extractor import BinjaFeatureExtractor

    bv: BinaryView = BinaryViewType.get_view_of_file(sys.argv[1])
    if bv is None:
        return

    extractor = BinjaFeatureExtractor(bv)
    features = [item for fh in extractor.get_functions() for item in extract_features(fh)]

    import pprint

    pprint.pprint(features)


if __name__ == "__main__":
    main()
|
||||
55
capa/features/extractors/binja/global_.py
Normal file
55
capa/features/extractors/binja/global_.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import logging
|
||||
import contextlib
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from binaryninja import BinaryView
|
||||
|
||||
import capa.features.extractors.elf
|
||||
from capa.features.common import OS, OS_MACOS, ARCH_I386, ARCH_AMD64, OS_WINDOWS, Arch, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield the OS feature derived from the platform name of `bv`.

    Only the part of the platform name before the first `-` is considered.
    Unrecognized names are logged at debug level and yield nothing.
    """
    os_name = bv.platform.name.partition("-")[0]

    if os_name == "windows":
        detected = OS_WINDOWS
    elif os_name == "macos":
        detected = OS_MACOS
    elif os_name in ("linux", "freebsd", "decree"):
        detected = os_name
    else:
        # we likely end up here:
        #  1. handling shellcode, or
        #  2. handling a new file format (e.g. macho)
        #
        # for (1) we can't do much - it's shellcode and all bets are off.
        # we could maybe accept a further CLI argument to specify the OS,
        # but i think this would be rarely used.
        # rules that rely on OS conditions will fail to match on shellcode.
        #
        # for (2), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported file format: %s, will not guess OS", os_name)
        return

    yield OS(detected), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
    """Yield the Arch feature for `x86_64` (AMD64) or `x86` (i386) views.

    Other architecture names are logged at debug level and yield nothing.
    """
    arch_name = bv.arch.name

    if arch_name not in ("x86_64", "x86"):
        # we likely end up here:
        #  1. handling a new architecture (e.g. aarch64)
        #
        # for (1), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported architecture: %s", arch_name)
        return

    yield Arch(ARCH_AMD64 if arch_name == "x86_64" else ARCH_I386), NO_ADDRESS
|
||||
50
capa/features/extractors/binja/helpers.py
Normal file
50
capa/features/extractors/binja/helpers.py
Normal file
@@ -0,0 +1,50 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import re
|
||||
from typing import List, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
from binaryninja import LowLevelILInstruction
|
||||
from binaryninja.architecture import InstructionTextToken
|
||||
|
||||
|
||||
@dataclass
class DisassemblyInstruction:
    """One disassembled instruction: where it is, how long it is, and its text tokens."""

    # address of the instruction
    address: int
    # length of the instruction; consecutive instruction addresses differ by this amount
    length: int
    # disassembly text tokens of the instruction
    text: List[InstructionTextToken]
|
||||
|
||||
|
||||
# Visitor callback for visit_llil_exprs(): called as (operand, parent instruction, operand index).
# Returning True makes the traversal descend into the operand.
LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
|
||||
|
||||
|
||||
def visit_llil_exprs(il: LowLevelILInstruction, func: LLIL_VISITOR):
    """Walk the LLIL operand tree of `il`, depth first.

    `func(operand, parent, index)` is called for every operand that is itself an
    LLIL instruction; the walk descends into that operand only when it returns True.
    """
    # BN does not really support operand index at the disassembly level, so use the LLIL operand index as a substitute.
    # Note, this is NOT always guaranteed to be the same as disassembly operand.
    for index, operand in enumerate(il.operands):
        if not isinstance(operand, LowLevelILInstruction):
            continue
        if not func(operand, il, index):
            continue
        visit_llil_exprs(operand, func)
|
||||
|
||||
|
||||
def unmangle_c_name(name: str) -> str:
    """Strip C name decoration from `name`.

    Removes a leading `@` or `_`, a trailing `Stub`, and a trailing `@<digits>` suffix.
    Names that do not start with `@` or `_` (including the empty string) are returned unchanged.
    """
    # https://learn.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170#FormatC
    # Possible variations for BaseThreadInitThunk:
    # @BaseThreadInitThunk@12
    # _BaseThreadInitThunk
    # _BaseThreadInitThunk@12
    # It is also possible for a function to have a `Stub` appended to its name:
    # _lstrlenWStub@4

    # A small optimization to avoid running the regex too many times
    # TODO: this still increases the unit test execution time from 170s to 200s, should be able to accelerate it
    # startswith() is also safe on an empty name, where indexing name[0] would raise IndexError.
    if name.startswith(("@", "_")):
        # inside a character class `|` is a literal, so the prefix class is just `[@_]`
        match = re.match(r"^[@_](.*?)(Stub)?(@\d+)?$", name)
        if match:
            return match.group(1)

    return name
|
||||
630
capa/features/extractors/binja/insn.py
Normal file
630
capa/features/extractors/binja/insn.py
Normal file
@@ -0,0 +1,630 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import sys
|
||||
from typing import Any, Dict, List, Tuple, Iterator, Optional
|
||||
|
||||
from binaryninja import Function
|
||||
from binaryninja import BasicBlock as BinjaBasicBlock
|
||||
from binaryninja import (
|
||||
BinaryView,
|
||||
ILRegister,
|
||||
SymbolType,
|
||||
BinaryReader,
|
||||
RegisterValueType,
|
||||
LowLevelILOperation,
|
||||
LowLevelILInstruction,
|
||||
InstructionTextTokenType,
|
||||
)
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
# byte range within the first and returning basic blocks, this helps to reduce FP features
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
|
||||
# check if a function is a stub function to another function/symbol. The criteria is:
|
||||
# 1. The function must only have one basic block
|
||||
# 2. The function must only make one call/jump to another address
|
||||
# If the function being checked is a stub function, returns the target address. Otherwise, return None.
|
||||
def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
    """
    Resolve a stub function at `addr` to the address it forwards to.

    A function counts as a stub when it has exactly one basic block and its
    LLIL contains exactly one call/jump/tailcall whose destination is a
    constant, constant pointer, or imported address.

    Returns the forwarded-to address, or None when no function at `addr`
    satisfies these criteria.
    """
    transfer_ops = (
        LowLevelILOperation.LLIL_CALL,
        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
        LowLevelILOperation.LLIL_JUMP,
        LowLevelILOperation.LLIL_TAILCALL,
    )
    resolvable_kinds = (
        RegisterValueType.ImportedAddressValue,
        RegisterValueType.ConstantValue,
        RegisterValueType.ConstantPointerValue,
    )

    for candidate in bv.get_functions_at(addr):
        if len(candidate.basic_blocks) != 1:
            continue

        transfers = 0
        target = None
        for il in candidate.llil.instructions:
            if il.operation not in transfer_ops:
                continue

            # every transfer is counted, even when its destination is not resolvable,
            # so a second (unresolved) transfer still disqualifies the function.
            transfers += 1
            dest_value = il.dest.value
            if dest_value.type in resolvable_kinds:
                target = dest_value.value

        if transfers == 1 and target is not None:
            return target

    return None
|
||||
|
||||
|
||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    parse instruction API features

    For each call/jump lifted at this instruction whose destination resolves to
    a constant or imported address, look up the import symbol at that address
    (and, if the destination is a stub function, at the stub's target too) and
    yield one API feature per generated symbol name.

    example:
       call dword [0x00473038]
    """
    func: Function = fh.inner
    bv: BinaryView = func.view
    helpers = capa.features.extractors.helpers

    transfer_ops = (
        LowLevelILOperation.LLIL_CALL,
        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
        LowLevelILOperation.LLIL_JUMP,
        LowLevelILOperation.LLIL_TAILCALL,
    )
    resolvable_kinds = (
        RegisterValueType.ImportedAddressValue,
        RegisterValueType.ConstantValue,
        RegisterValueType.ConstantPointerValue,
    )
    import_symbol_types = (SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol)

    for llil in func.get_llils_at(ih.address):
        if llil.operation not in transfer_ops:
            continue

        if llil.dest.value.type not in resolvable_kinds:
            continue

        target = llil.dest.value.value
        targets = [target]
        forwarded = is_stub_function(bv, target)
        if forwarded is not None:
            # the direct target is a thunk; also consider what it forwards to
            targets.append(forwarded)

        for candidate in targets:
            sym = bv.get_symbol_at(candidate)
            if sym is None or sym.type not in import_symbol_types:
                continue

            sym_name = sym.short_name

            # derive the library name without its file extension, when known
            lib_name = ""
            import_lib = bv.lookup_imported_object_library(sym.address)
            if import_lib is not None:
                lib_name = import_lib[0].name
                if lib_name.endswith(".dll"):
                    lib_name = lib_name[:-4]
                elif lib_name.endswith(".so"):
                    lib_name = lib_name[:-3]

            for name in helpers.generate_symbols(lib_name, sym_name):
                yield API(name), ih.address

            if sym_name.startswith("_"):
                # also emit the variants without the leading underscore
                for name in helpers.generate_symbols(lib_name, sym_name[1:]):
                    yield API(name), ih.address
|
||||
|
||||
|
||||
def extract_insn_number_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """
    parse instruction number features

    Visits the LLIL expressions lifted at the instruction and, for every
    constant that is neither inside a memory load nor combined with a
    stack/frame pointer register, yields a Number and an OperandNumber feature.
    A constant that is an operand of a subtraction is reported negated.

    example:
        push    3136B0h         ; dwControlCode
    """
    func: Function = fh.inner

    stack_regs = ("esp", "ebp", "rsp", "rbp", "sp")
    # both Number and OperandNumber are yielded as Feature, per the return type above
    results: List[Tuple[Feature, Address]] = []

    def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
        # NOTE(review): the return value is consumed by visit_llil_exprs (defined elsewhere);
        # presumably True means "keep descending into sub-expressions" -- confirm against the helper.
        if il.operation == LowLevelILOperation.LLIL_LOAD:
            return False

        if il.operation not in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR]:
            return True

        # skip constants that are combined with a stack/frame pointer
        for op in parent.operands:
            if isinstance(op, ILRegister) and op.name in stack_regs:
                return False
            elif isinstance(op, LowLevelILInstruction) and op.operation == LowLevelILOperation.LLIL_REG:
                if op.src.name in stack_regs:
                    return False

        raw_value = il.value.value
        if parent.operation == LowLevelILOperation.LLIL_SUB:
            raw_value = -raw_value

        results.append((Number(raw_value), ih.address))
        results.append((OperandNumber(index, raw_value), ih.address))

        return False

    for llil in func.get_llils_at(ih.address):
        visit_llil_exprs(llil, llil_checker)

    for result in results:
        yield result
|
||||
|
||||
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    parse referenced byte sequences

    Candidate addresses are gathered from the instruction's code references
    (excluding references that land inside a function) and from every positive
    constant in its LLIL. For each candidate, up to MAX_BYTES_FEATURE_SIZE bytes
    are read and yielded unless they are empty, all zeros, or a known string.
    Call instructions, and instructions without LLIL, yield nothing.

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    func: Function = fh.inner
    bv: BinaryView = func.view

    first_llil = func.get_llil_at(ih.address)
    if first_llil is None:
        return
    if first_llil.operation in (LowLevelILOperation.LLIL_CALL, LowLevelILOperation.LLIL_CALL_STACK_ADJUST):
        return

    candidate_addrs = set()

    # collect candidate address from code refs directly
    for ref in bv.get_code_refs_from(ih.address):
        if ref == ih.address or len(bv.get_functions_containing(ref)) > 0:
            continue
        candidate_addrs.add(ref)

    # collect candidate address by enumerating all integers, https://github.com/Vector35/binaryninja-api/issues/3966
    def collect_constant(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
        if il.operation not in (LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR):
            return True

        value = il.value.value
        if value > 0:
            candidate_addrs.add(value)
        return False

    for llil in func.get_llils_at(ih.address):
        visit_llil_exprs(llil, collect_constant)

    for addr in candidate_addrs:
        extracted_bytes = bv.read(addr, MAX_BYTES_FEATURE_SIZE)
        if not extracted_bytes:
            continue
        if capa.features.extractors.helpers.all_zeros(extracted_bytes):
            continue
        if bv.get_string_at(addr) is not None:
            # don't extract byte features for obvious strings
            continue
        yield Bytes(extracted_bytes), ih.address
|
||||
|
||||
|
||||
def extract_insn_string_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """
    parse instruction string features

    Candidate addresses come from the instruction's code references (excluding
    references that land inside a function) and from every positive constant in
    its LLIL. Each candidate is checked both as a string and as a pointer to a
    string.

    example:
        push offset aAcr     ; "ACR  > "
    """
    func: Function = fh.inner
    bv: BinaryView = func.view

    candidate_addrs = set()

    # collect candidate address from code refs directly
    for ref in bv.get_code_refs_from(ih.address):
        if ref == ih.address or len(bv.get_functions_containing(ref)) > 0:
            continue
        candidate_addrs.add(ref)

    # collect candidate address by enumerating all integers, https://github.com/Vector35/binaryninja-api/issues/3966
    def collect_constant(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
        if il.operation not in (LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR):
            return True

        value = il.value.value
        if value > 0:
            candidate_addrs.add(value)
        return False

    for llil in func.get_llils_at(ih.address):
        visit_llil_exprs(llil, collect_constant)

    # Now we have all the candidate addresses, check them for string or pointer to string
    reader = BinaryReader(bv)
    for addr in candidate_addrs:
        direct = bv.get_string_at(addr)
        if direct:
            yield String(direct.value), ih.address

        # reinterpret the bytes at the candidate as a pointer of the architecture's address size
        reader.seek(addr)
        pointer = None
        if bv.arch.address_size == 4:
            pointer = reader.read32()
        elif bv.arch.address_size == 8:
            pointer = reader.read64()

        if pointer is None:
            continue

        indirect = bv.get_string_at(pointer)
        if indirect:
            yield String(indirect.value), ih.address
|
||||
|
||||
|
||||
def extract_insn_offset_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """
    parse instruction structure offset features

    Visits the LLIL expressions lifted at the instruction and yields an Offset
    and an OperandOffset feature for `reg +/- const` expressions that are not
    based on a stack/frame pointer, and offset 0 for a load/store whose
    address is a bare register.

    example:
        .text:0040112F    cmp     [esi+4], ebx
    """
    func: Function = fh.inner

    stack_regs = ("esp", "ebp", "rsp", "rbp", "sp")
    # both Offset and OperandOffset are yielded as Feature, per the return type above
    results: List[Tuple[Feature, Address]] = []
    address_size = func.view.arch.address_size * 8

    def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
        # The most common case, read/write dereference to something like `dword [eax+0x28]`
        if il.operation in [LowLevelILOperation.LLIL_ADD, LowLevelILOperation.LLIL_SUB]:
            left = il.left
            right = il.right
            # Exclude offsets based on stack/frame pointers
            if left.operation == LowLevelILOperation.LLIL_REG and left.src.name in stack_regs:
                return True

            if right.operation != LowLevelILOperation.LLIL_CONST:
                return True

            raw_value = right.value.value
            # If this is not a dereference, then this must be an add and the offset must lie strictly
            # between 0 and MAX_STRUCTURE_SIZE (both bounds exclusive). For example,
            #     add eax, 0x10,
            #     lea ebx, [eax + 1]
            if parent.operation not in [LowLevelILOperation.LLIL_LOAD, LowLevelILOperation.LLIL_STORE]:
                if il.operation != LowLevelILOperation.LLIL_ADD or (not 0 < raw_value < MAX_STRUCTURE_SIZE):
                    return False

            if address_size > 0:
                # BN also encodes the constant value as two's complement, we need to restore its original value
                value = capa.features.extractors.helpers.twos_complement(raw_value, address_size)
            else:
                value = raw_value

            results.append((Offset(value), ih.address))
            results.append((OperandOffset(index, value), ih.address))
            return False

        # An edge case: for code like `push dword [esi]`, we need to generate a feature for offset 0x0
        elif il.operation in [LowLevelILOperation.LLIL_LOAD, LowLevelILOperation.LLIL_STORE]:
            if il.operands[0].operation == LowLevelILOperation.LLIL_REG:
                results.append((Offset(0), ih.address))
                results.append((OperandOffset(index, 0), ih.address))
                return False

        return True

    for llil in func.get_llils_at(ih.address):
        visit_llil_exprs(llil, llil_checker)

    for result in results:
        yield result
|
||||
|
||||
|
||||
def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInstruction) -> bool:
    """
    check if nzxor exists within stack cookie delta

    Returns True when one operand of the XOR is a stack/frame pointer register
    and the expression lies within SECURITY_COOKIE_BYTES_DELTA bytes of the
    start of a block with no incoming edges, or of the end of a block with no
    outgoing edges.
    """
    # TODO: a much more accurate analysis is possible using LLIL SSA
    pointer_regs = ("ebp", "esp", "rbp", "rsp", "sp")

    operand_regs = [
        operand.src.name
        for operand in (llil.left, llil.right)
        if operand.operation == LowLevelILOperation.LLIL_REG
    ]

    # stack cookie reg should be stack/frame pointer
    if not any(name in pointer_regs for name in operand_regs):
        return False

    # expect security cookie init in first basic block within first bytes (instructions)
    is_entry_block = len(bb.incoming_edges) == 0
    if is_entry_block and llil.address < (bb.start + SECURITY_COOKIE_BYTES_DELTA):
        return True

    # ... or within last bytes (instructions) before a return
    is_exit_block = len(bb.outgoing_edges) == 0
    if is_exit_block and llil.address > (bb.end - SECURITY_COOKIE_BYTES_DELTA):
        return True

    return False
|
||||
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """
    parse instruction non-zeroing XOR instruction
    ignore expected non-zeroing XORs, e.g. security cookies

    Yields an "nzxor" characteristic for every LLIL_XOR expression lifted at
    the instruction that is_nzxor_stack_cookie does not classify as a cookie check.
    """
    func: Function = fh.inner

    hits = []

    def on_expr(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
        if il.operation != LowLevelILOperation.LLIL_XOR:
            return True

        # If the two operands of the xor instruction are the same, the LLIL will be translated to other instructions,
        # e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
        # Only cases related to the stack cookie need to be excluded here.
        if not is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il):
            hits.append((Characteristic("nzxor"), ih.address))
        return False

    for llil in func.get_llils_at(ih.address):
        visit_llil_exprs(llil, on_expr)

    yield from hits
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """parse instruction mnemonic features

    The mnemonic is taken from the text of the instruction's first token.
    """
    first_token = ih.inner.text[0]
    yield Mnemonic(first_token.text), ih.address
|
||||
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """
    parse call $+5 instruction from the given instruction.

    Matches a 5-byte instruction whose first token is "call" and whose third
    token is "$+5".
    """
    insn: DisassemblyInstruction = ih.inner
    tokens = insn.text

    # checks are ordered so the third token is only touched for call instructions
    if tokens[0].text != "call":
        return
    if tokens[2].text != "$+5":
        return
    if insn.length != 5:
        return

    yield Characteristic("call $+5"), ih.address
|
||||
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """parse instruction peb access

    fs:[0x30] on x86, gs:[0x60] on x64

    In LLIL terms this matches a load from `fsbase + 0x30` or `gsbase + 0x60`
    and yields a "peb access" characteristic for each match.
    """
    func: Function = fh.inner

    peb_locations = (("fsbase", 0x30), ("gsbase", 0x60))
    results = []

    def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
        if il.operation != LowLevelILOperation.LLIL_LOAD:
            return True

        src = il.src
        if src.operation != LowLevelILOperation.LLIL_ADD:
            return True

        left = src.left
        right = src.right

        if left.operation != LowLevelILOperation.LLIL_REG:
            return True

        reg = left.src.name

        if right.operation != LowLevelILOperation.LLIL_CONST:
            return True

        value = right.value.value
        if (reg, value) not in peb_locations:
            return True

        results.append((Characteristic("peb access"), ih.address))
        return False

    for llil in func.get_llils_at(ih.address):
        visit_llil_exprs(llil, llil_checker)

    for result in results:
        yield result
|
||||
|
||||
|
||||
def extract_insn_segment_access_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """parse instruction fs or gs access

    Yields "fs access" / "gs access" for every LLIL register expression at the
    instruction that reads `fsbase` / `gsbase`.
    """
    func: Function = fh.inner

    found = []

    def on_expr(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
        if il.operation != LowLevelILOperation.LLIL_REG:
            return True

        reg = il.src.name
        if reg == "fsbase":
            found.append((Characteristic("fs access"), ih.address))
        elif reg == "gsbase":
            found.append((Characteristic("gs access"), ih.address))

        # a register expression always answers False, whichever register it names
        return False

    for llil in func.get_llils_at(ih.address):
        visit_llil_exprs(llil, on_expr)

    yield from found
|
||||
|
||||
|
||||
def extract_insn_cross_section_cflow(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """inspect the instruction for a CALL or JMP that crosses section boundaries

    For every code reference from the instruction that lands on the start of a
    function, compare the segment and sections of the source and destination
    and yield "cross section flow" when either differs.
    """
    func: Function = fh.inner
    bv: BinaryView = func.view

    if bv is None:
        return

    here = ih.address
    src_segment = bv.get_segment_at(here)
    src_sections = bv.get_sections_at(here)

    for ref in bv.get_code_refs_from(here):
        # only references to function starts are of interest
        if len(bv.get_functions_at(ref)) == 0:
            continue

        dst_segment = bv.get_segment_at(ref)
        dst_sections = bv.get_sections_at(ref)
        if src_segment != dst_segment or src_sections != dst_sections:
            yield Characteristic("cross section flow"), ih.address
|
||||
|
||||
|
||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract functions calls from features

    most relevant at the function scope, however, its most efficient to extract at the instruction scope

    Yields a "calls from" characteristic, located at the call target, for
    every call/tailcall lifted at the instruction whose destination is:
      - a constant or constant pointer,
      - a load from a constant or constant pointer, or
      - a register whose value resolves to a constant or imported address.
    """
    func: Function = fh.inner
    bv: BinaryView = func.view

    if bv is None:
        return

    call_ops = (
        LowLevelILOperation.LLIL_CALL,
        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
        LowLevelILOperation.LLIL_TAILCALL,
    )
    constant_ops = (LowLevelILOperation.LLIL_CONST_PTR, LowLevelILOperation.LLIL_CONST)
    resolvable_kinds = (
        RegisterValueType.ImportedAddressValue,
        RegisterValueType.ConstantValue,
        RegisterValueType.ConstantPointerValue,
    )

    for il in func.get_llils_at(ih.address):
        if il.operation not in call_ops:
            continue

        dest = il.dest
        if dest.operation in constant_ops:
            # always hand the plain integer to AbsoluteVirtualAddress, for LLIL_CONST
            # as well as LLIL_CONST_PTR, instead of the wrapping value object.
            yield Characteristic("calls from"), AbsoluteVirtualAddress(dest.value.value)
        elif dest.operation == LowLevelILOperation.LLIL_LOAD:
            indirect_src = dest.src
            if indirect_src.operation in constant_ops:
                yield Characteristic("calls from"), AbsoluteVirtualAddress(indirect_src.value.value)
        elif dest.operation == LowLevelILOperation.LLIL_REG:
            if dest.value.type in resolvable_kinds:
                yield Characteristic("calls from"), AbsoluteVirtualAddress(dest.value.value)
|
||||
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
    does not include calls like => call ds:dword_ABD4974

    most relevant at the function or basic block scope;
    however, its most efficient to extract at the instruction scope

    A call is reported unless its destination is a constant, or a load from a constant.
    """
    func: Function = fh.inner

    call_ops = (
        LowLevelILOperation.LLIL_CALL,
        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
        LowLevelILOperation.LLIL_TAILCALL,
    )
    constant_ops = (LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR)

    llil = func.get_llil_at(ih.address)
    if llil is None:
        return
    if llil.operation not in call_ops:
        return

    dest = llil.dest

    # direct call to a fixed address
    if dest.operation in constant_ops:
        return

    # call through a fixed memory location, e.g. an import table slot
    if dest.operation == LowLevelILOperation.LLIL_LOAD and dest.src.operation in constant_ops:
        return

    yield Characteristic("indirect call"), ih.address
|
||||
|
||||
|
||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract instruction features

    Runs every handler listed in INSTRUCTION_HANDLERS, in order, against the
    given instruction and yields each (feature, address) pair they produce.
    """
    for inst_handler in INSTRUCTION_HANDLERS:
        yield from inst_handler(f, bbh, insn)
|
||||
|
||||
|
||||
# Instruction-scope feature handlers, invoked in this order by extract_features().
# Each handler takes (FunctionHandle, BBHandle, InsnHandle) and yields (Feature, Address) pairs.
INSTRUCTION_HANDLERS = (
    extract_insn_api_features,
    extract_insn_number_features,
    extract_insn_bytes_features,
    extract_insn_string_features,
    extract_insn_offset_features,
    extract_insn_nzxor_characteristic_features,
    extract_insn_mnemonic_features,
    extract_insn_obfs_call_plus_5_characteristic_features,
    extract_insn_peb_access_characteristic_features,
    extract_insn_cross_section_cflow,
    extract_insn_segment_access_features,
    extract_function_calls_from,
    extract_function_indirect_call_characteristic_features,
)
|
||||
|
||||
|
||||
def main():
    """
    Debug entry point: print all instruction-scope features of a file.

    The path of the file to analyze is read from the first command line
    argument. Returns silently when no argument is given or when Binary Ninja
    cannot produce a view for the file.
    """
    if len(sys.argv) < 2:
        return

    # imported here, not at module level, so they are only loaded when run as a script
    import pprint

    from binaryninja import BinaryViewType

    from capa.features.extractors.binja.extractor import BinjaFeatureExtractor

    bv: BinaryView = BinaryViewType.get_view_of_file(sys.argv[1])
    if bv is None:
        return

    features = []
    extractor = BinjaFeatureExtractor(bv)
    for fh in extractor.get_functions():
        for bbh in extractor.get_basic_blocks(fh):
            for insn in extractor.get_instructions(fh, bbh):
                # extend() consumes the generator directly; no intermediate list is needed
                features.extend(extract_features(fh, bbh, insn))

    pprint.pprint(features)


if __name__ == "__main__":
    main()
|
||||
@@ -268,7 +268,8 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
|
||||
basic_block=bbaddr,
|
||||
address=Address.from_capa(addr),
|
||||
feature=feature_from_capa(feature),
|
||||
)
|
||||
) # type: ignore
|
||||
# Mypy is unable to recognise `basic_block` as an argument due to alias
|
||||
for feature, addr in extractor.extract_basic_block_features(f, bb)
|
||||
]
|
||||
|
||||
@@ -287,38 +288,41 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
|
||||
instructions.append(
|
||||
InstructionFeatures(
|
||||
address=iaddr,
|
||||
features=ifeatures,
|
||||
features=tuple(ifeatures),
|
||||
)
|
||||
)
|
||||
|
||||
basic_blocks.append(
|
||||
BasicBlockFeatures(
|
||||
address=bbaddr,
|
||||
features=bbfeatures,
|
||||
instructions=instructions,
|
||||
features=tuple(bbfeatures),
|
||||
instructions=tuple(instructions),
|
||||
)
|
||||
)
|
||||
|
||||
function_features.append(
|
||||
FunctionFeatures(
|
||||
address=faddr,
|
||||
features=ffeatures,
|
||||
features=tuple(ffeatures),
|
||||
basic_blocks=basic_blocks,
|
||||
)
|
||||
) # type: ignore
|
||||
# Mypy is unable to recognise `basic_blocks` as an argument due to alias
|
||||
)
|
||||
|
||||
features = Features(
|
||||
global_=global_features,
|
||||
file=file_features,
|
||||
functions=function_features,
|
||||
)
|
||||
file=tuple(file_features),
|
||||
functions=tuple(function_features),
|
||||
) # type: ignore
|
||||
# Mypy is unable to recognise `global_` as an argument due to alias
|
||||
|
||||
freeze = Freeze(
|
||||
version=2,
|
||||
base_address=Address.from_capa(extractor.get_base_address()),
|
||||
extractor=Extractor(name=extractor.__class__.__name__),
|
||||
features=features,
|
||||
)
|
||||
) # type: ignore
|
||||
# Mypy is unable to recognise `base_address` as an argument due to alias
|
||||
|
||||
return freeze.json()
|
||||
|
||||
|
||||
@@ -101,59 +101,79 @@ class FeatureModel(BaseModel):
|
||||
|
||||
def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
|
||||
if isinstance(f, capa.features.common.OS):
|
||||
assert isinstance(f.value, str)
|
||||
return OSFeature(os=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.common.Arch):
|
||||
assert isinstance(f.value, str)
|
||||
return ArchFeature(arch=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.common.Format):
|
||||
assert isinstance(f.value, str)
|
||||
return FormatFeature(format=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.common.MatchedRule):
|
||||
assert isinstance(f.value, str)
|
||||
return MatchFeature(match=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.common.Characteristic):
|
||||
assert isinstance(f.value, str)
|
||||
return CharacteristicFeature(characteristic=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.file.Export):
|
||||
assert isinstance(f.value, str)
|
||||
return ExportFeature(export=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.file.Import):
|
||||
return ImportFeature(import_=f.value, description=f.description)
|
||||
assert isinstance(f.value, str)
|
||||
return ImportFeature(import_=f.value, description=f.description) # type: ignore
|
||||
# Mypy is unable to recognise `import_` as an argument due to alias
|
||||
|
||||
elif isinstance(f, capa.features.file.Section):
|
||||
assert isinstance(f.value, str)
|
||||
return SectionFeature(section=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.file.FunctionName):
|
||||
return FunctionNameFeature(function_name=f.value, description=f.description)
|
||||
assert isinstance(f.value, str)
|
||||
return FunctionNameFeature(function_name=f.value, description=f.description) # type: ignore
|
||||
# Mypy is unable to recognise `function_name` as an argument due to alias
|
||||
|
||||
# must come before check for String due to inheritance
|
||||
elif isinstance(f, capa.features.common.Substring):
|
||||
assert isinstance(f.value, str)
|
||||
return SubstringFeature(substring=f.value, description=f.description)
|
||||
|
||||
# must come before check for String due to inheritance
|
||||
elif isinstance(f, capa.features.common.Regex):
|
||||
assert isinstance(f.value, str)
|
||||
return RegexFeature(regex=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.common.String):
|
||||
assert isinstance(f.value, str)
|
||||
return StringFeature(string=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.common.Class):
|
||||
return ClassFeature(class_=f.value, description=f.description)
|
||||
assert isinstance(f.value, str)
|
||||
return ClassFeature(class_=f.value, description=f.description) # type: ignore
|
||||
# Mypy is unable to recognise `class_` as an argument due to alias
|
||||
|
||||
elif isinstance(f, capa.features.common.Namespace):
|
||||
assert isinstance(f.value, str)
|
||||
return NamespaceFeature(namespace=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.basicblock.BasicBlock):
|
||||
return BasicBlockFeature(description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.insn.API):
|
||||
assert isinstance(f.value, str)
|
||||
return APIFeature(api=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.insn.Property):
|
||||
assert isinstance(f.value, str)
|
||||
return PropertyFeature(property=f.value, access=f.access, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.insn.Number):
|
||||
assert isinstance(f.value, (int, float))
|
||||
return NumberFeature(number=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.common.Bytes):
|
||||
@@ -162,16 +182,22 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
|
||||
return BytesFeature(bytes=binascii.hexlify(buf).decode("ascii"), description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.insn.Offset):
|
||||
assert isinstance(f.value, int)
|
||||
return OffsetFeature(offset=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.insn.Mnemonic):
|
||||
assert isinstance(f.value, str)
|
||||
return MnemonicFeature(mnemonic=f.value, description=f.description)
|
||||
|
||||
elif isinstance(f, capa.features.insn.OperandNumber):
|
||||
return OperandNumberFeature(index=f.index, operand_number=f.value, description=f.description)
|
||||
assert isinstance(f.value, int)
|
||||
return OperandNumberFeature(index=f.index, operand_number=f.value, description=f.description) # type: ignore
|
||||
# Mypy is unable to recognise `operand_number` as an argument due to alias
|
||||
|
||||
elif isinstance(f, capa.features.insn.OperandOffset):
|
||||
return OperandOffsetFeature(index=f.index, operand_offset=f.value, description=f.description)
|
||||
assert isinstance(f.value, int)
|
||||
return OperandOffsetFeature(index=f.index, operand_offset=f.value, description=f.description) # type: ignore
|
||||
# Mypy is unable to recognise `operand_offset` as an argument due to alias
|
||||
|
||||
else:
|
||||
raise NotImplementedError(f"feature_from_capa({type(f)}) not implemented")
|
||||
|
||||
@@ -70,7 +70,7 @@ class Number(Feature):
|
||||
elif isinstance(self.value, float):
|
||||
return str(self.value)
|
||||
else:
|
||||
raise ValueError("invalid value type")
|
||||
raise ValueError("invalid value type %s" % (type(self.value)))
|
||||
|
||||
|
||||
# max recognized structure size (and therefore, offset size)
|
||||
|
||||
30
capa/main.py
30
capa/main.py
@@ -77,6 +77,7 @@ RULES_PATH_DEFAULT_STRING = "(embedded rules)"
|
||||
SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
|
||||
BACKEND_VIV = "vivisect"
|
||||
BACKEND_DOTNET = "dotnet"
|
||||
BACKEND_BINJA = "binja"
|
||||
|
||||
E_MISSING_RULES = 10
|
||||
E_MISSING_FILE = 11
|
||||
@@ -523,6 +524,33 @@ def get_extractor(
|
||||
|
||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||
|
||||
elif backend == BACKEND_BINJA:
|
||||
from capa.features.extractors.binja.find_binja_api import find_binja_path
|
||||
|
||||
# When we are running as a standalone executable, we cannot directly import binaryninja
|
||||
# We need to first find the binja API installation path and add it into sys.path
|
||||
if is_running_standalone():
|
||||
bn_api = find_binja_path()
|
||||
if os.path.exists(bn_api):
|
||||
sys.path.append(bn_api)
|
||||
|
||||
try:
|
||||
from binaryninja import BinaryView, BinaryViewType
|
||||
except ImportError:
|
||||
raise RuntimeError(
|
||||
"Cannot import binaryninja module. Please install the Binary Ninja Python API first: "
|
||||
"https://docs.binary.ninja/dev/batch.html#install-the-api)."
|
||||
)
|
||||
|
||||
import capa.features.extractors.binja.extractor
|
||||
|
||||
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
|
||||
bv: BinaryView = BinaryViewType.get_view_of_file(path)
|
||||
if bv is None:
|
||||
raise RuntimeError("Binary Ninja cannot open file %s" % (path))
|
||||
|
||||
return capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)
|
||||
|
||||
# default to use vivisect backend
|
||||
else:
|
||||
import capa.features.extractors.viv.extractor
|
||||
@@ -873,7 +901,7 @@ def install_common_args(parser, wanted=None):
|
||||
"--backend",
|
||||
type=str,
|
||||
help="select the backend to use",
|
||||
choices=(BACKEND_VIV,),
|
||||
choices=(BACKEND_VIV, BACKEND_BINJA),
|
||||
default=BACKEND_VIV,
|
||||
)
|
||||
|
||||
|
||||
@@ -354,9 +354,9 @@ class Match(FrozenModel):
|
||||
return cls(
|
||||
success=success,
|
||||
node=node,
|
||||
children=children,
|
||||
locations=locations,
|
||||
captures=captures,
|
||||
children=tuple(children),
|
||||
locations=tuple(locations),
|
||||
captures={capture: tuple(captures[capture]) for capture in captures},
|
||||
)
|
||||
|
||||
|
||||
@@ -485,8 +485,8 @@ class RuleMetadata(FrozenModel):
|
||||
namespace=rule.meta.get("namespace"),
|
||||
authors=rule.meta.get("authors"),
|
||||
scope=capa.rules.Scope(rule.meta.get("scope")),
|
||||
attack=list(map(AttackSpec.from_str, rule.meta.get("att&ck", []))),
|
||||
mbc=list(map(MBCSpec.from_str, rule.meta.get("mbc", []))),
|
||||
attack=tuple(map(AttackSpec.from_str, rule.meta.get("att&ck", []))),
|
||||
mbc=tuple(map(MBCSpec.from_str, rule.meta.get("mbc", []))),
|
||||
references=rule.meta.get("references", []),
|
||||
examples=rule.meta.get("examples", []),
|
||||
description=rule.meta.get("description", ""),
|
||||
@@ -498,8 +498,10 @@ class RuleMetadata(FrozenModel):
|
||||
malware_family=rule.meta.get("maec/malware-family"),
|
||||
malware_category=rule.meta.get("maec/malware-category"),
|
||||
malware_category_ov=rule.meta.get("maec/malware-category-ov"),
|
||||
),
|
||||
)
|
||||
), # type: ignore
|
||||
# Mypy is unable to recognise arguments due to alias
|
||||
) # type: ignore
|
||||
# Mypy is unable to recognise arguments due to alias
|
||||
|
||||
class Config:
|
||||
frozen = True
|
||||
|
||||
@@ -126,6 +126,12 @@ Or install capa with build dependencies:
|
||||
|
||||
`$ pip install -e /local/path/to/src[build]`
|
||||
|
||||
#### Generate rule cache
|
||||
|
||||
Generate cache for all rules in the `rules` folder and save the output in the `cache` folder.
|
||||
|
||||
`$ python scripts/cache-ruleset.py rules/ cache/`
|
||||
|
||||
#### Run Pyinstaller
|
||||
`$ pyinstaller .github/pyinstaller/pyinstaller.spec`
|
||||
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: 8a2e23baa0...232af1ca4c
2
setup.py
2
setup.py
@@ -76,7 +76,7 @@ setuptools.setup(
|
||||
"pycodestyle==2.10.0",
|
||||
"black==23.1.0",
|
||||
"isort==5.11.4",
|
||||
"mypy==1.0.1",
|
||||
"mypy==1.1.1",
|
||||
"psutil==5.9.2",
|
||||
"stix2==3.0.1",
|
||||
"requests==2.28.0",
|
||||
|
||||
Submodule tests/data updated: b3b250b694...3cbd7768c2
@@ -160,6 +160,29 @@ def get_dnfile_extractor(path):
|
||||
return extractor
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_binja_extractor(path):
    """
    Build a Binary Ninja backed feature extractor for the sample at `path`.

    The result is memoized (only the most recent path is kept), so consecutive
    tests against the same sample reuse a single analysis.

    Args:
        path: file system path of the sample, as a string.

    Returns:
        a BinjaFeatureExtractor with an extra `path` attribute set to `path`.

    Raises:
        RuntimeError: if Binary Ninja does not produce a view for the file.
    """
    # imported lazily so that this module can still be imported
    # when the Binary Ninja Python API is not installed.
    from binaryninja import Settings, BinaryViewType

    import capa.features.extractors.binja.extractor

    # Workaround for a BN bug: https://github.com/Vector35/binaryninja-api/issues/4051
    settings = Settings()
    needs_pdb_workaround = path.endswith("kernel32-64.dll_")
    if needs_pdb_workaround:
        old_pdb = settings.get_bool("pdb.loadGlobalSymbols")
        settings.set_bool("pdb.loadGlobalSymbols", False)

    try:
        bv = BinaryViewType.get_view_of_file(path)
    finally:
        # always put the setting back, even when loading the file raises,
        # so the workaround cannot leak into the tests for other samples.
        if needs_pdb_workaround:
            settings.set_bool("pdb.loadGlobalSymbols", old_pdb)

    if bv is None:
        # fail with a clear message instead of handing None to the extractor
        raise RuntimeError("Binary Ninja cannot open file %s" % (path))

    extractor = capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)

    # overload the extractor so that the fixture exposes `extractor.path`
    setattr(extractor, "path", path)

    return extractor
|
||||
|
||||
|
||||
def extract_global_features(extractor):
|
||||
features = collections.defaultdict(set)
|
||||
for feature, va in extractor.extract_global_features():
|
||||
@@ -668,7 +691,7 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
("mimikatz", "function=0x46D534", capa.features.common.Characteristic("nzxor"), False),
|
||||
# insn/characteristic(nzxor): xorps
|
||||
# viv needs fixup to recognize function, see above
|
||||
("3b13b...", "function=0x10006860", capa.features.common.Characteristic("nzxor"), True),
|
||||
("mimikatz", "function=0x410dfc", capa.features.common.Characteristic("nzxor"), True),
|
||||
# insn/characteristic(peb access)
|
||||
("kernel32-64", "function=0x1800017D0", capa.features.common.Characteristic("peb access"), True),
|
||||
("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("peb access"), False),
|
||||
|
||||
47
tests/test_binja_features.py
Normal file
47
tests/test_binja_features.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
|
||||
import fixtures
|
||||
from fixtures import *
|
||||
|
||||
logger = logging.getLogger(__file__)


# We need to skip the binja test if we cannot import binaryninja, e.g., in GitHub CI.
# The flag only becomes True when the module imports *and* a trivial load succeeds.
binja_present: bool = False
try:
    import binaryninja

    try:
        # loading a one-byte buffer is a cheap probe that the API is actually usable
        binaryninja.load(source=b"\x90")
    except RuntimeError:
        logger.warning("Binary Ninja license is not valid, provide via $BN_LICENSE or license.dat")
    else:
        binja_present = True
except ImportError:
    # binaryninja is not installed: leave binja_present as False so the tests are skipped
    pass
|
||||
|
||||
|
||||
@pytest.mark.skipif(not binja_present, reason="Skip binja tests if the binaryninja Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_PRESENCE_TESTS,
    indirect=["sample", "scope"],
)
def test_binja_features(sample, scope, feature, expected):
    """
    Run every entry of `fixtures.FEATURE_PRESENCE_TESTS` against the Binary Ninja extractor.

    Skipped when `binja_present` is False.
    """
    make_extractor = fixtures.get_binja_extractor
    fixtures.do_test_feature_presence(make_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not binja_present, reason="Skip binja tests if the binaryninja Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_binja_feature_counts(sample, scope, feature, expected):
    """
    Run every entry of `fixtures.FEATURE_COUNT_TESTS` against the Binary Ninja extractor.

    Skipped when `binja_present` is False.
    """
    make_extractor = fixtures.get_binja_extractor
    fixtures.do_test_feature_count(make_extractor, sample, scope, feature, expected)
|
||||
Reference in New Issue
Block a user