Merge branch 'master' into fmt-black

This commit is contained in:
William Ballenthin
2020-07-02 10:25:24 -06:00
29 changed files with 847 additions and 612 deletions

View File

@@ -84,6 +84,7 @@ Download capa from the [Releases](/releases) page or get the nightly builds here
- [section](#section)
- [counting](#counting)
- [matching prior rule matches](#matching-prior-rule-matches)
- [description](#description)
- [limitations](#Limitations)
# installation
@@ -339,8 +340,9 @@ For example, a crypto constant.
The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
To associate context with a number, e.g. for constant definitions, append an equal sign and the respective name to
the number definition. This helps with documenting rules and provides context in capa's output.
To help humans understand the meaning of a number, for example that the constant `0x40` means `PAGE_EXECUTE_READWRITE`, you may provide a description alongside the definition.
Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`.
Check the [description section](#description) for more details.
Examples:
@@ -362,20 +364,31 @@ Regexes should be surrounded with `/` characters.
By default, capa uses case-sensitive matching and assumes leading and trailing wildcards.
To perform case-insensitive matching append an `i`. To anchor the regex at the start or end of a string, use `^` and/or `$`.
To add context to a string, use the two-line syntax with the `description` tag: `description: DESCRIPTION STRING`.
The inline syntax is not supported.
Check the [description section](#description) for more details.
Examples:
string: This program cannot be run in DOS mode.
string: Firefox 64.0
string: /SELECT.*FROM.*WHERE/
string: /Hardware\\Description\\System\\CentralProcessor/i
```
- string: This program cannot be run in DOS mode.
description: MS-DOS stub message
- string: '{3E5FC7F9-9A51-4367-9063-A120244FBEC7}'
description: CLSID_CMSTPLUA
- string: Firefox 64.0
- string: /SELECT.*FROM.*WHERE/
- string: /Hardware\\Description\\System\\CentralProcessor/i
```
Note that regex matching is expensive (`O(features)` rather than `O(1)`) so regexes should be used sparingly.
### bytes
A sequence of bytes referenced by the logic of the program.
The provided sequence must match from the beginning of the referenced bytes and be no more than `0x100` bytes.
The parameter is a sequence of hexadecimal bytes followed by an optional description.
The parameter is a sequence of hexadecimal bytes.
To help humans understand the meaning of the byte sequence, you may provide a description.
Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`.
Check the [description section](#description) for more details.
The example below illustrates byte matching given a COM CLSID pushed onto the stack prior to `CoCreateInstance`.
@@ -397,6 +410,7 @@ A structure offset referenced by the logic of the program.
This should not be a stack offset.
The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
It can be followed by an optional description.
Examples:
@@ -453,6 +467,7 @@ These are the features supported at the file-scope:
- [import](#import)
- [section](#section)
### file string
An ASCII or UTF-16 LE string present in the file.
@@ -511,6 +526,10 @@ These rules can be expressed like:
count(mnemonic(mov)): 3
count(basic block): 4
`count` supports inline descriptions, except for [strings](#string), using the following syntax:
count(number(2 = AF_INET/SOCK_DGRAM)): 2
## matching prior rule matches
capa rules can specify logic for matching on other rule matches.
@@ -532,6 +551,28 @@ By default, library rules will not be output to the user as a rule match,
but can be matched by other rules.
When no active rules depend on a library rule, that library rule will not be evaluated - maintaining performance.
## description
All features support an optional description which helps with documenting rules and provides context in capa's output.
For all features except for [strings](#string), the description can be specified inline preceded by ` = `: ` = DESCRIPTION STRING`.
For example:
```
- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ)
```
The inline syntax is preferred.
For [strings](#string) or if the description is long or contains newlines, use the two-line syntax.
It uses the `description` tag in the following way: `description: DESCRIPTION STRING`
For example:
```
- string: This program cannot be run in DOS mode.
description: MS-DOS stub message
- number: 0x4550
description: IMAGE_DOS_SIGNATURE (MZ)
```
# limitations
To learn more about capa's current limitations see [here](doc/limitations.md).

View File

@@ -161,11 +161,11 @@ class Range(Statement):
self.max = max if max is not None else (1 << 64 - 1)
def evaluate(self, ctx):
if self.child not in ctx:
return Result(False, self, [])
count = len(ctx.get(self.child, []))
if self.min == 0 and count == 0:
return Result(True, self, [])
count = len(ctx[self.child])
return Result(self.min <= count <= self.max, self, [], locations=ctx[self.child])
return Result(self.min <= count <= self.max, self, [], locations=ctx.get(self.child))
def __str__(self):
if self.max == (1 << 64 - 1):

View File

@@ -17,10 +17,11 @@ def bytes_to_str(b):
class Feature(object):
def __init__(self, args):
def __init__(self, args, description=None):
super(Feature, self).__init__()
self.name = self.__class__.__name__
self.name = self.__class__.__name__.lower()
self.args = args
self.description = description
def __hash__(self):
return hash((self.name, tuple(self.args)))
@@ -28,8 +29,16 @@ class Feature(object):
def __eq__(self, other):
return self.name == other.name and self.args == other.args
# Used to overwrite the rendering of the feature args in `__str__` and the
# json output
def get_args_str(self):
return ','.join(self.args)
def __str__(self):
return "%s(%s)" % (self.name.lower(), ",".join(self.args))
if self.description:
return '%s(%s = %s)' % (self.name, self.get_args_str(), self.description)
else:
return '%s(%s)' % (self.name, self.get_args_str())
def __repr__(self):
return str(self)
@@ -49,51 +58,41 @@ class Feature(object):
class MatchedRule(Feature):
def __init__(self, rule_name):
super(MatchedRule, self).__init__([rule_name])
def __init__(self, rule_name, description=None):
super(MatchedRule, self).__init__([rule_name], description)
self.name = 'match'
self.rule_name = rule_name
def __str__(self):
return "match(%s)" % (self.rule_name)
class Characteristic(Feature):
def __init__(self, name, value=None):
"""
when `value` is not provided, this serves as descriptor for a class of characteristics.
this is only used internally, such as in `rules.py` when checking if a statement is
supported by a given scope.
"""
super(Characteristic, self).__init__([name, value])
self.name = name
def __init__(self, value, description=None):
super(Characteristic, self).__init__([value], description)
self.value = value
def evaluate(self, ctx):
if self.value is None:
raise ValueError("cannot evaluate characteristc %s with empty value" % (str(self)))
return super(Characteristic, self).evaluate(ctx)
def freeze_serialize(self):
# in an older version of capa, characteristics could theoretically match non-existence (value=False).
# but we found this was never used (and better expressed with `not: characteristic: ...`).
# this was represented using an additional parameter for Characteristic.
# its been removed, but we keep it around in the freeze format to maintain backwards compatibility.
# this value is ignored, however.
return (self.__class__.__name__, [self.value, True])
def __str__(self):
if self.value is None:
return "characteristic(%s)" % (self.name)
else:
return "characteristic(%s(%s))" % (self.name, self.value)
@classmethod
def freeze_deserialize(cls, args):
# see above. we ignore the second element in the 2-tuple here.
return cls(args[0])
class String(Feature):
def __init__(self, value):
super(String, self).__init__([value])
def __init__(self, value, description=None):
super(String, self).__init__([value], description)
self.value = value
def __str__(self):
return 'string("%s")' % (self.value)
class Bytes(Feature):
def __init__(self, value, symbol=None):
super(Bytes, self).__init__([value])
def __init__(self, value, description=None):
super(Bytes, self).__init__([value], description)
self.value = value
self.symbol = symbol
def evaluate(self, ctx):
for feature, locations in ctx.items():
@@ -105,11 +104,8 @@ class Bytes(Feature):
return capa.engine.Result(False, self, [])
def __str__(self):
if self.symbol:
return "bytes(0x%s = %s)" % (bytes_to_str(self.value).upper(), self.symbol)
else:
return "bytes(0x%s)" % (bytes_to_str(self.value).upper())
def get_args_str(self):
return bytes_to_str(self.value).upper()
def freeze_serialize(self):
return (self.__class__.__name__, [bytes_to_str(x).upper() for x in self.args])

View File

@@ -185,22 +185,22 @@ class NullFeatureExtractor(FeatureExtractor):
extractor = NullFeatureExtractor({
'file features': [
(0x402345, capa.features.Characteristic('embedded pe', True)),
(0x402345, capa.features.Characteristic('embedded pe')),
],
'functions': {
0x401000: {
'features': [
(0x401000, capa.features.Characteristic('switch', True)),
(0x401000, capa.features.Characteristic('switch')),
],
'basic blocks': {
0x401000: {
'features': [
(0x401000, capa.features.Characteristic('tight-loop', True)),
(0x401000, capa.features.Characteristic('tight-loop')),
],
'instructions': {
0x401000: {
'features': [
(0x401000, capa.features.Characteristic('nzxor', True)),
(0x401000, capa.features.Characteristic('nzxor')),
],
},
0x401002: ...

View File

@@ -103,7 +103,7 @@ def extract_bb_stackstring(f, bb):
bb (IDA BasicBlock)
"""
if _ida_bb_contains_stackstring(f, bb):
yield Characteristic("stack string", True), bb.start_ea
yield Characteristic('stack string'), bb.start_ea
def _ida_bb_contains_tight_loop(f, bb):
@@ -133,7 +133,7 @@ def extract_bb_tight_loop(f, bb):
bb (IDA BasicBlock)
"""
if _ida_bb_contains_tight_loop(f, bb):
yield Characteristic("tight loop", True), bb.start_ea
yield Characteristic('tight loop'), bb.start_ea
def extract_features(f, bb):

View File

@@ -75,7 +75,7 @@ def extract_file_embedded_pe():
continue
for ea, _ in _ida_check_segment_for_pe(seg):
yield Characteristic("embedded pe", True), ea
yield Characteristic('embedded pe'), ea
def extract_file_export_names():

View File

@@ -29,7 +29,7 @@ def extract_function_switch(f):
f (IDA func_t)
"""
if _ida_function_contains_switch(f):
yield Characteristic("switch", True), f.start_ea
yield Characteristic('switch'), f.start_ea
def extract_function_calls_to(f):
@@ -39,7 +39,7 @@ def extract_function_calls_to(f):
f (IDA func_t)
"""
for ea in idautils.CodeRefsTo(f.start_ea, True):
yield Characteristic("calls to", True), ea
yield Characteristic('calls to'), ea
def extract_function_loop(f):
@@ -53,7 +53,7 @@ def extract_function_loop(f):
map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs())
if edges and loops.has_loop(edges):
yield Characteristic("loop", True), f.start_ea
yield Characteristic('loop'), f.start_ea
def extract_recursive_call(f):
@@ -64,7 +64,7 @@ def extract_recursive_call(f):
"""
for ref in idautils.CodeRefsTo(f.start_ea, True):
if f.contains(ref):
yield Characteristic("recursive call", True), f.start_ea
yield Characteristic('recursive call'), f.start_ea
break

View File

@@ -259,7 +259,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
if _is_nzxor_stack_cookie(f, bb, insn):
return
yield Characteristic("nzxor", True), insn.ea
yield Characteristic('nzxor'), insn.ea
def extract_insn_mnemonic_features(f, bb, insn):
@@ -292,7 +292,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
if " fs:30h" in disasm or " gs:60h" in disasm:
# TODO: replace above with proper IDA
yield Characteristic("peb access", True), insn.ea
yield Characteristic('peb access'), insn.ea
def extract_insn_segment_access_features(f, bb, insn):
@@ -309,11 +309,11 @@ def extract_insn_segment_access_features(f, bb, insn):
if " fs:" in disasm:
# TODO: replace above with proper IDA
yield Characteristic("fs access", True), insn.ea
yield Characteristic('fs access'), insn.ea
if " gs:" in disasm:
# TODO: replace above with proper IDA
yield Characteristic("gs access", True), insn.ea
yield Characteristic('gs access'), insn.ea
def extract_insn_cross_section_cflow(f, bb, insn):
@@ -336,7 +336,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
continue
yield Characteristic("cross section flow", True), insn.ea
yield Characteristic('cross section flow'), insn.ea
def extract_function_calls_from(f, bb, insn):
@@ -354,7 +354,7 @@ def extract_function_calls_from(f, bb, insn):
return
for ref in idautils.CodeRefsFrom(insn.ea, False):
yield Characteristic("calls from", True), ref
yield Characteristic('calls from'), ref
def extract_function_indirect_call_characteristic_features(f, bb, insn):
@@ -373,7 +373,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
return
if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
yield Characteristic("indirect call", True), insn.ea
yield Characteristic('indirect call'), insn.ea
def extract_features(f, bb, insn):

View File

@@ -39,7 +39,7 @@ def _bb_has_tight_loop(f, bb):
def extract_bb_tight_loop(f, bb):
""" check basic block for tight loop indicators """
if _bb_has_tight_loop(f, bb):
yield Characteristic("tight loop", True), bb.va
yield Characteristic('tight loop'), bb.va
def _bb_has_stackstring(f, bb):
@@ -62,7 +62,7 @@ def _bb_has_stackstring(f, bb):
def extract_stackstring(f, bb):
""" check basic block for stackstring indicators """
if _bb_has_stackstring(f, bb):
yield Characteristic("stack string", True), bb.va
yield Characteristic('stack string'), bb.va
def is_mov_imm_to_stack(instr):

View File

@@ -13,7 +13,7 @@ def extract_file_embedded_pe(vw, file_path):
fbytes = f.read()
for offset, i in pe_carve.carve(fbytes, 1):
yield Characteristic("embedded pe", True), offset
yield Characteristic('embedded pe'), offset
def extract_file_export_names(vw, file_path):

View File

@@ -53,12 +53,12 @@ def extract_function_switch(f):
method can be optimized
"""
if f.va in get_functions_with_switch(f.vw):
yield Characteristic("switch", True), f.va
yield Characteristic('switch'), f.va
def extract_function_calls_to(f):
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
yield Characteristic("calls to", True), src
yield Characteristic('calls to'), src
def extract_function_loop(f):
@@ -74,7 +74,7 @@ def extract_function_loop(f):
edges.append((bb.va, bva))
if edges and loops.has_loop(edges):
yield Characteristic("loop", True), f.va
yield Characteristic('loop'), f.va
def extract_features(f):

View File

@@ -287,7 +287,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
if is_security_cookie(f, bb, insn):
return
yield Characteristic("nzxor", True), insn.va
yield Characteristic('nzxor'), insn.va
def extract_insn_mnemonic_features(f, bb, insn):
@@ -313,16 +313,14 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
# IDA: push large dword ptr fs:30h
# viv: fs: push dword [0x00000030]
# fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (
isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
):
yield Characteristic("peb access", True), insn.va
elif "gs" in insn.getPrefixName():
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or \
(isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30):
yield Characteristic('peb access'), insn.va
elif 'gs' in insn.getPrefixName():
for oper in insn.opers:
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
):
yield Characteristic("peb access", True), insn.va
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or \
(isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60):
yield Characteristic('peb access'), insn.va
else:
pass
@@ -331,11 +329,11 @@ def extract_insn_segment_access_features(f, bb, insn):
""" parse the instruction for access to fs or gs """
prefix = insn.getPrefixName()
if prefix == "fs":
yield Characteristic("fs access", True), insn.va
if prefix == 'fs':
yield Characteristic('fs access'), insn.va
if prefix == "gs":
yield Characteristic("gs access", True), insn.va
if prefix == 'gs':
yield Characteristic('gs access'), insn.va
def get_section(vw, va):
@@ -372,7 +370,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
continue
if get_section(f.vw, insn.va) != get_section(f.vw, va):
yield Characteristic("cross section flow", True), insn.va
yield Characteristic('cross section flow'), insn.va
except KeyError:
continue
@@ -390,7 +388,7 @@ def extract_function_calls_from(f, bb, insn):
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
oper = insn.opers[0]
target = oper.getOperAddr(insn)
yield Characteristic("calls from", True), target
yield Characteristic('calls from'), target
# call via thunk on x86,
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
@@ -399,18 +397,18 @@ def extract_function_calls_from(f, bb, insn):
# see Lab21-01.exe_:0x140001178
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
target = insn.opers[0].getOperValue(insn)
yield Characteristic("calls from", True), target
yield Characteristic('calls from'), target
# call via IAT, x64
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
op = insn.opers[0]
target = op.getOperAddr(insn)
yield Characteristic("calls from", True), target
yield Characteristic('calls from'), target
if target and target == f.va:
# if we found a jump target and it's the function address
# mark as recursive
yield Characteristic("recursive call", True), target
yield Characteristic('recursive call'), target
# this is a feature that's most relevant at the function or basic block scope,
@@ -426,13 +424,13 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
# Checks below work for x86 and x64
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
# call edx
yield Characteristic("indirect call", True), insn.va
yield Characteristic('indirect call'), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper):
# call dword ptr [eax+50h]
yield Characteristic("indirect call", True), insn.va
yield Characteristic('indirect call'), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper):
# call qword ptr [rsp+78h]
yield Characteristic("indirect call", True), insn.va
yield Characteristic('indirect call'), insn.va
def extract_features(f, bb, insn):

View File

@@ -2,30 +2,21 @@ from capa.features import Feature
class Export(Feature):
def __init__(self, value):
def __init__(self, value, description=None):
# value is export name
super(Export, self).__init__([value])
super(Export, self).__init__([value], description)
self.value = value
def __str__(self):
return "Export(%s)" % (self.value)
class Import(Feature):
def __init__(self, value):
def __init__(self, value, description=None):
# value is import name
super(Import, self).__init__([value])
super(Import, self).__init__([value], description)
self.value = value
def __str__(self):
return "Import(%s)" % (self.value)
class Section(Feature):
def __init__(self, value):
def __init__(self, value, description=None):
# value is section name
super(Section, self).__init__([value])
super(Section, self).__init__([value], description)
self.value = value
def __str__(self):
return "Section(%s)" % (self.value)

View File

@@ -2,45 +2,34 @@ from capa.features import Feature
class API(Feature):
def __init__(self, name):
def __init__(self, name, description=None):
# Downcase library name if given
if "." in name:
modname, impname = name.split(".")
name = modname.lower() + "." + impname
super(API, self).__init__([name])
super(API, self).__init__([name], description)
class Number(Feature):
def __init__(self, value, symbol=None):
super(Number, self).__init__([value])
def __init__(self, value, description=None):
super(Number, self).__init__([value], description)
self.value = value
self.symbol = symbol
def __str__(self):
if self.symbol:
return "number(0x%x = %s)" % (self.value, self.symbol)
else:
return "number(0x%x)" % (self.value)
def get_args_str(self):
return '0x%X' % self.value
class Offset(Feature):
def __init__(self, value, symbol=None):
def __init__(self, value, description=None):
super(Offset, self).__init__([value])
self.value = value
self.symbol = symbol
def __str__(self):
if self.symbol:
return "offset(0x%x = %s)" % (self.value, self.symbol)
else:
return "offset(0x%x)" % (self.value)
def get_args_str(self):
return '0x%X' % self.value
class Mnemonic(Feature):
def __init__(self, value):
super(Mnemonic, self).__init__([value])
def __init__(self, value, description=None):
super(Mnemonic, self).__init__([value], description)
self.value = value
def __str__(self):
return "mnemonic(%s)" % (self.value)

View File

@@ -190,6 +190,14 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):
self._data[0] = self.fmt % display
class CapaExplorerSubscopeItem(CapaExplorerDataItem):
    """ store data relevant to capa subscope result """

    # display template; the subscope name is substituted for `%s`
    fmt = 'subscope(%s)'

    def __init__(self, parent, scope):
        """ initialize the item with the formatted subscope label

        @param parent: parent item handed to the base class
        @param scope: subscope name substituted into `fmt`
        """
        label = self.fmt % scope
        # first entry is the rendered label, the remaining two entries are left empty
        columns = [label, '', '']
        super(CapaExplorerSubscopeItem, self).__init__(parent, columns)
class CapaExplorerBlockItem(CapaExplorerDataItem):
""" store data relevant to capa basic block result """

View File

@@ -17,6 +17,7 @@ from capa.ida.explorer.item import (
CapaExplorerBlockItem,
CapaExplorerRuleMatchItem,
CapaExplorerFeatureItem,
CapaExplorerSubscopeItem
)
import capa.ida.helpers
@@ -108,20 +109,10 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
font.setBold(True)
return font
if (
role == QtCore.Qt.FontRole
and isinstance(
item,
(
CapaExplorerRuleItem,
CapaExplorerRuleMatchItem,
CapaExplorerBlockItem,
CapaExplorerFunctionItem,
CapaExplorerFeatureItem,
),
)
and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
):
if role == QtCore.Qt.FontRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem,
CapaExplorerBlockItem, CapaExplorerFunctionItem,
CapaExplorerFeatureItem, CapaExplorerSubscopeItem)) and \
column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
# set bold font for top-level rules
font = QtGui.QFont()
font.setBold(True)
@@ -322,11 +313,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
return item.childCount()
def render_capa_doc_statement_node(self, parent, statement, doc):
def render_capa_doc_statement_node(self, parent, statement, locations, doc):
""" render capa statement read from doc
@param parent: parent to which new child is assigned
@param statement: statement read from doc
@param locations: locations of children (applies to range only?)
@param doc: capa result doc
"statement": {
@@ -356,10 +348,16 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
else:
display += "between %d and %d" % (statement["min"], statement["max"])
return CapaExplorerFeatureItem(parent, display=display)
elif statement["type"] == "subscope":
return CapaExplorerFeatureItem(parent, "subscope(%s)" % statement["subscope"])
elif statement["type"] == "regex":
parent2 = CapaExplorerFeatureItem(parent, display=display)
for location in locations:
# for each location render child node for range statement
self.render_capa_doc_feature(parent2, statement['child'], location, doc)
return parent2
elif statement['type'] == 'subscope':
return CapaExplorerSubscopeItem(parent, statement['subscope'])
elif statement['type'] == 'regex':
# regex is a `Statement` not a `Feature`
# this is because it doesn't get extracted, but applies to all strings in scope.
# so we have to handle it here
@@ -401,10 +399,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
):
return
if match["node"]["type"] == "statement":
parent2 = self.render_capa_doc_statement_node(parent, match["node"]["statement"], doc)
elif match["node"]["type"] == "feature":
parent2 = self.render_capa_doc_feature_node(parent, match["node"]["feature"], match["locations"], doc)
if match['node']['type'] == 'statement':
parent2 = self.render_capa_doc_statement_node(parent, match['node']['statement'],
match.get('locations', []), doc)
elif match['node']['type'] == 'feature':
parent2 = self.render_capa_doc_feature_node(parent, match['node']['feature'], match['locations'], doc)
else:
raise RuntimeError("unexpected node type: " + str(match["node"]["type"]))

View File

@@ -375,10 +375,14 @@ class CapaExplorerForm(idaapi.PluginForm):
self.render_capa_doc_summary(doc)
self.render_capa_doc_mitre_summary(doc)
self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)
self.set_view_tree_default_sort_order()
logger.info("render views completed.")
def set_view_tree_default_sort_order(self):
""" """
self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)
def render_capa_doc_summary(self, doc):
""" """
for (row, rule) in enumerate(rutils.capability_rules(doc)):
@@ -459,6 +463,7 @@ class CapaExplorerForm(idaapi.PluginForm):
self.model_data.reset()
self.view_tree.reset()
self.view_checkbox_limit_by.setChecked(False)
self.set_view_tree_default_sort_order()
def reload(self):
""" reload views and re-run capa analysis """

View File

@@ -1,6 +1,7 @@
import json
import six
import capa.rules
import capa.engine
@@ -83,23 +84,11 @@ def convert_feature_to_result_document(feature):
"type": "characteristic"
},
"""
name, value = feature.freeze_serialize()
result = {'type': feature.name, feature.name: feature.get_args_str()}
if feature.description:
result['description'] = feature.description
# make the terms pretty
name = name.lower()
if name == "matchedrule":
name = "match"
# in the common case, there's a single argument
# so use it directly.
# like: name=number value=1
if isinstance(value, list) and len(value) == 1:
value = value[0]
return {
"type": name,
name: value,
}
return result
def convert_node_to_result_document(node):
@@ -144,7 +133,10 @@ def convert_match_to_result_document(rules, capabilities, result):
# so only add `locations` to feature nodes.
if isinstance(result.statement, capa.features.Feature):
if bool(result.success):
doc["locations"] = result.locations
doc['locations'] = result.locations
elif isinstance(result.statement, capa.rules.Range):
if bool(result.success):
doc['locations'] = result.locations
# if we have a `match` statement, then we're referencing another rule.
# this could an external rule (written by a human), or

View File

@@ -4,12 +4,31 @@ import capa.rules
import capa.render.utils as rutils
def render_statement(ostream, statement, indent=0):
ostream.write(" " * indent)
if statement["type"] in ("and", "or", "optional"):
ostream.write(statement["type"])
ostream.writeln(":")
elif statement["type"] == "not":
def render_locations(ostream, match):
    """ write the match locations to `ostream` as ` @ <addr>[, <addr>...]`

    nothing is written when the match has no locations.
    at most four addresses are listed; the rest are summarized as `, and N more...`.
    """
    # `locations` may be absent or empty here,
    # such as when we're in MODE_FAILURE and showing the logic
    # under a `not` statement (which will have no matched locations).
    addresses = sorted(match.get('locations', []))
    total = len(addresses)
    if total == 0:
        return

    ostream.write(' @ ')

    if total == 1:
        ostream.write(rutils.hex(addresses[0]))
        return

    if total <= 4:
        ostream.write(', '.join(map(rutils.hex, addresses)))
        return

    # listing every location becomes very noisy;
    # only the first handful is likely to be useful for inspection.
    shown = addresses[0:4]
    ostream.write(', '.join(map(rutils.hex, shown)))
    ostream.write(', and %d more...' % (total - 4))
def render_statement(ostream, match, statement, indent=0):
ostream.write(' ' * indent)
if statement['type'] in ('and', 'or', 'optional'):
ostream.write(statement['type'])
ostream.writeln(':')
elif statement['type'] == 'not':
# this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
ostream.writeln("not:")
elif statement["type"] == "some":
@@ -21,32 +40,36 @@ def render_statement(ostream, statement, indent=0):
# there's no additional logic in the feature part, just the existence of a feature.
# so, we have to inline some of the feature rendering here.
child = statement["child"]
if child["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
feature = "%s(%s)" % (child["type"], rutils.bold2(child[child["type"]]))
elif child["type"] in ("number", "offset"):
feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex(child[child["type"]])))
elif child["type"] == "bytes":
feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex_string(child[child["type"]])))
elif child["type"] == "characteristic":
feature = "characteristic(%s)" % (rutils.bold2(child["characteristic"][0]))
child = statement['child']
if child['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match', 'characteristic'):
value = rutils.bold2(child[child['type']])
elif child['type'] in ('number', 'offset'):
value = rutils.bold2(rutils.hex(child[child['type']]))
elif child['type'] == 'bytes':
value = rutils.bold2(rutils.hex_string(child[child['type']]))
else:
raise RuntimeError("unexpected feature type: " + str(child))
ostream.write("count(%s): " % feature)
if statement["max"] == statement["min"]:
ostream.writeln("%d" % (statement["min"]))
elif statement["min"] == 0:
ostream.writeln("%d or fewer" % (statement["max"]))
elif statement["max"] == (1 << 64 - 1):
ostream.writeln("%d or more" % (statement["min"]))
if child['description']:
ostream.write('count(%s(%s = %s)): ' % (child['type'], value, child['description']))
else:
ostream.writeln("between %d and %d" % (statement["min"], statement["max"]))
elif statement["type"] == "subscope":
ostream.write(statement["subscope"])
ostream.writeln(":")
elif statement["type"] == "regex":
ostream.write('count(%s(%s)): ' % (child['type'], value))
if statement['max'] == statement['min']:
ostream.write('%d' % (statement['min']))
elif statement['min'] == 0:
ostream.write('%d or fewer' % (statement['max']))
elif statement['max'] == (1 << 64 - 1):
ostream.write('%d or more' % (statement['min']))
else:
ostream.write('between %d and %d' % (statement['min'], statement['max']))
render_locations(ostream, match)
ostream.write('\n')
elif statement['type'] == 'subscope':
ostream.write(statement['subscope'])
ostream.writeln(':')
elif statement['type'] == 'regex':
# regex is a `Statement` not a `Feature`
# this is because it doesn't get extracted, but applies to all strings in scope.
# so we have to handle it here
@@ -56,52 +79,38 @@ def render_statement(ostream, statement, indent=0):
def render_feature(ostream, match, feature, indent=0):
ostream.write(" " * indent)
ostream.write(' ' * indent)
if feature["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
ostream.write(feature["type"])
ostream.write(": ")
ostream.write(rutils.bold2(feature[feature["type"]]))
elif feature["type"] in ("number", "offset"):
ostream.write(feature["type"])
ostream.write(": ")
ostream.write(rutils.bold2(rutils.hex(feature[feature["type"]])))
elif feature["type"] == "bytes":
ostream.write("bytes: ")
if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match', 'characteristic'):
ostream.write(feature['type'])
ostream.write(': ')
ostream.write(rutils.bold2(feature[feature['type']]))
elif feature['type'] in ('number', 'offset'):
ostream.write(feature['type'])
ostream.write(': ')
ostream.write(rutils.bold2(rutils.hex(feature[feature['type']])))
elif feature['type'] == 'bytes':
ostream.write('bytes: ')
# bytes is the uppercase, hex-encoded string.
# it should always be an even number of characters (its hex).
ostream.write(rutils.bold2(rutils.hex_string(feature[feature["type"]])))
elif feature["type"] == "characteristic":
ostream.write("characteristic(%s)" % (rutils.bold2(feature["characteristic"][0])))
ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
# note that regex is found in `render_statement`
else:
raise RuntimeError("unexpected feature type: " + str(feature))
# its possible to have an empty locations array here,
# such as when we're in MODE_FAILURE and showing the logic
# under a `not` statement (which will have no matched locations).
locations = list(sorted(match.get("locations", [])))
if len(locations) == 1:
ostream.write(" @ ")
ostream.write(rutils.hex(locations[0]))
elif len(locations) > 1:
ostream.write(" @ ")
if len(locations) > 4:
# don't display too many locations, because it becomes very noisy.
# probably only the first handful of locations will be useful for inspection.
ostream.write(", ".join(map(rutils.hex, locations[0:4])))
ostream.write(", and %d more..." % (len(locations) - 4))
else:
ostream.write(", ".join(map(rutils.hex, locations)))
if 'description' in feature:
ostream.write(' = ')
ostream.write(feature['description'])
ostream.write("\n")
render_locations(ostream, match)
ostream.write('\n')
def render_node(ostream, match, node, indent=0):
if node["type"] == "statement":
render_statement(ostream, node["statement"], indent=indent)
elif node["type"] == "feature":
render_feature(ostream, match, node["feature"], indent=indent)
if node['type'] == 'statement':
render_statement(ostream, match, node['statement'], indent=indent)
elif node['type'] == 'feature':
render_feature(ostream, match, node['feature'], indent=indent)
else:
raise RuntimeError("unexpected node type: " + str(node))

View File

@@ -138,8 +138,8 @@ class InvalidRuleSet(ValueError):
def ensure_feature_valid_for_scope(scope, feature):
if isinstance(feature, capa.features.Characteristic):
if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
if capa.features.Characteristic(feature.value) not in SUPPORTED_FEATURES[scope]:
raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
@@ -205,10 +205,9 @@ def parse_feature(key):
return capa.features.insn.Mnemonic
elif key == "basic blocks":
return capa.features.basicblock.BasicBlock
elif key.startswith("characteristic(") and key.endswith(")"):
characteristic = key[len("characteristic(") : -len(")")]
return lambda v: capa.features.Characteristic(characteristic, v)
elif key == "export":
elif key == 'characteristic':
return capa.features.Characteristic
elif key == 'export':
return capa.features.file.Export
elif key == "import":
return capa.features.file.Import
@@ -220,18 +219,18 @@ def parse_feature(key):
raise InvalidRule("unexpected statement: %s" % key)
def parse_symbol(s, value_type):
"""
def parse_description(s, value_type, description=None):
'''
s can be an int or a string
"""
if isinstance(s, str) and "=" in s:
value, symbol = s.split("=", 1)
symbol = symbol.strip()
if symbol == "":
raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
'''
if value_type != 'string' and isinstance(s, str) and ' = ' in s:
if description:
raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with ` = `)' % s)
value, description = s.split(' = ', 1)
if description == '':
raise InvalidRule('unexpected value: "%s", description cannot be empty' % s)
else:
value = s
symbol = None
if isinstance(value, str):
if value_type == "bytes":
@@ -242,21 +241,20 @@ def parse_symbol(s, value_type):
raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value)
if len(value) > MAX_BYTES_FEATURE_SIZE:
raise InvalidRule(
"unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE
)
else:
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
MAX_BYTES_FEATURE_SIZE)
elif value_type in {'number', 'offset'}:
try:
value = parse_int(value)
except ValueError:
raise InvalidRule('unexpected value: "%s", must begin with numerical value' % value)
return value, symbol
return value, description
def build_statements(d, scope):
if len(d.keys()) != 1:
raise InvalidRule("too many statements")
if len(d.keys()) > 2:
raise InvalidRule('too many statements')
key = list(d.keys())[0]
if key == "and":
@@ -303,48 +301,33 @@ def build_statements(d, scope):
term = key[len("count(") : -len(")")]
if term.startswith("characteristic("):
# characteristic features are specified a bit specially:
# they simply indicate the presence of something unusual/interesting,
# and we embed the name in the feature name, like `characteristic(nzxor)`.
#
# when we're dealing with counts, like `count(characteristic(nzxor))`,
# we can simply extract the feature and assume we're looking for `True` values.
Feature = parse_feature(term)
feature = Feature(True)
ensure_feature_valid_for_scope(scope, feature)
else:
# however, for remaining counted features, like `count(mnemonic(mov))`,
# we have to jump through hoops.
#
# when looking for the existance of such a feature, our rule might look like:
# - mnemonic: mov
#
# but here we deal with the form: `mnemonic(mov)`.
term, _, arg = term.partition("(")
Feature = parse_feature(term)
# when looking for the existence of such a feature, our rule might look like:
# - mnemonic: mov
#
# but here we deal with the form: `mnemonic(mov)`.
term, _, arg = term.partition('(')
Feature = parse_feature(term)
if arg:
arg = arg[: -len(")")]
# can't rely on yaml parsing ints embedded within strings
# like:
#
# count(offset(0xC))
# count(number(0x11223344))
# count(number(0x100 = symbol name))
if term in ("number", "offset", "bytes"):
value, symbol = parse_symbol(arg, term)
feature = Feature(value, symbol)
else:
# arg is string, like:
#
# count(mnemonic(mov))
# count(string(error))
# TODO: what about embedded newlines?
feature = Feature(arg)
if arg:
arg = arg[:-len(')')]
# can't rely on yaml parsing ints embedded within strings
# like:
#
# count(offset(0xC))
# count(number(0x11223344))
# count(number(0x100 = description))
if term != 'string':
value, description = parse_description(arg, term)
feature = Feature(value, description)
else:
feature = Feature()
ensure_feature_valid_for_scope(scope, feature)
# arg is string (which doesn't support inline descriptions), like:
#
# count(string(error))
# TODO: what about embedded newlines?
feature = Feature(arg)
else:
feature = Feature()
ensure_feature_valid_for_scope(scope, feature)
count = d[key]
if isinstance(count, int):
@@ -373,13 +356,8 @@ def build_statements(d, scope):
)
else:
Feature = parse_feature(key)
if key in ("number", "offset", "bytes"):
# parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
# or regular numbers, e.g. 37
value, symbol = parse_symbol(d[key], key)
feature = Feature(value, symbol)
else:
feature = Feature(d[key])
value, description = parse_description(d[key], key, d.get('description'))
feature = Feature(value, description)
ensure_feature_valid_for_scope(scope, feature)
return feature

13
ci/hooks/hook-vivisect.py Normal file
View File

@@ -0,0 +1,13 @@
# PyInstaller hook for the `vivisect` package: embeds the package's
# distribution metadata into the frozen build.
from PyInstaller.utils.hooks import copy_metadata
# in order for viv-utils to use pkg_resources to fetch
# the installed version of vivisect,
# we need to instruct pyinstaller to embed this metadata.
#
# so we set the pyinstaller.spec/hookspath to reference
# the directory with this hook.
#
# this hook runs at analysis time and updates the embedded metadata.
#
# ref: https://github.com/pyinstaller/pyinstaller/issues/1713#issuecomment-162682084
#
# NOTE(review): `datas` is presumably the hook-level variable that PyInstaller
# reads for extra data files to bundle - confirm against the PyInstaller hooks docs.
datas = copy_metadata('vivisect')

193
ci/pyinstaller.spec Normal file
View File

@@ -0,0 +1,193 @@
# -*- mode: python -*-
# PyInstaller build specification for the standalone `capa` executable.
#
# NOTE(review): `Analysis`, `PYZ`, `EXE`, and `TOC` are not imported here;
# PyInstaller is expected to supply them when it evaluates this spec.
import os.path
import subprocess

import wcwidth


# stamp the build with the current git revision.
#
# `check_output` returns bytes on Python 3, and the file is opened in binary mode,
# so assemble the line from bytes only.
# this produces the same file content on Python 2 and does not raise on Python 3.
with open('./capa/version.py', 'wb') as f:
    f.write(b"__version__ = '"
            + subprocess.check_output(["git", "describe", "--always"]).strip()
            + b"'")

a = Analysis(
    ['../capa/main.py'],
    pathex=['capa'],
    binaries=None,
    datas=[
        ('../rules', 'rules'),

        # capa.render.default uses tabulate that depends on wcwidth.
        # it seems wcwidth uses a json file `version.json`
        # and this doesn't get picked up by pyinstaller automatically.
        # so we manually embed the wcwidth resources here.
        #
        # ref: https://stackoverflow.com/a/62278462/87207
        (os.path.dirname(wcwidth.__file__), 'wcwidth')
    ],
    hiddenimports=[
        # vivisect does manual/runtime importing of its modules,
        # so declare the things that could be imported here.
        # (each module is listed exactly once.)
        "pycparser",
        "vivisect",
        "vivisect.analysis",
        "vivisect.analysis.amd64",
        "vivisect.analysis.amd64.emulation",
        "vivisect.analysis.amd64.golang",
        "vivisect.analysis.crypto",
        "vivisect.analysis.crypto.constants",
        "vivisect.analysis.elf",
        "vivisect.analysis.elf.elfplt",
        "vivisect.analysis.elf.libc_start_main",
        "vivisect.analysis.generic",
        "vivisect.analysis.generic.codeblocks",
        "vivisect.analysis.generic.emucode",
        "vivisect.analysis.generic.entrypoints",
        "vivisect.analysis.generic.funcentries",
        "vivisect.analysis.generic.impapi",
        "vivisect.analysis.generic.mkpointers",
        "vivisect.analysis.generic.pointers",
        "vivisect.analysis.generic.pointertables",
        "vivisect.analysis.generic.relocations",
        "vivisect.analysis.generic.strconst",
        "vivisect.analysis.generic.switchcase",
        "vivisect.analysis.generic.thunks",
        "vivisect.analysis.i386",
        "vivisect.analysis.i386.calling",
        "vivisect.analysis.i386.golang",
        "vivisect.analysis.i386.importcalls",
        "vivisect.analysis.i386.instrhook",
        "vivisect.analysis.i386.thunk_bx",
        "vivisect.analysis.ms",
        "vivisect.analysis.ms.hotpatch",
        "vivisect.analysis.ms.localhints",
        "vivisect.analysis.ms.msvc",
        "vivisect.analysis.ms.msvcfunc",
        "vivisect.analysis.ms.vftables",
        "vivisect.analysis.pe",
        "vivisect.impapi.posix.amd64",
        "vivisect.impapi.posix.i386",
        "vivisect.impapi.windows",
        "vivisect.impapi.windows.amd64",
        "vivisect.impapi.windows.i386",
        "vivisect.parsers.blob",
        "vivisect.parsers.elf",
        "vivisect.parsers.ihex",
        "vivisect.parsers.macho",
        "vivisect.parsers.parse_pe",
        "vivisect.parsers.utils",
        "vivisect.storage",
        "vivisect.storage.basicfile",
        "vstruct.constants",
        "vstruct.constants.ntstatus",
        "vstruct.defs",
        "vstruct.defs.arm7",
        "vstruct.defs.bmp",
        "vstruct.defs.dns",
        "vstruct.defs.elf",
        "vstruct.defs.gif",
        "vstruct.defs.ihex",
        "vstruct.defs.inet",
        "vstruct.defs.java",
        "vstruct.defs.kdcom",
        "vstruct.defs.macho",
        "vstruct.defs.macho.const",
        "vstruct.defs.macho.fat",
        "vstruct.defs.macho.loader",
        "vstruct.defs.macho.stabs",
        "vstruct.defs.minidump",
        "vstruct.defs.pcap",
        "vstruct.defs.pe",
        "vstruct.defs.pptp",
        "vstruct.defs.rar",
        "vstruct.defs.swf",
        "vstruct.defs.win32",
        "vstruct.defs.windows",
        "vstruct.defs.windows.win_5_1_i386",
        "vstruct.defs.windows.win_5_1_i386.ntdll",
        "vstruct.defs.windows.win_5_1_i386.ntoskrnl",
        "vstruct.defs.windows.win_5_1_i386.win32k",
        "vstruct.defs.windows.win_5_2_i386",
        "vstruct.defs.windows.win_5_2_i386.ntdll",
        "vstruct.defs.windows.win_5_2_i386.ntoskrnl",
        "vstruct.defs.windows.win_5_2_i386.win32k",
        "vstruct.defs.windows.win_6_1_amd64",
        "vstruct.defs.windows.win_6_1_amd64.ntdll",
        "vstruct.defs.windows.win_6_1_amd64.ntoskrnl",
        "vstruct.defs.windows.win_6_1_amd64.win32k",
        "vstruct.defs.windows.win_6_1_i386",
        "vstruct.defs.windows.win_6_1_i386.ntdll",
        "vstruct.defs.windows.win_6_1_i386.ntoskrnl",
        "vstruct.defs.windows.win_6_1_i386.win32k",
        "vstruct.defs.windows.win_6_1_wow64",
        "vstruct.defs.windows.win_6_1_wow64.ntdll",
        "vstruct.defs.windows.win_6_2_amd64",
        "vstruct.defs.windows.win_6_2_amd64.ntdll",
        "vstruct.defs.windows.win_6_2_amd64.ntoskrnl",
        "vstruct.defs.windows.win_6_2_amd64.win32k",
        "vstruct.defs.windows.win_6_2_i386",
        "vstruct.defs.windows.win_6_2_i386.ntdll",
        "vstruct.defs.windows.win_6_2_i386.ntoskrnl",
        "vstruct.defs.windows.win_6_2_i386.win32k",
        "vstruct.defs.windows.win_6_2_wow64",
        "vstruct.defs.windows.win_6_2_wow64.ntdll",
        "vstruct.defs.windows.win_6_3_amd64",
        "vstruct.defs.windows.win_6_3_amd64.ntdll",
        "vstruct.defs.windows.win_6_3_amd64.ntoskrnl",
        "vstruct.defs.windows.win_6_3_i386",
        "vstruct.defs.windows.win_6_3_i386.ntdll",
        "vstruct.defs.windows.win_6_3_i386.ntoskrnl",
        "vstruct.defs.windows.win_6_3_wow64",
        "vstruct.defs.windows.win_6_3_wow64.ntdll",
    ],
    hookspath=['ci/hooks'],
    runtime_hooks=None,
    excludes=[
        # ignore packages that would otherwise be bundled with the .exe.
        # review: build/pyinstaller/xref-pyinstaller.html

        # we don't do any GUI stuff, so ignore these modules
        "tkinter",
        "_tkinter",
        "Tkinter",

        # tqdm provides renderers for ipython,
        # however, this drags in a lot of dependencies.
        # since we don't spawn a notebook, we can safely remove these.
        "IPython",
        "ipywidgets",
    ])

# drop the Tcl/Tk binaries from the collected set; the GUI modules are excluded above.
a.binaries = a.binaries - TOC([
    ('tcl85.dll', None, None),
    ('tk85.dll', None, None),
    ('_tkinter', None, None)])

pyz = PYZ(a.pure, a.zipped_data)

exe = EXE(pyz,
          a.scripts,
          a.binaries,
          a.zipfiles,
          a.datas,
          exclude_binaries=False,
          name='capa',
          icon='logo.ico',
          debug=False,
          strip=None,
          upx=True,
          console=True )

# enable the following to debug the contents of the .exe
#
#coll = COLLECT(exe,
#               a.binaries,
#               a.zipfiles,
#               a.datas,
#               strip=None,
#               upx=True,
#               name='capa-dat')

2
rules

Submodule rules updated: e5db226844...bb1df0277d

View File

@@ -323,7 +323,8 @@ def lint_features(ctx, rule):
def get_features(ctx, rule):
# get features from rule and all dependencies including subscopes and matched rules
features = []
deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies()]
namespaces = capa.rules.index_rules_by_namespace([rule])
deps = [ctx['rules'].rules[dep] for dep in rule.get_dependencies(namespaces)]
for r in [rule] + deps:
features.extend(get_rule_features(r))
return features

View File

@@ -86,7 +86,8 @@ def test_complex():
def test_range():
# unbounded range, but no matching feature
assert Range(Number(1)).evaluate({Number(2): {}}) == False
# since the lower bound is zero, and there are zero matches, ok
assert Range(Number(1)).evaluate({Number(2): {}}) == True
# unbounded range with matching feature should always match
assert Range(Number(1)).evaluate({Number(1): {}}) == True
@@ -117,6 +118,103 @@ def test_range():
assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3, 4}}) == False
def test_range_exact():
    """`count(number(100)): 2` matches two locations, but neither one nor three."""
    source = textwrap.dedent('''
        rule:
            meta:
                name: test rule
            features:
                - count(number(100)): 2
    ''')
    compiled = capa.rules.Rule.from_yaml(source)
    hundred = capa.features.insn.Number(100)

    # just enough matches
    _, hits = capa.engine.match([compiled], {hundred: {1, 2}}, 0x0)
    assert 'test rule' in hits

    # not enough matches
    _, hits = capa.engine.match([compiled], {hundred: {1}}, 0x0)
    assert 'test rule' not in hits

    # too many matches
    _, hits = capa.engine.match([compiled], {hundred: {1, 2, 3}}, 0x0)
    assert 'test rule' not in hits
def test_range_range():
    """`count(number(100)): (2, 3)` matches two or three locations, but neither one nor four."""
    source = textwrap.dedent('''
        rule:
            meta:
                name: test rule
            features:
                - count(number(100)): (2, 3)
    ''')
    compiled = capa.rules.Rule.from_yaml(source)
    hundred = capa.features.insn.Number(100)

    # just enough matches
    _, hits = capa.engine.match([compiled], {hundred: {1, 2}}, 0x0)
    assert 'test rule' in hits

    # enough matches
    _, hits = capa.engine.match([compiled], {hundred: {1, 2, 3}}, 0x0)
    assert 'test rule' in hits

    # not enough matches
    _, hits = capa.engine.match([compiled], {hundred: {1}}, 0x0)
    assert 'test rule' not in hits

    # too many matches
    _, hits = capa.engine.match([compiled], {hundred: {1, 2, 3, 4}}, 0x0)
    assert 'test rule' not in hits
def test_range_exact_zero():
    """`count(number(100)): 0` matches when the feature is absent or has no locations."""
    source = textwrap.dedent('''
        rule:
            meta:
                name: test rule
            features:
                - count(number(100)): 0
    ''')
    compiled = capa.rules.Rule.from_yaml(source)
    hundred = capa.features.insn.Number(100)

    # feature isn't indexed - good.
    _, hits = capa.engine.match([compiled], {}, 0x0)
    assert 'test rule' in hits

    # feature is indexed, but no matches.
    # i don't think we should ever really have this case, but good to check anyways.
    _, hits = capa.engine.match([compiled], {hundred: {}}, 0x0)
    assert 'test rule' in hits

    # too many matches
    _, hits = capa.engine.match([compiled], {hundred: {1}}, 0x0)
    assert 'test rule' not in hits
def test_range_with_zero():
    """`count(number(100)): (0, 1)` matches zero or one location, but not two."""
    source = textwrap.dedent('''
        rule:
            meta:
                name: test rule
            features:
                - count(number(100)): (0, 1)
    ''')
    compiled = capa.rules.Rule.from_yaml(source)
    hundred = capa.features.insn.Number(100)

    # ok: feature not indexed, indexed without locations, and a single location
    _, hits = capa.engine.match([compiled], {}, 0x0)
    assert 'test rule' in hits
    _, hits = capa.engine.match([compiled], {hundred: {}}, 0x0)
    assert 'test rule' in hits
    _, hits = capa.engine.match([compiled], {hundred: {1}}, 0x0)
    assert 'test rule' in hits

    # too many matches
    _, hits = capa.engine.match([compiled], {hundred: {1, 2}}, 0x0)
    assert 'test rule' not in hits
def test_match_adds_matched_rule_feature():
"""show that using `match` adds a feature for matched rules."""
rule = textwrap.dedent(

View File

@@ -10,37 +10,26 @@ import capa.features.freeze
from fixtures import *
EXTRACTOR = capa.features.extractors.NullFeatureExtractor(
{
"file features": [
(0x402345, capa.features.Characteristic("embedded pe", True)),
],
"functions": {
0x401000: {
"features": [(0x401000, capa.features.Characteristic("switch", True)),],
"basic blocks": {
0x401000: {
"features": [
(
0x401000,
capa.features.Characteristic("tight loop", True),
),
],
"instructions": {
0x401000: {
"features": [
(0x401000, capa.features.insn.Mnemonic("xor")),
(
0x401000,
capa.features.Characteristic("nzxor", True),
),
],
},
0x401002: {
"features": [
(0x401002, capa.features.insn.Mnemonic("mov")),
]
},
EXTRACTOR = capa.features.extractors.NullFeatureExtractor({
'file features': [
(0x402345, capa.features.Characteristic('embedded pe')),
],
'functions': {
0x401000: {
'features': [
(0x401000, capa.features.Characteristic('switch')),
],
'basic blocks': {
0x401000: {
'features': [
(0x401000, capa.features.Characteristic('tight loop')),
],
'instructions': {
0x401000: {
'features': [
(0x401000, capa.features.insn.Mnemonic('xor')),
(0x401000, capa.features.Characteristic('nzxor')),
],
},
},
},
@@ -55,25 +44,19 @@ def test_null_feature_extractor():
assert list(EXTRACTOR.get_basic_blocks(0x401000)) == [0x401000]
assert list(EXTRACTOR.get_instructions(0x401000, 0x0401000)) == [0x401000, 0x401002]
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: xor loop
scope: basic block
features:
- and:
- characteristic(tight loop): true
- mnemonic: xor
- characteristic(nzxor): true
"""
)
),
]
)
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: xor loop
scope: basic block
features:
- and:
- characteristic: tight loop
- mnemonic: xor
- characteristic: nzxor
''')),
])
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
assert "xor loop" in capabilities
@@ -178,9 +161,9 @@ def test_serialize_features():
roundtrip_feature(capa.features.String("SCardControl"))
roundtrip_feature(capa.features.insn.Number(0xFF))
roundtrip_feature(capa.features.insn.Offset(0x0))
roundtrip_feature(capa.features.insn.Mnemonic("push"))
roundtrip_feature(capa.features.file.Section(".rsrc"))
roundtrip_feature(capa.features.Characteristic("tight loop", True))
roundtrip_feature(capa.features.insn.Mnemonic('push'))
roundtrip_feature(capa.features.file.Section('.rsrc'))
roundtrip_feature(capa.features.Characteristic('tight loop'))
roundtrip_feature(capa.features.basicblock.BasicBlock())
roundtrip_feature(capa.features.file.Export("BaseThreadInitThunk"))
roundtrip_feature(capa.features.file.Import("kernel32.IsWow64Process"))

View File

@@ -52,46 +52,33 @@ def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32):
def test_ruleset():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: file rule
scope: file
features:
- characteristic(embedded pe): y
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: function rule
scope: function
features:
- characteristic(switch): y
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: basic block rule
scope: basic block
features:
- characteristic(nzxor): y
"""
)
),
]
)
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: file rule
scope: file
features:
- characteristic: embedded pe
''')),
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: function rule
scope: function
features:
- characteristic: switch
''')),
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: basic block rule
scope: basic block
features:
- characteristic: nzxor
''')),
])
assert len(rules.file_rules) == 1
assert len(rules.function_rules) == 1
assert len(rules.basic_block_rules) == 1
@@ -165,65 +152,48 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
rules = capa.rules.RuleSet(
[
# this rule should match on a basic block (including at least 0x403685)
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: tight loop
scope: basic block
examples:
- 9324d1a8ae37a36ae560c37448c9705a:0x403685
features:
- characteristic(tight loop): true
"""
)
),
# this rule should match on a function (0x403660)
# based on API, as well as prior basic block rule match
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: kill thread loop
scope: function
examples:
- 9324d1a8ae37a36ae560c37448c9705a:0x403660
features:
- and:
- api: kernel32.TerminateThread
- api: kernel32.CloseHandle
- match: tight loop
"""
)
),
# this rule should match on a file feature and a prior function rule match
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: kill thread program
scope: file
examples:
- 9324d1a8ae37a36ae560c37448c9705a
features:
- and:
- section: .text
- match: kill thread loop
"""
)
),
]
)
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw,
sample_9324d1a8ae37a36ae560c37448c9705a.path,
)
rules = capa.rules.RuleSet([
# this rule should match on a basic block (including at least 0x403685)
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: tight loop
scope: basic block
examples:
- 9324d1a8ae37a36ae560c37448c9705a:0x403685
features:
- characteristic: tight loop
''')),
# this rule should match on a function (0x403660)
# based on API, as well as prior basic block rule match
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: kill thread loop
scope: function
examples:
- 9324d1a8ae37a36ae560c37448c9705a:0x403660
features:
- and:
- api: kernel32.TerminateThread
- api: kernel32.CloseHandle
- match: tight loop
''')),
# this rule should match on a file feature and a prior function rule match
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: kill thread program
scope: file
examples:
- 9324d1a8ae37a36ae560c37448c9705a
features:
- and:
- section: .text
- match: kill thread loop
''')),
])
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path)
capabilities = capa.main.find_capabilities(rules, extractor)
assert "tight loop" in capabilities
assert "kill thread loop" in capabilities
@@ -231,24 +201,18 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scope: function
features:
- and:
- basic block:
- characteristic(tight loop): true
"""
)
)
]
)
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: test rule
scope: function
features:
- and:
- basic block:
- characteristic: tight loop
'''))
])
# tight loop at 0x403685
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw,

View File

@@ -4,6 +4,7 @@ import pytest
import capa.rules
from capa.features.insn import Number, Offset
from capa.features import String
def test_rule_ctor():
@@ -66,6 +67,22 @@ def test_rule_yaml_complex():
assert r.evaluate({Number(6): {1}, Number(7): {1}, Number(8): {1}}) == False
def test_rule_yaml_descriptions():
    """A rule using inline (` = `) and two-line (`description:`) descriptions still parses and evaluates."""
    source = textwrap.dedent('''
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1 = This is the number 1
                    - string: This program cannot be run in DOS mode.
                      description: MS-DOS stub message
                    - count(number(2 = AF_INET/SOCK_DGRAM)): 2
    ''')
    parsed = capa.rules.Rule.from_yaml(source)

    # one location for number 1, two for number 2 (satisfying the count), one for the string
    observed = {
        Number(1): {1},
        Number(2): {2, 3},
        String('This program cannot be run in DOS mode.'): {4},
    }
    assert parsed.evaluate(observed) == True
def test_rule_yaml_not():
rule = textwrap.dedent(
"""
@@ -132,37 +149,47 @@ def test_invalid_rule_feature():
)
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scope: file
features:
- characteristic(nzxor): true
"""
)
)
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: test rule
scope: file
features:
- characteristic: nzxor
'''))
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scope: function
features:
- characteristic(embedded pe): true
"""
)
)
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: test rule
scope: function
features:
- characteristic: embedded pe
'''))
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: test rule
scope: basic block
features:
- characteristic: embedded pe
'''))
def test_lib_rules():
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: a lib rule
lib: true
features:
- api: CreateFileA
''')),
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: test rule
@@ -207,27 +234,21 @@ def test_lib_rules():
def test_subscope_rules():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scope: file
features:
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(textwrap.dedent('''
rule:
meta:
name: test rule
scope: file
features:
- and:
- characteristic: embedded pe
- function:
- and:
- characteristic(embedded pe): true
- function:
- and:
- characteristic(nzxor): true
- characteristic(switch): true
"""
)
)
]
)
- characteristic: nzxor
- characteristic: switch
'''))
])
# the file rule scope will have one rules:
# - `test rule`
assert len(rules.file_rules) == 1
@@ -295,10 +316,8 @@ def test_invalid_rules():
meta:
name: test rule
features:
- characteristic(number(1)): True
"""
)
)
- characteristic: number(1)
'''))
with pytest.raises(capa.rules.InvalidRule):
r = capa.rules.Rule.from_yaml(
@@ -308,10 +327,8 @@ def test_invalid_rules():
meta:
name: test rule
features:
- characteristic(count(number(100))): True
"""
)
)
- characteristic: count(number(100))
'''))
def test_number_symbol():

View File

@@ -130,7 +130,7 @@ def test_offset_features(mimikatz):
def test_nzxor_features(mimikatz):
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x410DFC))
assert capa.features.Characteristic("nzxor", True) in features # 0x0410F0B
assert capa.features.Characteristic('nzxor') in features # 0x0410F0B
def get_bb_insn(f, va):
@@ -169,10 +169,8 @@ def test_mnemonic_features(mimikatz):
def test_peb_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
features = extract_function_features(
viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC)
)
assert capa.features.Characteristic("peb access", True) in features
features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
assert capa.features.Characteristic('peb access') in features
def test_file_section_name_features(mimikatz):
@@ -188,7 +186,7 @@ def test_tight_loop_features(mimikatz):
if bb.va != 0x402F8E:
continue
features = extract_basic_block_features(f, bb)
assert capa.features.Characteristic("tight loop", True) in features
assert capa.features.Characteristic('tight loop') in features
assert capa.features.basicblock.BasicBlock() in features
@@ -198,7 +196,7 @@ def test_tight_loop_bb_features(mimikatz):
if bb.va != 0x402F8E:
continue
features = extract_basic_block_features(f, bb)
assert capa.features.Characteristic("tight loop", True) in features
assert capa.features.Characteristic('tight loop') in features
assert capa.features.basicblock.BasicBlock() in features
@@ -219,24 +217,18 @@ def test_file_import_name_features(mimikatz):
def test_cross_section_flow_features(sample_a198216798ca38f280dc413f8c57f2c2):
features = extract_function_features(
viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0)
)
assert capa.features.Characteristic("cross section flow", True) in features
features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0))
assert capa.features.Characteristic('cross section flow') in features
# this function has calls to some imports,
# which should not trigger cross-section flow characteristic
features = extract_function_features(
viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563)
)
assert capa.features.Characteristic("cross section flow", True) not in features
features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563))
assert capa.features.Characteristic('cross section flow') not in features
def test_segment_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
features = extract_function_features(
viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC)
)
assert capa.features.Characteristic("fs access", True) in features
features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
assert capa.features.Characteristic('fs access') in features
def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a):
@@ -249,60 +241,36 @@ def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a):
def test_file_embedded_pe(pma_lab_12_04):
features = extract_file_features(pma_lab_12_04.vw, pma_lab_12_04.path)
assert capa.features.Characteristic("embedded pe", True) in features
assert capa.features.Characteristic('embedded pe') in features
def test_stackstring_features(mimikatz):
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4556E5))
assert capa.features.Characteristic("stack string", True) in features
assert capa.features.Characteristic('stack string') in features
def test_switch_features(mimikatz):
    """Check detection of the ``switch`` characteristic.

    The function at 0x409411 is expected to carry the characteristic, while
    the function at 0x409393 is expected not to.
    """
    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409411))
    assert capa.features.Characteristic("switch") in features

    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409393))
    assert capa.features.Characteristic("switch") not in features
def test_recursive_call_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
    """Check detection of the ``recursive call`` characteristic.

    The function at 0x10003100 is expected to carry the characteristic, while
    the function at 0x10007B00 is expected not to.
    """
    # bind the workspace once; the fixture name is too long to repeat inline
    vw = sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw

    features = extract_function_features(viv_utils.Function(vw, 0x10003100))
    assert capa.features.Characteristic("recursive call") in features

    features = extract_function_features(viv_utils.Function(vw, 0x10007B00))
    assert capa.features.Characteristic("recursive call") not in features
def test_loop_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
    """Check detection of the ``loop`` characteristic.

    The function at 0x10003D30 is expected to carry the characteristic, while
    the function at 0x10007250 is expected not to.
    """
    # bind the workspace once; the fixture name is too long to repeat inline
    vw = sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw

    features = extract_function_features(viv_utils.Function(vw, 0x10003D30))
    assert capa.features.Characteristic("loop") in features

    features = extract_function_features(viv_utils.Function(vw, 0x10007250))
    assert capa.features.Characteristic("loop") not in features
def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
@@ -315,27 +283,21 @@ def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
def test_function_calls_to(sample_9324d1a8ae37a36ae560c37448c9705a):
    """Check that the function at 0x406F60 has exactly one ``calls to`` entry."""
    vw = sample_9324d1a8ae37a36ae560c37448c9705a.vw
    calls_to = capa.features.Characteristic("calls to")

    features = extract_function_features(viv_utils.Function(vw, 0x406F60))
    assert calls_to in features
    assert len(features[calls_to]) == 1
def test_function_calls_to64(sample_lab21_01):
    """Check that the function at 0x1400052D0 has exactly eight ``calls to`` entries."""
    calls_to = capa.features.Characteristic("calls to")

    features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400052D0))  # memcpy
    assert calls_to in features
    assert len(features[calls_to]) == 8
def test_function_calls_from(sample_9324d1a8ae37a36ae560c37448c9705a):
    """Check that the function at 0x406F60 has exactly 23 ``calls from`` entries."""
    vw = sample_9324d1a8ae37a36ae560c37448c9705a.vw
    calls_from = capa.features.Characteristic("calls from")

    features = extract_function_features(viv_utils.Function(vw, 0x406F60))
    assert calls_from in features
    assert len(features[calls_from]) == 23
def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a):
@@ -346,11 +308,9 @@ def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a):
def test_indirect_call_features(sample_a933a1a402775cfa94b6bee0963f4b46):
    """Check that the function at 0xABA68A0 has exactly three ``indirect call`` entries."""
    vw = sample_a933a1a402775cfa94b6bee0963f4b46.vw
    indirect_call = capa.features.Characteristic("indirect call")

    features = extract_function_features(viv_utils.Function(vw, 0xABA68A0))
    assert indirect_call in features
    assert len(features[indirect_call]) == 3
def test_indirect_calls_resolved(sample_c91887d861d9bd4a5872249b641bc9f9):