This commit is contained in:
William Ballenthin
2020-07-01 12:43:12 -06:00
parent 23e70b4e85
commit b9d017ad10
39 changed files with 1368 additions and 1333 deletions

View File

@@ -7,23 +7,24 @@ import capa.features
class Statement(object):
'''
"""
superclass for structural nodes, such as and/or/not.
this exists to provide a default impl for `__str__` and `__repr__`,
and to declare the interface method `evaluate`
'''
"""
def __init__(self):
super(Statement, self).__init__()
self.name = self.__class__.__name__
def __str__(self):
return '%s(%s)' % (self.name.lower(), ','.join(map(str, self.get_children())))
return "%s(%s)" % (self.name.lower(), ",".join(map(str, self.get_children())))
def __repr__(self):
return str(self)
def evaluate(self, ctx):
'''
"""
classes that inherit `Statement` must implement `evaluate`
args:
@@ -31,30 +32,30 @@ class Statement(object):
returns:
Result
'''
"""
raise NotImplementedError()
def get_children(self):
if hasattr(self, 'child'):
if hasattr(self, "child"):
yield self.child
if hasattr(self, 'children'):
if hasattr(self, "children"):
for child in self.children:
yield child
def replace_child(self, existing, new):
if hasattr(self, 'child'):
if hasattr(self, "child"):
if self.child is existing:
self.child = new
if hasattr(self, 'children'):
if hasattr(self, "children"):
for i, child in enumerate(self.children):
if child is existing:
self.children[i] = new
class Result(object):
'''
"""
represents the results of an evaluation of statements against features.
instances of this class should behave like a bool,
@@ -65,15 +66,16 @@ class Result(object):
as well as the children Result instances.
we need this so that we can render the tree of expressions and their results.
'''
"""
def __init__(self, success, statement, children, locations=None):
'''
"""
args:
success (bool)
statement (capa.engine.Statement or capa.features.Feature)
children (list[Result])
locations (iterable[VA])
'''
"""
super(Result, self).__init__()
self.success = success
self.statement = statement
@@ -93,7 +95,8 @@ class Result(object):
class And(Statement):
'''match if all of the children evaluate to True.'''
"""match if all of the children evaluate to True."""
def __init__(self, *children):
super(And, self).__init__()
self.children = list(children)
@@ -105,7 +108,8 @@ class And(Statement):
class Or(Statement):
'''match if any of the children evaluate to True.'''
"""match if any of the children evaluate to True."""
def __init__(self, *children):
super(Or, self).__init__()
self.children = list(children)
@@ -117,7 +121,8 @@ class Or(Statement):
class Not(Statement):
'''match only if the child evaluates to False.'''
"""match only if the child evaluates to False."""
def __init__(self, child):
super(Not, self).__init__()
self.child = child
@@ -129,7 +134,8 @@ class Not(Statement):
class Some(Statement):
'''match if at least N of the children evaluate to True.'''
"""match if at least N of the children evaluate to True."""
def __init__(self, count, *children):
super(Some, self).__init__()
self.count = count
@@ -146,7 +152,8 @@ class Some(Statement):
class Range(Statement):
'''match if the child is contained in the ctx set with a count in the given range.'''
"""match if the child is contained in the ctx set with a count in the given range."""
def __init__(self, child, min=None, max=None):
super(Range, self).__init__()
self.child = child
@@ -162,27 +169,28 @@ class Range(Statement):
def __str__(self):
if self.max == (1 << 64 - 1):
return 'range(%s, min=%d, max=infinity)' % (str(self.child), self.min)
return "range(%s, min=%d, max=infinity)" % (str(self.child), self.min)
else:
return 'range(%s, min=%d, max=%d)' % (str(self.child), self.min, self.max)
return "range(%s, min=%d, max=%d)" % (str(self.child), self.min, self.max)
class Regex(Statement):
'''match if the given pattern matches a String feature.'''
"""match if the given pattern matches a String feature."""
def __init__(self, pattern):
super(Regex, self).__init__()
self.pattern = pattern
pat = self.pattern[len('/'):-len('/')]
pat = self.pattern[len("/") : -len("/")]
flags = re.DOTALL
if pattern.endswith('/i'):
pat = self.pattern[len('/'):-len('/i')]
if pattern.endswith("/i"):
pat = self.pattern[len("/") : -len("/i")]
flags |= re.IGNORECASE
self.re = re.compile(pat, flags)
self.match = ''
self.match = ""
def evaluate(self, ctx):
for feature, locations in ctx.items():
if not isinstance(feature, (capa.features.String, )):
if not isinstance(feature, (capa.features.String,)):
continue
# `re.search` finds a match anywhere in the given string
@@ -200,27 +208,28 @@ class Regex(Statement):
class Subscope(Statement):
'''
"""
a subscope element is a placeholder in a rule - it should not be evaluated directly.
the engine should preprocess rules to extract subscope statements into their own rules.
'''
"""
def __init__(self, scope, child):
super(Subscope, self).__init__()
self.scope = scope
self.child = child
def evaluate(self, ctx):
raise ValueError('cannot evaluate a subscope directly!')
raise ValueError("cannot evaluate a subscope directly!")
def topologically_order_rules(rules):
'''
"""
order the given rules such that dependencies show up before dependents.
this means that as we match rules, we can add features for the matches, and these
will be matched by subsequent rules if they follow this order.
assumes that the rule dependency graph is a DAG.
'''
"""
# we evaluate `rules` multiple times, so if its a generator, realize it into a list.
rules = list(rules)
namespaces = capa.rules.index_rules_by_namespace(rules)
@@ -245,7 +254,7 @@ def topologically_order_rules(rules):
def match(rules, features, va):
'''
"""
Args:
rules (List[capa.rules.Rule]): these must already be ordered topologically by dependency.
features (Mapping[capa.features.Feature, int]):
@@ -255,7 +264,7 @@ def match(rules, features, va):
Tuple[List[capa.features.Feature], Dict[str, Tuple[int, capa.engine.Result]]]: two-tuple with entries:
- list of features used for matching (which may be greater than argument, due to rule match features), and
- mapping from rule name to (location of match, result object)
'''
"""
results = collections.defaultdict(list)
# copy features so that we can modify it
@@ -270,10 +279,10 @@ def match(rules, features, va):
results[rule.name].append((va, res))
features[capa.features.MatchedRule(rule.name)].add(va)
namespace = rule.meta.get('namespace')
namespace = rule.meta.get("namespace")
if namespace:
while namespace:
features[capa.features.MatchedRule(namespace)].add(va)
namespace, _, _ = namespace.rpartition('/')
namespace, _, _ = namespace.rpartition("/")
return (features, results)

View File

@@ -11,9 +11,9 @@ MAX_BYTES_FEATURE_SIZE = 0x100
def bytes_to_str(b):
if sys.version_info[0] >= 3:
return str(codecs.encode(b, 'hex').decode('utf-8'))
return str(codecs.encode(b, "hex").decode("utf-8"))
else:
return codecs.encode(b, 'hex')
return codecs.encode(b, "hex")
class Feature(object):
@@ -29,7 +29,7 @@ class Feature(object):
return self.name == other.name and self.args == other.args
def __str__(self):
return '%s(%s)' % (self.name.lower(), ','.join(self.args))
return "%s(%s)" % (self.name.lower(), ",".join(self.args))
def __repr__(self):
return str(self)
@@ -41,8 +41,7 @@ class Feature(object):
return self.__dict__
def freeze_serialize(self):
return (self.__class__.__name__,
self.args)
return (self.__class__.__name__, self.args)
@classmethod
def freeze_deserialize(cls, args):
@@ -55,30 +54,30 @@ class MatchedRule(Feature):
self.rule_name = rule_name
def __str__(self):
return 'match(%s)' % (self.rule_name)
return "match(%s)" % (self.rule_name)
class Characteristic(Feature):
def __init__(self, name, value=None):
'''
"""
when `value` is not provided, this serves as descriptor for a class of characteristics.
this is only used internally, such as in `rules.py` when checking if a statement is
supported by a given scope.
'''
"""
super(Characteristic, self).__init__([name, value])
self.name = name
self.value = value
def evaluate(self, ctx):
if self.value is None:
raise ValueError('cannot evaluate characteristc %s with empty value' % (str(self)))
raise ValueError("cannot evaluate characteristc %s with empty value" % (str(self)))
return super(Characteristic, self).evaluate(ctx)
def __str__(self):
if self.value is None:
return 'characteristic(%s)' % (self.name)
return "characteristic(%s)" % (self.name)
else:
return 'characteristic(%s(%s))' % (self.name, self.value)
return "characteristic(%s(%s))" % (self.name, self.value)
class String(Feature):
@@ -98,7 +97,7 @@ class Bytes(Feature):
def evaluate(self, ctx):
for feature, locations in ctx.items():
if not isinstance(feature, (capa.features.Bytes, )):
if not isinstance(feature, (capa.features.Bytes,)):
continue
if feature.value.startswith(self.value):
@@ -108,14 +107,13 @@ class Bytes(Feature):
def __str__(self):
if self.symbol:
return 'bytes(0x%s = %s)' % (bytes_to_str(self.value).upper(), self.symbol)
return "bytes(0x%s = %s)" % (bytes_to_str(self.value).upper(), self.symbol)
else:
return 'bytes(0x%s)' % (bytes_to_str(self.value).upper())
return "bytes(0x%s)" % (bytes_to_str(self.value).upper())
def freeze_serialize(self):
return (self.__class__.__name__,
[bytes_to_str(x).upper() for x in self.args])
return (self.__class__.__name__, [bytes_to_str(x).upper() for x in self.args])
@classmethod
def freeze_deserialize(cls, args):
return cls(*[codecs.decode(x, 'hex') for x in args])
return cls(*[codecs.decode(x, "hex") for x in args])

View File

@@ -6,4 +6,4 @@ class BasicBlock(Feature):
super(BasicBlock, self).__init__([])
def __str__(self):
return 'basic block'
return "basic block"

View File

@@ -10,11 +10,11 @@ try:
except (ImportError, SyntaxError):
pass
__all__ = ['ida', 'viv']
__all__ = ["ida", "viv"]
class FeatureExtractor(object):
'''
"""
FeatureExtractor defines the interface for fetching features from a sample.
There may be multiple backends that support fetching features for capa.
@@ -27,7 +27,8 @@ class FeatureExtractor(object):
Also, this provides a way to hook in an IDA backend.
This class is not instantiated directly; it is the base class for other implementations.
'''
"""
__metaclass__ = abc.ABCMeta
def __init__(self):
@@ -40,7 +41,7 @@ class FeatureExtractor(object):
@abc.abstractmethod
def extract_file_features(self):
'''
"""
extract file-scope features.
example::
@@ -51,12 +52,12 @@ class FeatureExtractor(object):
yields:
Tuple[capa.features.Feature, int]: feature and its location
'''
"""
raise NotImplemented
@abc.abstractmethod
def get_functions(self):
'''
"""
enumerate the functions and provide opaque values that will
subsequently be provided to `.extract_function_features()`, etc.
@@ -67,12 +68,12 @@ class FeatureExtractor(object):
yields:
any: the opaque function value.
'''
"""
raise NotImplemented
@abc.abstractmethod
def extract_function_features(self, f):
'''
"""
extract function-scope features.
the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -88,12 +89,12 @@ class FeatureExtractor(object):
yields:
Tuple[capa.features.Feature, int]: feature and its location
'''
"""
raise NotImplemented
@abc.abstractmethod
def get_basic_blocks(self, f):
'''
"""
enumerate the basic blocks in the given function and provide opaque values that will
subsequently be provided to `.extract_basic_block_features()`, etc.
@@ -104,12 +105,12 @@ class FeatureExtractor(object):
yields:
any: the opaque basic block value.
'''
"""
raise NotImplemented
@abc.abstractmethod
def extract_basic_block_features(self, f, bb):
'''
"""
extract basic block-scope features.
the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -127,12 +128,12 @@ class FeatureExtractor(object):
yields:
Tuple[capa.features.Feature, int]: feature and its location
'''
"""
raise NotImplemented
@abc.abstractmethod
def get_instructions(self, f, bb):
'''
"""
enumerate the instructions in the given basic block and provide opaque values that will
subsequently be provided to `.extract_insn_features()`, etc.
@@ -143,12 +144,12 @@ class FeatureExtractor(object):
yields:
any: the opaque function value.
'''
"""
raise NotImplemented
@abc.abstractmethod
def extract_insn_features(self, f, bb, insn):
'''
"""
extract instruction-scope features.
the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -168,12 +169,12 @@ class FeatureExtractor(object):
yields:
Tuple[capa.features.Feature, int]: feature and its location
'''
"""
raise NotImplemented
class NullFeatureExtractor(FeatureExtractor):
'''
"""
An extractor that extracts some user-provided features.
The structure of the single parameter is demonstrated in the example below.
@@ -211,64 +212,66 @@ class NullFeatureExtractor(FeatureExtractor):
0x40200: ...
}
)
'''
"""
def __init__(self, features):
super(NullFeatureExtractor, self).__init__()
self.features = features
def extract_file_features(self):
for p in self.features.get('file features', []):
for p in self.features.get("file features", []):
va, feature = p
yield feature, va
def get_functions(self):
for va in sorted(self.features['functions'].keys()):
for va in sorted(self.features["functions"].keys()):
yield va
def extract_function_features(self, f):
for p in (self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('features', [])):
for p in self.features.get("functions", {}).get(f, {}).get("features", []): # noqa: E127 line over-indented
va, feature = p
yield feature, va
def get_basic_blocks(self, f):
for va in sorted(self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('basic blocks', {})
.keys()):
for va in sorted(
self.features.get("functions", {}) # noqa: E127 line over-indented
.get(f, {})
.get("basic blocks", {})
.keys()
):
yield va
def extract_basic_block_features(self, f, bb):
for p in (self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('basic blocks', {})
.get(bb, {})
.get('features', [])):
for p in (
self.features.get("functions", {}) # noqa: E127 line over-indented
.get(f, {})
.get("basic blocks", {})
.get(bb, {})
.get("features", [])
):
va, feature = p
yield feature, va
def get_instructions(self, f, bb):
for va in sorted(self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('basic blocks', {})
.get(bb, {})
.get('instructions', {})
.keys()):
for va in sorted(
self.features.get("functions", {}) # noqa: E127 line over-indented
.get(f, {})
.get("basic blocks", {})
.get(bb, {})
.get("instructions", {})
.keys()
):
yield va
def extract_insn_features(self, f, bb, insn):
for p in (self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('basic blocks', {})
.get(bb, {})
.get('instructions', {})
.get(insn, {})
.get('features', [])):
for p in (
self.features.get("functions", {}) # noqa: E127 line over-indented
.get(f, {})
.get("basic blocks", {})
.get(bb, {})
.get("instructions", {})
.get(insn, {})
.get("features", [])
):
va, feature = p
yield feature, va

View File

@@ -10,27 +10,27 @@ def xor_static(data, i):
if sys.version_info >= (3, 0):
return bytes(c ^ i for c in data)
else:
return ''.join(chr(ord(c) ^ i) for c in data)
return "".join(chr(ord(c) ^ i) for c in data)
def is_aw_function(function_name):
'''
"""
is the given function name an A/W function?
these are variants of functions that, on Windows, accept either a narrow or wide string.
'''
"""
if len(function_name) < 2:
return False
# last character should be 'A' or 'W'
if function_name[-1] not in ('A', 'W'):
if function_name[-1] not in ("A", "W"):
return False
# second to last character should be lowercase letter
return 'a' <= function_name[-2] <= 'z' or '0' <= function_name[-2] <= '9'
return "a" <= function_name[-2] <= "z" or "0" <= function_name[-2] <= "9"
def generate_api_features(apiname, va):
'''
"""
for a given function name and address, generate API names.
we over-generate features to make matching easier.
these include:
@@ -38,7 +38,7 @@ def generate_api_features(apiname, va):
- kernel32.CreateFile
- CreateFileA
- CreateFile
'''
"""
# (kernel32.CreateFileA, 0x401000)
yield API(apiname), va
@@ -46,8 +46,8 @@ def generate_api_features(apiname, va):
# (kernel32.CreateFile, 0x401000)
yield API(apiname[:-1]), va
if '.' in apiname:
modname, impname = apiname.split('.')
if "." in apiname:
modname, impname = apiname.split(".")
# strip modname to support importname-only matching
# (CreateFileA, 0x401000)
yield API(impname), va

View File

@@ -26,17 +26,17 @@ def get_va(self):
def add_va_int_cast(o):
'''
"""
dynamically add a cast-to-int (`__int__`) method to the given object
that returns the value of the `.va` property.
this bit of skullduggery lets use cast viv-utils objects as ints.
the correct way of doing this is to update viv-utils (or subclass the objects here).
'''
"""
if sys.version_info >= (3, 0):
setattr(o, '__int__', types.MethodType(get_va, o))
setattr(o, "__int__", types.MethodType(get_va, o))
else:
setattr(o, '__int__', types.MethodType(get_va, o, type(o)))
setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
return o

View File

@@ -15,23 +15,23 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
def _ida_get_printable_len(op):
''' Return string length if all operand bytes are ascii or utf16-le printable
""" Return string length if all operand bytes are ascii or utf16-le printable
args:
op (IDA op_t)
'''
"""
op_val = helpers.mask_op_val(op)
if op.dtype == idaapi.dt_byte:
chars = struct.pack('<B', op_val)
chars = struct.pack("<B", op_val)
elif op.dtype == idaapi.dt_word:
chars = struct.pack('<H', op_val)
chars = struct.pack("<H", op_val)
elif op.dtype == idaapi.dt_dword:
chars = struct.pack('<I', op_val)
chars = struct.pack("<I", op_val)
elif op.dtype == idaapi.dt_qword:
chars = struct.pack('<Q', op_val)
chars = struct.pack("<Q", op_val)
else:
raise ValueError('Unhandled operand data type 0x%x.' % op.dtype)
raise ValueError("Unhandled operand data type 0x%x." % op.dtype)
def _is_printable_ascii(chars):
if sys.version_info >= (3, 0):
@@ -44,7 +44,7 @@ def _ida_get_printable_len(op):
if all(c == 0x00 for c in chars[1::2]):
return _is_printable_ascii(chars[::2])
else:
if all(c == '\x00' for c in chars[1::2]):
if all(c == "\x00" for c in chars[1::2]):
return _is_printable_ascii(chars[::2])
if _is_printable_ascii(chars):
@@ -57,32 +57,32 @@ def _ida_get_printable_len(op):
def _is_mov_imm_to_stack(insn):
''' verify instruction moves immediate onto stack
""" verify instruction moves immediate onto stack
args:
insn (IDA insn_t)
'''
"""
if insn.Op2.type != idaapi.o_imm:
return False
if not helpers.is_op_stack_var(insn.ea, 0):
return False
if not insn.get_canon_mnem().startswith('mov'):
if not insn.get_canon_mnem().startswith("mov"):
return False
return True
def _ida_bb_contains_stackstring(f, bb):
''' check basic block for stackstring indicators
""" check basic block for stackstring indicators
true if basic block contains enough moves of constant bytes to the stack
args:
f (IDA func_t)
bb (IDA BasicBlock)
'''
"""
count = 0
for insn in helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
@@ -96,25 +96,25 @@ def _ida_bb_contains_stackstring(f, bb):
def extract_bb_stackstring(f, bb):
''' extract stackstring indicators from basic block
""" extract stackstring indicators from basic block
args:
f (IDA func_t)
bb (IDA BasicBlock)
'''
"""
if _ida_bb_contains_stackstring(f, bb):
yield Characteristic('stack string', True), bb.start_ea
yield Characteristic("stack string", True), bb.start_ea
def _ida_bb_contains_tight_loop(f, bb):
''' check basic block for stackstring indicators
""" check basic block for stackstring indicators
true if last instruction in basic block branches to basic block start
args:
f (IDA func_t)
bb (IDA BasicBlock)
'''
"""
bb_end = idc.prev_head(bb.end_ea)
if bb.start_ea < bb_end:
@@ -126,23 +126,23 @@ def _ida_bb_contains_tight_loop(f, bb):
def extract_bb_tight_loop(f, bb):
''' extract tight loop indicators from a basic block
""" extract tight loop indicators from a basic block
args:
f (IDA func_t)
bb (IDA BasicBlock)
'''
"""
if _ida_bb_contains_tight_loop(f, bb):
yield Characteristic('tight loop', True), bb.start_ea
yield Characteristic("tight loop", True), bb.start_ea
def extract_features(f, bb):
''' extract basic block features
""" extract basic block features
args:
f (IDA func_t)
bb (IDA BasicBlock)
'''
"""
yield BasicBlock(), bb.start_ea
for bb_handler in BASIC_BLOCK_HANDLERS:
@@ -166,5 +166,5 @@ def main():
pprint.pprint(features)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -16,32 +16,39 @@ import capa.features.extractors.ida.helpers
def _ida_check_segment_for_pe(seg):
''' check segment for embedded PE
""" check segment for embedded PE
adapted for IDA from:
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
args:
seg (IDA segment_t)
'''
"""
seg_max = seg.end_ea
mz_xor = [(capa.features.extractors.helpers.xor_static(b'MZ', i),
capa.features.extractors.helpers.xor_static(b'PE', i),
i)
for i in range(256)]
todo = [(capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i) for mzx, pex, i in mz_xor]
mz_xor = [
(
capa.features.extractors.helpers.xor_static(b"MZ", i),
capa.features.extractors.helpers.xor_static(b"PE", i),
i,
)
for i in range(256)
]
todo = [
(capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i)
for mzx, pex, i in mz_xor
]
todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != idaapi.BADADDR]
while len(todo):
off, mzx, pex, i = todo.pop()
# The MZ header has one field we will check e_lfanew is at 0x3c
e_lfanew = off + 0x3c
e_lfanew = off + 0x3C
if seg_max < (e_lfanew + 4):
continue
newoff = struct.unpack('<I', capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
peoff = off + newoff
if seg_max < (peoff + 2):
@@ -56,29 +63,29 @@ def _ida_check_segment_for_pe(seg):
def extract_file_embedded_pe():
''' extract embedded PE features
""" extract embedded PE features
IDA must load resource sections for this to be complete
- '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually
'''
"""
for seg in capa.features.extractors.ida.helpers.get_segments():
if seg.is_header_segm():
# IDA may load header segments, skip if present
continue
for ea, _ in _ida_check_segment_for_pe(seg):
yield Characteristic('embedded pe', True), ea
yield Characteristic("embedded pe", True), ea
def extract_file_export_names():
''' extract function exports '''
""" extract function exports """
for _, _, ea, name in idautils.Entries():
yield Export(name), ea
def extract_file_import_names():
''' extract function imports
""" extract function imports
1. imports by ordinal:
- modulename.#ordinal
@@ -87,25 +94,25 @@ def extract_file_import_names():
matching:
- modulename.importname
- importname
'''
"""
for ea, imp_info in capa.features.extractors.ida.helpers.get_file_imports().items():
dllname, name, ordi = imp_info
if name:
yield Import('%s.%s' % (dllname, name)), ea
yield Import("%s.%s" % (dllname, name)), ea
yield Import(name), ea
if ordi:
yield Import('%s.#%s' % (dllname, str(ordi))), ea
yield Import("%s.#%s" % (dllname, str(ordi))), ea
def extract_file_section_names():
''' extract section names
""" extract section names
IDA must load resource sections for this to be complete
- '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually
'''
"""
for seg in capa.features.extractors.ida.helpers.get_segments():
if seg.is_header_segm():
# IDA may load header segments, skip if present
@@ -115,12 +122,12 @@ def extract_file_section_names():
def extract_file_strings():
''' extract ASCII and UTF-16 LE strings
""" extract ASCII and UTF-16 LE strings
IDA must load resource sections for this to be complete
- '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually
'''
"""
for seg in capa.features.extractors.ida.helpers.get_segments():
seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)
@@ -132,7 +139,7 @@ def extract_file_strings():
def extract_features():
''' extract file features '''
""" extract file features """
for file_handler in FILE_HANDLERS:
for feature, va in file_handler():
yield feature, va
@@ -151,5 +158,5 @@ def main():
pprint.pprint(list(extract_features()))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -6,14 +6,14 @@ from capa.features.extractors import loops
def _ida_function_contains_switch(f):
''' check a function for switch statement indicators
""" check a function for switch statement indicators
adapted from:
https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1
arg:
f (IDA func_t)
'''
"""
for start, end in idautils.Chunks(f.start_ea):
for head in idautils.Heads(start, end):
if idaapi.get_switch_info(head):
@@ -23,68 +23,63 @@ def _ida_function_contains_switch(f):
def extract_function_switch(f):
''' extract switch indicators from a function
""" extract switch indicators from a function
arg:
f (IDA func_t)
'''
"""
if _ida_function_contains_switch(f):
yield Characteristic('switch', True), f.start_ea
yield Characteristic("switch", True), f.start_ea
def extract_function_calls_to(f):
''' extract callers to a function
""" extract callers to a function
args:
f (IDA func_t)
'''
"""
for ea in idautils.CodeRefsTo(f.start_ea, True):
yield Characteristic('calls to', True), ea
yield Characteristic("calls to", True), ea
def extract_function_loop(f):
''' extract loop indicators from a function
""" extract loop indicators from a function
args:
f (IDA func_t)
'''
"""
edges = []
for bb in idaapi.FlowChart(f):
map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs())
if edges and loops.has_loop(edges):
yield Characteristic('loop', True), f.start_ea
yield Characteristic("loop", True), f.start_ea
def extract_recursive_call(f):
''' extract recursive function call
""" extract recursive function call
args:
f (IDA func_t)
'''
"""
for ref in idautils.CodeRefsTo(f.start_ea, True):
if f.contains(ref):
yield Characteristic('recursive call', True), f.start_ea
yield Characteristic("recursive call", True), f.start_ea
break
def extract_features(f):
''' extract function features
""" extract function features
arg:
f (IDA func_t)
'''
"""
for func_handler in FUNCTION_HANDLERS:
for feature, va in func_handler(f):
yield feature, va
FUNCTION_HANDLERS = (
extract_function_calls_to,
extract_function_switch,
extract_function_loop,
extract_recursive_call
)
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_switch, extract_function_loop, extract_recursive_call)
def main():
@@ -96,5 +91,5 @@ def main():
pprint.pprint(features)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -7,21 +7,21 @@ import idc
def find_byte_sequence(start, end, seq):
''' find byte sequence
""" find byte sequence
args:
start: min virtual address
end: max virtual address
seq: bytes to search e.g. b'\x01\x03'
'''
"""
if sys.version_info >= (3, 0):
return idaapi.find_binary(start, end, ' '.join(['%02x' % b for b in seq]), 0, idaapi.SEARCH_DOWN)
return idaapi.find_binary(start, end, " ".join(["%02x" % b for b in seq]), 0, idaapi.SEARCH_DOWN)
else:
return idaapi.find_binary(start, end, ' '.join(['%02x' % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN)
return idaapi.find_binary(start, end, " ".join(["%02x" % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN)
def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
''' get functions, range optional
""" get functions, range optional
args:
start: min virtual address
@@ -29,7 +29,7 @@ def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
ret:
yield func_t*
'''
"""
for ea in idautils.Functions(start=start, end=end):
f = idaapi.get_func(ea)
@@ -43,7 +43,7 @@ def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
def get_segments():
''' Get list of segments (sections) in the binary image '''
""" Get list of segments (sections) in the binary image """
for n in range(idaapi.get_segm_qty()):
seg = idaapi.getnseg(n)
if seg:
@@ -51,11 +51,11 @@ def get_segments():
def get_segment_buffer(seg):
''' return bytes stored in a given segment
""" return bytes stored in a given segment
decrease buffer size until IDA is able to read bytes from the segment
'''
buff = b''
"""
buff = b""
sz = seg.end_ea - seg.start_ea
while sz > 0:
@@ -65,11 +65,11 @@ def get_segment_buffer(seg):
sz -= 0x1000
# IDA returns None if get_bytes fails, so convert for consistent return type
return buff if buff else b''
return buff if buff else b""
def get_file_imports():
''' get file imports '''
""" get file imports """
_imports = {}
for idx in range(idaapi.get_import_module_qty()):
@@ -79,9 +79,9 @@ def get_file_imports():
continue
def _inspect_import(ea, name, ordi):
if name and name.startswith('__imp_'):
if name and name.startswith("__imp_"):
# handle mangled names starting
name = name[len('__imp_'):]
name = name[len("__imp_") :]
_imports[ea] = (dllname.lower(), name, ordi)
return True
@@ -91,14 +91,14 @@ def get_file_imports():
def get_instructions_in_range(start, end):
''' yield instructions in range
""" yield instructions in range
args:
start: virtual address (inclusive)
end: virtual address (exclusive)
yield:
(insn_t*)
'''
"""
for head in idautils.Heads(start, end):
inst = idautils.DecodeInstruction(head)
if inst:
@@ -106,7 +106,7 @@ def get_instructions_in_range(start, end):
def is_operand_equal(op1, op2):
''' compare two IDA op_t '''
""" compare two IDA op_t """
if op1.flags != op2.flags:
return False
@@ -132,14 +132,12 @@ def is_operand_equal(op1, op2):
def is_basic_block_equal(bb1, bb2):
''' compare two IDA BasicBlock '''
return bb1.start_ea == bb2.start_ea \
and bb1.end_ea == bb2.end_ea \
and bb1.type == bb2.type
""" compare two IDA BasicBlock """
return bb1.start_ea == bb2.start_ea and bb1.end_ea == bb2.end_ea and bb1.type == bb2.type
def basic_block_size(bb):
''' calculate size of basic block '''
""" calculate size of basic block """
return bb.end_ea - bb.start_ea
@@ -152,11 +150,11 @@ def read_bytes_at(ea, count):
def find_string_at(ea, min=4):
''' check if ASCII string exists at a given virtual address '''
""" check if ASCII string exists at a given virtual address """
found = idaapi.get_strlit_contents(ea, -1, idaapi.STRTYPE_C)
if found and len(found) > min:
try:
found = found.decode('ascii')
found = found.decode("ascii")
# hacky check for IDA bug; get_strlit_contents also reads Unicode as
# myy__uunniiccoodde when searching in ASCII mode so we check for that here
# and return the fixed up value
@@ -169,11 +167,11 @@ def find_string_at(ea, min=4):
def get_op_phrase_info(op):
''' parse phrase features from operand
""" parse phrase features from operand
Pretty much dup of sark's implementation:
https://github.com/tmr232/Sark/blob/master/sark/code/instruction.py#L28-L73
'''
"""
if op.type not in (idaapi.o_phrase, idaapi.o_displ):
return
@@ -202,21 +200,21 @@ def get_op_phrase_info(op):
# This is only relevant to Intel architectures.
index = None
return {'base': base, 'index': index, 'scale': scale, 'offset': offset}
return {"base": base, "index": index, "scale": scale, "offset": offset}
def is_op_write(insn, op):
''' Check if an operand is written to (destination operand) '''
""" Check if an operand is written to (destination operand) """
return idaapi.has_cf_chg(insn.get_canon_feature(), op.n)
def is_op_read(insn, op):
''' Check if an operand is read from (source operand) '''
""" Check if an operand is read from (source operand) """
return idaapi.has_cf_use(insn.get_canon_feature(), op.n)
def is_sp_modified(insn):
''' determine if instruction modifies SP, ESP, RSP '''
""" determine if instruction modifies SP, ESP, RSP """
for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
if op.reg != idautils.procregs.sp.reg:
continue
@@ -228,7 +226,7 @@ def is_sp_modified(insn):
def is_bp_modified(insn):
''' check if instruction modifies BP, EBP, RBP '''
""" check if instruction modifies BP, EBP, RBP """
for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
if op.reg != idautils.procregs.bp.reg:
continue
@@ -240,12 +238,12 @@ def is_bp_modified(insn):
def is_frame_register(reg):
''' check if register is sp or bp '''
""" check if register is sp or bp """
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
def get_insn_ops(insn, op_type=None):
''' yield op_t for instruction, filter on type if specified '''
""" yield op_t for instruction, filter on type if specified """
for op in insn.ops:
if op.type == idaapi.o_void:
# avoid looping all 6 ops if only subset exists
@@ -258,17 +256,17 @@ def get_insn_ops(insn, op_type=None):
def ea_flags(ea):
''' retrieve processor flags for a given address '''
""" retrieve processor flags for a given address """
return idaapi.get_flags(ea)
def is_op_stack_var(ea, n):
''' check if operand is a stack variable '''
""" check if operand is a stack variable """
return idaapi.is_stkvar(ea_flags(ea), n)
def mask_op_val(op):
''' mask off a value based on data type
""" mask off a value based on data type
necesssary due to a bug in 64-bit
@@ -277,22 +275,22 @@ def mask_op_val(op):
insn.Op2.dtype == idaapi.dt_dword
insn.Op2.value == 0xffffffffffffffff
'''
"""
masks = {
idaapi.dt_byte: 0xFF,
idaapi.dt_word: 0xFFFF,
idaapi.dt_dword: 0xFFFFFFFF,
idaapi.dt_qword: 0xFFFFFFFFFFFFFFFF
idaapi.dt_qword: 0xFFFFFFFFFFFFFFFF,
}
mask = masks.get(op.dtype, None)
if not mask:
raise ValueError('No support for operand data type 0x%x' % op.dtype)
raise ValueError("No support for operand data type 0x%x" % op.dtype)
return mask & op.value
def ea_to_offset(ea):
''' convert virtual address to file offset '''
""" convert virtual address to file offset """
return idaapi.get_fileregion_offset(ea)

View File

@@ -26,7 +26,7 @@ def get_imports():
def _check_for_api_call(insn):
''' check instruction for API call '''
""" check instruction for API call """
if not idaapi.is_call_insn(insn):
return
@@ -34,7 +34,7 @@ def _check_for_api_call(insn):
imp = get_imports().get(call_ref, None)
if imp:
yield '%s.%s' % (imp[0], imp[1])
yield "%s.%s" % (imp[0], imp[1])
else:
f = idaapi.get_func(call_ref)
@@ -46,11 +46,11 @@ def _check_for_api_call(insn):
imp = get_imports().get(thunk_ref, None)
if imp:
yield '%s.%s' % (imp[0], imp[1])
yield "%s.%s" % (imp[0], imp[1])
def extract_insn_api_features(f, bb, insn):
''' parse instruction API features
""" parse instruction API features
args:
f (IDA func_t)
@@ -59,14 +59,14 @@ def extract_insn_api_features(f, bb, insn):
example:
call dword [0x00473038]
'''
"""
for api_name in _check_for_api_call(insn):
for feature, va in capa.features.extractors.helpers.generate_api_features(api_name, insn.ea):
yield feature, va
def extract_insn_number_features(f, bb, insn):
''' parse instruction number features
""" parse instruction number features
args:
f (IDA func_t)
@@ -75,7 +75,7 @@ def extract_insn_number_features(f, bb, insn):
example:
push 3136B0h ; dwControlCode
'''
"""
if idaapi.is_ret_insn(insn):
# skip things like:
# .text:0042250E retn 8
@@ -97,7 +97,7 @@ def extract_insn_number_features(f, bb, insn):
def extract_insn_bytes_features(f, bb, insn):
''' parse referenced byte sequences
""" parse referenced byte sequences
args:
f (IDA func_t)
@@ -106,7 +106,7 @@ def extract_insn_bytes_features(f, bb, insn):
example:
push offset iid_004118d4_IShellLinkA ; riid
'''
"""
if idaapi.is_call_insn(insn):
# ignore call instructions
return
@@ -119,7 +119,7 @@ def extract_insn_bytes_features(f, bb, insn):
def extract_insn_string_features(f, bb, insn):
''' parse instruction string features
""" parse instruction string features
args:
f (IDA func_t)
@@ -128,7 +128,7 @@ def extract_insn_string_features(f, bb, insn):
example:
push offset aAcr ; "ACR > "
'''
"""
for ref in idautils.DataRefsFrom(insn.ea):
found = capa.features.extractors.ida.helpers.find_string_at(ref)
if found:
@@ -136,7 +136,7 @@ def extract_insn_string_features(f, bb, insn):
def extract_insn_offset_features(f, bb, insn):
''' parse instruction structure offset features
""" parse instruction structure offset features
args:
f (IDA func_t)
@@ -145,7 +145,7 @@ def extract_insn_offset_features(f, bb, insn):
example:
.text:0040112F cmp [esi+4], ebx
'''
"""
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_phrase, idaapi.o_displ)):
if capa.features.extractors.ida.helpers.is_op_stack_var(insn.ea, op.n):
# skip stack offsets
@@ -156,7 +156,7 @@ def extract_insn_offset_features(f, bb, insn):
if not p_info:
continue
op_off = p_info['offset']
op_off = p_info["offset"]
if 0 == op_off:
# TODO: Do we want to record offset of zero?
@@ -172,26 +172,26 @@ def extract_insn_offset_features(f, bb, insn):
def _contains_stack_cookie_keywords(s):
''' check if string contains stack cookie keywords
""" check if string contains stack cookie keywords
Examples:
xor ecx, ebp ; StackCookie
mov eax, ___security_cookie
'''
"""
if not s:
return False
s = s.strip().lower()
if 'cookie' not in s:
if "cookie" not in s:
return False
return any(keyword in s for keyword in ('stack', 'security'))
return any(keyword in s for keyword in ("stack", "security"))
def _bb_stack_cookie_registers(bb):
''' scan basic block for stack cookie operations
""" scan basic block for stack cookie operations
yield registers ids that may have been used for stack cookie operations
@@ -215,7 +215,7 @@ def _bb_stack_cookie_registers(bb):
.text:004062FA jnz loc_40639D
TODO: this is expensive, but necessary?...
'''
"""
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
if _contains_stack_cookie_keywords(idc.GetDisasm(insn.ea)):
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_reg,)):
@@ -225,7 +225,7 @@ def _bb_stack_cookie_registers(bb):
def _is_nzxor_stack_cookie(f, bb, insn):
''' check if nzxor is related to stack cookie '''
""" check if nzxor is related to stack cookie """
if _contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)):
# Example:
# xor ecx, ebp ; StackCookie
@@ -241,7 +241,7 @@ def _is_nzxor_stack_cookie(f, bb, insn):
def extract_insn_nzxor_characteristic_features(f, bb, insn):
''' parse instruction non-zeroing XOR instruction
""" parse instruction non-zeroing XOR instruction
ignore expected non-zeroing XORs, e.g. security cookies
@@ -249,7 +249,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
f (IDA func_t)
bb (IDA BasicBlock)
insn (IDA insn_t)
'''
"""
if insn.itype != idaapi.NN_xor:
return
@@ -259,28 +259,28 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
if _is_nzxor_stack_cookie(f, bb, insn):
return
yield Characteristic('nzxor', True), insn.ea
yield Characteristic("nzxor", True), insn.ea
def extract_insn_mnemonic_features(f, bb, insn):
''' parse instruction mnemonic features
""" parse instruction mnemonic features
args:
f (IDA func_t)
bb (IDA BasicBlock)
insn (IDA insn_t)
'''
"""
yield Mnemonic(insn.get_canon_mnem()), insn.ea
def extract_insn_peb_access_characteristic_features(f, bb, insn):
''' parse instruction peb access
""" parse instruction peb access
fs:[0x30] on x86, gs:[0x60] on x64
TODO:
IDA should be able to do this..
'''
"""
if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
return
@@ -290,40 +290,40 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
disasm = idc.GetDisasm(insn.ea)
if ' fs:30h' in disasm or ' gs:60h' in disasm:
if " fs:30h" in disasm or " gs:60h" in disasm:
# TODO: replace above with proper IDA
yield Characteristic('peb access', True), insn.ea
yield Characteristic("peb access", True), insn.ea
def extract_insn_segment_access_features(f, bb, insn):
''' parse instruction fs or gs access
""" parse instruction fs or gs access
TODO:
IDA should be able to do this...
'''
"""
if all(map(lambda op: op.type != idaapi.o_mem, insn.ops)):
# try to optimize for only memory referencese
return
disasm = idc.GetDisasm(insn.ea)
if ' fs:' in disasm:
if " fs:" in disasm:
# TODO: replace above with proper IDA
yield Characteristic('fs access', True), insn.ea
yield Characteristic("fs access", True), insn.ea
if ' gs:' in disasm:
if " gs:" in disasm:
# TODO: replace above with proper IDA
yield Characteristic('gs access', True), insn.ea
yield Characteristic("gs access", True), insn.ea
def extract_insn_cross_section_cflow(f, bb, insn):
''' inspect the instruction for a CALL or JMP that crosses section boundaries
""" inspect the instruction for a CALL or JMP that crosses section boundaries
args:
f (IDA func_t)
bb (IDA BasicBlock)
insn (IDA insn_t)
'''
"""
for ref in idautils.CodeRefsFrom(insn.ea, False):
if ref in get_imports().keys():
# ignore API calls
@@ -336,11 +336,11 @@ def extract_insn_cross_section_cflow(f, bb, insn):
if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
continue
yield Characteristic('cross section flow', True), insn.ea
yield Characteristic("cross section flow", True), insn.ea
def extract_function_calls_from(f, bb, insn):
''' extract functions calls from features
""" extract functions calls from features
most relevant at the function scope, however, its most efficient to extract at the instruction scope
@@ -348,17 +348,17 @@ def extract_function_calls_from(f, bb, insn):
f (IDA func_t)
bb (IDA BasicBlock)
insn (IDA insn_t)
'''
"""
if not idaapi.is_call_insn(insn):
# ignore jmp, etc.
return
for ref in idautils.CodeRefsFrom(insn.ea, False):
yield Characteristic('calls from', True), ref
yield Characteristic("calls from", True), ref
def extract_function_indirect_call_characteristic_features(f, bb, insn):
''' extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
""" extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
does not include calls like => call ds:dword_ABD4974
most relevant at the function or basic block scope;
@@ -368,22 +368,22 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
f (IDA func_t)
bb (IDA BasicBlock)
insn (IDA insn_t)
'''
"""
if not idaapi.is_call_insn(insn):
return
if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
yield Characteristic('indirect call', True), insn.ea
yield Characteristic("indirect call", True), insn.ea
def extract_features(f, bb, insn):
''' extract instruction features
""" extract instruction features
args:
f (IDA func_t)
bb (IDA BasicBlock)
insn (IDA insn_t)
'''
"""
for inst_handler in INSTRUCTION_HANDLERS:
for feature, va in inst_handler(f, bb, insn):
yield feature, va
@@ -401,7 +401,7 @@ INSTRUCTION_HANDLERS = (
extract_insn_cross_section_cflow,
extract_insn_segment_access_features,
extract_function_calls_from,
extract_function_indirect_call_characteristic_features
extract_function_indirect_call_characteristic_features,
)
@@ -416,5 +416,5 @@ def main():
pprint.pprint(features)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -3,7 +3,7 @@ from networkx import nx
def has_loop(edges, threshold=2):
''' check if a list of edges representing a directed graph contains a loop
""" check if a list of edges representing a directed graph contains a loop
args:
edges: list of edge sets representing a directed graph i.e. [(1, 2), (2, 1)]
@@ -11,7 +11,7 @@ def has_loop(edges, threshold=2):
returns:
bool
'''
"""
g = nx.DiGraph()
g.add_edges_from(edges)
return any(len(comp) >= threshold for comp in strongly_connected_components(g))

View File

@@ -7,26 +7,28 @@ import re
from collections import namedtuple
ASCII_BYTE = r' !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t'.encode('ascii')
ASCII_RE_4 = re.compile(b'([%s]{%d,})' % (ASCII_BYTE, 4))
UNICODE_RE_4 = re.compile(b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, 4))
REPEATS = [b'A', b'\x00', b'\xfe', b'\xff']
ASCII_BYTE = r" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t".encode(
"ascii"
)
ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4))
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
SLICE_SIZE = 4096
String = namedtuple('String', ['s', 'offset'])
String = namedtuple("String", ["s", "offset"])
def buf_filled_with(buf, character):
dupe_chunk = character * SLICE_SIZE
for offset in range(0, len(buf), SLICE_SIZE):
new_chunk = buf[offset: offset + SLICE_SIZE]
if dupe_chunk[:len(new_chunk)] != new_chunk:
new_chunk = buf[offset : offset + SLICE_SIZE]
if dupe_chunk[: len(new_chunk)] != new_chunk:
return False
return True
def extract_ascii_strings(buf, n=4):
'''
"""
Extract ASCII strings from the given binary data.
:param buf: A bytestring.
@@ -34,7 +36,7 @@ def extract_ascii_strings(buf, n=4):
:param n: The minimum length of strings to extract.
:type n: int
:rtype: Sequence[String]
'''
"""
if not buf:
return
@@ -46,14 +48,14 @@ def extract_ascii_strings(buf, n=4):
if n == 4:
r = ASCII_RE_4
else:
reg = b'([%s]{%d,})' % (ASCII_BYTE, n)
reg = b"([%s]{%d,})" % (ASCII_BYTE, n)
r = re.compile(reg)
for match in r.finditer(buf):
yield String(match.group().decode('ascii'), match.start())
yield String(match.group().decode("ascii"), match.start())
def extract_unicode_strings(buf, n=4):
'''
"""
Extract naive UTF-16 strings from the given binary data.
:param buf: A bytestring.
@@ -61,7 +63,7 @@ def extract_unicode_strings(buf, n=4):
:param n: The minimum length of strings to extract.
:type n: int
:rtype: Sequence[String]
'''
"""
if not buf:
return
@@ -72,11 +74,11 @@ def extract_unicode_strings(buf, n=4):
if n == 4:
r = UNICODE_RE_4
else:
reg = b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, n)
reg = b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n)
r = re.compile(reg)
for match in r.finditer(buf):
try:
yield String(match.group().decode('utf-16'), match.start())
yield String(match.group().decode("utf-16"), match.start())
except UnicodeDecodeError:
pass
@@ -84,15 +86,15 @@ def extract_unicode_strings(buf, n=4):
def main():
import sys
with open(sys.argv[1], 'rb') as f:
with open(sys.argv[1], "rb") as f:
b = f.read()
for s in extract_ascii_strings(b):
print('0x{:x}: {:s}'.format(s.offset, s.s))
print("0x{:x}: {:s}".format(s.offset, s.s))
for s in extract_unicode_strings(b):
print('0x{:x}: {:s}'.format(s.offset, s.s))
print("0x{:x}: {:s}".format(s.offset, s.s))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -13,7 +13,8 @@ import file
import function
import basicblock
import insn
__all__ = ['file', 'function', 'basicblock', 'insn']
__all__ = ["file", "function", "basicblock", "insn"]
def get_va(self):
@@ -27,14 +28,14 @@ def get_va(self):
def add_va_int_cast(o):
'''
"""
dynamically add a cast-to-int (`__int__`) method to the given object
that returns the value of the `.va` property.
this bit of skullduggery lets use cast viv-utils objects as ints.
the correct way of doing this is to update viv-utils (or subclass the objects here).
'''
setattr(o, '__int__', types.MethodType(get_va, o, type(o)))
"""
setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
return o

View File

@@ -10,7 +10,7 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
def interface_extract_basic_block_XXX(f, bb):
'''
"""
parse features from the given basic block.
args:
@@ -19,14 +19,14 @@ def interface_extract_basic_block_XXX(f, bb):
yields:
(Feature, int): the feature and the address at which its found.
'''
yield NotImplementedError('feature'), NotImplementedError('virtual address')
"""
yield NotImplementedError("feature"), NotImplementedError("virtual address")
def _bb_has_tight_loop(f, bb):
'''
"""
parse tight loops, true if last instruction in basic block branches to bb start
'''
"""
if len(bb.instructions) > 0:
for bva, bflags in bb.instructions[-1].getBranches():
if bflags & vivisect.envi.BR_COND:
@@ -37,16 +37,16 @@ def _bb_has_tight_loop(f, bb):
def extract_bb_tight_loop(f, bb):
''' check basic block for tight loop indicators '''
""" check basic block for tight loop indicators """
if _bb_has_tight_loop(f, bb):
yield Characteristic('tight loop', True), bb.va
yield Characteristic("tight loop", True), bb.va
def _bb_has_stackstring(f, bb):
'''
"""
extract potential stackstring creation, using the following heuristics:
- basic block contains enough moves of constant bytes to the stack
'''
"""
count = 0
for instr in bb.instructions:
if is_mov_imm_to_stack(instr):
@@ -60,16 +60,16 @@ def _bb_has_stackstring(f, bb):
def extract_stackstring(f, bb):
''' check basic block for stackstring indicators '''
""" check basic block for stackstring indicators """
if _bb_has_stackstring(f, bb):
yield Characteristic('stack string', True), bb.va
yield Characteristic("stack string", True), bb.va
def is_mov_imm_to_stack(instr):
'''
"""
Return if instruction moves immediate onto stack
'''
if not instr.mnem.startswith('mov'):
"""
if not instr.mnem.startswith("mov"):
return False
try:
@@ -82,32 +82,33 @@ def is_mov_imm_to_stack(instr):
return False
# TODO what about 64-bit operands?
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and \
not isinstance(dst, envi.archs.i386.disasm.i386RegMemOper):
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance(
dst, envi.archs.i386.disasm.i386RegMemOper
):
return False
if not dst.reg:
return False
rname = dst._dis_regctx.getRegisterName(dst.reg)
if rname not in ['ebp', 'rbp', 'esp', 'rsp']:
if rname not in ["ebp", "rbp", "esp", "rsp"]:
return False
return True
def get_printable_len(oper):
'''
"""
Return string length if all operand bytes are ascii or utf16-le printable
'''
"""
if oper.tsize == 1:
chars = struct.pack('<B', oper.imm)
chars = struct.pack("<B", oper.imm)
elif oper.tsize == 2:
chars = struct.pack('<H', oper.imm)
chars = struct.pack("<H", oper.imm)
elif oper.tsize == 4:
chars = struct.pack('<I', oper.imm)
chars = struct.pack("<I", oper.imm)
elif oper.tsize == 8:
chars = struct.pack('<Q', oper.imm)
chars = struct.pack("<Q", oper.imm)
if is_printable_ascii(chars):
return oper.tsize
if is_printable_utf16le(chars):
@@ -120,12 +121,12 @@ def is_printable_ascii(chars):
def is_printable_utf16le(chars):
if all(c == '\x00' for c in chars[1::2]):
if all(c == "\x00" for c in chars[1::2]):
return is_printable_ascii(chars[::2])
def extract_features(f, bb):
'''
"""
extract features from the given basic block.
args:
@@ -134,7 +135,7 @@ def extract_features(f, bb):
yields:
Feature, set[VA]: the features and their location found in this basic block.
'''
"""
yield BasicBlock(), bb.va
for bb_handler in BASIC_BLOCK_HANDLERS:
for feature, va in bb_handler(f, bb):

View File

@@ -9,11 +9,11 @@ import capa.features.extractors.strings
def extract_file_embedded_pe(vw, file_path):
with open(file_path, 'rb') as f:
with open(file_path, "rb") as f:
fbytes = f.read()
for offset, i in pe_carve.carve(fbytes, 1):
yield Characteristic('embedded pe', True), offset
yield Characteristic("embedded pe", True), offset
def extract_file_export_names(vw, file_path):
@@ -22,21 +22,21 @@ def extract_file_export_names(vw, file_path):
def extract_file_import_names(vw, file_path):
'''
"""
extract imported function names
1. imports by ordinal:
- modulename.#ordinal
2. imports by name, results in two features to support importname-only matching:
- modulename.importname
- importname
'''
"""
for va, _, _, tinfo in vw.getImports():
# vivisect source: tinfo = "%s.%s" % (libname, impname)
modname, impname = tinfo.split('.')
modname, impname = tinfo.split(".")
if is_viv_ord_impname(impname):
# replace ord prefix with #
impname = '#%s' % impname[len('ord'):]
tinfo = '%s.%s' % (modname, impname)
impname = "#%s" % impname[len("ord") :]
tinfo = "%s.%s" % (modname, impname)
yield Import(tinfo), va
else:
yield Import(tinfo), va
@@ -44,13 +44,13 @@ def extract_file_import_names(vw, file_path):
def is_viv_ord_impname(impname):
'''
"""
return if import name matches vivisect's ordinal naming scheme `'ord%d' % ord`
'''
if not impname.startswith('ord'):
"""
if not impname.startswith("ord"):
return False
try:
int(impname[len('ord'):])
int(impname[len("ord") :])
except ValueError:
return False
else:
@@ -63,10 +63,10 @@ def extract_file_section_names(vw, file_path):
def extract_file_strings(vw, file_path):
'''
"""
extract ASCII and UTF-16 LE strings from file
'''
with open(file_path, 'rb') as f:
"""
with open(file_path, "rb") as f:
b = f.read()
for s in capa.features.extractors.strings.extract_ascii_strings(b):
@@ -77,7 +77,7 @@ def extract_file_strings(vw, file_path):
def extract_features(vw, file_path):
'''
"""
extract file features from given workspace
args:
@@ -86,7 +86,7 @@ def extract_features(vw, file_path):
yields:
Tuple[Feature, VA]: a feature and its location.
'''
"""
for file_handler in FILE_HANDLERS:
for feature, va in file_handler(vw, file_path):

View File

@@ -5,7 +5,7 @@ from capa.features.extractors import loops
def interface_extract_function_XXX(f):
'''
"""
parse features from the given function.
args:
@@ -13,58 +13,58 @@ def interface_extract_function_XXX(f):
yields:
(Feature, int): the feature and the address at which its found.
'''
yield NotImplementedError('feature'), NotImplementedError('virtual address')
"""
yield NotImplementedError("feature"), NotImplementedError("virtual address")
def get_switches(vw):
'''
"""
caching accessor to vivisect workspace switch constructs.
'''
if 'switches' in vw.metadata:
return vw.metadata['switches']
"""
if "switches" in vw.metadata:
return vw.metadata["switches"]
else:
# addresses of switches in the program
switches = set()
for case_va, _ in filter(lambda t: 'case' in t[1], vw.getNames()):
for case_va, _ in filter(lambda t: "case" in t[1], vw.getNames()):
# assume that the xref to a case location is a switch construct
for switch_va, _, _, _ in vw.getXrefsTo(case_va):
switches.add(switch_va)
vw.metadata['switches'] = switches
vw.metadata["switches"] = switches
return switches
def get_functions_with_switch(vw):
if 'functions_with_switch' in vw.metadata:
return vw.metadata['functions_with_switch']
if "functions_with_switch" in vw.metadata:
return vw.metadata["functions_with_switch"]
else:
functions = set()
for switch in get_switches(vw):
functions.add(vw.getFunction(switch))
vw.metadata['functions_with_switch'] = functions
vw.metadata["functions_with_switch"] = functions
return functions
def extract_function_switch(f):
'''
"""
parse if a function contains a switch statement based on location names
method can be optimized
'''
"""
if f.va in get_functions_with_switch(f.vw):
yield Characteristic('switch', True), f.va
yield Characteristic("switch", True), f.va
def extract_function_calls_to(f):
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
yield Characteristic('calls to', True), src
yield Characteristic("calls to", True), src
def extract_function_loop(f):
'''
"""
parse if a function has a loop
'''
"""
edges = []
for bb in f.basic_blocks:
@@ -74,11 +74,11 @@ def extract_function_loop(f):
edges.append((bb.va, bva))
if edges and loops.has_loop(edges):
yield Characteristic('loop', True), f.va
yield Characteristic("loop", True), f.va
def extract_features(f):
'''
"""
extract features from the given function.
args:
@@ -86,14 +86,10 @@ def extract_features(f):
yields:
Feature, set[VA]: the features and their location found in this function.
'''
"""
for func_handler in FUNCTION_HANDLERS:
for feature, va in func_handler(f):
yield feature, va
FUNCTION_HANDLERS = (
extract_function_switch,
extract_function_calls_to,
extract_function_loop
)
FUNCTION_HANDLERS = (extract_function_switch, extract_function_calls_to, extract_function_loop)

View File

@@ -14,13 +14,13 @@ Amd64RipRelOper = envi.archs.amd64.disasm.Amd64RipRelOper
LOC_OP = vivisect.const.LOC_OP
IF_NOFALL = envi.IF_NOFALL
REF_CODE = vivisect.const.REF_CODE
FAR_BRANCH_MASK = (envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH)
FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH
DESTRUCTIVE_MNEMONICS = ('mov', 'lea', 'pop', 'xor')
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
def get_previous_instructions(vw, va):
'''
"""
collect the instructions that flow to the given address, local to the current function.
args:
@@ -29,7 +29,7 @@ def get_previous_instructions(vw, va):
returns:
List[int]: the prior instructions, which may fallthrough and/or jump here
'''
"""
ret = []
# find the immediate prior instruction.
@@ -61,7 +61,7 @@ class NotFoundError(Exception):
def find_definition(vw, va, reg):
'''
"""
scan backwards from the given address looking for assignments to the given register.
if a constant, return that value.
@@ -75,7 +75,7 @@ def find_definition(vw, va, reg):
raises:
NotFoundError: when the definition cannot be found.
'''
"""
q = collections.deque()
seen = set([])
@@ -95,10 +95,7 @@ def find_definition(vw, va, reg):
continue
opnd0 = insn.opers[0]
if not \
(isinstance(opnd0, i386RegOper)
and opnd0.reg == reg
and insn.mnem in DESTRUCTIVE_MNEMONICS):
if not (isinstance(opnd0, i386RegOper) and opnd0.reg == reg and insn.mnem in DESTRUCTIVE_MNEMONICS):
q.extend(get_previous_instructions(vw, cur))
continue
@@ -107,7 +104,7 @@ def find_definition(vw, va, reg):
# we currently only support extracting the constant from something like: `mov $reg, IAT`
# so, any other pattern results in an unknown value, represented by None.
# this is a good place to extend in the future, if we need more robust support.
if insn.mnem != 'mov':
if insn.mnem != "mov":
return (cur, None)
else:
opnd1 = insn.opers[1]
@@ -128,12 +125,11 @@ def is_indirect_call(vw, va, insn=None):
if insn is None:
insn = vw.parseOpcode(va)
return (insn.mnem == 'call'
and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper))
return insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
def resolve_indirect_call(vw, va, insn=None):
'''
"""
inspect the given indirect call instruction and attempt to resolve the target address.
args:
@@ -145,7 +141,7 @@ def resolve_indirect_call(vw, va, insn=None):
raises:
NotFoundError: when the definition cannot be found.
'''
"""
if insn is None:
insn = vw.parseOpcode(va)
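# a usage sketch tying the helpers above together: collect the resolvable
# targets of indirect calls. `insn_vas` is assumed to be an iterable of
# instruction addresses within one function; how they are enumerated is
# outside this diff.
def resolve_indirect_call_targets(vw, insn_vas):
    targets = {}
    for va in insn_vas:
        if not is_indirect_call(vw, va):
            continue
        try:
            # resolution raises NotFoundError when no definition is found
            targets[va] = resolve_indirect_call(vw, va)
        except NotFoundError:
            continue
    return targets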
View File
@@ -15,7 +15,7 @@ from capa.features.extractors.viv.indirect_calls import resolve_indirect_call
def interface_extract_instruction_XXX(f, bb, insn):
'''
"""
parse features from the given instruction.
args:
@@ -25,31 +25,31 @@ def interface_extract_instruction_XXX(f, bb, insn):
yields:
(Feature, int): the feature and the address at which it's found.
'''
yield NotImplementedError('feature'), NotImplementedError('virtual address')
"""
yield NotImplementedError("feature"), NotImplementedError("virtual address")
def get_imports(vw):
'''
"""
caching accessor to vivisect workspace imports
avoids performance issues in vivisect when collecting locations
'''
if 'imports' in vw.metadata:
return vw.metadata['imports']
"""
if "imports" in vw.metadata:
return vw.metadata["imports"]
else:
imports = {p[0]: p[3] for p in vw.getImports()}
vw.metadata['imports'] = imports
vw.metadata["imports"] = imports
return imports
def extract_insn_api_features(f, bb, insn):
'''parse API features from the given instruction.'''
"""parse API features from the given instruction."""
# example:
#
# call dword [0x00473038]
if insn.mnem != 'call':
if insn.mnem != "call":
return
# traditional call via IAT
@@ -71,7 +71,7 @@ def extract_insn_api_features(f, bb, insn):
target = insn.opers[0].getOperValue(insn)
try:
thunk = f.vw.getFunctionMeta(target, 'Thunk')
thunk = f.vw.getFunctionMeta(target, "Thunk")
except vivisect.exc.InvalidFunction:
return
else:
@@ -108,7 +108,7 @@ def extract_insn_api_features(f, bb, insn):
def extract_insn_number_features(f, bb, insn):
'''parse number features from the given instruction.'''
"""parse number features from the given instruction."""
# example:
#
# push 3136B0h ; dwControlCode
@@ -124,9 +124,7 @@ def extract_insn_number_features(f, bb, insn):
# assume it's not also a constant.
continue
if insn.mnem == 'add' \
and insn.opers[0].isReg() \
and insn.opers[0].reg == envi.archs.i386.disasm.REG_ESP:
if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.disasm.REG_ESP:
# skip things like:
#
# .text:00401140 call sub_407E2B
@@ -137,13 +135,13 @@ def extract_insn_number_features(f, bb, insn):
def extract_insn_bytes_features(f, bb, insn):
'''
"""
parse byte sequence features from the given instruction.
example:
# push offset iid_004118d4_IShellLinkA ; riid
'''
"""
for oper in insn.opers:
if insn.mnem == 'call':
if insn.mnem == "call":
# ignore call instructions
continue
@@ -184,7 +182,7 @@ def read_string(vw, offset):
pass
else:
if alen > 0:
return vw.readMemory(offset, alen).decode('utf-8')
return vw.readMemory(offset, alen).decode("utf-8")
try:
ulen = vw.detectUnicode(offset)
@@ -199,13 +197,13 @@ def read_string(vw, offset):
# vivisect seems to mis-detect the end of unicode strings
# off by one, too short
ulen += 1
return vw.readMemory(offset, ulen).decode('utf-16')
return vw.readMemory(offset, ulen).decode("utf-16")
raise ValueError('not a string', offset)
raise ValueError("not a string", offset)
def extract_insn_string_features(f, bb, insn):
'''parse string features from the given instruction.'''
"""parse string features from the given instruction."""
# example:
#
# push offset aAcr ; "ACR > "
@@ -222,11 +220,11 @@ def extract_insn_string_features(f, bb, insn):
except ValueError:
continue
else:
yield String(s.rstrip('\x00')), insn.va
yield String(s.rstrip("\x00")), insn.va
def extract_insn_offset_features(f, bb, insn):
'''parse structure offset features from the given instruction.'''
"""parse structure offset features from the given instruction."""
# example:
#
# .text:0040112F cmp [esi+4], ebx
@@ -249,15 +247,18 @@ def extract_insn_offset_features(f, bb, insn):
def is_security_cookie(f, bb, insn):
'''
"""
check if an instruction is related to security cookie checks
'''
"""
# security cookie check should use SP or BP
oper = insn.opers[1]
if oper.isReg() \
and oper.reg not in [envi.archs.i386.disasm.REG_ESP, envi.archs.i386.disasm.REG_EBP,
# TODO: do x64 support for real.
envi.archs.amd64.disasm.REG_RBP, envi.archs.amd64.disasm.REG_RSP]:
if oper.isReg() and oper.reg not in [
envi.archs.i386.disasm.REG_ESP,
envi.archs.i386.disasm.REG_EBP,
# TODO: do x64 support for real.
envi.archs.amd64.disasm.REG_RBP,
envi.archs.amd64.disasm.REG_RSP,
]:
return False
# expect the security cookie init in the first basic block, within the first few instructions
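# typical MSVC /GS patterns this heuristic is meant to recognize (illustrative):
#
#     mov  eax, ___security_cookie
#     xor  eax, ebp                    ; cookie init in the first basic block
#     ...
#     xor  ecx, ebp                    ; cookie check near the function epilog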
@@ -273,11 +274,11 @@ def is_security_cookie(f, bb, insn):
def extract_insn_nzxor_characteristic_features(f, bb, insn):
'''
"""
parse non-zeroing XOR instruction from the given instruction.
ignore expected non-zeroing XORs, e.g. security cookies.
'''
if insn.mnem != 'xor':
"""
if insn.mnem != "xor":
return
if insn.opers[0] == insn.opers[1]:
@@ -286,24 +287,24 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
if is_security_cookie(f, bb, insn):
return
yield Characteristic('nzxor', True), insn.va
yield Characteristic("nzxor", True), insn.va
def extract_insn_mnemonic_features(f, bb, insn):
'''parse mnemonic features from the given instruction.'''
"""parse mnemonic features from the given instruction."""
yield Mnemonic(insn.mnem), insn.va
def extract_insn_peb_access_characteristic_features(f, bb, insn):
'''
"""
parse peb access from the given instruction: fs:[0x30] on x86, gs:[0x60] on x64
'''
"""
# TODO handle where fs/gs are loaded into a register or onto the stack and used later
if insn.mnem not in ['push', 'mov']:
if insn.mnem not in ["push", "mov"]:
return
if 'fs' in insn.getPrefixName():
if "fs" in insn.getPrefixName():
for oper in insn.opers:
# examples
#
@@ -312,27 +313,29 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
# IDA: push large dword ptr fs:30h
# viv: fs: push dword [0x00000030]
# fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or \
(isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30):
yield Characteristic('peb access', True), insn.va
elif 'gs' in insn.getPrefixName():
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (
isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
):
yield Characteristic("peb access", True), insn.va
elif "gs" in insn.getPrefixName():
for oper in insn.opers:
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or \
(isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60):
yield Characteristic('peb access', True), insn.va
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
):
yield Characteristic("peb access", True), insn.va
else:
pass
def extract_insn_segment_access_features(f, bb, insn):
''' parse the instruction for access to fs or gs '''
""" parse the instruction for access to fs or gs """
prefix = insn.getPrefixName()
if prefix == 'fs':
yield Characteristic('fs access', True), insn.va
if prefix == "fs":
yield Characteristic("fs access", True), insn.va
if prefix == 'gs':
yield Characteristic('gs access', True), insn.va
if prefix == "gs":
yield Characteristic("gs access", True), insn.va
def get_section(vw, va):
@@ -344,16 +347,16 @@ def get_section(vw, va):
def extract_insn_cross_section_cflow(f, bb, insn):
'''
"""
inspect the instruction for a CALL or JMP that crosses section boundaries.
'''
"""
for va, flags in insn.getBranches():
if flags & envi.BR_FALL:
continue
try:
# skip 32-bit calls to imports
if insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
if insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
oper = insn.opers[0]
target = oper.getOperAddr(insn)
@@ -361,7 +364,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
continue
# skip 64-bit calls to imports
elif insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
elif insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
op = insn.opers[0]
target = op.getOperAddr(insn)
@@ -369,7 +372,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
continue
if get_section(f.vw, insn.va) != get_section(f.vw, va):
yield Characteristic('cross section flow', True), insn.va
yield Characteristic("cross section flow", True), insn.va
except KeyError:
continue
@@ -378,7 +381,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
# this is a feature that's most relevant at the function scope,
# however, it's most efficient to extract at the instruction scope.
def extract_function_calls_from(f, bb, insn):
if insn.mnem != 'call':
if insn.mnem != "call":
return
target = None
@@ -387,7 +390,7 @@ def extract_function_calls_from(f, bb, insn):
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
oper = insn.opers[0]
target = oper.getOperAddr(insn)
yield Characteristic('calls from', True), target
yield Characteristic("calls from", True), target
# call via thunk on x86,
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
@@ -396,44 +399,44 @@ def extract_function_calls_from(f, bb, insn):
# see Lab21-01.exe_:0x140001178
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
target = insn.opers[0].getOperValue(insn)
yield Characteristic('calls from', True), target
yield Characteristic("calls from", True), target
# call via IAT, x64
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
op = insn.opers[0]
target = op.getOperAddr(insn)
yield Characteristic('calls from', True), target
yield Characteristic("calls from", True), target
if target and target == f.va:
# if we found a jump target and it's the function address
# mark as recursive
yield Characteristic('recursive call', True), target
yield Characteristic("recursive call", True), target
# this is a feature that's most relevant at the function or basic block scope,
# however, it's most efficient to extract at the instruction scope.
def extract_function_indirect_call_characteristic_features(f, bb, insn):
'''
"""
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
does not include calls via a fixed memory address, e.g.: call ds:dword_ABD4974
'''
if insn.mnem != 'call':
"""
if insn.mnem != "call":
return
# Checks below work for x86 and x64
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
# call edx
yield Characteristic('indirect call', True), insn.va
yield Characteristic("indirect call", True), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper):
# call dword ptr [eax+50h]
yield Characteristic('indirect call', True), insn.va
yield Characteristic("indirect call", True), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper):
# call qword ptr [rsp+78h]
yield Characteristic('indirect call', True), insn.va
yield Characteristic("indirect call", True), insn.va
def extract_features(f, bb, insn):
'''
"""
extract features from the given insn.
args:
@@ -443,7 +446,7 @@ def extract_features(f, bb, insn):
yields:
Feature, set[VA]: the features and the locations at which they were found in this insn.
'''
"""
for insn_handler in INSTRUCTION_HANDLERS:
for feature, va in insn_handler(f, bb, insn):
yield feature, va
@@ -461,5 +464,5 @@ INSTRUCTION_HANDLERS = (
extract_insn_cross_section_cflow,
extract_insn_segment_access_features,
extract_function_calls_from,
extract_function_indirect_call_characteristic_features
extract_function_indirect_call_characteristic_features,
)
View File
@@ -8,7 +8,7 @@ class Export(Feature):
self.value = value
def __str__(self):
return 'Export(%s)' % (self.value)
return "Export(%s)" % (self.value)
class Import(Feature):
@@ -18,7 +18,7 @@ class Import(Feature):
self.value = value
def __str__(self):
return 'Import(%s)' % (self.value)
return "Import(%s)" % (self.value)
class Section(Feature):
@@ -28,4 +28,4 @@ class Section(Feature):
self.value = value
def __str__(self):
return 'Section(%s)' % (self.value)
return "Section(%s)" % (self.value)
View File
@@ -1,4 +1,4 @@
'''
"""
capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`
json format:
@@ -39,7 +39,7 @@ json format:
],
}
}
'''
"""
import json
import zlib
import logging
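# the container format above, written out by hand using only the stdlib;
# equivalent in spirit to the `dump`/`load` helpers defined later in this
# module (`doc` is an illustrative result document):
#
#     buf = b"capa0000" + zlib.compress(json.dumps(doc).encode("utf-8"))
#     doc = json.loads(zlib.decompress(buf[len(b"capa0000"):]).decode("utf-8"))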
@@ -61,10 +61,7 @@ def serialize_feature(feature):
return feature.freeze_serialize()
KNOWN_FEATURES = {
F.__name__: F
for F in capa.features.Feature.__subclasses__()
}
KNOWN_FEATURES = {F.__name__: F for F in capa.features.Feature.__subclasses__()}
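# `deserialize_feature` below is elided in this diff. given the registry above
# and the serialized shape ("FeatureName", args, ...) documented later in this
# file, a plausible sketch follows; the real implementation may differ.
def deserialize_feature_sketch(doc):
    name, args = doc[0], doc[1]
    return KNOWN_FEATURES[name](*args)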
def deserialize_feature(doc):
@@ -73,7 +70,7 @@ def deserialize_feature(doc):
def dumps(extractor):
'''
"""
serialize the given extractor to a string
args:
@@ -81,79 +78,64 @@ def dumps(extractor):
returns:
str: the serialized features.
'''
ret = {
'version': 1,
'functions': {},
'scopes': {
'file': [],
'function': [],
'basic block': [],
'instruction': [],
}
}
"""
ret = {"version": 1, "functions": {}, "scopes": {"file": [], "function": [], "basic block": [], "instruction": [],}}
for feature, va in extractor.extract_file_features():
ret['scopes']['file'].append(
serialize_feature(feature) + (hex(va), ())
)
ret["scopes"]["file"].append(serialize_feature(feature) + (hex(va), ()))
for f in extractor.get_functions():
ret['functions'][hex(f)] = {}
ret["functions"][hex(f)] = {}
for feature, va in extractor.extract_function_features(f):
ret['scopes']['function'].append(
serialize_feature(feature) + (hex(va), (hex(f), ))
)
ret["scopes"]["function"].append(serialize_feature(feature) + (hex(va), (hex(f),)))
for bb in extractor.get_basic_blocks(f):
ret['functions'][hex(f)][hex(bb)] = []
ret["functions"][hex(f)][hex(bb)] = []
for feature, va in extractor.extract_basic_block_features(f, bb):
ret['scopes']['basic block'].append(
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), ))
)
ret["scopes"]["basic block"].append(serialize_feature(feature) + (hex(va), (hex(f), hex(bb),)))
for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]):
ret['functions'][hex(f)][hex(bb)].append(hex(insnva))
ret["functions"][hex(f)][hex(bb)].append(hex(insnva))
for feature, va in extractor.extract_insn_features(f, bb, insn):
ret['scopes']['instruction'].append(
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva), ))
ret["scopes"]["instruction"].append(
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva),))
)
return json.dumps(ret)
def loads(s):
'''deserialize a set of features (as a NullFeatureExtractor) from a string.'''
"""deserialize a set of features (as a NullFeatureExtractor) from a string."""
doc = json.loads(s)
if doc.get('version') != 1:
raise ValueError('unsupported freeze format version: %d' % (doc.get('version')))
if doc.get("version") != 1:
raise ValueError("unsupported freeze format version: %d" % (doc.get("version")))
features = {
'file features': [],
'functions': {},
"file features": [],
"functions": {},
}
for fva, function in doc.get('functions', {}).items():
for fva, function in doc.get("functions", {}).items():
fva = int(fva, 0x10)
features['functions'][fva] = {
'features': [],
'basic blocks': {},
features["functions"][fva] = {
"features": [],
"basic blocks": {},
}
for bbva, bb in function.items():
bbva = int(bbva, 0x10)
features['functions'][fva]['basic blocks'][bbva] = {
'features': [],
'instructions': {},
features["functions"][fva]["basic blocks"][bbva] = {
"features": [],
"instructions": {},
}
for insnva in bb:
insnva = int(insnva, 0x10)
features['functions'][fva]['basic blocks'][bbva]['instructions'][insnva] = {
'features': [],
features["functions"][fva]["basic blocks"][bbva]["instructions"][insnva] = {
"features": [],
}
# in the following blocks, each entry looks like:
@@ -161,13 +143,13 @@ def loads(s):
#   ('MatchedRule', ('foo', ), '0x401000', ('0x401000', ))
#    ^^^^^^^^^^^^^  ^^^^^^^^^  ^^^^^^^^^^  ^^^^^^^^^^^^^^
#    feature name   args       addr        func/bb/insn
for feature in doc.get('scopes', {}).get('file', []):
for feature in doc.get("scopes", {}).get("file", []):
va, loc = feature[2:]
va = int(va, 0x10)
feature = deserialize_feature(feature[:2])
features['file features'].append((va, feature))
features["file features"].append((va, feature))
for feature in doc.get('scopes', {}).get('function', []):
for feature in doc.get("scopes", {}).get("function", []):
# fetch the pair like:
#
# ('0x401000', ('0x401000', ))
@@ -183,42 +165,42 @@ def loads(s):
#   ('MatchedRule', ('foo', ))
#    ^^^^^^^^^^^^^  ^^^^^^^^^
#    feature name   args
feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['features'].append((va, feature))
features["functions"][loc[0]]["features"].append((va, feature))
for feature in doc.get('scopes', {}).get('basic block', []):
for feature in doc.get("scopes", {}).get("basic block", []):
va, loc = feature[2:]
va = int(va, 0x10)
loc = [int(lo, 0x10) for lo in loc]
feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['basic blocks'][loc[1]]['features'].append((va, feature))
features["functions"][loc[0]]["basic blocks"][loc[1]]["features"].append((va, feature))
for feature in doc.get('scopes', {}).get('instruction', []):
for feature in doc.get("scopes", {}).get("instruction", []):
va, loc = feature[2:]
va = int(va, 0x10)
loc = [int(lo, 0x10) for lo in loc]
feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['basic blocks'][loc[1]]['instructions'][loc[2]]['features'].append((va, feature))
features["functions"][loc[0]]["basic blocks"][loc[1]]["instructions"][loc[2]]["features"].append((va, feature))
return capa.features.extractors.NullFeatureExtractor(features)
MAGIC = 'capa0000'.encode('ascii')
MAGIC = "capa0000".encode("ascii")
def dump(extractor):
'''serialize the given extractor to a byte array.'''
return MAGIC + zlib.compress(dumps(extractor).encode('utf-8'))
"""serialize the given extractor to a byte array."""
return MAGIC + zlib.compress(dumps(extractor).encode("utf-8"))
def is_freeze(buf):
return buf[:len(MAGIC)] == MAGIC
return buf[: len(MAGIC)] == MAGIC
def load(buf):
'''deserialize a set of features (as a NullFeatureExtractor) from a byte array.'''
"""deserialize a set of features (as a NullFeatureExtractor) from a byte array."""
if not is_freeze(buf):
raise ValueError('missing magic header')
return loads(zlib.decompress(buf[len(MAGIC):]).decode('utf-8'))
raise ValueError("missing magic header")
return loads(zlib.decompress(buf[len(MAGIC) :]).decode("utf-8"))
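# a usage sketch for the helpers above: freeze an extractor's features to disk
# and thaw them back. the path argument is illustrative.
def roundtrip(extractor, path):
    with open(path, "wb") as f:
        f.write(dump(extractor))
    with open(path, "rb") as f:
        buf = f.read()
    if not is_freeze(buf):
        raise ValueError("not a capa freeze file")
    return load(buf)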
def main(argv=None):
@@ -230,24 +212,21 @@ def main(argv=None):
argv = sys.argv[1:]
formats = [
('auto', '(default) detect file type automatically'),
('pe', 'Windows PE file'),
('sc32', '32-bit shellcode'),
('sc64', '64-bit shellcode'),
("auto", "(default) detect file type automatically"),
("pe", "Windows PE file"),
("sc32", "32-bit shellcode"),
("sc64", "64-bit shellcode"),
]
format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
parser = argparse.ArgumentParser(description='save capa features to a file')
parser.add_argument('sample', type=str,
help='Path to sample to analyze')
parser.add_argument('output', type=str,
help='Path to output file')
parser.add_argument('-v', '--verbose', action='store_true',
help='Enable verbose output')
parser.add_argument('-q', '--quiet', action='store_true',
help='Disable all output but errors')
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
help='Select sample format, %s' % format_help)
parser = argparse.ArgumentParser(description="save capa features to a file")
parser.add_argument("sample", type=str, help="Path to sample to analyze")
parser.add_argument("output", type=str, help="Path to output file")
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
parser.add_argument(
"-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
)
args = parser.parse_args(args=argv)
if args.quiet:
@@ -264,13 +243,15 @@ def main(argv=None):
# don't import this at top level to support ida/py3 backend
import capa.features.extractors.viv
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
with open(args.output, 'wb') as f:
with open(args.output, "wb") as f:
f.write(dump(extractor))
return 0
if __name__ == '__main__':
if __name__ == "__main__":
import sys
sys.exit(main())
View File
@@ -4,9 +4,9 @@ from capa.features import Feature
class API(Feature):
def __init__(self, name):
# Downcase library name if given
if '.' in name:
modname, impname = name.split('.')
name = modname.lower() + '.' + impname
if "." in name:
modname, impname = name.split(".")
name = modname.lower() + "." + impname
super(API, self).__init__([name])
@@ -19,9 +19,9 @@ class Number(Feature):
def __str__(self):
if self.symbol:
return 'number(0x%x = %s)' % (self.value, self.symbol)
return "number(0x%x = %s)" % (self.value, self.symbol)
else:
return 'number(0x%x)' % (self.value)
return "number(0x%x)" % (self.value)
class Offset(Feature):
@@ -32,9 +32,9 @@ class Offset(Feature):
def __str__(self):
if self.symbol:
return 'offset(0x%x = %s)' % (self.value, self.symbol)
return "offset(0x%x = %s)" % (self.value, self.symbol)
else:
return 'offset(0x%x)' % (self.value)
return "offset(0x%x)" % (self.value)
class Mnemonic(Feature):
@@ -43,4 +43,4 @@ class Mnemonic(Feature):
self.value = value
def __str__(self):
return 'mnemonic(%s)' % (self.value)
return "mnemonic(%s)" % (self.value)
View File
@@ -4,7 +4,7 @@ _hex = hex
def hex(i):
# under py2.7, long integers get formatted with a trailing `L`
# and this is not pretty. so strip it out.
return _hex(oint(i)).rstrip('L')
return _hex(oint(i)).rstrip("L")
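# worked example (python 2.7):
#
#     >>> _hex(2 ** 32)
#     '0x100000000L'
#     >>> hex(2 ** 32)
#     '0x100000000'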
def oint(i):
View File
@@ -15,14 +15,14 @@ def info_to_name(display):
e.g. function(my_function) => my_function
"""
try:
return display.split('(')[1].rstrip(')')
return display.split("(")[1].rstrip(")")
except IndexError:
return ''
return ""
def location_to_hex(location):
""" convert location to hex for display """
return '%08X' % location
return "%08X" % location
class CapaExplorerDataItem(object):
@@ -35,7 +35,12 @@ class CapaExplorerDataItem(object):
self.children = []
self._checked = False
self.flags = (QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsTristate | QtCore.Qt.ItemIsUserCheckable)
self.flags = (
QtCore.Qt.ItemIsEnabled
| QtCore.Qt.ItemIsSelectable
| QtCore.Qt.ItemIsTristate
| QtCore.Qt.ItemIsUserCheckable
)
if self.pred:
self.pred.appendChild(self)
@@ -109,7 +114,7 @@ class CapaExplorerDataItem(object):
def __str__(self):
""" get string representation of columns """
return ' '.join([data for data in self._data if data])
return " ".join([data for data in self._data if data])
@property
def info(self):
@@ -133,12 +138,12 @@ class CapaExplorerDataItem(object):
class CapaExplorerRuleItem(CapaExplorerDataItem):
""" store data relevant to capa function result """
fmt = '%s (%d matches)'
fmt = "%s (%d matches)"
def __init__(self, parent, display, count, source):
""" """
display = self.fmt % (display, count) if count > 1 else display
super(CapaExplorerRuleItem, self).__init__(parent, [display, '', ''])
super(CapaExplorerRuleItem, self).__init__(parent, [display, "", ""])
self._source = source
@property
@@ -150,9 +155,9 @@ class CapaExplorerRuleItem(CapaExplorerDataItem):
class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
""" store data relevant to capa function match result """
def __init__(self, parent, display, source=''):
def __init__(self, parent, display, source=""):
""" """
super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, '', ''])
super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, "", ""])
self._source = source
@property
@@ -164,12 +169,13 @@ class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
class CapaExplorerFunctionItem(CapaExplorerDataItem):
""" store data relevant to capa function result """
fmt = 'function(%s)'
fmt = "function(%s)"
def __init__(self, parent, location):
""" """
super(CapaExplorerFunctionItem, self).__init__(parent, [self.fmt % idaapi.get_name(location),
location_to_hex(location), ''])
super(CapaExplorerFunctionItem, self).__init__(
parent, [self.fmt % idaapi.get_name(location), location_to_hex(location), ""]
)
@property
def info(self):
@@ -187,32 +193,31 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):
class CapaExplorerBlockItem(CapaExplorerDataItem):
""" store data relevant to capa basic block result """
fmt = 'basic block(loc_%08X)'
fmt = "basic block(loc_%08X)"
def __init__(self, parent, location):
""" """
super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ''])
super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ""])
class CapaExplorerDefaultItem(CapaExplorerDataItem):
""" store data relevant to capa default result """
def __init__(self, parent, display, details='', location=None):
def __init__(self, parent, display, details="", location=None):
""" """
location = location_to_hex(location) if location else ''
location = location_to_hex(location) if location else ""
super(CapaExplorerDefaultItem, self).__init__(parent, [display, location, details])
class CapaExplorerFeatureItem(CapaExplorerDataItem):
""" store data relevant to capa feature result """
def __init__(self, parent, display, location='', details=''):
location = location_to_hex(location) if location else ''
def __init__(self, parent, display, location="", details=""):
location = location_to_hex(location) if location else ""
super(CapaExplorerFeatureItem, self).__init__(parent, [display, location, details])
class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
def __init__(self, parent, display, location):
""" """
details = capa.ida.helpers.get_disasm_line(location)
@@ -221,26 +226,24 @@ class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
class CapaExplorerByteViewItem(CapaExplorerFeatureItem):
def __init__(self, parent, display, location):
""" """
byte_snap = idaapi.get_bytes(location, 32)
if byte_snap:
byte_snap = codecs.encode(byte_snap, 'hex').upper()
byte_snap = codecs.encode(byte_snap, "hex").upper()
if sys.version_info >= (3, 0):
details = ' '.join([byte_snap[i:i + 2].decode() for i in range(0, len(byte_snap), 2)])
details = " ".join([byte_snap[i : i + 2].decode() for i in range(0, len(byte_snap), 2)])
else:
details = ' '.join([byte_snap[i:i + 2] for i in range(0, len(byte_snap), 2)])
details = " ".join([byte_snap[i : i + 2] for i in range(0, len(byte_snap), 2)])
else:
details = ''
details = ""
super(CapaExplorerByteViewItem, self).__init__(parent, display, location=location, details=details)
self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)
class CapaExplorerStringViewItem(CapaExplorerFeatureItem):
def __init__(self, parent, display, location):
""" """
super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location)
View File
@@ -16,7 +16,7 @@ from capa.ida.explorer.item import (
CapaExplorerByteViewItem,
CapaExplorerBlockItem,
CapaExplorerRuleMatchItem,
CapaExplorerFeatureItem
CapaExplorerFeatureItem,
)
import capa.ida.helpers
@@ -37,7 +37,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
def __init__(self, parent=None):
""" """
super(CapaExplorerDataModel, self).__init__(parent)
self.root_node = CapaExplorerDataItem(None, ['Rule Information', 'Address', 'Details'])
self.root_node = CapaExplorerDataItem(None, ["Rule Information", "Address", "Details"])
def reset(self):
""" """
@@ -86,8 +86,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
# display data in corresponding column
return item.data(column)
if role == QtCore.Qt.ToolTipRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem)) and \
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION == column:
if (
role == QtCore.Qt.ToolTipRole
and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem))
and CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION == column
):
# show tooltip containing rule source
return item.source
@@ -95,18 +98,30 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
# inform view how to display content of checkbox - un/checked
return QtCore.Qt.Checked if item.isChecked() else QtCore.Qt.Unchecked
if role == QtCore.Qt.FontRole and column in (CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
CapaExplorerDataModel.COLUMN_INDEX_DETAILS):
if role == QtCore.Qt.FontRole and column in (
CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
CapaExplorerDataModel.COLUMN_INDEX_DETAILS,
):
# set font for virtual address and details columns
font = QtGui.QFont('Courier', weight=QtGui.QFont.Medium)
font = QtGui.QFont("Courier", weight=QtGui.QFont.Medium)
if column == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS:
font.setBold(True)
return font
if role == QtCore.Qt.FontRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem,
CapaExplorerBlockItem, CapaExplorerFunctionItem,
CapaExplorerFeatureItem)) and \
column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
if (
role == QtCore.Qt.FontRole
and isinstance(
item,
(
CapaExplorerRuleItem,
CapaExplorerRuleMatchItem,
CapaExplorerBlockItem,
CapaExplorerFunctionItem,
CapaExplorerFeatureItem,
),
)
and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
):
# set bold font for top-level rules
font = QtGui.QFont()
font.setBold(True)
@@ -116,8 +131,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
# set color for virtual address column
return QtGui.QColor(88, 139, 174)
if role == QtCore.Qt.ForegroundRole and isinstance(item, CapaExplorerFeatureItem) and column == \
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
if (
role == QtCore.Qt.ForegroundRole
and isinstance(item, CapaExplorerFeatureItem)
and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
):
# set color for feature items
return QtGui.QColor(79, 121, 66)
@@ -222,8 +240,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
def util_reset_ida_highlighting(self, item, checked):
""" """
if not isinstance(item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem,
CapaExplorerByteViewItem)):
if not isinstance(
item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem, CapaExplorerByteViewItem)
):
# ignore other item types
return
@@ -254,8 +273,10 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
if not model_index.isValid():
return False
if role == QtCore.Qt.CheckStateRole and model_index.column() ==\
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
if (
role == QtCore.Qt.CheckStateRole
and model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
):
# user un/checked box - un/check parent and children
for child_index in self.iterateChildrenIndexFromRootIndex(model_index, ignore_root=False):
child_index.internalPointer().setChecked(value)
@@ -263,9 +284,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
self.dataChanged.emit(child_index, child_index)
return True
if role == QtCore.Qt.EditRole and value and \
model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION and \
isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
if (
role == QtCore.Qt.EditRole
and value
and model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
and isinstance(model_index.internalPointer(), CapaExplorerFunctionItem)
):
# user renamed function - update IDA database and data model
old_name = model_index.internalPointer().info
new_name = str(value)
@@ -309,39 +333,39 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
"type": "or"
},
"""
if statement['type'] in ('and', 'or', 'optional'):
return CapaExplorerDefaultItem(parent, statement['type'])
elif statement['type'] == 'not':
if statement["type"] in ("and", "or", "optional"):
return CapaExplorerDefaultItem(parent, statement["type"])
elif statement["type"] == "not":
# TODO: do we display 'not'
pass
elif statement['type'] == 'some':
return CapaExplorerDefaultItem(parent, statement['count'] + ' or more')
elif statement['type'] == 'range':
elif statement["type"] == "some":
return CapaExplorerDefaultItem(parent, statement["count"] + " or more")
elif statement["type"] == "range":
# `range` is a weird node, it's almost a hybrid of statement + feature.
# it is a specific feature repeated multiple times.
# there's no additional logic in the feature part, just the existence of a feature.
# so, we have to inline some of the feature rendering here.
display = 'count(%s): ' % self.capa_doc_feature_to_display(statement['child'])
display = "count(%s): " % self.capa_doc_feature_to_display(statement["child"])
if statement['max'] == statement['min']:
display += '%d' % (statement['min'])
elif statement['min'] == 0:
display += '%d or fewer' % (statement['max'])
elif statement['max'] == (1 << 64 - 1):
display += '%d or more' % (statement['min'])
if statement["max"] == statement["min"]:
display += "%d" % (statement["min"])
elif statement["min"] == 0:
display += "%d or fewer" % (statement["max"])
elif statement["max"] == (1 << 64 - 1):
display += "%d or more" % (statement["min"])
else:
display += 'between %d and %d' % (statement['min'], statement['max'])
display += "between %d and %d" % (statement["min"], statement["max"])
return CapaExplorerFeatureItem(parent, display=display)
elif statement['type'] == 'subscope':
return CapaExplorerFeatureItem(parent, 'subscope(%s)' % statement['subscope'])
elif statement['type'] == 'regex':
elif statement["type"] == "subscope":
return CapaExplorerFeatureItem(parent, "subscope(%s)" % statement["subscope"])
elif statement["type"] == "regex":
# regex is a `Statement` not a `Feature`
# this is because it doesn't get extracted, but applies to all strings in scope.
# so we have to handle it here
return CapaExplorerFeatureItem(parent, 'regex(%s)' % statement['pattern'], details=statement['match'])
return CapaExplorerFeatureItem(parent, "regex(%s)" % statement["pattern"], details=statement["match"])
else:
raise RuntimeError('unexpected match statement type: ' + str(statement))
raise RuntimeError("unexpected match statement type: " + str(statement))
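# worked examples of the `range` display logic above (values illustrative);
# note that `1 << 64 - 1` parses as `1 << 63` due to operator precedence and
# serves as the "unbounded" sentinel:
#
#     min=2, max=2        -> "count(...): 2"
#     min=0, max=5        -> "count(...): 5 or fewer"
#     min=3, max=1 << 63  -> "count(...): 3 or more"
#     min=1, max=4        -> "count(...): between 1 and 4"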
def render_capa_doc_match(self, parent, match, doc):
""" render capa match read from doc
@@ -367,23 +391,24 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
}
},
"""
if not match['success']:
if not match["success"]:
# TODO: display failed branches at some point? Help with debugging rules?
return
# optional statement with no successful children is empty
if (match['node'].get('statement', {}).get('type') == 'optional'
and not any(map(lambda m: m['success'], match['children']))):
if match["node"].get("statement", {}).get("type") == "optional" and not any(
map(lambda m: m["success"], match["children"])
):
return
if match['node']['type'] == 'statement':
parent2 = self.render_capa_doc_statement_node(parent, match['node']['statement'], doc)
elif match['node']['type'] == 'feature':
parent2 = self.render_capa_doc_feature_node(parent, match['node']['feature'], match['locations'], doc)
if match["node"]["type"] == "statement":
parent2 = self.render_capa_doc_statement_node(parent, match["node"]["statement"], doc)
elif match["node"]["type"] == "feature":
parent2 = self.render_capa_doc_feature_node(parent, match["node"]["feature"], match["locations"], doc)
else:
raise RuntimeError('unexpected node type: ' + str(match['node']['type']))
raise RuntimeError("unexpected node type: " + str(match["node"]["type"]))
for child in match['children']:
for child in match["children"]:
self.render_capa_doc_match(parent2, child, doc)
def render_capa_doc(self, doc):
@@ -394,17 +419,17 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
self.beginResetModel()
for rule in rutils.capability_rules(doc):
parent = CapaExplorerRuleItem(self.root_node, rule['meta']['name'], len(rule['matches']), rule['source'])
parent = CapaExplorerRuleItem(self.root_node, rule["meta"]["name"], len(rule["matches"]), rule["source"])
for (location, match) in doc[rule['meta']['name']]['matches'].items():
if rule['meta']['scope'] == capa.rules.FILE_SCOPE:
for (location, match) in doc[rule["meta"]["name"]]["matches"].items():
if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
parent2 = parent
elif rule['meta']['scope'] == capa.rules.FUNCTION_SCOPE:
elif rule["meta"]["scope"] == capa.rules.FUNCTION_SCOPE:
parent2 = CapaExplorerFunctionItem(parent, location)
elif rule['meta']['scope'] == capa.rules.BASIC_BLOCK_SCOPE:
elif rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
parent2 = CapaExplorerBlockItem(parent, location)
else:
raise RuntimeError('unexpected rule scope: ' + str(rule['meta']['scope']))
raise RuntimeError("unexpected rule scope: " + str(rule["meta"]["scope"]))
self.render_capa_doc_match(parent2, match, doc)
@@ -421,20 +446,20 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
},
"""
mapping = {
'string': 'string(%s)',
'bytes': 'bytes(%s)',
'api': 'api(%s)',
'mnemonic': 'mnemonic(%s)',
'export': 'export(%s)',
'import': 'import(%s)',
'section': 'section(%s)',
'number': 'number(0x%X)',
'offset': 'offset(0x%X)',
'characteristic': 'characteristic(%s)',
'match': 'rule match(%s)'
"string": "string(%s)",
"bytes": "bytes(%s)",
"api": "api(%s)",
"mnemonic": "mnemonic(%s)",
"export": "export(%s)",
"import": "import(%s)",
"section": "section(%s)",
"number": "number(0x%X)",
"offset": "offset(0x%X)",
"characteristic": "characteristic(%s)",
"match": "rule match(%s)",
}
'''
"""
"feature": {
"characteristic": [
"loop",
@@ -442,21 +467,23 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
],
"type": "characteristic"
},
'''
if feature['type'] == 'characteristic':
return mapping['characteristic'] % feature['characteristic'][0]
"""
if feature["type"] == "characteristic":
return mapping["characteristic"] % feature["characteristic"][0]
# convert bytes feature from "410ab4" to "41 0A B4"
if feature['type'] == 'bytes':
return mapping['bytes'] % ' '.join(feature['bytes'][i:i + 2] for i in
range(0, len(feature['bytes']), 2)).upper()
if feature["type"] == "bytes":
return (
mapping["bytes"]
% " ".join(feature["bytes"][i : i + 2] for i in range(0, len(feature["bytes"]), 2)).upper()
)
try:
fmt = mapping[feature['type']]
fmt = mapping[feature["type"]]
except KeyError:
raise RuntimeError('unexpected doc type: ' + str(feature['type']))
raise RuntimeError("unexpected doc type: " + str(feature["type"]))
return fmt % feature[feature['type']]
return fmt % feature[feature["type"]]
def render_capa_doc_feature_node(self, parent, feature, locations, doc):
""" """
@@ -473,7 +500,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
return parent2
def render_capa_doc_feature(self, parent, feature, location, doc, display='-'):
def render_capa_doc_feature(self, parent, feature, location, doc, display="-"):
""" render capa feature read from doc
@param parent: parent node to which new child is assigned
@@ -491,51 +518,38 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
@param location: address of feature
@param display: text to display in plugin ui
"""
instruction_view = (
'bytes',
'api',
'mnemonic',
'number',
'offset'
)
byte_view = (
'section',
)
string_view = (
'string',
)
default_feature_view = (
'import',
'export'
)
instruction_view = ("bytes", "api", "mnemonic", "number", "offset")
byte_view = ("section",)
string_view = ("string",)
default_feature_view = ("import", "export")
# special handling for characteristic, depending on its type
if feature['type'] == 'characteristic':
if feature['characteristic'][0] in ('embedded pe',):
if feature["type"] == "characteristic":
if feature["characteristic"][0] in ("embedded pe",):
return CapaExplorerByteViewItem(parent, display, location)
if feature['characteristic'][0] in ('loop', 'recursive call', 'tight loop', 'switch'):
if feature["characteristic"][0] in ("loop", "recursive call", "tight loop", "switch"):
return CapaExplorerFeatureItem(parent, display=display)
# default to instruction view
return CapaExplorerInstructionViewItem(parent, display, location)
if feature['type'] == 'match':
return CapaExplorerRuleMatchItem(parent, display, source=doc.get(feature['match'], {}).get('source', ''))
if feature["type"] == "match":
return CapaExplorerRuleMatchItem(parent, display, source=doc.get(feature["match"], {}).get("source", ""))
if feature['type'] in instruction_view:
if feature["type"] in instruction_view:
return CapaExplorerInstructionViewItem(parent, display, location)
if feature['type'] in byte_view:
if feature["type"] in byte_view:
return CapaExplorerByteViewItem(parent, display, location)
if feature['type'] in string_view:
if feature["type"] in string_view:
return CapaExplorerStringViewItem(parent, display, location)
if feature['type'] in default_feature_view:
if feature["type"] in default_feature_view:
return CapaExplorerFeatureItem(parent, display=display)
raise RuntimeError('unexpected feature type: ' + str(feature['type']))
raise RuntimeError("unexpected feature type: " + str(feature["type"]))
def update_function_name(self, old_name, new_name):
""" update all instances of function name
@@ -548,8 +562,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
# convert name to view format for matching
old_name = CapaExplorerFunctionItem.fmt % old_name
for model_index in self.match(root_index, QtCore.Qt.DisplayRole, old_name, hits=-1,
flags=QtCore.Qt.MatchRecursive):
for model_index in self.match(
root_index, QtCore.Qt.DisplayRole, old_name, hits=-1, flags=QtCore.Qt.MatchRecursive
):
if not isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
continue
View File
@@ -4,7 +4,6 @@ from capa.ida.explorer.model import CapaExplorerDataModel
class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
def __init__(self, parent=None):
""" """
super(CapaExplorerSortFilterProxyModel, self).__init__(parent)
@@ -20,8 +19,12 @@ class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
ldata = left.internalPointer().data(left.column())
rdata = right.internalPointer().data(right.column())
if ldata and rdata and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS and left.column() \
== right.column():
if (
ldata
and rdata
and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS
and left.column() == right.column()
):
# convert virtual address before compare
return int(ldata, 16) < int(rdata, 16)
else:
View File
@@ -55,7 +55,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
self.doubleClicked.connect(self.slot_double_click)
# self.clicked.connect(self.slot_click)
self.setStyleSheet('QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}')
self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")
def reset(self):
""" reset user interface changes
@@ -114,8 +114,8 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
@yield QAction*
"""
default_actions = [
('Copy column', data, self.slot_copy_column),
('Copy row', data, self.slot_copy_row),
("Copy column", data, self.slot_copy_column),
("Copy row", data, self.slot_copy_row),
]
# add default actions
@@ -130,7 +130,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
@yield QAction*
"""
function_actions = [
('Rename function', data, self.slot_rename_function),
("Rename function", data, self.slot_rename_function),
]
# add function actions
View File
@@ -3,47 +3,48 @@ import logging
import idaapi
import idc
logger = logging.getLogger('capa')
logger = logging.getLogger("capa")
# file type names as returned by idaapi.get_file_type_name()
SUPPORTED_FILE_TYPES = [
'Portable executable for 80386 (PE)',
'Portable executable for AMD64 (PE)',
'Binary file' # x86/AMD64 shellcode support
"Portable executable for 80386 (PE)",
"Portable executable for AMD64 (PE)",
"Binary file", # x86/AMD64 shellcode support
]
def inform_user_ida_ui(message):
idaapi.info('%s. Please refer to IDA Output window for more information.' % message)
idaapi.info("%s. Please refer to IDA Output window for more information." % message)
def is_supported_file_type():
file_type = idaapi.get_file_type_name()
if file_type not in SUPPORTED_FILE_TYPES:
logger.error('-' * 80)
logger.error(' Input file does not appear to be a PE file.')
logger.error(' ')
logger.error("-" * 80)
logger.error(" Input file does not appear to be a PE file.")
logger.error(" ")
logger.error(
' capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA.')
logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
logger.error('-' * 80)
inform_user_ida_ui('capa does not support the format of this file')
" capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA."
)
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.error("-" * 80)
inform_user_ida_ui("capa does not support the format of this file")
return False
return True
def get_disasm_line(va):
''' '''
""" """
return idc.generate_disasm_line(va, idc.GENDSM_FORCE_CODE)
def is_func_start(ea):
''' check if function start exists at virtual address '''
""" check if function start exists at virtual address """
f = idaapi.get_func(ea)
return f and f.start_ea == ea
def get_func_start_ea(ea):
''' '''
""" """
f = idaapi.get_func(ea)
return f if f is None else f.start_ea
View File
@@ -2,11 +2,7 @@ import os
import logging
import collections
from PyQt5 import (
QtWidgets,
QtGui,
QtCore
)
from PyQt5 import QtWidgets, QtGui, QtCore
import idaapi
@@ -20,13 +16,12 @@ from capa.ida.explorer.view import CapaExplorerQtreeView
from capa.ida.explorer.model import CapaExplorerDataModel
from capa.ida.explorer.proxy import CapaExplorerSortFilterProxyModel
PLUGIN_NAME = 'capa explorer'
PLUGIN_NAME = "capa explorer"
logger = logging.getLogger('capa')
logger = logging.getLogger("capa")
class CapaExplorerIdaHooks(idaapi.UI_Hooks):
def __init__(self, screen_ea_changed_hook, action_hooks):
""" facilitate IDA UI hooks
@@ -78,7 +73,6 @@ class CapaExplorerIdaHooks(idaapi.UI_Hooks):
class CapaExplorerForm(idaapi.PluginForm):
def __init__(self):
""" """
super(CapaExplorerForm, self).__init__()
@@ -109,20 +103,20 @@ class CapaExplorerForm(idaapi.PluginForm):
self.view_tree.reset()
logger.info('form created.')
logger.info("form created.")
def Show(self):
""" """
return idaapi.PluginForm.Show(self, self.form_title, options=(
idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER
))
return idaapi.PluginForm.Show(
self, self.form_title, options=(idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER)
)
def OnClose(self, form):
""" form is closed """
self.unload_ida_hooks()
self.ida_reset()
logger.info('form closed.')
logger.info("form closed.")
def load_interface(self):
""" load user interface """
@@ -165,8 +159,8 @@ class CapaExplorerForm(idaapi.PluginForm):
def load_view_summary(self):
""" """
table_headers = [
'Capability',
'Namespace',
"Capability",
"Namespace",
]
table = QtWidgets.QTableWidget()
@@ -180,15 +174,15 @@ class CapaExplorerForm(idaapi.PluginForm):
table.setHorizontalHeaderLabels(table_headers)
table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
table.setShowGrid(False)
table.setStyleSheet('QTableWidget::item { padding: 25px; }')
table.setStyleSheet("QTableWidget::item { padding: 25px; }")
self.view_summary = table
def load_view_attack(self):
""" """
table_headers = [
'ATT&CK Tactic',
'ATT&CK Technique ',
"ATT&CK Tactic",
"ATT&CK Technique ",
]
table = QtWidgets.QTableWidget()
@@ -202,13 +196,13 @@ class CapaExplorerForm(idaapi.PluginForm):
table.setHorizontalHeaderLabels(table_headers)
table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
table.setShowGrid(False)
table.setStyleSheet('QTableWidget::item { padding: 25px; }')
table.setStyleSheet("QTableWidget::item { padding: 25px; }")
self.view_attack = table
def load_view_checkbox_limit_by(self):
""" """
check = QtWidgets.QCheckBox('Limit results to current function')
check = QtWidgets.QCheckBox("Limit results to current function")
check.setChecked(False)
check.stateChanged.connect(self.slot_checkbox_limit_by_changed)
@@ -231,7 +225,7 @@ class CapaExplorerForm(idaapi.PluginForm):
tab = QtWidgets.QWidget()
tab.setLayout(layout)
self.view_tabs.addTab(tab, 'Tree View')
self.view_tabs.addTab(tab, "Tree View")
def load_view_summary_tab(self):
""" """
@@ -241,7 +235,7 @@ class CapaExplorerForm(idaapi.PluginForm):
tab = QtWidgets.QWidget()
tab.setLayout(layout)
self.view_tabs.addTab(tab, 'Summary')
self.view_tabs.addTab(tab, "Summary")
def load_view_attack_tab(self):
""" """
@@ -251,16 +245,16 @@ class CapaExplorerForm(idaapi.PluginForm):
tab = QtWidgets.QWidget()
tab.setLayout(layout)
self.view_tabs.addTab(tab, 'MITRE')
self.view_tabs.addTab(tab, "MITRE")
def load_file_menu(self):
""" load file menu actions """
actions = (
('Reset view', 'Reset plugin view', self.reset),
('Run analysis', 'Run capa analysis on current database', self.reload),
("Reset view", "Reset plugin view", self.reset),
("Run analysis", "Run capa analysis on current database", self.reload),
)
menu = self.view_menu_bar.addMenu('File')
menu = self.view_menu_bar.addMenu("File")
for name, _, handle in actions:
action = QtWidgets.QAction(name, self.parent)
@@ -271,8 +265,8 @@ class CapaExplorerForm(idaapi.PluginForm):
def load_ida_hooks(self):
""" """
action_hooks = {
'MakeName': self.ida_hook_rename,
'EditFunction': self.ida_hook_rename,
"MakeName": self.ida_hook_rename,
"EditFunction": self.ida_hook_rename,
}
self.ida_hooks = CapaExplorerIdaHooks(self.ida_hook_screen_ea_changed, action_hooks)
@@ -300,10 +294,10 @@ class CapaExplorerForm(idaapi.PluginForm):
if post:
# post action update data model w/ current name
self.model_data.update_function_name(meta.get('prev_name', ''), curr_name)
self.model_data.update_function_name(meta.get("prev_name", ""), curr_name)
else:
# pre action so save current name for replacement later
meta['prev_name'] = curr_name
meta["prev_name"] = curr_name
def ida_hook_screen_ea_changed(self, widget, new_ea, old_ea):
""" """
@@ -328,21 +322,21 @@ class CapaExplorerForm(idaapi.PluginForm):
match = capa.ida.explorer.item.ea_to_hex_str(new_func_start)
else:
# navigated to virtual address not in valid function - clear filter
match = ''
match = ""
# filter on virtual address to avoid updating filter string if function name is changed
self.model_proxy.add_single_string_filter(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS, match)
def load_capa_results(self):
""" """
logger.info('-' * 80)
logger.info(' Using default embedded rules.')
logger.info(' ')
logger.info(' You can see the current default rule set here:')
logger.info(' https://github.com/fireeye/capa-rules')
logger.info('-' * 80)
logger.info("-" * 80)
logger.info(" Using default embedded rules.")
logger.info(" ")
logger.info(" You can see the current default rule set here:")
logger.info(" https://github.com/fireeye/capa-rules")
logger.info("-" * 80)
rules_path = os.path.join(os.path.dirname(self.file_loc), '../..', 'rules')
rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
rules = capa.main.get_rules(rules_path)
rules = capa.rules.RuleSet(rules)
capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
@@ -350,27 +344,30 @@ class CapaExplorerForm(idaapi.PluginForm):
# support binary files specifically for x86/AMD64 shellcode
# warn user binary file is loaded but still allow capa to process it
# TODO: check specific architecture of binary files based on how user configured IDA processors
if idaapi.get_file_type_name() == 'Binary file':
logger.warning('-' * 80)
logger.warning(' Input file appears to be a binary file.')
logger.warning(' ')
if idaapi.get_file_type_name() == "Binary file":
logger.warning("-" * 80)
logger.warning(" Input file appears to be a binary file.")
logger.warning(" ")
logger.warning(
' capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA.')
" capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA."
)
logger.warning(
' This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64.')
logger.warning(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
logger.warning('-' * 80)
" This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64."
)
logger.warning(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.warning("-" * 80)
capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
if capa.main.has_file_limitation(rules, capabilities, is_standalone=False):
capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
logger.info('analysis completed.')
logger.info("analysis completed.")
doc = capa.render.convert_capabilities_to_result_document(rules, capabilities)
# NOTE: debug output; writes the result document to a hardcoded path
import json
with open("C:\\Users\\spring\\Desktop\\hmm.json", "w") as doc_file:
    json.dump(doc, doc_file, indent=4, sort_keys=True, cls=capa.render.CapaJsonObjectEncoder)
@@ -380,22 +377,22 @@ class CapaExplorerForm(idaapi.PluginForm):
self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)
logger.info('render views completed.')
logger.info("render views completed.")
def render_capa_doc_summary(self, doc):
""" """
for (row, rule) in enumerate(rutils.capability_rules(doc)):
count = len(rule['matches'])
count = len(rule["matches"])
if count == 1:
capability = rule['meta']['name']
capability = rule["meta"]["name"]
else:
capability = '%s (%d matches)' % (rule['meta']['name'], count)
capability = "%s (%d matches)" % (rule["meta"]["name"], count)
self.view_summary.setRowCount(row + 1)
self.view_summary.setItem(row, 0, self.render_new_table_header_item(capability))
self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule['meta']['namespace']))
self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule["meta"]["namespace"]))
# resize columns to content
self.view_summary.resizeColumnsToContents()
@@ -404,17 +401,17 @@ class CapaExplorerForm(idaapi.PluginForm):
""" """
tactics = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
if not rule['meta'].get('att&ck'):
if not rule["meta"].get("att&ck"):
continue
for attack in rule['meta']['att&ck']:
tactic, _, rest = attack.partition('::')
if '::' in rest:
technique, _, rest = rest.partition('::')
subtechnique, _, id = rest.rpartition(' ')
for attack in rule["meta"]["att&ck"]:
tactic, _, rest = attack.partition("::")
if "::" in rest:
technique, _, rest = rest.partition("::")
subtechnique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, subtechnique, id))
else:
technique, _, id = rest.rpartition(' ')
technique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, id))
column_one = []
@@ -422,17 +419,17 @@ class CapaExplorerForm(idaapi.PluginForm):
for tactic, techniques in sorted(tactics.items()):
column_one.append(tactic.upper())
column_one.extend(['' for i in range(len(techniques) - 1)])
column_one.extend(["" for i in range(len(techniques) - 1)])
for spec in sorted(techniques):
if len(spec) == 2:
technique, id = spec
column_two.append('%s %s' % (technique, id))
column_two.append("%s %s" % (technique, id))
elif len(spec) == 3:
technique, subtechnique, id = spec
column_two.append('%s::%s %s' % (technique, subtechnique, id))
column_two.append("%s::%s %s" % (technique, subtechnique, id))
else:
raise RuntimeError('unexpected ATT&CK spec format')
raise RuntimeError("unexpected ATT&CK spec format")
self.view_attack.setRowCount(max(len(column_one), len(column_two)))
@@ -471,8 +468,8 @@ class CapaExplorerForm(idaapi.PluginForm):
self.view_summary.setRowCount(0)
self.load_capa_results()
logger.info('reload complete.')
idaapi.info('%s reload completed.' % PLUGIN_NAME)
logger.info("reload complete.")
idaapi.info("%s reload completed." % PLUGIN_NAME)
def reset(self):
""" reset user interface elements
@@ -481,8 +478,8 @@ class CapaExplorerForm(idaapi.PluginForm):
"""
self.ida_reset()
logger.info('reset completed.')
idaapi.info('%s reset completed.' % PLUGIN_NAME)
logger.info("reset completed.")
idaapi.info("%s reset completed." % PLUGIN_NAME)
def slot_menu_bar_hovered(self, action):
""" display menu action tooltip
@@ -491,7 +488,9 @@ class CapaExplorerForm(idaapi.PluginForm):
@reference: https://stackoverflow.com/questions/21725119/why-wont-qtooltips-appear-on-qactions-within-a-qmenu
"""
QtWidgets.QToolTip.showText(QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action))
QtWidgets.QToolTip.showText(
QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action)
)
def slot_checkbox_limit_by_changed(self):
""" slot activated if checkbox clicked
@@ -499,7 +498,7 @@ class CapaExplorerForm(idaapi.PluginForm):
if checked, configure function filter if screen location is located
in function, otherwise clear filter
"""
match = ''
match = ""
if self.view_checkbox_limit_by.isChecked():
location = capa.ida.helpers.get_func_start_ea(idaapi.get_screen_ea())
if location:
@@ -530,5 +529,5 @@ def main():
CAPA_EXPLORER_FORM.Show()
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -19,10 +19,10 @@ from capa.ida import plugin_helpers
import capa.features.extractors.ida.helpers
logger = logging.getLogger('rulegen')
logger = logging.getLogger("rulegen")
AUTHOR_NAME = ''
AUTHOR_NAME = ""
COLOR_HIGHLIGHT = 0xD096FF
@@ -35,11 +35,11 @@ def get_func_start(ea):
class Hooks(idaapi.UI_Hooks):
'''
"""
Notifies the plugin when navigating to another function
NOTE: it uses the global variable RULE_GEN_FORM to access the
PluginForm object. This looks nasty; maybe there is a better way?
'''
"""
def screen_ea_changed(self, ea, prev_ea):
widget = idaapi.get_current_widget()
@@ -55,14 +55,13 @@ class Hooks(idaapi.UI_Hooks):
# changed to another function
RULE_GEN_FORM.reload_features_tree()
except Exception as e:
logger.warn('exception: %s', e)
logger.warn("exception: %s", e)
class RuleGeneratorForm(idaapi.PluginForm):
def __init__(self):
super(RuleGeneratorForm, self).__init__()
self.title = 'capa rule generator'
self.title = "capa rule generator"
self.parent = None
self.parent_items = {}
@@ -70,7 +69,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
self.hooks = Hooks() # dirty?
if self.hooks.hook():
logger.info('UI notification hook installed successfully')
logger.info("UI notification hook installed successfully")
def init_ui(self):
self.tree = QTreeWidget()
@@ -79,7 +78,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
self.reload_features_tree()
button_reset = QtWidgets.QPushButton('&Reset')
button_reset = QtWidgets.QPushButton("&Reset")
button_reset.clicked.connect(self.reset)
h_layout = QtWidgets.QHBoxLayout()
@@ -96,7 +95,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
def reset(self):
plugin_helpers.reset_selection(self.tree)
plugin_helpers.reset_colors(self.orig_colors)
self.rule_text.setText('')
self.rule_text.setText("")
def reload_features_tree(self):
self.reset()
@@ -119,7 +118,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
extractor = capa.features.extractors.ida.IdaFeatureExtractor()
f = idaapi.get_func(idaapi.get_screen_ea())
if not f:
logger.info('function does not exist at 0x%x', idaapi.get_screen_ea())
logger.info("function does not exist at 0x%x", idaapi.get_screen_ea())
return
return self.extract_function_features(f)
@@ -137,7 +136,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
def create_tree(self, features):
self.tree.setMinimumWidth(400)
# self.tree.setMinimumHeight(300)
self.tree.setHeaderLabels(['Feature', 'Virtual Address', 'Disassembly'])
self.tree.setHeaderLabels(["Feature", "Virtual Address", "Disassembly"])
# auto resize columns
self.tree.header().setSectionResizeMode(QHeaderView.ResizeToContents)
self.tree.itemClicked.connect(self.on_item_clicked)
@@ -151,16 +150,22 @@ class RuleGeneratorForm(idaapi.PluginForm):
# level 1
if feature not in self.parent_items:
self.parent_items[feature] = plugin_helpers.add_child_item(self.parent_items[type(feature)], [str(feature)])
self.parent_items[feature] = plugin_helpers.add_child_item(
self.parent_items[type(feature)], [str(feature)]
)
# level n > 1
if len(vas) > 1:
for va in sorted(vas):
plugin_helpers.add_child_item(self.parent_items[feature], [str(feature), '0x%X' % va, plugin_helpers.get_disasm_line(va)], feature)
plugin_helpers.add_child_item(
self.parent_items[feature],
[str(feature), "0x%X" % va, plugin_helpers.get_disasm_line(va)],
feature,
)
else:
va = vas.pop()
self.parent_items[feature].setText(0, str(feature))
self.parent_items[feature].setText(1, '0x%X' % va)
self.parent_items[feature].setText(1, "0x%X" % va)
self.parent_items[feature].setText(2, plugin_helpers.get_disasm_line(va))
self.parent_items[feature].setData(0, 0x100, feature)
@@ -188,29 +193,31 @@ class RuleGeneratorForm(idaapi.PluginForm):
def get_rule_from_features(self, features):
rule_parts = []
counted = zip(Counter(features).keys(), # equals to list(set(words))
Counter(features).values()) # counts the elements' frequency
counted = zip(
Counter(features).keys(), Counter(features).values()  # pairs each unique feature with its frequency
)
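# an equivalent single-pass sketch (assumes, as above, that feature objects are hashable):
#   counted = Counter(features).items()  # [(feature, count), ...]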
# single features
for k, v in filter(lambda t: t[1] == 1, counted):
# TODO args to hex if int
if k.name.lower() == 'bytes':
if k.name.lower() == "bytes":
# Convert raw bytes to uppercase hex representation (e.g., '12 34 56')
upper_hex_bytes = binascii.hexlify(args_to_str(k.args)).upper()
rule_value_str = ''
rule_value_str = ""
for i in range(0, len(upper_hex_bytes), 2):
rule_value_str += upper_hex_bytes[i:i + 2] + ' '
r = ' - %s: %s' % (k.name.lower(), rule_value_str)
rule_value_str += upper_hex_bytes[i : i + 2] + " "
r = " - %s: %s" % (k.name.lower(), rule_value_str)
else:
r = ' - %s: %s' % (k.name.lower(), args_to_str(k.args))
r = " - %s: %s" % (k.name.lower(), args_to_str(k.args))
rule_parts.append(r)
# counted features
for k, v in filter(lambda t: t[1] > 1, counted):
r = ' - count(%s): %d' % (str(k), v)
r = " - count(%s): %d" % (str(k), v)
rule_parts.append(r)
rule_prefix = textwrap.dedent('''
rule_prefix = textwrap.dedent(
"""
rule:
meta:
name:
@@ -219,8 +226,10 @@ class RuleGeneratorForm(idaapi.PluginForm):
examples:
- %s:0x%X
features:
''' % (AUTHOR_NAME, idc.retrieve_input_file_md5(), get_func_start(idc.here()))).strip()
return '%s\n%s' % (rule_prefix, '\n'.join(sorted(rule_parts)))
"""
% (AUTHOR_NAME, idc.retrieve_input_file_md5(), get_func_start(idc.here()))
).strip()
return "%s\n%s" % (rule_prefix, "\n".join(sorted(rule_parts)))
# TODO merge into capa_idautils, get feature data
def get_selected_items(self):
@@ -242,26 +251,25 @@ class RuleGeneratorForm(idaapi.PluginForm):
self.init_ui()
def Show(self):
return idaapi.PluginForm.Show(self, self.title, options=(
idaapi.PluginForm.WOPN_RESTORE
| idaapi.PluginForm.WOPN_PERSIST
))
return idaapi.PluginForm.Show(
self, self.title, options=(idaapi.PluginForm.WOPN_RESTORE | idaapi.PluginForm.WOPN_PERSIST)
)
def OnClose(self, form):
self.reset()
if self.hooks.unhook():
logger.info('UI notification hook uninstalled successfully')
logger.info('RuleGeneratorForm closed')
logger.info("UI notification hook uninstalled successfully")
logger.info("RuleGeneratorForm closed")
def args_to_str(args):
a = []
for arg in args:
if (isinstance(arg, int) or isinstance(arg, long)) and arg > 10:
a.append('0x%X' % arg)
a.append("0x%X" % arg)
else:
a.append(str(arg))
return ','.join(a)
return ",".join(a)
def main():
@@ -280,5 +288,5 @@ def main():
RULE_GEN_FORM.Show()
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -8,34 +8,34 @@ import idc
import idaapi
CAPA_EXTENSION = '.capas'
CAPA_EXTENSION = ".capas"
logger = logging.getLogger('capa_ida')
logger = logging.getLogger("capa_ida")
def get_input_file(freeze=True):
'''
"""
get input file path
freeze (bool): if True, get freeze file if it exists
'''
"""
# try original file in same directory as idb/i64 without idb/i64 file extension
input_file = idc.get_idb_path()[:-4]
if freeze:
# use frozen file if it exists
freeze_file_cand = '%s%s' % (input_file, CAPA_EXTENSION)
freeze_file_cand = "%s%s" % (input_file, CAPA_EXTENSION)
if os.path.isfile(freeze_file_cand):
return freeze_file_cand
if not os.path.isfile(input_file):
# TM naming
input_file = '%s.mal_' % idc.get_idb_path()[:-4]
input_file = "%s.mal_" % idc.get_idb_path()[:-4]
if not os.path.isfile(input_file):
input_file = idaapi.ask_file(0, '*.*', 'Please specify input file.')
input_file = idaapi.ask_file(0, "*.*", "Please specify input file.")
if not input_file:
raise ValueError('could not find input file')
raise ValueError("could not find input file")
return input_file
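# illustrative resolution order for a hypothetical database at C:\work\sample.i64:
#   1. C:\work\sample.capas  (frozen features, only when freeze=True)
#   2. C:\work\sample        (original input next to the .idb/.i64)
#   3. C:\work\sample.mal_   (TM naming fallback)
#   4. whatever the user selects in the ask_file dialog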

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python2
'''
"""
capa - detect capabilities in programs.
'''
"""
import os
import os.path
import sys
@@ -23,16 +23,16 @@ import capa.features.extractors
from capa.helpers import oint
SUPPORTED_FILE_MAGIC = set(['MZ'])
SUPPORTED_FILE_MAGIC = set(["MZ"])
logger = logging.getLogger('capa')
logger = logging.getLogger("capa")
def set_vivisect_log_level(level):
logging.getLogger('vivisect').setLevel(level)
logging.getLogger('vtrace').setLevel(level)
logging.getLogger('envi').setLevel(level)
logging.getLogger("vivisect").setLevel(level)
logging.getLogger("vtrace").setLevel(level)
logging.getLogger("envi").setLevel(level)
def find_function_capabilities(ruleset, extractor, f):
@@ -83,7 +83,7 @@ def find_file_capabilities(ruleset, extractor, function_features):
if feature not in file_features:
file_features[feature] = set()
logger.info('analyzed file and extracted %d features', len(file_features))
logger.info("analyzed file and extracted %d features", len(file_features))
file_features.update(function_features)
@@ -95,7 +95,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
all_function_matches = collections.defaultdict(list)
all_bb_matches = collections.defaultdict(list)
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=' functions'):
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res)
@@ -104,8 +104,10 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
# mapping from matched rule feature to set of addresses at which it matched.
# type: Dict[MatchedRule, Set[int]]
function_features = {capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
for rule_name, results in all_function_matches.items()}
function_features = {
capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
for rule_name, results in all_function_matches.items()
}
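# illustrative (rule name and addresses are made up): if "copy file" matched at
# 0x401000 and 0x402000, the file-scope pass sees the feature
# MatchedRule("copy file") -> {0x401000, 0x402000}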
all_file_matches = find_file_capabilities(ruleset, extractor, function_features)
@@ -119,7 +121,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
def has_rule_with_namespace(rules, capabilities, rule_cat):
for rule_name in capabilities.keys():
if rules.rules[rule_name].meta.get('namespace', '').startswith(rule_cat):
if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
return True
return False
@@ -128,61 +130,61 @@ def has_file_limitation(rules, capabilities, is_standalone=True):
file_limitations = {
# capa will likely detect installer-specific functionality.
# this is probably not what the user wants.
'executable/installer': [
' This sample appears to be an installer.',
' ',
' capa cannot handle installers well. This means the results may be misleading or incomplete.'
' You should try to understand the install mechanism and analyze created files with capa.'
"executable/installer": [
" This sample appears to be an installer.",
" ",
" capa cannot handle installers well. This means the results may be misleading or incomplete."
" You should try to understand the install mechanism and analyze created files with capa.",
],
# capa won't detect much in .NET samples.
# it might match some file-level things.
# for consistency, bail on things that we don't support.
'runtime/dotnet': [
' This sample appears to be a .NET module.',
' ',
' .NET is a cross-platform framework for running managed applications.',
' capa cannot handle non-native files. This means that the results may be misleading or incomplete.',
' You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.'
"runtime/dotnet": [
" This sample appears to be a .NET module.",
" ",
" .NET is a cross-platform framework for running managed applications.",
" capa cannot handle non-native files. This means that the results may be misleading or incomplete.",
" You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.",
],
# capa will detect dozens of capabilities for AutoIt samples,
# but these are due to the AutoIt runtime, not the payload script.
# so, don't confuse the user with FP matches - bail instead
'compiler/autoit': [
' This sample appears to be compiled with AutoIt.',
' ',
' AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.',
' capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.',
' You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.'
"compiler/autoit": [
" This sample appears to be compiled with AutoIt.",
" ",
" AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.",
" capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.",
" You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.",
],
# capa won't detect much in packed samples
'anti-analysis/packer/': [
' This sample appears to be packed.',
' ',
' Packed samples have often been obfuscated to hide their logic.',
' capa cannot handle obfuscation well. This means the results may be misleading or incomplete.',
' If possible, you should try to unpack this input file before analyzing it with capa.'
]
"anti-analysis/packer/": [
" This sample appears to be packed.",
" ",
" Packed samples have often been obfuscated to hide their logic.",
" capa cannot handle obfuscation well. This means the results may be misleading or incomplete.",
" If possible, you should try to unpack this input file before analyzing it with capa.",
],
}
for category, dialogue in file_limitations.items():
if not has_rule_with_namespace(rules, capabilities, category):
continue
logger.warning('-' * 80)
logger.warning("-" * 80)
for line in dialogue:
logger.warning(line)
if is_standalone:
logger.warning(' ')
logger.warning(' Use -v or -vv if you really want to see the capabilities identified by capa.')
logger.warning('-' * 80)
logger.warning(" ")
logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
logger.warning("-" * 80)
return True
return False
def is_supported_file_type(sample):
'''
"""
Return whether this is a supported file based on magic header values
'''
with open(sample, 'rb') as f:
"""
with open(sample, "rb") as f:
magic = f.read(2)
if magic in SUPPORTED_FILE_MAGIC:
return True
@@ -190,36 +192,37 @@ def is_supported_file_type(sample):
return False
def get_shellcode_vw(sample, arch='auto'):
'''
def get_shellcode_vw(sample, arch="auto"):
"""
Return shellcode workspace using explicit arch or via auto-detection
'''
"""
import viv_utils
with open(sample, 'rb') as f:
with open(sample, "rb") as f:
sample_bytes = f.read()
if arch == 'auto':
if arch == "auto":
# choose arch with most functions, idea by Jay G.
vw_cands = []
for arch in ['i386', 'amd64']:
for arch in ["i386", "amd64"]:
vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch))
if not vw_cands:
raise ValueError('could not generate vivisect workspace')
raise ValueError("could not generate vivisect workspace")
vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
else:
vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch)
vw.setMeta('Format', 'blob') # TODO fix in viv_utils
vw.setMeta("Format", "blob") # TODO fix in viv_utils
return vw
def get_meta_str(vw):
'''
"""
Return workspace meta information string
'''
"""
meta = []
for k in ['Format', 'Platform', 'Architecture']:
for k in ["Format", "Platform", "Architecture"]:
if k in vw.metadata:
meta.append('%s: %s' % (k.lower(), vw.metadata[k]))
return '%s, number of functions: %d' % (', '.join(meta), len(vw.getFunctions()))
meta.append("%s: %s" % (k.lower(), vw.metadata[k]))
return "%s, number of functions: %d" % (", ".join(meta), len(vw.getFunctions()))
class UnsupportedFormatError(ValueError):
@@ -228,23 +231,25 @@ class UnsupportedFormatError(ValueError):
def get_workspace(path, format):
import viv_utils
logger.info('generating vivisect workspace for: %s', path)
if format == 'auto':
logger.info("generating vivisect workspace for: %s", path)
if format == "auto":
if not is_supported_file_type(path):
raise UnsupportedFormatError()
vw = viv_utils.getWorkspace(path)
elif format == 'pe':
elif format == "pe":
vw = viv_utils.getWorkspace(path)
elif format == 'sc32':
vw = get_shellcode_vw(path, arch='i386')
elif format == 'sc64':
vw = get_shellcode_vw(path, arch='amd64')
logger.info('%s', get_meta_str(vw))
elif format == "sc32":
vw = get_shellcode_vw(path, arch="i386")
elif format == "sc64":
vw = get_shellcode_vw(path, arch="amd64")
logger.info("%s", get_meta_str(vw))
return vw
def get_extractor_py2(path, format):
import capa.features.extractors.viv
vw = get_workspace(path, format)
return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
@@ -258,10 +263,10 @@ def get_extractor_py3(path, format):
def get_extractor(path, format):
'''
"""
raises:
UnsupportedFormatError:
'''
"""
if sys.version_info >= (3, 0):
return get_extractor_py3(path, format)
else:
@@ -269,7 +274,7 @@ def get_extractor(path, format):
def is_nursery_rule_path(path):
'''
"""
The nursery is a spot for rules that have not yet been fully polished.
For example, they may not have references to a public example of a technique.
Yet, we still want to capture and report on their matches.
@@ -277,23 +282,23 @@ def is_nursery_rule_path(path):
When nursery rules are loaded, their metadata section should be updated with:
`nursery=True`.
'''
return 'nursery' in path
"""
return "nursery" in path
def get_rules(rule_path):
if not os.path.exists(rule_path):
raise IOError('%s does not exist or cannot be accessed' % rule_path)
raise IOError("%s does not exist or cannot be accessed" % rule_path)
rule_paths = []
if os.path.isfile(rule_path):
rule_paths.append(rule_path)
elif os.path.isdir(rule_path):
logger.debug('reading rules from directory %s', rule_path)
logger.debug("reading rules from directory %s", rule_path)
for root, dirs, files in os.walk(rule_path):
for file in files:
if not file.endswith('.yml'):
logger.warning('skipping non-.yml file: %s', file)
if not file.endswith(".yml"):
logger.warning("skipping non-.yml file: %s", file)
continue
rule_path = os.path.join(root, file)
@@ -301,18 +306,18 @@ def get_rules(rule_path):
rules = []
for rule_path in rule_paths:
logger.debug('reading rule file: %s', rule_path)
logger.debug("reading rule file: %s", rule_path)
try:
rule = capa.rules.Rule.from_yaml_file(rule_path)
except capa.rules.InvalidRule:
raise
else:
rule.meta['capa/path'] = rule_path
rule.meta["capa/path"] = rule_path
if is_nursery_rule_path(rule_path):
rule.meta['capa/nursery'] = True
rule.meta["capa/nursery"] = True
rules.append(rule)
logger.debug('rule: %s scope: %s', rule.name, rule.scope)
logger.debug("rule: %s scope: %s", rule.name, rule.scope)
return rules
@@ -322,35 +327,37 @@ def main(argv=None):
argv = sys.argv[1:]
formats = [
('auto', '(default) detect file type automatically'),
('pe', 'Windows PE file'),
('sc32', '32-bit shellcode'),
('sc64', '64-bit shellcode'),
('freeze', 'features previously frozen by capa'),
("auto", "(default) detect file type automatically"),
("pe", "Windows PE file"),
("sc32", "32-bit shellcode"),
("sc64", "64-bit shellcode"),
("freeze", "features previously frozen by capa"),
]
format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
parser = argparse.ArgumentParser(description='detect capabilities in programs.')
parser.add_argument('sample', type=str,
help='Path to sample to analyze')
parser.add_argument('-r', '--rules', type=str, default='(embedded rules)',
help='Path to rule file or directory, use embedded rules by default')
parser.add_argument('-t', '--tag', type=str,
help='Filter on rule meta field values')
parser.add_argument('--version', action='store_true',
help='Print the executable version and exit')
parser.add_argument('-j', '--json', action='store_true',
help='Emit JSON instead of text')
parser.add_argument('-v', '--verbose', action='store_true',
help='Enable verbose result document (no effect with --json)')
parser.add_argument('-vv', '--vverbose', action='store_true',
help='Enable very verbose result document (no effect with --json)')
parser.add_argument('-d', '--debug', action='store_true',
help='Enable debugging output on STDERR')
parser.add_argument('-q', '--quiet', action='store_true',
help='Disable all output but errors')
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
help='Select sample format, %s' % format_help)
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
parser.add_argument("sample", type=str, help="Path to sample to analyze")
parser.add_argument(
"-r",
"--rules",
type=str,
default="(embedded rules)",
help="Path to rule file or directory, use embedded rules by default",
)
parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values")
parser.add_argument("--version", action="store_true", help="Print the executable version and exit")
parser.add_argument("-j", "--json", action="store_true", help="Emit JSON instead of text")
parser.add_argument(
"-v", "--verbose", action="store_true", help="Enable verbose result document (no effect with --json)"
)
parser.add_argument(
"-vv", "--vverbose", action="store_true", help="Enable very verbose result document (no effect with --json)"
)
parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
parser.add_argument(
"-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
)
args = parser.parse_args(args=argv)
if args.version:
@@ -375,68 +382,70 @@ def main(argv=None):
# because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
# see #380 and: https://stackoverflow.com/a/3259271/87207
import codecs
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
if args.rules == '(embedded rules)':
logger.info('-' * 80)
logger.info(' Using default embedded rules.')
logger.info(' To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.')
logger.info(' You can see the current default rule set here:')
logger.info(' https://github.com/fireeye/capa-rules')
logger.info('-' * 80)
codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
logger.debug('detected running under PyInstaller')
args.rules = os.path.join(sys._MEIPASS, 'rules')
logger.debug('default rule path (PyInstaller method): %s', args.rules)
if args.rules == "(embedded rules)":
logger.info("-" * 80)
logger.info(" Using default embedded rules.")
logger.info(" To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.")
logger.info(" You can see the current default rule set here:")
logger.info(" https://github.com/fireeye/capa-rules")
logger.info("-" * 80)
if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
logger.debug("detected running under PyInstaller")
args.rules = os.path.join(sys._MEIPASS, "rules")
logger.debug("default rule path (PyInstaller method): %s", args.rules)
else:
logger.debug('detected running from source')
args.rules = os.path.join(os.path.dirname(__file__), '..', 'rules')
logger.debug('default rule path (source method): %s', args.rules)
logger.debug("detected running from source")
args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
logger.debug("default rule path (source method): %s", args.rules)
else:
logger.info('using rules path: %s', args.rules)
logger.info("using rules path: %s", args.rules)
try:
rules = get_rules(args.rules)
rules = capa.rules.RuleSet(rules)
logger.info('successfully loaded %s rules', len(rules))
logger.info("successfully loaded %s rules", len(rules))
if args.tag:
rules = rules.filter_rules_by_meta(args.tag)
logger.info('selected %s rules', len(rules))
logger.info("selected %s rules", len(rules))
for i, r in enumerate(rules.rules, 1):
# TODO don't display subscope rules?
logger.debug(' %d. %s', i, r)
logger.debug(" %d. %s", i, r)
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error('%s', str(e))
logger.error("%s", str(e))
return -1
with open(args.sample, 'rb') as f:
with open(args.sample, "rb") as f:
taste = f.read(8)
if ((args.format == 'freeze')
or (args.format == 'auto' and capa.features.freeze.is_freeze(taste))):
with open(args.sample, 'rb') as f:
if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
with open(args.sample, "rb") as f:
extractor = capa.features.freeze.load(f.read())
else:
try:
extractor = get_extractor(args.sample, args.format)
except UnsupportedFormatError:
logger.error('-' * 80)
logger.error(' Input file does not appear to be a PE file.')
logger.error(' ')
logger.error(' capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).')
logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
logger.error('-' * 80)
logger.error("-" * 80)
logger.error(" Input file does not appear to be a PE file.")
logger.error(" ")
logger.error(
" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
)
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.error("-" * 80)
return -1
except UnsupportedRuntimeError:
logger.error('-' * 80)
logger.error(' Unsupported runtime or Python interpreter.')
logger.error(' ')
logger.error(' capa supports running under Python 2.7 using Vivisect for binary analysis.')
logger.error(' It can also run within IDA Pro, using either Python 2.7 or 3.5+.')
logger.error(' ')
logger.error(' If you\'re seeing this message on the command line, please ensure you\'re running Python 2.7.')
logger.error('-' * 80)
logger.error("-" * 80)
logger.error(" Unsupported runtime or Python interpreter.")
logger.error(" ")
logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
logger.error(" ")
logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
logger.error("-" * 80)
return -1
capabilities = find_capabilities(rules, extractor)
@@ -462,7 +471,7 @@ def main(argv=None):
print(capa.render.render_default(rules, capabilities))
colorama.deinit()
logger.info('done.')
logger.info("done.")
return 0
@@ -472,34 +481,37 @@ def ida_main():
logging.getLogger().setLevel(logging.INFO)
import capa.ida.helpers
if not capa.ida.helpers.is_supported_file_type():
return -1
logger.info('-' * 80)
logger.info(' Using default embedded rules.')
logger.info(' ')
logger.info(' You can see the current default rule set here:')
logger.info(' https://github.com/fireeye/capa-rules')
logger.info('-' * 80)
logger.info("-" * 80)
logger.info(" Using default embedded rules.")
logger.info(" ")
logger.info(" You can see the current default rule set here:")
logger.info(" https://github.com/fireeye/capa-rules")
logger.info("-" * 80)
if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
logger.debug('detected running under PyInstaller')
rules_path = os.path.join(sys._MEIPASS, 'rules')
logger.debug('default rule path (PyInstaller method): %s', rules_path)
if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
logger.debug("detected running under PyInstaller")
rules_path = os.path.join(sys._MEIPASS, "rules")
logger.debug("default rule path (PyInstaller method): %s", rules_path)
else:
logger.debug('detected running from source')
rules_path = os.path.join(os.path.dirname(__file__), '..', 'rules')
logger.debug('default rule path (source method): %s', rules_path)
logger.debug("detected running from source")
rules_path = os.path.join(os.path.dirname(__file__), "..", "rules")
logger.debug("default rule path (source method): %s", rules_path)
rules = get_rules(rules_path)
import capa.rules
rules = capa.rules.RuleSet(rules)
import capa.features.extractors.ida
capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
if has_file_limitation(rules, capabilities, is_standalone=False):
capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
render_capabilities_default(rules, capabilities)
@@ -513,7 +525,7 @@ def is_runtime_ida():
return True
if __name__ == '__main__':
if __name__ == "__main__":
if is_runtime_ida():
ida_main()
else:

View File

@@ -18,43 +18,41 @@ def convert_statement_to_result_document(statement):
"""
if isinstance(statement, capa.engine.And):
return {
'type': 'and',
"type": "and",
}
elif isinstance(statement, capa.engine.Or):
return {
'type': 'or',
"type": "or",
}
elif isinstance(statement, capa.engine.Not):
return {
'type': 'not',
"type": "not",
}
elif isinstance(statement, capa.engine.Some) and statement.count == 0:
return {
'type': 'optional'
}
return {"type": "optional"}
elif isinstance(statement, capa.engine.Some) and statement.count > 0:
return {
'type': 'some',
'count': statement.count,
"type": "some",
"count": statement.count,
}
elif isinstance(statement, capa.engine.Range):
return {
'type': 'range',
'min': statement.min,
'max': statement.max,
'child': convert_feature_to_result_document(statement.child),
"type": "range",
"min": statement.min,
"max": statement.max,
"child": convert_feature_to_result_document(statement.child),
}
elif isinstance(statement, capa.engine.Regex):
return {
'type': 'regex',
'pattern': statement.pattern,
"type": "regex",
"pattern": statement.pattern,
# the string that was matched
'match': statement.match,
"match": statement.match,
}
elif isinstance(statement, capa.engine.Subscope):
return {
'type': 'subscope',
'subscope': statement.scope,
"type": "subscope",
"subscope": statement.scope,
}
else:
raise RuntimeError("unexpected match statement type: " + str(statement))
@@ -89,8 +87,8 @@ def convert_feature_to_result_document(feature):
# make the terms pretty
name = name.lower()
if name == 'matchedrule':
name = 'match'
if name == "matchedrule":
name = "match"
# in the common case, there's a single argument
# so use it directly.
@@ -99,7 +97,7 @@ def convert_feature_to_result_document(feature):
value = value[0]
return {
'type': name,
"type": name,
name: value,
}
@@ -119,13 +117,13 @@ def convert_node_to_result_document(node):
if isinstance(node, capa.engine.Statement):
return {
'type': 'statement',
'statement': convert_statement_to_result_document(node),
"type": "statement",
"statement": convert_statement_to_result_document(node),
}
elif isinstance(node, capa.features.Feature):
return {
'type': 'feature',
'feature': convert_feature_to_result_document(node),
"type": "feature",
"feature": convert_feature_to_result_document(node),
}
else:
raise RuntimeError("unexpected match node type")
@@ -137,19 +135,16 @@ def convert_match_to_result_document(rules, capabilities, result):
this will become part of the "result document" format that can be emitted to JSON.
"""
doc = {
'success': bool(result.success),
'node': convert_node_to_result_document(result.statement),
'children': [
convert_match_to_result_document(rules, capabilities, child)
for child in result.children
],
"success": bool(result.success),
"node": convert_node_to_result_document(result.statement),
"children": [convert_match_to_result_document(rules, capabilities, child) for child in result.children],
}
# logic expressions, like `and`, don't have locations - their children do.
# so only add `locations` to feature nodes.
if isinstance(result.statement, capa.features.Feature):
if bool(result.success):
doc['locations'] = result.locations
doc["locations"] = result.locations
# if we have a `match` statement, then we're referencing another rule.
# this could be an external rule (written by a human), or
@@ -159,31 +154,30 @@ def convert_match_to_result_document(rules, capabilities, result):
# so, we need to lookup the other rule results
# and then filter those down to the address used here.
# finally, splice that logic into this tree.
if (doc['node']['type'] == 'feature'
and doc['node']['feature']['type'] == 'match'
# only add subtree on success,
# because there won't be results for the other rule on failure.
and doc['success']):
if (
doc["node"]["type"] == "feature"
and doc["node"]["feature"]["type"] == "match"
# only add subtree on success,
# because there won't be results for the other rule on failure.
and doc["success"]
):
rule_name = doc['node']['feature']['match']
rule_name = doc["node"]["feature"]["match"]
rule = rules[rule_name]
rule_matches = {address: result for (address, result) in capabilities[rule_name]}
if rule.meta.get('capa/subscope-rule'):
if rule.meta.get("capa/subscope-rule"):
# for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
#
# e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
scope = rule.meta['scope']
doc['node'] = {
'type': 'statement',
'statement': {
'type': 'subscope',
'subscope': scope,
},
scope = rule.meta["scope"]
doc["node"] = {
"type": "statement",
"statement": {"type": "subscope", "subscope": scope,},
}
for location in doc['locations']:
doc['children'].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
for location in doc["locations"]:
doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
return doc
@@ -220,15 +214,14 @@ def convert_capabilities_to_result_document(rules, capabilities):
for rule_name, matches in capabilities.items():
rule = rules[rule_name]
if rule.meta.get('capa/subscope-rule'):
if rule.meta.get("capa/subscope-rule"):
continue
doc[rule_name] = {
'meta': dict(rule.meta),
'source': rule.definition,
'matches': {
addr: convert_match_to_result_document(rules, capabilities, match)
for (addr, match) in matches
"meta": dict(rule.meta),
"source": rule.definition,
"matches": {
addr: convert_match_to_result_document(rules, capabilities, match) for (addr, match) in matches
},
}
@@ -241,6 +234,7 @@ def render_vverbose(rules, capabilities):
# and capa.render.vverbose import capa.render (implicitly, as a submodule)
# so, defer the import until routine is called, breaking the import loop.
import capa.render.vverbose
doc = convert_capabilities_to_result_document(rules, capabilities)
return capa.render.vverbose.render_vverbose(doc)
@@ -248,6 +242,7 @@ def render_vverbose(rules, capabilities):
def render_verbose(rules, capabilities):
# break import loop
import capa.render.verbose
doc = convert_capabilities_to_result_document(rules, capabilities)
return capa.render.verbose.render_verbose(doc)
@@ -256,6 +251,7 @@ def render_default(rules, capabilities):
# break import loop
import capa.render.verbose
import capa.render.default
doc = convert_capabilities_to_result_document(rules, capabilities)
return capa.render.default.render_default(doc)
@@ -273,7 +269,5 @@ class CapaJsonObjectEncoder(json.JSONEncoder):
def render_json(rules, capabilities):
return json.dumps(
convert_capabilities_to_result_document(rules, capabilities),
cls=CapaJsonObjectEncoder,
sort_keys=True,
convert_capabilities_to_result_document(rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True,
)

View File

@@ -9,7 +9,7 @@ import capa.render.utils as rutils
def width(s, character_count):
"""pad the given string to at least `character_count`"""
if len(s) < character_count:
return s + ' ' * (character_count - len(s))
return s + " " * (character_count - len(s))
else:
return s
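# a quick sanity sketch; this is equivalent to the str builtin s.ljust(character_count):
assert width("api", 6) == "api   "
assert width("CAPABILITY", 4) == "CAPABILITY"  # already long enough, unchanged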
@@ -28,15 +28,15 @@ def render_capabilities(doc, ostream):
"""
rows = []
for rule in rutils.capability_rules(doc):
count = len(rule['matches'])
count = len(rule["matches"])
if count == 1:
capability = rutils.bold(rule['meta']['name'])
capability = rutils.bold(rule["meta"]["name"])
else:
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
rows.append((capability, rule['meta']['namespace']))
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
rows.append((capability, rule["meta"]["namespace"]))
ostream.write(tabulate.tabulate(rows, headers=[width('CAPABILITY', 40), width('NAMESPACE', 40)], tablefmt='psql'))
ostream.write('\n')
ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 40), width("NAMESPACE", 40)], tablefmt="psql"))
ostream.write("\n")
def render_attack(doc, ostream):
@@ -57,17 +57,17 @@ def render_attack(doc, ostream):
"""
tactics = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
if not rule['meta'].get('att&ck'):
if not rule["meta"].get("att&ck"):
continue
for attack in rule['meta']['att&ck']:
tactic, _, rest = attack.partition('::')
if '::' in rest:
technique, _, rest = rest.partition('::')
subtechnique, _, id = rest.rpartition(' ')
for attack in rule["meta"]["att&ck"]:
tactic, _, rest = attack.partition("::")
if "::" in rest:
technique, _, rest = rest.partition("::")
subtechnique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, subtechnique, id))
else:
technique, _, id = rest.rpartition(' ')
technique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, id))
rows = []
@@ -76,15 +76,17 @@ def render_attack(doc, ostream):
for spec in sorted(techniques):
if len(spec) == 2:
technique, id = spec
inner_rows.append('%s %s' % (rutils.bold(technique), id))
inner_rows.append("%s %s" % (rutils.bold(technique), id))
elif len(spec) == 3:
technique, subtechnique, id = spec
inner_rows.append('%s::%s %s' % (rutils.bold(technique), subtechnique, id))
inner_rows.append("%s::%s %s" % (rutils.bold(technique), subtechnique, id))
else:
raise RuntimeError('unexpected ATT&CK spec format')
rows.append((rutils.bold(tactic.upper()), '\n'.join(inner_rows), ))
ostream.write(tabulate.tabulate(rows, headers=[width('ATT&CK Tactic', 20), width('ATT&CK Technique', 60)], tablefmt='psql'))
ostream.write('\n')
raise RuntimeError("unexpected ATT&CK spec format")
rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),))
ostream.write(
tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 60)], tablefmt="psql")
)
ostream.write("\n")
def render_default(doc):

View File

@@ -4,38 +4,40 @@ import termcolor
def bold(s):
"""draw attention to the given string"""
return termcolor.colored(s, 'blue')
return termcolor.colored(s, "blue")
def bold2(s):
"""draw attention to the given string, within a `bold` section"""
return termcolor.colored(s, 'green')
return termcolor.colored(s, "green")
def hex(n):
"""render the given number using upper case hex, like: 0x123ABC"""
return '0x%X' % n
return "0x%X" % n
def hex_string(h):
""" render hex string e.g. "0a40b1" as "0A 40 B1" """
return ' '.join(h[i:i + 2] for i in range(0, len(h), 2)).upper()
return " ".join(h[i : i + 2] for i in range(0, len(h), 2)).upper()
def capability_rules(doc):
"""enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
for (_, _, rule) in sorted(map(lambda rule: (rule['meta'].get('namespace', ''), rule['meta']['name'], rule), doc.values())):
if rule['meta'].get('lib'):
for (_, _, rule) in sorted(
map(lambda rule: (rule["meta"].get("namespace", ""), rule["meta"]["name"], rule), doc.values())
):
if rule["meta"].get("lib"):
continue
if rule['meta'].get('capa/subscope'):
if rule["meta"].get("capa/subscope"):
continue
if rule['meta'].get('maec/analysis-conclusion'):
if rule["meta"].get("maec/analysis-conclusion"):
continue
if rule['meta'].get('maec/analysis-conclusion-ov'):
if rule["meta"].get("maec/analysis-conclusion-ov"):
continue
if rule['meta'].get('maec/malware-category'):
if rule["meta"].get("maec/malware-category"):
continue
if rule['meta'].get('maec/malware-category-ov'):
if rule["meta"].get("maec/malware-category-ov"):
continue
yield rule
@@ -44,4 +46,4 @@ def capability_rules(doc):
class StringIO(six.StringIO):
def writeln(self, s):
self.write(s)
self.write('\n')
self.write("\n")

View File

@@ -24,29 +24,29 @@ def render_verbose(doc):
ostream = rutils.StringIO()
for rule in rutils.capability_rules(doc):
count = len(rule['matches'])
count = len(rule["matches"])
if count == 1:
capability = rutils.bold(rule['meta']['name'])
capability = rutils.bold(rule["meta"]["name"])
else:
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
ostream.writeln(capability)
rows = []
for key in ('namespace', 'description', 'scope'):
if key == 'name' or key not in rule['meta']:
for key in ("namespace", "description", "scope"):
if key == "name" or key not in rule["meta"]:
continue
v = rule['meta'][key]
v = rule["meta"][key]
if isinstance(v, list) and len(v) == 1:
v = v[0]
rows.append((key, v))
if rule['meta']['scope'] != capa.rules.FILE_SCOPE:
locations = doc[rule['meta']['name']]['matches'].keys()
rows.append(('matches', '\n'.join(map(rutils.hex, locations))))
if rule["meta"]["scope"] != capa.rules.FILE_SCOPE:
locations = doc[rule["meta"]["name"]]["matches"].keys()
rows.append(("matches", "\n".join(map(rutils.hex, locations))))
ostream.writeln(tabulate.tabulate(rows, tablefmt='plain'))
ostream.write('\n')
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
ostream.write("\n")
return ostream.getvalue()

View File

@@ -5,145 +5,147 @@ import capa.render.utils as rutils
def render_statement(ostream, statement, indent=0):
ostream.write(' ' * indent)
if statement['type'] in ('and', 'or', 'optional'):
ostream.write(statement['type'])
ostream.writeln(':')
elif statement['type'] == 'not':
ostream.write(" " * indent)
if statement["type"] in ("and", "or", "optional"):
ostream.write(statement["type"])
ostream.writeln(":")
elif statement["type"] == "not":
# this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
ostream.writeln('not:')
elif statement['type'] == 'some':
ostream.write(statement['count'] + ' or more')
ostream.writeln(':')
elif statement['type'] == 'range':
ostream.writeln("not:")
elif statement["type"] == "some":
ostream.write(statement["count"] + " or more")
ostream.writeln(":")
elif statement["type"] == "range":
# `range` is a weird node; it's almost a hybrid of statement+feature.
# it is a specific feature repeated multiple times.
# there's no additional logic in the feature part, just the existence of a feature.
# so, we have to inline some of the feature rendering here.
child = statement['child']
if child['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'):
feature = '%s(%s)' % (child['type'], rutils.bold2(child[child['type']]))
elif child['type'] in ('number', 'offset'):
feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex(child[child['type']])))
elif child['type'] == 'bytes':
feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex_string(child[child['type']])))
elif child['type'] == 'characteristic':
feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'][0]))
child = statement["child"]
if child["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
feature = "%s(%s)" % (child["type"], rutils.bold2(child[child["type"]]))
elif child["type"] in ("number", "offset"):
feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex(child[child["type"]])))
elif child["type"] == "bytes":
feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex_string(child[child["type"]])))
elif child["type"] == "characteristic":
feature = "characteristic(%s)" % (rutils.bold2(child["characteristic"][0]))
else:
raise RuntimeError('unexpected feature type: ' + str(child))
raise RuntimeError("unexpected feature type: " + str(child))
ostream.write('count(%s): ' % feature)
ostream.write("count(%s): " % feature)
if statement['max'] == statement['min']:
ostream.writeln('%d' % (statement['min']))
elif statement['min'] == 0:
ostream.writeln('%d or fewer' % (statement['max']))
elif statement['max'] == (1 << 64 - 1):
ostream.writeln('%d or more' % (statement['min']))
if statement["max"] == statement["min"]:
ostream.writeln("%d" % (statement["min"]))
elif statement["min"] == 0:
ostream.writeln("%d or fewer" % (statement["max"]))
elif statement["max"] == (1 << 64 - 1):
ostream.writeln("%d or more" % (statement["min"]))
else:
ostream.writeln('between %d and %d' % (statement['min'], statement['max']))
elif statement['type'] == 'subscope':
ostream.write(statement['subscope'])
ostream.writeln(':')
elif statement['type'] == 'regex':
ostream.writeln("between %d and %d" % (statement["min"], statement["max"]))
elif statement["type"] == "subscope":
ostream.write(statement["subscope"])
ostream.writeln(":")
elif statement["type"] == "regex":
# regex is a `Statement` not a `Feature`
# this is because it doesn't get extracted, but applies to all strings in scope.
# so we have to handle it here
ostream.writeln('string: %s' % (statement['match']))
ostream.writeln("string: %s" % (statement["match"]))
else:
raise RuntimeError("unexpected match statement type: " + str(statement))
def render_feature(ostream, match, feature, indent=0):
ostream.write(' ' * indent)
ostream.write(" " * indent)
if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'):
ostream.write(feature['type'])
ostream.write(': ')
ostream.write(rutils.bold2(feature[feature['type']]))
elif feature['type'] in ('number', 'offset'):
ostream.write(feature['type'])
ostream.write(': ')
ostream.write(rutils.bold2(rutils.hex(feature[feature['type']])))
elif feature['type'] == 'bytes':
ostream.write('bytes: ')
if feature["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
ostream.write(feature["type"])
ostream.write(": ")
ostream.write(rutils.bold2(feature[feature["type"]]))
elif feature["type"] in ("number", "offset"):
ostream.write(feature["type"])
ostream.write(": ")
ostream.write(rutils.bold2(rutils.hex(feature[feature["type"]])))
elif feature["type"] == "bytes":
ostream.write("bytes: ")
# bytes is the uppercase, hex-encoded string.
# it should always be an even number of characters (its hex).
ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
elif feature['type'] == 'characteristic':
ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0])))
ostream.write(rutils.bold2(rutils.hex_string(feature[feature["type"]])))
elif feature["type"] == "characteristic":
ostream.write("characteristic(%s)" % (rutils.bold2(feature["characteristic"][0])))
# note that regex is found in `render_statement`
else:
raise RuntimeError('unexpected feature type: ' + str(feature))
raise RuntimeError("unexpected feature type: " + str(feature))
# it's possible to have an empty locations array here,
# such as when we're in MODE_FAILURE and showing the logic
# under a `not` statement (which will have no matched locations).
locations = list(sorted(match.get('locations', [])))
locations = list(sorted(match.get("locations", [])))
if len(locations) == 1:
ostream.write(' @ ')
ostream.write(" @ ")
ostream.write(rutils.hex(locations[0]))
elif len(locations) > 1:
ostream.write(' @ ')
ostream.write(" @ ")
if len(locations) > 4:
# don't display too many locations, because it becomes very noisy.
# probably only the first handful of locations will be useful for inspection.
ostream.write(', '.join(map(rutils.hex, locations[0:4])))
ostream.write(', and %d more...' % (len(locations) - 4))
ostream.write(", ".join(map(rutils.hex, locations[0:4])))
ostream.write(", and %d more..." % (len(locations) - 4))
else:
ostream.write(', '.join(map(rutils.hex, locations)))
ostream.write(", ".join(map(rutils.hex, locations)))
ostream.write('\n')
ostream.write("\n")
def render_node(ostream, match, node, indent=0):
if node['type'] == 'statement':
render_statement(ostream, node['statement'], indent=indent)
elif node['type'] == 'feature':
render_feature(ostream, match, node['feature'], indent=indent)
if node["type"] == "statement":
render_statement(ostream, node["statement"], indent=indent)
elif node["type"] == "feature":
render_feature(ostream, match, node["feature"], indent=indent)
else:
raise RuntimeError('unexpected node type: ' + str(node))
raise RuntimeError("unexpected node type: " + str(node))
# display nodes that successfully evaluated against the sample.
MODE_SUCCESS = 'success'
MODE_SUCCESS = "success"
# display nodes that did not evaluate to True against the sample.
# this is useful when rendering the logic tree under a `not` node.
MODE_FAILURE = 'failure'
MODE_FAILURE = "failure"
def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
child_mode = mode
if mode == MODE_SUCCESS:
# display only nodes that evaluated successfully.
if not match['success']:
if not match["success"]:
return
# optional statement with no successful children is empty
if (match['node'].get('statement', {}).get('type') == 'optional'
and not any(map(lambda m: m['success'], match['children']))):
if match["node"].get("statement", {}).get("type") == "optional" and not any(
map(lambda m: m["success"], match["children"])
):
return
# not statement, so invert the child mode to show failed evaluations
if match['node'].get('statement', {}).get('type') == 'not':
if match["node"].get("statement", {}).get("type") == "not":
child_mode = MODE_FAILURE
elif mode == MODE_FAILURE:
# display only nodes that did not evaluate to True
if match['success']:
if match["success"]:
return
# optional statement with successful children is not relevant
if (match['node'].get('statement', {}).get('type') == 'optional'
and any(map(lambda m: m['success'], match['children']))):
if match["node"].get("statement", {}).get("type") == "optional" and any(
map(lambda m: m["success"], match["children"])
):
return
# not statement, so invert the child mode to show successful evaluations
if match['node'].get('statement', {}).get('type') == 'not':
if match["node"].get("statement", {}).get("type") == "not":
child_mode = MODE_SUCCESS
else:
raise RuntimeError('unexpected mode: ' + mode)
raise RuntimeError("unexpected mode: " + mode)
render_node(ostream, match, match['node'], indent=indent)
render_node(ostream, match, match["node"], indent=indent)
for child in match['children']:
for child in match["children"]:
render_match(ostream, child, indent=indent + 1, mode=child_mode)
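# illustrative walk (tree is made up): a successful `not` node renders in
# MODE_SUCCESS, but its children flip to MODE_FAILURE so the reader sees the
# features that did NOT match; a `not` nested underneath flips back again.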
@@ -151,44 +153,44 @@ def render_vverbose(doc):
ostream = rutils.StringIO()
for rule in rutils.capability_rules(doc):
count = len(rule['matches'])
count = len(rule["matches"])
if count == 1:
capability = rutils.bold(rule['meta']['name'])
capability = rutils.bold(rule["meta"]["name"])
else:
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
ostream.writeln(capability)
rows = []
for key in capa.rules.META_KEYS:
if key == 'name' or key not in rule['meta']:
if key == "name" or key not in rule["meta"]:
continue
v = rule['meta'][key]
v = rule["meta"][key]
if isinstance(v, list) and len(v) == 1:
v = v[0]
elif isinstance(v, list) and len(v) > 1:
v = ', '.join(v)
v = ", ".join(v)
rows.append((key, v))
ostream.writeln(tabulate.tabulate(rows, tablefmt='plain'))
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
if rule['meta']['scope'] == capa.rules.FILE_SCOPE:
matches = list(doc[rule['meta']['name']]['matches'].values())
if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
matches = list(doc[rule["meta"]["name"]]["matches"].values())
if len(matches) != 1:
# i think there should only ever be one match per file-scope rule,
# because we do the file-scope evaluation a single time.
# but i'm not 100% sure if this is/will always be true.
# so, let's be explicit about our assumptions and raise an exception if they fail.
raise RuntimeError('unexpected file scope match count: ' + len(matches))
raise RuntimeError("unexpected file scope match count: " + len(matches))
render_match(ostream, matches[0], indent=0)
else:
for location, match in sorted(doc[rule['meta']['name']]['matches'].items()):
ostream.write(rule['meta']['scope'])
ostream.write(' @ ')
for location, match in sorted(doc[rule["meta"]["name"]]["matches"].items()):
ostream.write(rule["meta"]["scope"])
ostream.write(" @ ")
ostream.writeln(rutils.hex(location))
render_match(ostream, match, indent=1)
ostream.write('\n')
ostream.write("\n")
return ostream.getvalue()

View File

@@ -22,32 +22,32 @@ logger = logging.getLogger(__name__)
# these are the standard metadata fields, in the preferred order.
# when reformatted, any custom keys will come after these.
META_KEYS = (
'name',
'namespace',
'rule-category',
'maec/analysis-conclusion',
'maec/analysis-conclusion-ov',
'maec/malware-category',
'maec/malware-category-ov',
'author',
'description',
'lib',
'scope',
'att&ck',
'mbc',
'references',
'examples'
"name",
"namespace",
"rule-category",
"maec/analysis-conclusion",
"maec/analysis-conclusion-ov",
"maec/malware-category",
"maec/malware-category-ov",
"author",
"description",
"lib",
"scope",
"att&ck",
"mbc",
"references",
"examples",
)
# these are meta fields that are internal to capa,
# and added during rule reading/construction.
# they may help us manipulate or index rules,
# but should not be exposed to clients.
HIDDEN_META_KEYS = ('capa/nursery', 'capa/path')
HIDDEN_META_KEYS = ("capa/nursery", "capa/path")
FILE_SCOPE = 'file'
FUNCTION_SCOPE = 'function'
BASIC_BLOCK_SCOPE = 'basic block'
FILE_SCOPE = "file"
FUNCTION_SCOPE = "function"
BASIC_BLOCK_SCOPE = "basic block"
SUPPORTED_FEATURES = {
@@ -56,7 +56,7 @@ SUPPORTED_FEATURES = {
capa.features.file.Export,
capa.features.file.Import,
capa.features.file.Section,
capa.features.Characteristic('embedded pe'),
capa.features.Characteristic("embedded pe"),
capa.features.String,
},
FUNCTION_SCOPE: {
@@ -68,18 +68,18 @@ SUPPORTED_FEATURES = {
capa.features.insn.Offset,
capa.features.insn.Mnemonic,
capa.features.basicblock.BasicBlock,
capa.features.Characteristic('switch'),
capa.features.Characteristic('nzxor'),
capa.features.Characteristic('peb access'),
capa.features.Characteristic('fs access'),
capa.features.Characteristic('gs access'),
capa.features.Characteristic('cross section flow'),
capa.features.Characteristic('stack string'),
capa.features.Characteristic('calls from'),
capa.features.Characteristic('calls to'),
capa.features.Characteristic('indirect call'),
capa.features.Characteristic('loop'),
capa.features.Characteristic('recursive call')
capa.features.Characteristic("switch"),
capa.features.Characteristic("nzxor"),
capa.features.Characteristic("peb access"),
capa.features.Characteristic("fs access"),
capa.features.Characteristic("gs access"),
capa.features.Characteristic("cross section flow"),
capa.features.Characteristic("stack string"),
capa.features.Characteristic("calls from"),
capa.features.Characteristic("calls to"),
capa.features.Characteristic("indirect call"),
capa.features.Characteristic("loop"),
capa.features.Characteristic("recursive call"),
},
BASIC_BLOCK_SCOPE: {
capa.features.MatchedRule,
@@ -89,14 +89,14 @@ SUPPORTED_FEATURES = {
capa.features.Bytes,
capa.features.insn.Offset,
capa.features.insn.Mnemonic,
capa.features.Characteristic('nzxor'),
capa.features.Characteristic('peb access'),
capa.features.Characteristic('fs access'),
capa.features.Characteristic('gs access'),
capa.features.Characteristic('cross section flow'),
capa.features.Characteristic('tight loop'),
capa.features.Characteristic('stack string'),
capa.features.Characteristic('indirect call')
capa.features.Characteristic("nzxor"),
capa.features.Characteristic("peb access"),
capa.features.Characteristic("fs access"),
capa.features.Characteristic("gs access"),
capa.features.Characteristic("cross section flow"),
capa.features.Characteristic("tight loop"),
capa.features.Characteristic("stack string"),
capa.features.Characteristic("indirect call"),
},
}
@@ -107,7 +107,7 @@ class InvalidRule(ValueError):
self.msg = msg
def __str__(self):
return 'invalid rule: %s' % (self.msg)
return "invalid rule: %s" % (self.msg)
def __repr__(self):
return str(self)
@@ -121,7 +121,7 @@ class InvalidRuleWithPath(InvalidRule):
self.__cause__ = None
def __str__(self):
return 'invalid rule: %s: %s' % (self.path, self.msg)
return "invalid rule: %s: %s" % (self.path, self.msg)
class InvalidRuleSet(ValueError):
@@ -130,7 +130,7 @@ class InvalidRuleSet(ValueError):
self.msg = msg
def __str__(self):
return 'invalid rule set: %s' % (self.msg)
return "invalid rule set: %s" % (self.msg)
def __repr__(self):
return str(self)
@@ -139,111 +139,112 @@ class InvalidRuleSet(ValueError):
def ensure_feature_valid_for_scope(scope, feature):
if isinstance(feature, capa.features.Characteristic):
if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
raise InvalidRule('feature %s not supported for scope %s' % (feature, scope))
raise InvalidRule("feature %s not supported for scope %s" % (feature, scope))
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
raise InvalidRule('feature %s not supported for scope %s' % (feature, scope))
raise InvalidRule("feature %s not supported for scope %s" % (feature, scope))
def parse_int(s):
if s.startswith('0x'):
if s.startswith("0x"):
return int(s, 0x10)
else:
return int(s, 10)
def parse_range(s):
'''
"""
parse a string "(0, 1)" into a range (min, max).
min and/or max may be None to indicate an unbound range.
'''
"""
# we want to use `{` characters, but this is a dict in yaml.
if not s.startswith('('):
raise InvalidRule('invalid range: %s' % (s))
if not s.startswith("("):
raise InvalidRule("invalid range: %s" % (s))
if not s.endswith(')'):
raise InvalidRule('invalid range: %s' % (s))
if not s.endswith(")"):
raise InvalidRule("invalid range: %s" % (s))
s = s[len('('):-len(')')]
min, _, max = s.partition(',')
s = s[len("(") : -len(")")]
min, _, max = s.partition(",")
min = min.strip()
max = max.strip()
if min:
min = parse_int(min.strip())
if min < 0:
raise InvalidRule('range min less than zero')
raise InvalidRule("range min less than zero")
else:
min = None
if max:
max = parse_int(max.strip())
if max < 0:
raise InvalidRule('range max less than zero')
raise InvalidRule("range max less than zero")
else:
max = None
if min is not None and max is not None:
if max < min:
raise InvalidRule('range max less than min')
raise InvalidRule("range max less than min")
return min, max
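
For illustration, a minimal usage sketch of the two helpers above; this assumes capa.rules is importable, and the expected values follow the parsing rules described in the docstring:

import capa.rules

# hexadecimal and decimal integers are both accepted
assert capa.rules.parse_int("0x10") == 16
assert capa.rules.parse_int("10") == 10

# omitted bounds become None, indicating an unbound range
assert capa.rules.parse_range("(2, 10)") == (2, 10)
assert capa.rules.parse_range("(2, )") == (2, None)
assert capa.rules.parse_range("(, 10)") == (None, 10)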
def parse_feature(key):
# keep this in sync with supported features
if key == 'api':
if key == "api":
return capa.features.insn.API
elif key == 'string':
elif key == "string":
return capa.features.String
elif key == 'bytes':
elif key == "bytes":
return capa.features.Bytes
elif key == 'number':
elif key == "number":
return capa.features.insn.Number
elif key == 'offset':
elif key == "offset":
return capa.features.insn.Offset
elif key == 'mnemonic':
elif key == "mnemonic":
return capa.features.insn.Mnemonic
elif key == 'basic blocks':
elif key == "basic blocks":
return capa.features.basicblock.BasicBlock
elif key.startswith('characteristic(') and key.endswith(')'):
characteristic = key[len('characteristic('):-len(')')]
elif key.startswith("characteristic(") and key.endswith(")"):
characteristic = key[len("characteristic(") : -len(")")]
return lambda v: capa.features.Characteristic(characteristic, v)
elif key == 'export':
elif key == "export":
return capa.features.file.Export
elif key == 'import':
elif key == "import":
return capa.features.file.Import
elif key == 'section':
elif key == "section":
return capa.features.file.Section
elif key == 'match':
elif key == "match":
return capa.features.MatchedRule
else:
raise InvalidRule('unexpected statement: %s' % key)
raise InvalidRule("unexpected statement: %s" % key)
def parse_symbol(s, value_type):
'''
"""
s can be an int or a string
'''
if isinstance(s, str) and '=' in s:
value, symbol = s.split('=', 1)
"""
if isinstance(s, str) and "=" in s:
value, symbol = s.split("=", 1)
symbol = symbol.strip()
if symbol == '':
if symbol == "":
raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
else:
value = s
symbol = None
if isinstance(value, str):
if value_type == 'bytes':
if value_type == "bytes":
try:
value = codecs.decode(value.replace(' ', ''), 'hex')
value = codecs.decode(value.replace(" ", ""), "hex")
# TODO: remove TypeError once Python 2 is no longer supported
except (TypeError, binascii.Error):
raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value)
if len(value) > MAX_BYTES_FEATURE_SIZE:
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
MAX_BYTES_FEATURE_SIZE)
raise InvalidRule(
"unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE
)
else:
try:
value = parse_int(value)
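
To make the `value = symbol` syntax concrete, a small sketch; the return shape matches how callers below unpack it, and the tail of this function is elided by the diff hunk:

import capa.rules

# numbers may carry a human-readable symbol after `=`
value, symbol = capa.rules.parse_symbol("0x4550 = IMAGE_DOS_SIGNATURE", "number")
# -> (0x4550, "IMAGE_DOS_SIGNATURE")

# bytes values are hex strings, optionally space-separated
value, symbol = capa.rules.parse_symbol("4D 5A 90 00", "bytes")
# -> (b"MZ\x90\x00", None)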
@@ -255,54 +256,54 @@ def parse_symbol(s, value_type):
def build_statements(d, scope):
if len(d.keys()) != 1:
raise InvalidRule('too many statements')
raise InvalidRule("too many statements")
key = list(d.keys())[0]
if key == 'and':
if key == "and":
return And(*[build_statements(dd, scope) for dd in d[key]])
elif key == 'or':
elif key == "or":
return Or(*[build_statements(dd, scope) for dd in d[key]])
elif key == 'not':
elif key == "not":
if len(d[key]) != 1:
raise InvalidRule('not statement must have exactly one child statement')
raise InvalidRule("not statement must have exactly one child statement")
return Not(*[build_statements(dd, scope) for dd in d[key]])
elif key.endswith(' or more'):
count = int(key[:-len('or more')])
elif key.endswith(" or more"):
count = int(key[: -len("or more")])
return Some(count, *[build_statements(dd, scope) for dd in d[key]])
elif key == 'optional':
elif key == "optional":
# `optional` is an alias for `0 or more`
# which is useful for documenting behaviors,
# like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
return Some(0, *[build_statements(dd, scope) for dd in d[key]])
elif key == 'function':
elif key == "function":
if scope != FILE_SCOPE:
raise InvalidRule('function subscope supported only for file scope')
raise InvalidRule("function subscope supported only for file scope")
if len(d[key]) != 1:
raise InvalidRule('subscope must have exactly one child statement')
raise InvalidRule("subscope must have exactly one child statement")
return Subscope(FUNCTION_SCOPE, *[build_statements(dd, FUNCTION_SCOPE) for dd in d[key]])
elif key == 'basic block':
elif key == "basic block":
if scope != FUNCTION_SCOPE:
raise InvalidRule('basic block subscope supported only for function scope')
raise InvalidRule("basic block subscope supported only for function scope")
if len(d[key]) != 1:
raise InvalidRule('subscope must have exactly one child statement')
raise InvalidRule("subscope must have exactly one child statement")
return Subscope(BASIC_BLOCK_SCOPE, *[build_statements(dd, BASIC_BLOCK_SCOPE) for dd in d[key]])
elif key.startswith('count(') and key.endswith(')'):
elif key.startswith("count(") and key.endswith(")"):
# e.g.:
#
# count(basic block)
# count(mnemonic(mov))
# count(characteristic(nzxor))
term = key[len('count('):-len(')')]
term = key[len("count(") : -len(")")]
if term.startswith('characteristic('):
if term.startswith("characteristic("):
# characteristic features are specified a bit specially:
# they simply indicate the presence of something unusual/interesting,
# and we embed the name in the feature name, like `characteristic(nzxor)`.
@@ -320,18 +321,18 @@ def build_statements(d, scope):
# - mnemonic: mov
#
# but here we deal with the form: `mnemonic(mov)`.
term, _, arg = term.partition('(')
term, _, arg = term.partition("(")
Feature = parse_feature(term)
if arg:
arg = arg[:-len(')')]
arg = arg[: -len(")")]
# can't rely on yaml parsing ints embedded within strings
# like:
#
# count(offset(0xC))
# count(number(0x11223344))
# count(number(0x100 = symbol name))
if term in ('number', 'offset', 'bytes'):
if term in ("number", "offset", "bytes"):
value, symbol = parse_symbol(arg, term)
feature = Feature(value, symbol)
else:
@@ -348,29 +349,31 @@ def build_statements(d, scope):
count = d[key]
if isinstance(count, int):
return Range(feature, min=count, max=count)
elif count.endswith(' or more'):
min = parse_int(count[:-len(' or more')])
elif count.endswith(" or more"):
min = parse_int(count[: -len(" or more")])
max = None
return Range(feature, min=min, max=max)
elif count.endswith(' or fewer'):
elif count.endswith(" or fewer"):
min = None
max = parse_int(count[:-len(' or fewer')])
max = parse_int(count[: -len(" or fewer")])
return Range(feature, min=min, max=max)
elif count.startswith('('):
elif count.startswith("("):
min, max = parse_range(count)
return Range(feature, min=min, max=max)
else:
raise InvalidRule('unexpected range: %s' % (count))
elif key == 'string' and d[key].startswith('/') and (d[key].endswith('/') or d[key].endswith('/i')):
raise InvalidRule("unexpected range: %s" % (count))
elif key == "string" and d[key].startswith("/") and (d[key].endswith("/") or d[key].endswith("/i")):
try:
return Regex(d[key])
except re.error:
if d[key].endswith('/i'):
d[key] = d[key][:-len('i')]
raise InvalidRule('invalid regular expression: %s, it should use Python syntax, try it at https://pythex.org' % d[key])
if d[key].endswith("/i"):
d[key] = d[key][: -len("i")]
raise InvalidRule(
"invalid regular expression: %s, it should use Python syntax, try it at https://pythex.org" % d[key]
)
else:
Feature = parse_feature(key)
if key in ('number', 'offset', 'bytes'):
if key in ("number", "offset", "bytes"):
# parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
# or regular numbers, e.g. 37
value, symbol = parse_symbol(d[key], key)
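
As a hedged illustration of the count grammar handled above, each of the following mappings (as parsed from YAML) builds a Range statement; the results noted in comments follow from the branches shown, and FUNCTION_SCOPE and build_statements are the names defined in this file:

import capa.rules

# exact count: min == max
capa.rules.build_statements({"count(mnemonic(mov))": 5}, capa.rules.FUNCTION_SCOPE)
# -> Range(Mnemonic("mov"), min=5, max=5)

# upper bound only
capa.rules.build_statements({"count(offset(0xC))": "10 or fewer"}, capa.rules.FUNCTION_SCOPE)
# -> Range(Offset(0xC), min=None, max=10)

# explicit (min, max) range
capa.rules.build_statements({"count(characteristic(nzxor))": "(1, 3)"}, capa.rules.FUNCTION_SCOPE)
# -> Range(Characteristic("nzxor"), min=1, max=3)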
@@ -390,7 +393,7 @@ def second(s):
# we use the ruamel.yaml parser because it supports roundtripping of documents with comments.
yaml = ruamel.yaml.YAML(typ='rt')
yaml = ruamel.yaml.YAML(typ="rt")
# use block mode, not inline json-like mode
@@ -410,7 +413,7 @@ yaml.width = 4096
class Rule(object):
def __init__(self, name, scope, statement, meta, definition=''):
def __init__(self, name, scope, statement, meta, definition=""):
super(Rule, self).__init__()
self.name = name
self.scope = scope
@@ -419,13 +422,13 @@ class Rule(object):
self.definition = definition
def __str__(self):
return 'Rule(name=%s)' % (self.name)
return "Rule(name=%s)" % (self.name)
def __repr__(self):
return 'Rule(scope=%s, name=%s)' % (self.scope, self.name)
return "Rule(scope=%s, name=%s)" % (self.scope, self.name)
def get_dependencies(self, namespaces):
'''
"""
fetch the names of rules this rule relies upon.
these are only the direct dependencies; a user must
compute the transitive dependency graph themselves, if they want it.
@@ -436,7 +439,7 @@ class Rule(object):
Returns:
List[str]: names of rules upon which this rule depends.
'''
"""
deps = set([])
def rec(statement):
@@ -469,24 +472,31 @@ class Rule(object):
def _extract_subscope_rules_rec(self, statement):
if isinstance(statement, Statement):
# for each child that is a subscope,
for subscope in filter(lambda statement: isinstance(statement, capa.engine.Subscope), statement.get_children()):
for subscope in filter(
lambda statement: isinstance(statement, capa.engine.Subscope), statement.get_children()
):
# create a new rule from it.
# the name is a randomly generated, hopefully unique value.
# ideally, this won't ever be rendered to a user.
name = self.name + '/' + uuid.uuid4().hex
new_rule = Rule(name, subscope.scope, subscope.child, {
'name': name,
'scope': subscope.scope,
# these derived rules are never meant to be inspected separately,
# they are dependencies for the parent rule,
# so mark it as such.
'lib': True,
# metadata that indicates this is derived from a subscope statement
'capa/subscope-rule': True,
# metadata that links the child rule to the parent rule
'capa/parent': self.name,
})
name = self.name + "/" + uuid.uuid4().hex
new_rule = Rule(
name,
subscope.scope,
subscope.child,
{
"name": name,
"scope": subscope.scope,
# these derived rules are never meant to be inspected separately,
# they are dependencies for the parent rule,
# so mark it as such.
"lib": True,
# metadata that indicates this is derived from a subscope statement
"capa/subscope-rule": True,
# metadata that links the child rule to the parent rule
"capa/parent": self.name,
},
)
# update the existing statement to `match` the new rule
new_node = capa.features.MatchedRule(name)
@@ -503,7 +513,7 @@ class Rule(object):
yield new_rule
def extract_subscope_rules(self):
'''
"""
scan through the statements of this rule,
replacing subscope statements with `match` references to a newly created rule,
which are yielded from this routine.
@@ -514,7 +524,7 @@ class Rule(object):
for derived_rule in rule.extract_subscope_rules():
assert derived_rule.meta['capa/parent'] == rule.name
'''
"""
# recurse through statements
# when we encounter a Subscope statement
@@ -531,27 +541,21 @@ class Rule(object):
@classmethod
def from_dict(cls, d, s):
name = d['rule']['meta']['name']
name = d["rule"]["meta"]["name"]
# if scope is not specified, default to function scope.
# this is probably the mode that rule authors will start with.
scope = d['rule']['meta'].get('scope', FUNCTION_SCOPE)
statements = d['rule']['features']
scope = d["rule"]["meta"].get("scope", FUNCTION_SCOPE)
statements = d["rule"]["features"]
# the rule must start with a single logic node.
# doing anything else is too implicit and difficult to remove (AND vs OR ???).
if len(statements) != 1:
raise InvalidRule('rule must begin with a single top level statement')
raise InvalidRule("rule must begin with a single top level statement")
if isinstance(statements[0], capa.engine.Subscope):
raise InvalidRule('top level statement may not be a subscope')
raise InvalidRule("top level statement may not be a subscope")
return cls(
name,
scope,
build_statements(statements[0], scope),
d['rule']['meta'],
s
)
return cls(name, scope, build_statements(statements[0], scope), d["rule"]["meta"], s)
@classmethod
def from_yaml(cls, s):
@@ -559,9 +563,9 @@ class Rule(object):
@classmethod
def from_yaml_file(cls, path):
with open(path, 'rb') as f:
with open(path, "rb") as f:
try:
return cls.from_yaml(f.read().decode('utf-8'))
return cls.from_yaml(f.read().decode("utf-8"))
except InvalidRule as e:
raise InvalidRuleWithPath(path, str(e))
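
A hedged end-to-end sketch of the classmethods above; the rule content is hypothetical and kept minimal:

import textwrap
import capa.rules

source = textwrap.dedent(
    """
    rule:
      meta:
        name: example rule
      features:
        - and:
          - mnemonic: mov
          - characteristic(nzxor): true
    """
)
rule = capa.rules.Rule.from_yaml(source)
# scope was omitted, so from_dict defaults it to FUNCTION_SCOPE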
@@ -578,11 +582,11 @@ class Rule(object):
definition = yaml.load(self.definition)
# definition retains a reference to `meta`,
# so we're updating that in place.
definition['rule']['meta'] = self.meta
definition["rule"]["meta"] = self.meta
meta = self.meta
meta['name'] = self.name
meta['scope'] = self.scope
meta["name"] = self.name
meta["scope"] = self.scope
def move_to_end(m, k):
# ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap).
@@ -592,8 +596,8 @@ class Rule(object):
del m[k]
m[k] = v
move_to_end(definition['rule'], 'meta')
move_to_end(definition['rule'], 'features')
move_to_end(definition["rule"], "meta")
move_to_end(definition["rule"], "features")
for key in META_KEYS:
if key in meta:
@@ -624,11 +628,11 @@ class Rule(object):
continue
meta[key] = value
return ostream.getvalue().decode('utf-8').rstrip('\n') + '\n'
return ostream.getvalue().decode("utf-8").rstrip("\n") + "\n"
def get_rules_with_scope(rules, scope):
'''
"""
from the given collection of rules, select those with the given scope.
args:
@@ -637,12 +641,12 @@ def get_rules_with_scope(rules, scope):
returns:
List[capa.rules.Rule]:
'''
"""
return list(rule for rule in rules if rule.scope == scope)
def get_rules_and_dependencies(rules, rule_name):
'''
"""
from the given collection of rules, select a rule and its dependencies (transitively).
args:
@@ -651,7 +655,7 @@ def get_rules_and_dependencies(rules, rule_name):
yields:
Rule:
'''
"""
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
rules = list(rules)
namespaces = index_rules_by_namespace(rules)
@@ -674,17 +678,17 @@ def ensure_rules_are_unique(rules):
seen = set([])
for rule in rules:
if rule.name in seen:
raise InvalidRule('duplicate rule name: ' + rule.name)
raise InvalidRule("duplicate rule name: " + rule.name)
seen.add(rule.name)
def ensure_rule_dependencies_are_met(rules):
'''
"""
raise an exception if a rule dependency does not exist.
raises:
InvalidRule: if a dependency is not met.
'''
"""
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
rules = list(rules)
namespaces = index_rules_by_namespace(rules)
@@ -696,7 +700,7 @@ def ensure_rule_dependencies_are_met(rules):
def index_rules_by_namespace(rules):
'''
"""
compute the rules that fit into each namespace found within the given rules.
for example, given:
@@ -714,23 +718,23 @@ def index_rules_by_namespace(rules):
rules (List[Rule]):
Returns: Dict[str, List[Rule]]
'''
"""
namespaces = collections.defaultdict(list)
for rule in rules:
namespace = rule.meta.get('namespace')
namespace = rule.meta.get("namespace")
if not namespace:
continue
while namespace:
namespaces[namespace].append(rule)
namespace, _, _ = namespace.rpartition('/')
namespace, _, _ = namespace.rpartition("/")
return dict(namespaces)
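
A standalone sketch of the indexing behavior above, using hypothetical dict-shaped rules for brevity:

import collections

def index_by_namespace(rules):
    # mirror of the loop above: each rule is indexed under its namespace
    # and every parent namespace, walking up via rpartition("/")
    namespaces = collections.defaultdict(list)
    for rule in rules:
        namespace = rule["namespace"]
        while namespace:
            namespaces[namespace].append(rule)
            namespace, _, _ = namespace.rpartition("/")
    return dict(namespaces)

rules = [{"name": "create reverse shell", "namespace": "c2/shell"}]
index = index_by_namespace(rules)
assert set(index.keys()) == {"c2/shell", "c2"}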
class RuleSet(object):
'''
"""
a ruleset is initialized with a collection of rules, which it verifies and sorts into scopes.
each set of scoped rules is sorted topologically, which enables rules to match on past rule matches.
@@ -742,7 +746,7 @@ class RuleSet(object):
...
])
capa.engine.match(ruleset.file_rules, ...)
'''
"""
def __init__(self, rules):
super(RuleSet, self).__init__()
@@ -754,7 +758,7 @@ class RuleSet(object):
ensure_rule_dependencies_are_met(rules)
if len(rules) == 0:
raise InvalidRuleSet('no rules selected')
raise InvalidRuleSet("no rules selected")
self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
@@ -769,12 +773,12 @@ class RuleSet(object):
@staticmethod
def _get_rules_for_scope(rules, scope):
'''
"""
given a collection of rules, collect the rules that are needed at the given scope.
these rules are ordered topologically.
don't include "lib" rules, unless they are dependencies of other rules.
'''
"""
scope_rules = set([])
# we need to process all rules, not just rules with the given scope.
@@ -782,7 +786,7 @@ class RuleSet(object):
# at lower scope, e.g. function scope.
# so, we find all dependencies of all rules, and will later filter them down.
for rule in rules:
if rule.meta.get('lib', False):
if rule.meta.get("lib", False):
continue
scope_rules.update(get_rules_and_dependencies(rules, rule.name))
@@ -790,7 +794,7 @@ class RuleSet(object):
@staticmethod
def _extract_subscope_rules(rules):
'''
"""
process the given sequence of rules.
for each one, extract any embedded subscope rules into their own rule.
process these recursively.
@@ -798,7 +802,7 @@ class RuleSet(object):
note: this operation mutates the rules passed in - they may now have `match` statements
for the extracted subscope rules.
'''
"""
done = []
# use a queue of rules, because we'll be modifying the list (appending new items) as we go.
@@ -811,14 +815,14 @@ class RuleSet(object):
return done
def filter_rules_by_meta(self, tag):
'''
"""
return a new rule set with rules filtered based on all meta field values; all dependency rules are added.
apply the tag-based rule filter assuming that all required rules are loaded.
can be used to select specific rules, as opposed to providing a rules child directory from which capa cannot resolve
dependencies via unknown paths.
TODO handle circular dependencies?
TODO support -t=metafield <k>
'''
"""
rules = self.rules.values()
rules_filtered = set([])
for rule in rules:

View File

@@ -1,2 +1,2 @@
__version__ = '0.0.0'
__commit__ = '00000000'
__version__ = "0.0.0"
__commit__ = "00000000"