mirror of https://github.com/mandiant/capa.git (synced 2025-12-22 07:10:29 -08:00)

commit message: pep8
@@ -7,23 +7,24 @@ import capa.features


 class Statement(object):
-    '''
+    """
     superclass for structural nodes, such as and/or/not.
     this exists to provide a default impl for `__str__` and `__repr__`,
     and to declare the interface method `evaluate`
-    '''
+    """

     def __init__(self):
         super(Statement, self).__init__()
         self.name = self.__class__.__name__

     def __str__(self):
-        return '%s(%s)' % (self.name.lower(), ','.join(map(str, self.get_children())))
+        return "%s(%s)" % (self.name.lower(), ",".join(map(str, self.get_children())))

     def __repr__(self):
         return str(self)

     def evaluate(self, ctx):
-        '''
+        """
         classes that inherit `Statement` must implement `evaluate`

         args:
@@ -31,30 +32,30 @@ class Statement(object):

         returns:
           Result
-        '''
+        """
         raise NotImplementedError()

     def get_children(self):
-        if hasattr(self, 'child'):
+        if hasattr(self, "child"):
             yield self.child

-        if hasattr(self, 'children'):
+        if hasattr(self, "children"):
             for child in self.children:
                 yield child

     def replace_child(self, existing, new):
-        if hasattr(self, 'child'):
+        if hasattr(self, "child"):
             if self.child is existing:
                 self.child = new

-        if hasattr(self, 'children'):
+        if hasattr(self, "children"):
             for i, child in enumerate(self.children):
                 if child is existing:
                     self.children[i] = new


 class Result(object):
-    '''
+    """
     represents the results of an evaluation of statements against features.

     instances of this class should behave like a bool,
@@ -65,15 +66,16 @@ class Result(object):
     as well as the children Result instances.

     we need this so that we can render the tree of expressions and their results.
-    '''
+    """

     def __init__(self, success, statement, children, locations=None):
-        '''
+        """
         args:
           success (bool)
          statement (capa.engine.Statement or capa.features.Feature)
          children (list[Result])
          locations (iterable[VA])
-        '''
+        """
        super(Result, self).__init__()
        self.success = success
        self.statement = statement
@@ -93,7 +95,8 @@ class Result(object):


 class And(Statement):
-    '''match if all of the children evaluate to True.'''
+    """match if all of the children evaluate to True."""

     def __init__(self, *children):
         super(And, self).__init__()
         self.children = list(children)
@@ -105,7 +108,8 @@ class And(Statement):


 class Or(Statement):
-    '''match if any of the children evaluate to True.'''
+    """match if any of the children evaluate to True."""

     def __init__(self, *children):
         super(Or, self).__init__()
         self.children = list(children)
@@ -117,7 +121,8 @@ class Or(Statement):


 class Not(Statement):
-    '''match only if the child evaluates to False.'''
+    """match only if the child evaluates to False."""

     def __init__(self, child):
         super(Not, self).__init__()
         self.child = child
@@ -129,7 +134,8 @@ class Not(Statement):


 class Some(Statement):
-    '''match if at least N of the children evaluate to True.'''
+    """match if at least N of the children evaluate to True."""

     def __init__(self, count, *children):
         super(Some, self).__init__()
         self.count = count
@@ -146,7 +152,8 @@ class Some(Statement):


 class Range(Statement):
-    '''match if the child is contained in the ctx set with a count in the given range.'''
+    """match if the child is contained in the ctx set with a count in the given range."""

     def __init__(self, child, min=None, max=None):
         super(Range, self).__init__()
         self.child = child
@@ -162,27 +169,28 @@ class Range(Statement):

     def __str__(self):
         if self.max == (1 << 64 - 1):
-            return 'range(%s, min=%d, max=infinity)' % (str(self.child), self.min)
+            return "range(%s, min=%d, max=infinity)" % (str(self.child), self.min)
         else:
-            return 'range(%s, min=%d, max=%d)' % (str(self.child), self.min, self.max)
+            return "range(%s, min=%d, max=%d)" % (str(self.child), self.min, self.max)


 class Regex(Statement):
-    '''match if the given pattern matches a String feature.'''
+    """match if the given pattern matches a String feature."""

     def __init__(self, pattern):
         super(Regex, self).__init__()
         self.pattern = pattern
-        pat = self.pattern[len('/'):-len('/')]
+        pat = self.pattern[len("/") : -len("/")]
         flags = re.DOTALL
-        if pattern.endswith('/i'):
-            pat = self.pattern[len('/'):-len('/i')]
+        if pattern.endswith("/i"):
+            pat = self.pattern[len("/") : -len("/i")]
             flags |= re.IGNORECASE
         self.re = re.compile(pat, flags)
-        self.match = ''
+        self.match = ""

     def evaluate(self, ctx):
         for feature, locations in ctx.items():
-            if not isinstance(feature, (capa.features.String, )):
+            if not isinstance(feature, (capa.features.String,)):
                 continue

             # `re.search` finds a match anywhere in the given string
@@ -200,27 +208,28 @@ class Regex(Statement):


 class Subscope(Statement):
-    '''
+    """
     a subscope element is a placeholder in a rule - it should not be evaluated directly.
     the engine should preprocess rules to extract subscope statements into their own rules.
-    '''
+    """

     def __init__(self, scope, child):
         super(Subscope, self).__init__()
         self.scope = scope
         self.child = child

     def evaluate(self, ctx):
-        raise ValueError('cannot evaluate a subscope directly!')
+        raise ValueError("cannot evaluate a subscope directly!")


 def topologically_order_rules(rules):
-    '''
+    """
     order the given rules such that dependencies show up before dependents.
     this means that as we match rules, we can add features for the matches, and these
     will be matched by subsequent rules if they follow this order.

     assumes that the rule dependency graph is a DAG.
-    '''
+    """
     # we evaluate `rules` multiple times, so if its a generator, realize it into a list.
     rules = list(rules)
     namespaces = capa.rules.index_rules_by_namespace(rules)
@@ -245,7 +254,7 @@ def topologically_order_rules(rules):


 def match(rules, features, va):
-    '''
+    """
     Args:
       rules (List[capa.rules.Rule]): these must already be ordered topologically by dependency.
       features (Mapping[capa.features.Feature, int]):
@@ -255,7 +264,7 @@ def match(rules, features, va):
       Tuple[List[capa.features.Feature], Dict[str, Tuple[int, capa.engine.Result]]]: two-tuple with entries:
         - list of features used for matching (which may be greater than argument, due to rule match features), and
         - mapping from rule name to (location of match, result object)
-    '''
+    """
     results = collections.defaultdict(list)

     # copy features so that we can modify it
@@ -270,10 +279,10 @@ def match(rules, features, va):
             results[rule.name].append((va, res))
             features[capa.features.MatchedRule(rule.name)].add(va)

-            namespace = rule.meta.get('namespace')
+            namespace = rule.meta.get("namespace")
             if namespace:
                 while namespace:
                     features[capa.features.MatchedRule(namespace)].add(va)
-                    namespace, _, _ = namespace.rpartition('/')
+                    namespace, _, _ = namespace.rpartition("/")

     return (features, results)
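The classes above form capa's small boolean evaluation engine: `ctx` maps each observed feature to the set of addresses where it was seen, and `evaluate` walks the statement tree. A rough illustration of how the pieces compose, using toy stand-in classes that return plain bools (the real capa classes return `Result` objects carrying the matched locations):

class ToyFeature(object):
    def __init__(self, value):
        self.value = value

    def evaluate(self, ctx):
        # a feature matches when it appears in the feature-to-locations mapping
        return self.value in ctx


class ToyAnd(object):
    def __init__(self, *children):
        self.children = list(children)

    def evaluate(self, ctx):
        return all(child.evaluate(ctx) for child in self.children)


class ToyNot(object):
    def __init__(self, child):
        self.child = child

    def evaluate(self, ctx):
        return not self.child.evaluate(ctx)


# feature -> set of virtual addresses where it was observed
ctx = {"api(CreateFile)": {0x401000}, "mnemonic(xor)": {0x401005}}
rule = ToyAnd(ToyFeature("api(CreateFile)"), ToyNot(ToyFeature("string(DEBUG)")))
assert rule.evaluate(ctx) is True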
@@ -11,9 +11,9 @@ MAX_BYTES_FEATURE_SIZE = 0x100

 def bytes_to_str(b):
     if sys.version_info[0] >= 3:
-        return str(codecs.encode(b, 'hex').decode('utf-8'))
+        return str(codecs.encode(b, "hex").decode("utf-8"))
     else:
-        return codecs.encode(b, 'hex')
+        return codecs.encode(b, "hex")


 class Feature(object):
@@ -29,7 +29,7 @@ class Feature(object):
         return self.name == other.name and self.args == other.args

     def __str__(self):
-        return '%s(%s)' % (self.name.lower(), ','.join(self.args))
+        return "%s(%s)" % (self.name.lower(), ",".join(self.args))

     def __repr__(self):
         return str(self)
@@ -41,8 +41,7 @@ class Feature(object):
         return self.__dict__

     def freeze_serialize(self):
-        return (self.__class__.__name__,
-                self.args)
+        return (self.__class__.__name__, self.args)

     @classmethod
     def freeze_deserialize(cls, args):
@@ -55,30 +54,30 @@ class MatchedRule(Feature):
         self.rule_name = rule_name

     def __str__(self):
-        return 'match(%s)' % (self.rule_name)
+        return "match(%s)" % (self.rule_name)


 class Characteristic(Feature):
     def __init__(self, name, value=None):
-        '''
+        """
         when `value` is not provided, this serves as descriptor for a class of characteristics.
         this is only used internally, such as in `rules.py` when checking if a statement is
         supported by a given scope.
-        '''
+        """
         super(Characteristic, self).__init__([name, value])
         self.name = name
         self.value = value

     def evaluate(self, ctx):
         if self.value is None:
-            raise ValueError('cannot evaluate characteristc %s with empty value' % (str(self)))
+            raise ValueError("cannot evaluate characteristc %s with empty value" % (str(self)))
         return super(Characteristic, self).evaluate(ctx)

     def __str__(self):
         if self.value is None:
-            return 'characteristic(%s)' % (self.name)
+            return "characteristic(%s)" % (self.name)
         else:
-            return 'characteristic(%s(%s))' % (self.name, self.value)
+            return "characteristic(%s(%s))" % (self.name, self.value)


 class String(Feature):
@@ -98,7 +97,7 @@ class Bytes(Feature):

     def evaluate(self, ctx):
         for feature, locations in ctx.items():
-            if not isinstance(feature, (capa.features.Bytes, )):
+            if not isinstance(feature, (capa.features.Bytes,)):
                 continue

             if feature.value.startswith(self.value):
@@ -108,14 +107,13 @@ class Bytes(Feature):

     def __str__(self):
         if self.symbol:
-            return 'bytes(0x%s = %s)' % (bytes_to_str(self.value).upper(), self.symbol)
+            return "bytes(0x%s = %s)" % (bytes_to_str(self.value).upper(), self.symbol)
         else:
-            return 'bytes(0x%s)' % (bytes_to_str(self.value).upper())
+            return "bytes(0x%s)" % (bytes_to_str(self.value).upper())

     def freeze_serialize(self):
-        return (self.__class__.__name__,
-                [bytes_to_str(x).upper() for x in self.args])
+        return (self.__class__.__name__, [bytes_to_str(x).upper() for x in self.args])

     @classmethod
     def freeze_deserialize(cls, args):
-        return cls(*[codecs.decode(x, 'hex') for x in args])
+        return cls(*[codecs.decode(x, "hex") for x in args])
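`freeze_serialize` and `freeze_deserialize` round-trip a feature through a `(class name, args)` tuple so that extracted feature sets can be cached to disk. A minimal sketch of the mechanism with stand-in classes (the dispatch-by-class-name dict below is illustrative; the real deserializer bodies are elided by the hunks above):

class Feature(object):
    def __init__(self, args):
        self.args = args

    def freeze_serialize(self):
        return (self.__class__.__name__, self.args)

    @classmethod
    def freeze_deserialize(cls, args):
        return cls(*args)


class Export(Feature):
    def __init__(self, name):
        super(Export, self).__init__([name])
        self.name = name


name, args = Export("CreateThread").freeze_serialize()
restored = {"Export": Export}[name].freeze_deserialize(args)
assert restored.args == ["CreateThread"]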
@@ -6,4 +6,4 @@ class BasicBlock(Feature):
         super(BasicBlock, self).__init__([])

     def __str__(self):
-        return 'basic block'
+        return "basic block"
@@ -10,11 +10,11 @@ try:
 except (ImportError, SyntaxError):
     pass

-__all__ = ['ida', 'viv']
+__all__ = ["ida", "viv"]


 class FeatureExtractor(object):
-    '''
+    """
     FeatureExtractor defines the interface for fetching features from a sample.

     There may be multiple backends that support fetching features for capa.
@@ -27,7 +27,8 @@ class FeatureExtractor(object):
     Also, this provides a way to hook in an IDA backend.

     This class is not instantiated directly; it is the base class for other implementations.
-    '''
+    """

     __metaclass__ = abc.ABCMeta

     def __init__(self):
@@ -40,7 +41,7 @@ class FeatureExtractor(object):

     @abc.abstractmethod
     def extract_file_features(self):
-        '''
+        """
         extract file-scope features.

         example::
@@ -51,12 +52,12 @@ class FeatureExtractor(object):

         yields:
           Tuple[capa.features.Feature, int]: feature and its location
-        '''
+        """
         raise NotImplemented

     @abc.abstractmethod
     def get_functions(self):
-        '''
+        """
         enumerate the functions and provide opaque values that will
         subsequently be provided to `.extract_function_features()`, etc.

@@ -67,12 +68,12 @@ class FeatureExtractor(object):

         yields:
           any: the opaque function value.
-        '''
+        """
         raise NotImplemented

     @abc.abstractmethod
     def extract_function_features(self, f):
-        '''
+        """
         extract function-scope features.
         the arguments are opaque values previously provided by `.get_functions()`, etc.

@@ -88,12 +89,12 @@ class FeatureExtractor(object):

         yields:
           Tuple[capa.features.Feature, int]: feature and its location
-        '''
+        """
         raise NotImplemented

     @abc.abstractmethod
     def get_basic_blocks(self, f):
-        '''
+        """
         enumerate the basic blocks in the given function and provide opaque values that will
         subsequently be provided to `.extract_basic_block_features()`, etc.

@@ -104,12 +105,12 @@ class FeatureExtractor(object):

         yields:
           any: the opaque basic block value.
-        '''
+        """
         raise NotImplemented

     @abc.abstractmethod
     def extract_basic_block_features(self, f, bb):
-        '''
+        """
         extract basic block-scope features.
         the arguments are opaque values previously provided by `.get_functions()`, etc.

@@ -127,12 +128,12 @@ class FeatureExtractor(object):

         yields:
           Tuple[capa.features.Feature, int]: feature and its location
-        '''
+        """
         raise NotImplemented

     @abc.abstractmethod
     def get_instructions(self, f, bb):
-        '''
+        """
         enumerate the instructions in the given basic block and provide opaque values that will
         subsequently be provided to `.extract_insn_features()`, etc.

@@ -143,12 +144,12 @@ class FeatureExtractor(object):

         yields:
           any: the opaque function value.
-        '''
+        """
         raise NotImplemented

     @abc.abstractmethod
     def extract_insn_features(self, f, bb, insn):
-        '''
+        """
         extract instruction-scope features.
         the arguments are opaque values previously provided by `.get_functions()`, etc.

@@ -168,12 +169,12 @@ class FeatureExtractor(object):

         yields:
           Tuple[capa.features.Feature, int]: feature and its location
-        '''
+        """
         raise NotImplemented


 class NullFeatureExtractor(FeatureExtractor):
-    '''
+    """
     An extractor that extracts some user-provided features.
     The structure of the single parameter is demonstrated in the example below.

@@ -211,64 +212,66 @@ class NullFeatureExtractor(FeatureExtractor):
                 0x40200: ...
             }
         )
-    '''
+    """

     def __init__(self, features):
         super(NullFeatureExtractor, self).__init__()
         self.features = features

     def extract_file_features(self):
-        for p in self.features.get('file features', []):
+        for p in self.features.get("file features", []):
             va, feature = p
             yield feature, va

     def get_functions(self):
-        for va in sorted(self.features['functions'].keys()):
+        for va in sorted(self.features["functions"].keys()):
             yield va

     def extract_function_features(self, f):
-        for p in (self.features  # noqa: E127 line over-indented
-                  .get('functions', {})
-                  .get(f, {})
-                  .get('features', [])):
+        for p in self.features.get("functions", {}).get(f, {}).get("features", []):  # noqa: E127 line over-indented
             va, feature = p
             yield feature, va

     def get_basic_blocks(self, f):
-        for va in sorted(self.features  # noqa: E127 line over-indented
-                         .get('functions', {})
-                         .get(f, {})
-                         .get('basic blocks', {})
-                         .keys()):
+        for va in sorted(
+            self.features.get("functions", {})  # noqa: E127 line over-indented
+            .get(f, {})
+            .get("basic blocks", {})
+            .keys()
+        ):
             yield va

     def extract_basic_block_features(self, f, bb):
-        for p in (self.features  # noqa: E127 line over-indented
-                  .get('functions', {})
-                  .get(f, {})
-                  .get('basic blocks', {})
-                  .get(bb, {})
-                  .get('features', [])):
+        for p in (
+            self.features.get("functions", {})  # noqa: E127 line over-indented
+            .get(f, {})
+            .get("basic blocks", {})
+            .get(bb, {})
+            .get("features", [])
+        ):
             va, feature = p
             yield feature, va

     def get_instructions(self, f, bb):
-        for va in sorted(self.features  # noqa: E127 line over-indented
-                         .get('functions', {})
-                         .get(f, {})
-                         .get('basic blocks', {})
-                         .get(bb, {})
-                         .get('instructions', {})
-                         .keys()):
+        for va in sorted(
+            self.features.get("functions", {})  # noqa: E127 line over-indented
+            .get(f, {})
+            .get("basic blocks", {})
+            .get(bb, {})
+            .get("instructions", {})
+            .keys()
+        ):
             yield va

     def extract_insn_features(self, f, bb, insn):
-        for p in (self.features  # noqa: E127 line over-indented
-                  .get('functions', {})
-                  .get(f, {})
-                  .get('basic blocks', {})
-                  .get(bb, {})
-                  .get('instructions', {})
-                  .get(insn, {})
-                  .get('features', [])):
+        for p in (
+            self.features.get("functions", {})  # noqa: E127 line over-indented
+            .get(f, {})
+            .get("basic blocks", {})
+            .get(bb, {})
+            .get("instructions", {})
+            .get(insn, {})
+            .get("features", [])
+        ):
             va, feature = p
             yield feature, va
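For reference, constructing a `NullFeatureExtractor` looks roughly like the following. The nesting mirrors the docstring example that the hunk above truncates (file features at the top level, then functions -> basic blocks -> instructions); the feature values here are placeholder strings where real code would pass `capa.features` objects:

features = {
    "file features": [
        (0x0, "characteristic(embedded pe)"),
    ],
    "functions": {
        0x401000: {
            "features": [(0x401000, "characteristic(switch)")],
            "basic blocks": {
                0x401000: {
                    "features": [(0x401000, "characteristic(tight loop)")],
                    "instructions": {
                        0x401005: {"features": [(0x401005, "mnemonic(xor)")]},
                    },
                },
            },
        },
    },
}

extractor = NullFeatureExtractor(features)
for feature, va in extractor.extract_file_features():
    print(hex(va), feature)  # 0x0 characteristic(embedded pe)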
@@ -10,27 +10,27 @@ def xor_static(data, i):
     if sys.version_info >= (3, 0):
         return bytes(c ^ i for c in data)
     else:
-        return ''.join(chr(ord(c) ^ i) for c in data)
+        return "".join(chr(ord(c) ^ i) for c in data)


 def is_aw_function(function_name):
-    '''
+    """
     is the given function name an A/W function?
     these are variants of functions that, on Windows, accept either a narrow or wide string.
-    '''
+    """
     if len(function_name) < 2:
         return False

     # last character should be 'A' or 'W'
-    if function_name[-1] not in ('A', 'W'):
+    if function_name[-1] not in ("A", "W"):
         return False

     # second to last character should be lowercase letter
-    return 'a' <= function_name[-2] <= 'z' or '0' <= function_name[-2] <= '9'
+    return "a" <= function_name[-2] <= "z" or "0" <= function_name[-2] <= "9"


 def generate_api_features(apiname, va):
-    '''
+    """
     for a given function name and address, generate API names.
     we over-generate features to make matching easier.
     these include:
@@ -38,7 +38,7 @@ def generate_api_features(apiname, va):
       - kernel32.CreateFile
       - CreateFileA
       - CreateFile
-    '''
+    """
     # (kernel32.CreateFileA, 0x401000)
     yield API(apiname), va

@@ -46,8 +46,8 @@ def generate_api_features(apiname, va):
         # (kernel32.CreateFile, 0x401000)
         yield API(apiname[:-1]), va

-    if '.' in apiname:
-        modname, impname = apiname.split('.')
+    if "." in apiname:
+        modname, impname = apiname.split(".")
         # strip modname to support importname-only matching
         # (CreateFileA, 0x401000)
         yield API(impname), va
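Taken together, `is_aw_function` and `generate_api_features` expand one import into several match candidates. A standalone sketch of the expected expansion, using plain strings instead of `API` features (the final bare `CreateFile` yield is elided by the hunk above, so its placement here is an assumption based on the docstring):

def is_aw_function(function_name):
    # mirror of the check above: trailing A/W preceded by a lowercase letter or digit
    if len(function_name) < 2:
        return False
    if function_name[-1] not in ("A", "W"):
        return False
    return "a" <= function_name[-2] <= "z" or "0" <= function_name[-2] <= "9"


def generate_api_names(apiname):
    yield apiname                  # kernel32.CreateFileA
    if is_aw_function(apiname):
        yield apiname[:-1]         # kernel32.CreateFile
    if "." in apiname:
        modname, impname = apiname.split(".")
        yield impname              # CreateFileA
        if is_aw_function(impname):
            yield impname[:-1]     # CreateFile


assert list(generate_api_names("kernel32.CreateFileA")) == [
    "kernel32.CreateFileA",
    "kernel32.CreateFile",
    "CreateFileA",
    "CreateFile",
]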
@@ -26,17 +26,17 @@ def get_va(self):


 def add_va_int_cast(o):
-    '''
+    """
     dynamically add a cast-to-int (`__int__`) method to the given object
     that returns the value of the `.va` property.
     this bit of skullduggery lets use cast viv-utils objects as ints.
     the correct way of doing this is to update viv-utils (or subclass the objects here).
-    '''
+    """

     if sys.version_info >= (3, 0):
-        setattr(o, '__int__', types.MethodType(get_va, o))
+        setattr(o, "__int__", types.MethodType(get_va, o))
     else:
-        setattr(o, '__int__', types.MethodType(get_va, o, type(o)))
+        setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
     return o
@@ -15,23 +15,23 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN


 def _ida_get_printable_len(op):
-    ''' Return string length if all operand bytes are ascii or utf16-le printable
+    """ Return string length if all operand bytes are ascii or utf16-le printable

     args:
         op (IDA op_t)
-    '''
+    """
     op_val = helpers.mask_op_val(op)

     if op.dtype == idaapi.dt_byte:
-        chars = struct.pack('<B', op_val)
+        chars = struct.pack("<B", op_val)
     elif op.dtype == idaapi.dt_word:
-        chars = struct.pack('<H', op_val)
+        chars = struct.pack("<H", op_val)
     elif op.dtype == idaapi.dt_dword:
-        chars = struct.pack('<I', op_val)
+        chars = struct.pack("<I", op_val)
     elif op.dtype == idaapi.dt_qword:
-        chars = struct.pack('<Q', op_val)
+        chars = struct.pack("<Q", op_val)
     else:
-        raise ValueError('Unhandled operand data type 0x%x.' % op.dtype)
+        raise ValueError("Unhandled operand data type 0x%x." % op.dtype)

     def _is_printable_ascii(chars):
         if sys.version_info >= (3, 0):
@@ -44,7 +44,7 @@ def _ida_get_printable_len(op):
             if all(c == 0x00 for c in chars[1::2]):
                 return _is_printable_ascii(chars[::2])
         else:
-            if all(c == '\x00' for c in chars[1::2]):
+            if all(c == "\x00" for c in chars[1::2]):
                 return _is_printable_ascii(chars[::2])

     if _is_printable_ascii(chars):
@@ -57,32 +57,32 @@ def _ida_get_printable_len(op):


 def _is_mov_imm_to_stack(insn):
-    ''' verify instruction moves immediate onto stack
+    """ verify instruction moves immediate onto stack

     args:
         insn (IDA insn_t)
-    '''
+    """
     if insn.Op2.type != idaapi.o_imm:
         return False

     if not helpers.is_op_stack_var(insn.ea, 0):
         return False

-    if not insn.get_canon_mnem().startswith('mov'):
+    if not insn.get_canon_mnem().startswith("mov"):
         return False

     return True


 def _ida_bb_contains_stackstring(f, bb):
-    ''' check basic block for stackstring indicators
+    """ check basic block for stackstring indicators

     true if basic block contains enough moves of constant bytes to the stack

     args:
         f (IDA func_t)
         bb (IDA BasicBlock)
-    '''
+    """
     count = 0

     for insn in helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
@@ -96,25 +96,25 @@ def _ida_bb_contains_stackstring(f, bb):


 def extract_bb_stackstring(f, bb):
-    ''' extract stackstring indicators from basic block
+    """ extract stackstring indicators from basic block

     args:
         f (IDA func_t)
         bb (IDA BasicBlock)
-    '''
+    """
     if _ida_bb_contains_stackstring(f, bb):
-        yield Characteristic('stack string', True), bb.start_ea
+        yield Characteristic("stack string", True), bb.start_ea


 def _ida_bb_contains_tight_loop(f, bb):
-    ''' check basic block for stackstring indicators
+    """ check basic block for stackstring indicators

     true if last instruction in basic block branches to basic block start

     args:
         f (IDA func_t)
         bb (IDA BasicBlock)
-    '''
+    """
     bb_end = idc.prev_head(bb.end_ea)

     if bb.start_ea < bb_end:
@@ -126,23 +126,23 @@ def _ida_bb_contains_tight_loop(f, bb):


 def extract_bb_tight_loop(f, bb):
-    ''' extract tight loop indicators from a basic block
+    """ extract tight loop indicators from a basic block

     args:
         f (IDA func_t)
         bb (IDA BasicBlock)
-    '''
+    """
     if _ida_bb_contains_tight_loop(f, bb):
-        yield Characteristic('tight loop', True), bb.start_ea
+        yield Characteristic("tight loop", True), bb.start_ea


 def extract_features(f, bb):
-    ''' extract basic block features
+    """ extract basic block features

     args:
         f (IDA func_t)
         bb (IDA BasicBlock)
-    '''
+    """
     yield BasicBlock(), bb.start_ea

     for bb_handler in BASIC_BLOCK_HANDLERS:
@@ -166,5 +166,5 @@ def main():
     pprint.pprint(features)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
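The stackstring heuristic above counts instructions that move printable immediates onto the stack and compares the running total against MIN_STACKSTRING_LEN. The printable-length check can be sketched without IDA like this (operand widths and values are hypothetical; the real code selects the struct format from `op.dtype`):

import struct


def printable_len(value, fmt="<I"):
    chars = struct.pack(fmt, value)
    if all(0x20 <= c < 0x7F for c in chars):
        return len(chars)  # plain ascii, e.g. b"ABCD" -> 4
    if all(c == 0x00 for c in chars[1::2]) and all(0x20 <= c < 0x7F for c in chars[::2]):
        return len(chars) // 2  # utf-16-le, e.g. b"A\x00B\x00" -> 2
    return 0


assert printable_len(0x44434241) == 4  # "ABCD" packed little-endian
assert printable_len(0x00420041) == 2  # "AB" as utf-16-le
assert printable_len(0xDEADBEEF) == 0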
@@ -16,32 +16,39 @@ import capa.features.extractors.ida.helpers


 def _ida_check_segment_for_pe(seg):
-    ''' check segment for embedded PE
+    """ check segment for embedded PE

     adapted for IDA from:
     https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19

     args:
         seg (IDA segment_t)
-    '''
+    """
     seg_max = seg.end_ea
-    mz_xor = [(capa.features.extractors.helpers.xor_static(b'MZ', i),
-               capa.features.extractors.helpers.xor_static(b'PE', i),
-               i)
-              for i in range(256)]
-    todo = [(capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i) for mzx, pex, i in mz_xor]
+    mz_xor = [
+        (
+            capa.features.extractors.helpers.xor_static(b"MZ", i),
+            capa.features.extractors.helpers.xor_static(b"PE", i),
+            i,
+        )
+        for i in range(256)
+    ]
+    todo = [
+        (capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i)
+        for mzx, pex, i in mz_xor
+    ]
     todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != idaapi.BADADDR]

     while len(todo):
         off, mzx, pex, i = todo.pop()

         # The MZ header has one field we will check e_lfanew is at 0x3c
-        e_lfanew = off + 0x3c
+        e_lfanew = off + 0x3C

         if seg_max < (e_lfanew + 4):
             continue

-        newoff = struct.unpack('<I', capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
+        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]

         peoff = off + newoff
         if seg_max < (peoff + 2):
@@ -56,29 +63,29 @@ def _ida_check_segment_for_pe(seg):


 def extract_file_embedded_pe():
-    ''' extract embedded PE features
+    """ extract embedded PE features

     IDA must load resource sections for this to be complete
       - '-R' from console
       - Check 'Load resource sections' when opening binary in IDA manually
-    '''
+    """
     for seg in capa.features.extractors.ida.helpers.get_segments():
         if seg.is_header_segm():
             # IDA may load header segments, skip if present
             continue

         for ea, _ in _ida_check_segment_for_pe(seg):
-            yield Characteristic('embedded pe', True), ea
+            yield Characteristic("embedded pe", True), ea


 def extract_file_export_names():
-    ''' extract function exports '''
+    """ extract function exports """
     for _, _, ea, name in idautils.Entries():
         yield Export(name), ea


 def extract_file_import_names():
-    ''' extract function imports
+    """ extract function imports

     1. imports by ordinal:
       - modulename.#ordinal
@@ -87,25 +94,25 @@ def extract_file_import_names():
     matching:
       - modulename.importname
       - importname
-    '''
+    """
     for ea, imp_info in capa.features.extractors.ida.helpers.get_file_imports().items():
         dllname, name, ordi = imp_info

         if name:
-            yield Import('%s.%s' % (dllname, name)), ea
+            yield Import("%s.%s" % (dllname, name)), ea
             yield Import(name), ea

         if ordi:
-            yield Import('%s.#%s' % (dllname, str(ordi))), ea
+            yield Import("%s.#%s" % (dllname, str(ordi))), ea


 def extract_file_section_names():
-    ''' extract section names
+    """ extract section names

     IDA must load resource sections for this to be complete
       - '-R' from console
       - Check 'Load resource sections' when opening binary in IDA manually
-    '''
+    """
     for seg in capa.features.extractors.ida.helpers.get_segments():
         if seg.is_header_segm():
             # IDA may load header segments, skip if present
@@ -115,12 +122,12 @@ def extract_file_section_names():


 def extract_file_strings():
-    ''' extract ASCII and UTF-16 LE strings
+    """ extract ASCII and UTF-16 LE strings

     IDA must load resource sections for this to be complete
       - '-R' from console
       - Check 'Load resource sections' when opening binary in IDA manually
-    '''
+    """
     for seg in capa.features.extractors.ida.helpers.get_segments():
         seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)

@@ -132,7 +139,7 @@ def extract_file_strings():


 def extract_features():
-    ''' extract file features '''
+    """ extract file features """
     for file_handler in FILE_HANDLERS:
         for feature, va in file_handler():
             yield feature, va
@@ -151,5 +158,5 @@ def main():
     pprint.pprint(list(extract_features()))


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
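The carve logic above relies on the fact that a PE obfuscated with a single-byte XOR key still contains its MZ and PE magics under that same key, so the magic pair can be precomputed for every key and the segment scanned for each MZ candidate. A simplified, IDA-free sketch of the idea (the real code then validates e_lfanew and the PE magic at the decoded offset):

def xor_static(data, i):
    return bytes(c ^ i for c in data)


# a hypothetical buffer with an embedded, XOR-encoded PE-like blob at offset 0x10
buf = b"\x00" * 16 + xor_static(b"MZ" + b"\x90" * 58 + b"PE", 0x42)

for key in range(256):
    mz = xor_static(b"MZ", key)
    off = buf.find(mz)
    if off != -1:
        print("candidate MZ at 0x%x, key 0x%x" % (off, key))  # 0x10, key 0x42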
@@ -6,14 +6,14 @@ from capa.features.extractors import loops


 def _ida_function_contains_switch(f):
-    ''' check a function for switch statement indicators
+    """ check a function for switch statement indicators

     adapted from:
     https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1

     arg:
         f (IDA func_t)
-    '''
+    """
     for start, end in idautils.Chunks(f.start_ea):
         for head in idautils.Heads(start, end):
             if idaapi.get_switch_info(head):
@@ -23,68 +23,63 @@ def _ida_function_contains_switch(f):


 def extract_function_switch(f):
-    ''' extract switch indicators from a function
+    """ extract switch indicators from a function

     arg:
         f (IDA func_t)
-    '''
+    """
     if _ida_function_contains_switch(f):
-        yield Characteristic('switch', True), f.start_ea
+        yield Characteristic("switch", True), f.start_ea


 def extract_function_calls_to(f):
-    ''' extract callers to a function
+    """ extract callers to a function

     args:
         f (IDA func_t)
-    '''
+    """
     for ea in idautils.CodeRefsTo(f.start_ea, True):
-        yield Characteristic('calls to', True), ea
+        yield Characteristic("calls to", True), ea


 def extract_function_loop(f):
-    ''' extract loop indicators from a function
+    """ extract loop indicators from a function

     args:
         f (IDA func_t)
-    '''
+    """
     edges = []
     for bb in idaapi.FlowChart(f):
         map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs())

     if edges and loops.has_loop(edges):
-        yield Characteristic('loop', True), f.start_ea
+        yield Characteristic("loop", True), f.start_ea


 def extract_recursive_call(f):
-    ''' extract recursive function call
+    """ extract recursive function call

     args:
         f (IDA func_t)
-    '''
+    """
     for ref in idautils.CodeRefsTo(f.start_ea, True):
         if f.contains(ref):
-            yield Characteristic('recursive call', True), f.start_ea
+            yield Characteristic("recursive call", True), f.start_ea
             break


 def extract_features(f):
-    ''' extract function features
+    """ extract function features

     arg:
         f (IDA func_t)
-    '''
+    """
     for func_handler in FUNCTION_HANDLERS:
         for feature, va in func_handler(f):
             yield feature, va


-FUNCTION_HANDLERS = (
-    extract_function_calls_to,
-    extract_function_switch,
-    extract_function_loop,
-    extract_recursive_call
-)
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_switch, extract_function_loop, extract_recursive_call)


 def main():
@@ -96,5 +91,5 @@ def main():
     pprint.pprint(features)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -7,21 +7,21 @@ import idc


 def find_byte_sequence(start, end, seq):
-    ''' find byte sequence
+    """ find byte sequence

     args:
         start: min virtual address
         end: max virtual address
         seq: bytes to search e.g. b'\x01\x03'
-    '''
+    """
     if sys.version_info >= (3, 0):
-        return idaapi.find_binary(start, end, ' '.join(['%02x' % b for b in seq]), 0, idaapi.SEARCH_DOWN)
+        return idaapi.find_binary(start, end, " ".join(["%02x" % b for b in seq]), 0, idaapi.SEARCH_DOWN)
     else:
-        return idaapi.find_binary(start, end, ' '.join(['%02x' % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN)
+        return idaapi.find_binary(start, end, " ".join(["%02x" % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN)


 def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
-    ''' get functions, range optional
+    """ get functions, range optional

     args:
         start: min virtual address
@@ -29,7 +29,7 @@ def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):

     ret:
         yield func_t*
-    '''
+    """
     for ea in idautils.Functions(start=start, end=end):
         f = idaapi.get_func(ea)

@@ -43,7 +43,7 @@ def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):


 def get_segments():
-    ''' Get list of segments (sections) in the binary image '''
+    """ Get list of segments (sections) in the binary image """
     for n in range(idaapi.get_segm_qty()):
         seg = idaapi.getnseg(n)
         if seg:
@@ -51,11 +51,11 @@ def get_segments():


 def get_segment_buffer(seg):
-    ''' return bytes stored in a given segment
+    """ return bytes stored in a given segment

     decrease buffer size until IDA is able to read bytes from the segment
-    '''
-    buff = b''
+    """
+    buff = b""
     sz = seg.end_ea - seg.start_ea

     while sz > 0:
@@ -65,11 +65,11 @@ def get_segment_buffer(seg):
         sz -= 0x1000

     # IDA returns None if get_bytes fails, so convert for consistent return type
-    return buff if buff else b''
+    return buff if buff else b""


 def get_file_imports():
-    ''' get file imports '''
+    """ get file imports """
     _imports = {}

     for idx in range(idaapi.get_import_module_qty()):
@@ -79,9 +79,9 @@ def get_file_imports():
             continue

         def _inspect_import(ea, name, ordi):
-            if name and name.startswith('__imp_'):
+            if name and name.startswith("__imp_"):
                 # handle mangled names starting
-                name = name[len('__imp_'):]
+                name = name[len("__imp_") :]
             _imports[ea] = (dllname.lower(), name, ordi)
             return True

@@ -91,14 +91,14 @@ def get_file_imports():


 def get_instructions_in_range(start, end):
-    ''' yield instructions in range
+    """ yield instructions in range

     args:
         start: virtual address (inclusive)
         end: virtual address (exclusive)
     yield:
         (insn_t*)
-    '''
+    """
     for head in idautils.Heads(start, end):
         inst = idautils.DecodeInstruction(head)
         if inst:
@@ -106,7 +106,7 @@ def get_instructions_in_range(start, end):


 def is_operand_equal(op1, op2):
-    ''' compare two IDA op_t '''
+    """ compare two IDA op_t """
     if op1.flags != op2.flags:
         return False

@@ -132,14 +132,12 @@ def is_operand_equal(op1, op2):


 def is_basic_block_equal(bb1, bb2):
-    ''' compare two IDA BasicBlock '''
-    return bb1.start_ea == bb2.start_ea \
-        and bb1.end_ea == bb2.end_ea \
-        and bb1.type == bb2.type
+    """ compare two IDA BasicBlock """
+    return bb1.start_ea == bb2.start_ea and bb1.end_ea == bb2.end_ea and bb1.type == bb2.type


 def basic_block_size(bb):
-    ''' calculate size of basic block '''
+    """ calculate size of basic block """
     return bb.end_ea - bb.start_ea


@@ -152,11 +150,11 @@ def read_bytes_at(ea, count):


 def find_string_at(ea, min=4):
-    ''' check if ASCII string exists at a given virtual address '''
+    """ check if ASCII string exists at a given virtual address """
     found = idaapi.get_strlit_contents(ea, -1, idaapi.STRTYPE_C)
     if found and len(found) > min:
         try:
-            found = found.decode('ascii')
+            found = found.decode("ascii")
             # hacky check for IDA bug; get_strlit_contents also reads Unicode as
             # myy__uunniiccoodde when searching in ASCII mode so we check for that here
             # and return the fixed up value
@@ -169,11 +167,11 @@ def find_string_at(ea, min=4):


 def get_op_phrase_info(op):
-    ''' parse phrase features from operand
+    """ parse phrase features from operand

     Pretty much dup of sark's implementation:
     https://github.com/tmr232/Sark/blob/master/sark/code/instruction.py#L28-L73
-    '''
+    """
     if op.type not in (idaapi.o_phrase, idaapi.o_displ):
         return

@@ -202,21 +200,21 @@ def get_op_phrase_info(op):
         # This is only relevant to Intel architectures.
         index = None

-    return {'base': base, 'index': index, 'scale': scale, 'offset': offset}
+    return {"base": base, "index": index, "scale": scale, "offset": offset}


 def is_op_write(insn, op):
-    ''' Check if an operand is written to (destination operand) '''
+    """ Check if an operand is written to (destination operand) """
     return idaapi.has_cf_chg(insn.get_canon_feature(), op.n)


 def is_op_read(insn, op):
-    ''' Check if an operand is read from (source operand) '''
+    """ Check if an operand is read from (source operand) """
     return idaapi.has_cf_use(insn.get_canon_feature(), op.n)


 def is_sp_modified(insn):
-    ''' determine if instruction modifies SP, ESP, RSP '''
+    """ determine if instruction modifies SP, ESP, RSP """
     for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
         if op.reg != idautils.procregs.sp.reg:
             continue
@@ -228,7 +226,7 @@ def is_sp_modified(insn):


 def is_bp_modified(insn):
-    ''' check if instruction modifies BP, EBP, RBP '''
+    """ check if instruction modifies BP, EBP, RBP """
     for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
         if op.reg != idautils.procregs.bp.reg:
             continue
@@ -240,12 +238,12 @@ def is_bp_modified(insn):


 def is_frame_register(reg):
-    ''' check if register is sp or bp '''
+    """ check if register is sp or bp """
     return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)


 def get_insn_ops(insn, op_type=None):
-    ''' yield op_t for instruction, filter on type if specified '''
+    """ yield op_t for instruction, filter on type if specified """
     for op in insn.ops:
         if op.type == idaapi.o_void:
             # avoid looping all 6 ops if only subset exists
@@ -258,17 +256,17 @@ def get_insn_ops(insn, op_type=None):


 def ea_flags(ea):
-    ''' retrieve processor flags for a given address '''
+    """ retrieve processor flags for a given address """
     return idaapi.get_flags(ea)


 def is_op_stack_var(ea, n):
-    ''' check if operand is a stack variable '''
+    """ check if operand is a stack variable """
     return idaapi.is_stkvar(ea_flags(ea), n)


 def mask_op_val(op):
-    ''' mask off a value based on data type
+    """ mask off a value based on data type

     necesssary due to a bug in 64-bit

@@ -277,22 +275,22 @@ def mask_op_val(op):

         insn.Op2.dtype == idaapi.dt_dword
         insn.Op2.value == 0xffffffffffffffff
-    '''
+    """
     masks = {
         idaapi.dt_byte: 0xFF,
         idaapi.dt_word: 0xFFFF,
         idaapi.dt_dword: 0xFFFFFFFF,
-        idaapi.dt_qword: 0xFFFFFFFFFFFFFFFF
+        idaapi.dt_qword: 0xFFFFFFFFFFFFFFFF,
     }

     mask = masks.get(op.dtype, None)

     if not mask:
-        raise ValueError('No support for operand data type 0x%x' % op.dtype)
+        raise ValueError("No support for operand data type 0x%x" % op.dtype)

     return mask & op.value


 def ea_to_offset(ea):
-    ''' convert virtual address to file offset '''
+    """ convert virtual address to file offset """
     return idaapi.get_fileregion_offset(ea)
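The masking in `mask_op_val` clamps a (possibly sign-extended) immediate to its operand width; per the docstring above, IDA may report a dword operand whose `.value` reads back as 0xFFFFFFFFFFFFFFFF. The core of the workaround, with plain byte widths in place of the `idaapi` dtype constants:

MASKS = {1: 0xFF, 2: 0xFFFF, 4: 0xFFFFFFFF, 8: 0xFFFFFFFFFFFFFFFF}


def mask_value(value, width):
    return MASKS[width] & value


assert mask_value(0xFFFFFFFFFFFFFFFF, 4) == 0xFFFFFFFF  # dword clamped to 32 bits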
@@ -26,7 +26,7 @@ def get_imports():


 def _check_for_api_call(insn):
-    ''' check instruction for API call '''
+    """ check instruction for API call """
     if not idaapi.is_call_insn(insn):
         return

@@ -34,7 +34,7 @@ def _check_for_api_call(insn):
     imp = get_imports().get(call_ref, None)

     if imp:
-        yield '%s.%s' % (imp[0], imp[1])
+        yield "%s.%s" % (imp[0], imp[1])
     else:
         f = idaapi.get_func(call_ref)

@@ -46,11 +46,11 @@ def _check_for_api_call(insn):
         imp = get_imports().get(thunk_ref, None)

         if imp:
-            yield '%s.%s' % (imp[0], imp[1])
+            yield "%s.%s" % (imp[0], imp[1])


 def extract_insn_api_features(f, bb, insn):
-    ''' parse instruction API features
+    """ parse instruction API features

     args:
         f (IDA func_t)
@@ -59,14 +59,14 @@ def extract_insn_api_features(f, bb, insn):

     example:
         call dword [0x00473038]
-    '''
+    """
     for api_name in _check_for_api_call(insn):
         for feature, va in capa.features.extractors.helpers.generate_api_features(api_name, insn.ea):
             yield feature, va


 def extract_insn_number_features(f, bb, insn):
-    ''' parse instruction number features
+    """ parse instruction number features

     args:
         f (IDA func_t)
@@ -75,7 +75,7 @@ def extract_insn_number_features(f, bb, insn):

     example:
         push 3136B0h ; dwControlCode
-    '''
+    """
     if idaapi.is_ret_insn(insn):
         # skip things like:
         #   .text:0042250E retn 8
@@ -97,7 +97,7 @@ def extract_insn_number_features(f, bb, insn):


 def extract_insn_bytes_features(f, bb, insn):
-    ''' parse referenced byte sequences
+    """ parse referenced byte sequences

     args:
         f (IDA func_t)
@@ -106,7 +106,7 @@ def extract_insn_bytes_features(f, bb, insn):

     example:
         push offset iid_004118d4_IShellLinkA ; riid
-    '''
+    """
     if idaapi.is_call_insn(insn):
         # ignore call instructions
         return
@@ -119,7 +119,7 @@ def extract_insn_bytes_features(f, bb, insn):


 def extract_insn_string_features(f, bb, insn):
-    ''' parse instruction string features
+    """ parse instruction string features

     args:
         f (IDA func_t)
@@ -128,7 +128,7 @@ def extract_insn_string_features(f, bb, insn):

     example:
         push offset aAcr ; "ACR > "
-    '''
+    """
     for ref in idautils.DataRefsFrom(insn.ea):
         found = capa.features.extractors.ida.helpers.find_string_at(ref)
         if found:
@@ -136,7 +136,7 @@ def extract_insn_string_features(f, bb, insn):


 def extract_insn_offset_features(f, bb, insn):
-    ''' parse instruction structure offset features
+    """ parse instruction structure offset features

     args:
         f (IDA func_t)
@@ -145,7 +145,7 @@ def extract_insn_offset_features(f, bb, insn):

     example:
         .text:0040112F cmp [esi+4], ebx
-    '''
+    """
     for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_phrase, idaapi.o_displ)):
         if capa.features.extractors.ida.helpers.is_op_stack_var(insn.ea, op.n):
             # skip stack offsets
@@ -156,7 +156,7 @@ def extract_insn_offset_features(f, bb, insn):
         if not p_info:
             continue

-        op_off = p_info['offset']
+        op_off = p_info["offset"]

         if 0 == op_off:
             # TODO: Do we want to record offset of zero?
@@ -172,26 +172,26 @@ def extract_insn_offset_features(f, bb, insn):


 def _contains_stack_cookie_keywords(s):
-    ''' check if string contains stack cookie keywords
+    """ check if string contains stack cookie keywords

     Examples:
         xor ecx, ebp ; StackCookie

         mov eax, ___security_cookie
-    '''
+    """
     if not s:
         return False

     s = s.strip().lower()

-    if 'cookie' not in s:
+    if "cookie" not in s:
         return False

-    return any(keyword in s for keyword in ('stack', 'security'))
+    return any(keyword in s for keyword in ("stack", "security"))


 def _bb_stack_cookie_registers(bb):
-    ''' scan basic block for stack cookie operations
+    """ scan basic block for stack cookie operations

     yield registers ids that may have been used for stack cookie operations

@@ -215,7 +215,7 @@ def _bb_stack_cookie_registers(bb):
         .text:004062FA jnz loc_40639D

     TODO: this is expensive, but necessary?...
-    '''
+    """
     for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
         if _contains_stack_cookie_keywords(idc.GetDisasm(insn.ea)):
             for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_reg,)):
@@ -225,7 +225,7 @@ def _bb_stack_cookie_registers(bb):


 def _is_nzxor_stack_cookie(f, bb, insn):
-    ''' check if nzxor is related to stack cookie '''
+    """ check if nzxor is related to stack cookie """
     if _contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)):
         # Example:
         #   xor ecx, ebp ; StackCookie
@@ -241,7 +241,7 @@ def _is_nzxor_stack_cookie(f, bb, insn):


 def extract_insn_nzxor_characteristic_features(f, bb, insn):
-    ''' parse instruction non-zeroing XOR instruction
+    """ parse instruction non-zeroing XOR instruction

     ignore expected non-zeroing XORs, e.g. security cookies

@@ -249,7 +249,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
         f (IDA func_t)
         bb (IDA BasicBlock)
         insn (IDA insn_t)
-    '''
+    """
     if insn.itype != idaapi.NN_xor:
         return

@@ -259,28 +259,28 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
     if _is_nzxor_stack_cookie(f, bb, insn):
         return

-    yield Characteristic('nzxor', True), insn.ea
+    yield Characteristic("nzxor", True), insn.ea


 def extract_insn_mnemonic_features(f, bb, insn):
-    ''' parse instruction mnemonic features
+    """ parse instruction mnemonic features

     args:
         f (IDA func_t)
         bb (IDA BasicBlock)
         insn (IDA insn_t)
-    '''
+    """
     yield Mnemonic(insn.get_canon_mnem()), insn.ea


 def extract_insn_peb_access_characteristic_features(f, bb, insn):
-    ''' parse instruction peb access
+    """ parse instruction peb access

     fs:[0x30] on x86, gs:[0x60] on x64

     TODO:
         IDA should be able to do this..
-    '''
+    """
     if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
         return

@@ -290,40 +290,40 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):

     disasm = idc.GetDisasm(insn.ea)

-    if ' fs:30h' in disasm or ' gs:60h' in disasm:
+    if " fs:30h" in disasm or " gs:60h" in disasm:
         # TODO: replace above with proper IDA
-        yield Characteristic('peb access', True), insn.ea
+        yield Characteristic("peb access", True), insn.ea


 def extract_insn_segment_access_features(f, bb, insn):
-    ''' parse instruction fs or gs access
+    """ parse instruction fs or gs access

     TODO:
         IDA should be able to do this...
-    '''
+    """
     if all(map(lambda op: op.type != idaapi.o_mem, insn.ops)):
         # try to optimize for only memory referencese
         return

     disasm = idc.GetDisasm(insn.ea)

-    if ' fs:' in disasm:
+    if " fs:" in disasm:
         # TODO: replace above with proper IDA
-        yield Characteristic('fs access', True), insn.ea
+        yield Characteristic("fs access", True), insn.ea

-    if ' gs:' in disasm:
+    if " gs:" in disasm:
         # TODO: replace above with proper IDA
-        yield Characteristic('gs access', True), insn.ea
+        yield Characteristic("gs access", True), insn.ea


 def extract_insn_cross_section_cflow(f, bb, insn):
-    ''' inspect the instruction for a CALL or JMP that crosses section boundaries
+    """ inspect the instruction for a CALL or JMP that crosses section boundaries

     args:
         f (IDA func_t)
         bb (IDA BasicBlock)
         insn (IDA insn_t)
-    '''
+    """
     for ref in idautils.CodeRefsFrom(insn.ea, False):
         if ref in get_imports().keys():
             # ignore API calls
@@ -336,11 +336,11 @@ def extract_insn_cross_section_cflow(f, bb, insn):
         if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
             continue

-        yield Characteristic('cross section flow', True), insn.ea
+        yield Characteristic("cross section flow", True), insn.ea


 def extract_function_calls_from(f, bb, insn):
-    ''' extract functions calls from features
+    """ extract functions calls from features

     most relevant at the function scope, however, its most efficient to extract at the instruction scope

@@ -348,17 +348,17 @@ def extract_function_calls_from(f, bb, insn):
         f (IDA func_t)
         bb (IDA BasicBlock)
         insn (IDA insn_t)
-    '''
+    """
     if not idaapi.is_call_insn(insn):
         # ignore jmp, etc.
         return

     for ref in idautils.CodeRefsFrom(insn.ea, False):
-        yield Characteristic('calls from', True), ref
+        yield Characteristic("calls from", True), ref


 def extract_function_indirect_call_characteristic_features(f, bb, insn):
-    ''' extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
+    """ extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
     does not include calls like => call ds:dword_ABD4974

     most relevant at the function or basic block scope;
@@ -368,22 +368,22 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
         f (IDA func_t)
         bb (IDA BasicBlock)
         insn (IDA insn_t)
-    '''
+    """
     if not idaapi.is_call_insn(insn):
         return

     if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
-        yield Characteristic('indirect call', True), insn.ea
+        yield Characteristic("indirect call", True), insn.ea


 def extract_features(f, bb, insn):
-    ''' extract instruction features
+    """ extract instruction features

     args:
         f (IDA func_t)
         bb (IDA BasicBlock)
         insn (IDA insn_t)
-    '''
+    """
     for inst_handler in INSTRUCTION_HANDLERS:
         for feature, va in inst_handler(f, bb, insn):
             yield feature, va
@@ -401,7 +401,7 @@ INSTRUCTION_HANDLERS = (
     extract_insn_cross_section_cflow,
     extract_insn_segment_access_features,
     extract_function_calls_from,
-    extract_function_indirect_call_characteristic_features
+    extract_function_indirect_call_characteristic_features,
 )


@@ -416,5 +416,5 @@ def main():
     pprint.pprint(features)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
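`_contains_stack_cookie_keywords` is the filter that keeps /GS stack-cookie XORs out of the `nzxor` characteristic. It is self-contained enough to run standalone; a usage sketch:

def contains_stack_cookie_keywords(s):
    # a disassembly line or comment counts as stack-cookie related when it
    # mentions "cookie" together with "stack" or "security"
    if not s:
        return False
    s = s.strip().lower()
    if "cookie" not in s:
        return False
    return any(keyword in s for keyword in ("stack", "security"))


assert contains_stack_cookie_keywords("xor ecx, ebp ; StackCookie")
assert contains_stack_cookie_keywords("mov eax, ___security_cookie")
assert not contains_stack_cookie_keywords("mov eax, cookie_jar")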
@@ -3,7 +3,7 @@ from networkx import nx


 def has_loop(edges, threshold=2):
-    ''' check if a list of edges representing a directed graph contains a loop
+    """ check if a list of edges representing a directed graph contains a loop

     args:
         edges: list of edge sets representing a directed graph i.e. [(1, 2), (2, 1)]
@@ -11,7 +11,7 @@ def has_loop(edges, threshold=2):

     returns:
         bool
-    '''
+    """
     g = nx.DiGraph()
     g.add_edges_from(edges)
     return any(len(comp) >= threshold for comp in strongly_connected_components(g))
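`has_loop` reduces loop detection to strongly connected components: any SCC with at least `threshold` nodes must contain a cycle. An equivalent standalone check using networkx's public API:

import networkx as nx

g = nx.DiGraph()
g.add_edges_from([(1, 2), (2, 3), (3, 1), (3, 4)])  # 1 -> 2 -> 3 -> 1 is a loop
print(any(len(comp) >= 2 for comp in nx.strongly_connected_components(g)))  # True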
@@ -7,26 +7,28 @@ import re
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
ASCII_BYTE = r' !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t'.encode('ascii')
|
||||
ASCII_RE_4 = re.compile(b'([%s]{%d,})' % (ASCII_BYTE, 4))
|
||||
UNICODE_RE_4 = re.compile(b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, 4))
|
||||
REPEATS = [b'A', b'\x00', b'\xfe', b'\xff']
|
||||
ASCII_BYTE = r" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t".encode(
|
||||
"ascii"
|
||||
)
|
||||
ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4))
|
||||
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
|
||||
REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
|
||||
SLICE_SIZE = 4096
|
||||
|
||||
String = namedtuple('String', ['s', 'offset'])
|
||||
String = namedtuple("String", ["s", "offset"])
|
||||
|
||||
|
||||
def buf_filled_with(buf, character):
|
||||
dupe_chunk = character * SLICE_SIZE
|
||||
for offset in range(0, len(buf), SLICE_SIZE):
|
||||
new_chunk = buf[offset: offset + SLICE_SIZE]
|
||||
if dupe_chunk[:len(new_chunk)] != new_chunk:
|
||||
new_chunk = buf[offset : offset + SLICE_SIZE]
|
||||
if dupe_chunk[: len(new_chunk)] != new_chunk:
|
||||
return False
|
||||
return True


def extract_ascii_strings(buf, n=4):
'''
"""
Extract ASCII strings from the given binary data.

:param buf: A bytestring.
@@ -34,7 +36,7 @@ def extract_ascii_strings(buf, n=4):
:param n: The minimum length of strings to extract.
:type n: int
:rtype: Sequence[String]
'''
"""

if not buf:
return
@@ -46,14 +48,14 @@ def extract_ascii_strings(buf, n=4):
if n == 4:
r = ASCII_RE_4
else:
reg = b'([%s]{%d,})' % (ASCII_BYTE, n)
reg = b"([%s]{%d,})" % (ASCII_BYTE, n)
r = re.compile(reg)
for match in r.finditer(buf):
yield String(match.group().decode('ascii'), match.start())
yield String(match.group().decode("ascii"), match.start())


def extract_unicode_strings(buf, n=4):
'''
"""
Extract naive UTF-16 strings from the given binary data.

:param buf: A bytestring.
@@ -61,7 +63,7 @@ def extract_unicode_strings(buf, n=4):
:param n: The minimum length of strings to extract.
:type n: int
:rtype: Sequence[String]
'''
"""

if not buf:
return
@@ -72,11 +74,11 @@ def extract_unicode_strings(buf, n=4):
if n == 4:
r = UNICODE_RE_4
else:
reg = b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, n)
reg = b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n)
r = re.compile(reg)
for match in r.finditer(buf):
try:
yield String(match.group().decode('utf-16'), match.start())
yield String(match.group().decode("utf-16"), match.start())
except UnicodeDecodeError:
pass

@@ -84,15 +86,15 @@ def extract_unicode_strings(buf, n=4):
def main():
import sys

with open(sys.argv[1], 'rb') as f:
with open(sys.argv[1], "rb") as f:
b = f.read()

for s in extract_ascii_strings(b):
print('0x{:x}: {:s}'.format(s.offset, s.s))
print("0x{:x}: {:s}".format(s.offset, s.s))

for s in extract_unicode_strings(b):
print('0x{:x}: {:s}'.format(s.offset, s.s))
print("0x{:x}: {:s}".format(s.offset, s.s))


if __name__ == '__main__':
if __name__ == "__main__":
main()
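Beyond the CLI entry point above, the extractors can be driven as a library; a minimal sketch, assuming the module is importable as capa.features.extractors.strings:

    import capa.features.extractors.strings as strings

    buf = b"\x90\x90hello world\x00h\x00i\x00!\x00?\x00\x00"
    for s in strings.extract_ascii_strings(buf, n=4):
        print("0x%x: %s" % (s.offset, s.s))  # ASCII hits, e.g. "hello world"
    for s in strings.extract_unicode_strings(buf, n=4):
        print("0x%x: %s" % (s.offset, s.s))  # naive UTF-16 hits, e.g. "hi!?"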

@@ -13,7 +13,8 @@ import file
import function
import basicblock
import insn
__all__ = ['file', 'function', 'basicblock', 'insn']

__all__ = ["file", "function", "basicblock", "insn"]


def get_va(self):
@@ -27,14 +28,14 @@ def get_va(self):


def add_va_int_cast(o):
'''
"""
dynamically add a cast-to-int (`__int__`) method to the given object
that returns the value of the `.va` property.

this bit of skullduggery lets use cast viv-utils objects as ints.
the correct way of doing this is to update viv-utils (or subclass the objects here).
'''
setattr(o, '__int__', types.MethodType(get_va, o, type(o)))
"""
setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
return o
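A tiny illustration of the resulting behavior, assuming the helpers above are in scope (Python 2 semantics, matching the three-argument types.MethodType call; _FakeBasicBlock is a hypothetical stand-in for a viv-utils object):

    class _FakeBasicBlock:
        va = 0x401000

    bb = add_va_int_cast(_FakeBasicBlock())
    print(hex(bb.__int__()))  # 0x401000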



@@ -10,7 +10,7 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN


def interface_extract_basic_block_XXX(f, bb):
'''
"""
parse features from the given basic block.

args:
@@ -19,14 +19,14 @@ def interface_extract_basic_block_XXX(f, bb):

yields:
(Feature, int): the feature and the address at which its found.
'''
yield NotImplementedError('feature'), NotImplementedError('virtual address')
"""
yield NotImplementedError("feature"), NotImplementedError("virtual address")


def _bb_has_tight_loop(f, bb):
'''
"""
parse tight loops, true if last instruction in basic block branches to bb start
'''
"""
if len(bb.instructions) > 0:
for bva, bflags in bb.instructions[-1].getBranches():
if bflags & vivisect.envi.BR_COND:
@@ -37,16 +37,16 @@ def _bb_has_tight_loop(f, bb):


def extract_bb_tight_loop(f, bb):
''' check basic block for tight loop indicators '''
""" check basic block for tight loop indicators """
if _bb_has_tight_loop(f, bb):
yield Characteristic('tight loop', True), bb.va
yield Characteristic("tight loop", True), bb.va


def _bb_has_stackstring(f, bb):
'''
"""
extract potential stackstring creation, using the following heuristics:
- basic block contains enough moves of constant bytes to the stack
'''
"""
count = 0
for instr in bb.instructions:
if is_mov_imm_to_stack(instr):
@@ -60,16 +60,16 @@ def _bb_has_stackstring(f, bb):


def extract_stackstring(f, bb):
''' check basic block for stackstring indicators '''
""" check basic block for stackstring indicators """
if _bb_has_stackstring(f, bb):
yield Characteristic('stack string', True), bb.va
yield Characteristic("stack string", True), bb.va


def is_mov_imm_to_stack(instr):
'''
"""
Return if instruction moves immediate onto stack
'''
if not instr.mnem.startswith('mov'):
"""
if not instr.mnem.startswith("mov"):
return False

try:
@@ -82,32 +82,33 @@ def is_mov_imm_to_stack(instr):
return False

# TODO what about 64-bit operands?
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and \
not isinstance(dst, envi.archs.i386.disasm.i386RegMemOper):
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance(
dst, envi.archs.i386.disasm.i386RegMemOper
):
return False

if not dst.reg:
return False

rname = dst._dis_regctx.getRegisterName(dst.reg)
if rname not in ['ebp', 'rbp', 'esp', 'rsp']:
if rname not in ["ebp", "rbp", "esp", "rsp"]:
return False

return True


def get_printable_len(oper):
'''
"""
Return string length if all operand bytes are ascii or utf16-le printable
'''
"""
if oper.tsize == 1:
chars = struct.pack('<B', oper.imm)
chars = struct.pack("<B", oper.imm)
elif oper.tsize == 2:
chars = struct.pack('<H', oper.imm)
chars = struct.pack("<H", oper.imm)
elif oper.tsize == 4:
chars = struct.pack('<I', oper.imm)
chars = struct.pack("<I", oper.imm)
elif oper.tsize == 8:
chars = struct.pack('<Q', oper.imm)
chars = struct.pack("<Q", oper.imm)
if is_printable_ascii(chars):
return oper.tsize
if is_printable_utf16le(chars):
@@ -120,12 +121,12 @@ def is_printable_ascii(chars):


def is_printable_utf16le(chars):
if all(c == '\x00' for c in chars[1::2]):
if all(c == "\x00" for c in chars[1::2]):
return is_printable_ascii(chars[::2])
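For intuition on the two printability helpers: get_printable_len packs the immediate into little-endian bytes and tests them. A standalone illustration (Python 2 bytes-as-str semantics assumed by the character comparisons):

    import struct

    # the 4-byte immediate 0x6c6c6548 is "Hell" in little-endian order: printable ASCII
    print(struct.pack("<I", 0x6c6c6548))  # 'Hell'
    # "h\x00i\x00" passes the utf16-le test: every odd byte is NUL
    print(all(c == "\x00" for c in "h\x00i\x00"[1::2]))  # True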


def extract_features(f, bb):
'''
"""
extract features from the given basic block.

args:
@@ -134,7 +135,7 @@ def extract_features(f, bb):

yields:
Feature, set[VA]: the features and their location found in this basic block.
'''
"""
yield BasicBlock(), bb.va
for bb_handler in BASIC_BLOCK_HANDLERS:
for feature, va in bb_handler(f, bb):

@@ -9,11 +9,11 @@ import capa.features.extractors.strings


def extract_file_embedded_pe(vw, file_path):
with open(file_path, 'rb') as f:
with open(file_path, "rb") as f:
fbytes = f.read()

for offset, i in pe_carve.carve(fbytes, 1):
yield Characteristic('embedded pe', True), offset
yield Characteristic("embedded pe", True), offset


def extract_file_export_names(vw, file_path):
@@ -22,21 +22,21 @@ def extract_file_export_names(vw, file_path):


def extract_file_import_names(vw, file_path):
'''
"""
extract imported function names
1. imports by ordinal:
- modulename.#ordinal
2. imports by name, results in two features to support importname-only matching:
- modulename.importname
- importname
'''
"""
for va, _, _, tinfo in vw.getImports():
# vivisect source: tinfo = "%s.%s" % (libname, impname)
modname, impname = tinfo.split('.')
modname, impname = tinfo.split(".")
if is_viv_ord_impname(impname):
# replace ord prefix with #
impname = '#%s' % impname[len('ord'):]
tinfo = '%s.%s' % (modname, impname)
impname = "#%s" % impname[len("ord") :]
tinfo = "%s.%s" % (modname, impname)
yield Import(tinfo), va
else:
yield Import(tinfo), va
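A worked example of the ordinal rewrite above: vivisect names ordinal imports like "ord23", which becomes the "#23" form used for matching. A hedged standalone sketch:

    impname, modname = "ord23", "ws2_32"
    if impname.startswith("ord") and impname[len("ord"):].isdigit():
        impname = "#%s" % impname[len("ord"):]
    print("%s.%s" % (modname, impname))  # ws2_32.#23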
@@ -44,13 +44,13 @@ def extract_file_import_names(vw, file_path):


def is_viv_ord_impname(impname):
'''
"""
return if import name matches vivisect's ordinal naming scheme `'ord%d' % ord`
'''
if not impname.startswith('ord'):
"""
if not impname.startswith("ord"):
return False
try:
int(impname[len('ord'):])
int(impname[len("ord") :])
except ValueError:
return False
else:
@@ -63,10 +63,10 @@ def extract_file_section_names(vw, file_path):


def extract_file_strings(vw, file_path):
'''
"""
extract ASCII and UTF-16 LE strings from file
'''
with open(file_path, 'rb') as f:
"""
with open(file_path, "rb") as f:
b = f.read()

for s in capa.features.extractors.strings.extract_ascii_strings(b):
@@ -77,7 +77,7 @@ def extract_file_strings(vw, file_path):


def extract_features(vw, file_path):
'''
"""
extract file features from given workspace

args:
@@ -86,7 +86,7 @@ def extract_features(vw, file_path):

yields:
Tuple[Feature, VA]: a feature and its location.
'''
"""

for file_handler in FILE_HANDLERS:
for feature, va in file_handler(vw, file_path):

@@ -5,7 +5,7 @@ from capa.features.extractors import loops


def interface_extract_function_XXX(f):
'''
"""
parse features from the given function.

args:
@@ -13,58 +13,58 @@ def interface_extract_function_XXX(f):

yields:
(Feature, int): the feature and the address at which its found.
'''
yield NotImplementedError('feature'), NotImplementedError('virtual address')
"""
yield NotImplementedError("feature"), NotImplementedError("virtual address")


def get_switches(vw):
'''
"""
caching accessor to vivisect workspace switch constructs.
'''
if 'switches' in vw.metadata:
return vw.metadata['switches']
"""
if "switches" in vw.metadata:
return vw.metadata["switches"]
else:
# addresses of switches in the program
switches = set()

for case_va, _ in filter(lambda t: 'case' in t[1], vw.getNames()):
for case_va, _ in filter(lambda t: "case" in t[1], vw.getNames()):
# assume that the xref to a case location is a switch construct
for switch_va, _, _, _ in vw.getXrefsTo(case_va):
switches.add(switch_va)

vw.metadata['switches'] = switches
vw.metadata["switches"] = switches
return switches


def get_functions_with_switch(vw):
if 'functions_with_switch' in vw.metadata:
return vw.metadata['functions_with_switch']
if "functions_with_switch" in vw.metadata:
return vw.metadata["functions_with_switch"]
else:
functions = set()
for switch in get_switches(vw):
functions.add(vw.getFunction(switch))
vw.metadata['functions_with_switch'] = functions
vw.metadata["functions_with_switch"] = functions
return functions
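Both accessors above share one memoization idiom; a generic sketch, assuming only that the workspace exposes a dict-like vw.metadata (the helper name cached is hypothetical, not part of this change):

    def cached(vw, key, compute):
        # compute once per workspace, then reuse vw.metadata as the cache
        if key not in vw.metadata:
            vw.metadata[key] = compute()
        return vw.metadata[key]

    # e.g.: switches = cached(vw, "switches", lambda: compute_switches(vw))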


def extract_function_switch(f):
'''
"""
parse if a function contains a switch statement based on location names
method can be optimized
'''
"""
if f.va in get_functions_with_switch(f.vw):
yield Characteristic('switch', True), f.va
yield Characteristic("switch", True), f.va


def extract_function_calls_to(f):
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
yield Characteristic('calls to', True), src
yield Characteristic("calls to", True), src


def extract_function_loop(f):
'''
"""
parse if a function has a loop
'''
"""
edges = []

for bb in f.basic_blocks:
@@ -74,11 +74,11 @@ def extract_function_loop(f):
edges.append((bb.va, bva))

if edges and loops.has_loop(edges):
yield Characteristic('loop', True), f.va
yield Characteristic("loop", True), f.va


def extract_features(f):
'''
"""
extract features from the given function.

args:
@@ -86,14 +86,10 @@ def extract_features(f):

yields:
Feature, set[VA]: the features and their location found in this function.
'''
"""
for func_handler in FUNCTION_HANDLERS:
for feature, va in func_handler(f):
yield feature, va


FUNCTION_HANDLERS = (
extract_function_switch,
extract_function_calls_to,
extract_function_loop
)
FUNCTION_HANDLERS = (extract_function_switch, extract_function_calls_to, extract_function_loop)

@@ -14,13 +14,13 @@ Amd64RipRelOper = envi.archs.amd64.disasm.Amd64RipRelOper
LOC_OP = vivisect.const.LOC_OP
IF_NOFALL = envi.IF_NOFALL
REF_CODE = vivisect.const.REF_CODE
FAR_BRANCH_MASK = (envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH)
FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH

DESTRUCTIVE_MNEMONICS = ('mov', 'lea', 'pop', 'xor')
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")


def get_previous_instructions(vw, va):
'''
"""
collect the instructions that flow to the given address, local to the current function.

args:
@@ -29,7 +29,7 @@ def get_previous_instructions(vw, va):

returns:
List[int]: the prior instructions, which may fallthrough and/or jump here
'''
"""
ret = []

# find the immediate prior instruction.
@@ -61,7 +61,7 @@ class NotFoundError(Exception):


def find_definition(vw, va, reg):
'''
"""
scan backwards from the given address looking for assignments to the given register.
if a constant, return that value.

@@ -75,7 +75,7 @@ def find_definition(vw, va, reg):

raises:
NotFoundError: when the definition cannot be found.
'''
"""
q = collections.deque()
seen = set([])

@@ -95,10 +95,7 @@ def find_definition(vw, va, reg):
continue

opnd0 = insn.opers[0]
if not \
(isinstance(opnd0, i386RegOper)
and opnd0.reg == reg
and insn.mnem in DESTRUCTIVE_MNEMONICS):
if not (isinstance(opnd0, i386RegOper) and opnd0.reg == reg and insn.mnem in DESTRUCTIVE_MNEMONICS):
q.extend(get_previous_instructions(vw, cur))
continue

@@ -107,7 +104,7 @@ def find_definition(vw, va, reg):
# we currently only support extracting the constant from something like: `mov $reg, IAT`
# so, any other pattern results in an unknown value, represented by None.
# this is a good place to extend in the future, if we need more robust support.
if insn.mnem != 'mov':
if insn.mnem != "mov":
return (cur, None)
else:
opnd1 = insn.opers[1]
@@ -128,12 +125,11 @@ def is_indirect_call(vw, va, insn=None):
if insn is None:
insn = vw.parseOpcode(va)

return (insn.mnem == 'call'
and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper))
return insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
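find_definition (partially shown above) is effectively a small backward slice: a breadth-first walk over prior instructions, stopping each path at the first write to the register. A generic sketch of that traversal shape, with hypothetical neighbors/defines callbacks standing in for the vivisect-specific parts:

    import collections

    def walk_back(start, neighbors, defines):
        # BFS backwards; each path ends at the first defining instruction
        q = collections.deque(neighbors(start))
        seen = set()
        while q:
            cur = q.popleft()
            if cur in seen:
                continue
            seen.add(cur)
            if defines(cur):
                yield cur
            else:
                q.extend(neighbors(cur))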


def resolve_indirect_call(vw, va, insn=None):
'''
"""
inspect the given indirect call instruction and attempt to resolve the target address.

args:
@@ -145,7 +141,7 @@ def resolve_indirect_call(vw, va, insn=None):

raises:
NotFoundError: when the definition cannot be found.
'''
"""
if insn is None:
insn = vw.parseOpcode(va)


@@ -15,7 +15,7 @@ from capa.features.extractors.viv.indirect_calls import resolve_indirect_call


def interface_extract_instruction_XXX(f, bb, insn):
'''
"""
parse features from the given instruction.

args:
@@ -25,31 +25,31 @@ def interface_extract_instruction_XXX(f, bb, insn):

yields:
(Feature, int): the feature and the address at which its found.
'''
yield NotImplementedError('feature'), NotImplementedError('virtual address')
"""
yield NotImplementedError("feature"), NotImplementedError("virtual address")


def get_imports(vw):
'''
"""
caching accessor to vivisect workspace imports
avoids performance issues in vivisect when collecting locations
'''
if 'imports' in vw.metadata:
return vw.metadata['imports']
"""
if "imports" in vw.metadata:
return vw.metadata["imports"]
else:
imports = {p[0]: p[3] for p in vw.getImports()}
vw.metadata['imports'] = imports
vw.metadata["imports"] = imports
return imports


def extract_insn_api_features(f, bb, insn):
'''parse API features from the given instruction.'''
"""parse API features from the given instruction."""

# example:
#
# call dword [0x00473038]

if insn.mnem != 'call':
if insn.mnem != "call":
return

# traditional call via IAT
@@ -71,7 +71,7 @@ def extract_insn_api_features(f, bb, insn):
target = insn.opers[0].getOperValue(insn)

try:
thunk = f.vw.getFunctionMeta(target, 'Thunk')
thunk = f.vw.getFunctionMeta(target, "Thunk")
except vivisect.exc.InvalidFunction:
return
else:
@@ -108,7 +108,7 @@ def extract_insn_api_features(f, bb, insn):


def extract_insn_number_features(f, bb, insn):
'''parse number features from the given instruction.'''
"""parse number features from the given instruction."""
# example:
#
# push 3136B0h ; dwControlCode
@@ -124,9 +124,7 @@ def extract_insn_number_features(f, bb, insn):
# assume its not also a constant.
continue

if insn.mnem == 'add' \
and insn.opers[0].isReg() \
and insn.opers[0].reg == envi.archs.i386.disasm.REG_ESP:
if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.disasm.REG_ESP:
# skip things like:
#
# .text:00401140 call sub_407E2B
@@ -137,13 +135,13 @@ def extract_insn_number_features(f, bb, insn):


def extract_insn_bytes_features(f, bb, insn):
'''
"""
parse byte sequence features from the given instruction.
example:
# push offset iid_004118d4_IShellLinkA ; riid
'''
"""
for oper in insn.opers:
if insn.mnem == 'call':
if insn.mnem == "call":
# ignore call instructions
continue

@@ -184,7 +182,7 @@ def read_string(vw, offset):
pass
else:
if alen > 0:
return vw.readMemory(offset, alen).decode('utf-8')
return vw.readMemory(offset, alen).decode("utf-8")

try:
ulen = vw.detectUnicode(offset)
@@ -199,13 +197,13 @@ def read_string(vw, offset):
# vivisect seems to mis-detect the end unicode strings
# off by one, too short
ulen += 1
return vw.readMemory(offset, ulen).decode('utf-16')
return vw.readMemory(offset, ulen).decode("utf-16")

raise ValueError('not a string', offset)
raise ValueError("not a string", offset)


def extract_insn_string_features(f, bb, insn):
'''parse string features from the given instruction.'''
"""parse string features from the given instruction."""
# example:
#
# push offset aAcr ; "ACR > "
@@ -222,11 +220,11 @@ def extract_insn_string_features(f, bb, insn):
except ValueError:
continue
else:
yield String(s.rstrip('\x00')), insn.va
yield String(s.rstrip("\x00")), insn.va


def extract_insn_offset_features(f, bb, insn):
'''parse structure offset features from the given instruction.'''
"""parse structure offset features from the given instruction."""
# example:
#
# .text:0040112F cmp [esi+4], ebx
@@ -249,15 +247,18 @@ def extract_insn_offset_features(f, bb, insn):


def is_security_cookie(f, bb, insn):
'''
"""
check if an instruction is related to security cookie checks
'''
"""
# security cookie check should use SP or BP
oper = insn.opers[1]
if oper.isReg() \
and oper.reg not in [envi.archs.i386.disasm.REG_ESP, envi.archs.i386.disasm.REG_EBP,
# TODO: do x64 support for real.
envi.archs.amd64.disasm.REG_RBP, envi.archs.amd64.disasm.REG_RSP]:
if oper.isReg() and oper.reg not in [
envi.archs.i386.disasm.REG_ESP,
envi.archs.i386.disasm.REG_EBP,
# TODO: do x64 support for real.
envi.archs.amd64.disasm.REG_RBP,
envi.archs.amd64.disasm.REG_RSP,
]:
return False

# expect security cookie init in first basic block within first bytes (instructions)
@@ -273,11 +274,11 @@ def is_security_cookie(f, bb, insn):


def extract_insn_nzxor_characteristic_features(f, bb, insn):
'''
"""
parse non-zeroing XOR instruction from the given instruction.
ignore expected non-zeroing XORs, e.g. security cookies.
'''
if insn.mnem != 'xor':
"""
if insn.mnem != "xor":
return

if insn.opers[0] == insn.opers[1]:
@@ -286,24 +287,24 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
if is_security_cookie(f, bb, insn):
return

yield Characteristic('nzxor', True), insn.va
yield Characteristic("nzxor", True), insn.va


def extract_insn_mnemonic_features(f, bb, insn):
'''parse mnemonic features from the given instruction.'''
"""parse mnemonic features from the given instruction."""
yield Mnemonic(insn.mnem), insn.va


def extract_insn_peb_access_characteristic_features(f, bb, insn):
'''
"""
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
'''
"""
# TODO handle where fs/gs are loaded into a register or onto the stack and used later

if insn.mnem not in ['push', 'mov']:
if insn.mnem not in ["push", "mov"]:
return

if 'fs' in insn.getPrefixName():
if "fs" in insn.getPrefixName():
for oper in insn.opers:
# examples
#
@@ -312,27 +313,29 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
# IDA: push large dword ptr fs:30h
# viv: fs: push dword [0x00000030]
# fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or \
(isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30):
yield Characteristic('peb access', True), insn.va
elif 'gs' in insn.getPrefixName():
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (
isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
):
yield Characteristic("peb access", True), insn.va
elif "gs" in insn.getPrefixName():
for oper in insn.opers:
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or \
(isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60):
yield Characteristic('peb access', True), insn.va
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
):
yield Characteristic("peb access", True), insn.va
else:
pass


def extract_insn_segment_access_features(f, bb, insn):
''' parse the instruction for access to fs or gs '''
""" parse the instruction for access to fs or gs """
prefix = insn.getPrefixName()

if prefix == 'fs':
yield Characteristic('fs access', True), insn.va
if prefix == "fs":
yield Characteristic("fs access", True), insn.va

if prefix == 'gs':
yield Characteristic('gs access', True), insn.va
if prefix == "gs":
yield Characteristic("gs access", True), insn.va


def get_section(vw, va):
@@ -344,16 +347,16 @@ def get_section(vw, va):


def extract_insn_cross_section_cflow(f, bb, insn):
'''
"""
inspect the instruction for a CALL or JMP that crosses section boundaries.
'''
"""
for va, flags in insn.getBranches():
if flags & envi.BR_FALL:
continue

try:
# skip 32-bit calls to imports
if insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
if insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
oper = insn.opers[0]
target = oper.getOperAddr(insn)

@@ -361,7 +364,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
continue

# skip 64-bit calls to imports
elif insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
elif insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
op = insn.opers[0]
target = op.getOperAddr(insn)

@@ -369,7 +372,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
continue

if get_section(f.vw, insn.va) != get_section(f.vw, va):
yield Characteristic('cross section flow', True), insn.va
yield Characteristic("cross section flow", True), insn.va

except KeyError:
continue
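The cross-section test above reduces to comparing the section of the branch source against the section of its target, with KeyError standing in for addresses outside any section. A self-contained sketch with a made-up section table:

    sections = [(0x401000, 0x410000, ".text"), (0x410000, 0x420000, ".rsrc")]

    def get_section(va):
        for start, end, name in sections:
            if start <= va < end:
                return name
        raise KeyError(va)

    print(get_section(0x401100) != get_section(0x411000))  # True: cross section flow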
@@ -378,7 +381,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
# this is a feature that's most relevant at the function scope,
# however, its most efficient to extract at the instruction scope.
def extract_function_calls_from(f, bb, insn):
if insn.mnem != 'call':
if insn.mnem != "call":
return

target = None
@@ -387,7 +390,7 @@ def extract_function_calls_from(f, bb, insn):
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
oper = insn.opers[0]
target = oper.getOperAddr(insn)
yield Characteristic('calls from', True), target
yield Characteristic("calls from", True), target

# call via thunk on x86,
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
@@ -396,44 +399,44 @@ def extract_function_calls_from(f, bb, insn):
# see Lab21-01.exe_:0x140001178
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
target = insn.opers[0].getOperValue(insn)
yield Characteristic('calls from', True), target
yield Characteristic("calls from", True), target

# call via IAT, x64
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
op = insn.opers[0]
target = op.getOperAddr(insn)
yield Characteristic('calls from', True), target
yield Characteristic("calls from", True), target

if target and target == f.va:
# if we found a jump target and it's the function address
# mark as recursive
yield Characteristic('recursive call', True), target
yield Characteristic("recursive call", True), target


# this is a feature that's most relevant at the function or basic block scope,
# however, its most efficient to extract at the instruction scope.
def extract_function_indirect_call_characteristic_features(f, bb, insn):
'''
"""
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
does not include calls like => call ds:dword_ABD4974
'''
if insn.mnem != 'call':
"""
if insn.mnem != "call":
return

# Checks below work for x86 and x64
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
# call edx
yield Characteristic('indirect call', True), insn.va
yield Characteristic("indirect call", True), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper):
# call dword ptr [eax+50h]
yield Characteristic('indirect call', True), insn.va
yield Characteristic("indirect call", True), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper):
# call qword ptr [rsp+78h]
yield Characteristic('indirect call', True), insn.va
yield Characteristic("indirect call", True), insn.va


def extract_features(f, bb, insn):
'''
"""
extract features from the given insn.

args:
@@ -443,7 +446,7 @@ def extract_features(f, bb, insn):

yields:
Feature, set[VA]: the features and their location found in this insn.
'''
"""
for insn_handler in INSTRUCTION_HANDLERS:
for feature, va in insn_handler(f, bb, insn):
yield feature, va
@@ -461,5 +464,5 @@ INSTRUCTION_HANDLERS = (
extract_insn_cross_section_cflow,
extract_insn_segment_access_features,
extract_function_calls_from,
extract_function_indirect_call_characteristic_features
extract_function_indirect_call_characteristic_features,
)

@@ -8,7 +8,7 @@ class Export(Feature):
self.value = value

def __str__(self):
return 'Export(%s)' % (self.value)
return "Export(%s)" % (self.value)


class Import(Feature):
@@ -18,7 +18,7 @@ class Import(Feature):
self.value = value

def __str__(self):
return 'Import(%s)' % (self.value)
return "Import(%s)" % (self.value)


class Section(Feature):
@@ -28,4 +28,4 @@ class Section(Feature):
self.value = value

def __str__(self):
return 'Section(%s)' % (self.value)
return "Section(%s)" % (self.value)

@@ -1,4 +1,4 @@
'''
"""
capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`

json format:
@@ -39,7 +39,7 @@ json format:
],
}
}
'''
"""
import json
import zlib
import logging
@@ -61,10 +61,7 @@ def serialize_feature(feature):
return feature.freeze_serialize()


KNOWN_FEATURES = {
F.__name__: F
for F in capa.features.Feature.__subclasses__()
}
KNOWN_FEATURES = {F.__name__: F for F in capa.features.Feature.__subclasses__()}


def deserialize_feature(doc):
@@ -73,7 +70,7 @@ def deserialize_feature(doc):


def dumps(extractor):
'''
"""
serialize the given extractor to a string

args:
@@ -81,79 +78,64 @@ def dumps(extractor):

returns:
str: the serialized features.
'''
ret = {
'version': 1,
'functions': {},
'scopes': {
'file': [],
'function': [],
'basic block': [],
'instruction': [],
}
}
"""
ret = {"version": 1, "functions": {}, "scopes": {"file": [], "function": [], "basic block": [], "instruction": [],}}

for feature, va in extractor.extract_file_features():
ret['scopes']['file'].append(
serialize_feature(feature) + (hex(va), ())
)
ret["scopes"]["file"].append(serialize_feature(feature) + (hex(va), ()))

for f in extractor.get_functions():
ret['functions'][hex(f)] = {}
ret["functions"][hex(f)] = {}

for feature, va in extractor.extract_function_features(f):
ret['scopes']['function'].append(
serialize_feature(feature) + (hex(va), (hex(f), ))
)
ret["scopes"]["function"].append(serialize_feature(feature) + (hex(va), (hex(f),)))

for bb in extractor.get_basic_blocks(f):
ret['functions'][hex(f)][hex(bb)] = []
ret["functions"][hex(f)][hex(bb)] = []

for feature, va in extractor.extract_basic_block_features(f, bb):
ret['scopes']['basic block'].append(
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), ))
)
ret["scopes"]["basic block"].append(serialize_feature(feature) + (hex(va), (hex(f), hex(bb),)))

for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]):
ret['functions'][hex(f)][hex(bb)].append(hex(insnva))
ret["functions"][hex(f)][hex(bb)].append(hex(insnva))

for feature, va in extractor.extract_insn_features(f, bb, insn):
ret['scopes']['instruction'].append(
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva), ))
ret["scopes"]["instruction"].append(
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva),))
)
return json.dumps(ret)


def loads(s):
'''deserialize a set of features (as a NullFeatureExtractor) from a string.'''
"""deserialize a set of features (as a NullFeatureExtractor) from a string."""
doc = json.loads(s)

if doc.get('version') != 1:
raise ValueError('unsupported freeze format version: %d' % (doc.get('version')))
if doc.get("version") != 1:
raise ValueError("unsupported freeze format version: %d" % (doc.get("version")))

features = {
'file features': [],
'functions': {},
"file features": [],
"functions": {},
}

for fva, function in doc.get('functions', {}).items():
for fva, function in doc.get("functions", {}).items():
fva = int(fva, 0x10)
features['functions'][fva] = {
'features': [],
'basic blocks': {},
features["functions"][fva] = {
"features": [],
"basic blocks": {},
}

for bbva, bb in function.items():
bbva = int(bbva, 0x10)
features['functions'][fva]['basic blocks'][bbva] = {
'features': [],
'instructions': {},
features["functions"][fva]["basic blocks"][bbva] = {
"features": [],
"instructions": {},
}

for insnva in bb:
insnva = int(insnva, 0x10)
features['functions'][fva]['basic blocks'][bbva]['instructions'][insnva] = {
'features': [],
features["functions"][fva]["basic blocks"][bbva]["instructions"][insnva] = {
"features": [],
}

# in the following blocks, each entry looks like:
@@ -161,13 +143,13 @@ def loads(s):
# ('MatchedRule', ('foo', ), '0x401000', ('0x401000', ))
# ^^^^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^ ^^^^^^^^^^^^^^
# feature name args addr func/bb/insn
for feature in doc.get('scopes', {}).get('file', []):
for feature in doc.get("scopes", {}).get("file", []):
va, loc = feature[2:]
va = int(va, 0x10)
feature = deserialize_feature(feature[:2])
features['file features'].append((va, feature))
features["file features"].append((va, feature))

for feature in doc.get('scopes', {}).get('function', []):
for feature in doc.get("scopes", {}).get("function", []):
# fetch the pair like:
#
# ('0x401000', ('0x401000', ))
@@ -183,42 +165,42 @@ def loads(s):
# ^^^^^^^^^^^^^ ^^^^^^^^^
# feature name args
feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['features'].append((va, feature))
features["functions"][loc[0]]["features"].append((va, feature))

for feature in doc.get('scopes', {}).get('basic block', []):
for feature in doc.get("scopes", {}).get("basic block", []):
va, loc = feature[2:]
va = int(va, 0x10)
loc = [int(lo, 0x10) for lo in loc]
feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['basic blocks'][loc[1]]['features'].append((va, feature))
features["functions"][loc[0]]["basic blocks"][loc[1]]["features"].append((va, feature))

for feature in doc.get('scopes', {}).get('instruction', []):
for feature in doc.get("scopes", {}).get("instruction", []):
va, loc = feature[2:]
va = int(va, 0x10)
loc = [int(lo, 0x10) for lo in loc]
feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['basic blocks'][loc[1]]['instructions'][loc[2]]['features'].append((va, feature))
features["functions"][loc[0]]["basic blocks"][loc[1]]["instructions"][loc[2]]["features"].append((va, feature))

return capa.features.extractors.NullFeatureExtractor(features)


MAGIC = 'capa0000'.encode('ascii')
MAGIC = "capa0000".encode("ascii")


def dump(extractor):
'''serialize the given extractor to a byte array.'''
return MAGIC + zlib.compress(dumps(extractor).encode('utf-8'))
"""serialize the given extractor to a byte array."""
return MAGIC + zlib.compress(dumps(extractor).encode("utf-8"))


def is_freeze(buf):
return buf[:len(MAGIC)] == MAGIC
return buf[: len(MAGIC)] == MAGIC


def load(buf):
'''deserialize a set of features (as a NullFeatureExtractor) from a byte array.'''
"""deserialize a set of features (as a NullFeatureExtractor) from a byte array."""
if not is_freeze(buf):
raise ValueError('missing magic header')
return loads(zlib.decompress(buf[len(MAGIC):]).decode('utf-8'))
raise ValueError("missing magic header")
return loads(zlib.decompress(buf[len(MAGIC) :]).decode("utf-8"))
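A round trip of the freeze container above, self-contained (only the magic header plus zlib-compressed UTF-8 JSON):

    import json
    import zlib

    MAGIC = b"capa0000"
    doc = {"version": 1, "functions": {}, "scopes": {"file": [], "function": [], "basic block": [], "instruction": []}}
    frozen = MAGIC + zlib.compress(json.dumps(doc).encode("utf-8"))
    assert frozen[:len(MAGIC)] == MAGIC                                        # is_freeze
    thawed = json.loads(zlib.decompress(frozen[len(MAGIC):]).decode("utf-8"))  # load
    assert thawed["version"] == 1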


def main(argv=None):
@@ -230,24 +212,21 @@ def main():
argv = sys.argv[1:]

formats = [
('auto', '(default) detect file type automatically'),
('pe', 'Windows PE file'),
('sc32', '32-bit shellcode'),
('sc64', '64-bit shellcode'),
("auto", "(default) detect file type automatically"),
("pe", "Windows PE file"),
("sc32", "32-bit shellcode"),
("sc64", "64-bit shellcode"),
]
format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])

parser = argparse.ArgumentParser(description='save capa features to a file')
parser.add_argument('sample', type=str,
help='Path to sample to analyze')
parser.add_argument('output', type=str,
help='Path to output file')
parser.add_argument('-v', '--verbose', action='store_true',
help='Enable verbose output')
parser.add_argument('-q', '--quiet', action='store_true',
help='Disable all output but errors')
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
help='Select sample format, %s' % format_help)
parser = argparse.ArgumentParser(description="save capa features to a file")
parser.add_argument("sample", type=str, help="Path to sample to analyze")
parser.add_argument("output", type=str, help="Path to output file")
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
parser.add_argument(
"-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
)
args = parser.parse_args(args=argv)

if args.quiet:
@@ -264,13 +243,15 @@ def main(argv=None):

# don't import this at top level to support ida/py3 backend
import capa.features.extractors.viv

extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
with open(args.output, 'wb') as f:
with open(args.output, "wb") as f:
f.write(dump(extractor))

return 0


if __name__ == '__main__':
if __name__ == "__main__":
import sys

sys.exit(main())

@@ -4,9 +4,9 @@ from capa.features import Feature
class API(Feature):
def __init__(self, name):
# Downcase library name if given
if '.' in name:
modname, impname = name.split('.')
name = modname.lower() + '.' + impname
if "." in name:
modname, impname = name.split(".")
name = modname.lower() + "." + impname

super(API, self).__init__([name])
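A worked example of the downcasing above: the module part is normalized while the import name is preserved:

    name = "KERNEL32.CreateFileA"
    modname, impname = name.split(".")
    print(modname.lower() + "." + impname)  # kernel32.CreateFileA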

@@ -19,9 +19,9 @@ class Number(Feature):

def __str__(self):
if self.symbol:
return 'number(0x%x = %s)' % (self.value, self.symbol)
return "number(0x%x = %s)" % (self.value, self.symbol)
else:
return 'number(0x%x)' % (self.value)
return "number(0x%x)" % (self.value)


class Offset(Feature):
@@ -32,9 +32,9 @@ class Offset(Feature):

def __str__(self):
if self.symbol:
return 'offset(0x%x = %s)' % (self.value, self.symbol)
return "offset(0x%x = %s)" % (self.value, self.symbol)
else:
return 'offset(0x%x)' % (self.value)
return "offset(0x%x)" % (self.value)


class Mnemonic(Feature):
@@ -43,4 +43,4 @@ class Mnemonic(Feature):
self.value = value

def __str__(self):
return 'mnemonic(%s)' % (self.value)
return "mnemonic(%s)" % (self.value)

@@ -4,7 +4,7 @@ _hex = hex
def hex(i):
# under py2.7, long integers get formatted with a trailing `L`
# and this is not pretty. so strip it out.
return _hex(oint(i)).rstrip('L')
return _hex(oint(i)).rstrip("L")


def oint(i):

@@ -15,14 +15,14 @@ def info_to_name(display):
e.g. function(my_function) => my_function
"""
try:
return display.split('(')[1].rstrip(')')
return display.split("(")[1].rstrip(")")
except IndexError:
return ''
return ""


def location_to_hex(location):
""" convert location to hex for display """
return '%08X' % location
return "%08X" % location


class CapaExplorerDataItem(object):
@@ -35,7 +35,12 @@ class CapaExplorerDataItem(object):
self.children = []
self._checked = False

self.flags = (QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsTristate | QtCore.Qt.ItemIsUserCheckable)
self.flags = (
QtCore.Qt.ItemIsEnabled
| QtCore.Qt.ItemIsSelectable
| QtCore.Qt.ItemIsTristate
| QtCore.Qt.ItemIsUserCheckable
)

if self.pred:
self.pred.appendChild(self)
@@ -109,7 +114,7 @@ class CapaExplorerDataItem(object):

def __str__(self):
""" get string representation of columns """
return ' '.join([data for data in self._data if data])
return " ".join([data for data in self._data if data])

@property
def info(self):
@@ -133,12 +138,12 @@ class CapaExplorerDataItem(object):
class CapaExplorerRuleItem(CapaExplorerDataItem):
""" store data relevant to capa function result """

fmt = '%s (%d matches)'
fmt = "%s (%d matches)"

def __init__(self, parent, display, count, source):
""" """
display = self.fmt % (display, count) if count > 1 else display
super(CapaExplorerRuleItem, self).__init__(parent, [display, '', ''])
super(CapaExplorerRuleItem, self).__init__(parent, [display, "", ""])
self._source = source

@property
@@ -150,9 +155,9 @@ class CapaExplorerRuleItem(CapaExplorerDataItem):
class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
""" store data relevant to capa function match result """

def __init__(self, parent, display, source=''):
def __init__(self, parent, display, source=""):
""" """
super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, '', ''])
super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, "", ""])
self._source = source

@property
@@ -164,12 +169,13 @@ class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
class CapaExplorerFunctionItem(CapaExplorerDataItem):
""" store data relevant to capa function result """

fmt = 'function(%s)'
fmt = "function(%s)"

def __init__(self, parent, location):
""" """
super(CapaExplorerFunctionItem, self).__init__(parent, [self.fmt % idaapi.get_name(location),
location_to_hex(location), ''])
super(CapaExplorerFunctionItem, self).__init__(
parent, [self.fmt % idaapi.get_name(location), location_to_hex(location), ""]
)

@property
def info(self):
@@ -187,32 +193,31 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):
class CapaExplorerBlockItem(CapaExplorerDataItem):
""" store data relevant to capa basic block result """

fmt = 'basic block(loc_%08X)'
fmt = "basic block(loc_%08X)"

def __init__(self, parent, location):
""" """
super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ''])
super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ""])


class CapaExplorerDefaultItem(CapaExplorerDataItem):
""" store data relevant to capa default result """

def __init__(self, parent, display, details='', location=None):
def __init__(self, parent, display, details="", location=None):
""" """
location = location_to_hex(location) if location else ''
location = location_to_hex(location) if location else ""
super(CapaExplorerDefaultItem, self).__init__(parent, [display, location, details])


class CapaExplorerFeatureItem(CapaExplorerDataItem):
""" store data relevant to capa feature result """

def __init__(self, parent, display, location='', details=''):
location = location_to_hex(location) if location else ''
def __init__(self, parent, display, location="", details=""):
location = location_to_hex(location) if location else ""
super(CapaExplorerFeatureItem, self).__init__(parent, [display, location, details])


class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):

def __init__(self, parent, display, location):
""" """
details = capa.ida.helpers.get_disasm_line(location)
@@ -221,26 +226,24 @@ class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):


class CapaExplorerByteViewItem(CapaExplorerFeatureItem):

def __init__(self, parent, display, location):
""" """
byte_snap = idaapi.get_bytes(location, 32)

if byte_snap:
byte_snap = codecs.encode(byte_snap, 'hex').upper()
byte_snap = codecs.encode(byte_snap, "hex").upper()
if sys.version_info >= (3, 0):
details = ' '.join([byte_snap[i:i + 2].decode() for i in range(0, len(byte_snap), 2)])
details = " ".join([byte_snap[i : i + 2].decode() for i in range(0, len(byte_snap), 2)])
else:
details = ' '.join([byte_snap[i:i + 2] for i in range(0, len(byte_snap), 2)])
details = " ".join([byte_snap[i : i + 2] for i in range(0, len(byte_snap), 2)])
else:
details = ''
details = ""

super(CapaExplorerByteViewItem, self).__init__(parent, display, location=location, details=details)
self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)
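The byte formatting above renders a 32-byte snapshot as space-separated hex pairs; the Python 3 branch in isolation:

    import codecs

    byte_snap = codecs.encode(b"\xde\xad\xbe\xef", "hex").upper()
    print(" ".join(byte_snap[i:i + 2].decode() for i in range(0, len(byte_snap), 2)))  # DE AD BE EF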
|
||||
|
||||
|
||||
class CapaExplorerStringViewItem(CapaExplorerFeatureItem):
|
||||
|
||||
def __init__(self, parent, display, location):
|
||||
""" """
|
||||
super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location)
|
||||
|
||||
@@ -16,7 +16,7 @@ from capa.ida.explorer.item import (
|
||||
CapaExplorerByteViewItem,
|
||||
CapaExplorerBlockItem,
|
||||
CapaExplorerRuleMatchItem,
|
||||
CapaExplorerFeatureItem
|
||||
CapaExplorerFeatureItem,
|
||||
)
|
||||
|
||||
import capa.ida.helpers
|
||||
@@ -37,7 +37,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
def __init__(self, parent=None):
|
||||
""" """
|
||||
super(CapaExplorerDataModel, self).__init__(parent)
|
||||
self.root_node = CapaExplorerDataItem(None, ['Rule Information', 'Address', 'Details'])
|
||||
self.root_node = CapaExplorerDataItem(None, ["Rule Information", "Address", "Details"])
|
||||
|
||||
def reset(self):
|
||||
""" """
|
||||
@@ -86,8 +86,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
# display data in corresponding column
|
||||
return item.data(column)
|
||||
|
||||
if role == QtCore.Qt.ToolTipRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem)) and \
|
||||
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION == column:
|
||||
if (
|
||||
role == QtCore.Qt.ToolTipRole
|
||||
and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem))
|
||||
and CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION == column
|
||||
):
|
||||
# show tooltip containing rule source
|
||||
return item.source
|
||||
|
||||
@@ -95,18 +98,30 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
# inform view how to display content of checkbox - un/checked
|
||||
return QtCore.Qt.Checked if item.isChecked() else QtCore.Qt.Unchecked
|
||||
|
||||
if role == QtCore.Qt.FontRole and column in (CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
|
||||
CapaExplorerDataModel.COLUMN_INDEX_DETAILS):
|
||||
if role == QtCore.Qt.FontRole and column in (
|
||||
CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
|
||||
CapaExplorerDataModel.COLUMN_INDEX_DETAILS,
|
||||
):
|
||||
# set font for virtual address and details columns
|
||||
font = QtGui.QFont('Courier', weight=QtGui.QFont.Medium)
|
||||
font = QtGui.QFont("Courier", weight=QtGui.QFont.Medium)
|
||||
if column == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS:
|
||||
font.setBold(True)
|
||||
return font
|
||||
|
||||
if role == QtCore.Qt.FontRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem,
|
||||
CapaExplorerBlockItem, CapaExplorerFunctionItem,
|
||||
CapaExplorerFeatureItem)) and \
|
||||
column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
|
||||
if (
|
||||
role == QtCore.Qt.FontRole
|
||||
and isinstance(
|
||||
item,
|
||||
(
|
||||
CapaExplorerRuleItem,
|
||||
CapaExplorerRuleMatchItem,
|
||||
CapaExplorerBlockItem,
|
||||
CapaExplorerFunctionItem,
|
||||
CapaExplorerFeatureItem,
|
||||
),
|
||||
)
|
||||
and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||
):
|
||||
# set bold font for top-level rules
|
||||
font = QtGui.QFont()
|
||||
font.setBold(True)
|
||||
@@ -116,8 +131,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
# set color for virtual address column
|
||||
return QtGui.QColor(88, 139, 174)
|
||||
|
||||
if role == QtCore.Qt.ForegroundRole and isinstance(item, CapaExplorerFeatureItem) and column == \
|
||||
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
|
||||
if (
|
||||
role == QtCore.Qt.ForegroundRole
|
||||
and isinstance(item, CapaExplorerFeatureItem)
|
||||
and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||
):
|
||||
# set color for feature items
|
||||
return QtGui.QColor(79, 121, 66)
|
||||
|
||||
@@ -222,8 +240,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
|
||||
def util_reset_ida_highlighting(self, item, checked):
|
||||
""" """
|
||||
if not isinstance(item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem,
|
||||
CapaExplorerByteViewItem)):
|
||||
if not isinstance(
|
||||
item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem, CapaExplorerByteViewItem)
|
||||
):
|
||||
# ignore other item types
|
||||
return
|
||||
|
||||
@@ -254,8 +273,10 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
if not model_index.isValid():
|
||||
return False
|
||||
|
||||
if role == QtCore.Qt.CheckStateRole and model_index.column() ==\
|
||||
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
|
||||
if (
|
||||
role == QtCore.Qt.CheckStateRole
|
||||
and model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||
):
|
||||
# user un/checked box - un/check parent and children
|
||||
for child_index in self.iterateChildrenIndexFromRootIndex(model_index, ignore_root=False):
|
||||
child_index.internalPointer().setChecked(value)
|
||||
@@ -263,9 +284,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
self.dataChanged.emit(child_index, child_index)
|
||||
return True
|
||||
|
||||
if role == QtCore.Qt.EditRole and value and \
|
||||
model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION and \
|
||||
isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
|
||||
if (
|
||||
role == QtCore.Qt.EditRole
|
||||
and value
|
||||
and model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||
and isinstance(model_index.internalPointer(), CapaExplorerFunctionItem)
|
||||
):
|
||||
# user renamed function - update IDA database and data model
|
||||
old_name = model_index.internalPointer().info
|
||||
new_name = str(value)
|
||||
@@ -309,39 +333,39 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
"type": "or"
|
||||
},
|
||||
"""
|
||||
if statement['type'] in ('and', 'or', 'optional'):
|
||||
return CapaExplorerDefaultItem(parent, statement['type'])
|
||||
elif statement['type'] == 'not':
|
||||
if statement["type"] in ("and", "or", "optional"):
|
||||
return CapaExplorerDefaultItem(parent, statement["type"])
|
||||
elif statement["type"] == "not":
|
||||
# TODO: do we display 'not'
|
||||
pass
|
||||
elif statement['type'] == 'some':
|
||||
return CapaExplorerDefaultItem(parent, statement['count'] + ' or more')
|
||||
elif statement['type'] == 'range':
|
||||
elif statement["type"] == "some":
|
||||
return CapaExplorerDefaultItem(parent, statement["count"] + " or more")
|
||||
elif statement["type"] == "range":
|
||||
# `range` is a weird node, its almost a hybrid of statement + feature.
# it is a specific feature repeated multiple times.
# there's no additional logic in the feature part, just the existence of a feature.
# so, we have to inline some of the feature rendering here.
display = 'count(%s): ' % self.capa_doc_feature_to_display(statement['child'])
display = "count(%s): " % self.capa_doc_feature_to_display(statement["child"])

if statement['max'] == statement['min']:
display += '%d' % (statement['min'])
elif statement['min'] == 0:
display += '%d or fewer' % (statement['max'])
elif statement['max'] == (1 << 64 - 1):
display += '%d or more' % (statement['min'])
if statement["max"] == statement["min"]:
display += "%d" % (statement["min"])
elif statement["min"] == 0:
display += "%d or fewer" % (statement["max"])
elif statement["max"] == (1 << 64 - 1):
display += "%d or more" % (statement["min"])
else:
display += 'between %d and %d' % (statement['min'], statement['max'])
display += "between %d and %d" % (statement["min"], statement["max"])

return CapaExplorerFeatureItem(parent, display=display)
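# illustrative note (not part of the original change): with the feature mapping used by
# capa_doc_feature_to_display below, the branches above produce display strings such as
#   count(characteristic(loop)): 2            (max == min)
#   count(mnemonic(xor)): 2 or fewer          (min == 0)
#   count(mnemonic(xor)): 2 or more           (max == (1 << 64 - 1))
#   count(mnemonic(xor)): between 2 and 4     (otherwise)
# the specific feature values here are hypothetical examples.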
elif statement['type'] == 'subscope':
return CapaExplorerFeatureItem(parent, 'subscope(%s)' % statement['subscope'])
elif statement['type'] == 'regex':
elif statement["type"] == "subscope":
return CapaExplorerFeatureItem(parent, "subscope(%s)" % statement["subscope"])
elif statement["type"] == "regex":
# regex is a `Statement` not a `Feature`
# this is because it doesn't get extracted, but applies to all strings in scope.
# so we have to handle it here
return CapaExplorerFeatureItem(parent, 'regex(%s)' % statement['pattern'], details=statement['match'])
return CapaExplorerFeatureItem(parent, "regex(%s)" % statement["pattern"], details=statement["match"])
else:
raise RuntimeError('unexpected match statement type: ' + str(statement))
raise RuntimeError("unexpected match statement type: " + str(statement))

def render_capa_doc_match(self, parent, match, doc):
""" render capa match read from doc
@@ -367,23 +391,24 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
}
},
"""
if not match['success']:
if not match["success"]:
# TODO: display failed branches at some point? Help with debugging rules?
return

# optional statement with no successful children is empty
if (match['node'].get('statement', {}).get('type') == 'optional'
and not any(map(lambda m: m['success'], match['children']))):
if match["node"].get("statement", {}).get("type") == "optional" and not any(
map(lambda m: m["success"], match["children"])
):
return

if match['node']['type'] == 'statement':
parent2 = self.render_capa_doc_statement_node(parent, match['node']['statement'], doc)
elif match['node']['type'] == 'feature':
parent2 = self.render_capa_doc_feature_node(parent, match['node']['feature'], match['locations'], doc)
if match["node"]["type"] == "statement":
parent2 = self.render_capa_doc_statement_node(parent, match["node"]["statement"], doc)
elif match["node"]["type"] == "feature":
parent2 = self.render_capa_doc_feature_node(parent, match["node"]["feature"], match["locations"], doc)
else:
raise RuntimeError('unexpected node type: ' + str(match['node']['type']))
raise RuntimeError("unexpected node type: " + str(match["node"]["type"]))

for child in match['children']:
for child in match["children"]:
self.render_capa_doc_match(parent2, child, doc)

def render_capa_doc(self, doc):
@@ -394,17 +419,17 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
self.beginResetModel()

for rule in rutils.capability_rules(doc):
parent = CapaExplorerRuleItem(self.root_node, rule['meta']['name'], len(rule['matches']), rule['source'])
parent = CapaExplorerRuleItem(self.root_node, rule["meta"]["name"], len(rule["matches"]), rule["source"])

for (location, match) in doc[rule['meta']['name']]['matches'].items():
if rule['meta']['scope'] == capa.rules.FILE_SCOPE:
for (location, match) in doc[rule["meta"]["name"]]["matches"].items():
if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
parent2 = parent
elif rule['meta']['scope'] == capa.rules.FUNCTION_SCOPE:
elif rule["meta"]["scope"] == capa.rules.FUNCTION_SCOPE:
parent2 = CapaExplorerFunctionItem(parent, location)
elif rule['meta']['scope'] == capa.rules.BASIC_BLOCK_SCOPE:
elif rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
parent2 = CapaExplorerBlockItem(parent, location)
else:
raise RuntimeError('unexpected rule scope: ' + str(rule['meta']['scope']))
raise RuntimeError("unexpected rule scope: " + str(rule["meta"]["scope"]))

self.render_capa_doc_match(parent2, match, doc)

@@ -421,20 +446,20 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
},
"""
mapping = {
'string': 'string(%s)',
'bytes': 'bytes(%s)',
'api': 'api(%s)',
'mnemonic': 'mnemonic(%s)',
'export': 'export(%s)',
'import': 'import(%s)',
'section': 'section(%s)',
'number': 'number(0x%X)',
'offset': 'offset(0x%X)',
'characteristic': 'characteristic(%s)',
'match': 'rule match(%s)'
"string": "string(%s)",
"bytes": "bytes(%s)",
"api": "api(%s)",
"mnemonic": "mnemonic(%s)",
"export": "export(%s)",
"import": "import(%s)",
"section": "section(%s)",
"number": "number(0x%X)",
"offset": "offset(0x%X)",
"characteristic": "characteristic(%s)",
"match": "rule match(%s)",
}

'''
"""
"feature": {
"characteristic": [
"loop",
@@ -442,21 +467,23 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
],
"type": "characteristic"
},
'''
if feature['type'] == 'characteristic':
return mapping['characteristic'] % feature['characteristic'][0]
"""
if feature["type"] == "characteristic":
return mapping["characteristic"] % feature["characteristic"][0]

# convert bytes feature from "410ab4" to "41 0A B4"
if feature['type'] == 'bytes':
return mapping['bytes'] % ' '.join(feature['bytes'][i:i + 2] for i in
range(0, len(feature['bytes']), 2)).upper()
if feature["type"] == "bytes":
return (
mapping["bytes"]
% " ".join(feature["bytes"][i : i + 2] for i in range(0, len(feature["bytes"]), 2)).upper()
)
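# worked example (illustrative): the slice-and-join above regroups the doc's hex
# string into two-character chunks, e.g. assuming a feature dict like:
#   >>> feature = {"type": "bytes", "bytes": "410ab4"}
#   >>> " ".join(feature["bytes"][i : i + 2] for i in range(0, len(feature["bytes"]), 2)).upper()
#   '41 0A B4'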

try:
fmt = mapping[feature['type']]
fmt = mapping[feature["type"]]
except KeyError:
raise RuntimeError('unexpected doc type: ' + str(feature['type']))
raise RuntimeError("unexpected doc type: " + str(feature["type"]))

return fmt % feature[feature['type']]
return fmt % feature[feature["type"]]

def render_capa_doc_feature_node(self, parent, feature, locations, doc):
""" """
@@ -473,7 +500,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):

return parent2

def render_capa_doc_feature(self, parent, feature, location, doc, display='-'):
def render_capa_doc_feature(self, parent, feature, location, doc, display="-"):
""" render capa feature read from doc

@param parent: parent node to which new child is assigned
@@ -491,51 +518,38 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
@param location: address of feature
@param display: text to display in plugin ui
"""
instruction_view = (
'bytes',
'api',
'mnemonic',
'number',
'offset'
)
byte_view = (
'section',
)
string_view = (
'string',
)
default_feature_view = (
'import',
'export'
)
instruction_view = ("bytes", "api", "mnemonic", "number", "offset")
byte_view = ("section",)
string_view = ("string",)
default_feature_view = ("import", "export")

# special handling for characteristic pending type
if feature['type'] == 'characteristic':
if feature['characteristic'][0] in ('embedded pe',):
if feature["type"] == "characteristic":
if feature["characteristic"][0] in ("embedded pe",):
return CapaExplorerByteViewItem(parent, display, location)

if feature['characteristic'][0] in ('loop', 'recursive call', 'tight loop', 'switch'):
if feature["characteristic"][0] in ("loop", "recursive call", "tight loop", "switch"):
return CapaExplorerFeatureItem(parent, display=display)

# default to instruction view
return CapaExplorerInstructionViewItem(parent, display, location)

if feature['type'] == 'match':
return CapaExplorerRuleMatchItem(parent, display, source=doc.get(feature['match'], {}).get('source', ''))
if feature["type"] == "match":
return CapaExplorerRuleMatchItem(parent, display, source=doc.get(feature["match"], {}).get("source", ""))

if feature['type'] in instruction_view:
if feature["type"] in instruction_view:
return CapaExplorerInstructionViewItem(parent, display, location)

if feature['type'] in byte_view:
if feature["type"] in byte_view:
return CapaExplorerByteViewItem(parent, display, location)

if feature['type'] in string_view:
if feature["type"] in string_view:
return CapaExplorerStringViewItem(parent, display, location)

if feature['type'] in default_feature_view:
if feature["type"] in default_feature_view:
return CapaExplorerFeatureItem(parent, display=display)

raise RuntimeError('unexpected feature type: ' + str(feature['type']))
raise RuntimeError("unexpected feature type: " + str(feature["type"]))

def update_function_name(self, old_name, new_name):
""" update all instances of function name
@@ -548,8 +562,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
# convert name to view format for matching
old_name = CapaExplorerFunctionItem.fmt % old_name

for model_index in self.match(root_index, QtCore.Qt.DisplayRole, old_name, hits=-1,
flags=QtCore.Qt.MatchRecursive):
for model_index in self.match(
root_index, QtCore.Qt.DisplayRole, old_name, hits=-1, flags=QtCore.Qt.MatchRecursive
):
if not isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
continue


@@ -4,7 +4,6 @@ from capa.ida.explorer.model import CapaExplorerDataModel

class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):

def __init__(self, parent=None):
""" """
super(CapaExplorerSortFilterProxyModel, self).__init__(parent)
@@ -20,8 +19,12 @@ class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
ldata = left.internalPointer().data(left.column())
rdata = right.internalPointer().data(right.column())

if ldata and rdata and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS and left.column() \
== right.column():
if (
ldata
and rdata
and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS
and left.column() == right.column()
):
# convert virtual address before compare
return int(ldata, 16) < int(rdata, 16)
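# illustrative: the virtual address column holds hex strings, so a plain string
# compare would sort "0x401000" after "0x10001000"; converting first gives the
# intended numeric order:
#   >>> int("0x401000", 16) < int("0x10001000", 16)
#   True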
else:

@@ -55,7 +55,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
self.doubleClicked.connect(self.slot_double_click)
# self.clicked.connect(self.slot_click)

self.setStyleSheet('QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}')
self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")

def reset(self):
""" reset user interface changes
@@ -114,8 +114,8 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
@yield QAction*
"""
default_actions = [
('Copy column', data, self.slot_copy_column),
('Copy row', data, self.slot_copy_row),
("Copy column", data, self.slot_copy_column),
("Copy row", data, self.slot_copy_row),
]

# add default actions
@@ -130,7 +130,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
@yield QAction*
"""
function_actions = [
('Rename function', data, self.slot_rename_function),
("Rename function", data, self.slot_rename_function),
]

# add function actions

@@ -3,47 +3,48 @@ import logging
import idaapi
import idc

logger = logging.getLogger('capa')
logger = logging.getLogger("capa")

# file type names as returned by idaapi.get_file_type_name()
SUPPORTED_FILE_TYPES = [
'Portable executable for 80386 (PE)',
'Portable executable for AMD64 (PE)',
'Binary file' # x86/AMD64 shellcode support
"Portable executable for 80386 (PE)",
"Portable executable for AMD64 (PE)",
"Binary file", # x86/AMD64 shellcode support
]


def inform_user_ida_ui(message):
idaapi.info('%s. Please refer to IDA Output window for more information.' % message)
idaapi.info("%s. Please refer to IDA Output window for more information." % message)


def is_supported_file_type():
file_type = idaapi.get_file_type_name()
if file_type not in SUPPORTED_FILE_TYPES:
logger.error('-' * 80)
logger.error(' Input file does not appear to be a PE file.')
logger.error(' ')
logger.error("-" * 80)
logger.error(" Input file does not appear to be a PE file.")
logger.error(" ")
logger.error(
' capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA.')
logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
logger.error('-' * 80)
inform_user_ida_ui('capa does not support the format of this file')
" capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA."
)
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.error("-" * 80)
inform_user_ida_ui("capa does not support the format of this file")
return False
return True


def get_disasm_line(va):
''' '''
""" """
return idc.generate_disasm_line(va, idc.GENDSM_FORCE_CODE)


def is_func_start(ea):
''' check if function start exists at virtual address '''
""" check if function start exists at virtual address """
f = idaapi.get_func(ea)
return f and f.start_ea == ea


def get_func_start_ea(ea):
''' '''
""" """
f = idaapi.get_func(ea)
return f if f is None else f.start_ea

@@ -2,11 +2,7 @@ import os
import logging
import collections

from PyQt5 import (
QtWidgets,
QtGui,
QtCore
)
from PyQt5 import QtWidgets, QtGui, QtCore

import idaapi

@@ -20,13 +16,12 @@ from capa.ida.explorer.view import CapaExplorerQtreeView
from capa.ida.explorer.model import CapaExplorerDataModel
from capa.ida.explorer.proxy import CapaExplorerSortFilterProxyModel

PLUGIN_NAME = 'capa explorer'
PLUGIN_NAME = "capa explorer"

logger = logging.getLogger('capa')
logger = logging.getLogger("capa")


class CapaExplorerIdaHooks(idaapi.UI_Hooks):

def __init__(self, screen_ea_changed_hook, action_hooks):
""" facilitate IDA UI hooks

@@ -78,7 +73,6 @@ class CapaExplorerIdaHooks(idaapi.UI_Hooks):


class CapaExplorerForm(idaapi.PluginForm):

def __init__(self):
""" """
super(CapaExplorerForm, self).__init__()
@@ -109,20 +103,20 @@ class CapaExplorerForm(idaapi.PluginForm):

self.view_tree.reset()

logger.info('form created.')
logger.info("form created.")

def Show(self):
""" """
return idaapi.PluginForm.Show(self, self.form_title, options=(
idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER
))
return idaapi.PluginForm.Show(
self, self.form_title, options=(idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER)
)

def OnClose(self, form):
""" form is closed """
self.unload_ida_hooks()
self.ida_reset()

logger.info('form closed.')
logger.info("form closed.")

def load_interface(self):
""" load user interface """
@@ -165,8 +159,8 @@ class CapaExplorerForm(idaapi.PluginForm):
def load_view_summary(self):
""" """
table_headers = [
'Capability',
'Namespace',
"Capability",
"Namespace",
]

table = QtWidgets.QTableWidget()
@@ -180,15 +174,15 @@ class CapaExplorerForm(idaapi.PluginForm):
table.setHorizontalHeaderLabels(table_headers)
table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
table.setShowGrid(False)
table.setStyleSheet('QTableWidget::item { padding: 25px; }')
table.setStyleSheet("QTableWidget::item { padding: 25px; }")

self.view_summary = table

def load_view_attack(self):
""" """
table_headers = [
'ATT&CK Tactic',
'ATT&CK Technique ',
"ATT&CK Tactic",
"ATT&CK Technique ",
]

table = QtWidgets.QTableWidget()
@@ -202,13 +196,13 @@ class CapaExplorerForm(idaapi.PluginForm):
table.setHorizontalHeaderLabels(table_headers)
table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
table.setShowGrid(False)
table.setStyleSheet('QTableWidget::item { padding: 25px; }')
table.setStyleSheet("QTableWidget::item { padding: 25px; }")

self.view_attack = table

def load_view_checkbox_limit_by(self):
""" """
check = QtWidgets.QCheckBox('Limit results to current function')
check = QtWidgets.QCheckBox("Limit results to current function")
check.setChecked(False)
check.stateChanged.connect(self.slot_checkbox_limit_by_changed)

@@ -231,7 +225,7 @@ class CapaExplorerForm(idaapi.PluginForm):
tab = QtWidgets.QWidget()
tab.setLayout(layout)

self.view_tabs.addTab(tab, 'Tree View')
self.view_tabs.addTab(tab, "Tree View")

def load_view_summary_tab(self):
""" """
@@ -241,7 +235,7 @@ class CapaExplorerForm(idaapi.PluginForm):
tab = QtWidgets.QWidget()
tab.setLayout(layout)

self.view_tabs.addTab(tab, 'Summary')
self.view_tabs.addTab(tab, "Summary")

def load_view_attack_tab(self):
""" """
@@ -251,16 +245,16 @@ class CapaExplorerForm(idaapi.PluginForm):
tab = QtWidgets.QWidget()
tab.setLayout(layout)

self.view_tabs.addTab(tab, 'MITRE')
self.view_tabs.addTab(tab, "MITRE")

def load_file_menu(self):
""" load file menu actions """
actions = (
('Reset view', 'Reset plugin view', self.reset),
('Run analysis', 'Run capa analysis on current database', self.reload),
("Reset view", "Reset plugin view", self.reset),
("Run analysis", "Run capa analysis on current database", self.reload),
)

menu = self.view_menu_bar.addMenu('File')
menu = self.view_menu_bar.addMenu("File")

for name, _, handle in actions:
action = QtWidgets.QAction(name, self.parent)
@@ -271,8 +265,8 @@ class CapaExplorerForm(idaapi.PluginForm):
def load_ida_hooks(self):
""" """
action_hooks = {
'MakeName': self.ida_hook_rename,
'EditFunction': self.ida_hook_rename,
"MakeName": self.ida_hook_rename,
"EditFunction": self.ida_hook_rename,
}

self.ida_hooks = CapaExplorerIdaHooks(self.ida_hook_screen_ea_changed, action_hooks)
@@ -300,10 +294,10 @@ class CapaExplorerForm(idaapi.PluginForm):

if post:
# post action update data model w/ current name
self.model_data.update_function_name(meta.get('prev_name', ''), curr_name)
self.model_data.update_function_name(meta.get("prev_name", ""), curr_name)
else:
# pre action so save current name for replacement later
meta['prev_name'] = curr_name
meta["prev_name"] = curr_name

def ida_hook_screen_ea_changed(self, widget, new_ea, old_ea):
""" """
@@ -328,21 +322,21 @@ class CapaExplorerForm(idaapi.PluginForm):
match = capa.ida.explorer.item.ea_to_hex_str(new_func_start)
else:
# navigated to virtual address not in valid function - clear filter
match = ''
match = ""

# filter on virtual address to avoid updating filter string if function name is changed
self.model_proxy.add_single_string_filter(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS, match)

def load_capa_results(self):
""" """
logger.info('-' * 80)
logger.info(' Using default embedded rules.')
logger.info(' ')
logger.info(' You can see the current default rule set here:')
logger.info(' https://github.com/fireeye/capa-rules')
logger.info('-' * 80)
logger.info("-" * 80)
logger.info(" Using default embedded rules.")
logger.info(" ")
logger.info(" You can see the current default rule set here:")
logger.info(" https://github.com/fireeye/capa-rules")
logger.info("-" * 80)

rules_path = os.path.join(os.path.dirname(self.file_loc), '../..', 'rules')
rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
rules = capa.main.get_rules(rules_path)
rules = capa.rules.RuleSet(rules)
capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
@@ -350,27 +344,30 @@ class CapaExplorerForm(idaapi.PluginForm):
# support binary files specifically for x86/AMD64 shellcode
# warn user binary file is loaded but still allow capa to process it
# TODO: check specific architecture of binary files based on how user configured IDA processors
if idaapi.get_file_type_name() == 'Binary file':
logger.warning('-' * 80)
logger.warning(' Input file appears to be a binary file.')
logger.warning(' ')
if idaapi.get_file_type_name() == "Binary file":
logger.warning("-" * 80)
logger.warning(" Input file appears to be a binary file.")
logger.warning(" ")
logger.warning(
' capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA.')
" capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA."
)
logger.warning(
' This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64.')
logger.warning(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
logger.warning('-' * 80)
" This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64."
)
logger.warning(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.warning("-" * 80)

capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")

if capa.main.has_file_limitation(rules, capabilities, is_standalone=False):
capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")

logger.info('analysis completed.')
logger.info("analysis completed.")

doc = capa.render.convert_capabilities_to_result_document(rules, capabilities)

import json

with open("C:\\Users\\spring\\Desktop\\hmm.json", "w") as twitter_data_file:
json.dump(doc, twitter_data_file, indent=4, sort_keys=True, cls=capa.render.CapaJsonObjectEncoder)

@@ -380,22 +377,22 @@ class CapaExplorerForm(idaapi.PluginForm):

self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)

logger.info('render views completed.')
logger.info("render views completed.")

def render_capa_doc_summary(self, doc):
""" """
for (row, rule) in enumerate(rutils.capability_rules(doc)):
count = len(rule['matches'])
count = len(rule["matches"])

if count == 1:
capability = rule['meta']['name']
capability = rule["meta"]["name"]
else:
capability = '%s (%d matches)' % (rule['meta']['name'], count)
capability = "%s (%d matches)" % (rule["meta"]["name"], count)

self.view_summary.setRowCount(row + 1)

self.view_summary.setItem(row, 0, self.render_new_table_header_item(capability))
self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule['meta']['namespace']))
self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule["meta"]["namespace"]))

# resize columns to content
self.view_summary.resizeColumnsToContents()
@@ -404,17 +401,17 @@ class CapaExplorerForm(idaapi.PluginForm):
""" """
tactics = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
if not rule['meta'].get('att&ck'):
if not rule["meta"].get("att&ck"):
continue

for attack in rule['meta']['att&ck']:
tactic, _, rest = attack.partition('::')
if '::' in rest:
technique, _, rest = rest.partition('::')
subtechnique, _, id = rest.rpartition(' ')
for attack in rule["meta"]["att&ck"]:
tactic, _, rest = attack.partition("::")
if "::" in rest:
technique, _, rest = rest.partition("::")
subtechnique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, subtechnique, id))
else:
technique, _, id = rest.rpartition(' ')
technique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, id))
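# worked example (illustrative; the att&ck value below is a hypothetical string in the
# "tactic::technique[::subtechnique] ID" form parsed above):
#   >>> attack = "Defense Evasion::Obfuscated Files or Information::Indicator Removal from Tools T1027.005"
#   >>> tactic, _, rest = attack.partition("::")
#   >>> technique, _, rest = rest.partition("::")
#   >>> subtechnique, _, id = rest.rpartition(" ")
#   >>> (tactic, technique, subtechnique, id)
#   ('Defense Evasion', 'Obfuscated Files or Information', 'Indicator Removal from Tools', 'T1027.005')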

column_one = []
@@ -422,17 +419,17 @@ class CapaExplorerForm(idaapi.PluginForm):

for tactic, techniques in sorted(tactics.items()):
column_one.append(tactic.upper())
column_one.extend(['' for i in range(len(techniques) - 1)])
column_one.extend(["" for i in range(len(techniques) - 1)])

for spec in sorted(techniques):
if len(spec) == 2:
technique, id = spec
column_two.append('%s %s' % (technique, id))
column_two.append("%s %s" % (technique, id))
elif len(spec) == 3:
technique, subtechnique, id = spec
column_two.append('%s::%s %s' % (technique, subtechnique, id))
column_two.append("%s::%s %s" % (technique, subtechnique, id))
else:
raise RuntimeError('unexpected ATT&CK spec format')
raise RuntimeError("unexpected ATT&CK spec format")

self.view_attack.setRowCount(max(len(column_one), len(column_two)))

@@ -471,8 +468,8 @@ class CapaExplorerForm(idaapi.PluginForm):
self.view_summary.setRowCount(0)
self.load_capa_results()

logger.info('reload complete.')
idaapi.info('%s reload completed.' % PLUGIN_NAME)
logger.info("reload complete.")
idaapi.info("%s reload completed." % PLUGIN_NAME)

def reset(self):
""" reset user interface elements
@@ -481,8 +478,8 @@ class CapaExplorerForm(idaapi.PluginForm):
"""
self.ida_reset()

logger.info('reset completed.')
idaapi.info('%s reset completed.' % PLUGIN_NAME)
logger.info("reset completed.")
idaapi.info("%s reset completed." % PLUGIN_NAME)

def slot_menu_bar_hovered(self, action):
""" display menu action tooltip
@@ -491,7 +488,9 @@ class CapaExplorerForm(idaapi.PluginForm):

@reference: https://stackoverflow.com/questions/21725119/why-wont-qtooltips-appear-on-qactions-within-a-qmenu
"""
QtWidgets.QToolTip.showText(QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action))
QtWidgets.QToolTip.showText(
QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action)
)

def slot_checkbox_limit_by_changed(self):
""" slot activated if checkbox clicked
@@ -499,7 +498,7 @@ class CapaExplorerForm(idaapi.PluginForm):
if checked, configure function filter if screen location is located
in function, otherwise clear filter
"""
match = ''
match = ""
if self.view_checkbox_limit_by.isChecked():
location = capa.ida.helpers.get_func_start_ea(idaapi.get_screen_ea())
if location:
@@ -530,5 +529,5 @@ def main():
CAPA_EXPLORER_FORM.Show()


if __name__ == '__main__':
if __name__ == "__main__":
main()

@@ -19,10 +19,10 @@ from capa.ida import plugin_helpers
import capa.features.extractors.ida.helpers


logger = logging.getLogger('rulegen')
logger = logging.getLogger("rulegen")


AUTHOR_NAME = ''
AUTHOR_NAME = ""
COLOR_HIGHLIGHT = 0xD096FF


@@ -35,11 +35,11 @@ def get_func_start(ea):


class Hooks(idaapi.UI_Hooks):
'''
"""
Notifies the plugin when navigating to another function
NOTE: it uses the global variable RULE_GEN_FORM to access the
PluginForm object. This looks nasty, maybe there is a better way?
'''
"""

def screen_ea_changed(self, ea, prev_ea):
widget = idaapi.get_current_widget()
@@ -55,14 +55,13 @@ class Hooks(idaapi.UI_Hooks):
# changed to another function
RULE_GEN_FORM.reload_features_tree()
except Exception as e:
logger.warn('exception: %s', e)
logger.warn("exception: %s", e)


class RuleGeneratorForm(idaapi.PluginForm):

def __init__(self):
super(RuleGeneratorForm, self).__init__()
self.title = 'capa rule generator'
self.title = "capa rule generator"

self.parent = None
self.parent_items = {}
@@ -70,7 +69,7 @@ class RuleGeneratorForm(idaapi.PluginForm):

self.hooks = Hooks() # dirty?
if self.hooks.hook():
logger.info('UI notification hook installed successfully')
logger.info("UI notification hook installed successfully")

def init_ui(self):
self.tree = QTreeWidget()
@@ -79,7 +78,7 @@ class RuleGeneratorForm(idaapi.PluginForm):

self.reload_features_tree()

button_reset = QtWidgets.QPushButton('&Reset')
button_reset = QtWidgets.QPushButton("&Reset")
button_reset.clicked.connect(self.reset)

h_layout = QtWidgets.QHBoxLayout()
@@ -96,7 +95,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
def reset(self):
plugin_helpers.reset_selection(self.tree)
plugin_helpers.reset_colors(self.orig_colors)
self.rule_text.setText('')
self.rule_text.setText("")

def reload_features_tree(self):
self.reset()
@@ -119,7 +118,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
extractor = capa.features.extractors.ida.IdaFeatureExtractor()
f = idaapi.get_func(idaapi.get_screen_ea())
if not f:
logger.info('function does not exist at 0x%x', idaapi.get_screen_ea())
logger.info("function does not exist at 0x%x", idaapi.get_screen_ea())
return

return self.extract_function_features(f)
@@ -137,7 +136,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
def create_tree(self, features):
self.tree.setMinimumWidth(400)
# self.tree.setMinimumHeight(300)
self.tree.setHeaderLabels(['Feature', 'Virtual Address', 'Disassembly'])
self.tree.setHeaderLabels(["Feature", "Virtual Address", "Disassembly"])
# auto resize columns
self.tree.header().setSectionResizeMode(QHeaderView.ResizeToContents)
self.tree.itemClicked.connect(self.on_item_clicked)
@@ -151,16 +150,22 @@ class RuleGeneratorForm(idaapi.PluginForm):

# level 1
if feature not in self.parent_items:
self.parent_items[feature] = plugin_helpers.add_child_item(self.parent_items[type(feature)], [str(feature)])
self.parent_items[feature] = plugin_helpers.add_child_item(
self.parent_items[type(feature)], [str(feature)]
)

# level n > 1
if len(vas) > 1:
for va in sorted(vas):
plugin_helpers.add_child_item(self.parent_items[feature], [str(feature), '0x%X' % va, plugin_helpers.get_disasm_line(va)], feature)
plugin_helpers.add_child_item(
self.parent_items[feature],
[str(feature), "0x%X" % va, plugin_helpers.get_disasm_line(va)],
feature,
)
else:
va = vas.pop()
self.parent_items[feature].setText(0, str(feature))
self.parent_items[feature].setText(1, '0x%X' % va)
self.parent_items[feature].setText(1, "0x%X" % va)
self.parent_items[feature].setText(2, plugin_helpers.get_disasm_line(va))
self.parent_items[feature].setData(0, 0x100, feature)

@@ -188,29 +193,31 @@ class RuleGeneratorForm(idaapi.PluginForm):

def get_rule_from_features(self, features):
rule_parts = []
counted = zip(Counter(features).keys(), # equivalent to list(set(features))
Counter(features).values()) # counts the elements' frequency
counted = zip(
Counter(features).keys(), Counter(features).values() # equivalent to list(set(features))
) # counts the elements' frequency
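# illustrative: zipping the Counter's keys and values pairs each unique feature
# with its frequency, e.g. (ordering may vary under Python 2):
#   >>> from collections import Counter
#   >>> zip(Counter(["a", "a", "b"]).keys(), Counter(["a", "a", "b"]).values())
#   [('a', 2), ('b', 1)]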

# single features
for k, v in filter(lambda t: t[1] == 1, counted):
# TODO args to hex if int
if k.name.lower() == 'bytes':
if k.name.lower() == "bytes":
# Convert raw bytes to uppercase hex representation (e.g., '12 34 56')
upper_hex_bytes = binascii.hexlify(args_to_str(k.args)).upper()
rule_value_str = ''
rule_value_str = ""
for i in range(0, len(upper_hex_bytes), 2):
rule_value_str += upper_hex_bytes[i:i + 2] + ' '
r = ' - %s: %s' % (k.name.lower(), rule_value_str)
rule_value_str += upper_hex_bytes[i : i + 2] + " "
r = " - %s: %s" % (k.name.lower(), rule_value_str)
else:
r = ' - %s: %s' % (k.name.lower(), args_to_str(k.args))
r = " - %s: %s" % (k.name.lower(), args_to_str(k.args))
rule_parts.append(r)

# counted features
for k, v in filter(lambda t: t[1] > 1, counted):
r = ' - count(%s): %d' % (str(k), v)
r = " - count(%s): %d" % (str(k), v)
rule_parts.append(r)

rule_prefix = textwrap.dedent('''
rule_prefix = textwrap.dedent(
"""
rule:
meta:
name:
@@ -219,8 +226,10 @@ class RuleGeneratorForm(idaapi.PluginForm):
examples:
- %s:0x%X
features:
''' % (AUTHOR_NAME, idc.retrieve_input_file_md5(), get_func_start(idc.here()))).strip()
return '%s\n%s' % (rule_prefix, '\n'.join(sorted(rule_parts)))
"""
% (AUTHOR_NAME, idc.retrieve_input_file_md5(), get_func_start(idc.here()))
).strip()
return "%s\n%s" % (rule_prefix, "\n".join(sorted(rule_parts)))

# TODO merge into capa_idautils, get feature data
def get_selected_items(self):
@@ -242,26 +251,25 @@ class RuleGeneratorForm(idaapi.PluginForm):
self.init_ui()

def Show(self):
return idaapi.PluginForm.Show(self, self.title, options=(
idaapi.PluginForm.WOPN_RESTORE
| idaapi.PluginForm.WOPN_PERSIST
))
return idaapi.PluginForm.Show(
self, self.title, options=(idaapi.PluginForm.WOPN_RESTORE | idaapi.PluginForm.WOPN_PERSIST)
)

def OnClose(self, form):
self.reset()
if self.hooks.unhook():
logger.info('UI notification hook uninstalled successfully')
logger.info('RuleGeneratorForm closed')
logger.info("UI notification hook uninstalled successfully")
logger.info("RuleGeneratorForm closed")


def args_to_str(args):
a = []
for arg in args:
if (isinstance(arg, int) or isinstance(arg, long)) and arg > 10:
a.append('0x%X' % arg)
a.append("0x%X" % arg)
else:
a.append(str(arg))
return ','.join(a)
return ",".join(a)


def main():
@@ -280,5 +288,5 @@ def main():
RULE_GEN_FORM.Show()


if __name__ == '__main__':
if __name__ == "__main__":
main()

@@ -8,34 +8,34 @@ import idc
import idaapi


CAPA_EXTENSION = '.capas'
CAPA_EXTENSION = ".capas"


logger = logging.getLogger('capa_ida')
logger = logging.getLogger("capa_ida")


def get_input_file(freeze=True):
'''
"""
get input file path

freeze (bool): if True, get freeze file if it exists
'''
"""
# try original file in same directory as idb/i64 without idb/i64 file extension
input_file = idc.get_idb_path()[:-4]

if freeze:
# use frozen file if it exists
freeze_file_cand = '%s%s' % (input_file, CAPA_EXTENSION)
freeze_file_cand = "%s%s" % (input_file, CAPA_EXTENSION)
if os.path.isfile(freeze_file_cand):
return freeze_file_cand

if not os.path.isfile(input_file):
# TM naming
input_file = '%s.mal_' % idc.get_idb_path()[:-4]
input_file = "%s.mal_" % idc.get_idb_path()[:-4]
if not os.path.isfile(input_file):
input_file = idaapi.ask_file(0, '*.*', 'Please specify input file.')
input_file = idaapi.ask_file(0, "*.*", "Please specify input file.")
if not input_file:
raise ValueError('could not find input file')
raise ValueError("could not find input file")
return input_file


328 capa/main.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python2
'''
"""
capa - detect capabilities in programs.
'''
"""
import os
import os.path
import sys
@@ -23,16 +23,16 @@ import capa.features.extractors
from capa.helpers import oint


SUPPORTED_FILE_MAGIC = set(['MZ'])
SUPPORTED_FILE_MAGIC = set(["MZ"])


logger = logging.getLogger('capa')
logger = logging.getLogger("capa")


def set_vivisect_log_level(level):
logging.getLogger('vivisect').setLevel(level)
logging.getLogger('vtrace').setLevel(level)
logging.getLogger('envi').setLevel(level)
logging.getLogger("vivisect").setLevel(level)
logging.getLogger("vtrace").setLevel(level)
logging.getLogger("envi").setLevel(level)


def find_function_capabilities(ruleset, extractor, f):
@@ -83,7 +83,7 @@ def find_file_capabilities(ruleset, extractor, function_features):
if feature not in file_features:
file_features[feature] = set()

logger.info('analyzed file and extracted %d features', len(file_features))
logger.info("analyzed file and extracted %d features", len(file_features))

file_features.update(function_features)

@@ -95,7 +95,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
all_function_matches = collections.defaultdict(list)
all_bb_matches = collections.defaultdict(list)

for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=' functions'):
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res)
@@ -104,8 +104,10 @@ def find_capabilities(ruleset, extractor, disable_progress=None):

# mapping from matched rule feature to set of addresses at which it matched.
# type: Dict[MatchedRule, Set[int]]
function_features = {capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
for rule_name, results in all_function_matches.items()}
function_features = {
capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
for rule_name, results in all_function_matches.items()
}

all_file_matches = find_file_capabilities(ruleset, extractor, function_features)

@@ -119,7 +121,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):

def has_rule_with_namespace(rules, capabilities, rule_cat):
for rule_name in capabilities.keys():
if rules.rules[rule_name].meta.get('namespace', '').startswith(rule_cat):
if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
return True
return False

@@ -128,61 +130,61 @@ def has_file_limitation(rules, capabilities, is_standalone=True):
file_limitations = {
# capa will likely detect installer specific functionality.
# this is probably not what the user wants.
'executable/installer': [
' This sample appears to be an installer.',
' ',
' capa cannot handle installers well. This means the results may be misleading or incomplete.'
' You should try to understand the install mechanism and analyze created files with capa.'
"executable/installer": [
" This sample appears to be an installer.",
" ",
" capa cannot handle installers well. This means the results may be misleading or incomplete."
" You should try to understand the install mechanism and analyze created files with capa.",
],
# capa won't detect much in .NET samples.
# it might match some file-level things.
# for consistency, bail on things that we don't support.
'runtime/dotnet': [
' This sample appears to be a .NET module.',
' ',
' .NET is a cross-platform framework for running managed applications.',
' capa cannot handle non-native files. This means that the results may be misleading or incomplete.',
' You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.'
"runtime/dotnet": [
" This sample appears to be a .NET module.",
" ",
" .NET is a cross-platform framework for running managed applications.",
" capa cannot handle non-native files. This means that the results may be misleading or incomplete.",
" You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.",
],
# capa will detect dozens of capabilities for AutoIt samples,
# but these are due to the AutoIt runtime, not the payload script.
# so, don't confuse the user with FP matches - bail instead
'compiler/autoit': [
' This sample appears to be compiled with AutoIt.',
' ',
' AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.',
' capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.',
' You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.'
"compiler/autoit": [
" This sample appears to be compiled with AutoIt.",
" ",
" AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.",
" capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.",
" You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.",
],
# capa won't detect much in packed samples
'anti-analysis/packer/': [
' This sample appears to be packed.',
' ',
' Packed samples have often been obfuscated to hide their logic.',
' capa cannot handle obfuscation well. This means the results may be misleading or incomplete.',
' If possible, you should try to unpack this input file before analyzing it with capa.'
]
"anti-analysis/packer/": [
" This sample appears to be packed.",
" ",
" Packed samples have often been obfuscated to hide their logic.",
" capa cannot handle obfuscation well. This means the results may be misleading or incomplete.",
" If possible, you should try to unpack this input file before analyzing it with capa.",
],
}

for category, dialogue in file_limitations.items():
if not has_rule_with_namespace(rules, capabilities, category):
continue
logger.warning('-' * 80)
logger.warning("-" * 80)
for line in dialogue:
logger.warning(line)
if is_standalone:
logger.warning(' ')
logger.warning(' Use -v or -vv if you really want to see the capabilities identified by capa.')
logger.warning('-' * 80)
logger.warning(" ")
logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
logger.warning("-" * 80)
return True
return False


def is_supported_file_type(sample):
'''
"""
Return if this is a supported file based on magic header values
'''
with open(sample, 'rb') as f:
"""
with open(sample, "rb") as f:
magic = f.read(2)
if magic in SUPPORTED_FILE_MAGIC:
return True
@@ -190,36 +192,37 @@ def is_supported_file_type(sample):
return False
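# illustrative: PE files begin with the two-byte DOS magic "MZ", so for a
# hypothetical sample path:
#   >>> open("mal.exe", "rb").read(2)
#   'MZ'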


def get_shellcode_vw(sample, arch='auto'):
'''
def get_shellcode_vw(sample, arch="auto"):
"""
Return shellcode workspace using explicit arch or via auto detect
'''
"""
import viv_utils
with open(sample, 'rb') as f:

with open(sample, "rb") as f:
sample_bytes = f.read()
if arch == 'auto':
if arch == "auto":
# choose arch with most functions, idea by Jay G.
vw_cands = []
for arch in ['i386', 'amd64']:
for arch in ["i386", "amd64"]:
vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch))
if not vw_cands:
raise ValueError('could not generate vivisect workspace')
raise ValueError("could not generate vivisect workspace")
vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
else:
vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch)
vw.setMeta('Format', 'blob') # TODO fix in viv_utils
vw.setMeta("Format", "blob") # TODO fix in viv_utils
return vw
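# illustrative usage (hypothetical path): with the default arch="auto", the
# candidate workspace whose analysis recovers the most functions wins:
#   vw = get_shellcode_vw("shellcode.bin")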


def get_meta_str(vw):
'''
"""
Return workspace meta information string
'''
"""
meta = []
for k in ['Format', 'Platform', 'Architecture']:
for k in ["Format", "Platform", "Architecture"]:
if k in vw.metadata:
meta.append('%s: %s' % (k.lower(), vw.metadata[k]))
return '%s, number of functions: %d' % (', '.join(meta), len(vw.getFunctions()))
meta.append("%s: %s" % (k.lower(), vw.metadata[k]))
return "%s, number of functions: %d" % (", ".join(meta), len(vw.getFunctions()))


class UnsupportedFormatError(ValueError):
@@ -228,23 +231,25 @@ class UnsupportedFormatError(ValueError):

def get_workspace(path, format):
import viv_utils
logger.info('generating vivisect workspace for: %s', path)
if format == 'auto':

logger.info("generating vivisect workspace for: %s", path)
if format == "auto":
if not is_supported_file_type(path):
raise UnsupportedFormatError()
vw = viv_utils.getWorkspace(path)
elif format == 'pe':
elif format == "pe":
vw = viv_utils.getWorkspace(path)
elif format == 'sc32':
vw = get_shellcode_vw(path, arch='i386')
elif format == 'sc64':
vw = get_shellcode_vw(path, arch='amd64')
logger.info('%s', get_meta_str(vw))
elif format == "sc32":
vw = get_shellcode_vw(path, arch="i386")
elif format == "sc64":
vw = get_shellcode_vw(path, arch="amd64")
logger.info("%s", get_meta_str(vw))
return vw


def get_extractor_py2(path, format):
import capa.features.extractors.viv

vw = get_workspace(path, format)
return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)

@@ -258,10 +263,10 @@ def get_extractor_py3(path, format):


def get_extractor(path, format):
'''
"""
raises:
UnsupportedFormatError:
'''
"""
if sys.version_info >= (3, 0):
return get_extractor_py3(path, format)
else:
@@ -269,7 +274,7 @@ def get_extractor(path, format):


def is_nursery_rule_path(path):
'''
"""
The nursery is a spot for rules that have not yet been fully polished.
For example, they may not have references to public example of a technique.
Yet, we still want to capture and report on their matches.
@@ -277,23 +282,23 @@ def is_nursery_rule_path(path):

When nursery rules are loaded, their metadata section should be updated with:
`nursery=True`.
'''
return 'nursery' in path
"""
return "nursery" in path


def get_rules(rule_path):
if not os.path.exists(rule_path):
raise IOError('%s does not exist or cannot be accessed' % rule_path)
raise IOError("%s does not exist or cannot be accessed" % rule_path)

rule_paths = []
if os.path.isfile(rule_path):
rule_paths.append(rule_path)
elif os.path.isdir(rule_path):
logger.debug('reading rules from directory %s', rule_path)
logger.debug("reading rules from directory %s", rule_path)
for root, dirs, files in os.walk(rule_path):
for file in files:
if not file.endswith('.yml'):
logger.warning('skipping non-.yml file: %s', file)
if not file.endswith(".yml"):
logger.warning("skipping non-.yml file: %s", file)
continue

rule_path = os.path.join(root, file)
@@ -301,18 +306,18 @@ def get_rules(rule_path):

rules = []
for rule_path in rule_paths:
logger.debug('reading rule file: %s', rule_path)
logger.debug("reading rule file: %s", rule_path)
try:
rule = capa.rules.Rule.from_yaml_file(rule_path)
except capa.rules.InvalidRule:
raise
else:
rule.meta['capa/path'] = rule_path
rule.meta["capa/path"] = rule_path
if is_nursery_rule_path(rule_path):
rule.meta['capa/nursery'] = True
rule.meta["capa/nursery"] = True

rules.append(rule)
logger.debug('rule: %s scope: %s', rule.name, rule.scope)
logger.debug("rule: %s scope: %s", rule.name, rule.scope)

return rules

@@ -322,35 +327,37 @@ def main(argv=None):
argv = sys.argv[1:]

formats = [
('auto', '(default) detect file type automatically'),
('pe', 'Windows PE file'),
('sc32', '32-bit shellcode'),
('sc64', '64-bit shellcode'),
('freeze', 'features previously frozen by capa'),
("auto", "(default) detect file type automatically"),
("pe", "Windows PE file"),
("sc32", "32-bit shellcode"),
("sc64", "64-bit shellcode"),
("freeze", "features previously frozen by capa"),
]
format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])

parser = argparse.ArgumentParser(description='detect capabilities in programs.')
parser.add_argument('sample', type=str,
help='Path to sample to analyze')
parser.add_argument('-r', '--rules', type=str, default='(embedded rules)',
help='Path to rule file or directory, use embedded rules by default')
parser.add_argument('-t', '--tag', type=str,
help='Filter on rule meta field values')
parser.add_argument('--version', action='store_true',
help='Print the executable version and exit')
parser.add_argument('-j', '--json', action='store_true',
help='Emit JSON instead of text')
parser.add_argument('-v', '--verbose', action='store_true',
help='Enable verbose result document (no effect with --json)')
parser.add_argument('-vv', '--vverbose', action='store_true',
help='Enable very verbose result document (no effect with --json)')
parser.add_argument('-d', '--debug', action='store_true',
help='Enable debugging output on STDERR')
parser.add_argument('-q', '--quiet', action='store_true',
help='Disable all output but errors')
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
help='Select sample format, %s' % format_help)
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
parser.add_argument("sample", type=str, help="Path to sample to analyze")
parser.add_argument(
"-r",
"--rules",
type=str,
default="(embedded rules)",
help="Path to rule file or directory, use embedded rules by default",
)
parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values")
parser.add_argument("--version", action="store_true", help="Print the executable version and exit")
parser.add_argument("-j", "--json", action="store_true", help="Emit JSON instead of text")
parser.add_argument(
"-v", "--verbose", action="store_true", help="Enable verbose result document (no effect with --json)"
)
parser.add_argument(
"-vv", "--vverbose", action="store_true", help="Enable very verbose result document (no effect with --json)"
)
parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
parser.add_argument(
"-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
)
args = parser.parse_args(args=argv)

if args.version:
@@ -375,68 +382,70 @@ def main(argv=None):
# because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
# see #380 and: https://stackoverflow.com/a/3259271/87207
import codecs
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

if args.rules == '(embedded rules)':
logger.info('-' * 80)
logger.info(' Using default embedded rules.')
logger.info(' To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.')
logger.info(' You can see the current default rule set here:')
logger.info(' https://github.com/fireeye/capa-rules')
logger.info('-' * 80)
codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
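# illustrative: after registering the alias above, looking up the Windows
# console codepage resolves to the utf-8 codec:
#   >>> codecs.lookup("cp65001").name
#   'utf-8'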

if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
logger.debug('detected running under PyInstaller')
args.rules = os.path.join(sys._MEIPASS, 'rules')
logger.debug('default rule path (PyInstaller method): %s', args.rules)
if args.rules == "(embedded rules)":
logger.info("-" * 80)
logger.info(" Using default embedded rules.")
logger.info(" To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.")
logger.info(" You can see the current default rule set here:")
logger.info(" https://github.com/fireeye/capa-rules")
logger.info("-" * 80)

if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
logger.debug("detected running under PyInstaller")
args.rules = os.path.join(sys._MEIPASS, "rules")
logger.debug("default rule path (PyInstaller method): %s", args.rules)
else:
logger.debug('detected running from source')
args.rules = os.path.join(os.path.dirname(__file__), '..', 'rules')
logger.debug('default rule path (source method): %s', args.rules)
logger.debug("detected running from source")
args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
logger.debug("default rule path (source method): %s", args.rules)
else:
logger.info('using rules path: %s', args.rules)
logger.info("using rules path: %s", args.rules)

try:
rules = get_rules(args.rules)
rules = capa.rules.RuleSet(rules)
logger.info('successfully loaded %s rules', len(rules))
logger.info("successfully loaded %s rules", len(rules))
if args.tag:
rules = rules.filter_rules_by_meta(args.tag)
logger.info('selected %s rules', len(rules))
logger.info("selected %s rules", len(rules))
for i, r in enumerate(rules.rules, 1):
# TODO don't display subscope rules?
logger.debug(' %d. %s', i, r)
logger.debug(" %d. %s", i, r)
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error('%s', str(e))
logger.error("%s", str(e))
return -1

with open(args.sample, 'rb') as f:
with open(args.sample, "rb") as f:
taste = f.read(8)

if ((args.format == 'freeze')
or (args.format == 'auto' and capa.features.freeze.is_freeze(taste))):
with open(args.sample, 'rb') as f:
if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
with open(args.sample, "rb") as f:
extractor = capa.features.freeze.load(f.read())
else:
try:
extractor = get_extractor(args.sample, args.format)
except UnsupportedFormatError:
logger.error('-' * 80)
logger.error(' Input file does not appear to be a PE file.')
logger.error(' ')
logger.error(' capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).')
logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
logger.error('-' * 80)
logger.error("-" * 80)
logger.error(" Input file does not appear to be a PE file.")
logger.error(" ")
logger.error(
" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
)
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.error("-" * 80)
return -1
except UnsupportedRuntimeError:
logger.error('-' * 80)
logger.error(' Unsupported runtime or Python interpreter.')
logger.error(' ')
logger.error(' capa supports running under Python 2.7 using Vivisect for binary analysis.')
logger.error(' It can also run within IDA Pro, using either Python 2.7 or 3.5+.')
logger.error(' ')
logger.error(' If you\'re seeing this message on the command line, please ensure you\'re running Python 2.7.')
logger.error('-' * 80)
logger.error("-" * 80)
logger.error(" Unsupported runtime or Python interpreter.")
|
||||
logger.error(" ")
|
||||
logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
|
||||
logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
|
||||
logger.error(" ")
|
||||
logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
|
||||
logger.error("-" * 80)
|
||||
return -1
|
||||
|
||||
capabilities = find_capabilities(rules, extractor)
|
||||
@@ -462,7 +471,7 @@ def main(argv=None):
|
||||
print(capa.render.render_default(rules, capabilities))
|
||||
colorama.deinit()
|
||||
|
||||
logger.info('done.')
|
||||
logger.info("done.")
|
||||
|
||||
return 0
|
||||
|
||||
@@ -472,34 +481,37 @@ def ida_main():
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
import capa.ida.helpers
|
||||
|
||||
if not capa.ida.helpers.is_supported_file_type():
|
||||
return -1
|
||||
|
||||
logger.info('-' * 80)
|
||||
logger.info(' Using default embedded rules.')
|
||||
logger.info(' ')
|
||||
logger.info(' You can see the current default rule set here:')
|
||||
logger.info(' https://github.com/fireeye/capa-rules')
|
||||
logger.info('-' * 80)
|
||||
logger.info("-" * 80)
|
||||
logger.info(" Using default embedded rules.")
|
||||
logger.info(" ")
|
||||
logger.info(" You can see the current default rule set here:")
|
||||
logger.info(" https://github.com/fireeye/capa-rules")
|
||||
logger.info("-" * 80)
|
||||
|
||||
if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
|
||||
logger.debug('detected running under PyInstaller')
|
||||
rules_path = os.path.join(sys._MEIPASS, 'rules')
|
||||
logger.debug('default rule path (PyInstaller method): %s', rules_path)
|
||||
if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
|
||||
logger.debug("detected running under PyInstaller")
|
||||
rules_path = os.path.join(sys._MEIPASS, "rules")
|
||||
logger.debug("default rule path (PyInstaller method): %s", rules_path)
|
||||
else:
|
||||
logger.debug('detected running from source')
|
||||
rules_path = os.path.join(os.path.dirname(__file__), '..', 'rules')
|
||||
logger.debug('default rule path (source method): %s', rules_path)
|
||||
logger.debug("detected running from source")
|
||||
rules_path = os.path.join(os.path.dirname(__file__), "..", "rules")
|
||||
logger.debug("default rule path (source method): %s", rules_path)
|
||||
|
||||
rules = get_rules(rules_path)
|
||||
import capa.rules
|
||||
|
||||
rules = capa.rules.RuleSet(rules)
|
||||
|
||||
import capa.features.extractors.ida
|
||||
|
||||
capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
|
||||
|
||||
if has_file_limitation(rules, capabilities, is_standalone=False):
|
||||
capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
|
||||
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
|
||||
|
||||
render_capabilities_default(rules, capabilities)
|
||||
|
||||
@@ -513,7 +525,7 @@ def is_runtime_ida():
|
||||
return True
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
if is_runtime_ida():
|
||||
ida_main()
|
||||
else:
|
||||
|
||||
@@ -18,43 +18,41 @@ def convert_statement_to_result_document(statement):
|
||||
"""
|
||||
if isinstance(statement, capa.engine.And):
|
||||
return {
|
||||
'type': 'and',
|
||||
"type": "and",
|
||||
}
|
||||
elif isinstance(statement, capa.engine.Or):
|
||||
return {
|
||||
'type': 'or',
|
||||
"type": "or",
|
||||
}
|
||||
elif isinstance(statement, capa.engine.Not):
|
||||
return {
|
||||
'type': 'not',
|
||||
"type": "not",
|
||||
}
|
||||
elif isinstance(statement, capa.engine.Some) and statement.count == 0:
|
||||
return {
|
||||
'type': 'optional'
|
||||
}
|
||||
return {"type": "optional"}
|
||||
elif isinstance(statement, capa.engine.Some) and statement.count > 0:
|
||||
return {
|
||||
'type': 'some',
|
||||
'count': statement.count,
|
||||
"type": "some",
|
||||
"count": statement.count,
|
||||
}
|
||||
elif isinstance(statement, capa.engine.Range):
|
||||
return {
|
||||
'type': 'range',
|
||||
'min': statement.min,
|
||||
'max': statement.max,
|
||||
'child': convert_feature_to_result_document(statement.child),
|
||||
"type": "range",
|
||||
"min": statement.min,
|
||||
"max": statement.max,
|
||||
"child": convert_feature_to_result_document(statement.child),
|
||||
}
|
||||
elif isinstance(statement, capa.engine.Regex):
|
||||
return {
|
||||
'type': 'regex',
|
||||
'pattern': statement.pattern,
|
||||
"type": "regex",
|
||||
"pattern": statement.pattern,
|
||||
# the string that was matched
|
||||
'match': statement.match,
|
||||
"match": statement.match,
|
||||
}
|
||||
elif isinstance(statement, capa.engine.Subscope):
|
||||
return {
|
||||
'type': 'subscope',
|
||||
'subscope': statement.scope,
|
||||
"type": "subscope",
|
||||
"subscope": statement.scope,
|
||||
}
|
||||
else:
|
||||
raise RuntimeError("unexpected match statement type: " + str(statement))
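# for example, a "2 or more" statement (capa.engine.Some with count=2) becomes
# {"type": "some", "count": 2}, which the verbose renderers below display as "2 or more:".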
@@ -89,8 +87,8 @@ def convert_feature_to_result_document(feature):

# make the terms pretty
name = name.lower()
if name == 'matchedrule':
name = 'match'
if name == "matchedrule":
name = "match"

# in the common case, there's a single argument
# so use it directly.
@@ -99,7 +97,7 @@ def convert_feature_to_result_document(feature):
value = value[0]

return {
'type': name,
"type": name,
name: value,
}

@@ -119,13 +117,13 @@ def convert_node_to_result_document(node):

if isinstance(node, capa.engine.Statement):
return {
'type': 'statement',
'statement': convert_statement_to_result_document(node),
"type": "statement",
"statement": convert_statement_to_result_document(node),
}
elif isinstance(node, capa.features.Feature):
return {
'type': 'feature',
'feature': convert_feature_to_result_document(node),
"type": "feature",
"feature": convert_feature_to_result_document(node),
}
else:
raise RuntimeError("unexpected match node type")
@@ -137,19 +135,16 @@ def convert_match_to_result_document(rules, capabilities, result):
this will become part of the "result document" format that can be emitted to JSON.
"""
doc = {
'success': bool(result.success),
'node': convert_node_to_result_document(result.statement),
'children': [
convert_match_to_result_document(rules, capabilities, child)
for child in result.children
],
"success": bool(result.success),
"node": convert_node_to_result_document(result.statement),
"children": [convert_match_to_result_document(rules, capabilities, child) for child in result.children],
}

# logic expressions, like `and`, don't have locations - their children do.
# so only add `locations` to feature nodes.
if isinstance(result.statement, capa.features.Feature):
if bool(result.success):
doc['locations'] = result.locations
doc["locations"] = result.locations

# if we have a `match` statement, then we're referencing another rule.
# this could be an external rule (written by a human), or
@@ -159,31 +154,30 @@ def convert_match_to_result_document(rules, capabilities, result):
# so, we need to lookup the other rule results
# and then filter those down to the address used here.
# finally, splice that logic into this tree.
if (doc['node']['type'] == 'feature'
and doc['node']['feature']['type'] == 'match'
# only add subtree on success,
# because there won't be results for the other rule on failure.
and doc['success']):
if (
doc["node"]["type"] == "feature"
and doc["node"]["feature"]["type"] == "match"
# only add subtree on success,
# because there won't be results for the other rule on failure.
and doc["success"]
):

rule_name = doc['node']['feature']['match']
rule_name = doc["node"]["feature"]["match"]
rule = rules[rule_name]
rule_matches = {address: result for (address, result) in capabilities[rule_name]}

if rule.meta.get('capa/subscope-rule'):
if rule.meta.get("capa/subscope-rule"):
# for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
#
# e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
scope = rule.meta['scope']
doc['node'] = {
'type': 'statement',
'statement': {
'type': 'subscope',
'subscope': scope,
},
scope = rule.meta["scope"]
doc["node"] = {
"type": "statement",
"statement": {"type": "subscope", "subscope": scope,},
}

for location in doc['locations']:
doc['children'].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
for location in doc["locations"]:
doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))

return doc

@@ -220,15 +214,14 @@ def convert_capabilities_to_result_document(rules, capabilities):
for rule_name, matches in capabilities.items():
rule = rules[rule_name]

if rule.meta.get('capa/subscope-rule'):
if rule.meta.get("capa/subscope-rule"):
continue

doc[rule_name] = {
'meta': dict(rule.meta),
'source': rule.definition,
'matches': {
addr: convert_match_to_result_document(rules, capabilities, match)
for (addr, match) in matches
"meta": dict(rule.meta),
"source": rule.definition,
"matches": {
addr: convert_match_to_result_document(rules, capabilities, match) for (addr, match) in matches
},
}

@@ -241,6 +234,7 @@ def render_vverbose(rules, capabilities):
# and capa.render.vverbose import capa.render (implicitly, as a submodule)
# so, defer the import until routine is called, breaking the import loop.
import capa.render.vverbose

doc = convert_capabilities_to_result_document(rules, capabilities)
return capa.render.vverbose.render_vverbose(doc)

@@ -248,6 +242,7 @@ def render_vverbose(rules, capabilities):
def render_verbose(rules, capabilities):
# break import loop
import capa.render.verbose

doc = convert_capabilities_to_result_document(rules, capabilities)
return capa.render.verbose.render_verbose(doc)

@@ -256,6 +251,7 @@ def render_default(rules, capabilities):
# break import loop
import capa.render.verbose
import capa.render.default

doc = convert_capabilities_to_result_document(rules, capabilities)
return capa.render.default.render_default(doc)

@@ -273,7 +269,5 @@ class CapaJsonObjectEncoder(json.JSONEncoder):

def render_json(rules, capabilities):
return json.dumps(
convert_capabilities_to_result_document(rules, capabilities),
cls=CapaJsonObjectEncoder,
sort_keys=True,
convert_capabilities_to_result_document(rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True,
)
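# a sketch of the shape this serializes to (hypothetical rule name and address):
#
#     {
#       "create file": {
#         "meta": {"name": "create file", "scope": "function", ...},
#         "source": "...",
#         "matches": {"0x401000": {"success": true, "node": {...}, "children": [...], ...}}
#       }
#     }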

@@ -9,7 +9,7 @@ import capa.render.utils as rutils
def width(s, character_count):
"""pad the given string to at least `character_count`"""
if len(s) < character_count:
return s + ' ' * (character_count - len(s))
return s + " " * (character_count - len(s))
else:
return s
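# e.g. width("CAPABILITY", 40) appends 30 trailing spaces, so the tabulate
# headers below render at a stable minimum column width.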

@@ -28,15 +28,15 @@ def render_capabilities(doc, ostream):
"""
rows = []
for rule in rutils.capability_rules(doc):
count = len(rule['matches'])
count = len(rule["matches"])
if count == 1:
capability = rutils.bold(rule['meta']['name'])
capability = rutils.bold(rule["meta"]["name"])
else:
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
rows.append((capability, rule['meta']['namespace']))
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
rows.append((capability, rule["meta"]["namespace"]))

ostream.write(tabulate.tabulate(rows, headers=[width('CAPABILITY', 40), width('NAMESPACE', 40)], tablefmt='psql'))
ostream.write('\n')
ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 40), width("NAMESPACE", 40)], tablefmt="psql"))
ostream.write("\n")


def render_attack(doc, ostream):
@@ -57,17 +57,17 @@ def render_attack(doc, ostream):
"""
tactics = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
if not rule['meta'].get('att&ck'):
if not rule["meta"].get("att&ck"):
continue

for attack in rule['meta']['att&ck']:
tactic, _, rest = attack.partition('::')
if '::' in rest:
technique, _, rest = rest.partition('::')
subtechnique, _, id = rest.rpartition(' ')
for attack in rule["meta"]["att&ck"]:
tactic, _, rest = attack.partition("::")
if "::" in rest:
technique, _, rest = rest.partition("::")
subtechnique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, subtechnique, id))
else:
technique, _, id = rest.rpartition(' ')
technique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, id))
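# for example, a (hypothetical) meta entry of the form
#   "Defense Evasion::Obfuscated Files or Information::Indicator Removal from Tools T1027.005"
# splits into tactic "Defense Evasion", technique "Obfuscated Files or Information",
# subtechnique "Indicator Removal from Tools", and id "T1027.005".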

rows = []
@@ -76,15 +76,17 @@ def render_attack(doc, ostream):
for spec in sorted(techniques):
if len(spec) == 2:
technique, id = spec
inner_rows.append('%s %s' % (rutils.bold(technique), id))
inner_rows.append("%s %s" % (rutils.bold(technique), id))
elif len(spec) == 3:
technique, subtechnique, id = spec
inner_rows.append('%s::%s %s' % (rutils.bold(technique), subtechnique, id))
inner_rows.append("%s::%s %s" % (rutils.bold(technique), subtechnique, id))
else:
raise RuntimeError('unexpected ATT&CK spec format')
rows.append((rutils.bold(tactic.upper()), '\n'.join(inner_rows), ))
ostream.write(tabulate.tabulate(rows, headers=[width('ATT&CK Tactic', 20), width('ATT&CK Technique', 60)], tablefmt='psql'))
ostream.write('\n')
raise RuntimeError("unexpected ATT&CK spec format")
rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),))
ostream.write(
tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 60)], tablefmt="psql")
)
ostream.write("\n")


def render_default(doc):

@@ -4,38 +4,40 @@ import termcolor

def bold(s):
"""draw attention to the given string"""
return termcolor.colored(s, 'blue')
return termcolor.colored(s, "blue")


def bold2(s):
"""draw attention to the given string, within a `bold` section"""
return termcolor.colored(s, 'green')
return termcolor.colored(s, "green")


def hex(n):
"""render the given number using upper case hex, like: 0x123ABC"""
return '0x%X' % n
return "0x%X" % n


def hex_string(h):
""" render hex string e.g. "0a40b1" as "0A 40 B1" """
return ' '.join(h[i:i + 2] for i in range(0, len(h), 2)).upper()
return " ".join(h[i : i + 2] for i in range(0, len(h), 2)).upper()


def capability_rules(doc):
"""enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
for (_, _, rule) in sorted(map(lambda rule: (rule['meta'].get('namespace', ''), rule['meta']['name'], rule), doc.values())):
if rule['meta'].get('lib'):
for (_, _, rule) in sorted(
map(lambda rule: (rule["meta"].get("namespace", ""), rule["meta"]["name"], rule), doc.values())
):
if rule["meta"].get("lib"):
continue
if rule['meta'].get('capa/subscope'):
if rule["meta"].get("capa/subscope"):
continue
if rule['meta'].get('maec/analysis-conclusion'):
if rule["meta"].get("maec/analysis-conclusion"):
continue
if rule['meta'].get('maec/analysis-conclusion-ov'):
if rule["meta"].get("maec/analysis-conclusion-ov"):
continue
if rule['meta'].get('maec/malware-category'):
if rule["meta"].get("maec/malware-category"):
continue
if rule['meta'].get('maec/malware-category-ov'):
if rule["meta"].get("maec/malware-category-ov"):
continue

yield rule
@@ -44,4 +46,4 @@ def capability_rules(doc):
class StringIO(six.StringIO):
def writeln(self, s):
self.write(s)
self.write('\n')
self.write("\n")

@@ -24,29 +24,29 @@ def render_verbose(doc):
ostream = rutils.StringIO()

for rule in rutils.capability_rules(doc):
count = len(rule['matches'])
count = len(rule["matches"])
if count == 1:
capability = rutils.bold(rule['meta']['name'])
capability = rutils.bold(rule["meta"]["name"])
else:
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)

ostream.writeln(capability)

rows = []
for key in ('namespace', 'description', 'scope'):
if key == 'name' or key not in rule['meta']:
for key in ("namespace", "description", "scope"):
if key == "name" or key not in rule["meta"]:
continue

v = rule['meta'][key]
v = rule["meta"][key]
if isinstance(v, list) and len(v) == 1:
v = v[0]
rows.append((key, v))

if rule['meta']['scope'] != capa.rules.FILE_SCOPE:
locations = doc[rule['meta']['name']]['matches'].keys()
rows.append(('matches', '\n'.join(map(rutils.hex, locations))))
if rule["meta"]["scope"] != capa.rules.FILE_SCOPE:
locations = doc[rule["meta"]["name"]]["matches"].keys()
rows.append(("matches", "\n".join(map(rutils.hex, locations))))

ostream.writeln(tabulate.tabulate(rows, tablefmt='plain'))
ostream.write('\n')
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
ostream.write("\n")

return ostream.getvalue()

@@ -5,145 +5,147 @@ import capa.render.utils as rutils


def render_statement(ostream, statement, indent=0):
ostream.write(' ' * indent)
if statement['type'] in ('and', 'or', 'optional'):
ostream.write(statement['type'])
ostream.writeln(':')
elif statement['type'] == 'not':
ostream.write(" " * indent)
if statement["type"] in ("and", "or", "optional"):
ostream.write(statement["type"])
ostream.writeln(":")
elif statement["type"] == "not":
# this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
ostream.writeln('not:')
elif statement['type'] == 'some':
ostream.write('%d or more' % statement['count'])
ostream.writeln(':')
elif statement['type'] == 'range':
ostream.writeln("not:")
elif statement["type"] == "some":
ostream.write("%d or more" % statement["count"])
ostream.writeln(":")
elif statement["type"] == "range":
# `range` is a weird node, it's almost a hybrid of statement+feature.
# it is a specific feature repeated multiple times.
# there's no additional logic in the feature part, just the existence of a feature.
# so, we have to inline some of the feature rendering here.

child = statement['child']
if child['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'):
feature = '%s(%s)' % (child['type'], rutils.bold2(child[child['type']]))
elif child['type'] in ('number', 'offset'):
feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex(child[child['type']])))
elif child['type'] == 'bytes':
feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex_string(child[child['type']])))
elif child['type'] == 'characteristic':
feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'][0]))
child = statement["child"]
if child["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
feature = "%s(%s)" % (child["type"], rutils.bold2(child[child["type"]]))
elif child["type"] in ("number", "offset"):
feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex(child[child["type"]])))
elif child["type"] == "bytes":
feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex_string(child[child["type"]])))
elif child["type"] == "characteristic":
feature = "characteristic(%s)" % (rutils.bold2(child["characteristic"][0]))
else:
raise RuntimeError('unexpected feature type: ' + str(child))
raise RuntimeError("unexpected feature type: " + str(child))

ostream.write('count(%s): ' % feature)
ostream.write("count(%s): " % feature)

if statement['max'] == statement['min']:
ostream.writeln('%d' % (statement['min']))
elif statement['min'] == 0:
ostream.writeln('%d or fewer' % (statement['max']))
elif statement['max'] == (1 << 64 - 1):
ostream.writeln('%d or more' % (statement['min']))
if statement["max"] == statement["min"]:
ostream.writeln("%d" % (statement["min"]))
elif statement["min"] == 0:
ostream.writeln("%d or fewer" % (statement["max"]))
elif statement["max"] == (1 << 64 - 1):
ostream.writeln("%d or more" % (statement["min"]))
else:
ostream.writeln('between %d and %d' % (statement['min'], statement['max']))
elif statement['type'] == 'subscope':
ostream.write(statement['subscope'])
ostream.writeln(':')
elif statement['type'] == 'regex':
ostream.writeln("between %d and %d" % (statement["min"], statement["max"]))
elif statement["type"] == "subscope":
ostream.write(statement["subscope"])
ostream.writeln(":")
elif statement["type"] == "regex":
# regex is a `Statement` not a `Feature`
# this is because it doesn't get extracted, but applies to all strings in scope.
# so we have to handle it here
ostream.writeln('string: %s' % (statement['match']))
ostream.writeln("string: %s" % (statement["match"]))
else:
raise RuntimeError("unexpected match statement type: " + str(statement))


def render_feature(ostream, match, feature, indent=0):
ostream.write(' ' * indent)
ostream.write(" " * indent)

if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'):
ostream.write(feature['type'])
ostream.write(': ')
ostream.write(rutils.bold2(feature[feature['type']]))
elif feature['type'] in ('number', 'offset'):
ostream.write(feature['type'])
ostream.write(': ')
ostream.write(rutils.bold2(rutils.hex(feature[feature['type']])))
elif feature['type'] == 'bytes':
ostream.write('bytes: ')
if feature["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
ostream.write(feature["type"])
ostream.write(": ")
ostream.write(rutils.bold2(feature[feature["type"]]))
elif feature["type"] in ("number", "offset"):
ostream.write(feature["type"])
ostream.write(": ")
ostream.write(rutils.bold2(rutils.hex(feature[feature["type"]])))
elif feature["type"] == "bytes":
ostream.write("bytes: ")
# bytes is the uppercase, hex-encoded string.
# it should always be an even number of characters (it's hex).
ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
elif feature['type'] == 'characteristic':
ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0])))
ostream.write(rutils.bold2(rutils.hex_string(feature[feature["type"]])))
elif feature["type"] == "characteristic":
ostream.write("characteristic(%s)" % (rutils.bold2(feature["characteristic"][0])))
# note that regex is found in `render_statement`
else:
raise RuntimeError('unexpected feature type: ' + str(feature))
raise RuntimeError("unexpected feature type: " + str(feature))

# it's possible to have an empty locations array here,
# such as when we're in MODE_FAILURE and showing the logic
# under a `not` statement (which will have no matched locations).
locations = list(sorted(match.get('locations', [])))
locations = list(sorted(match.get("locations", [])))
if len(locations) == 1:
ostream.write(' @ ')
ostream.write(" @ ")
ostream.write(rutils.hex(locations[0]))
elif len(locations) > 1:
ostream.write(' @ ')
ostream.write(" @ ")
if len(locations) > 4:
# don't display too many locations, because it becomes very noisy.
# probably only the first handful of locations will be useful for inspection.
ostream.write(', '.join(map(rutils.hex, locations[0:4])))
ostream.write(', and %d more...' % (len(locations) - 4))
ostream.write(", ".join(map(rutils.hex, locations[0:4])))
ostream.write(", and %d more..." % (len(locations) - 4))
else:
ostream.write(', '.join(map(rutils.hex, locations)))
ostream.write(", ".join(map(rutils.hex, locations)))

ostream.write('\n')
ostream.write("\n")


def render_node(ostream, match, node, indent=0):
if node['type'] == 'statement':
render_statement(ostream, node['statement'], indent=indent)
elif node['type'] == 'feature':
render_feature(ostream, match, node['feature'], indent=indent)
if node["type"] == "statement":
render_statement(ostream, node["statement"], indent=indent)
elif node["type"] == "feature":
render_feature(ostream, match, node["feature"], indent=indent)
else:
raise RuntimeError('unexpected node type: ' + str(node))
raise RuntimeError("unexpected node type: " + str(node))


# display nodes that successfully evaluated against the sample.
MODE_SUCCESS = 'success'
MODE_SUCCESS = "success"

# display nodes that did not evaluate to True against the sample.
# this is useful when rendering the logic tree under a `not` node.
MODE_FAILURE = 'failure'
MODE_FAILURE = "failure"


def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
child_mode = mode
if mode == MODE_SUCCESS:
# display only nodes that evaluated successfully.
if not match['success']:
if not match["success"]:
return
# optional statement with no successful children is empty
if (match['node'].get('statement', {}).get('type') == 'optional'
and not any(map(lambda m: m['success'], match['children']))):
if match["node"].get("statement", {}).get("type") == "optional" and not any(
map(lambda m: m["success"], match["children"])
):
return
# not statement, so invert the child mode to show failed evaluations
if match['node'].get('statement', {}).get('type') == 'not':
if match["node"].get("statement", {}).get("type") == "not":
child_mode = MODE_FAILURE
elif mode == MODE_FAILURE:
# display only nodes that did not evaluate to True
if match['success']:
if match["success"]:
return
# optional statement with successful children is not relevant
if (match['node'].get('statement', {}).get('type') == 'optional'
and any(map(lambda m: m['success'], match['children']))):
if match["node"].get("statement", {}).get("type") == "optional" and any(
map(lambda m: m["success"], match["children"])
):
return
# not statement, so invert the child mode to show successful evaluations
if match['node'].get('statement', {}).get('type') == 'not':
if match["node"].get("statement", {}).get("type") == "not":
child_mode = MODE_SUCCESS
else:
raise RuntimeError('unexpected mode: ' + mode)
raise RuntimeError("unexpected mode: " + mode)

render_node(ostream, match, match['node'], indent=indent)
render_node(ostream, match, match["node"], indent=indent)

for child in match['children']:
for child in match["children"]:
render_match(ostream, child, indent=indent + 1, mode=child_mode)


@@ -151,44 +153,44 @@ def render_vverbose(doc):
ostream = rutils.StringIO()

for rule in rutils.capability_rules(doc):
count = len(rule['matches'])
count = len(rule["matches"])
if count == 1:
capability = rutils.bold(rule['meta']['name'])
capability = rutils.bold(rule["meta"]["name"])
else:
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)

ostream.writeln(capability)

rows = []
for key in capa.rules.META_KEYS:
if key == 'name' or key not in rule['meta']:
if key == "name" or key not in rule["meta"]:
continue

v = rule['meta'][key]
v = rule["meta"][key]
if isinstance(v, list) and len(v) == 1:
v = v[0]
elif isinstance(v, list) and len(v) > 1:
v = ', '.join(v)
v = ", ".join(v)
rows.append((key, v))

ostream.writeln(tabulate.tabulate(rows, tablefmt='plain'))
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))

if rule['meta']['scope'] == capa.rules.FILE_SCOPE:
matches = list(doc[rule['meta']['name']]['matches'].values())
if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
matches = list(doc[rule["meta"]["name"]]["matches"].values())
if len(matches) != 1:
# i think there should only ever be one match per file-scope rule,
# because we do the file-scope evaluation a single time.
# but i'm not 100% sure if this is/will always be true.
# so, lets be explicit about our assumptions and raise an exception if they fail.
raise RuntimeError('unexpected file scope match count: ' + str(len(matches)))
raise RuntimeError("unexpected file scope match count: " + str(len(matches)))
render_match(ostream, matches[0], indent=0)
else:
for location, match in sorted(doc[rule['meta']['name']]['matches'].items()):
ostream.write(rule['meta']['scope'])
ostream.write(' @ ')
for location, match in sorted(doc[rule["meta"]["name"]]["matches"].items()):
ostream.write(rule["meta"]["scope"])
ostream.write(" @ ")
ostream.writeln(rutils.hex(location))
render_match(ostream, match, indent=1)

ostream.write('\n')
ostream.write("\n")

return ostream.getvalue()

capa/rules.py
@@ -22,32 +22,32 @@ logger = logging.getLogger(__name__)
# these are the standard metadata fields, in the preferred order.
# when reformatted, any custom keys will come after these.
META_KEYS = (
'name',
'namespace',
'rule-category',
'maec/analysis-conclusion',
'maec/analysis-conclusion-ov',
'maec/malware-category',
'maec/malware-category-ov',
'author',
'description',
'lib',
'scope',
'att&ck',
'mbc',
'references',
'examples'
"name",
"namespace",
"rule-category",
"maec/analysis-conclusion",
"maec/analysis-conclusion-ov",
"maec/malware-category",
"maec/malware-category-ov",
"author",
"description",
"lib",
"scope",
"att&ck",
"mbc",
"references",
"examples",
)
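# e.g. when a rule is reformatted, "name" is emitted first and "examples" last,
# with any custom keys the author added following after these.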
# these are meta fields that are internal to capa,
# and added during rule reading/construction.
# they may help us manipulate or index rules,
# but should not be exposed to clients.
HIDDEN_META_KEYS = ('capa/nursery', 'capa/path')
HIDDEN_META_KEYS = ("capa/nursery", "capa/path")


FILE_SCOPE = 'file'
FUNCTION_SCOPE = 'function'
BASIC_BLOCK_SCOPE = 'basic block'
FILE_SCOPE = "file"
FUNCTION_SCOPE = "function"
BASIC_BLOCK_SCOPE = "basic block"


SUPPORTED_FEATURES = {
@@ -56,7 +56,7 @@ SUPPORTED_FEATURES = {
capa.features.file.Export,
capa.features.file.Import,
capa.features.file.Section,
capa.features.Characteristic('embedded pe'),
capa.features.Characteristic("embedded pe"),
capa.features.String,
},
FUNCTION_SCOPE: {
@@ -68,18 +68,18 @@ SUPPORTED_FEATURES = {
capa.features.insn.Offset,
capa.features.insn.Mnemonic,
capa.features.basicblock.BasicBlock,
capa.features.Characteristic('switch'),
capa.features.Characteristic('nzxor'),
capa.features.Characteristic('peb access'),
capa.features.Characteristic('fs access'),
capa.features.Characteristic('gs access'),
capa.features.Characteristic('cross section flow'),
capa.features.Characteristic('stack string'),
capa.features.Characteristic('calls from'),
capa.features.Characteristic('calls to'),
capa.features.Characteristic('indirect call'),
capa.features.Characteristic('loop'),
capa.features.Characteristic('recursive call')
capa.features.Characteristic("switch"),
capa.features.Characteristic("nzxor"),
capa.features.Characteristic("peb access"),
capa.features.Characteristic("fs access"),
capa.features.Characteristic("gs access"),
capa.features.Characteristic("cross section flow"),
capa.features.Characteristic("stack string"),
capa.features.Characteristic("calls from"),
capa.features.Characteristic("calls to"),
capa.features.Characteristic("indirect call"),
capa.features.Characteristic("loop"),
capa.features.Characteristic("recursive call"),
},
BASIC_BLOCK_SCOPE: {
capa.features.MatchedRule,
@@ -89,14 +89,14 @@ SUPPORTED_FEATURES = {
capa.features.Bytes,
capa.features.insn.Offset,
capa.features.insn.Mnemonic,
capa.features.Characteristic('nzxor'),
capa.features.Characteristic('peb access'),
capa.features.Characteristic('fs access'),
capa.features.Characteristic('gs access'),
capa.features.Characteristic('cross section flow'),
capa.features.Characteristic('tight loop'),
capa.features.Characteristic('stack string'),
capa.features.Characteristic('indirect call')
capa.features.Characteristic("nzxor"),
capa.features.Characteristic("peb access"),
capa.features.Characteristic("fs access"),
capa.features.Characteristic("gs access"),
capa.features.Characteristic("cross section flow"),
capa.features.Characteristic("tight loop"),
capa.features.Characteristic("stack string"),
capa.features.Characteristic("indirect call"),
},
}

@@ -107,7 +107,7 @@ class InvalidRule(ValueError):
self.msg = msg

def __str__(self):
return 'invalid rule: %s' % (self.msg)
return "invalid rule: %s" % (self.msg)

def __repr__(self):
return str(self)
@@ -121,7 +121,7 @@ class InvalidRuleWithPath(InvalidRule):
self.__cause__ = None

def __str__(self):
return 'invalid rule: %s: %s' % (self.path, self.msg)
return "invalid rule: %s: %s" % (self.path, self.msg)


class InvalidRuleSet(ValueError):
@@ -130,7 +130,7 @@ class InvalidRuleSet(ValueError):
self.msg = msg

def __str__(self):
return 'invalid rule set: %s' % (self.msg)
return "invalid rule set: %s" % (self.msg)

def __repr__(self):
return str(self)
@@ -139,111 +139,112 @@ class InvalidRuleSet(ValueError):
def ensure_feature_valid_for_scope(scope, feature):
if isinstance(feature, capa.features.Characteristic):
if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
raise InvalidRule('feature %s not supported for scope %s' % (feature, scope))
raise InvalidRule("feature %s not supported for scope %s" % (feature, scope))
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
raise InvalidRule('feature %s not supported for scope %s' % (feature, scope))
raise InvalidRule("feature %s not supported for scope %s" % (feature, scope))


def parse_int(s):
if s.startswith('0x'):
if s.startswith("0x"):
return int(s, 0x10)
else:
return int(s, 10)
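# e.g. parse_int("0x10") == 16 while parse_int("10") == 10,
# so rule authors can write counts and numbers in either radix.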


def parse_range(s):
'''
"""
parse a string "(0, 1)" into a range (min, max).
min and/or max may be None to indicate an unbound range.
'''
"""
# we want to use `{` characters, but this is a dict in yaml.
if not s.startswith('('):
raise InvalidRule('invalid range: %s' % (s))
if not s.startswith("("):
raise InvalidRule("invalid range: %s" % (s))

if not s.endswith(')'):
raise InvalidRule('invalid range: %s' % (s))
if not s.endswith(")"):
raise InvalidRule("invalid range: %s" % (s))

s = s[len('('):-len(')')]
min, _, max = s.partition(',')
s = s[len("(") : -len(")")]
min, _, max = s.partition(",")
min = min.strip()
max = max.strip()

if min:
min = parse_int(min.strip())
if min < 0:
raise InvalidRule('range min less than zero')
raise InvalidRule("range min less than zero")
else:
min = None

if max:
max = parse_int(max.strip())
if max < 0:
raise InvalidRule('range max less than zero')
raise InvalidRule("range max less than zero")
else:
max = None

if min is not None and max is not None:
if max < min:
raise InvalidRule('range max less than min')
raise InvalidRule("range max less than min")

return min, max
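# a few illustrative cases (sketch):
#   parse_range("(0, 1)")  -> (0, 1)
#   parse_range("(, 10)")  -> (None, 10)   # unbounded minimum
#   parse_range("(2,)")    -> (2, None)    # unbounded maximum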


def parse_feature(key):
# keep this in sync with supported features
if key == 'api':
if key == "api":
return capa.features.insn.API
elif key == 'string':
elif key == "string":
return capa.features.String
elif key == 'bytes':
elif key == "bytes":
return capa.features.Bytes
elif key == 'number':
elif key == "number":
return capa.features.insn.Number
elif key == 'offset':
elif key == "offset":
return capa.features.insn.Offset
elif key == 'mnemonic':
elif key == "mnemonic":
return capa.features.insn.Mnemonic
elif key == 'basic blocks':
elif key == "basic blocks":
return capa.features.basicblock.BasicBlock
elif key.startswith('characteristic(') and key.endswith(')'):
characteristic = key[len('characteristic('):-len(')')]
elif key.startswith("characteristic(") and key.endswith(")"):
characteristic = key[len("characteristic(") : -len(")")]
return lambda v: capa.features.Characteristic(characteristic, v)
elif key == 'export':
elif key == "export":
return capa.features.file.Export
elif key == 'import':
elif key == "import":
return capa.features.file.Import
elif key == 'section':
elif key == "section":
return capa.features.file.Section
elif key == 'match':
elif key == "match":
return capa.features.MatchedRule
else:
raise InvalidRule('unexpected statement: %s' % key)
raise InvalidRule("unexpected statement: %s" % key)


def parse_symbol(s, value_type):
'''
"""
s can be an int or a string
'''
if isinstance(s, str) and '=' in s:
value, symbol = s.split('=', 1)
"""
if isinstance(s, str) and "=" in s:
value, symbol = s.split("=", 1)
symbol = symbol.strip()
if symbol == '':
if symbol == "":
raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
else:
value = s
symbol = None

if isinstance(value, str):
if value_type == 'bytes':
if value_type == "bytes":
try:
value = codecs.decode(value.replace(' ', ''), 'hex')
value = codecs.decode(value.replace(" ", ""), "hex")
# TODO: Remove TypeError when Python2 is not used anymore
except (TypeError, binascii.Error):
raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value)

if len(value) > MAX_BYTES_FEATURE_SIZE:
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
MAX_BYTES_FEATURE_SIZE)
raise InvalidRule(
"unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE
)
else:
try:
value = parse_int(value)
@@ -255,54 +256,54 @@ def parse_symbol(s, value_type):

def build_statements(d, scope):
if len(d.keys()) != 1:
raise InvalidRule('too many statements')
raise InvalidRule("too many statements")

key = list(d.keys())[0]
if key == 'and':
if key == "and":
return And(*[build_statements(dd, scope) for dd in d[key]])
elif key == 'or':
elif key == "or":
return Or(*[build_statements(dd, scope) for dd in d[key]])
elif key == 'not':
elif key == "not":
if len(d[key]) != 1:
raise InvalidRule('not statement must have exactly one child statement')
raise InvalidRule("not statement must have exactly one child statement")
return Not(*[build_statements(dd, scope) for dd in d[key]])
elif key.endswith(' or more'):
count = int(key[:-len('or more')])
elif key.endswith(" or more"):
count = int(key[: -len("or more")])
return Some(count, *[build_statements(dd, scope) for dd in d[key]])
elif key == 'optional':
elif key == "optional":
# `optional` is an alias for `0 or more`
# which is useful for documenting behaviors,
# like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
return Some(0, *[build_statements(dd, scope) for dd in d[key]])
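# a (hypothetical) rule fragment using the alias:
#
#     - and:
#       - api: CreateFileA
#       - optional:
#         - api: WriteFile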

elif key == 'function':
elif key == "function":
if scope != FILE_SCOPE:
raise InvalidRule('function subscope supported only for file scope')
raise InvalidRule("function subscope supported only for file scope")

if len(d[key]) != 1:
raise InvalidRule('subscope must have exactly one child statement')
raise InvalidRule("subscope must have exactly one child statement")

return Subscope(FUNCTION_SCOPE, *[build_statements(dd, FUNCTION_SCOPE) for dd in d[key]])

elif key == 'basic block':
elif key == "basic block":
if scope != FUNCTION_SCOPE:
raise InvalidRule('basic block subscope supported only for function scope')
raise InvalidRule("basic block subscope supported only for function scope")

if len(d[key]) != 1:
raise InvalidRule('subscope must have exactly one child statement')
raise InvalidRule("subscope must have exactly one child statement")

return Subscope(BASIC_BLOCK_SCOPE, *[build_statements(dd, BASIC_BLOCK_SCOPE) for dd in d[key]])

elif key.startswith('count(') and key.endswith(')'):
elif key.startswith("count(") and key.endswith(")"):
# e.g.:
#
# count(basic block)
# count(mnemonic(mov))
# count(characteristic(nzxor))

term = key[len('count('):-len(')')]
term = key[len("count(") : -len(")")]

if term.startswith('characteristic('):
if term.startswith("characteristic("):
# characteristic features are specified a bit specially:
# they simply indicate the presence of something unusual/interesting,
# and we embed the name in the feature name, like `characteristic(nzxor)`.
@@ -320,18 +321,18 @@ def build_statements(d, scope):
# - mnemonic: mov
#
# but here we deal with the form: `mnemonic(mov)`.
term, _, arg = term.partition('(')
term, _, arg = term.partition("(")
Feature = parse_feature(term)

if arg:
arg = arg[:-len(')')]
arg = arg[: -len(")")]
# can't rely on yaml parsing ints embedded within strings
# like:
#
# count(offset(0xC))
# count(number(0x11223344))
# count(number(0x100 = symbol name))
if term in ('number', 'offset', 'bytes'):
if term in ("number", "offset", "bytes"):
value, symbol = parse_symbol(arg, term)
feature = Feature(value, symbol)
else:
@@ -348,29 +349,31 @@ def build_statements(d, scope):
count = d[key]
if isinstance(count, int):
return Range(feature, min=count, max=count)
elif count.endswith(' or more'):
min = parse_int(count[:-len(' or more')])
elif count.endswith(" or more"):
min = parse_int(count[: -len(" or more")])
max = None
return Range(feature, min=min, max=max)
elif count.endswith(' or fewer'):
elif count.endswith(" or fewer"):
min = None
max = parse_int(count[:-len(' or fewer')])
max = parse_int(count[: -len(" or fewer")])
return Range(feature, min=min, max=max)
elif count.startswith('('):
elif count.startswith("("):
min, max = parse_range(count)
return Range(feature, min=min, max=max)
else:
raise InvalidRule('unexpected range: %s' % (count))
elif key == 'string' and d[key].startswith('/') and (d[key].endswith('/') or d[key].endswith('/i')):
raise InvalidRule("unexpected range: %s" % (count))
elif key == "string" and d[key].startswith("/") and (d[key].endswith("/") or d[key].endswith("/i")):
try:
return Regex(d[key])
except re.error:
if d[key].endswith('/i'):
d[key] = d[key][:-len('i')]
raise InvalidRule('invalid regular expression: %s; it should use Python syntax, try it at https://pythex.org' % d[key])
if d[key].endswith("/i"):
d[key] = d[key][: -len("i")]
raise InvalidRule(
"invalid regular expression: %s; it should use Python syntax, try it at https://pythex.org" % d[key]
)
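# e.g. a rule line like `- string: /VirtualAlloc(Ex)?/i` (hypothetical) is treated as a
# regex applied to every string in scope, rather than as a plain string feature.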
else:
Feature = parse_feature(key)
if key in ('number', 'offset', 'bytes'):
if key in ("number", "offset", "bytes"):
# parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
# or regular numbers, e.g. 37
value, symbol = parse_symbol(d[key], key)
@@ -390,7 +393,7 @@ def second(s):


# we use the ruamel.yaml parser because it supports roundtripping of documents with comments.
yaml = ruamel.yaml.YAML(typ='rt')
yaml = ruamel.yaml.YAML(typ="rt")


# use block mode, not inline json-like mode
@@ -410,7 +413,7 @@ yaml.width = 4096


class Rule(object):
def __init__(self, name, scope, statement, meta, definition=''):
def __init__(self, name, scope, statement, meta, definition=""):
super(Rule, self).__init__()
self.name = name
self.scope = scope
@@ -419,13 +422,13 @@ class Rule(object):
self.definition = definition

def __str__(self):
return 'Rule(name=%s)' % (self.name)
return "Rule(name=%s)" % (self.name)

def __repr__(self):
return 'Rule(scope=%s, name=%s)' % (self.scope, self.name)
return "Rule(scope=%s, name=%s)" % (self.scope, self.name)

def get_dependencies(self, namespaces):
'''
"""
fetch the names of rules this rule relies upon.
these are only the direct dependencies; a user must
compute the transitive dependency graph themselves, if they want it.
@@ -436,7 +439,7 @@ class Rule(object):

Returns:
List[str]: names of rules upon which this rule depends.
'''
"""
deps = set([])

def rec(statement):
@@ -469,24 +472,31 @@ class Rule(object):
def _extract_subscope_rules_rec(self, statement):
if isinstance(statement, Statement):
# for each child that is a subscope,
for subscope in filter(lambda statement: isinstance(statement, capa.engine.Subscope), statement.get_children()):
for subscope in filter(
lambda statement: isinstance(statement, capa.engine.Subscope), statement.get_children()
):

# create a new rule from it.
# the name is a randomly generated, hopefully unique value.
# ideally, this won't ever be rendered to a user.
name = self.name + '/' + uuid.uuid4().hex
new_rule = Rule(name, subscope.scope, subscope.child, {
'name': name,
'scope': subscope.scope,
# these derived rules are never meant to be inspected separately,
# they are dependencies for the parent rule,
# so mark it as such.
'lib': True,
# metadata that indicates this is derived from a subscope statement
'capa/subscope-rule': True,
# metadata that links the child rule to the parent rule
'capa/parent': self.name,
})
name = self.name + "/" + uuid.uuid4().hex
new_rule = Rule(
name,
subscope.scope,
subscope.child,
{
"name": name,
"scope": subscope.scope,
# these derived rules are never meant to be inspected separately,
# they are dependencies for the parent rule,
# so mark it as such.
"lib": True,
# metadata that indicates this is derived from a subscope statement
"capa/subscope-rule": True,
# metadata that links the child rule to the parent rule
"capa/parent": self.name,
},
)

# update the existing statement to `match` the new rule
new_node = capa.features.MatchedRule(name)
@@ -503,7 +513,7 @@ class Rule(object):
yield new_rule

def extract_subscope_rules(self):
'''
"""
scan through the statements of this rule,
replacing subscope statements with `match` references to a newly created rule,
which are yielded from this routine.
@@ -514,7 +524,7 @@ class Rule(object):

for derived_rule in rule.extract_subscope_rules():
assert derived_rule.meta['capa/parent'] == rule.name
'''
"""

# recurse through statements
# when we encounter a Subscope statement
@@ -531,27 +541,21 @@ class Rule(object):

@classmethod
def from_dict(cls, d, s):
name = d['rule']['meta']['name']
name = d["rule"]["meta"]["name"]
# if scope is not specified, default to function scope.
# this is probably the mode that rule authors will start with.
scope = d['rule']['meta'].get('scope', FUNCTION_SCOPE)
statements = d['rule']['features']
scope = d["rule"]["meta"].get("scope", FUNCTION_SCOPE)
statements = d["rule"]["features"]

# the rule must start with a single logic node.
# doing anything else is too implicit and difficult to remove (AND vs OR ???).
if len(statements) != 1:
raise InvalidRule('rule must begin with a single top level statement')
raise InvalidRule("rule must begin with a single top level statement")

if isinstance(statements[0], capa.engine.Subscope):
raise InvalidRule('top level statement may not be a subscope')
raise InvalidRule("top level statement may not be a subscope")

return cls(
name,
scope,
build_statements(statements[0], scope),
d['rule']['meta'],
s
)
return cls(name, scope, build_statements(statements[0], scope), d["rule"]["meta"], s)
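# for reference, a minimal (hypothetical) rule document accepted by from_dict:
#
#     rule:
#       meta:
#         name: create file
#         scope: function
#       features:
#         - and:
#           - api: CreateFileA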
|
||||
|
||||
@classmethod
|
||||
def from_yaml(cls, s):
|
||||
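
A hypothetical illustration of the parsed-dict shape that `from_dict` receives and its "single top level statement" check; the dict below is a stand-in, not a real capa rule:

d = {
    "rule": {
        "meta": {"name": "example rule", "scope": "function"},
        "features": [{"and": []}],  # exactly one top-level logic node
    }
}

statements = d["rule"]["features"]
if len(statements) != 1:
    # mirrors the InvalidRule raised above
    raise ValueError("rule must begin with a single top level statement")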
@@ -559,9 +563,9 @@ class Rule(object):

    @classmethod
    def from_yaml_file(cls, path):
        with open(path, 'rb') as f:
        with open(path, "rb") as f:
            try:
                return cls.from_yaml(f.read().decode('utf-8'))
                return cls.from_yaml(f.read().decode("utf-8"))
            except InvalidRule as e:
                raise InvalidRuleWithPath(path, str(e))
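
The try/except above wraps parse failures so the offending path is reported. A self-contained sketch of the same pattern, with stand-in exception classes and a hypothetical `parse_rule` (capa's real InvalidRule/InvalidRuleWithPath live elsewhere in this module):

class InvalidRule(ValueError):
    pass

class InvalidRuleWithPath(InvalidRule):
    def __init__(self, path, msg):
        super(InvalidRuleWithPath, self).__init__("%s: %s" % (path, msg))

def parse_rule(s):
    # hypothetical parser: always fails, to demonstrate the wrapping
    raise InvalidRule("rule must begin with a single top level statement")

def load_rule_file(path):
    with open(path, "rb") as f:
        try:
            return parse_rule(f.read().decode("utf-8"))
        except InvalidRule as e:
            # re-raise with the offending path so users can locate the bad rule
            raise InvalidRuleWithPath(path, str(e))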
@@ -578,11 +582,11 @@ class Rule(object):
        definition = yaml.load(self.definition)
        # definition retains a reference to `meta`,
        # so we're updating that in place.
        definition['rule']['meta'] = self.meta
        definition["rule"]["meta"] = self.meta
        meta = self.meta

        meta['name'] = self.name
        meta['scope'] = self.scope
        meta["name"] = self.name
        meta["scope"] = self.scope

        def move_to_end(m, k):
            # ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap).
@@ -592,8 +596,8 @@ class Rule(object):
            del m[k]
            m[k] = v

        move_to_end(definition['rule'], 'meta')
        move_to_end(definition['rule'], 'features')
        move_to_end(definition["rule"], "meta")
        move_to_end(definition["rule"], "features")

        for key in META_KEYS:
            if key in meta:
@@ -624,11 +628,11 @@ class Rule(object):
                continue
            meta[key] = value

        return ostream.getvalue().decode('utf-8').rstrip('\n') + '\n'
        return ostream.getvalue().decode("utf-8").rstrip("\n") + "\n"
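
The `move_to_end` helper above re-inserts a key so it becomes the last entry. The same behavior can be demonstrated on a plain OrderedDict, since ruamel.yaml's CommentedMap preserves insertion order the same way:

import collections

def move_to_end(m, k):
    # delete and re-insert the key so it sorts last in the ordered map
    v = m[k]
    del m[k]
    m[k] = v

rule = collections.OrderedDict([("features", []), ("meta", {}), ("extra", 1)])
move_to_end(rule, "meta")
move_to_end(rule, "features")
assert list(rule) == ["extra", "meta", "features"]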

def get_rules_with_scope(rules, scope):
    '''
    """
    from the given collection of rules, select those with the given scope.

    args:
@@ -637,12 +641,12 @@ def get_rules_with_scope(rules, scope):

    returns:
        List[capa.rules.Rule]:
    '''
    """
    return list(rule for rule in rules if rule.scope == scope)


def get_rules_and_dependencies(rules, rule_name):
    '''
    """
    from the given collection of rules, select a rule and its dependencies (transitively).

    args:
@@ -651,7 +655,7 @@ def get_rules_and_dependencies(rules, rule_name):

    yields:
        Rule:
    '''
    """
    # we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
    rules = list(rules)
    namespaces = index_rules_by_namespace(rules)
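
A minimal sketch of a transitive dependency walk like `get_rules_and_dependencies`; for brevity, each rule here lists its dependency names directly, whereas capa derives them from `match` statements:

def rules_and_dependencies(rules_by_name, rule_name):
    seen = set()
    queue = [rule_name]
    while queue:
        name = queue.pop()
        if name in seen:
            continue
        seen.add(name)
        rule = rules_by_name[name]
        # dependencies of dependencies go back on the queue
        queue.extend(rule["deps"])
        yield rule

rules = {
    "a": {"name": "a", "deps": ["b"]},
    "b": {"name": "b", "deps": []},
    "c": {"name": "c", "deps": []},
}
assert {r["name"] for r in rules_and_dependencies(rules, "a")} == {"a", "b"}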
@@ -674,17 +678,17 @@ def ensure_rules_are_unique(rules):
    seen = set([])
    for rule in rules:
        if rule.name in seen:
            raise InvalidRule('duplicate rule name: ' + rule.name)
            raise InvalidRule("duplicate rule name: " + rule.name)
        seen.add(rule.name)


def ensure_rule_dependencies_are_met(rules):
    '''
    """
    raise an exception if a rule dependency does not exist.

    raises:
        InvalidRule: if a dependency is not met.
    '''
    """
    # we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
    rules = list(rules)
    namespaces = index_rules_by_namespace(rules)
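
A hypothetical sketch of the check performed by `ensure_rule_dependencies_are_met`: every dependency name must resolve to a known rule (or a namespace, per the indexing below), otherwise an error is raised:

def ensure_dependencies_are_met(rules_by_name, namespaces):
    for rule in rules_by_name.values():
        for dep in rule["deps"]:
            if dep not in rules_by_name and dep not in namespaces:
                raise ValueError("rule '%s' depends on missing rule '%s'" % (rule["name"], dep))

rules = {"a": {"name": "a", "deps": ["b"]}, "b": {"name": "b", "deps": []}}
ensure_dependencies_are_met(rules, {})  # ok: "b" exists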
@@ -696,7 +700,7 @@ def ensure_rule_dependencies_are_met(rules):


def index_rules_by_namespace(rules):
    '''
    """
    compute the rules that fit into each namespace found within the given rules.

    for example, given:
@@ -714,23 +718,23 @@ def index_rules_by_namespace(rules):
        rules (List[Rule]):

    Returns: Dict[str, List[Rule]]
    '''
    """
    namespaces = collections.defaultdict(list)

    for rule in rules:
        namespace = rule.meta.get('namespace')
        namespace = rule.meta.get("namespace")
        if not namespace:
            continue

        while namespace:
            namespaces[namespace].append(rule)
            namespace, _, _ = namespace.rpartition('/')
            namespace, _, _ = namespace.rpartition("/")

    return dict(namespaces)

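To make the rpartition loop above concrete, a standalone sketch: a rule in namespace "c2/shell/reverse" gets indexed under that namespace and every parent prefix:

import collections

def index_by_namespace(rules):
    namespaces = collections.defaultdict(list)
    for rule in rules:
        namespace = rule.get("namespace")
        while namespace:
            namespaces[namespace].append(rule)
            # strip the last path component to also index the parent namespaces
            namespace, _, _ = namespace.rpartition("/")
    return dict(namespaces)

index = index_by_namespace([{"name": "reverse shell", "namespace": "c2/shell/reverse"}])
assert sorted(index) == ["c2", "c2/shell", "c2/shell/reverse"]
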
class RuleSet(object):
    '''
    """
    a ruleset is initialized with a collection of rules, which it verifies and sorts into scopes.
    each set of scoped rules is sorted topologically, which enables rules to match on past rule matches.

@@ -742,7 +746,7 @@ class RuleSet(object):
        ...
    ])
    capa.engine.match(ruleset.file_rules, ...)
    '''
    """

    def __init__(self, rules):
        super(RuleSet, self).__init__()
@@ -754,7 +758,7 @@ class RuleSet(object):
        ensure_rule_dependencies_are_met(rules)

        if len(rules) == 0:
            raise InvalidRuleSet('no rules selected')
            raise InvalidRuleSet("no rules selected")

        self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
        self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
@@ -769,12 +773,12 @@ class RuleSet(object):

    @staticmethod
    def _get_rules_for_scope(rules, scope):
        '''
        """
        given a collection of rules, collect the rules that are needed at the given scope.
        these rules are ordered topologically.

        don't include "lib" rules, unless they are dependencies of other rules.
        '''
        """
        scope_rules = set([])

        # we need to process all rules, not just rules with the given scope.
@@ -782,7 +786,7 @@ class RuleSet(object):
        # at lower scope, e.g. function scope.
        # so, we find all dependencies of all rules, and will filter them down later.
        for rule in rules:
            if rule.meta.get('lib', False):
            if rule.meta.get("lib", False):
                continue

            scope_rules.update(get_rules_and_dependencies(rules, rule.name))
@@ -790,7 +794,7 @@ class RuleSet(object):

    @staticmethod
    def _extract_subscope_rules(rules):
        '''
        """
        process the given sequence of rules.
        for each one, extract any embedded subscope rules into their own rule.
        process these recursively.
@@ -798,7 +802,7 @@ class RuleSet(object):

        note: this operation mutates the rules passed in - they may now have `match` statements
        for the extracted subscope rules.
        '''
        """
        done = []

        # use a queue of rules, because we'll be modifying the list (appending new items) as we go.
@@ -811,14 +815,14 @@ class RuleSet(object):
        return done
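
The queue-based processing above is what makes nested subscopes work: newly derived rules go back on the queue. A generic sketch of that worklist pattern, with hypothetical names:

def process_all(items, derive):
    done = []
    # use a queue because we append newly derived items while iterating
    queue = list(items)
    while queue:
        item = queue.pop(0)
        queue.extend(derive(item))  # derived items are processed recursively
        done.append(item)
    return done

assert process_all([1, 2], lambda n: [n * 10] if n < 10 else []) == [1, 2, 10, 20]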
    def filter_rules_by_meta(self, tag):
        '''
        """
        return a new rule set with rules filtered on all meta field values; adds all dependency rules.
        applies the tag-based rule filter assuming that all required rules are loaded.
        can be used to select specific rules, rather than providing a rules child directory
        from which capa cannot resolve dependencies.
        TODO: handle circular dependencies?
        TODO: support -t=metafield <k>
        '''
        """
        rules = self.rules.values()
        rules_filtered = set([])
        for rule in rules:

@@ -1,2 +1,2 @@
__version__ = '0.0.0'
__commit__ = '00000000'
__version__ = "0.0.0"
__commit__ = "00000000"