mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 07:10:29 -08:00
pep8
This commit is contained in:
@@ -7,23 +7,24 @@ import capa.features
|
|||||||
|
|
||||||
|
|
||||||
class Statement(object):
|
class Statement(object):
|
||||||
'''
|
"""
|
||||||
superclass for structural nodes, such as and/or/not.
|
superclass for structural nodes, such as and/or/not.
|
||||||
this exists to provide a default impl for `__str__` and `__repr__`,
|
this exists to provide a default impl for `__str__` and `__repr__`,
|
||||||
and to declare the interface method `evaluate`
|
and to declare the interface method `evaluate`
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(Statement, self).__init__()
|
super(Statement, self).__init__()
|
||||||
self.name = self.__class__.__name__
|
self.name = self.__class__.__name__
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return '%s(%s)' % (self.name.lower(), ','.join(map(str, self.get_children())))
|
return "%s(%s)" % (self.name.lower(), ",".join(map(str, self.get_children())))
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self)
|
return str(self)
|
||||||
|
|
||||||
def evaluate(self, ctx):
|
def evaluate(self, ctx):
|
||||||
'''
|
"""
|
||||||
classes that inherit `Statement` must implement `evaluate`
|
classes that inherit `Statement` must implement `evaluate`
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -31,30 +32,30 @@ class Statement(object):
|
|||||||
|
|
||||||
returns:
|
returns:
|
||||||
Result
|
Result
|
||||||
'''
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def get_children(self):
|
def get_children(self):
|
||||||
if hasattr(self, 'child'):
|
if hasattr(self, "child"):
|
||||||
yield self.child
|
yield self.child
|
||||||
|
|
||||||
if hasattr(self, 'children'):
|
if hasattr(self, "children"):
|
||||||
for child in self.children:
|
for child in self.children:
|
||||||
yield child
|
yield child
|
||||||
|
|
||||||
def replace_child(self, existing, new):
|
def replace_child(self, existing, new):
|
||||||
if hasattr(self, 'child'):
|
if hasattr(self, "child"):
|
||||||
if self.child is existing:
|
if self.child is existing:
|
||||||
self.child = new
|
self.child = new
|
||||||
|
|
||||||
if hasattr(self, 'children'):
|
if hasattr(self, "children"):
|
||||||
for i, child in enumerate(self.children):
|
for i, child in enumerate(self.children):
|
||||||
if child is existing:
|
if child is existing:
|
||||||
self.children[i] = new
|
self.children[i] = new
|
||||||
|
|
||||||
|
|
||||||
class Result(object):
|
class Result(object):
|
||||||
'''
|
"""
|
||||||
represents the results of an evaluation of statements against features.
|
represents the results of an evaluation of statements against features.
|
||||||
|
|
||||||
instances of this class should behave like a bool,
|
instances of this class should behave like a bool,
|
||||||
@@ -65,15 +66,16 @@ class Result(object):
|
|||||||
as well as the children Result instances.
|
as well as the children Result instances.
|
||||||
|
|
||||||
we need this so that we can render the tree of expressions and their results.
|
we need this so that we can render the tree of expressions and their results.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def __init__(self, success, statement, children, locations=None):
|
def __init__(self, success, statement, children, locations=None):
|
||||||
'''
|
"""
|
||||||
args:
|
args:
|
||||||
success (bool)
|
success (bool)
|
||||||
statement (capa.engine.Statement or capa.features.Feature)
|
statement (capa.engine.Statement or capa.features.Feature)
|
||||||
children (list[Result])
|
children (list[Result])
|
||||||
locations (iterable[VA])
|
locations (iterable[VA])
|
||||||
'''
|
"""
|
||||||
super(Result, self).__init__()
|
super(Result, self).__init__()
|
||||||
self.success = success
|
self.success = success
|
||||||
self.statement = statement
|
self.statement = statement
|
||||||
@@ -93,7 +95,8 @@ class Result(object):
|
|||||||
|
|
||||||
|
|
||||||
class And(Statement):
|
class And(Statement):
|
||||||
'''match if all of the children evaluate to True.'''
|
"""match if all of the children evaluate to True."""
|
||||||
|
|
||||||
def __init__(self, *children):
|
def __init__(self, *children):
|
||||||
super(And, self).__init__()
|
super(And, self).__init__()
|
||||||
self.children = list(children)
|
self.children = list(children)
|
||||||
@@ -105,7 +108,8 @@ class And(Statement):
|
|||||||
|
|
||||||
|
|
||||||
class Or(Statement):
|
class Or(Statement):
|
||||||
'''match if any of the children evaluate to True.'''
|
"""match if any of the children evaluate to True."""
|
||||||
|
|
||||||
def __init__(self, *children):
|
def __init__(self, *children):
|
||||||
super(Or, self).__init__()
|
super(Or, self).__init__()
|
||||||
self.children = list(children)
|
self.children = list(children)
|
||||||
@@ -117,7 +121,8 @@ class Or(Statement):
|
|||||||
|
|
||||||
|
|
||||||
class Not(Statement):
|
class Not(Statement):
|
||||||
'''match only if the child evaluates to False.'''
|
"""match only if the child evaluates to False."""
|
||||||
|
|
||||||
def __init__(self, child):
|
def __init__(self, child):
|
||||||
super(Not, self).__init__()
|
super(Not, self).__init__()
|
||||||
self.child = child
|
self.child = child
|
||||||
@@ -129,7 +134,8 @@ class Not(Statement):
|
|||||||
|
|
||||||
|
|
||||||
class Some(Statement):
|
class Some(Statement):
|
||||||
'''match if at least N of the children evaluate to True.'''
|
"""match if at least N of the children evaluate to True."""
|
||||||
|
|
||||||
def __init__(self, count, *children):
|
def __init__(self, count, *children):
|
||||||
super(Some, self).__init__()
|
super(Some, self).__init__()
|
||||||
self.count = count
|
self.count = count
|
||||||
@@ -146,7 +152,8 @@ class Some(Statement):
|
|||||||
|
|
||||||
|
|
||||||
class Range(Statement):
|
class Range(Statement):
|
||||||
'''match if the child is contained in the ctx set with a count in the given range.'''
|
"""match if the child is contained in the ctx set with a count in the given range."""
|
||||||
|
|
||||||
def __init__(self, child, min=None, max=None):
|
def __init__(self, child, min=None, max=None):
|
||||||
super(Range, self).__init__()
|
super(Range, self).__init__()
|
||||||
self.child = child
|
self.child = child
|
||||||
@@ -162,27 +169,28 @@ class Range(Statement):
|
|||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.max == (1 << 64 - 1):
|
if self.max == (1 << 64 - 1):
|
||||||
return 'range(%s, min=%d, max=infinity)' % (str(self.child), self.min)
|
return "range(%s, min=%d, max=infinity)" % (str(self.child), self.min)
|
||||||
else:
|
else:
|
||||||
return 'range(%s, min=%d, max=%d)' % (str(self.child), self.min, self.max)
|
return "range(%s, min=%d, max=%d)" % (str(self.child), self.min, self.max)
|
||||||
|
|
||||||
|
|
||||||
class Regex(Statement):
|
class Regex(Statement):
|
||||||
'''match if the given pattern matches a String feature.'''
|
"""match if the given pattern matches a String feature."""
|
||||||
|
|
||||||
def __init__(self, pattern):
|
def __init__(self, pattern):
|
||||||
super(Regex, self).__init__()
|
super(Regex, self).__init__()
|
||||||
self.pattern = pattern
|
self.pattern = pattern
|
||||||
pat = self.pattern[len('/'):-len('/')]
|
pat = self.pattern[len("/") : -len("/")]
|
||||||
flags = re.DOTALL
|
flags = re.DOTALL
|
||||||
if pattern.endswith('/i'):
|
if pattern.endswith("/i"):
|
||||||
pat = self.pattern[len('/'):-len('/i')]
|
pat = self.pattern[len("/") : -len("/i")]
|
||||||
flags |= re.IGNORECASE
|
flags |= re.IGNORECASE
|
||||||
self.re = re.compile(pat, flags)
|
self.re = re.compile(pat, flags)
|
||||||
self.match = ''
|
self.match = ""
|
||||||
|
|
||||||
def evaluate(self, ctx):
|
def evaluate(self, ctx):
|
||||||
for feature, locations in ctx.items():
|
for feature, locations in ctx.items():
|
||||||
if not isinstance(feature, (capa.features.String, )):
|
if not isinstance(feature, (capa.features.String,)):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# `re.search` finds a match anywhere in the given string
|
# `re.search` finds a match anywhere in the given string
|
||||||
@@ -200,27 +208,28 @@ class Regex(Statement):
|
|||||||
|
|
||||||
|
|
||||||
class Subscope(Statement):
|
class Subscope(Statement):
|
||||||
'''
|
"""
|
||||||
a subscope element is a placeholder in a rule - it should not be evaluated directly.
|
a subscope element is a placeholder in a rule - it should not be evaluated directly.
|
||||||
the engine should preprocess rules to extract subscope statements into their own rules.
|
the engine should preprocess rules to extract subscope statements into their own rules.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def __init__(self, scope, child):
|
def __init__(self, scope, child):
|
||||||
super(Subscope, self).__init__()
|
super(Subscope, self).__init__()
|
||||||
self.scope = scope
|
self.scope = scope
|
||||||
self.child = child
|
self.child = child
|
||||||
|
|
||||||
def evaluate(self, ctx):
|
def evaluate(self, ctx):
|
||||||
raise ValueError('cannot evaluate a subscope directly!')
|
raise ValueError("cannot evaluate a subscope directly!")
|
||||||
|
|
||||||
|
|
||||||
def topologically_order_rules(rules):
|
def topologically_order_rules(rules):
|
||||||
'''
|
"""
|
||||||
order the given rules such that dependencies show up before dependents.
|
order the given rules such that dependencies show up before dependents.
|
||||||
this means that as we match rules, we can add features for the matches, and these
|
this means that as we match rules, we can add features for the matches, and these
|
||||||
will be matched by subsequent rules if they follow this order.
|
will be matched by subsequent rules if they follow this order.
|
||||||
|
|
||||||
assumes that the rule dependency graph is a DAG.
|
assumes that the rule dependency graph is a DAG.
|
||||||
'''
|
"""
|
||||||
# we evaluate `rules` multiple times, so if its a generator, realize it into a list.
|
# we evaluate `rules` multiple times, so if its a generator, realize it into a list.
|
||||||
rules = list(rules)
|
rules = list(rules)
|
||||||
namespaces = capa.rules.index_rules_by_namespace(rules)
|
namespaces = capa.rules.index_rules_by_namespace(rules)
|
||||||
@@ -245,7 +254,7 @@ def topologically_order_rules(rules):
|
|||||||
|
|
||||||
|
|
||||||
def match(rules, features, va):
|
def match(rules, features, va):
|
||||||
'''
|
"""
|
||||||
Args:
|
Args:
|
||||||
rules (List[capa.rules.Rule]): these must already be ordered topologically by dependency.
|
rules (List[capa.rules.Rule]): these must already be ordered topologically by dependency.
|
||||||
features (Mapping[capa.features.Feature, int]):
|
features (Mapping[capa.features.Feature, int]):
|
||||||
@@ -255,7 +264,7 @@ def match(rules, features, va):
|
|||||||
Tuple[List[capa.features.Feature], Dict[str, Tuple[int, capa.engine.Result]]]: two-tuple with entries:
|
Tuple[List[capa.features.Feature], Dict[str, Tuple[int, capa.engine.Result]]]: two-tuple with entries:
|
||||||
- list of features used for matching (which may be greater than argument, due to rule match features), and
|
- list of features used for matching (which may be greater than argument, due to rule match features), and
|
||||||
- mapping from rule name to (location of match, result object)
|
- mapping from rule name to (location of match, result object)
|
||||||
'''
|
"""
|
||||||
results = collections.defaultdict(list)
|
results = collections.defaultdict(list)
|
||||||
|
|
||||||
# copy features so that we can modify it
|
# copy features so that we can modify it
|
||||||
@@ -270,10 +279,10 @@ def match(rules, features, va):
|
|||||||
results[rule.name].append((va, res))
|
results[rule.name].append((va, res))
|
||||||
features[capa.features.MatchedRule(rule.name)].add(va)
|
features[capa.features.MatchedRule(rule.name)].add(va)
|
||||||
|
|
||||||
namespace = rule.meta.get('namespace')
|
namespace = rule.meta.get("namespace")
|
||||||
if namespace:
|
if namespace:
|
||||||
while namespace:
|
while namespace:
|
||||||
features[capa.features.MatchedRule(namespace)].add(va)
|
features[capa.features.MatchedRule(namespace)].add(va)
|
||||||
namespace, _, _ = namespace.rpartition('/')
|
namespace, _, _ = namespace.rpartition("/")
|
||||||
|
|
||||||
return (features, results)
|
return (features, results)
|
||||||
|
|||||||
@@ -11,9 +11,9 @@ MAX_BYTES_FEATURE_SIZE = 0x100
|
|||||||
|
|
||||||
def bytes_to_str(b):
|
def bytes_to_str(b):
|
||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
return str(codecs.encode(b, 'hex').decode('utf-8'))
|
return str(codecs.encode(b, "hex").decode("utf-8"))
|
||||||
else:
|
else:
|
||||||
return codecs.encode(b, 'hex')
|
return codecs.encode(b, "hex")
|
||||||
|
|
||||||
|
|
||||||
class Feature(object):
|
class Feature(object):
|
||||||
@@ -29,7 +29,7 @@ class Feature(object):
|
|||||||
return self.name == other.name and self.args == other.args
|
return self.name == other.name and self.args == other.args
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return '%s(%s)' % (self.name.lower(), ','.join(self.args))
|
return "%s(%s)" % (self.name.lower(), ",".join(self.args))
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self)
|
return str(self)
|
||||||
@@ -41,8 +41,7 @@ class Feature(object):
|
|||||||
return self.__dict__
|
return self.__dict__
|
||||||
|
|
||||||
def freeze_serialize(self):
|
def freeze_serialize(self):
|
||||||
return (self.__class__.__name__,
|
return (self.__class__.__name__, self.args)
|
||||||
self.args)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def freeze_deserialize(cls, args):
|
def freeze_deserialize(cls, args):
|
||||||
@@ -55,30 +54,30 @@ class MatchedRule(Feature):
|
|||||||
self.rule_name = rule_name
|
self.rule_name = rule_name
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'match(%s)' % (self.rule_name)
|
return "match(%s)" % (self.rule_name)
|
||||||
|
|
||||||
|
|
||||||
class Characteristic(Feature):
|
class Characteristic(Feature):
|
||||||
def __init__(self, name, value=None):
|
def __init__(self, name, value=None):
|
||||||
'''
|
"""
|
||||||
when `value` is not provided, this serves as descriptor for a class of characteristics.
|
when `value` is not provided, this serves as descriptor for a class of characteristics.
|
||||||
this is only used internally, such as in `rules.py` when checking if a statement is
|
this is only used internally, such as in `rules.py` when checking if a statement is
|
||||||
supported by a given scope.
|
supported by a given scope.
|
||||||
'''
|
"""
|
||||||
super(Characteristic, self).__init__([name, value])
|
super(Characteristic, self).__init__([name, value])
|
||||||
self.name = name
|
self.name = name
|
||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
def evaluate(self, ctx):
|
def evaluate(self, ctx):
|
||||||
if self.value is None:
|
if self.value is None:
|
||||||
raise ValueError('cannot evaluate characteristc %s with empty value' % (str(self)))
|
raise ValueError("cannot evaluate characteristc %s with empty value" % (str(self)))
|
||||||
return super(Characteristic, self).evaluate(ctx)
|
return super(Characteristic, self).evaluate(ctx)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.value is None:
|
if self.value is None:
|
||||||
return 'characteristic(%s)' % (self.name)
|
return "characteristic(%s)" % (self.name)
|
||||||
else:
|
else:
|
||||||
return 'characteristic(%s(%s))' % (self.name, self.value)
|
return "characteristic(%s(%s))" % (self.name, self.value)
|
||||||
|
|
||||||
|
|
||||||
class String(Feature):
|
class String(Feature):
|
||||||
@@ -98,7 +97,7 @@ class Bytes(Feature):
|
|||||||
|
|
||||||
def evaluate(self, ctx):
|
def evaluate(self, ctx):
|
||||||
for feature, locations in ctx.items():
|
for feature, locations in ctx.items():
|
||||||
if not isinstance(feature, (capa.features.Bytes, )):
|
if not isinstance(feature, (capa.features.Bytes,)):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if feature.value.startswith(self.value):
|
if feature.value.startswith(self.value):
|
||||||
@@ -108,14 +107,13 @@ class Bytes(Feature):
|
|||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.symbol:
|
if self.symbol:
|
||||||
return 'bytes(0x%s = %s)' % (bytes_to_str(self.value).upper(), self.symbol)
|
return "bytes(0x%s = %s)" % (bytes_to_str(self.value).upper(), self.symbol)
|
||||||
else:
|
else:
|
||||||
return 'bytes(0x%s)' % (bytes_to_str(self.value).upper())
|
return "bytes(0x%s)" % (bytes_to_str(self.value).upper())
|
||||||
|
|
||||||
def freeze_serialize(self):
|
def freeze_serialize(self):
|
||||||
return (self.__class__.__name__,
|
return (self.__class__.__name__, [bytes_to_str(x).upper() for x in self.args])
|
||||||
[bytes_to_str(x).upper() for x in self.args])
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def freeze_deserialize(cls, args):
|
def freeze_deserialize(cls, args):
|
||||||
return cls(*[codecs.decode(x, 'hex') for x in args])
|
return cls(*[codecs.decode(x, "hex") for x in args])
|
||||||
|
|||||||
@@ -6,4 +6,4 @@ class BasicBlock(Feature):
|
|||||||
super(BasicBlock, self).__init__([])
|
super(BasicBlock, self).__init__([])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'basic block'
|
return "basic block"
|
||||||
|
|||||||
@@ -10,11 +10,11 @@ try:
|
|||||||
except (ImportError, SyntaxError):
|
except (ImportError, SyntaxError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
__all__ = ['ida', 'viv']
|
__all__ = ["ida", "viv"]
|
||||||
|
|
||||||
|
|
||||||
class FeatureExtractor(object):
|
class FeatureExtractor(object):
|
||||||
'''
|
"""
|
||||||
FeatureExtractor defines the interface for fetching features from a sample.
|
FeatureExtractor defines the interface for fetching features from a sample.
|
||||||
|
|
||||||
There may be multiple backends that support fetching features for capa.
|
There may be multiple backends that support fetching features for capa.
|
||||||
@@ -27,7 +27,8 @@ class FeatureExtractor(object):
|
|||||||
Also, this provides a way to hook in an IDA backend.
|
Also, this provides a way to hook in an IDA backend.
|
||||||
|
|
||||||
This class is not instantiated directly; it is the base class for other implementations.
|
This class is not instantiated directly; it is the base class for other implementations.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
__metaclass__ = abc.ABCMeta
|
__metaclass__ = abc.ABCMeta
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -40,7 +41,7 @@ class FeatureExtractor(object):
|
|||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_file_features(self):
|
def extract_file_features(self):
|
||||||
'''
|
"""
|
||||||
extract file-scope features.
|
extract file-scope features.
|
||||||
|
|
||||||
example::
|
example::
|
||||||
@@ -51,12 +52,12 @@ class FeatureExtractor(object):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[capa.features.Feature, int]: feature and its location
|
Tuple[capa.features.Feature, int]: feature and its location
|
||||||
'''
|
"""
|
||||||
raise NotImplemented
|
raise NotImplemented
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def get_functions(self):
|
def get_functions(self):
|
||||||
'''
|
"""
|
||||||
enumerate the functions and provide opaque values that will
|
enumerate the functions and provide opaque values that will
|
||||||
subsequently be provided to `.extract_function_features()`, etc.
|
subsequently be provided to `.extract_function_features()`, etc.
|
||||||
|
|
||||||
@@ -67,12 +68,12 @@ class FeatureExtractor(object):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
any: the opaque function value.
|
any: the opaque function value.
|
||||||
'''
|
"""
|
||||||
raise NotImplemented
|
raise NotImplemented
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_function_features(self, f):
|
def extract_function_features(self, f):
|
||||||
'''
|
"""
|
||||||
extract function-scope features.
|
extract function-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
|
|
||||||
@@ -88,12 +89,12 @@ class FeatureExtractor(object):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[capa.features.Feature, int]: feature and its location
|
Tuple[capa.features.Feature, int]: feature and its location
|
||||||
'''
|
"""
|
||||||
raise NotImplemented
|
raise NotImplemented
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def get_basic_blocks(self, f):
|
def get_basic_blocks(self, f):
|
||||||
'''
|
"""
|
||||||
enumerate the basic blocks in the given function and provide opaque values that will
|
enumerate the basic blocks in the given function and provide opaque values that will
|
||||||
subsequently be provided to `.extract_basic_block_features()`, etc.
|
subsequently be provided to `.extract_basic_block_features()`, etc.
|
||||||
|
|
||||||
@@ -104,12 +105,12 @@ class FeatureExtractor(object):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
any: the opaque basic block value.
|
any: the opaque basic block value.
|
||||||
'''
|
"""
|
||||||
raise NotImplemented
|
raise NotImplemented
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_basic_block_features(self, f, bb):
|
def extract_basic_block_features(self, f, bb):
|
||||||
'''
|
"""
|
||||||
extract basic block-scope features.
|
extract basic block-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
|
|
||||||
@@ -127,12 +128,12 @@ class FeatureExtractor(object):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[capa.features.Feature, int]: feature and its location
|
Tuple[capa.features.Feature, int]: feature and its location
|
||||||
'''
|
"""
|
||||||
raise NotImplemented
|
raise NotImplemented
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def get_instructions(self, f, bb):
|
def get_instructions(self, f, bb):
|
||||||
'''
|
"""
|
||||||
enumerate the instructions in the given basic block and provide opaque values that will
|
enumerate the instructions in the given basic block and provide opaque values that will
|
||||||
subsequently be provided to `.extract_insn_features()`, etc.
|
subsequently be provided to `.extract_insn_features()`, etc.
|
||||||
|
|
||||||
@@ -143,12 +144,12 @@ class FeatureExtractor(object):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
any: the opaque function value.
|
any: the opaque function value.
|
||||||
'''
|
"""
|
||||||
raise NotImplemented
|
raise NotImplemented
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_insn_features(self, f, bb, insn):
|
def extract_insn_features(self, f, bb, insn):
|
||||||
'''
|
"""
|
||||||
extract instruction-scope features.
|
extract instruction-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
|
|
||||||
@@ -168,12 +169,12 @@ class FeatureExtractor(object):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[capa.features.Feature, int]: feature and its location
|
Tuple[capa.features.Feature, int]: feature and its location
|
||||||
'''
|
"""
|
||||||
raise NotImplemented
|
raise NotImplemented
|
||||||
|
|
||||||
|
|
||||||
class NullFeatureExtractor(FeatureExtractor):
|
class NullFeatureExtractor(FeatureExtractor):
|
||||||
'''
|
"""
|
||||||
An extractor that extracts some user-provided features.
|
An extractor that extracts some user-provided features.
|
||||||
The structure of the single parameter is demonstrated in the example below.
|
The structure of the single parameter is demonstrated in the example below.
|
||||||
|
|
||||||
@@ -211,64 +212,66 @@ class NullFeatureExtractor(FeatureExtractor):
|
|||||||
0x40200: ...
|
0x40200: ...
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def __init__(self, features):
|
def __init__(self, features):
|
||||||
super(NullFeatureExtractor, self).__init__()
|
super(NullFeatureExtractor, self).__init__()
|
||||||
self.features = features
|
self.features = features
|
||||||
|
|
||||||
def extract_file_features(self):
|
def extract_file_features(self):
|
||||||
for p in self.features.get('file features', []):
|
for p in self.features.get("file features", []):
|
||||||
va, feature = p
|
va, feature = p
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
def get_functions(self):
|
def get_functions(self):
|
||||||
for va in sorted(self.features['functions'].keys()):
|
for va in sorted(self.features["functions"].keys()):
|
||||||
yield va
|
yield va
|
||||||
|
|
||||||
def extract_function_features(self, f):
|
def extract_function_features(self, f):
|
||||||
for p in (self.features # noqa: E127 line over-indented
|
for p in self.features.get("functions", {}).get(f, {}).get("features", []): # noqa: E127 line over-indented
|
||||||
.get('functions', {})
|
|
||||||
.get(f, {})
|
|
||||||
.get('features', [])):
|
|
||||||
va, feature = p
|
va, feature = p
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
def get_basic_blocks(self, f):
|
def get_basic_blocks(self, f):
|
||||||
for va in sorted(self.features # noqa: E127 line over-indented
|
for va in sorted(
|
||||||
.get('functions', {})
|
self.features.get("functions", {}) # noqa: E127 line over-indented
|
||||||
.get(f, {})
|
.get(f, {})
|
||||||
.get('basic blocks', {})
|
.get("basic blocks", {})
|
||||||
.keys()):
|
.keys()
|
||||||
|
):
|
||||||
yield va
|
yield va
|
||||||
|
|
||||||
def extract_basic_block_features(self, f, bb):
|
def extract_basic_block_features(self, f, bb):
|
||||||
for p in (self.features # noqa: E127 line over-indented
|
for p in (
|
||||||
.get('functions', {})
|
self.features.get("functions", {}) # noqa: E127 line over-indented
|
||||||
.get(f, {})
|
.get(f, {})
|
||||||
.get('basic blocks', {})
|
.get("basic blocks", {})
|
||||||
.get(bb, {})
|
.get(bb, {})
|
||||||
.get('features', [])):
|
.get("features", [])
|
||||||
|
):
|
||||||
va, feature = p
|
va, feature = p
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
def get_instructions(self, f, bb):
|
def get_instructions(self, f, bb):
|
||||||
for va in sorted(self.features # noqa: E127 line over-indented
|
for va in sorted(
|
||||||
.get('functions', {})
|
self.features.get("functions", {}) # noqa: E127 line over-indented
|
||||||
.get(f, {})
|
.get(f, {})
|
||||||
.get('basic blocks', {})
|
.get("basic blocks", {})
|
||||||
.get(bb, {})
|
.get(bb, {})
|
||||||
.get('instructions', {})
|
.get("instructions", {})
|
||||||
.keys()):
|
.keys()
|
||||||
|
):
|
||||||
yield va
|
yield va
|
||||||
|
|
||||||
def extract_insn_features(self, f, bb, insn):
|
def extract_insn_features(self, f, bb, insn):
|
||||||
for p in (self.features # noqa: E127 line over-indented
|
for p in (
|
||||||
.get('functions', {})
|
self.features.get("functions", {}) # noqa: E127 line over-indented
|
||||||
.get(f, {})
|
.get(f, {})
|
||||||
.get('basic blocks', {})
|
.get("basic blocks", {})
|
||||||
.get(bb, {})
|
.get(bb, {})
|
||||||
.get('instructions', {})
|
.get("instructions", {})
|
||||||
.get(insn, {})
|
.get(insn, {})
|
||||||
.get('features', [])):
|
.get("features", [])
|
||||||
|
):
|
||||||
va, feature = p
|
va, feature = p
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|||||||
@@ -10,27 +10,27 @@ def xor_static(data, i):
|
|||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
return bytes(c ^ i for c in data)
|
return bytes(c ^ i for c in data)
|
||||||
else:
|
else:
|
||||||
return ''.join(chr(ord(c) ^ i) for c in data)
|
return "".join(chr(ord(c) ^ i) for c in data)
|
||||||
|
|
||||||
|
|
||||||
def is_aw_function(function_name):
|
def is_aw_function(function_name):
|
||||||
'''
|
"""
|
||||||
is the given function name an A/W function?
|
is the given function name an A/W function?
|
||||||
these are variants of functions that, on Windows, accept either a narrow or wide string.
|
these are variants of functions that, on Windows, accept either a narrow or wide string.
|
||||||
'''
|
"""
|
||||||
if len(function_name) < 2:
|
if len(function_name) < 2:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# last character should be 'A' or 'W'
|
# last character should be 'A' or 'W'
|
||||||
if function_name[-1] not in ('A', 'W'):
|
if function_name[-1] not in ("A", "W"):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# second to last character should be lowercase letter
|
# second to last character should be lowercase letter
|
||||||
return 'a' <= function_name[-2] <= 'z' or '0' <= function_name[-2] <= '9'
|
return "a" <= function_name[-2] <= "z" or "0" <= function_name[-2] <= "9"
|
||||||
|
|
||||||
|
|
||||||
def generate_api_features(apiname, va):
|
def generate_api_features(apiname, va):
|
||||||
'''
|
"""
|
||||||
for a given function name and address, generate API names.
|
for a given function name and address, generate API names.
|
||||||
we over-generate features to make matching easier.
|
we over-generate features to make matching easier.
|
||||||
these include:
|
these include:
|
||||||
@@ -38,7 +38,7 @@ def generate_api_features(apiname, va):
|
|||||||
- kernel32.CreateFile
|
- kernel32.CreateFile
|
||||||
- CreateFileA
|
- CreateFileA
|
||||||
- CreateFile
|
- CreateFile
|
||||||
'''
|
"""
|
||||||
# (kernel32.CreateFileA, 0x401000)
|
# (kernel32.CreateFileA, 0x401000)
|
||||||
yield API(apiname), va
|
yield API(apiname), va
|
||||||
|
|
||||||
@@ -46,8 +46,8 @@ def generate_api_features(apiname, va):
|
|||||||
# (kernel32.CreateFile, 0x401000)
|
# (kernel32.CreateFile, 0x401000)
|
||||||
yield API(apiname[:-1]), va
|
yield API(apiname[:-1]), va
|
||||||
|
|
||||||
if '.' in apiname:
|
if "." in apiname:
|
||||||
modname, impname = apiname.split('.')
|
modname, impname = apiname.split(".")
|
||||||
# strip modname to support importname-only matching
|
# strip modname to support importname-only matching
|
||||||
# (CreateFileA, 0x401000)
|
# (CreateFileA, 0x401000)
|
||||||
yield API(impname), va
|
yield API(impname), va
|
||||||
|
|||||||
@@ -26,17 +26,17 @@ def get_va(self):
|
|||||||
|
|
||||||
|
|
||||||
def add_va_int_cast(o):
|
def add_va_int_cast(o):
|
||||||
'''
|
"""
|
||||||
dynamically add a cast-to-int (`__int__`) method to the given object
|
dynamically add a cast-to-int (`__int__`) method to the given object
|
||||||
that returns the value of the `.va` property.
|
that returns the value of the `.va` property.
|
||||||
this bit of skullduggery lets use cast viv-utils objects as ints.
|
this bit of skullduggery lets use cast viv-utils objects as ints.
|
||||||
the correct way of doing this is to update viv-utils (or subclass the objects here).
|
the correct way of doing this is to update viv-utils (or subclass the objects here).
|
||||||
'''
|
"""
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
setattr(o, '__int__', types.MethodType(get_va, o))
|
setattr(o, "__int__", types.MethodType(get_va, o))
|
||||||
else:
|
else:
|
||||||
setattr(o, '__int__', types.MethodType(get_va, o, type(o)))
|
setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
|
||||||
return o
|
return o
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -15,23 +15,23 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
|||||||
|
|
||||||
|
|
||||||
def _ida_get_printable_len(op):
|
def _ida_get_printable_len(op):
|
||||||
''' Return string length if all operand bytes are ascii or utf16-le printable
|
""" Return string length if all operand bytes are ascii or utf16-le printable
|
||||||
|
|
||||||
args:
|
args:
|
||||||
op (IDA op_t)
|
op (IDA op_t)
|
||||||
'''
|
"""
|
||||||
op_val = helpers.mask_op_val(op)
|
op_val = helpers.mask_op_val(op)
|
||||||
|
|
||||||
if op.dtype == idaapi.dt_byte:
|
if op.dtype == idaapi.dt_byte:
|
||||||
chars = struct.pack('<B', op_val)
|
chars = struct.pack("<B", op_val)
|
||||||
elif op.dtype == idaapi.dt_word:
|
elif op.dtype == idaapi.dt_word:
|
||||||
chars = struct.pack('<H', op_val)
|
chars = struct.pack("<H", op_val)
|
||||||
elif op.dtype == idaapi.dt_dword:
|
elif op.dtype == idaapi.dt_dword:
|
||||||
chars = struct.pack('<I', op_val)
|
chars = struct.pack("<I", op_val)
|
||||||
elif op.dtype == idaapi.dt_qword:
|
elif op.dtype == idaapi.dt_qword:
|
||||||
chars = struct.pack('<Q', op_val)
|
chars = struct.pack("<Q", op_val)
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unhandled operand data type 0x%x.' % op.dtype)
|
raise ValueError("Unhandled operand data type 0x%x." % op.dtype)
|
||||||
|
|
||||||
def _is_printable_ascii(chars):
|
def _is_printable_ascii(chars):
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
@@ -44,7 +44,7 @@ def _ida_get_printable_len(op):
|
|||||||
if all(c == 0x00 for c in chars[1::2]):
|
if all(c == 0x00 for c in chars[1::2]):
|
||||||
return _is_printable_ascii(chars[::2])
|
return _is_printable_ascii(chars[::2])
|
||||||
else:
|
else:
|
||||||
if all(c == '\x00' for c in chars[1::2]):
|
if all(c == "\x00" for c in chars[1::2]):
|
||||||
return _is_printable_ascii(chars[::2])
|
return _is_printable_ascii(chars[::2])
|
||||||
|
|
||||||
if _is_printable_ascii(chars):
|
if _is_printable_ascii(chars):
|
||||||
@@ -57,32 +57,32 @@ def _ida_get_printable_len(op):
|
|||||||
|
|
||||||
|
|
||||||
def _is_mov_imm_to_stack(insn):
|
def _is_mov_imm_to_stack(insn):
|
||||||
''' verify instruction moves immediate onto stack
|
""" verify instruction moves immediate onto stack
|
||||||
|
|
||||||
args:
|
args:
|
||||||
insn (IDA insn_t)
|
insn (IDA insn_t)
|
||||||
'''
|
"""
|
||||||
if insn.Op2.type != idaapi.o_imm:
|
if insn.Op2.type != idaapi.o_imm:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not helpers.is_op_stack_var(insn.ea, 0):
|
if not helpers.is_op_stack_var(insn.ea, 0):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not insn.get_canon_mnem().startswith('mov'):
|
if not insn.get_canon_mnem().startswith("mov"):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def _ida_bb_contains_stackstring(f, bb):
|
def _ida_bb_contains_stackstring(f, bb):
|
||||||
''' check basic block for stackstring indicators
|
""" check basic block for stackstring indicators
|
||||||
|
|
||||||
true if basic block contains enough moves of constant bytes to the stack
|
true if basic block contains enough moves of constant bytes to the stack
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
'''
|
"""
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for insn in helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
for insn in helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
||||||
@@ -96,25 +96,25 @@ def _ida_bb_contains_stackstring(f, bb):
|
|||||||
|
|
||||||
|
|
||||||
def extract_bb_stackstring(f, bb):
|
def extract_bb_stackstring(f, bb):
|
||||||
''' extract stackstring indicators from basic block
|
""" extract stackstring indicators from basic block
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
'''
|
"""
|
||||||
if _ida_bb_contains_stackstring(f, bb):
|
if _ida_bb_contains_stackstring(f, bb):
|
||||||
yield Characteristic('stack string', True), bb.start_ea
|
yield Characteristic("stack string", True), bb.start_ea
|
||||||
|
|
||||||
|
|
||||||
def _ida_bb_contains_tight_loop(f, bb):
|
def _ida_bb_contains_tight_loop(f, bb):
|
||||||
''' check basic block for stackstring indicators
|
""" check basic block for stackstring indicators
|
||||||
|
|
||||||
true if last instruction in basic block branches to basic block start
|
true if last instruction in basic block branches to basic block start
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
'''
|
"""
|
||||||
bb_end = idc.prev_head(bb.end_ea)
|
bb_end = idc.prev_head(bb.end_ea)
|
||||||
|
|
||||||
if bb.start_ea < bb_end:
|
if bb.start_ea < bb_end:
|
||||||
@@ -126,23 +126,23 @@ def _ida_bb_contains_tight_loop(f, bb):
|
|||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(f, bb):
|
def extract_bb_tight_loop(f, bb):
|
||||||
''' extract tight loop indicators from a basic block
|
""" extract tight loop indicators from a basic block
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
'''
|
"""
|
||||||
if _ida_bb_contains_tight_loop(f, bb):
|
if _ida_bb_contains_tight_loop(f, bb):
|
||||||
yield Characteristic('tight loop', True), bb.start_ea
|
yield Characteristic("tight loop", True), bb.start_ea
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f, bb):
|
def extract_features(f, bb):
|
||||||
''' extract basic block features
|
""" extract basic block features
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
'''
|
"""
|
||||||
yield BasicBlock(), bb.start_ea
|
yield BasicBlock(), bb.start_ea
|
||||||
|
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
@@ -166,5 +166,5 @@ def main():
|
|||||||
pprint.pprint(features)
|
pprint.pprint(features)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -16,32 +16,39 @@ import capa.features.extractors.ida.helpers
|
|||||||
|
|
||||||
|
|
||||||
def _ida_check_segment_for_pe(seg):
|
def _ida_check_segment_for_pe(seg):
|
||||||
''' check segment for embedded PE
|
""" check segment for embedded PE
|
||||||
|
|
||||||
adapted for IDA from:
|
adapted for IDA from:
|
||||||
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
|
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
|
||||||
|
|
||||||
args:
|
args:
|
||||||
seg (IDA segment_t)
|
seg (IDA segment_t)
|
||||||
'''
|
"""
|
||||||
seg_max = seg.end_ea
|
seg_max = seg.end_ea
|
||||||
mz_xor = [(capa.features.extractors.helpers.xor_static(b'MZ', i),
|
mz_xor = [
|
||||||
capa.features.extractors.helpers.xor_static(b'PE', i),
|
(
|
||||||
i)
|
capa.features.extractors.helpers.xor_static(b"MZ", i),
|
||||||
for i in range(256)]
|
capa.features.extractors.helpers.xor_static(b"PE", i),
|
||||||
todo = [(capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i) for mzx, pex, i in mz_xor]
|
i,
|
||||||
|
)
|
||||||
|
for i in range(256)
|
||||||
|
]
|
||||||
|
todo = [
|
||||||
|
(capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i)
|
||||||
|
for mzx, pex, i in mz_xor
|
||||||
|
]
|
||||||
todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != idaapi.BADADDR]
|
todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != idaapi.BADADDR]
|
||||||
|
|
||||||
while len(todo):
|
while len(todo):
|
||||||
off, mzx, pex, i = todo.pop()
|
off, mzx, pex, i = todo.pop()
|
||||||
|
|
||||||
# The MZ header has one field we will check e_lfanew is at 0x3c
|
# The MZ header has one field we will check e_lfanew is at 0x3c
|
||||||
e_lfanew = off + 0x3c
|
e_lfanew = off + 0x3C
|
||||||
|
|
||||||
if seg_max < (e_lfanew + 4):
|
if seg_max < (e_lfanew + 4):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
newoff = struct.unpack('<I', capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
|
newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
|
||||||
|
|
||||||
peoff = off + newoff
|
peoff = off + newoff
|
||||||
if seg_max < (peoff + 2):
|
if seg_max < (peoff + 2):
|
||||||
@@ -56,29 +63,29 @@ def _ida_check_segment_for_pe(seg):
|
|||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe():
|
def extract_file_embedded_pe():
|
||||||
''' extract embedded PE features
|
""" extract embedded PE features
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
- '-R' from console
|
- '-R' from console
|
||||||
- Check 'Load resource sections' when opening binary in IDA manually
|
- Check 'Load resource sections' when opening binary in IDA manually
|
||||||
'''
|
"""
|
||||||
for seg in capa.features.extractors.ida.helpers.get_segments():
|
for seg in capa.features.extractors.ida.helpers.get_segments():
|
||||||
if seg.is_header_segm():
|
if seg.is_header_segm():
|
||||||
# IDA may load header segments, skip if present
|
# IDA may load header segments, skip if present
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for ea, _ in _ida_check_segment_for_pe(seg):
|
for ea, _ in _ida_check_segment_for_pe(seg):
|
||||||
yield Characteristic('embedded pe', True), ea
|
yield Characteristic("embedded pe", True), ea
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names():
|
def extract_file_export_names():
|
||||||
''' extract function exports '''
|
""" extract function exports """
|
||||||
for _, _, ea, name in idautils.Entries():
|
for _, _, ea, name in idautils.Entries():
|
||||||
yield Export(name), ea
|
yield Export(name), ea
|
||||||
|
|
||||||
|
|
||||||
def extract_file_import_names():
|
def extract_file_import_names():
|
||||||
''' extract function imports
|
""" extract function imports
|
||||||
|
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
- modulename.#ordinal
|
- modulename.#ordinal
|
||||||
@@ -87,25 +94,25 @@ def extract_file_import_names():
|
|||||||
matching:
|
matching:
|
||||||
- modulename.importname
|
- modulename.importname
|
||||||
- importname
|
- importname
|
||||||
'''
|
"""
|
||||||
for ea, imp_info in capa.features.extractors.ida.helpers.get_file_imports().items():
|
for ea, imp_info in capa.features.extractors.ida.helpers.get_file_imports().items():
|
||||||
dllname, name, ordi = imp_info
|
dllname, name, ordi = imp_info
|
||||||
|
|
||||||
if name:
|
if name:
|
||||||
yield Import('%s.%s' % (dllname, name)), ea
|
yield Import("%s.%s" % (dllname, name)), ea
|
||||||
yield Import(name), ea
|
yield Import(name), ea
|
||||||
|
|
||||||
if ordi:
|
if ordi:
|
||||||
yield Import('%s.#%s' % (dllname, str(ordi))), ea
|
yield Import("%s.#%s" % (dllname, str(ordi))), ea
|
||||||
|
|
||||||
|
|
||||||
def extract_file_section_names():
|
def extract_file_section_names():
|
||||||
''' extract section names
|
""" extract section names
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
- '-R' from console
|
- '-R' from console
|
||||||
- Check 'Load resource sections' when opening binary in IDA manually
|
- Check 'Load resource sections' when opening binary in IDA manually
|
||||||
'''
|
"""
|
||||||
for seg in capa.features.extractors.ida.helpers.get_segments():
|
for seg in capa.features.extractors.ida.helpers.get_segments():
|
||||||
if seg.is_header_segm():
|
if seg.is_header_segm():
|
||||||
# IDA may load header segments, skip if present
|
# IDA may load header segments, skip if present
|
||||||
@@ -115,12 +122,12 @@ def extract_file_section_names():
|
|||||||
|
|
||||||
|
|
||||||
def extract_file_strings():
|
def extract_file_strings():
|
||||||
''' extract ASCII and UTF-16 LE strings
|
""" extract ASCII and UTF-16 LE strings
|
||||||
|
|
||||||
IDA must load resource sections for this to be complete
|
IDA must load resource sections for this to be complete
|
||||||
- '-R' from console
|
- '-R' from console
|
||||||
- Check 'Load resource sections' when opening binary in IDA manually
|
- Check 'Load resource sections' when opening binary in IDA manually
|
||||||
'''
|
"""
|
||||||
for seg in capa.features.extractors.ida.helpers.get_segments():
|
for seg in capa.features.extractors.ida.helpers.get_segments():
|
||||||
seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)
|
seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)
|
||||||
|
|
||||||
@@ -132,7 +139,7 @@ def extract_file_strings():
|
|||||||
|
|
||||||
|
|
||||||
def extract_features():
|
def extract_features():
|
||||||
''' extract file features '''
|
""" extract file features """
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, va in file_handler():
|
for feature, va in file_handler():
|
||||||
yield feature, va
|
yield feature, va
|
||||||
@@ -151,5 +158,5 @@ def main():
|
|||||||
pprint.pprint(list(extract_features()))
|
pprint.pprint(list(extract_features()))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -6,14 +6,14 @@ from capa.features.extractors import loops
|
|||||||
|
|
||||||
|
|
||||||
def _ida_function_contains_switch(f):
|
def _ida_function_contains_switch(f):
|
||||||
''' check a function for switch statement indicators
|
""" check a function for switch statement indicators
|
||||||
|
|
||||||
adapted from:
|
adapted from:
|
||||||
https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1
|
https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1
|
||||||
|
|
||||||
arg:
|
arg:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
'''
|
"""
|
||||||
for start, end in idautils.Chunks(f.start_ea):
|
for start, end in idautils.Chunks(f.start_ea):
|
||||||
for head in idautils.Heads(start, end):
|
for head in idautils.Heads(start, end):
|
||||||
if idaapi.get_switch_info(head):
|
if idaapi.get_switch_info(head):
|
||||||
@@ -23,68 +23,63 @@ def _ida_function_contains_switch(f):
|
|||||||
|
|
||||||
|
|
||||||
def extract_function_switch(f):
|
def extract_function_switch(f):
|
||||||
''' extract switch indicators from a function
|
""" extract switch indicators from a function
|
||||||
|
|
||||||
arg:
|
arg:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
'''
|
"""
|
||||||
if _ida_function_contains_switch(f):
|
if _ida_function_contains_switch(f):
|
||||||
yield Characteristic('switch', True), f.start_ea
|
yield Characteristic("switch", True), f.start_ea
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_to(f):
|
def extract_function_calls_to(f):
|
||||||
''' extract callers to a function
|
""" extract callers to a function
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
'''
|
"""
|
||||||
for ea in idautils.CodeRefsTo(f.start_ea, True):
|
for ea in idautils.CodeRefsTo(f.start_ea, True):
|
||||||
yield Characteristic('calls to', True), ea
|
yield Characteristic("calls to", True), ea
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(f):
|
def extract_function_loop(f):
|
||||||
''' extract loop indicators from a function
|
""" extract loop indicators from a function
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
'''
|
"""
|
||||||
edges = []
|
edges = []
|
||||||
for bb in idaapi.FlowChart(f):
|
for bb in idaapi.FlowChart(f):
|
||||||
map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs())
|
map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs())
|
||||||
|
|
||||||
if edges and loops.has_loop(edges):
|
if edges and loops.has_loop(edges):
|
||||||
yield Characteristic('loop', True), f.start_ea
|
yield Characteristic("loop", True), f.start_ea
|
||||||
|
|
||||||
|
|
||||||
def extract_recursive_call(f):
|
def extract_recursive_call(f):
|
||||||
''' extract recursive function call
|
""" extract recursive function call
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
'''
|
"""
|
||||||
for ref in idautils.CodeRefsTo(f.start_ea, True):
|
for ref in idautils.CodeRefsTo(f.start_ea, True):
|
||||||
if f.contains(ref):
|
if f.contains(ref):
|
||||||
yield Characteristic('recursive call', True), f.start_ea
|
yield Characteristic("recursive call", True), f.start_ea
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f):
|
def extract_features(f):
|
||||||
''' extract function features
|
""" extract function features
|
||||||
|
|
||||||
arg:
|
arg:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
'''
|
"""
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, va in func_handler(f):
|
for feature, va in func_handler(f):
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
|
|
||||||
FUNCTION_HANDLERS = (
|
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_switch, extract_function_loop, extract_recursive_call)
|
||||||
extract_function_calls_to,
|
|
||||||
extract_function_switch,
|
|
||||||
extract_function_loop,
|
|
||||||
extract_recursive_call
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -96,5 +91,5 @@ def main():
|
|||||||
pprint.pprint(features)
|
pprint.pprint(features)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -7,21 +7,21 @@ import idc
|
|||||||
|
|
||||||
|
|
||||||
def find_byte_sequence(start, end, seq):
|
def find_byte_sequence(start, end, seq):
|
||||||
''' find byte sequence
|
""" find byte sequence
|
||||||
|
|
||||||
args:
|
args:
|
||||||
start: min virtual address
|
start: min virtual address
|
||||||
end: max virtual address
|
end: max virtual address
|
||||||
seq: bytes to search e.g. b'\x01\x03'
|
seq: bytes to search e.g. b'\x01\x03'
|
||||||
'''
|
"""
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
return idaapi.find_binary(start, end, ' '.join(['%02x' % b for b in seq]), 0, idaapi.SEARCH_DOWN)
|
return idaapi.find_binary(start, end, " ".join(["%02x" % b for b in seq]), 0, idaapi.SEARCH_DOWN)
|
||||||
else:
|
else:
|
||||||
return idaapi.find_binary(start, end, ' '.join(['%02x' % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN)
|
return idaapi.find_binary(start, end, " ".join(["%02x" % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN)
|
||||||
|
|
||||||
|
|
||||||
def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
|
def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
|
||||||
''' get functions, range optional
|
""" get functions, range optional
|
||||||
|
|
||||||
args:
|
args:
|
||||||
start: min virtual address
|
start: min virtual address
|
||||||
@@ -29,7 +29,7 @@ def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
|
|||||||
|
|
||||||
ret:
|
ret:
|
||||||
yield func_t*
|
yield func_t*
|
||||||
'''
|
"""
|
||||||
for ea in idautils.Functions(start=start, end=end):
|
for ea in idautils.Functions(start=start, end=end):
|
||||||
f = idaapi.get_func(ea)
|
f = idaapi.get_func(ea)
|
||||||
|
|
||||||
@@ -43,7 +43,7 @@ def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
|
|||||||
|
|
||||||
|
|
||||||
def get_segments():
|
def get_segments():
|
||||||
''' Get list of segments (sections) in the binary image '''
|
""" Get list of segments (sections) in the binary image """
|
||||||
for n in range(idaapi.get_segm_qty()):
|
for n in range(idaapi.get_segm_qty()):
|
||||||
seg = idaapi.getnseg(n)
|
seg = idaapi.getnseg(n)
|
||||||
if seg:
|
if seg:
|
||||||
@@ -51,11 +51,11 @@ def get_segments():
|
|||||||
|
|
||||||
|
|
||||||
def get_segment_buffer(seg):
|
def get_segment_buffer(seg):
|
||||||
''' return bytes stored in a given segment
|
""" return bytes stored in a given segment
|
||||||
|
|
||||||
decrease buffer size until IDA is able to read bytes from the segment
|
decrease buffer size until IDA is able to read bytes from the segment
|
||||||
'''
|
"""
|
||||||
buff = b''
|
buff = b""
|
||||||
sz = seg.end_ea - seg.start_ea
|
sz = seg.end_ea - seg.start_ea
|
||||||
|
|
||||||
while sz > 0:
|
while sz > 0:
|
||||||
@@ -65,11 +65,11 @@ def get_segment_buffer(seg):
|
|||||||
sz -= 0x1000
|
sz -= 0x1000
|
||||||
|
|
||||||
# IDA returns None if get_bytes fails, so convert for consistent return type
|
# IDA returns None if get_bytes fails, so convert for consistent return type
|
||||||
return buff if buff else b''
|
return buff if buff else b""
|
||||||
|
|
||||||
|
|
||||||
def get_file_imports():
|
def get_file_imports():
|
||||||
''' get file imports '''
|
""" get file imports """
|
||||||
_imports = {}
|
_imports = {}
|
||||||
|
|
||||||
for idx in range(idaapi.get_import_module_qty()):
|
for idx in range(idaapi.get_import_module_qty()):
|
||||||
@@ -79,9 +79,9 @@ def get_file_imports():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
def _inspect_import(ea, name, ordi):
|
def _inspect_import(ea, name, ordi):
|
||||||
if name and name.startswith('__imp_'):
|
if name and name.startswith("__imp_"):
|
||||||
# handle mangled names starting
|
# handle mangled names starting
|
||||||
name = name[len('__imp_'):]
|
name = name[len("__imp_") :]
|
||||||
_imports[ea] = (dllname.lower(), name, ordi)
|
_imports[ea] = (dllname.lower(), name, ordi)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -91,14 +91,14 @@ def get_file_imports():
|
|||||||
|
|
||||||
|
|
||||||
def get_instructions_in_range(start, end):
|
def get_instructions_in_range(start, end):
|
||||||
''' yield instructions in range
|
""" yield instructions in range
|
||||||
|
|
||||||
args:
|
args:
|
||||||
start: virtual address (inclusive)
|
start: virtual address (inclusive)
|
||||||
end: virtual address (exclusive)
|
end: virtual address (exclusive)
|
||||||
yield:
|
yield:
|
||||||
(insn_t*)
|
(insn_t*)
|
||||||
'''
|
"""
|
||||||
for head in idautils.Heads(start, end):
|
for head in idautils.Heads(start, end):
|
||||||
inst = idautils.DecodeInstruction(head)
|
inst = idautils.DecodeInstruction(head)
|
||||||
if inst:
|
if inst:
|
||||||
@@ -106,7 +106,7 @@ def get_instructions_in_range(start, end):
|
|||||||
|
|
||||||
|
|
||||||
def is_operand_equal(op1, op2):
|
def is_operand_equal(op1, op2):
|
||||||
''' compare two IDA op_t '''
|
""" compare two IDA op_t """
|
||||||
if op1.flags != op2.flags:
|
if op1.flags != op2.flags:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -132,14 +132,12 @@ def is_operand_equal(op1, op2):
|
|||||||
|
|
||||||
|
|
||||||
def is_basic_block_equal(bb1, bb2):
|
def is_basic_block_equal(bb1, bb2):
|
||||||
''' compare two IDA BasicBlock '''
|
""" compare two IDA BasicBlock """
|
||||||
return bb1.start_ea == bb2.start_ea \
|
return bb1.start_ea == bb2.start_ea and bb1.end_ea == bb2.end_ea and bb1.type == bb2.type
|
||||||
and bb1.end_ea == bb2.end_ea \
|
|
||||||
and bb1.type == bb2.type
|
|
||||||
|
|
||||||
|
|
||||||
def basic_block_size(bb):
|
def basic_block_size(bb):
|
||||||
''' calculate size of basic block '''
|
""" calculate size of basic block """
|
||||||
return bb.end_ea - bb.start_ea
|
return bb.end_ea - bb.start_ea
|
||||||
|
|
||||||
|
|
||||||
@@ -152,11 +150,11 @@ def read_bytes_at(ea, count):
|
|||||||
|
|
||||||
|
|
||||||
def find_string_at(ea, min=4):
|
def find_string_at(ea, min=4):
|
||||||
''' check if ASCII string exists at a given virtual address '''
|
""" check if ASCII string exists at a given virtual address """
|
||||||
found = idaapi.get_strlit_contents(ea, -1, idaapi.STRTYPE_C)
|
found = idaapi.get_strlit_contents(ea, -1, idaapi.STRTYPE_C)
|
||||||
if found and len(found) > min:
|
if found and len(found) > min:
|
||||||
try:
|
try:
|
||||||
found = found.decode('ascii')
|
found = found.decode("ascii")
|
||||||
# hacky check for IDA bug; get_strlit_contents also reads Unicode as
|
# hacky check for IDA bug; get_strlit_contents also reads Unicode as
|
||||||
# myy__uunniiccoodde when searching in ASCII mode so we check for that here
|
# myy__uunniiccoodde when searching in ASCII mode so we check for that here
|
||||||
# and return the fixed up value
|
# and return the fixed up value
|
||||||
@@ -169,11 +167,11 @@ def find_string_at(ea, min=4):
|
|||||||
|
|
||||||
|
|
||||||
def get_op_phrase_info(op):
|
def get_op_phrase_info(op):
|
||||||
''' parse phrase features from operand
|
""" parse phrase features from operand
|
||||||
|
|
||||||
Pretty much dup of sark's implementation:
|
Pretty much dup of sark's implementation:
|
||||||
https://github.com/tmr232/Sark/blob/master/sark/code/instruction.py#L28-L73
|
https://github.com/tmr232/Sark/blob/master/sark/code/instruction.py#L28-L73
|
||||||
'''
|
"""
|
||||||
if op.type not in (idaapi.o_phrase, idaapi.o_displ):
|
if op.type not in (idaapi.o_phrase, idaapi.o_displ):
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -202,21 +200,21 @@ def get_op_phrase_info(op):
|
|||||||
# This is only relevant to Intel architectures.
|
# This is only relevant to Intel architectures.
|
||||||
index = None
|
index = None
|
||||||
|
|
||||||
return {'base': base, 'index': index, 'scale': scale, 'offset': offset}
|
return {"base": base, "index": index, "scale": scale, "offset": offset}
|
||||||
|
|
||||||
|
|
||||||
def is_op_write(insn, op):
|
def is_op_write(insn, op):
|
||||||
''' Check if an operand is written to (destination operand) '''
|
""" Check if an operand is written to (destination operand) """
|
||||||
return idaapi.has_cf_chg(insn.get_canon_feature(), op.n)
|
return idaapi.has_cf_chg(insn.get_canon_feature(), op.n)
|
||||||
|
|
||||||
|
|
||||||
def is_op_read(insn, op):
|
def is_op_read(insn, op):
|
||||||
''' Check if an operand is read from (source operand) '''
|
""" Check if an operand is read from (source operand) """
|
||||||
return idaapi.has_cf_use(insn.get_canon_feature(), op.n)
|
return idaapi.has_cf_use(insn.get_canon_feature(), op.n)
|
||||||
|
|
||||||
|
|
||||||
def is_sp_modified(insn):
|
def is_sp_modified(insn):
|
||||||
''' determine if instruction modifies SP, ESP, RSP '''
|
""" determine if instruction modifies SP, ESP, RSP """
|
||||||
for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
|
for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
|
||||||
if op.reg != idautils.procregs.sp.reg:
|
if op.reg != idautils.procregs.sp.reg:
|
||||||
continue
|
continue
|
||||||
@@ -228,7 +226,7 @@ def is_sp_modified(insn):
|
|||||||
|
|
||||||
|
|
||||||
def is_bp_modified(insn):
|
def is_bp_modified(insn):
|
||||||
''' check if instruction modifies BP, EBP, RBP '''
|
""" check if instruction modifies BP, EBP, RBP """
|
||||||
for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
|
for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
|
||||||
if op.reg != idautils.procregs.bp.reg:
|
if op.reg != idautils.procregs.bp.reg:
|
||||||
continue
|
continue
|
||||||
@@ -240,12 +238,12 @@ def is_bp_modified(insn):
|
|||||||
|
|
||||||
|
|
||||||
def is_frame_register(reg):
|
def is_frame_register(reg):
|
||||||
''' check if register is sp or bp '''
|
""" check if register is sp or bp """
|
||||||
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
|
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
|
||||||
|
|
||||||
|
|
||||||
def get_insn_ops(insn, op_type=None):
|
def get_insn_ops(insn, op_type=None):
|
||||||
''' yield op_t for instruction, filter on type if specified '''
|
""" yield op_t for instruction, filter on type if specified """
|
||||||
for op in insn.ops:
|
for op in insn.ops:
|
||||||
if op.type == idaapi.o_void:
|
if op.type == idaapi.o_void:
|
||||||
# avoid looping all 6 ops if only subset exists
|
# avoid looping all 6 ops if only subset exists
|
||||||
@@ -258,17 +256,17 @@ def get_insn_ops(insn, op_type=None):
|
|||||||
|
|
||||||
|
|
||||||
def ea_flags(ea):
|
def ea_flags(ea):
|
||||||
''' retrieve processor flags for a given address '''
|
""" retrieve processor flags for a given address """
|
||||||
return idaapi.get_flags(ea)
|
return idaapi.get_flags(ea)
|
||||||
|
|
||||||
|
|
||||||
def is_op_stack_var(ea, n):
|
def is_op_stack_var(ea, n):
|
||||||
''' check if operand is a stack variable '''
|
""" check if operand is a stack variable """
|
||||||
return idaapi.is_stkvar(ea_flags(ea), n)
|
return idaapi.is_stkvar(ea_flags(ea), n)
|
||||||
|
|
||||||
|
|
||||||
def mask_op_val(op):
|
def mask_op_val(op):
|
||||||
''' mask off a value based on data type
|
""" mask off a value based on data type
|
||||||
|
|
||||||
necesssary due to a bug in 64-bit
|
necesssary due to a bug in 64-bit
|
||||||
|
|
||||||
@@ -277,22 +275,22 @@ def mask_op_val(op):
|
|||||||
|
|
||||||
insn.Op2.dtype == idaapi.dt_dword
|
insn.Op2.dtype == idaapi.dt_dword
|
||||||
insn.Op2.value == 0xffffffffffffffff
|
insn.Op2.value == 0xffffffffffffffff
|
||||||
'''
|
"""
|
||||||
masks = {
|
masks = {
|
||||||
idaapi.dt_byte: 0xFF,
|
idaapi.dt_byte: 0xFF,
|
||||||
idaapi.dt_word: 0xFFFF,
|
idaapi.dt_word: 0xFFFF,
|
||||||
idaapi.dt_dword: 0xFFFFFFFF,
|
idaapi.dt_dword: 0xFFFFFFFF,
|
||||||
idaapi.dt_qword: 0xFFFFFFFFFFFFFFFF
|
idaapi.dt_qword: 0xFFFFFFFFFFFFFFFF,
|
||||||
}
|
}
|
||||||
|
|
||||||
mask = masks.get(op.dtype, None)
|
mask = masks.get(op.dtype, None)
|
||||||
|
|
||||||
if not mask:
|
if not mask:
|
||||||
raise ValueError('No support for operand data type 0x%x' % op.dtype)
|
raise ValueError("No support for operand data type 0x%x" % op.dtype)
|
||||||
|
|
||||||
return mask & op.value
|
return mask & op.value
|
||||||
|
|
||||||
|
|
||||||
def ea_to_offset(ea):
|
def ea_to_offset(ea):
|
||||||
''' convert virtual address to file offset '''
|
""" convert virtual address to file offset """
|
||||||
return idaapi.get_fileregion_offset(ea)
|
return idaapi.get_fileregion_offset(ea)
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ def get_imports():
|
|||||||
|
|
||||||
|
|
||||||
def _check_for_api_call(insn):
|
def _check_for_api_call(insn):
|
||||||
''' check instruction for API call '''
|
""" check instruction for API call """
|
||||||
if not idaapi.is_call_insn(insn):
|
if not idaapi.is_call_insn(insn):
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -34,7 +34,7 @@ def _check_for_api_call(insn):
|
|||||||
imp = get_imports().get(call_ref, None)
|
imp = get_imports().get(call_ref, None)
|
||||||
|
|
||||||
if imp:
|
if imp:
|
||||||
yield '%s.%s' % (imp[0], imp[1])
|
yield "%s.%s" % (imp[0], imp[1])
|
||||||
else:
|
else:
|
||||||
f = idaapi.get_func(call_ref)
|
f = idaapi.get_func(call_ref)
|
||||||
|
|
||||||
@@ -46,11 +46,11 @@ def _check_for_api_call(insn):
|
|||||||
imp = get_imports().get(thunk_ref, None)
|
imp = get_imports().get(thunk_ref, None)
|
||||||
|
|
||||||
if imp:
|
if imp:
|
||||||
yield '%s.%s' % (imp[0], imp[1])
|
yield "%s.%s" % (imp[0], imp[1])
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(f, bb, insn):
|
def extract_insn_api_features(f, bb, insn):
|
||||||
''' parse instruction API features
|
""" parse instruction API features
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
@@ -59,14 +59,14 @@ def extract_insn_api_features(f, bb, insn):
|
|||||||
|
|
||||||
example:
|
example:
|
||||||
call dword [0x00473038]
|
call dword [0x00473038]
|
||||||
'''
|
"""
|
||||||
for api_name in _check_for_api_call(insn):
|
for api_name in _check_for_api_call(insn):
|
||||||
for feature, va in capa.features.extractors.helpers.generate_api_features(api_name, insn.ea):
|
for feature, va in capa.features.extractors.helpers.generate_api_features(api_name, insn.ea):
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_number_features(f, bb, insn):
|
def extract_insn_number_features(f, bb, insn):
|
||||||
''' parse instruction number features
|
""" parse instruction number features
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
@@ -75,7 +75,7 @@ def extract_insn_number_features(f, bb, insn):
|
|||||||
|
|
||||||
example:
|
example:
|
||||||
push 3136B0h ; dwControlCode
|
push 3136B0h ; dwControlCode
|
||||||
'''
|
"""
|
||||||
if idaapi.is_ret_insn(insn):
|
if idaapi.is_ret_insn(insn):
|
||||||
# skip things like:
|
# skip things like:
|
||||||
# .text:0042250E retn 8
|
# .text:0042250E retn 8
|
||||||
@@ -97,7 +97,7 @@ def extract_insn_number_features(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(f, bb, insn):
|
def extract_insn_bytes_features(f, bb, insn):
|
||||||
''' parse referenced byte sequences
|
""" parse referenced byte sequences
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
@@ -106,7 +106,7 @@ def extract_insn_bytes_features(f, bb, insn):
|
|||||||
|
|
||||||
example:
|
example:
|
||||||
push offset iid_004118d4_IShellLinkA ; riid
|
push offset iid_004118d4_IShellLinkA ; riid
|
||||||
'''
|
"""
|
||||||
if idaapi.is_call_insn(insn):
|
if idaapi.is_call_insn(insn):
|
||||||
# ignore call instructions
|
# ignore call instructions
|
||||||
return
|
return
|
||||||
@@ -119,7 +119,7 @@ def extract_insn_bytes_features(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def extract_insn_string_features(f, bb, insn):
|
def extract_insn_string_features(f, bb, insn):
|
||||||
''' parse instruction string features
|
""" parse instruction string features
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
@@ -128,7 +128,7 @@ def extract_insn_string_features(f, bb, insn):
|
|||||||
|
|
||||||
example:
|
example:
|
||||||
push offset aAcr ; "ACR > "
|
push offset aAcr ; "ACR > "
|
||||||
'''
|
"""
|
||||||
for ref in idautils.DataRefsFrom(insn.ea):
|
for ref in idautils.DataRefsFrom(insn.ea):
|
||||||
found = capa.features.extractors.ida.helpers.find_string_at(ref)
|
found = capa.features.extractors.ida.helpers.find_string_at(ref)
|
||||||
if found:
|
if found:
|
||||||
@@ -136,7 +136,7 @@ def extract_insn_string_features(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def extract_insn_offset_features(f, bb, insn):
|
def extract_insn_offset_features(f, bb, insn):
|
||||||
''' parse instruction structure offset features
|
""" parse instruction structure offset features
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
@@ -145,7 +145,7 @@ def extract_insn_offset_features(f, bb, insn):
|
|||||||
|
|
||||||
example:
|
example:
|
||||||
.text:0040112F cmp [esi+4], ebx
|
.text:0040112F cmp [esi+4], ebx
|
||||||
'''
|
"""
|
||||||
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_phrase, idaapi.o_displ)):
|
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_phrase, idaapi.o_displ)):
|
||||||
if capa.features.extractors.ida.helpers.is_op_stack_var(insn.ea, op.n):
|
if capa.features.extractors.ida.helpers.is_op_stack_var(insn.ea, op.n):
|
||||||
# skip stack offsets
|
# skip stack offsets
|
||||||
@@ -156,7 +156,7 @@ def extract_insn_offset_features(f, bb, insn):
|
|||||||
if not p_info:
|
if not p_info:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
op_off = p_info['offset']
|
op_off = p_info["offset"]
|
||||||
|
|
||||||
if 0 == op_off:
|
if 0 == op_off:
|
||||||
# TODO: Do we want to record offset of zero?
|
# TODO: Do we want to record offset of zero?
|
||||||
@@ -172,26 +172,26 @@ def extract_insn_offset_features(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def _contains_stack_cookie_keywords(s):
|
def _contains_stack_cookie_keywords(s):
|
||||||
''' check if string contains stack cookie keywords
|
""" check if string contains stack cookie keywords
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
xor ecx, ebp ; StackCookie
|
xor ecx, ebp ; StackCookie
|
||||||
|
|
||||||
mov eax, ___security_cookie
|
mov eax, ___security_cookie
|
||||||
'''
|
"""
|
||||||
if not s:
|
if not s:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
s = s.strip().lower()
|
s = s.strip().lower()
|
||||||
|
|
||||||
if 'cookie' not in s:
|
if "cookie" not in s:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return any(keyword in s for keyword in ('stack', 'security'))
|
return any(keyword in s for keyword in ("stack", "security"))
|
||||||
|
|
||||||
|
|
||||||
def _bb_stack_cookie_registers(bb):
|
def _bb_stack_cookie_registers(bb):
|
||||||
''' scan basic block for stack cookie operations
|
""" scan basic block for stack cookie operations
|
||||||
|
|
||||||
yield registers ids that may have been used for stack cookie operations
|
yield registers ids that may have been used for stack cookie operations
|
||||||
|
|
||||||
@@ -215,7 +215,7 @@ def _bb_stack_cookie_registers(bb):
|
|||||||
.text:004062FA jnz loc_40639D
|
.text:004062FA jnz loc_40639D
|
||||||
|
|
||||||
TODO: this is expensive, but necessary?...
|
TODO: this is expensive, but necessary?...
|
||||||
'''
|
"""
|
||||||
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
||||||
if _contains_stack_cookie_keywords(idc.GetDisasm(insn.ea)):
|
if _contains_stack_cookie_keywords(idc.GetDisasm(insn.ea)):
|
||||||
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_reg,)):
|
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_reg,)):
|
||||||
@@ -225,7 +225,7 @@ def _bb_stack_cookie_registers(bb):
|
|||||||
|
|
||||||
|
|
||||||
def _is_nzxor_stack_cookie(f, bb, insn):
|
def _is_nzxor_stack_cookie(f, bb, insn):
|
||||||
''' check if nzxor is related to stack cookie '''
|
""" check if nzxor is related to stack cookie """
|
||||||
if _contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)):
|
if _contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)):
|
||||||
# Example:
|
# Example:
|
||||||
# xor ecx, ebp ; StackCookie
|
# xor ecx, ebp ; StackCookie
|
||||||
@@ -241,7 +241,7 @@ def _is_nzxor_stack_cookie(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||||
''' parse instruction non-zeroing XOR instruction
|
""" parse instruction non-zeroing XOR instruction
|
||||||
|
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies
|
ignore expected non-zeroing XORs, e.g. security cookies
|
||||||
|
|
||||||
@@ -249,7 +249,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
|||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
insn (IDA insn_t)
|
insn (IDA insn_t)
|
||||||
'''
|
"""
|
||||||
if insn.itype != idaapi.NN_xor:
|
if insn.itype != idaapi.NN_xor:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -259,28 +259,28 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
|||||||
if _is_nzxor_stack_cookie(f, bb, insn):
|
if _is_nzxor_stack_cookie(f, bb, insn):
|
||||||
return
|
return
|
||||||
|
|
||||||
yield Characteristic('nzxor', True), insn.ea
|
yield Characteristic("nzxor", True), insn.ea
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_mnemonic_features(f, bb, insn):
|
def extract_insn_mnemonic_features(f, bb, insn):
|
||||||
''' parse instruction mnemonic features
|
""" parse instruction mnemonic features
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
insn (IDA insn_t)
|
insn (IDA insn_t)
|
||||||
'''
|
"""
|
||||||
yield Mnemonic(insn.get_canon_mnem()), insn.ea
|
yield Mnemonic(insn.get_canon_mnem()), insn.ea
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||||
''' parse instruction peb access
|
""" parse instruction peb access
|
||||||
|
|
||||||
fs:[0x30] on x86, gs:[0x60] on x64
|
fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
IDA should be able to do this..
|
IDA should be able to do this..
|
||||||
'''
|
"""
|
||||||
if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
|
if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -290,40 +290,40 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
|||||||
|
|
||||||
disasm = idc.GetDisasm(insn.ea)
|
disasm = idc.GetDisasm(insn.ea)
|
||||||
|
|
||||||
if ' fs:30h' in disasm or ' gs:60h' in disasm:
|
if " fs:30h" in disasm or " gs:60h" in disasm:
|
||||||
# TODO: replace above with proper IDA
|
# TODO: replace above with proper IDA
|
||||||
yield Characteristic('peb access', True), insn.ea
|
yield Characteristic("peb access", True), insn.ea
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_segment_access_features(f, bb, insn):
|
def extract_insn_segment_access_features(f, bb, insn):
|
||||||
''' parse instruction fs or gs access
|
""" parse instruction fs or gs access
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
IDA should be able to do this...
|
IDA should be able to do this...
|
||||||
'''
|
"""
|
||||||
if all(map(lambda op: op.type != idaapi.o_mem, insn.ops)):
|
if all(map(lambda op: op.type != idaapi.o_mem, insn.ops)):
|
||||||
# try to optimize for only memory referencese
|
# try to optimize for only memory referencese
|
||||||
return
|
return
|
||||||
|
|
||||||
disasm = idc.GetDisasm(insn.ea)
|
disasm = idc.GetDisasm(insn.ea)
|
||||||
|
|
||||||
if ' fs:' in disasm:
|
if " fs:" in disasm:
|
||||||
# TODO: replace above with proper IDA
|
# TODO: replace above with proper IDA
|
||||||
yield Characteristic('fs access', True), insn.ea
|
yield Characteristic("fs access", True), insn.ea
|
||||||
|
|
||||||
if ' gs:' in disasm:
|
if " gs:" in disasm:
|
||||||
# TODO: replace above with proper IDA
|
# TODO: replace above with proper IDA
|
||||||
yield Characteristic('gs access', True), insn.ea
|
yield Characteristic("gs access", True), insn.ea
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_cross_section_cflow(f, bb, insn):
|
def extract_insn_cross_section_cflow(f, bb, insn):
|
||||||
''' inspect the instruction for a CALL or JMP that crosses section boundaries
|
""" inspect the instruction for a CALL or JMP that crosses section boundaries
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
insn (IDA insn_t)
|
insn (IDA insn_t)
|
||||||
'''
|
"""
|
||||||
for ref in idautils.CodeRefsFrom(insn.ea, False):
|
for ref in idautils.CodeRefsFrom(insn.ea, False):
|
||||||
if ref in get_imports().keys():
|
if ref in get_imports().keys():
|
||||||
# ignore API calls
|
# ignore API calls
|
||||||
@@ -336,11 +336,11 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
|||||||
if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
|
if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
yield Characteristic('cross section flow', True), insn.ea
|
yield Characteristic("cross section flow", True), insn.ea
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(f, bb, insn):
|
def extract_function_calls_from(f, bb, insn):
|
||||||
''' extract functions calls from features
|
""" extract functions calls from features
|
||||||
|
|
||||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
||||||
|
|
||||||
@@ -348,17 +348,17 @@ def extract_function_calls_from(f, bb, insn):
|
|||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
insn (IDA insn_t)
|
insn (IDA insn_t)
|
||||||
'''
|
"""
|
||||||
if not idaapi.is_call_insn(insn):
|
if not idaapi.is_call_insn(insn):
|
||||||
# ignore jmp, etc.
|
# ignore jmp, etc.
|
||||||
return
|
return
|
||||||
|
|
||||||
for ref in idautils.CodeRefsFrom(insn.ea, False):
|
for ref in idautils.CodeRefsFrom(insn.ea, False):
|
||||||
yield Characteristic('calls from', True), ref
|
yield Characteristic("calls from", True), ref
|
||||||
|
|
||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||||
''' extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
""" extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
|
|
||||||
most relevant at the function or basic block scope;
|
most relevant at the function or basic block scope;
|
||||||
@@ -368,22 +368,22 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
|||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
insn (IDA insn_t)
|
insn (IDA insn_t)
|
||||||
'''
|
"""
|
||||||
if not idaapi.is_call_insn(insn):
|
if not idaapi.is_call_insn(insn):
|
||||||
return
|
return
|
||||||
|
|
||||||
if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
|
if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
|
||||||
yield Characteristic('indirect call', True), insn.ea
|
yield Characteristic("indirect call", True), insn.ea
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f, bb, insn):
|
def extract_features(f, bb, insn):
|
||||||
''' extract instruction features
|
""" extract instruction features
|
||||||
|
|
||||||
args:
|
args:
|
||||||
f (IDA func_t)
|
f (IDA func_t)
|
||||||
bb (IDA BasicBlock)
|
bb (IDA BasicBlock)
|
||||||
insn (IDA insn_t)
|
insn (IDA insn_t)
|
||||||
'''
|
"""
|
||||||
for inst_handler in INSTRUCTION_HANDLERS:
|
for inst_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, va in inst_handler(f, bb, insn):
|
for feature, va in inst_handler(f, bb, insn):
|
||||||
yield feature, va
|
yield feature, va
|
||||||
@@ -401,7 +401,7 @@ INSTRUCTION_HANDLERS = (
|
|||||||
extract_insn_cross_section_cflow,
|
extract_insn_cross_section_cflow,
|
||||||
extract_insn_segment_access_features,
|
extract_insn_segment_access_features,
|
||||||
extract_function_calls_from,
|
extract_function_calls_from,
|
||||||
extract_function_indirect_call_characteristic_features
|
extract_function_indirect_call_characteristic_features,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -416,5 +416,5 @@ def main():
|
|||||||
pprint.pprint(features)
|
pprint.pprint(features)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from networkx import nx
|
|||||||
|
|
||||||
|
|
||||||
def has_loop(edges, threshold=2):
|
def has_loop(edges, threshold=2):
|
||||||
''' check if a list of edges representing a directed graph contains a loop
|
""" check if a list of edges representing a directed graph contains a loop
|
||||||
|
|
||||||
args:
|
args:
|
||||||
edges: list of edge sets representing a directed graph i.e. [(1, 2), (2, 1)]
|
edges: list of edge sets representing a directed graph i.e. [(1, 2), (2, 1)]
|
||||||
@@ -11,7 +11,7 @@ def has_loop(edges, threshold=2):
|
|||||||
|
|
||||||
returns:
|
returns:
|
||||||
bool
|
bool
|
||||||
'''
|
"""
|
||||||
g = nx.DiGraph()
|
g = nx.DiGraph()
|
||||||
g.add_edges_from(edges)
|
g.add_edges_from(edges)
|
||||||
return any(len(comp) >= threshold for comp in strongly_connected_components(g))
|
return any(len(comp) >= threshold for comp in strongly_connected_components(g))
|
||||||
|
|||||||
@@ -7,26 +7,28 @@ import re
|
|||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
|
|
||||||
ASCII_BYTE = r' !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t'.encode('ascii')
|
ASCII_BYTE = r" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t".encode(
|
||||||
ASCII_RE_4 = re.compile(b'([%s]{%d,})' % (ASCII_BYTE, 4))
|
"ascii"
|
||||||
UNICODE_RE_4 = re.compile(b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, 4))
|
)
|
||||||
REPEATS = [b'A', b'\x00', b'\xfe', b'\xff']
|
ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4))
|
||||||
|
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
|
||||||
|
REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
|
||||||
SLICE_SIZE = 4096
|
SLICE_SIZE = 4096
|
||||||
|
|
||||||
String = namedtuple('String', ['s', 'offset'])
|
String = namedtuple("String", ["s", "offset"])
|
||||||
|
|
||||||
|
|
||||||
def buf_filled_with(buf, character):
|
def buf_filled_with(buf, character):
|
||||||
dupe_chunk = character * SLICE_SIZE
|
dupe_chunk = character * SLICE_SIZE
|
||||||
for offset in range(0, len(buf), SLICE_SIZE):
|
for offset in range(0, len(buf), SLICE_SIZE):
|
||||||
new_chunk = buf[offset: offset + SLICE_SIZE]
|
new_chunk = buf[offset : offset + SLICE_SIZE]
|
||||||
if dupe_chunk[:len(new_chunk)] != new_chunk:
|
if dupe_chunk[: len(new_chunk)] != new_chunk:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def extract_ascii_strings(buf, n=4):
|
def extract_ascii_strings(buf, n=4):
|
||||||
'''
|
"""
|
||||||
Extract ASCII strings from the given binary data.
|
Extract ASCII strings from the given binary data.
|
||||||
|
|
||||||
:param buf: A bytestring.
|
:param buf: A bytestring.
|
||||||
@@ -34,7 +36,7 @@ def extract_ascii_strings(buf, n=4):
|
|||||||
:param n: The minimum length of strings to extract.
|
:param n: The minimum length of strings to extract.
|
||||||
:type n: int
|
:type n: int
|
||||||
:rtype: Sequence[String]
|
:rtype: Sequence[String]
|
||||||
'''
|
"""
|
||||||
|
|
||||||
if not buf:
|
if not buf:
|
||||||
return
|
return
|
||||||
@@ -46,14 +48,14 @@ def extract_ascii_strings(buf, n=4):
|
|||||||
if n == 4:
|
if n == 4:
|
||||||
r = ASCII_RE_4
|
r = ASCII_RE_4
|
||||||
else:
|
else:
|
||||||
reg = b'([%s]{%d,})' % (ASCII_BYTE, n)
|
reg = b"([%s]{%d,})" % (ASCII_BYTE, n)
|
||||||
r = re.compile(reg)
|
r = re.compile(reg)
|
||||||
for match in r.finditer(buf):
|
for match in r.finditer(buf):
|
||||||
yield String(match.group().decode('ascii'), match.start())
|
yield String(match.group().decode("ascii"), match.start())
|
||||||
|
|
||||||
|
|
||||||
def extract_unicode_strings(buf, n=4):
|
def extract_unicode_strings(buf, n=4):
|
||||||
'''
|
"""
|
||||||
Extract naive UTF-16 strings from the given binary data.
|
Extract naive UTF-16 strings from the given binary data.
|
||||||
|
|
||||||
:param buf: A bytestring.
|
:param buf: A bytestring.
|
||||||
@@ -61,7 +63,7 @@ def extract_unicode_strings(buf, n=4):
|
|||||||
:param n: The minimum length of strings to extract.
|
:param n: The minimum length of strings to extract.
|
||||||
:type n: int
|
:type n: int
|
||||||
:rtype: Sequence[String]
|
:rtype: Sequence[String]
|
||||||
'''
|
"""
|
||||||
|
|
||||||
if not buf:
|
if not buf:
|
||||||
return
|
return
|
||||||
@@ -72,11 +74,11 @@ def extract_unicode_strings(buf, n=4):
|
|||||||
if n == 4:
|
if n == 4:
|
||||||
r = UNICODE_RE_4
|
r = UNICODE_RE_4
|
||||||
else:
|
else:
|
||||||
reg = b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, n)
|
reg = b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n)
|
||||||
r = re.compile(reg)
|
r = re.compile(reg)
|
||||||
for match in r.finditer(buf):
|
for match in r.finditer(buf):
|
||||||
try:
|
try:
|
||||||
yield String(match.group().decode('utf-16'), match.start())
|
yield String(match.group().decode("utf-16"), match.start())
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -84,15 +86,15 @@ def extract_unicode_strings(buf, n=4):
|
|||||||
def main():
|
def main():
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
with open(sys.argv[1], 'rb') as f:
|
with open(sys.argv[1], "rb") as f:
|
||||||
b = f.read()
|
b = f.read()
|
||||||
|
|
||||||
for s in extract_ascii_strings(b):
|
for s in extract_ascii_strings(b):
|
||||||
print('0x{:x}: {:s}'.format(s.offset, s.s))
|
print("0x{:x}: {:s}".format(s.offset, s.s))
|
||||||
|
|
||||||
for s in extract_unicode_strings(b):
|
for s in extract_unicode_strings(b):
|
||||||
print('0x{:x}: {:s}'.format(s.offset, s.s))
|
print("0x{:x}: {:s}".format(s.offset, s.s))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ import file
|
|||||||
import function
|
import function
|
||||||
import basicblock
|
import basicblock
|
||||||
import insn
|
import insn
|
||||||
__all__ = ['file', 'function', 'basicblock', 'insn']
|
|
||||||
|
__all__ = ["file", "function", "basicblock", "insn"]
|
||||||
|
|
||||||
|
|
||||||
def get_va(self):
|
def get_va(self):
|
||||||
@@ -27,14 +28,14 @@ def get_va(self):
|
|||||||
|
|
||||||
|
|
||||||
def add_va_int_cast(o):
|
def add_va_int_cast(o):
|
||||||
'''
|
"""
|
||||||
dynamically add a cast-to-int (`__int__`) method to the given object
|
dynamically add a cast-to-int (`__int__`) method to the given object
|
||||||
that returns the value of the `.va` property.
|
that returns the value of the `.va` property.
|
||||||
|
|
||||||
this bit of skullduggery lets use cast viv-utils objects as ints.
|
this bit of skullduggery lets use cast viv-utils objects as ints.
|
||||||
the correct way of doing this is to update viv-utils (or subclass the objects here).
|
the correct way of doing this is to update viv-utils (or subclass the objects here).
|
||||||
'''
|
"""
|
||||||
setattr(o, '__int__', types.MethodType(get_va, o, type(o)))
|
setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
|
||||||
return o
|
return o
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
|||||||
|
|
||||||
|
|
||||||
def interface_extract_basic_block_XXX(f, bb):
|
def interface_extract_basic_block_XXX(f, bb):
|
||||||
'''
|
"""
|
||||||
parse features from the given basic block.
|
parse features from the given basic block.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -19,14 +19,14 @@ def interface_extract_basic_block_XXX(f, bb):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
(Feature, int): the feature and the address at which its found.
|
(Feature, int): the feature and the address at which its found.
|
||||||
'''
|
"""
|
||||||
yield NotImplementedError('feature'), NotImplementedError('virtual address')
|
yield NotImplementedError("feature"), NotImplementedError("virtual address")
|
||||||
|
|
||||||
|
|
||||||
def _bb_has_tight_loop(f, bb):
|
def _bb_has_tight_loop(f, bb):
|
||||||
'''
|
"""
|
||||||
parse tight loops, true if last instruction in basic block branches to bb start
|
parse tight loops, true if last instruction in basic block branches to bb start
|
||||||
'''
|
"""
|
||||||
if len(bb.instructions) > 0:
|
if len(bb.instructions) > 0:
|
||||||
for bva, bflags in bb.instructions[-1].getBranches():
|
for bva, bflags in bb.instructions[-1].getBranches():
|
||||||
if bflags & vivisect.envi.BR_COND:
|
if bflags & vivisect.envi.BR_COND:
|
||||||
@@ -37,16 +37,16 @@ def _bb_has_tight_loop(f, bb):
|
|||||||
|
|
||||||
|
|
||||||
def extract_bb_tight_loop(f, bb):
|
def extract_bb_tight_loop(f, bb):
|
||||||
''' check basic block for tight loop indicators '''
|
""" check basic block for tight loop indicators """
|
||||||
if _bb_has_tight_loop(f, bb):
|
if _bb_has_tight_loop(f, bb):
|
||||||
yield Characteristic('tight loop', True), bb.va
|
yield Characteristic("tight loop", True), bb.va
|
||||||
|
|
||||||
|
|
||||||
def _bb_has_stackstring(f, bb):
|
def _bb_has_stackstring(f, bb):
|
||||||
'''
|
"""
|
||||||
extract potential stackstring creation, using the following heuristics:
|
extract potential stackstring creation, using the following heuristics:
|
||||||
- basic block contains enough moves of constant bytes to the stack
|
- basic block contains enough moves of constant bytes to the stack
|
||||||
'''
|
"""
|
||||||
count = 0
|
count = 0
|
||||||
for instr in bb.instructions:
|
for instr in bb.instructions:
|
||||||
if is_mov_imm_to_stack(instr):
|
if is_mov_imm_to_stack(instr):
|
||||||
@@ -60,16 +60,16 @@ def _bb_has_stackstring(f, bb):
|
|||||||
|
|
||||||
|
|
||||||
def extract_stackstring(f, bb):
|
def extract_stackstring(f, bb):
|
||||||
''' check basic block for stackstring indicators '''
|
""" check basic block for stackstring indicators """
|
||||||
if _bb_has_stackstring(f, bb):
|
if _bb_has_stackstring(f, bb):
|
||||||
yield Characteristic('stack string', True), bb.va
|
yield Characteristic("stack string", True), bb.va
|
||||||
|
|
||||||
|
|
||||||
def is_mov_imm_to_stack(instr):
|
def is_mov_imm_to_stack(instr):
|
||||||
'''
|
"""
|
||||||
Return if instruction moves immediate onto stack
|
Return if instruction moves immediate onto stack
|
||||||
'''
|
"""
|
||||||
if not instr.mnem.startswith('mov'):
|
if not instr.mnem.startswith("mov"):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -82,32 +82,33 @@ def is_mov_imm_to_stack(instr):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# TODO what about 64-bit operands?
|
# TODO what about 64-bit operands?
|
||||||
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and \
|
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance(
|
||||||
not isinstance(dst, envi.archs.i386.disasm.i386RegMemOper):
|
dst, envi.archs.i386.disasm.i386RegMemOper
|
||||||
|
):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not dst.reg:
|
if not dst.reg:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
rname = dst._dis_regctx.getRegisterName(dst.reg)
|
rname = dst._dis_regctx.getRegisterName(dst.reg)
|
||||||
if rname not in ['ebp', 'rbp', 'esp', 'rsp']:
|
if rname not in ["ebp", "rbp", "esp", "rsp"]:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def get_printable_len(oper):
|
def get_printable_len(oper):
|
||||||
'''
|
"""
|
||||||
Return string length if all operand bytes are ascii or utf16-le printable
|
Return string length if all operand bytes are ascii or utf16-le printable
|
||||||
'''
|
"""
|
||||||
if oper.tsize == 1:
|
if oper.tsize == 1:
|
||||||
chars = struct.pack('<B', oper.imm)
|
chars = struct.pack("<B", oper.imm)
|
||||||
elif oper.tsize == 2:
|
elif oper.tsize == 2:
|
||||||
chars = struct.pack('<H', oper.imm)
|
chars = struct.pack("<H", oper.imm)
|
||||||
elif oper.tsize == 4:
|
elif oper.tsize == 4:
|
||||||
chars = struct.pack('<I', oper.imm)
|
chars = struct.pack("<I", oper.imm)
|
||||||
elif oper.tsize == 8:
|
elif oper.tsize == 8:
|
||||||
chars = struct.pack('<Q', oper.imm)
|
chars = struct.pack("<Q", oper.imm)
|
||||||
if is_printable_ascii(chars):
|
if is_printable_ascii(chars):
|
||||||
return oper.tsize
|
return oper.tsize
|
||||||
if is_printable_utf16le(chars):
|
if is_printable_utf16le(chars):
|
||||||
@@ -120,12 +121,12 @@ def is_printable_ascii(chars):
|
|||||||
|
|
||||||
|
|
||||||
def is_printable_utf16le(chars):
|
def is_printable_utf16le(chars):
|
||||||
if all(c == '\x00' for c in chars[1::2]):
|
if all(c == "\x00" for c in chars[1::2]):
|
||||||
return is_printable_ascii(chars[::2])
|
return is_printable_ascii(chars[::2])
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f, bb):
|
def extract_features(f, bb):
|
||||||
'''
|
"""
|
||||||
extract features from the given basic block.
|
extract features from the given basic block.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -134,7 +135,7 @@ def extract_features(f, bb):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Feature, set[VA]: the features and their location found in this basic block.
|
Feature, set[VA]: the features and their location found in this basic block.
|
||||||
'''
|
"""
|
||||||
yield BasicBlock(), bb.va
|
yield BasicBlock(), bb.va
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
for feature, va in bb_handler(f, bb):
|
for feature, va in bb_handler(f, bb):
|
||||||
|
|||||||
@@ -9,11 +9,11 @@ import capa.features.extractors.strings
|
|||||||
|
|
||||||
|
|
||||||
def extract_file_embedded_pe(vw, file_path):
|
def extract_file_embedded_pe(vw, file_path):
|
||||||
with open(file_path, 'rb') as f:
|
with open(file_path, "rb") as f:
|
||||||
fbytes = f.read()
|
fbytes = f.read()
|
||||||
|
|
||||||
for offset, i in pe_carve.carve(fbytes, 1):
|
for offset, i in pe_carve.carve(fbytes, 1):
|
||||||
yield Characteristic('embedded pe', True), offset
|
yield Characteristic("embedded pe", True), offset
|
||||||
|
|
||||||
|
|
||||||
def extract_file_export_names(vw, file_path):
|
def extract_file_export_names(vw, file_path):
|
||||||
@@ -22,21 +22,21 @@ def extract_file_export_names(vw, file_path):
|
|||||||
|
|
||||||
|
|
||||||
def extract_file_import_names(vw, file_path):
|
def extract_file_import_names(vw, file_path):
|
||||||
'''
|
"""
|
||||||
extract imported function names
|
extract imported function names
|
||||||
1. imports by ordinal:
|
1. imports by ordinal:
|
||||||
- modulename.#ordinal
|
- modulename.#ordinal
|
||||||
2. imports by name, results in two features to support importname-only matching:
|
2. imports by name, results in two features to support importname-only matching:
|
||||||
- modulename.importname
|
- modulename.importname
|
||||||
- importname
|
- importname
|
||||||
'''
|
"""
|
||||||
for va, _, _, tinfo in vw.getImports():
|
for va, _, _, tinfo in vw.getImports():
|
||||||
# vivisect source: tinfo = "%s.%s" % (libname, impname)
|
# vivisect source: tinfo = "%s.%s" % (libname, impname)
|
||||||
modname, impname = tinfo.split('.')
|
modname, impname = tinfo.split(".")
|
||||||
if is_viv_ord_impname(impname):
|
if is_viv_ord_impname(impname):
|
||||||
# replace ord prefix with #
|
# replace ord prefix with #
|
||||||
impname = '#%s' % impname[len('ord'):]
|
impname = "#%s" % impname[len("ord") :]
|
||||||
tinfo = '%s.%s' % (modname, impname)
|
tinfo = "%s.%s" % (modname, impname)
|
||||||
yield Import(tinfo), va
|
yield Import(tinfo), va
|
||||||
else:
|
else:
|
||||||
yield Import(tinfo), va
|
yield Import(tinfo), va
|
||||||
@@ -44,13 +44,13 @@ def extract_file_import_names(vw, file_path):
|
|||||||
|
|
||||||
|
|
||||||
def is_viv_ord_impname(impname):
|
def is_viv_ord_impname(impname):
|
||||||
'''
|
"""
|
||||||
return if import name matches vivisect's ordinal naming scheme `'ord%d' % ord`
|
return if import name matches vivisect's ordinal naming scheme `'ord%d' % ord`
|
||||||
'''
|
"""
|
||||||
if not impname.startswith('ord'):
|
if not impname.startswith("ord"):
|
||||||
return False
|
return False
|
||||||
try:
|
try:
|
||||||
int(impname[len('ord'):])
|
int(impname[len("ord") :])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
@@ -63,10 +63,10 @@ def extract_file_section_names(vw, file_path):
|
|||||||
|
|
||||||
|
|
||||||
def extract_file_strings(vw, file_path):
|
def extract_file_strings(vw, file_path):
|
||||||
'''
|
"""
|
||||||
extract ASCII and UTF-16 LE strings from file
|
extract ASCII and UTF-16 LE strings from file
|
||||||
'''
|
"""
|
||||||
with open(file_path, 'rb') as f:
|
with open(file_path, "rb") as f:
|
||||||
b = f.read()
|
b = f.read()
|
||||||
|
|
||||||
for s in capa.features.extractors.strings.extract_ascii_strings(b):
|
for s in capa.features.extractors.strings.extract_ascii_strings(b):
|
||||||
@@ -77,7 +77,7 @@ def extract_file_strings(vw, file_path):
|
|||||||
|
|
||||||
|
|
||||||
def extract_features(vw, file_path):
|
def extract_features(vw, file_path):
|
||||||
'''
|
"""
|
||||||
extract file features from given workspace
|
extract file features from given workspace
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -86,7 +86,7 @@ def extract_features(vw, file_path):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, VA]: a feature and its location.
|
Tuple[Feature, VA]: a feature and its location.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
for file_handler in FILE_HANDLERS:
|
for file_handler in FILE_HANDLERS:
|
||||||
for feature, va in file_handler(vw, file_path):
|
for feature, va in file_handler(vw, file_path):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from capa.features.extractors import loops
|
|||||||
|
|
||||||
|
|
||||||
def interface_extract_function_XXX(f):
|
def interface_extract_function_XXX(f):
|
||||||
'''
|
"""
|
||||||
parse features from the given function.
|
parse features from the given function.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -13,58 +13,58 @@ def interface_extract_function_XXX(f):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
(Feature, int): the feature and the address at which its found.
|
(Feature, int): the feature and the address at which its found.
|
||||||
'''
|
"""
|
||||||
yield NotImplementedError('feature'), NotImplementedError('virtual address')
|
yield NotImplementedError("feature"), NotImplementedError("virtual address")
|
||||||
|
|
||||||
|
|
||||||
def get_switches(vw):
|
def get_switches(vw):
|
||||||
'''
|
"""
|
||||||
caching accessor to vivisect workspace switch constructs.
|
caching accessor to vivisect workspace switch constructs.
|
||||||
'''
|
"""
|
||||||
if 'switches' in vw.metadata:
|
if "switches" in vw.metadata:
|
||||||
return vw.metadata['switches']
|
return vw.metadata["switches"]
|
||||||
else:
|
else:
|
||||||
# addresses of switches in the program
|
# addresses of switches in the program
|
||||||
switches = set()
|
switches = set()
|
||||||
|
|
||||||
for case_va, _ in filter(lambda t: 'case' in t[1], vw.getNames()):
|
for case_va, _ in filter(lambda t: "case" in t[1], vw.getNames()):
|
||||||
# assume that the xref to a case location is a switch construct
|
# assume that the xref to a case location is a switch construct
|
||||||
for switch_va, _, _, _ in vw.getXrefsTo(case_va):
|
for switch_va, _, _, _ in vw.getXrefsTo(case_va):
|
||||||
switches.add(switch_va)
|
switches.add(switch_va)
|
||||||
|
|
||||||
vw.metadata['switches'] = switches
|
vw.metadata["switches"] = switches
|
||||||
return switches
|
return switches
|
||||||
|
|
||||||
|
|
||||||
def get_functions_with_switch(vw):
|
def get_functions_with_switch(vw):
|
||||||
if 'functions_with_switch' in vw.metadata:
|
if "functions_with_switch" in vw.metadata:
|
||||||
return vw.metadata['functions_with_switch']
|
return vw.metadata["functions_with_switch"]
|
||||||
else:
|
else:
|
||||||
functions = set()
|
functions = set()
|
||||||
for switch in get_switches(vw):
|
for switch in get_switches(vw):
|
||||||
functions.add(vw.getFunction(switch))
|
functions.add(vw.getFunction(switch))
|
||||||
vw.metadata['functions_with_switch'] = functions
|
vw.metadata["functions_with_switch"] = functions
|
||||||
return functions
|
return functions
|
||||||
|
|
||||||
|
|
||||||
def extract_function_switch(f):
|
def extract_function_switch(f):
|
||||||
'''
|
"""
|
||||||
parse if a function contains a switch statement based on location names
|
parse if a function contains a switch statement based on location names
|
||||||
method can be optimized
|
method can be optimized
|
||||||
'''
|
"""
|
||||||
if f.va in get_functions_with_switch(f.vw):
|
if f.va in get_functions_with_switch(f.vw):
|
||||||
yield Characteristic('switch', True), f.va
|
yield Characteristic("switch", True), f.va
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_to(f):
|
def extract_function_calls_to(f):
|
||||||
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
||||||
yield Characteristic('calls to', True), src
|
yield Characteristic("calls to", True), src
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(f):
|
def extract_function_loop(f):
|
||||||
'''
|
"""
|
||||||
parse if a function has a loop
|
parse if a function has a loop
|
||||||
'''
|
"""
|
||||||
edges = []
|
edges = []
|
||||||
|
|
||||||
for bb in f.basic_blocks:
|
for bb in f.basic_blocks:
|
||||||
@@ -74,11 +74,11 @@ def extract_function_loop(f):
|
|||||||
edges.append((bb.va, bva))
|
edges.append((bb.va, bva))
|
||||||
|
|
||||||
if edges and loops.has_loop(edges):
|
if edges and loops.has_loop(edges):
|
||||||
yield Characteristic('loop', True), f.va
|
yield Characteristic("loop", True), f.va
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f):
|
def extract_features(f):
|
||||||
'''
|
"""
|
||||||
extract features from the given function.
|
extract features from the given function.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -86,14 +86,10 @@ def extract_features(f):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Feature, set[VA]: the features and their location found in this function.
|
Feature, set[VA]: the features and their location found in this function.
|
||||||
'''
|
"""
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, va in func_handler(f):
|
for feature, va in func_handler(f):
|
||||||
yield feature, va
|
yield feature, va
|
||||||
|
|
||||||
|
|
||||||
FUNCTION_HANDLERS = (
|
FUNCTION_HANDLERS = (extract_function_switch, extract_function_calls_to, extract_function_loop)
|
||||||
extract_function_switch,
|
|
||||||
extract_function_calls_to,
|
|
||||||
extract_function_loop
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -14,13 +14,13 @@ Amd64RipRelOper = envi.archs.amd64.disasm.Amd64RipRelOper
|
|||||||
LOC_OP = vivisect.const.LOC_OP
|
LOC_OP = vivisect.const.LOC_OP
|
||||||
IF_NOFALL = envi.IF_NOFALL
|
IF_NOFALL = envi.IF_NOFALL
|
||||||
REF_CODE = vivisect.const.REF_CODE
|
REF_CODE = vivisect.const.REF_CODE
|
||||||
FAR_BRANCH_MASK = (envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH)
|
FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH
|
||||||
|
|
||||||
DESTRUCTIVE_MNEMONICS = ('mov', 'lea', 'pop', 'xor')
|
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
|
||||||
|
|
||||||
|
|
||||||
def get_previous_instructions(vw, va):
|
def get_previous_instructions(vw, va):
|
||||||
'''
|
"""
|
||||||
collect the instructions that flow to the given address, local to the current function.
|
collect the instructions that flow to the given address, local to the current function.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -29,7 +29,7 @@ def get_previous_instructions(vw, va):
|
|||||||
|
|
||||||
returns:
|
returns:
|
||||||
List[int]: the prior instructions, which may fallthrough and/or jump here
|
List[int]: the prior instructions, which may fallthrough and/or jump here
|
||||||
'''
|
"""
|
||||||
ret = []
|
ret = []
|
||||||
|
|
||||||
# find the immediate prior instruction.
|
# find the immediate prior instruction.
|
||||||
@@ -61,7 +61,7 @@ class NotFoundError(Exception):
|
|||||||
|
|
||||||
|
|
||||||
def find_definition(vw, va, reg):
|
def find_definition(vw, va, reg):
|
||||||
'''
|
"""
|
||||||
scan backwards from the given address looking for assignments to the given register.
|
scan backwards from the given address looking for assignments to the given register.
|
||||||
if a constant, return that value.
|
if a constant, return that value.
|
||||||
|
|
||||||
@@ -75,7 +75,7 @@ def find_definition(vw, va, reg):
|
|||||||
|
|
||||||
raises:
|
raises:
|
||||||
NotFoundError: when the definition cannot be found.
|
NotFoundError: when the definition cannot be found.
|
||||||
'''
|
"""
|
||||||
q = collections.deque()
|
q = collections.deque()
|
||||||
seen = set([])
|
seen = set([])
|
||||||
|
|
||||||
@@ -95,10 +95,7 @@ def find_definition(vw, va, reg):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
opnd0 = insn.opers[0]
|
opnd0 = insn.opers[0]
|
||||||
if not \
|
if not (isinstance(opnd0, i386RegOper) and opnd0.reg == reg and insn.mnem in DESTRUCTIVE_MNEMONICS):
|
||||||
(isinstance(opnd0, i386RegOper)
|
|
||||||
and opnd0.reg == reg
|
|
||||||
and insn.mnem in DESTRUCTIVE_MNEMONICS):
|
|
||||||
q.extend(get_previous_instructions(vw, cur))
|
q.extend(get_previous_instructions(vw, cur))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -107,7 +104,7 @@ def find_definition(vw, va, reg):
|
|||||||
# we currently only support extracting the constant from something like: `mov $reg, IAT`
|
# we currently only support extracting the constant from something like: `mov $reg, IAT`
|
||||||
# so, any other pattern results in an unknown value, represented by None.
|
# so, any other pattern results in an unknown value, represented by None.
|
||||||
# this is a good place to extend in the future, if we need more robust support.
|
# this is a good place to extend in the future, if we need more robust support.
|
||||||
if insn.mnem != 'mov':
|
if insn.mnem != "mov":
|
||||||
return (cur, None)
|
return (cur, None)
|
||||||
else:
|
else:
|
||||||
opnd1 = insn.opers[1]
|
opnd1 = insn.opers[1]
|
||||||
@@ -128,12 +125,11 @@ def is_indirect_call(vw, va, insn=None):
|
|||||||
if insn is None:
|
if insn is None:
|
||||||
insn = vw.parseOpcode(va)
|
insn = vw.parseOpcode(va)
|
||||||
|
|
||||||
return (insn.mnem == 'call'
|
return insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
|
||||||
and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper))
|
|
||||||
|
|
||||||
|
|
||||||
def resolve_indirect_call(vw, va, insn=None):
|
def resolve_indirect_call(vw, va, insn=None):
|
||||||
'''
|
"""
|
||||||
inspect the given indirect call instruction and attempt to resolve the target address.
|
inspect the given indirect call instruction and attempt to resolve the target address.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -145,7 +141,7 @@ def resolve_indirect_call(vw, va, insn=None):
|
|||||||
|
|
||||||
raises:
|
raises:
|
||||||
NotFoundError: when the definition cannot be found.
|
NotFoundError: when the definition cannot be found.
|
||||||
'''
|
"""
|
||||||
if insn is None:
|
if insn is None:
|
||||||
insn = vw.parseOpcode(va)
|
insn = vw.parseOpcode(va)
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from capa.features.extractors.viv.indirect_calls import resolve_indirect_call
|
|||||||
|
|
||||||
|
|
||||||
def interface_extract_instruction_XXX(f, bb, insn):
|
def interface_extract_instruction_XXX(f, bb, insn):
|
||||||
'''
|
"""
|
||||||
parse features from the given instruction.
|
parse features from the given instruction.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -25,31 +25,31 @@ def interface_extract_instruction_XXX(f, bb, insn):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
(Feature, int): the feature and the address at which its found.
|
(Feature, int): the feature and the address at which its found.
|
||||||
'''
|
"""
|
||||||
yield NotImplementedError('feature'), NotImplementedError('virtual address')
|
yield NotImplementedError("feature"), NotImplementedError("virtual address")
|
||||||
|
|
||||||
|
|
||||||
def get_imports(vw):
|
def get_imports(vw):
|
||||||
'''
|
"""
|
||||||
caching accessor to vivisect workspace imports
|
caching accessor to vivisect workspace imports
|
||||||
avoids performance issues in vivisect when collecting locations
|
avoids performance issues in vivisect when collecting locations
|
||||||
'''
|
"""
|
||||||
if 'imports' in vw.metadata:
|
if "imports" in vw.metadata:
|
||||||
return vw.metadata['imports']
|
return vw.metadata["imports"]
|
||||||
else:
|
else:
|
||||||
imports = {p[0]: p[3] for p in vw.getImports()}
|
imports = {p[0]: p[3] for p in vw.getImports()}
|
||||||
vw.metadata['imports'] = imports
|
vw.metadata["imports"] = imports
|
||||||
return imports
|
return imports
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_api_features(f, bb, insn):
|
def extract_insn_api_features(f, bb, insn):
|
||||||
'''parse API features from the given instruction.'''
|
"""parse API features from the given instruction."""
|
||||||
|
|
||||||
# example:
|
# example:
|
||||||
#
|
#
|
||||||
# call dword [0x00473038]
|
# call dword [0x00473038]
|
||||||
|
|
||||||
if insn.mnem != 'call':
|
if insn.mnem != "call":
|
||||||
return
|
return
|
||||||
|
|
||||||
# traditional call via IAT
|
# traditional call via IAT
|
||||||
@@ -71,7 +71,7 @@ def extract_insn_api_features(f, bb, insn):
|
|||||||
target = insn.opers[0].getOperValue(insn)
|
target = insn.opers[0].getOperValue(insn)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
thunk = f.vw.getFunctionMeta(target, 'Thunk')
|
thunk = f.vw.getFunctionMeta(target, "Thunk")
|
||||||
except vivisect.exc.InvalidFunction:
|
except vivisect.exc.InvalidFunction:
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
@@ -108,7 +108,7 @@ def extract_insn_api_features(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def extract_insn_number_features(f, bb, insn):
|
def extract_insn_number_features(f, bb, insn):
|
||||||
'''parse number features from the given instruction.'''
|
"""parse number features from the given instruction."""
|
||||||
# example:
|
# example:
|
||||||
#
|
#
|
||||||
# push 3136B0h ; dwControlCode
|
# push 3136B0h ; dwControlCode
|
||||||
@@ -124,9 +124,7 @@ def extract_insn_number_features(f, bb, insn):
|
|||||||
# assume its not also a constant.
|
# assume its not also a constant.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if insn.mnem == 'add' \
|
if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.disasm.REG_ESP:
|
||||||
and insn.opers[0].isReg() \
|
|
||||||
and insn.opers[0].reg == envi.archs.i386.disasm.REG_ESP:
|
|
||||||
# skip things like:
|
# skip things like:
|
||||||
#
|
#
|
||||||
# .text:00401140 call sub_407E2B
|
# .text:00401140 call sub_407E2B
|
||||||
@@ -137,13 +135,13 @@ def extract_insn_number_features(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def extract_insn_bytes_features(f, bb, insn):
|
def extract_insn_bytes_features(f, bb, insn):
|
||||||
'''
|
"""
|
||||||
parse byte sequence features from the given instruction.
|
parse byte sequence features from the given instruction.
|
||||||
example:
|
example:
|
||||||
# push offset iid_004118d4_IShellLinkA ; riid
|
# push offset iid_004118d4_IShellLinkA ; riid
|
||||||
'''
|
"""
|
||||||
for oper in insn.opers:
|
for oper in insn.opers:
|
||||||
if insn.mnem == 'call':
|
if insn.mnem == "call":
|
||||||
# ignore call instructions
|
# ignore call instructions
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -184,7 +182,7 @@ def read_string(vw, offset):
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
if alen > 0:
|
if alen > 0:
|
||||||
return vw.readMemory(offset, alen).decode('utf-8')
|
return vw.readMemory(offset, alen).decode("utf-8")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ulen = vw.detectUnicode(offset)
|
ulen = vw.detectUnicode(offset)
|
||||||
@@ -199,13 +197,13 @@ def read_string(vw, offset):
|
|||||||
# vivisect seems to mis-detect the end unicode strings
|
# vivisect seems to mis-detect the end unicode strings
|
||||||
# off by one, too short
|
# off by one, too short
|
||||||
ulen += 1
|
ulen += 1
|
||||||
return vw.readMemory(offset, ulen).decode('utf-16')
|
return vw.readMemory(offset, ulen).decode("utf-16")
|
||||||
|
|
||||||
raise ValueError('not a string', offset)
|
raise ValueError("not a string", offset)
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_string_features(f, bb, insn):
|
def extract_insn_string_features(f, bb, insn):
|
||||||
'''parse string features from the given instruction.'''
|
"""parse string features from the given instruction."""
|
||||||
# example:
|
# example:
|
||||||
#
|
#
|
||||||
# push offset aAcr ; "ACR > "
|
# push offset aAcr ; "ACR > "
|
||||||
@@ -222,11 +220,11 @@ def extract_insn_string_features(f, bb, insn):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
yield String(s.rstrip('\x00')), insn.va
|
yield String(s.rstrip("\x00")), insn.va
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_offset_features(f, bb, insn):
|
def extract_insn_offset_features(f, bb, insn):
|
||||||
'''parse structure offset features from the given instruction.'''
|
"""parse structure offset features from the given instruction."""
|
||||||
# example:
|
# example:
|
||||||
#
|
#
|
||||||
# .text:0040112F cmp [esi+4], ebx
|
# .text:0040112F cmp [esi+4], ebx
|
||||||
@@ -249,15 +247,18 @@ def extract_insn_offset_features(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def is_security_cookie(f, bb, insn):
|
def is_security_cookie(f, bb, insn):
|
||||||
'''
|
"""
|
||||||
check if an instruction is related to security cookie checks
|
check if an instruction is related to security cookie checks
|
||||||
'''
|
"""
|
||||||
# security cookie check should use SP or BP
|
# security cookie check should use SP or BP
|
||||||
oper = insn.opers[1]
|
oper = insn.opers[1]
|
||||||
if oper.isReg() \
|
if oper.isReg() and oper.reg not in [
|
||||||
and oper.reg not in [envi.archs.i386.disasm.REG_ESP, envi.archs.i386.disasm.REG_EBP,
|
envi.archs.i386.disasm.REG_ESP,
|
||||||
# TODO: do x64 support for real.
|
envi.archs.i386.disasm.REG_EBP,
|
||||||
envi.archs.amd64.disasm.REG_RBP, envi.archs.amd64.disasm.REG_RSP]:
|
# TODO: do x64 support for real.
|
||||||
|
envi.archs.amd64.disasm.REG_RBP,
|
||||||
|
envi.archs.amd64.disasm.REG_RSP,
|
||||||
|
]:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# expect security cookie init in first basic block within first bytes (instructions)
|
# expect security cookie init in first basic block within first bytes (instructions)
|
||||||
@@ -273,11 +274,11 @@ def is_security_cookie(f, bb, insn):
|
|||||||
|
|
||||||
|
|
||||||
def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||||
'''
|
"""
|
||||||
parse non-zeroing XOR instruction from the given instruction.
|
parse non-zeroing XOR instruction from the given instruction.
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||||
'''
|
"""
|
||||||
if insn.mnem != 'xor':
|
if insn.mnem != "xor":
|
||||||
return
|
return
|
||||||
|
|
||||||
if insn.opers[0] == insn.opers[1]:
|
if insn.opers[0] == insn.opers[1]:
|
||||||
@@ -286,24 +287,24 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
|||||||
if is_security_cookie(f, bb, insn):
|
if is_security_cookie(f, bb, insn):
|
||||||
return
|
return
|
||||||
|
|
||||||
yield Characteristic('nzxor', True), insn.va
|
yield Characteristic("nzxor", True), insn.va
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_mnemonic_features(f, bb, insn):
|
def extract_insn_mnemonic_features(f, bb, insn):
|
||||||
'''parse mnemonic features from the given instruction.'''
|
"""parse mnemonic features from the given instruction."""
|
||||||
yield Mnemonic(insn.mnem), insn.va
|
yield Mnemonic(insn.mnem), insn.va
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||||
'''
|
"""
|
||||||
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
'''
|
"""
|
||||||
# TODO handle where fs/gs are loaded into a register or onto the stack and used later
|
# TODO handle where fs/gs are loaded into a register or onto the stack and used later
|
||||||
|
|
||||||
if insn.mnem not in ['push', 'mov']:
|
if insn.mnem not in ["push", "mov"]:
|
||||||
return
|
return
|
||||||
|
|
||||||
if 'fs' in insn.getPrefixName():
|
if "fs" in insn.getPrefixName():
|
||||||
for oper in insn.opers:
|
for oper in insn.opers:
|
||||||
# examples
|
# examples
|
||||||
#
|
#
|
||||||
@@ -312,27 +313,29 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
|||||||
# IDA: push large dword ptr fs:30h
|
# IDA: push large dword ptr fs:30h
|
||||||
# viv: fs: push dword [0x00000030]
|
# viv: fs: push dword [0x00000030]
|
||||||
# fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0
|
# fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0
|
||||||
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or \
|
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (
|
||||||
(isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30):
|
isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
|
||||||
yield Characteristic('peb access', True), insn.va
|
):
|
||||||
elif 'gs' in insn.getPrefixName():
|
yield Characteristic("peb access", True), insn.va
|
||||||
|
elif "gs" in insn.getPrefixName():
|
||||||
for oper in insn.opers:
|
for oper in insn.opers:
|
||||||
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or \
|
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
|
||||||
(isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60):
|
isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
|
||||||
yield Characteristic('peb access', True), insn.va
|
):
|
||||||
|
yield Characteristic("peb access", True), insn.va
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_segment_access_features(f, bb, insn):
|
def extract_insn_segment_access_features(f, bb, insn):
|
||||||
''' parse the instruction for access to fs or gs '''
|
""" parse the instruction for access to fs or gs """
|
||||||
prefix = insn.getPrefixName()
|
prefix = insn.getPrefixName()
|
||||||
|
|
||||||
if prefix == 'fs':
|
if prefix == "fs":
|
||||||
yield Characteristic('fs access', True), insn.va
|
yield Characteristic("fs access", True), insn.va
|
||||||
|
|
||||||
if prefix == 'gs':
|
if prefix == "gs":
|
||||||
yield Characteristic('gs access', True), insn.va
|
yield Characteristic("gs access", True), insn.va
|
||||||
|
|
||||||
|
|
||||||
def get_section(vw, va):
|
def get_section(vw, va):
|
||||||
@@ -344,16 +347,16 @@ def get_section(vw, va):
|
|||||||
|
|
||||||
|
|
||||||
def extract_insn_cross_section_cflow(f, bb, insn):
|
def extract_insn_cross_section_cflow(f, bb, insn):
|
||||||
'''
|
"""
|
||||||
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
||||||
'''
|
"""
|
||||||
for va, flags in insn.getBranches():
|
for va, flags in insn.getBranches():
|
||||||
if flags & envi.BR_FALL:
|
if flags & envi.BR_FALL:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# skip 32-bit calls to imports
|
# skip 32-bit calls to imports
|
||||||
if insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
|
if insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
|
||||||
oper = insn.opers[0]
|
oper = insn.opers[0]
|
||||||
target = oper.getOperAddr(insn)
|
target = oper.getOperAddr(insn)
|
||||||
|
|
||||||
@@ -361,7 +364,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# skip 64-bit calls to imports
|
# skip 64-bit calls to imports
|
||||||
elif insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
|
elif insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
|
||||||
op = insn.opers[0]
|
op = insn.opers[0]
|
||||||
target = op.getOperAddr(insn)
|
target = op.getOperAddr(insn)
|
||||||
|
|
||||||
@@ -369,7 +372,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if get_section(f.vw, insn.va) != get_section(f.vw, va):
|
if get_section(f.vw, insn.va) != get_section(f.vw, va):
|
||||||
yield Characteristic('cross section flow', True), insn.va
|
yield Characteristic("cross section flow", True), insn.va
|
||||||
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
continue
|
continue
|
||||||
@@ -378,7 +381,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
|||||||
# this is a feature that's most relevant at the function scope,
|
# this is a feature that's most relevant at the function scope,
|
||||||
# however, its most efficient to extract at the instruction scope.
|
# however, its most efficient to extract at the instruction scope.
|
||||||
def extract_function_calls_from(f, bb, insn):
|
def extract_function_calls_from(f, bb, insn):
|
||||||
if insn.mnem != 'call':
|
if insn.mnem != "call":
|
||||||
return
|
return
|
||||||
|
|
||||||
target = None
|
target = None
|
||||||
@@ -387,7 +390,7 @@ def extract_function_calls_from(f, bb, insn):
|
|||||||
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
|
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
|
||||||
oper = insn.opers[0]
|
oper = insn.opers[0]
|
||||||
target = oper.getOperAddr(insn)
|
target = oper.getOperAddr(insn)
|
||||||
yield Characteristic('calls from', True), target
|
yield Characteristic("calls from", True), target
|
||||||
|
|
||||||
# call via thunk on x86,
|
# call via thunk on x86,
|
||||||
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
|
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
|
||||||
@@ -396,44 +399,44 @@ def extract_function_calls_from(f, bb, insn):
|
|||||||
# see Lab21-01.exe_:0x140001178
|
# see Lab21-01.exe_:0x140001178
|
||||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
|
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
|
||||||
target = insn.opers[0].getOperValue(insn)
|
target = insn.opers[0].getOperValue(insn)
|
||||||
yield Characteristic('calls from', True), target
|
yield Characteristic("calls from", True), target
|
||||||
|
|
||||||
# call via IAT, x64
|
# call via IAT, x64
|
||||||
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
|
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
|
||||||
op = insn.opers[0]
|
op = insn.opers[0]
|
||||||
target = op.getOperAddr(insn)
|
target = op.getOperAddr(insn)
|
||||||
yield Characteristic('calls from', True), target
|
yield Characteristic("calls from", True), target
|
||||||
|
|
||||||
if target and target == f.va:
|
if target and target == f.va:
|
||||||
# if we found a jump target and it's the function address
|
# if we found a jump target and it's the function address
|
||||||
# mark as recursive
|
# mark as recursive
|
||||||
yield Characteristic('recursive call', True), target
|
yield Characteristic("recursive call", True), target
|
||||||
|
|
||||||
|
|
||||||
# this is a feature that's most relevant at the function or basic block scope,
|
# this is a feature that's most relevant at the function or basic block scope,
|
||||||
# however, its most efficient to extract at the instruction scope.
|
# however, its most efficient to extract at the instruction scope.
|
||||||
def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||||
'''
|
"""
|
||||||
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
||||||
does not include calls like => call ds:dword_ABD4974
|
does not include calls like => call ds:dword_ABD4974
|
||||||
'''
|
"""
|
||||||
if insn.mnem != 'call':
|
if insn.mnem != "call":
|
||||||
return
|
return
|
||||||
|
|
||||||
# Checks below work for x86 and x64
|
# Checks below work for x86 and x64
|
||||||
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
|
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
|
||||||
# call edx
|
# call edx
|
||||||
yield Characteristic('indirect call', True), insn.va
|
yield Characteristic("indirect call", True), insn.va
|
||||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper):
|
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper):
|
||||||
# call dword ptr [eax+50h]
|
# call dword ptr [eax+50h]
|
||||||
yield Characteristic('indirect call', True), insn.va
|
yield Characteristic("indirect call", True), insn.va
|
||||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper):
|
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper):
|
||||||
# call qword ptr [rsp+78h]
|
# call qword ptr [rsp+78h]
|
||||||
yield Characteristic('indirect call', True), insn.va
|
yield Characteristic("indirect call", True), insn.va
|
||||||
|
|
||||||
|
|
||||||
def extract_features(f, bb, insn):
|
def extract_features(f, bb, insn):
|
||||||
'''
|
"""
|
||||||
extract features from the given insn.
|
extract features from the given insn.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -443,7 +446,7 @@ def extract_features(f, bb, insn):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Feature, set[VA]: the features and their location found in this insn.
|
Feature, set[VA]: the features and their location found in this insn.
|
||||||
'''
|
"""
|
||||||
for insn_handler in INSTRUCTION_HANDLERS:
|
for insn_handler in INSTRUCTION_HANDLERS:
|
||||||
for feature, va in insn_handler(f, bb, insn):
|
for feature, va in insn_handler(f, bb, insn):
|
||||||
yield feature, va
|
yield feature, va
|
||||||
@@ -461,5 +464,5 @@ INSTRUCTION_HANDLERS = (
|
|||||||
extract_insn_cross_section_cflow,
|
extract_insn_cross_section_cflow,
|
||||||
extract_insn_segment_access_features,
|
extract_insn_segment_access_features,
|
||||||
extract_function_calls_from,
|
extract_function_calls_from,
|
||||||
extract_function_indirect_call_characteristic_features
|
extract_function_indirect_call_characteristic_features,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ class Export(Feature):
|
|||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'Export(%s)' % (self.value)
|
return "Export(%s)" % (self.value)
|
||||||
|
|
||||||
|
|
||||||
class Import(Feature):
|
class Import(Feature):
|
||||||
@@ -18,7 +18,7 @@ class Import(Feature):
|
|||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'Import(%s)' % (self.value)
|
return "Import(%s)" % (self.value)
|
||||||
|
|
||||||
|
|
||||||
class Section(Feature):
|
class Section(Feature):
|
||||||
@@ -28,4 +28,4 @@ class Section(Feature):
|
|||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'Section(%s)' % (self.value)
|
return "Section(%s)" % (self.value)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
'''
|
"""
|
||||||
capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`
|
capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`
|
||||||
|
|
||||||
json format:
|
json format:
|
||||||
@@ -39,7 +39,7 @@ json format:
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
'''
|
"""
|
||||||
import json
|
import json
|
||||||
import zlib
|
import zlib
|
||||||
import logging
|
import logging
|
||||||
@@ -61,10 +61,7 @@ def serialize_feature(feature):
|
|||||||
return feature.freeze_serialize()
|
return feature.freeze_serialize()
|
||||||
|
|
||||||
|
|
||||||
KNOWN_FEATURES = {
|
KNOWN_FEATURES = {F.__name__: F for F in capa.features.Feature.__subclasses__()}
|
||||||
F.__name__: F
|
|
||||||
for F in capa.features.Feature.__subclasses__()
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def deserialize_feature(doc):
|
def deserialize_feature(doc):
|
||||||
@@ -73,7 +70,7 @@ def deserialize_feature(doc):
|
|||||||
|
|
||||||
|
|
||||||
def dumps(extractor):
|
def dumps(extractor):
|
||||||
'''
|
"""
|
||||||
serialize the given extractor to a string
|
serialize the given extractor to a string
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -81,79 +78,64 @@ def dumps(extractor):
|
|||||||
|
|
||||||
returns:
|
returns:
|
||||||
str: the serialized features.
|
str: the serialized features.
|
||||||
'''
|
"""
|
||||||
ret = {
|
ret = {"version": 1, "functions": {}, "scopes": {"file": [], "function": [], "basic block": [], "instruction": [],}}
|
||||||
'version': 1,
|
|
||||||
'functions': {},
|
|
||||||
'scopes': {
|
|
||||||
'file': [],
|
|
||||||
'function': [],
|
|
||||||
'basic block': [],
|
|
||||||
'instruction': [],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for feature, va in extractor.extract_file_features():
|
for feature, va in extractor.extract_file_features():
|
||||||
ret['scopes']['file'].append(
|
ret["scopes"]["file"].append(serialize_feature(feature) + (hex(va), ()))
|
||||||
serialize_feature(feature) + (hex(va), ())
|
|
||||||
)
|
|
||||||
|
|
||||||
for f in extractor.get_functions():
|
for f in extractor.get_functions():
|
||||||
ret['functions'][hex(f)] = {}
|
ret["functions"][hex(f)] = {}
|
||||||
|
|
||||||
for feature, va in extractor.extract_function_features(f):
|
for feature, va in extractor.extract_function_features(f):
|
||||||
ret['scopes']['function'].append(
|
ret["scopes"]["function"].append(serialize_feature(feature) + (hex(va), (hex(f),)))
|
||||||
serialize_feature(feature) + (hex(va), (hex(f), ))
|
|
||||||
)
|
|
||||||
|
|
||||||
for bb in extractor.get_basic_blocks(f):
|
for bb in extractor.get_basic_blocks(f):
|
||||||
ret['functions'][hex(f)][hex(bb)] = []
|
ret["functions"][hex(f)][hex(bb)] = []
|
||||||
|
|
||||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||||
ret['scopes']['basic block'].append(
|
ret["scopes"]["basic block"].append(serialize_feature(feature) + (hex(va), (hex(f), hex(bb),)))
|
||||||
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), ))
|
|
||||||
)
|
|
||||||
|
|
||||||
for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]):
|
for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]):
|
||||||
ret['functions'][hex(f)][hex(bb)].append(hex(insnva))
|
ret["functions"][hex(f)][hex(bb)].append(hex(insnva))
|
||||||
|
|
||||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||||
ret['scopes']['instruction'].append(
|
ret["scopes"]["instruction"].append(
|
||||||
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva), ))
|
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva),))
|
||||||
)
|
)
|
||||||
return json.dumps(ret)
|
return json.dumps(ret)
|
||||||
|
|
||||||
|
|
||||||
def loads(s):
|
def loads(s):
|
||||||
'''deserialize a set of features (as a NullFeatureExtractor) from a string.'''
|
"""deserialize a set of features (as a NullFeatureExtractor) from a string."""
|
||||||
doc = json.loads(s)
|
doc = json.loads(s)
|
||||||
|
|
||||||
if doc.get('version') != 1:
|
if doc.get("version") != 1:
|
||||||
raise ValueError('unsupported freeze format version: %d' % (doc.get('version')))
|
raise ValueError("unsupported freeze format version: %d" % (doc.get("version")))
|
||||||
|
|
||||||
features = {
|
features = {
|
||||||
'file features': [],
|
"file features": [],
|
||||||
'functions': {},
|
"functions": {},
|
||||||
}
|
}
|
||||||
|
|
||||||
for fva, function in doc.get('functions', {}).items():
|
for fva, function in doc.get("functions", {}).items():
|
||||||
fva = int(fva, 0x10)
|
fva = int(fva, 0x10)
|
||||||
features['functions'][fva] = {
|
features["functions"][fva] = {
|
||||||
'features': [],
|
"features": [],
|
||||||
'basic blocks': {},
|
"basic blocks": {},
|
||||||
}
|
}
|
||||||
|
|
||||||
for bbva, bb in function.items():
|
for bbva, bb in function.items():
|
||||||
bbva = int(bbva, 0x10)
|
bbva = int(bbva, 0x10)
|
||||||
features['functions'][fva]['basic blocks'][bbva] = {
|
features["functions"][fva]["basic blocks"][bbva] = {
|
||||||
'features': [],
|
"features": [],
|
||||||
'instructions': {},
|
"instructions": {},
|
||||||
}
|
}
|
||||||
|
|
||||||
for insnva in bb:
|
for insnva in bb:
|
||||||
insnva = int(insnva, 0x10)
|
insnva = int(insnva, 0x10)
|
||||||
features['functions'][fva]['basic blocks'][bbva]['instructions'][insnva] = {
|
features["functions"][fva]["basic blocks"][bbva]["instructions"][insnva] = {
|
||||||
'features': [],
|
"features": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
# in the following blocks, each entry looks like:
|
# in the following blocks, each entry looks like:
|
||||||
@@ -161,13 +143,13 @@ def loads(s):
|
|||||||
# ('MatchedRule', ('foo', ), '0x401000', ('0x401000', ))
|
# ('MatchedRule', ('foo', ), '0x401000', ('0x401000', ))
|
||||||
# ^^^^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^ ^^^^^^^^^^^^^^
|
# ^^^^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^ ^^^^^^^^^^^^^^
|
||||||
# feature name args addr func/bb/insn
|
# feature name args addr func/bb/insn
|
||||||
for feature in doc.get('scopes', {}).get('file', []):
|
for feature in doc.get("scopes", {}).get("file", []):
|
||||||
va, loc = feature[2:]
|
va, loc = feature[2:]
|
||||||
va = int(va, 0x10)
|
va = int(va, 0x10)
|
||||||
feature = deserialize_feature(feature[:2])
|
feature = deserialize_feature(feature[:2])
|
||||||
features['file features'].append((va, feature))
|
features["file features"].append((va, feature))
|
||||||
|
|
||||||
for feature in doc.get('scopes', {}).get('function', []):
|
for feature in doc.get("scopes", {}).get("function", []):
|
||||||
# fetch the pair like:
|
# fetch the pair like:
|
||||||
#
|
#
|
||||||
# ('0x401000', ('0x401000', ))
|
# ('0x401000', ('0x401000', ))
|
||||||
@@ -183,42 +165,42 @@ def loads(s):
|
|||||||
# ^^^^^^^^^^^^^ ^^^^^^^^^
|
# ^^^^^^^^^^^^^ ^^^^^^^^^
|
||||||
# feature name args
|
# feature name args
|
||||||
feature = deserialize_feature(feature[:2])
|
feature = deserialize_feature(feature[:2])
|
||||||
features['functions'][loc[0]]['features'].append((va, feature))
|
features["functions"][loc[0]]["features"].append((va, feature))
|
||||||
|
|
||||||
for feature in doc.get('scopes', {}).get('basic block', []):
|
for feature in doc.get("scopes", {}).get("basic block", []):
|
||||||
va, loc = feature[2:]
|
va, loc = feature[2:]
|
||||||
va = int(va, 0x10)
|
va = int(va, 0x10)
|
||||||
loc = [int(lo, 0x10) for lo in loc]
|
loc = [int(lo, 0x10) for lo in loc]
|
||||||
feature = deserialize_feature(feature[:2])
|
feature = deserialize_feature(feature[:2])
|
||||||
features['functions'][loc[0]]['basic blocks'][loc[1]]['features'].append((va, feature))
|
features["functions"][loc[0]]["basic blocks"][loc[1]]["features"].append((va, feature))
|
||||||
|
|
||||||
for feature in doc.get('scopes', {}).get('instruction', []):
|
for feature in doc.get("scopes", {}).get("instruction", []):
|
||||||
va, loc = feature[2:]
|
va, loc = feature[2:]
|
||||||
va = int(va, 0x10)
|
va = int(va, 0x10)
|
||||||
loc = [int(lo, 0x10) for lo in loc]
|
loc = [int(lo, 0x10) for lo in loc]
|
||||||
feature = deserialize_feature(feature[:2])
|
feature = deserialize_feature(feature[:2])
|
||||||
features['functions'][loc[0]]['basic blocks'][loc[1]]['instructions'][loc[2]]['features'].append((va, feature))
|
features["functions"][loc[0]]["basic blocks"][loc[1]]["instructions"][loc[2]]["features"].append((va, feature))
|
||||||
|
|
||||||
return capa.features.extractors.NullFeatureExtractor(features)
|
return capa.features.extractors.NullFeatureExtractor(features)
|
||||||
|
|
||||||
|
|
||||||
MAGIC = 'capa0000'.encode('ascii')
|
MAGIC = "capa0000".encode("ascii")
|
||||||
|
|
||||||
|
|
||||||
def dump(extractor):
|
def dump(extractor):
|
||||||
'''serialize the given extractor to a byte array.'''
|
"""serialize the given extractor to a byte array."""
|
||||||
return MAGIC + zlib.compress(dumps(extractor).encode('utf-8'))
|
return MAGIC + zlib.compress(dumps(extractor).encode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
def is_freeze(buf):
|
def is_freeze(buf):
|
||||||
return buf[:len(MAGIC)] == MAGIC
|
return buf[: len(MAGIC)] == MAGIC
|
||||||
|
|
||||||
|
|
||||||
def load(buf):
|
def load(buf):
|
||||||
'''deserialize a set of features (as a NullFeatureExtractor) from a byte array.'''
|
"""deserialize a set of features (as a NullFeatureExtractor) from a byte array."""
|
||||||
if not is_freeze(buf):
|
if not is_freeze(buf):
|
||||||
raise ValueError('missing magic header')
|
raise ValueError("missing magic header")
|
||||||
return loads(zlib.decompress(buf[len(MAGIC):]).decode('utf-8'))
|
return loads(zlib.decompress(buf[len(MAGIC) :]).decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
|
def main(argv=None):
|
||||||
@@ -230,24 +212,21 @@ def main(argv=None):
|
|||||||
argv = sys.argv[1:]
|
argv = sys.argv[1:]
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
('auto', '(default) detect file type automatically'),
|
("auto", "(default) detect file type automatically"),
|
||||||
('pe', 'Windows PE file'),
|
("pe", "Windows PE file"),
|
||||||
('sc32', '32-bit shellcode'),
|
("sc32", "32-bit shellcode"),
|
||||||
('sc64', '64-bit shellcode'),
|
("sc64", "64-bit shellcode"),
|
||||||
]
|
]
|
||||||
format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])
|
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='save capa features to a file')
|
parser = argparse.ArgumentParser(description="save capa features to a file")
|
||||||
parser.add_argument('sample', type=str,
|
parser.add_argument("sample", type=str, help="Path to sample to analyze")
|
||||||
help='Path to sample to analyze')
|
parser.add_argument("output", type=str, help="Path to output file")
|
||||||
parser.add_argument('output', type=str,
|
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
|
||||||
help='Path to output file')
|
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument(
|
||||||
help='Enable verbose output')
|
"-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
|
||||||
parser.add_argument('-q', '--quiet', action='store_true',
|
)
|
||||||
help='Disable all output but errors')
|
|
||||||
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
|
|
||||||
help='Select sample format, %s' % format_help)
|
|
||||||
args = parser.parse_args(args=argv)
|
args = parser.parse_args(args=argv)
|
||||||
|
|
||||||
if args.quiet:
|
if args.quiet:
|
||||||
@@ -264,13 +243,15 @@ def main(argv=None):
|
|||||||
|
|
||||||
# don't import this at top level to support ida/py3 backend
|
# don't import this at top level to support ida/py3 backend
|
||||||
import capa.features.extractors.viv
|
import capa.features.extractors.viv
|
||||||
|
|
||||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
|
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
|
||||||
with open(args.output, 'wb') as f:
|
with open(args.output, "wb") as f:
|
||||||
f.write(dump(extractor))
|
f.write(dump(extractor))
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|||||||
@@ -4,9 +4,9 @@ from capa.features import Feature
|
|||||||
class API(Feature):
|
class API(Feature):
|
||||||
def __init__(self, name):
|
def __init__(self, name):
|
||||||
# Downcase library name if given
|
# Downcase library name if given
|
||||||
if '.' in name:
|
if "." in name:
|
||||||
modname, impname = name.split('.')
|
modname, impname = name.split(".")
|
||||||
name = modname.lower() + '.' + impname
|
name = modname.lower() + "." + impname
|
||||||
|
|
||||||
super(API, self).__init__([name])
|
super(API, self).__init__([name])
|
||||||
|
|
||||||
@@ -19,9 +19,9 @@ class Number(Feature):
|
|||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.symbol:
|
if self.symbol:
|
||||||
return 'number(0x%x = %s)' % (self.value, self.symbol)
|
return "number(0x%x = %s)" % (self.value, self.symbol)
|
||||||
else:
|
else:
|
||||||
return 'number(0x%x)' % (self.value)
|
return "number(0x%x)" % (self.value)
|
||||||
|
|
||||||
|
|
||||||
class Offset(Feature):
|
class Offset(Feature):
|
||||||
@@ -32,9 +32,9 @@ class Offset(Feature):
|
|||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.symbol:
|
if self.symbol:
|
||||||
return 'offset(0x%x = %s)' % (self.value, self.symbol)
|
return "offset(0x%x = %s)" % (self.value, self.symbol)
|
||||||
else:
|
else:
|
||||||
return 'offset(0x%x)' % (self.value)
|
return "offset(0x%x)" % (self.value)
|
||||||
|
|
||||||
|
|
||||||
class Mnemonic(Feature):
|
class Mnemonic(Feature):
|
||||||
@@ -43,4 +43,4 @@ class Mnemonic(Feature):
|
|||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'mnemonic(%s)' % (self.value)
|
return "mnemonic(%s)" % (self.value)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ _hex = hex
|
|||||||
def hex(i):
|
def hex(i):
|
||||||
# under py2.7, long integers get formatted with a trailing `L`
|
# under py2.7, long integers get formatted with a trailing `L`
|
||||||
# and this is not pretty. so strip it out.
|
# and this is not pretty. so strip it out.
|
||||||
return _hex(oint(i)).rstrip('L')
|
return _hex(oint(i)).rstrip("L")
|
||||||
|
|
||||||
|
|
||||||
def oint(i):
|
def oint(i):
|
||||||
|
|||||||
@@ -15,14 +15,14 @@ def info_to_name(display):
|
|||||||
e.g. function(my_function) => my_function
|
e.g. function(my_function) => my_function
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return display.split('(')[1].rstrip(')')
|
return display.split("(")[1].rstrip(")")
|
||||||
except IndexError:
|
except IndexError:
|
||||||
return ''
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def location_to_hex(location):
|
def location_to_hex(location):
|
||||||
""" convert location to hex for display """
|
""" convert location to hex for display """
|
||||||
return '%08X' % location
|
return "%08X" % location
|
||||||
|
|
||||||
|
|
||||||
class CapaExplorerDataItem(object):
|
class CapaExplorerDataItem(object):
|
||||||
@@ -35,7 +35,12 @@ class CapaExplorerDataItem(object):
|
|||||||
self.children = []
|
self.children = []
|
||||||
self._checked = False
|
self._checked = False
|
||||||
|
|
||||||
self.flags = (QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsTristate | QtCore.Qt.ItemIsUserCheckable)
|
self.flags = (
|
||||||
|
QtCore.Qt.ItemIsEnabled
|
||||||
|
| QtCore.Qt.ItemIsSelectable
|
||||||
|
| QtCore.Qt.ItemIsTristate
|
||||||
|
| QtCore.Qt.ItemIsUserCheckable
|
||||||
|
)
|
||||||
|
|
||||||
if self.pred:
|
if self.pred:
|
||||||
self.pred.appendChild(self)
|
self.pred.appendChild(self)
|
||||||
@@ -109,7 +114,7 @@ class CapaExplorerDataItem(object):
|
|||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
""" get string representation of columns """
|
""" get string representation of columns """
|
||||||
return ' '.join([data for data in self._data if data])
|
return " ".join([data for data in self._data if data])
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def info(self):
|
def info(self):
|
||||||
@@ -133,12 +138,12 @@ class CapaExplorerDataItem(object):
|
|||||||
class CapaExplorerRuleItem(CapaExplorerDataItem):
|
class CapaExplorerRuleItem(CapaExplorerDataItem):
|
||||||
""" store data relevant to capa function result """
|
""" store data relevant to capa function result """
|
||||||
|
|
||||||
fmt = '%s (%d matches)'
|
fmt = "%s (%d matches)"
|
||||||
|
|
||||||
def __init__(self, parent, display, count, source):
|
def __init__(self, parent, display, count, source):
|
||||||
""" """
|
""" """
|
||||||
display = self.fmt % (display, count) if count > 1 else display
|
display = self.fmt % (display, count) if count > 1 else display
|
||||||
super(CapaExplorerRuleItem, self).__init__(parent, [display, '', ''])
|
super(CapaExplorerRuleItem, self).__init__(parent, [display, "", ""])
|
||||||
self._source = source
|
self._source = source
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -150,9 +155,9 @@ class CapaExplorerRuleItem(CapaExplorerDataItem):
|
|||||||
class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
|
class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
|
||||||
""" store data relevant to capa function match result """
|
""" store data relevant to capa function match result """
|
||||||
|
|
||||||
def __init__(self, parent, display, source=''):
|
def __init__(self, parent, display, source=""):
|
||||||
""" """
|
""" """
|
||||||
super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, '', ''])
|
super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, "", ""])
|
||||||
self._source = source
|
self._source = source
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -164,12 +169,13 @@ class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
|
|||||||
class CapaExplorerFunctionItem(CapaExplorerDataItem):
|
class CapaExplorerFunctionItem(CapaExplorerDataItem):
|
||||||
""" store data relevant to capa function result """
|
""" store data relevant to capa function result """
|
||||||
|
|
||||||
fmt = 'function(%s)'
|
fmt = "function(%s)"
|
||||||
|
|
||||||
def __init__(self, parent, location):
|
def __init__(self, parent, location):
|
||||||
""" """
|
""" """
|
||||||
super(CapaExplorerFunctionItem, self).__init__(parent, [self.fmt % idaapi.get_name(location),
|
super(CapaExplorerFunctionItem, self).__init__(
|
||||||
location_to_hex(location), ''])
|
parent, [self.fmt % idaapi.get_name(location), location_to_hex(location), ""]
|
||||||
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def info(self):
|
def info(self):
|
||||||
@@ -187,32 +193,31 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):
|
|||||||
class CapaExplorerBlockItem(CapaExplorerDataItem):
|
class CapaExplorerBlockItem(CapaExplorerDataItem):
|
||||||
""" store data relevant to capa basic block result """
|
""" store data relevant to capa basic block result """
|
||||||
|
|
||||||
fmt = 'basic block(loc_%08X)'
|
fmt = "basic block(loc_%08X)"
|
||||||
|
|
||||||
def __init__(self, parent, location):
|
def __init__(self, parent, location):
|
||||||
""" """
|
""" """
|
||||||
super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ''])
|
super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ""])
|
||||||
|
|
||||||
|
|
||||||
class CapaExplorerDefaultItem(CapaExplorerDataItem):
|
class CapaExplorerDefaultItem(CapaExplorerDataItem):
|
||||||
""" store data relevant to capa default result """
|
""" store data relevant to capa default result """
|
||||||
|
|
||||||
def __init__(self, parent, display, details='', location=None):
|
def __init__(self, parent, display, details="", location=None):
|
||||||
""" """
|
""" """
|
||||||
location = location_to_hex(location) if location else ''
|
location = location_to_hex(location) if location else ""
|
||||||
super(CapaExplorerDefaultItem, self).__init__(parent, [display, location, details])
|
super(CapaExplorerDefaultItem, self).__init__(parent, [display, location, details])
|
||||||
|
|
||||||
|
|
||||||
class CapaExplorerFeatureItem(CapaExplorerDataItem):
|
class CapaExplorerFeatureItem(CapaExplorerDataItem):
|
||||||
""" store data relevant to capa feature result """
|
""" store data relevant to capa feature result """
|
||||||
|
|
||||||
def __init__(self, parent, display, location='', details=''):
|
def __init__(self, parent, display, location="", details=""):
|
||||||
location = location_to_hex(location) if location else ''
|
location = location_to_hex(location) if location else ""
|
||||||
super(CapaExplorerFeatureItem, self).__init__(parent, [display, location, details])
|
super(CapaExplorerFeatureItem, self).__init__(parent, [display, location, details])
|
||||||
|
|
||||||
|
|
||||||
class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
|
class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
|
||||||
|
|
||||||
def __init__(self, parent, display, location):
|
def __init__(self, parent, display, location):
|
||||||
""" """
|
""" """
|
||||||
details = capa.ida.helpers.get_disasm_line(location)
|
details = capa.ida.helpers.get_disasm_line(location)
|
||||||
@@ -221,26 +226,24 @@ class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
|
|||||||
|
|
||||||
|
|
||||||
class CapaExplorerByteViewItem(CapaExplorerFeatureItem):
|
class CapaExplorerByteViewItem(CapaExplorerFeatureItem):
|
||||||
|
|
||||||
def __init__(self, parent, display, location):
|
def __init__(self, parent, display, location):
|
||||||
""" """
|
""" """
|
||||||
byte_snap = idaapi.get_bytes(location, 32)
|
byte_snap = idaapi.get_bytes(location, 32)
|
||||||
|
|
||||||
if byte_snap:
|
if byte_snap:
|
||||||
byte_snap = codecs.encode(byte_snap, 'hex').upper()
|
byte_snap = codecs.encode(byte_snap, "hex").upper()
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
details = ' '.join([byte_snap[i:i + 2].decode() for i in range(0, len(byte_snap), 2)])
|
details = " ".join([byte_snap[i : i + 2].decode() for i in range(0, len(byte_snap), 2)])
|
||||||
else:
|
else:
|
||||||
details = ' '.join([byte_snap[i:i + 2] for i in range(0, len(byte_snap), 2)])
|
details = " ".join([byte_snap[i : i + 2] for i in range(0, len(byte_snap), 2)])
|
||||||
else:
|
else:
|
||||||
details = ''
|
details = ""
|
||||||
|
|
||||||
super(CapaExplorerByteViewItem, self).__init__(parent, display, location=location, details=details)
|
super(CapaExplorerByteViewItem, self).__init__(parent, display, location=location, details=details)
|
||||||
self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)
|
self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)
|
||||||
|
|
||||||
|
|
||||||
class CapaExplorerStringViewItem(CapaExplorerFeatureItem):
|
class CapaExplorerStringViewItem(CapaExplorerFeatureItem):
|
||||||
|
|
||||||
def __init__(self, parent, display, location):
|
def __init__(self, parent, display, location):
|
||||||
""" """
|
""" """
|
||||||
super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location)
|
super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location)
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ from capa.ida.explorer.item import (
|
|||||||
CapaExplorerByteViewItem,
|
CapaExplorerByteViewItem,
|
||||||
CapaExplorerBlockItem,
|
CapaExplorerBlockItem,
|
||||||
CapaExplorerRuleMatchItem,
|
CapaExplorerRuleMatchItem,
|
||||||
CapaExplorerFeatureItem
|
CapaExplorerFeatureItem,
|
||||||
)
|
)
|
||||||
|
|
||||||
import capa.ida.helpers
|
import capa.ida.helpers
|
||||||
@@ -37,7 +37,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
def __init__(self, parent=None):
|
def __init__(self, parent=None):
|
||||||
""" """
|
""" """
|
||||||
super(CapaExplorerDataModel, self).__init__(parent)
|
super(CapaExplorerDataModel, self).__init__(parent)
|
||||||
self.root_node = CapaExplorerDataItem(None, ['Rule Information', 'Address', 'Details'])
|
self.root_node = CapaExplorerDataItem(None, ["Rule Information", "Address", "Details"])
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
""" """
|
""" """
|
||||||
@@ -86,8 +86,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
# display data in corresponding column
|
# display data in corresponding column
|
||||||
return item.data(column)
|
return item.data(column)
|
||||||
|
|
||||||
if role == QtCore.Qt.ToolTipRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem)) and \
|
if (
|
||||||
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION == column:
|
role == QtCore.Qt.ToolTipRole
|
||||||
|
and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem))
|
||||||
|
and CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION == column
|
||||||
|
):
|
||||||
# show tooltip containing rule source
|
# show tooltip containing rule source
|
||||||
return item.source
|
return item.source
|
||||||
|
|
||||||
@@ -95,18 +98,30 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
# inform view how to display content of checkbox - un/checked
|
# inform view how to display content of checkbox - un/checked
|
||||||
return QtCore.Qt.Checked if item.isChecked() else QtCore.Qt.Unchecked
|
return QtCore.Qt.Checked if item.isChecked() else QtCore.Qt.Unchecked
|
||||||
|
|
||||||
if role == QtCore.Qt.FontRole and column in (CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
|
if role == QtCore.Qt.FontRole and column in (
|
||||||
CapaExplorerDataModel.COLUMN_INDEX_DETAILS):
|
CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
|
||||||
|
CapaExplorerDataModel.COLUMN_INDEX_DETAILS,
|
||||||
|
):
|
||||||
# set font for virtual address and details columns
|
# set font for virtual address and details columns
|
||||||
font = QtGui.QFont('Courier', weight=QtGui.QFont.Medium)
|
font = QtGui.QFont("Courier", weight=QtGui.QFont.Medium)
|
||||||
if column == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS:
|
if column == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS:
|
||||||
font.setBold(True)
|
font.setBold(True)
|
||||||
return font
|
return font
|
||||||
|
|
||||||
if role == QtCore.Qt.FontRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem,
|
if (
|
||||||
CapaExplorerBlockItem, CapaExplorerFunctionItem,
|
role == QtCore.Qt.FontRole
|
||||||
CapaExplorerFeatureItem)) and \
|
and isinstance(
|
||||||
column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
|
item,
|
||||||
|
(
|
||||||
|
CapaExplorerRuleItem,
|
||||||
|
CapaExplorerRuleMatchItem,
|
||||||
|
CapaExplorerBlockItem,
|
||||||
|
CapaExplorerFunctionItem,
|
||||||
|
CapaExplorerFeatureItem,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||||
|
):
|
||||||
# set bold font for top-level rules
|
# set bold font for top-level rules
|
||||||
font = QtGui.QFont()
|
font = QtGui.QFont()
|
||||||
font.setBold(True)
|
font.setBold(True)
|
||||||
@@ -116,8 +131,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
# set color for virtual address column
|
# set color for virtual address column
|
||||||
return QtGui.QColor(88, 139, 174)
|
return QtGui.QColor(88, 139, 174)
|
||||||
|
|
||||||
if role == QtCore.Qt.ForegroundRole and isinstance(item, CapaExplorerFeatureItem) and column == \
|
if (
|
||||||
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
|
role == QtCore.Qt.ForegroundRole
|
||||||
|
and isinstance(item, CapaExplorerFeatureItem)
|
||||||
|
and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||||
|
):
|
||||||
# set color for feature items
|
# set color for feature items
|
||||||
return QtGui.QColor(79, 121, 66)
|
return QtGui.QColor(79, 121, 66)
|
||||||
|
|
||||||
@@ -222,8 +240,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
|
|
||||||
def util_reset_ida_highlighting(self, item, checked):
|
def util_reset_ida_highlighting(self, item, checked):
|
||||||
""" """
|
""" """
|
||||||
if not isinstance(item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem,
|
if not isinstance(
|
||||||
CapaExplorerByteViewItem)):
|
item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem, CapaExplorerByteViewItem)
|
||||||
|
):
|
||||||
# ignore other item types
|
# ignore other item types
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -254,8 +273,10 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
if not model_index.isValid():
|
if not model_index.isValid():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if role == QtCore.Qt.CheckStateRole and model_index.column() ==\
|
if (
|
||||||
CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
|
role == QtCore.Qt.CheckStateRole
|
||||||
|
and model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||||
|
):
|
||||||
# user un/checked box - un/check parent and children
|
# user un/checked box - un/check parent and children
|
||||||
for child_index in self.iterateChildrenIndexFromRootIndex(model_index, ignore_root=False):
|
for child_index in self.iterateChildrenIndexFromRootIndex(model_index, ignore_root=False):
|
||||||
child_index.internalPointer().setChecked(value)
|
child_index.internalPointer().setChecked(value)
|
||||||
@@ -263,9 +284,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
self.dataChanged.emit(child_index, child_index)
|
self.dataChanged.emit(child_index, child_index)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if role == QtCore.Qt.EditRole and value and \
|
if (
|
||||||
model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION and \
|
role == QtCore.Qt.EditRole
|
||||||
isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
|
and value
|
||||||
|
and model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||||
|
and isinstance(model_index.internalPointer(), CapaExplorerFunctionItem)
|
||||||
|
):
|
||||||
# user renamed function - update IDA database and data model
|
# user renamed function - update IDA database and data model
|
||||||
old_name = model_index.internalPointer().info
|
old_name = model_index.internalPointer().info
|
||||||
new_name = str(value)
|
new_name = str(value)
|
||||||
@@ -309,39 +333,39 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
"type": "or"
|
"type": "or"
|
||||||
},
|
},
|
||||||
"""
|
"""
|
||||||
if statement['type'] in ('and', 'or', 'optional'):
|
if statement["type"] in ("and", "or", "optional"):
|
||||||
return CapaExplorerDefaultItem(parent, statement['type'])
|
return CapaExplorerDefaultItem(parent, statement["type"])
|
||||||
elif statement['type'] == 'not':
|
elif statement["type"] == "not":
|
||||||
# TODO: do we display 'not'
|
# TODO: do we display 'not'
|
||||||
pass
|
pass
|
||||||
elif statement['type'] == 'some':
|
elif statement["type"] == "some":
|
||||||
return CapaExplorerDefaultItem(parent, statement['count'] + ' or more')
|
return CapaExplorerDefaultItem(parent, statement["count"] + " or more")
|
||||||
elif statement['type'] == 'range':
|
elif statement["type"] == "range":
|
||||||
# `range` is a weird node, its almost a hybrid of statement + feature.
|
# `range` is a weird node, its almost a hybrid of statement + feature.
|
||||||
# it is a specific feature repeated multiple times.
|
# it is a specific feature repeated multiple times.
|
||||||
# there's no additional logic in the feature part, just the existence of a feature.
|
# there's no additional logic in the feature part, just the existence of a feature.
|
||||||
# so, we have to inline some of the feature rendering here.
|
# so, we have to inline some of the feature rendering here.
|
||||||
display = 'count(%s): ' % self.capa_doc_feature_to_display(statement['child'])
|
display = "count(%s): " % self.capa_doc_feature_to_display(statement["child"])
|
||||||
|
|
||||||
if statement['max'] == statement['min']:
|
if statement["max"] == statement["min"]:
|
||||||
display += '%d' % (statement['min'])
|
display += "%d" % (statement["min"])
|
||||||
elif statement['min'] == 0:
|
elif statement["min"] == 0:
|
||||||
display += '%d or fewer' % (statement['max'])
|
display += "%d or fewer" % (statement["max"])
|
||||||
elif statement['max'] == (1 << 64 - 1):
|
elif statement["max"] == (1 << 64 - 1):
|
||||||
display += '%d or more' % (statement['min'])
|
display += "%d or more" % (statement["min"])
|
||||||
else:
|
else:
|
||||||
display += 'between %d and %d' % (statement['min'], statement['max'])
|
display += "between %d and %d" % (statement["min"], statement["max"])
|
||||||
|
|
||||||
return CapaExplorerFeatureItem(parent, display=display)
|
return CapaExplorerFeatureItem(parent, display=display)
|
||||||
elif statement['type'] == 'subscope':
|
elif statement["type"] == "subscope":
|
||||||
return CapaExplorerFeatureItem(parent, 'subscope(%s)' % statement['subscope'])
|
return CapaExplorerFeatureItem(parent, "subscope(%s)" % statement["subscope"])
|
||||||
elif statement['type'] == 'regex':
|
elif statement["type"] == "regex":
|
||||||
# regex is a `Statement` not a `Feature`
|
# regex is a `Statement` not a `Feature`
|
||||||
# this is because it doesn't get extracted, but applies to all strings in scope.
|
# this is because it doesn't get extracted, but applies to all strings in scope.
|
||||||
# so we have to handle it here
|
# so we have to handle it here
|
||||||
return CapaExplorerFeatureItem(parent, 'regex(%s)' % statement['pattern'], details=statement['match'])
|
return CapaExplorerFeatureItem(parent, "regex(%s)" % statement["pattern"], details=statement["match"])
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected match statement type: ' + str(statement))
|
raise RuntimeError("unexpected match statement type: " + str(statement))
|
||||||
|
|
||||||
def render_capa_doc_match(self, parent, match, doc):
|
def render_capa_doc_match(self, parent, match, doc):
|
||||||
""" render capa match read from doc
|
""" render capa match read from doc
|
||||||
@@ -367,23 +391,24 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"""
|
"""
|
||||||
if not match['success']:
|
if not match["success"]:
|
||||||
# TODO: display failed branches at some point? Help with debugging rules?
|
# TODO: display failed branches at some point? Help with debugging rules?
|
||||||
return
|
return
|
||||||
|
|
||||||
# optional statement with no successful children is empty
|
# optional statement with no successful children is empty
|
||||||
if (match['node'].get('statement', {}).get('type') == 'optional'
|
if match["node"].get("statement", {}).get("type") == "optional" and not any(
|
||||||
and not any(map(lambda m: m['success'], match['children']))):
|
map(lambda m: m["success"], match["children"])
|
||||||
|
):
|
||||||
return
|
return
|
||||||
|
|
||||||
if match['node']['type'] == 'statement':
|
if match["node"]["type"] == "statement":
|
||||||
parent2 = self.render_capa_doc_statement_node(parent, match['node']['statement'], doc)
|
parent2 = self.render_capa_doc_statement_node(parent, match["node"]["statement"], doc)
|
||||||
elif match['node']['type'] == 'feature':
|
elif match["node"]["type"] == "feature":
|
||||||
parent2 = self.render_capa_doc_feature_node(parent, match['node']['feature'], match['locations'], doc)
|
parent2 = self.render_capa_doc_feature_node(parent, match["node"]["feature"], match["locations"], doc)
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected node type: ' + str(match['node']['type']))
|
raise RuntimeError("unexpected node type: " + str(match["node"]["type"]))
|
||||||
|
|
||||||
for child in match['children']:
|
for child in match["children"]:
|
||||||
self.render_capa_doc_match(parent2, child, doc)
|
self.render_capa_doc_match(parent2, child, doc)
|
||||||
|
|
||||||
def render_capa_doc(self, doc):
|
def render_capa_doc(self, doc):
|
||||||
@@ -394,17 +419,17 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
self.beginResetModel()
|
self.beginResetModel()
|
||||||
|
|
||||||
for rule in rutils.capability_rules(doc):
|
for rule in rutils.capability_rules(doc):
|
||||||
parent = CapaExplorerRuleItem(self.root_node, rule['meta']['name'], len(rule['matches']), rule['source'])
|
parent = CapaExplorerRuleItem(self.root_node, rule["meta"]["name"], len(rule["matches"]), rule["source"])
|
||||||
|
|
||||||
for (location, match) in doc[rule['meta']['name']]['matches'].items():
|
for (location, match) in doc[rule["meta"]["name"]]["matches"].items():
|
||||||
if rule['meta']['scope'] == capa.rules.FILE_SCOPE:
|
if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
|
||||||
parent2 = parent
|
parent2 = parent
|
||||||
elif rule['meta']['scope'] == capa.rules.FUNCTION_SCOPE:
|
elif rule["meta"]["scope"] == capa.rules.FUNCTION_SCOPE:
|
||||||
parent2 = CapaExplorerFunctionItem(parent, location)
|
parent2 = CapaExplorerFunctionItem(parent, location)
|
||||||
elif rule['meta']['scope'] == capa.rules.BASIC_BLOCK_SCOPE:
|
elif rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
|
||||||
parent2 = CapaExplorerBlockItem(parent, location)
|
parent2 = CapaExplorerBlockItem(parent, location)
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected rule scope: ' + str(rule['meta']['scope']))
|
raise RuntimeError("unexpected rule scope: " + str(rule["meta"]["scope"]))
|
||||||
|
|
||||||
self.render_capa_doc_match(parent2, match, doc)
|
self.render_capa_doc_match(parent2, match, doc)
|
||||||
|
|
||||||
@@ -421,20 +446,20 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
},
|
},
|
||||||
"""
|
"""
|
||||||
mapping = {
|
mapping = {
|
||||||
'string': 'string(%s)',
|
"string": "string(%s)",
|
||||||
'bytes': 'bytes(%s)',
|
"bytes": "bytes(%s)",
|
||||||
'api': 'api(%s)',
|
"api": "api(%s)",
|
||||||
'mnemonic': 'mnemonic(%s)',
|
"mnemonic": "mnemonic(%s)",
|
||||||
'export': 'export(%s)',
|
"export": "export(%s)",
|
||||||
'import': 'import(%s)',
|
"import": "import(%s)",
|
||||||
'section': 'section(%s)',
|
"section": "section(%s)",
|
||||||
'number': 'number(0x%X)',
|
"number": "number(0x%X)",
|
||||||
'offset': 'offset(0x%X)',
|
"offset": "offset(0x%X)",
|
||||||
'characteristic': 'characteristic(%s)',
|
"characteristic": "characteristic(%s)",
|
||||||
'match': 'rule match(%s)'
|
"match": "rule match(%s)",
|
||||||
}
|
}
|
||||||
|
|
||||||
'''
|
"""
|
||||||
"feature": {
|
"feature": {
|
||||||
"characteristic": [
|
"characteristic": [
|
||||||
"loop",
|
"loop",
|
||||||
@@ -442,21 +467,23 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
],
|
],
|
||||||
"type": "characteristic"
|
"type": "characteristic"
|
||||||
},
|
},
|
||||||
'''
|
"""
|
||||||
if feature['type'] == 'characteristic':
|
if feature["type"] == "characteristic":
|
||||||
return mapping['characteristic'] % feature['characteristic'][0]
|
return mapping["characteristic"] % feature["characteristic"][0]
|
||||||
|
|
||||||
# convert bytes feature from "410ab4" to "41 0A B4"
|
# convert bytes feature from "410ab4" to "41 0A B4"
|
||||||
if feature['type'] == 'bytes':
|
if feature["type"] == "bytes":
|
||||||
return mapping['bytes'] % ' '.join(feature['bytes'][i:i + 2] for i in
|
return (
|
||||||
range(0, len(feature['bytes']), 2)).upper()
|
mapping["bytes"]
|
||||||
|
% " ".join(feature["bytes"][i : i + 2] for i in range(0, len(feature["bytes"]), 2)).upper()
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fmt = mapping[feature['type']]
|
fmt = mapping[feature["type"]]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise RuntimeError('unexpected doc type: ' + str(feature['type']))
|
raise RuntimeError("unexpected doc type: " + str(feature["type"]))
|
||||||
|
|
||||||
return fmt % feature[feature['type']]
|
return fmt % feature[feature["type"]]
|
||||||
|
|
||||||
def render_capa_doc_feature_node(self, parent, feature, locations, doc):
|
def render_capa_doc_feature_node(self, parent, feature, locations, doc):
|
||||||
""" """
|
""" """
|
||||||
@@ -473,7 +500,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
|
|
||||||
return parent2
|
return parent2
|
||||||
|
|
||||||
def render_capa_doc_feature(self, parent, feature, location, doc, display='-'):
|
def render_capa_doc_feature(self, parent, feature, location, doc, display="-"):
|
||||||
""" render capa feature read from doc
|
""" render capa feature read from doc
|
||||||
|
|
||||||
@param parent: parent node to which new child is assigned
|
@param parent: parent node to which new child is assigned
|
||||||
@@ -491,51 +518,38 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
@param location: address of feature
|
@param location: address of feature
|
||||||
@param display: text to display in plugin ui
|
@param display: text to display in plugin ui
|
||||||
"""
|
"""
|
||||||
instruction_view = (
|
instruction_view = ("bytes", "api", "mnemonic", "number", "offset")
|
||||||
'bytes',
|
byte_view = ("section",)
|
||||||
'api',
|
string_view = ("string",)
|
||||||
'mnemonic',
|
default_feature_view = ("import", "export")
|
||||||
'number',
|
|
||||||
'offset'
|
|
||||||
)
|
|
||||||
byte_view = (
|
|
||||||
'section',
|
|
||||||
)
|
|
||||||
string_view = (
|
|
||||||
'string',
|
|
||||||
)
|
|
||||||
default_feature_view = (
|
|
||||||
'import',
|
|
||||||
'export'
|
|
||||||
)
|
|
||||||
|
|
||||||
# special handling for characteristic pending type
|
# special handling for characteristic pending type
|
||||||
if feature['type'] == 'characteristic':
|
if feature["type"] == "characteristic":
|
||||||
if feature['characteristic'][0] in ('embedded pe',):
|
if feature["characteristic"][0] in ("embedded pe",):
|
||||||
return CapaExplorerByteViewItem(parent, display, location)
|
return CapaExplorerByteViewItem(parent, display, location)
|
||||||
|
|
||||||
if feature['characteristic'][0] in ('loop', 'recursive call', 'tight loop', 'switch'):
|
if feature["characteristic"][0] in ("loop", "recursive call", "tight loop", "switch"):
|
||||||
return CapaExplorerFeatureItem(parent, display=display)
|
return CapaExplorerFeatureItem(parent, display=display)
|
||||||
|
|
||||||
# default to instruction view
|
# default to instruction view
|
||||||
return CapaExplorerInstructionViewItem(parent, display, location)
|
return CapaExplorerInstructionViewItem(parent, display, location)
|
||||||
|
|
||||||
if feature['type'] == 'match':
|
if feature["type"] == "match":
|
||||||
return CapaExplorerRuleMatchItem(parent, display, source=doc.get(feature['match'], {}).get('source', ''))
|
return CapaExplorerRuleMatchItem(parent, display, source=doc.get(feature["match"], {}).get("source", ""))
|
||||||
|
|
||||||
if feature['type'] in instruction_view:
|
if feature["type"] in instruction_view:
|
||||||
return CapaExplorerInstructionViewItem(parent, display, location)
|
return CapaExplorerInstructionViewItem(parent, display, location)
|
||||||
|
|
||||||
if feature['type'] in byte_view:
|
if feature["type"] in byte_view:
|
||||||
return CapaExplorerByteViewItem(parent, display, location)
|
return CapaExplorerByteViewItem(parent, display, location)
|
||||||
|
|
||||||
if feature['type'] in string_view:
|
if feature["type"] in string_view:
|
||||||
return CapaExplorerStringViewItem(parent, display, location)
|
return CapaExplorerStringViewItem(parent, display, location)
|
||||||
|
|
||||||
if feature['type'] in default_feature_view:
|
if feature["type"] in default_feature_view:
|
||||||
return CapaExplorerFeatureItem(parent, display=display)
|
return CapaExplorerFeatureItem(parent, display=display)
|
||||||
|
|
||||||
raise RuntimeError('unexpected feature type: ' + str(feature['type']))
|
raise RuntimeError("unexpected feature type: " + str(feature["type"]))
|
||||||
|
|
||||||
def update_function_name(self, old_name, new_name):
|
def update_function_name(self, old_name, new_name):
|
||||||
""" update all instances of function name
|
""" update all instances of function name
|
||||||
@@ -548,8 +562,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
|||||||
# convert name to view format for matching
|
# convert name to view format for matching
|
||||||
old_name = CapaExplorerFunctionItem.fmt % old_name
|
old_name = CapaExplorerFunctionItem.fmt % old_name
|
||||||
|
|
||||||
for model_index in self.match(root_index, QtCore.Qt.DisplayRole, old_name, hits=-1,
|
for model_index in self.match(
|
||||||
flags=QtCore.Qt.MatchRecursive):
|
root_index, QtCore.Qt.DisplayRole, old_name, hits=-1, flags=QtCore.Qt.MatchRecursive
|
||||||
|
):
|
||||||
if not isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
|
if not isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ from capa.ida.explorer.model import CapaExplorerDataModel
|
|||||||
|
|
||||||
|
|
||||||
class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
|
class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
|
||||||
|
|
||||||
def __init__(self, parent=None):
|
def __init__(self, parent=None):
|
||||||
""" """
|
""" """
|
||||||
super(CapaExplorerSortFilterProxyModel, self).__init__(parent)
|
super(CapaExplorerSortFilterProxyModel, self).__init__(parent)
|
||||||
@@ -20,8 +19,12 @@ class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
|
|||||||
ldata = left.internalPointer().data(left.column())
|
ldata = left.internalPointer().data(left.column())
|
||||||
rdata = right.internalPointer().data(right.column())
|
rdata = right.internalPointer().data(right.column())
|
||||||
|
|
||||||
if ldata and rdata and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS and left.column() \
|
if (
|
||||||
== right.column():
|
ldata
|
||||||
|
and rdata
|
||||||
|
and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS
|
||||||
|
and left.column() == right.column()
|
||||||
|
):
|
||||||
# convert virtual address before compare
|
# convert virtual address before compare
|
||||||
return int(ldata, 16) < int(rdata, 16)
|
return int(ldata, 16) < int(rdata, 16)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
|
|||||||
self.doubleClicked.connect(self.slot_double_click)
|
self.doubleClicked.connect(self.slot_double_click)
|
||||||
# self.clicked.connect(self.slot_click)
|
# self.clicked.connect(self.slot_click)
|
||||||
|
|
||||||
self.setStyleSheet('QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}')
|
self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
""" reset user interface changes
|
""" reset user interface changes
|
||||||
@@ -114,8 +114,8 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
|
|||||||
@yield QAction*
|
@yield QAction*
|
||||||
"""
|
"""
|
||||||
default_actions = [
|
default_actions = [
|
||||||
('Copy column', data, self.slot_copy_column),
|
("Copy column", data, self.slot_copy_column),
|
||||||
('Copy row', data, self.slot_copy_row),
|
("Copy row", data, self.slot_copy_row),
|
||||||
]
|
]
|
||||||
|
|
||||||
# add default actions
|
# add default actions
|
||||||
@@ -130,7 +130,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
|
|||||||
@yield QAction*
|
@yield QAction*
|
||||||
"""
|
"""
|
||||||
function_actions = [
|
function_actions = [
|
||||||
('Rename function', data, self.slot_rename_function),
|
("Rename function", data, self.slot_rename_function),
|
||||||
]
|
]
|
||||||
|
|
||||||
# add function actions
|
# add function actions
|
||||||
|
|||||||
@@ -3,47 +3,48 @@ import logging
|
|||||||
import idaapi
|
import idaapi
|
||||||
import idc
|
import idc
|
||||||
|
|
||||||
logger = logging.getLogger('capa')
|
logger = logging.getLogger("capa")
|
||||||
|
|
||||||
# file type names as returned by idaapi.get_file_type_name()
|
# file type names as returned by idaapi.get_file_type_name()
|
||||||
SUPPORTED_FILE_TYPES = [
|
SUPPORTED_FILE_TYPES = [
|
||||||
'Portable executable for 80386 (PE)',
|
"Portable executable for 80386 (PE)",
|
||||||
'Portable executable for AMD64 (PE)',
|
"Portable executable for AMD64 (PE)",
|
||||||
'Binary file' # x86/AMD64 shellcode support
|
"Binary file", # x86/AMD64 shellcode support
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def inform_user_ida_ui(message):
|
def inform_user_ida_ui(message):
|
||||||
idaapi.info('%s. Please refer to IDA Output window for more information.' % message)
|
idaapi.info("%s. Please refer to IDA Output window for more information." % message)
|
||||||
|
|
||||||
|
|
||||||
def is_supported_file_type():
|
def is_supported_file_type():
|
||||||
file_type = idaapi.get_file_type_name()
|
file_type = idaapi.get_file_type_name()
|
||||||
if file_type not in SUPPORTED_FILE_TYPES:
|
if file_type not in SUPPORTED_FILE_TYPES:
|
||||||
logger.error('-' * 80)
|
logger.error("-" * 80)
|
||||||
logger.error(' Input file does not appear to be a PE file.')
|
logger.error(" Input file does not appear to be a PE file.")
|
||||||
logger.error(' ')
|
logger.error(" ")
|
||||||
logger.error(
|
logger.error(
|
||||||
' capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA.')
|
" capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA."
|
||||||
logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
|
)
|
||||||
logger.error('-' * 80)
|
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
||||||
inform_user_ida_ui('capa does not support the format of this file')
|
logger.error("-" * 80)
|
||||||
|
inform_user_ida_ui("capa does not support the format of this file")
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def get_disasm_line(va):
|
def get_disasm_line(va):
|
||||||
''' '''
|
""" """
|
||||||
return idc.generate_disasm_line(va, idc.GENDSM_FORCE_CODE)
|
return idc.generate_disasm_line(va, idc.GENDSM_FORCE_CODE)
|
||||||
|
|
||||||
|
|
||||||
def is_func_start(ea):
|
def is_func_start(ea):
|
||||||
''' check if function stat exists at virtual address '''
|
""" check if function stat exists at virtual address """
|
||||||
f = idaapi.get_func(ea)
|
f = idaapi.get_func(ea)
|
||||||
return f and f.start_ea == ea
|
return f and f.start_ea == ea
|
||||||
|
|
||||||
|
|
||||||
def get_func_start_ea(ea):
|
def get_func_start_ea(ea):
|
||||||
''' '''
|
""" """
|
||||||
f = idaapi.get_func(ea)
|
f = idaapi.get_func(ea)
|
||||||
return f if f is None else f.start_ea
|
return f if f is None else f.start_ea
|
||||||
|
|||||||
@@ -2,11 +2,7 @@ import os
|
|||||||
import logging
|
import logging
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
from PyQt5 import (
|
from PyQt5 import QtWidgets, QtGui, QtCore
|
||||||
QtWidgets,
|
|
||||||
QtGui,
|
|
||||||
QtCore
|
|
||||||
)
|
|
||||||
|
|
||||||
import idaapi
|
import idaapi
|
||||||
|
|
||||||
@@ -20,13 +16,12 @@ from capa.ida.explorer.view import CapaExplorerQtreeView
|
|||||||
from capa.ida.explorer.model import CapaExplorerDataModel
|
from capa.ida.explorer.model import CapaExplorerDataModel
|
||||||
from capa.ida.explorer.proxy import CapaExplorerSortFilterProxyModel
|
from capa.ida.explorer.proxy import CapaExplorerSortFilterProxyModel
|
||||||
|
|
||||||
PLUGIN_NAME = 'capa explorer'
|
PLUGIN_NAME = "capa explorer"
|
||||||
|
|
||||||
logger = logging.getLogger('capa')
|
logger = logging.getLogger("capa")
|
||||||
|
|
||||||
|
|
||||||
class CapaExplorerIdaHooks(idaapi.UI_Hooks):
|
class CapaExplorerIdaHooks(idaapi.UI_Hooks):
|
||||||
|
|
||||||
def __init__(self, screen_ea_changed_hook, action_hooks):
|
def __init__(self, screen_ea_changed_hook, action_hooks):
|
||||||
""" facilitate IDA UI hooks
|
""" facilitate IDA UI hooks
|
||||||
|
|
||||||
@@ -78,7 +73,6 @@ class CapaExplorerIdaHooks(idaapi.UI_Hooks):
|
|||||||
|
|
||||||
|
|
||||||
class CapaExplorerForm(idaapi.PluginForm):
|
class CapaExplorerForm(idaapi.PluginForm):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
""" """
|
""" """
|
||||||
super(CapaExplorerForm, self).__init__()
|
super(CapaExplorerForm, self).__init__()
|
||||||
@@ -109,20 +103,20 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
self.view_tree.reset()
|
self.view_tree.reset()
|
||||||
|
|
||||||
logger.info('form created.')
|
logger.info("form created.")
|
||||||
|
|
||||||
def Show(self):
|
def Show(self):
|
||||||
""" """
|
""" """
|
||||||
return idaapi.PluginForm.Show(self, self.form_title, options=(
|
return idaapi.PluginForm.Show(
|
||||||
idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER
|
self, self.form_title, options=(idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER)
|
||||||
))
|
)
|
||||||
|
|
||||||
def OnClose(self, form):
|
def OnClose(self, form):
|
||||||
""" form is closed """
|
""" form is closed """
|
||||||
self.unload_ida_hooks()
|
self.unload_ida_hooks()
|
||||||
self.ida_reset()
|
self.ida_reset()
|
||||||
|
|
||||||
logger.info('form closed.')
|
logger.info("form closed.")
|
||||||
|
|
||||||
def load_interface(self):
|
def load_interface(self):
|
||||||
""" load user interface """
|
""" load user interface """
|
||||||
@@ -165,8 +159,8 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
def load_view_summary(self):
|
def load_view_summary(self):
|
||||||
""" """
|
""" """
|
||||||
table_headers = [
|
table_headers = [
|
||||||
'Capability',
|
"Capability",
|
||||||
'Namespace',
|
"Namespace",
|
||||||
]
|
]
|
||||||
|
|
||||||
table = QtWidgets.QTableWidget()
|
table = QtWidgets.QTableWidget()
|
||||||
@@ -180,15 +174,15 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
table.setHorizontalHeaderLabels(table_headers)
|
table.setHorizontalHeaderLabels(table_headers)
|
||||||
table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
|
table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
|
||||||
table.setShowGrid(False)
|
table.setShowGrid(False)
|
||||||
table.setStyleSheet('QTableWidget::item { padding: 25px; }')
|
table.setStyleSheet("QTableWidget::item { padding: 25px; }")
|
||||||
|
|
||||||
self.view_summary = table
|
self.view_summary = table
|
||||||
|
|
||||||
def load_view_attack(self):
|
def load_view_attack(self):
|
||||||
""" """
|
""" """
|
||||||
table_headers = [
|
table_headers = [
|
||||||
'ATT&CK Tactic',
|
"ATT&CK Tactic",
|
||||||
'ATT&CK Technique ',
|
"ATT&CK Technique ",
|
||||||
]
|
]
|
||||||
|
|
||||||
table = QtWidgets.QTableWidget()
|
table = QtWidgets.QTableWidget()
|
||||||
@@ -202,13 +196,13 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
table.setHorizontalHeaderLabels(table_headers)
|
table.setHorizontalHeaderLabels(table_headers)
|
||||||
table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
|
table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
|
||||||
table.setShowGrid(False)
|
table.setShowGrid(False)
|
||||||
table.setStyleSheet('QTableWidget::item { padding: 25px; }')
|
table.setStyleSheet("QTableWidget::item { padding: 25px; }")
|
||||||
|
|
||||||
self.view_attack = table
|
self.view_attack = table
|
||||||
|
|
||||||
def load_view_checkbox_limit_by(self):
|
def load_view_checkbox_limit_by(self):
|
||||||
""" """
|
""" """
|
||||||
check = QtWidgets.QCheckBox('Limit results to current function')
|
check = QtWidgets.QCheckBox("Limit results to current function")
|
||||||
check.setChecked(False)
|
check.setChecked(False)
|
||||||
check.stateChanged.connect(self.slot_checkbox_limit_by_changed)
|
check.stateChanged.connect(self.slot_checkbox_limit_by_changed)
|
||||||
|
|
||||||
@@ -231,7 +225,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
tab = QtWidgets.QWidget()
|
tab = QtWidgets.QWidget()
|
||||||
tab.setLayout(layout)
|
tab.setLayout(layout)
|
||||||
|
|
||||||
self.view_tabs.addTab(tab, 'Tree View')
|
self.view_tabs.addTab(tab, "Tree View")
|
||||||
|
|
||||||
def load_view_summary_tab(self):
|
def load_view_summary_tab(self):
|
||||||
""" """
|
""" """
|
||||||
@@ -241,7 +235,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
tab = QtWidgets.QWidget()
|
tab = QtWidgets.QWidget()
|
||||||
tab.setLayout(layout)
|
tab.setLayout(layout)
|
||||||
|
|
||||||
self.view_tabs.addTab(tab, 'Summary')
|
self.view_tabs.addTab(tab, "Summary")
|
||||||
|
|
||||||
def load_view_attack_tab(self):
|
def load_view_attack_tab(self):
|
||||||
""" """
|
""" """
|
||||||
@@ -251,16 +245,16 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
tab = QtWidgets.QWidget()
|
tab = QtWidgets.QWidget()
|
||||||
tab.setLayout(layout)
|
tab.setLayout(layout)
|
||||||
|
|
||||||
self.view_tabs.addTab(tab, 'MITRE')
|
self.view_tabs.addTab(tab, "MITRE")
|
||||||
|
|
||||||
def load_file_menu(self):
|
def load_file_menu(self):
|
||||||
""" load file menu actions """
|
""" load file menu actions """
|
||||||
actions = (
|
actions = (
|
||||||
('Reset view', 'Reset plugin view', self.reset),
|
("Reset view", "Reset plugin view", self.reset),
|
||||||
('Run analysis', 'Run capa analysis on current database', self.reload),
|
("Run analysis", "Run capa analysis on current database", self.reload),
|
||||||
)
|
)
|
||||||
|
|
||||||
menu = self.view_menu_bar.addMenu('File')
|
menu = self.view_menu_bar.addMenu("File")
|
||||||
|
|
||||||
for name, _, handle in actions:
|
for name, _, handle in actions:
|
||||||
action = QtWidgets.QAction(name, self.parent)
|
action = QtWidgets.QAction(name, self.parent)
|
||||||
@@ -271,8 +265,8 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
def load_ida_hooks(self):
|
def load_ida_hooks(self):
|
||||||
""" """
|
""" """
|
||||||
action_hooks = {
|
action_hooks = {
|
||||||
'MakeName': self.ida_hook_rename,
|
"MakeName": self.ida_hook_rename,
|
||||||
'EditFunction': self.ida_hook_rename,
|
"EditFunction": self.ida_hook_rename,
|
||||||
}
|
}
|
||||||
|
|
||||||
self.ida_hooks = CapaExplorerIdaHooks(self.ida_hook_screen_ea_changed, action_hooks)
|
self.ida_hooks = CapaExplorerIdaHooks(self.ida_hook_screen_ea_changed, action_hooks)
|
||||||
@@ -300,10 +294,10 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
if post:
|
if post:
|
||||||
# post action update data model w/ current name
|
# post action update data model w/ current name
|
||||||
self.model_data.update_function_name(meta.get('prev_name', ''), curr_name)
|
self.model_data.update_function_name(meta.get("prev_name", ""), curr_name)
|
||||||
else:
|
else:
|
||||||
# pre action so save current name for replacement later
|
# pre action so save current name for replacement later
|
||||||
meta['prev_name'] = curr_name
|
meta["prev_name"] = curr_name
|
||||||
|
|
||||||
def ida_hook_screen_ea_changed(self, widget, new_ea, old_ea):
|
def ida_hook_screen_ea_changed(self, widget, new_ea, old_ea):
|
||||||
""" """
|
""" """
|
||||||
@@ -328,21 +322,21 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
match = capa.ida.explorer.item.ea_to_hex_str(new_func_start)
|
match = capa.ida.explorer.item.ea_to_hex_str(new_func_start)
|
||||||
else:
|
else:
|
||||||
# navigated to virtual address not in valid function - clear filter
|
# navigated to virtual address not in valid function - clear filter
|
||||||
match = ''
|
match = ""
|
||||||
|
|
||||||
# filter on virtual address to avoid updating filter string if function name is changed
|
# filter on virtual address to avoid updating filter string if function name is changed
|
||||||
self.model_proxy.add_single_string_filter(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS, match)
|
self.model_proxy.add_single_string_filter(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS, match)
|
||||||
|
|
||||||
def load_capa_results(self):
|
def load_capa_results(self):
|
||||||
""" """
|
""" """
|
||||||
logger.info('-' * 80)
|
logger.info("-" * 80)
|
||||||
logger.info(' Using default embedded rules.')
|
logger.info(" Using default embedded rules.")
|
||||||
logger.info(' ')
|
logger.info(" ")
|
||||||
logger.info(' You can see the current default rule set here:')
|
logger.info(" You can see the current default rule set here:")
|
||||||
logger.info(' https://github.com/fireeye/capa-rules')
|
logger.info(" https://github.com/fireeye/capa-rules")
|
||||||
logger.info('-' * 80)
|
logger.info("-" * 80)
|
||||||
|
|
||||||
rules_path = os.path.join(os.path.dirname(self.file_loc), '../..', 'rules')
|
rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
|
||||||
rules = capa.main.get_rules(rules_path)
|
rules = capa.main.get_rules(rules_path)
|
||||||
rules = capa.rules.RuleSet(rules)
|
rules = capa.rules.RuleSet(rules)
|
||||||
capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
|
capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
|
||||||
@@ -350,27 +344,30 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
# support binary files specifically for x86/AMD64 shellcode
|
# support binary files specifically for x86/AMD64 shellcode
|
||||||
# warn user binary file is loaded but still allow capa to process it
|
# warn user binary file is loaded but still allow capa to process it
|
||||||
# TODO: check specific architecture of binary files based on how user configured IDA processors
|
# TODO: check specific architecture of binary files based on how user configured IDA processors
|
||||||
if idaapi.get_file_type_name() == 'Binary file':
|
if idaapi.get_file_type_name() == "Binary file":
|
||||||
logger.warning('-' * 80)
|
logger.warning("-" * 80)
|
||||||
logger.warning(' Input file appears to be a binary file.')
|
logger.warning(" Input file appears to be a binary file.")
|
||||||
logger.warning(' ')
|
logger.warning(" ")
|
||||||
logger.warning(
|
logger.warning(
|
||||||
' capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA.')
|
" capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA."
|
||||||
|
)
|
||||||
logger.warning(
|
logger.warning(
|
||||||
' This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64.')
|
" This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64."
|
||||||
logger.warning(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
|
)
|
||||||
logger.warning('-' * 80)
|
logger.warning(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
||||||
|
logger.warning("-" * 80)
|
||||||
|
|
||||||
capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
|
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
|
||||||
|
|
||||||
if capa.main.has_file_limitation(rules, capabilities, is_standalone=False):
|
if capa.main.has_file_limitation(rules, capabilities, is_standalone=False):
|
||||||
capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
|
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
|
||||||
|
|
||||||
logger.info('analysis completed.')
|
logger.info("analysis completed.")
|
||||||
|
|
||||||
doc = capa.render.convert_capabilities_to_result_document(rules, capabilities)
|
doc = capa.render.convert_capabilities_to_result_document(rules, capabilities)
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
with open("C:\\Users\\spring\\Desktop\\hmm.json", "w") as twitter_data_file:
|
with open("C:\\Users\\spring\\Desktop\\hmm.json", "w") as twitter_data_file:
|
||||||
json.dump(doc, twitter_data_file, indent=4, sort_keys=True, cls=capa.render.CapaJsonObjectEncoder)
|
json.dump(doc, twitter_data_file, indent=4, sort_keys=True, cls=capa.render.CapaJsonObjectEncoder)
|
||||||
|
|
||||||
@@ -380,22 +377,22 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)
|
self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)
|
||||||
|
|
||||||
logger.info('render views completed.')
|
logger.info("render views completed.")
|
||||||
|
|
||||||
def render_capa_doc_summary(self, doc):
|
def render_capa_doc_summary(self, doc):
|
||||||
""" """
|
""" """
|
||||||
for (row, rule) in enumerate(rutils.capability_rules(doc)):
|
for (row, rule) in enumerate(rutils.capability_rules(doc)):
|
||||||
count = len(rule['matches'])
|
count = len(rule["matches"])
|
||||||
|
|
||||||
if count == 1:
|
if count == 1:
|
||||||
capability = rule['meta']['name']
|
capability = rule["meta"]["name"]
|
||||||
else:
|
else:
|
||||||
capability = '%s (%d matches)' % (rule['meta']['name'], count)
|
capability = "%s (%d matches)" % (rule["meta"]["name"], count)
|
||||||
|
|
||||||
self.view_summary.setRowCount(row + 1)
|
self.view_summary.setRowCount(row + 1)
|
||||||
|
|
||||||
self.view_summary.setItem(row, 0, self.render_new_table_header_item(capability))
|
self.view_summary.setItem(row, 0, self.render_new_table_header_item(capability))
|
||||||
self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule['meta']['namespace']))
|
self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule["meta"]["namespace"]))
|
||||||
|
|
||||||
# resize columns to content
|
# resize columns to content
|
||||||
self.view_summary.resizeColumnsToContents()
|
self.view_summary.resizeColumnsToContents()
|
||||||
@@ -404,17 +401,17 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
""" """
|
""" """
|
||||||
tactics = collections.defaultdict(set)
|
tactics = collections.defaultdict(set)
|
||||||
for rule in rutils.capability_rules(doc):
|
for rule in rutils.capability_rules(doc):
|
||||||
if not rule['meta'].get('att&ck'):
|
if not rule["meta"].get("att&ck"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for attack in rule['meta']['att&ck']:
|
for attack in rule["meta"]["att&ck"]:
|
||||||
tactic, _, rest = attack.partition('::')
|
tactic, _, rest = attack.partition("::")
|
||||||
if '::' in rest:
|
if "::" in rest:
|
||||||
technique, _, rest = rest.partition('::')
|
technique, _, rest = rest.partition("::")
|
||||||
subtechnique, _, id = rest.rpartition(' ')
|
subtechnique, _, id = rest.rpartition(" ")
|
||||||
tactics[tactic].add((technique, subtechnique, id))
|
tactics[tactic].add((technique, subtechnique, id))
|
||||||
else:
|
else:
|
||||||
technique, _, id = rest.rpartition(' ')
|
technique, _, id = rest.rpartition(" ")
|
||||||
tactics[tactic].add((technique, id))
|
tactics[tactic].add((technique, id))
|
||||||
|
|
||||||
column_one = []
|
column_one = []
|
||||||
@@ -422,17 +419,17 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
for tactic, techniques in sorted(tactics.items()):
|
for tactic, techniques in sorted(tactics.items()):
|
||||||
column_one.append(tactic.upper())
|
column_one.append(tactic.upper())
|
||||||
column_one.extend(['' for i in range(len(techniques) - 1)])
|
column_one.extend(["" for i in range(len(techniques) - 1)])
|
||||||
|
|
||||||
for spec in sorted(techniques):
|
for spec in sorted(techniques):
|
||||||
if len(spec) == 2:
|
if len(spec) == 2:
|
||||||
technique, id = spec
|
technique, id = spec
|
||||||
column_two.append('%s %s' % (technique, id))
|
column_two.append("%s %s" % (technique, id))
|
||||||
elif len(spec) == 3:
|
elif len(spec) == 3:
|
||||||
technique, subtechnique, id = spec
|
technique, subtechnique, id = spec
|
||||||
column_two.append('%s::%s %s' % (technique, subtechnique, id))
|
column_two.append("%s::%s %s" % (technique, subtechnique, id))
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected ATT&CK spec format')
|
raise RuntimeError("unexpected ATT&CK spec format")
|
||||||
|
|
||||||
self.view_attack.setRowCount(max(len(column_one), len(column_two)))
|
self.view_attack.setRowCount(max(len(column_one), len(column_two)))
|
||||||
|
|
||||||
@@ -471,8 +468,8 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
self.view_summary.setRowCount(0)
|
self.view_summary.setRowCount(0)
|
||||||
self.load_capa_results()
|
self.load_capa_results()
|
||||||
|
|
||||||
logger.info('reload complete.')
|
logger.info("reload complete.")
|
||||||
idaapi.info('%s reload completed.' % PLUGIN_NAME)
|
idaapi.info("%s reload completed." % PLUGIN_NAME)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
""" reset user interface elements
|
""" reset user interface elements
|
||||||
@@ -481,8 +478,8 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
"""
|
"""
|
||||||
self.ida_reset()
|
self.ida_reset()
|
||||||
|
|
||||||
logger.info('reset completed.')
|
logger.info("reset completed.")
|
||||||
idaapi.info('%s reset completed.' % PLUGIN_NAME)
|
idaapi.info("%s reset completed." % PLUGIN_NAME)
|
||||||
|
|
||||||
def slot_menu_bar_hovered(self, action):
|
def slot_menu_bar_hovered(self, action):
|
||||||
""" display menu action tooltip
|
""" display menu action tooltip
|
||||||
@@ -491,7 +488,9 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
@reference: https://stackoverflow.com/questions/21725119/why-wont-qtooltips-appear-on-qactions-within-a-qmenu
|
@reference: https://stackoverflow.com/questions/21725119/why-wont-qtooltips-appear-on-qactions-within-a-qmenu
|
||||||
"""
|
"""
|
||||||
QtWidgets.QToolTip.showText(QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action))
|
QtWidgets.QToolTip.showText(
|
||||||
|
QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action)
|
||||||
|
)
|
||||||
|
|
||||||
def slot_checkbox_limit_by_changed(self):
|
def slot_checkbox_limit_by_changed(self):
|
||||||
""" slot activated if checkbox clicked
|
""" slot activated if checkbox clicked
|
||||||
@@ -499,7 +498,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
if checked, configure function filter if screen location is located
|
if checked, configure function filter if screen location is located
|
||||||
in function, otherwise clear filter
|
in function, otherwise clear filter
|
||||||
"""
|
"""
|
||||||
match = ''
|
match = ""
|
||||||
if self.view_checkbox_limit_by.isChecked():
|
if self.view_checkbox_limit_by.isChecked():
|
||||||
location = capa.ida.helpers.get_func_start_ea(idaapi.get_screen_ea())
|
location = capa.ida.helpers.get_func_start_ea(idaapi.get_screen_ea())
|
||||||
if location:
|
if location:
|
||||||
@@ -530,5 +529,5 @@ def main():
|
|||||||
CAPA_EXPLORER_FORM.Show()
|
CAPA_EXPLORER_FORM.Show()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -19,10 +19,10 @@ from capa.ida import plugin_helpers
|
|||||||
import capa.features.extractors.ida.helpers
|
import capa.features.extractors.ida.helpers
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger('rulegen')
|
logger = logging.getLogger("rulegen")
|
||||||
|
|
||||||
|
|
||||||
AUTHOR_NAME = ''
|
AUTHOR_NAME = ""
|
||||||
COLOR_HIGHLIGHT = 0xD096FF
|
COLOR_HIGHLIGHT = 0xD096FF
|
||||||
|
|
||||||
|
|
||||||
@@ -35,11 +35,11 @@ def get_func_start(ea):
|
|||||||
|
|
||||||
|
|
||||||
class Hooks(idaapi.UI_Hooks):
|
class Hooks(idaapi.UI_Hooks):
|
||||||
'''
|
"""
|
||||||
Notifies the plugin when navigating to another function
|
Notifies the plugin when navigating to another function
|
||||||
NOTE: it uses the global variable FLEX to access the
|
NOTE: it uses the global variable FLEX to access the
|
||||||
PluginForm object. This looks nasty, maybe there is a better way?
|
PluginForm object. This looks nasty, maybe there is a better way?
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def screen_ea_changed(self, ea, prev_ea):
|
def screen_ea_changed(self, ea, prev_ea):
|
||||||
widget = idaapi.get_current_widget()
|
widget = idaapi.get_current_widget()
|
||||||
@@ -55,14 +55,13 @@ class Hooks(idaapi.UI_Hooks):
|
|||||||
# changed to another function
|
# changed to another function
|
||||||
RULE_GEN_FORM.reload_features_tree()
|
RULE_GEN_FORM.reload_features_tree()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warn('exception: %s', e)
|
logger.warn("exception: %s", e)
|
||||||
|
|
||||||
|
|
||||||
class RuleGeneratorForm(idaapi.PluginForm):
|
class RuleGeneratorForm(idaapi.PluginForm):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(RuleGeneratorForm, self).__init__()
|
super(RuleGeneratorForm, self).__init__()
|
||||||
self.title = 'capa rule generator'
|
self.title = "capa rule generator"
|
||||||
|
|
||||||
self.parent = None
|
self.parent = None
|
||||||
self.parent_items = {}
|
self.parent_items = {}
|
||||||
@@ -70,7 +69,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
self.hooks = Hooks() # dirty?
|
self.hooks = Hooks() # dirty?
|
||||||
if self.hooks.hook():
|
if self.hooks.hook():
|
||||||
logger.info('UI notification hook installed successfully')
|
logger.info("UI notification hook installed successfully")
|
||||||
|
|
||||||
def init_ui(self):
|
def init_ui(self):
|
||||||
self.tree = QTreeWidget()
|
self.tree = QTreeWidget()
|
||||||
@@ -79,7 +78,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
self.reload_features_tree()
|
self.reload_features_tree()
|
||||||
|
|
||||||
button_reset = QtWidgets.QPushButton('&Reset')
|
button_reset = QtWidgets.QPushButton("&Reset")
|
||||||
button_reset.clicked.connect(self.reset)
|
button_reset.clicked.connect(self.reset)
|
||||||
|
|
||||||
h_layout = QtWidgets.QHBoxLayout()
|
h_layout = QtWidgets.QHBoxLayout()
|
||||||
@@ -96,7 +95,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
def reset(self):
|
def reset(self):
|
||||||
plugin_helpers.reset_selection(self.tree)
|
plugin_helpers.reset_selection(self.tree)
|
||||||
plugin_helpers.reset_colors(self.orig_colors)
|
plugin_helpers.reset_colors(self.orig_colors)
|
||||||
self.rule_text.setText('')
|
self.rule_text.setText("")
|
||||||
|
|
||||||
def reload_features_tree(self):
|
def reload_features_tree(self):
|
||||||
self.reset()
|
self.reset()
|
||||||
@@ -119,7 +118,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
extractor = capa.features.extractors.ida.IdaFeatureExtractor()
|
extractor = capa.features.extractors.ida.IdaFeatureExtractor()
|
||||||
f = idaapi.get_func(idaapi.get_screen_ea())
|
f = idaapi.get_func(idaapi.get_screen_ea())
|
||||||
if not f:
|
if not f:
|
||||||
logger.info('function does not exist at 0x%x', idaapi.get_screen_ea())
|
logger.info("function does not exist at 0x%x", idaapi.get_screen_ea())
|
||||||
return
|
return
|
||||||
|
|
||||||
return self.extract_function_features(f)
|
return self.extract_function_features(f)
|
||||||
@@ -137,7 +136,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
def create_tree(self, features):
|
def create_tree(self, features):
|
||||||
self.tree.setMinimumWidth(400)
|
self.tree.setMinimumWidth(400)
|
||||||
# self.tree.setMinimumHeight(300)
|
# self.tree.setMinimumHeight(300)
|
||||||
self.tree.setHeaderLabels(['Feature', 'Virtual Address', 'Disassembly'])
|
self.tree.setHeaderLabels(["Feature", "Virtual Address", "Disassembly"])
|
||||||
# auto resize columns
|
# auto resize columns
|
||||||
self.tree.header().setSectionResizeMode(QHeaderView.ResizeToContents)
|
self.tree.header().setSectionResizeMode(QHeaderView.ResizeToContents)
|
||||||
self.tree.itemClicked.connect(self.on_item_clicked)
|
self.tree.itemClicked.connect(self.on_item_clicked)
|
||||||
@@ -151,16 +150,22 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
# level 1
|
# level 1
|
||||||
if feature not in self.parent_items:
|
if feature not in self.parent_items:
|
||||||
self.parent_items[feature] = plugin_helpers.add_child_item(self.parent_items[type(feature)], [str(feature)])
|
self.parent_items[feature] = plugin_helpers.add_child_item(
|
||||||
|
self.parent_items[type(feature)], [str(feature)]
|
||||||
|
)
|
||||||
|
|
||||||
# level n > 1
|
# level n > 1
|
||||||
if len(vas) > 1:
|
if len(vas) > 1:
|
||||||
for va in sorted(vas):
|
for va in sorted(vas):
|
||||||
plugin_helpers.add_child_item(self.parent_items[feature], [str(feature), '0x%X' % va, plugin_helpers.get_disasm_line(va)], feature)
|
plugin_helpers.add_child_item(
|
||||||
|
self.parent_items[feature],
|
||||||
|
[str(feature), "0x%X" % va, plugin_helpers.get_disasm_line(va)],
|
||||||
|
feature,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
va = vas.pop()
|
va = vas.pop()
|
||||||
self.parent_items[feature].setText(0, str(feature))
|
self.parent_items[feature].setText(0, str(feature))
|
||||||
self.parent_items[feature].setText(1, '0x%X' % va)
|
self.parent_items[feature].setText(1, "0x%X" % va)
|
||||||
self.parent_items[feature].setText(2, plugin_helpers.get_disasm_line(va))
|
self.parent_items[feature].setText(2, plugin_helpers.get_disasm_line(va))
|
||||||
self.parent_items[feature].setData(0, 0x100, feature)
|
self.parent_items[feature].setData(0, 0x100, feature)
|
||||||
|
|
||||||
@@ -188,29 +193,31 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
def get_rule_from_features(self, features):
|
def get_rule_from_features(self, features):
|
||||||
rule_parts = []
|
rule_parts = []
|
||||||
counted = zip(Counter(features).keys(), # equals to list(set(words))
|
counted = zip(
|
||||||
Counter(features).values()) # counts the elements' frequency
|
Counter(features).keys(), Counter(features).values() # equals to list(set(words))
|
||||||
|
) # counts the elements' frequency
|
||||||
|
|
||||||
# single features
|
# single features
|
||||||
for k, v in filter(lambda t: t[1] == 1, counted):
|
for k, v in filter(lambda t: t[1] == 1, counted):
|
||||||
# TODO args to hex if int
|
# TODO args to hex if int
|
||||||
if k.name.lower() == 'bytes':
|
if k.name.lower() == "bytes":
|
||||||
# Convert raw bytes to uppercase hex representation (e.g., '12 34 56')
|
# Convert raw bytes to uppercase hex representation (e.g., '12 34 56')
|
||||||
upper_hex_bytes = binascii.hexlify(args_to_str(k.args)).upper()
|
upper_hex_bytes = binascii.hexlify(args_to_str(k.args)).upper()
|
||||||
rule_value_str = ''
|
rule_value_str = ""
|
||||||
for i in range(0, len(upper_hex_bytes), 2):
|
for i in range(0, len(upper_hex_bytes), 2):
|
||||||
rule_value_str += upper_hex_bytes[i:i + 2] + ' '
|
rule_value_str += upper_hex_bytes[i : i + 2] + " "
|
||||||
r = ' - %s: %s' % (k.name.lower(), rule_value_str)
|
r = " - %s: %s" % (k.name.lower(), rule_value_str)
|
||||||
else:
|
else:
|
||||||
r = ' - %s: %s' % (k.name.lower(), args_to_str(k.args))
|
r = " - %s: %s" % (k.name.lower(), args_to_str(k.args))
|
||||||
rule_parts.append(r)
|
rule_parts.append(r)
|
||||||
|
|
||||||
# counted features
|
# counted features
|
||||||
for k, v in filter(lambda t: t[1] > 1, counted):
|
for k, v in filter(lambda t: t[1] > 1, counted):
|
||||||
r = ' - count(%s): %d' % (str(k), v)
|
r = " - count(%s): %d" % (str(k), v)
|
||||||
rule_parts.append(r)
|
rule_parts.append(r)
|
||||||
|
|
||||||
rule_prefix = textwrap.dedent('''
|
rule_prefix = textwrap.dedent(
|
||||||
|
"""
|
||||||
rule:
|
rule:
|
||||||
meta:
|
meta:
|
||||||
name:
|
name:
|
||||||
@@ -219,8 +226,10 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
examples:
|
examples:
|
||||||
- %s:0x%X
|
- %s:0x%X
|
||||||
features:
|
features:
|
||||||
''' % (AUTHOR_NAME, idc.retrieve_input_file_md5(), get_func_start(idc.here()))).strip()
|
"""
|
||||||
return '%s\n%s' % (rule_prefix, '\n'.join(sorted(rule_parts)))
|
% (AUTHOR_NAME, idc.retrieve_input_file_md5(), get_func_start(idc.here()))
|
||||||
|
).strip()
|
||||||
|
return "%s\n%s" % (rule_prefix, "\n".join(sorted(rule_parts)))
|
||||||
|
|
||||||
# TODO merge into capa_idautils, get feature data
|
# TODO merge into capa_idautils, get feature data
|
||||||
def get_selected_items(self):
|
def get_selected_items(self):
|
||||||
@@ -242,26 +251,25 @@ class RuleGeneratorForm(idaapi.PluginForm):
|
|||||||
self.init_ui()
|
self.init_ui()
|
||||||
|
|
||||||
def Show(self):
|
def Show(self):
|
||||||
return idaapi.PluginForm.Show(self, self.title, options=(
|
return idaapi.PluginForm.Show(
|
||||||
idaapi.PluginForm.WOPN_RESTORE
|
self, self.title, options=(idaapi.PluginForm.WOPN_RESTORE | idaapi.PluginForm.WOPN_PERSIST)
|
||||||
| idaapi.PluginForm.WOPN_PERSIST
|
)
|
||||||
))
|
|
||||||
|
|
||||||
def OnClose(self, form):
|
def OnClose(self, form):
|
||||||
self.reset()
|
self.reset()
|
||||||
if self.hooks.unhook():
|
if self.hooks.unhook():
|
||||||
logger.info('UI notification hook uninstalled successfully')
|
logger.info("UI notification hook uninstalled successfully")
|
||||||
logger.info('RuleGeneratorForm closed')
|
logger.info("RuleGeneratorForm closed")
|
||||||
|
|
||||||
|
|
||||||
def args_to_str(args):
|
def args_to_str(args):
|
||||||
a = []
|
a = []
|
||||||
for arg in args:
|
for arg in args:
|
||||||
if (isinstance(arg, int) or isinstance(arg, long)) and arg > 10:
|
if (isinstance(arg, int) or isinstance(arg, long)) and arg > 10:
|
||||||
a.append('0x%X' % arg)
|
a.append("0x%X" % arg)
|
||||||
else:
|
else:
|
||||||
a.append(str(arg))
|
a.append(str(arg))
|
||||||
return ','.join(a)
|
return ",".join(a)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -280,5 +288,5 @@ def main():
|
|||||||
RULE_GEN_FORM.Show()
|
RULE_GEN_FORM.Show()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -8,34 +8,34 @@ import idc
|
|||||||
import idaapi
|
import idaapi
|
||||||
|
|
||||||
|
|
||||||
CAPA_EXTENSION = '.capas'
|
CAPA_EXTENSION = ".capas"
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger('capa_ida')
|
logger = logging.getLogger("capa_ida")
|
||||||
|
|
||||||
|
|
||||||
def get_input_file(freeze=True):
|
def get_input_file(freeze=True):
|
||||||
'''
|
"""
|
||||||
get input file path
|
get input file path
|
||||||
|
|
||||||
freeze (bool): if True, get freeze file if it exists
|
freeze (bool): if True, get freeze file if it exists
|
||||||
'''
|
"""
|
||||||
# try original file in same directory as idb/i64 without idb/i64 file extension
|
# try original file in same directory as idb/i64 without idb/i64 file extension
|
||||||
input_file = idc.get_idb_path()[:-4]
|
input_file = idc.get_idb_path()[:-4]
|
||||||
|
|
||||||
if freeze:
|
if freeze:
|
||||||
# use frozen file if it exists
|
# use frozen file if it exists
|
||||||
freeze_file_cand = '%s%s' % (input_file, CAPA_EXTENSION)
|
freeze_file_cand = "%s%s" % (input_file, CAPA_EXTENSION)
|
||||||
if os.path.isfile(freeze_file_cand):
|
if os.path.isfile(freeze_file_cand):
|
||||||
return freeze_file_cand
|
return freeze_file_cand
|
||||||
|
|
||||||
if not os.path.isfile(input_file):
|
if not os.path.isfile(input_file):
|
||||||
# TM naming
|
# TM naming
|
||||||
input_file = '%s.mal_' % idc.get_idb_path()[:-4]
|
input_file = "%s.mal_" % idc.get_idb_path()[:-4]
|
||||||
if not os.path.isfile(input_file):
|
if not os.path.isfile(input_file):
|
||||||
input_file = idaapi.ask_file(0, '*.*', 'Please specify input file.')
|
input_file = idaapi.ask_file(0, "*.*", "Please specify input file.")
|
||||||
if not input_file:
|
if not input_file:
|
||||||
raise ValueError('could not find input file')
|
raise ValueError("could not find input file")
|
||||||
return input_file
|
return input_file
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
328
capa/main.py
328
capa/main.py
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
'''
|
"""
|
||||||
capa - detect capabilities in programs.
|
capa - detect capabilities in programs.
|
||||||
'''
|
"""
|
||||||
import os
|
import os
|
||||||
import os.path
|
import os.path
|
||||||
import sys
|
import sys
|
||||||
@@ -23,16 +23,16 @@ import capa.features.extractors
|
|||||||
from capa.helpers import oint
|
from capa.helpers import oint
|
||||||
|
|
||||||
|
|
||||||
SUPPORTED_FILE_MAGIC = set(['MZ'])
|
SUPPORTED_FILE_MAGIC = set(["MZ"])
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger('capa')
|
logger = logging.getLogger("capa")
|
||||||
|
|
||||||
|
|
||||||
def set_vivisect_log_level(level):
|
def set_vivisect_log_level(level):
|
||||||
logging.getLogger('vivisect').setLevel(level)
|
logging.getLogger("vivisect").setLevel(level)
|
||||||
logging.getLogger('vtrace').setLevel(level)
|
logging.getLogger("vtrace").setLevel(level)
|
||||||
logging.getLogger('envi').setLevel(level)
|
logging.getLogger("envi").setLevel(level)
|
||||||
|
|
||||||
|
|
||||||
def find_function_capabilities(ruleset, extractor, f):
|
def find_function_capabilities(ruleset, extractor, f):
|
||||||
@@ -83,7 +83,7 @@ def find_file_capabilities(ruleset, extractor, function_features):
|
|||||||
if feature not in file_features:
|
if feature not in file_features:
|
||||||
file_features[feature] = set()
|
file_features[feature] = set()
|
||||||
|
|
||||||
logger.info('analyzed file and extracted %d features', len(file_features))
|
logger.info("analyzed file and extracted %d features", len(file_features))
|
||||||
|
|
||||||
file_features.update(function_features)
|
file_features.update(function_features)
|
||||||
|
|
||||||
@@ -95,7 +95,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
|
|||||||
all_function_matches = collections.defaultdict(list)
|
all_function_matches = collections.defaultdict(list)
|
||||||
all_bb_matches = collections.defaultdict(list)
|
all_bb_matches = collections.defaultdict(list)
|
||||||
|
|
||||||
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=' functions'):
|
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
|
||||||
function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
|
function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
|
||||||
for rule_name, res in function_matches.items():
|
for rule_name, res in function_matches.items():
|
||||||
all_function_matches[rule_name].extend(res)
|
all_function_matches[rule_name].extend(res)
|
||||||
@@ -104,8 +104,10 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
|
|||||||
|
|
||||||
# mapping from matched rule feature to set of addresses at which it matched.
|
# mapping from matched rule feature to set of addresses at which it matched.
|
||||||
# type: Dict[MatchedRule, Set[int]]
|
# type: Dict[MatchedRule, Set[int]]
|
||||||
function_features = {capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
|
function_features = {
|
||||||
for rule_name, results in all_function_matches.items()}
|
capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
|
||||||
|
for rule_name, results in all_function_matches.items()
|
||||||
|
}
|
||||||
|
|
||||||
all_file_matches = find_file_capabilities(ruleset, extractor, function_features)
|
all_file_matches = find_file_capabilities(ruleset, extractor, function_features)
|
||||||
|
|
||||||
@@ -119,7 +121,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
|
|||||||
|
|
||||||
def has_rule_with_namespace(rules, capabilities, rule_cat):
|
def has_rule_with_namespace(rules, capabilities, rule_cat):
|
||||||
for rule_name in capabilities.keys():
|
for rule_name in capabilities.keys():
|
||||||
if rules.rules[rule_name].meta.get('namespace', '').startswith(rule_cat):
|
if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -128,61 +130,61 @@ def has_file_limitation(rules, capabilities, is_standalone=True):
|
|||||||
file_limitations = {
|
file_limitations = {
|
||||||
# capa will likely detect installer specific functionality.
|
# capa will likely detect installer specific functionality.
|
||||||
# this is probably not what the user wants.
|
# this is probably not what the user wants.
|
||||||
'executable/installer': [
|
"executable/installer": [
|
||||||
' This sample appears to be an installer.',
|
" This sample appears to be an installer.",
|
||||||
' ',
|
" ",
|
||||||
' capa cannot handle installers well. This means the results may be misleading or incomplete.'
|
" capa cannot handle installers well. This means the results may be misleading or incomplete."
|
||||||
' You should try to understand the install mechanism and analyze created files with capa.'
|
" You should try to understand the install mechanism and analyze created files with capa.",
|
||||||
],
|
],
|
||||||
# capa won't detect much in .NET samples.
|
# capa won't detect much in .NET samples.
|
||||||
# it might match some file-level things.
|
# it might match some file-level things.
|
||||||
# for consistency, bail on things that we don't support.
|
# for consistency, bail on things that we don't support.
|
||||||
'runtime/dotnet': [
|
"runtime/dotnet": [
|
||||||
' This sample appears to be a .NET module.',
|
" This sample appears to be a .NET module.",
|
||||||
' ',
|
" ",
|
||||||
' .NET is a cross-platform framework for running managed applications.',
|
" .NET is a cross-platform framework for running managed applications.",
|
||||||
' capa cannot handle non-native files. This means that the results may be misleading or incomplete.',
|
" capa cannot handle non-native files. This means that the results may be misleading or incomplete.",
|
||||||
' You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.'
|
" You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.",
|
||||||
],
|
],
|
||||||
# capa will detect dozens of capabilities for AutoIt samples,
|
# capa will detect dozens of capabilities for AutoIt samples,
|
||||||
# but these are due to the AutoIt runtime, not the payload script.
|
# but these are due to the AutoIt runtime, not the payload script.
|
||||||
# so, don't confuse the user with FP matches - bail instead
|
# so, don't confuse the user with FP matches - bail instead
|
||||||
'compiler/autoit': [
|
"compiler/autoit": [
|
||||||
' This sample appears to be compiled with AutoIt.',
|
" This sample appears to be compiled with AutoIt.",
|
||||||
' ',
|
" ",
|
||||||
' AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.',
|
" AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.",
|
||||||
' capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.',
|
" capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.",
|
||||||
' You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.'
|
" You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.",
|
||||||
],
|
],
|
||||||
# capa won't detect much in packed samples
|
# capa won't detect much in packed samples
|
||||||
'anti-analysis/packer/': [
|
"anti-analysis/packer/": [
|
||||||
' This sample appears to be packed.',
|
" This sample appears to be packed.",
|
||||||
' ',
|
" ",
|
||||||
' Packed samples have often been obfuscated to hide their logic.',
|
" Packed samples have often been obfuscated to hide their logic.",
|
||||||
' capa cannot handle obfuscation well. This means the results may be misleading or incomplete.',
|
" capa cannot handle obfuscation well. This means the results may be misleading or incomplete.",
|
||||||
' If possible, you should try to unpack this input file before analyzing it with capa.'
|
" If possible, you should try to unpack this input file before analyzing it with capa.",
|
||||||
]
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
for category, dialogue in file_limitations.items():
|
for category, dialogue in file_limitations.items():
|
||||||
if not has_rule_with_namespace(rules, capabilities, category):
|
if not has_rule_with_namespace(rules, capabilities, category):
|
||||||
continue
|
continue
|
||||||
logger.warning('-' * 80)
|
logger.warning("-" * 80)
|
||||||
for line in dialogue:
|
for line in dialogue:
|
||||||
logger.warning(line)
|
logger.warning(line)
|
||||||
if is_standalone:
|
if is_standalone:
|
||||||
logger.warning(' ')
|
logger.warning(" ")
|
||||||
logger.warning(' Use -v or -vv if you really want to see the capabilities identified by capa.')
|
logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
|
||||||
logger.warning('-' * 80)
|
logger.warning("-" * 80)
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def is_supported_file_type(sample):
|
def is_supported_file_type(sample):
|
||||||
'''
|
"""
|
||||||
Return if this is a supported file based on magic header values
|
Return if this is a supported file based on magic header values
|
||||||
'''
|
"""
|
||||||
with open(sample, 'rb') as f:
|
with open(sample, "rb") as f:
|
||||||
magic = f.read(2)
|
magic = f.read(2)
|
||||||
if magic in SUPPORTED_FILE_MAGIC:
|
if magic in SUPPORTED_FILE_MAGIC:
|
||||||
return True
|
return True
|
||||||
@@ -190,36 +192,37 @@ def is_supported_file_type(sample):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def get_shellcode_vw(sample, arch='auto'):
|
def get_shellcode_vw(sample, arch="auto"):
|
||||||
'''
|
"""
|
||||||
Return shellcode workspace using explicit arch or via auto detect
|
Return shellcode workspace using explicit arch or via auto detect
|
||||||
'''
|
"""
|
||||||
import viv_utils
|
import viv_utils
|
||||||
with open(sample, 'rb') as f:
|
|
||||||
|
with open(sample, "rb") as f:
|
||||||
sample_bytes = f.read()
|
sample_bytes = f.read()
|
||||||
if arch == 'auto':
|
if arch == "auto":
|
||||||
# choose arch with most functions, idea by Jay G.
|
# choose arch with most functions, idea by Jay G.
|
||||||
vw_cands = []
|
vw_cands = []
|
||||||
for arch in ['i386', 'amd64']:
|
for arch in ["i386", "amd64"]:
|
||||||
vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch))
|
vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch))
|
||||||
if not vw_cands:
|
if not vw_cands:
|
||||||
raise ValueError('could not generate vivisect workspace')
|
raise ValueError("could not generate vivisect workspace")
|
||||||
vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
|
vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
|
||||||
else:
|
else:
|
||||||
vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch)
|
vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch)
|
||||||
vw.setMeta('Format', 'blob') # TODO fix in viv_utils
|
vw.setMeta("Format", "blob") # TODO fix in viv_utils
|
||||||
return vw
|
return vw
|
||||||
|
|
||||||
|
|
||||||
def get_meta_str(vw):
|
def get_meta_str(vw):
|
||||||
'''
|
"""
|
||||||
Return workspace meta information string
|
Return workspace meta information string
|
||||||
'''
|
"""
|
||||||
meta = []
|
meta = []
|
||||||
for k in ['Format', 'Platform', 'Architecture']:
|
for k in ["Format", "Platform", "Architecture"]:
|
||||||
if k in vw.metadata:
|
if k in vw.metadata:
|
||||||
meta.append('%s: %s' % (k.lower(), vw.metadata[k]))
|
meta.append("%s: %s" % (k.lower(), vw.metadata[k]))
|
||||||
return '%s, number of functions: %d' % (', '.join(meta), len(vw.getFunctions()))
|
return "%s, number of functions: %d" % (", ".join(meta), len(vw.getFunctions()))
|
||||||
|
|
||||||
|
|
||||||
class UnsupportedFormatError(ValueError):
|
class UnsupportedFormatError(ValueError):
|
||||||
@@ -228,23 +231,25 @@ class UnsupportedFormatError(ValueError):
|
|||||||
|
|
||||||
def get_workspace(path, format):
|
def get_workspace(path, format):
|
||||||
import viv_utils
|
import viv_utils
|
||||||
logger.info('generating vivisect workspace for: %s', path)
|
|
||||||
if format == 'auto':
|
logger.info("generating vivisect workspace for: %s", path)
|
||||||
|
if format == "auto":
|
||||||
if not is_supported_file_type(path):
|
if not is_supported_file_type(path):
|
||||||
raise UnsupportedFormatError()
|
raise UnsupportedFormatError()
|
||||||
vw = viv_utils.getWorkspace(path)
|
vw = viv_utils.getWorkspace(path)
|
||||||
elif format == 'pe':
|
elif format == "pe":
|
||||||
vw = viv_utils.getWorkspace(path)
|
vw = viv_utils.getWorkspace(path)
|
||||||
elif format == 'sc32':
|
elif format == "sc32":
|
||||||
vw = get_shellcode_vw(path, arch='i386')
|
vw = get_shellcode_vw(path, arch="i386")
|
||||||
elif format == 'sc64':
|
elif format == "sc64":
|
||||||
vw = get_shellcode_vw(path, arch='amd64')
|
vw = get_shellcode_vw(path, arch="amd64")
|
||||||
logger.info('%s', get_meta_str(vw))
|
logger.info("%s", get_meta_str(vw))
|
||||||
return vw
|
return vw
|
||||||
|
|
||||||
|
|
||||||
def get_extractor_py2(path, format):
|
def get_extractor_py2(path, format):
|
||||||
import capa.features.extractors.viv
|
import capa.features.extractors.viv
|
||||||
|
|
||||||
vw = get_workspace(path, format)
|
vw = get_workspace(path, format)
|
||||||
return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
|
return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
|
||||||
|
|
||||||
@@ -258,10 +263,10 @@ def get_extractor_py3(path, format):
|
|||||||
|
|
||||||
|
|
||||||
def get_extractor(path, format):
|
def get_extractor(path, format):
|
||||||
'''
|
"""
|
||||||
raises:
|
raises:
|
||||||
UnsupportedFormatError:
|
UnsupportedFormatError:
|
||||||
'''
|
"""
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
return get_extractor_py3(path, format)
|
return get_extractor_py3(path, format)
|
||||||
else:
|
else:
|
||||||
@@ -269,7 +274,7 @@ def get_extractor(path, format):
|
|||||||
|
|
||||||
|
|
||||||
def is_nursery_rule_path(path):
|
def is_nursery_rule_path(path):
|
||||||
'''
|
"""
|
||||||
The nursery is a spot for rules that have not yet been fully polished.
|
The nursery is a spot for rules that have not yet been fully polished.
|
||||||
For example, they may not have references to public example of a technique.
|
For example, they may not have references to public example of a technique.
|
||||||
Yet, we still want to capture and report on their matches.
|
Yet, we still want to capture and report on their matches.
|
||||||
@@ -277,23 +282,23 @@ def is_nursery_rule_path(path):
|
|||||||
|
|
||||||
When nursery rules are loaded, their metadata section should be updated with:
|
When nursery rules are loaded, their metadata section should be updated with:
|
||||||
`nursery=True`.
|
`nursery=True`.
|
||||||
'''
|
"""
|
||||||
return 'nursery' in path
|
return "nursery" in path
|
||||||
|
|
||||||
|
|
||||||
def get_rules(rule_path):
|
def get_rules(rule_path):
|
||||||
if not os.path.exists(rule_path):
|
if not os.path.exists(rule_path):
|
||||||
raise IOError('%s does not exist or cannot be accessed' % rule_path)
|
raise IOError("%s does not exist or cannot be accessed" % rule_path)
|
||||||
|
|
||||||
rule_paths = []
|
rule_paths = []
|
||||||
if os.path.isfile(rule_path):
|
if os.path.isfile(rule_path):
|
||||||
rule_paths.append(rule_path)
|
rule_paths.append(rule_path)
|
||||||
elif os.path.isdir(rule_path):
|
elif os.path.isdir(rule_path):
|
||||||
logger.debug('reading rules from directory %s', rule_path)
|
logger.debug("reading rules from directory %s", rule_path)
|
||||||
for root, dirs, files in os.walk(rule_path):
|
for root, dirs, files in os.walk(rule_path):
|
||||||
for file in files:
|
for file in files:
|
||||||
if not file.endswith('.yml'):
|
if not file.endswith(".yml"):
|
||||||
logger.warning('skipping non-.yml file: %s', file)
|
logger.warning("skipping non-.yml file: %s", file)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
rule_path = os.path.join(root, file)
|
rule_path = os.path.join(root, file)
|
||||||
@@ -301,18 +306,18 @@ def get_rules(rule_path):
|
|||||||
|
|
||||||
rules = []
|
rules = []
|
||||||
for rule_path in rule_paths:
|
for rule_path in rule_paths:
|
||||||
logger.debug('reading rule file: %s', rule_path)
|
logger.debug("reading rule file: %s", rule_path)
|
||||||
try:
|
try:
|
||||||
rule = capa.rules.Rule.from_yaml_file(rule_path)
|
rule = capa.rules.Rule.from_yaml_file(rule_path)
|
||||||
except capa.rules.InvalidRule:
|
except capa.rules.InvalidRule:
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
rule.meta['capa/path'] = rule_path
|
rule.meta["capa/path"] = rule_path
|
||||||
if is_nursery_rule_path(rule_path):
|
if is_nursery_rule_path(rule_path):
|
||||||
rule.meta['capa/nursery'] = True
|
rule.meta["capa/nursery"] = True
|
||||||
|
|
||||||
rules.append(rule)
|
rules.append(rule)
|
||||||
logger.debug('rule: %s scope: %s', rule.name, rule.scope)
|
logger.debug("rule: %s scope: %s", rule.name, rule.scope)
|
||||||
|
|
||||||
return rules
|
return rules
|
||||||
|
|
||||||
@@ -322,35 +327,37 @@ def main(argv=None):
|
|||||||
argv = sys.argv[1:]
|
argv = sys.argv[1:]
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
('auto', '(default) detect file type automatically'),
|
("auto", "(default) detect file type automatically"),
|
||||||
('pe', 'Windows PE file'),
|
("pe", "Windows PE file"),
|
||||||
('sc32', '32-bit shellcode'),
|
("sc32", "32-bit shellcode"),
|
||||||
('sc64', '64-bit shellcode'),
|
("sc64", "64-bit shellcode"),
|
||||||
('freeze', 'features previously frozen by capa'),
|
("freeze", "features previously frozen by capa"),
|
||||||
]
|
]
|
||||||
format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])
|
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='detect capabilities in programs.')
|
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
|
||||||
parser.add_argument('sample', type=str,
|
parser.add_argument("sample", type=str, help="Path to sample to analyze")
|
||||||
help='Path to sample to analyze')
|
parser.add_argument(
|
||||||
parser.add_argument('-r', '--rules', type=str, default='(embedded rules)',
|
"-r",
|
||||||
help='Path to rule file or directory, use embedded rules by default')
|
"--rules",
|
||||||
parser.add_argument('-t', '--tag', type=str,
|
type=str,
|
||||||
help='Filter on rule meta field values')
|
default="(embedded rules)",
|
||||||
parser.add_argument('--version', action='store_true',
|
help="Path to rule file or directory, use embedded rules by default",
|
||||||
help='Print the executable version and exit')
|
)
|
||||||
parser.add_argument('-j', '--json', action='store_true',
|
parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values")
|
||||||
help='Emit JSON instead of text')
|
parser.add_argument("--version", action="store_true", help="Print the executable version and exit")
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument("-j", "--json", action="store_true", help="Emit JSON instead of text")
|
||||||
help='Enable verbose result document (no effect with --json)')
|
parser.add_argument(
|
||||||
parser.add_argument('-vv', '--vverbose', action='store_true',
|
"-v", "--verbose", action="store_true", help="Enable verbose result document (no effect with --json)"
|
||||||
help='Enable very verbose result document (no effect with --json)')
|
)
|
||||||
parser.add_argument('-d', '--debug', action='store_true',
|
parser.add_argument(
|
||||||
help='Enable debugging output on STDERR')
|
"-vv", "--vverbose", action="store_true", help="Enable very verbose result document (no effect with --json)"
|
||||||
parser.add_argument('-q', '--quiet', action='store_true',
|
)
|
||||||
help='Disable all output but errors')
|
parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
|
||||||
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
|
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
|
||||||
help='Select sample format, %s' % format_help)
|
parser.add_argument(
|
||||||
|
"-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
|
||||||
|
)
|
||||||
args = parser.parse_args(args=argv)
|
args = parser.parse_args(args=argv)
|
||||||
|
|
||||||
if args.version:
|
if args.version:
|
||||||
@@ -375,68 +382,70 @@ def main(argv=None):
|
|||||||
# because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
|
# because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
|
||||||
# see #380 and: https://stackoverflow.com/a/3259271/87207
|
# see #380 and: https://stackoverflow.com/a/3259271/87207
|
||||||
import codecs
|
import codecs
|
||||||
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
|
||||||
|
|
||||||
if args.rules == '(embedded rules)':
|
codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
|
||||||
logger.info('-' * 80)
|
|
||||||
logger.info(' Using default embedded rules.')
|
|
||||||
logger.info(' To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.')
|
|
||||||
logger.info(' You can see the current default rule set here:')
|
|
||||||
logger.info(' https://github.com/fireeye/capa-rules')
|
|
||||||
logger.info('-' * 80)
|
|
||||||
|
|
||||||
if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
|
if args.rules == "(embedded rules)":
|
||||||
logger.debug('detected running under PyInstaller')
|
logger.info("-" * 80)
|
||||||
args.rules = os.path.join(sys._MEIPASS, 'rules')
|
logger.info(" Using default embedded rules.")
|
||||||
logger.debug('default rule path (PyInstaller method): %s', args.rules)
|
logger.info(" To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.")
|
||||||
|
logger.info(" You can see the current default rule set here:")
|
||||||
|
logger.info(" https://github.com/fireeye/capa-rules")
|
||||||
|
logger.info("-" * 80)
|
||||||
|
|
||||||
|
if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
|
||||||
|
logger.debug("detected running under PyInstaller")
|
||||||
|
args.rules = os.path.join(sys._MEIPASS, "rules")
|
||||||
|
logger.debug("default rule path (PyInstaller method): %s", args.rules)
|
||||||
else:
|
else:
|
||||||
logger.debug('detected running from source')
|
logger.debug("detected running from source")
|
||||||
args.rules = os.path.join(os.path.dirname(__file__), '..', 'rules')
|
args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
|
||||||
logger.debug('default rule path (source method): %s', args.rules)
|
logger.debug("default rule path (source method): %s", args.rules)
|
||||||
else:
|
else:
|
||||||
logger.info('using rules path: %s', args.rules)
|
logger.info("using rules path: %s", args.rules)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
rules = get_rules(args.rules)
|
rules = get_rules(args.rules)
|
||||||
rules = capa.rules.RuleSet(rules)
|
rules = capa.rules.RuleSet(rules)
|
||||||
logger.info('successfully loaded %s rules', len(rules))
|
logger.info("successfully loaded %s rules", len(rules))
|
||||||
if args.tag:
|
if args.tag:
|
||||||
rules = rules.filter_rules_by_meta(args.tag)
|
rules = rules.filter_rules_by_meta(args.tag)
|
||||||
logger.info('selected %s rules', len(rules))
|
logger.info("selected %s rules", len(rules))
|
||||||
for i, r in enumerate(rules.rules, 1):
|
for i, r in enumerate(rules.rules, 1):
|
||||||
# TODO don't display subscope rules?
|
# TODO don't display subscope rules?
|
||||||
logger.debug(' %d. %s', i, r)
|
logger.debug(" %d. %s", i, r)
|
||||||
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
|
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
|
||||||
logger.error('%s', str(e))
|
logger.error("%s", str(e))
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
with open(args.sample, 'rb') as f:
|
with open(args.sample, "rb") as f:
|
||||||
taste = f.read(8)
|
taste = f.read(8)
|
||||||
|
|
||||||
if ((args.format == 'freeze')
|
if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
|
||||||
or (args.format == 'auto' and capa.features.freeze.is_freeze(taste))):
|
with open(args.sample, "rb") as f:
|
||||||
with open(args.sample, 'rb') as f:
|
|
||||||
extractor = capa.features.freeze.load(f.read())
|
extractor = capa.features.freeze.load(f.read())
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
extractor = get_extractor(args.sample, args.format)
|
extractor = get_extractor(args.sample, args.format)
|
||||||
except UnsupportedFormatError:
|
except UnsupportedFormatError:
|
||||||
logger.error('-' * 80)
|
logger.error("-" * 80)
|
||||||
logger.error(' Input file does not appear to be a PE file.')
|
logger.error(" Input file does not appear to be a PE file.")
|
||||||
logger.error(' ')
|
logger.error(" ")
|
||||||
logger.error(' capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).')
|
logger.error(
|
||||||
logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
|
" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
|
||||||
logger.error('-' * 80)
|
)
|
||||||
|
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
||||||
|
logger.error("-" * 80)
|
||||||
return -1
|
return -1
|
||||||
except UnsupportedRuntimeError:
|
except UnsupportedRuntimeError:
|
||||||
logger.error('-' * 80)
|
logger.error("-" * 80)
|
||||||
logger.error(' Unsupported runtime or Python interpreter.')
|
logger.error(" Unsupported runtime or Python interpreter.")
|
||||||
logger.error(' ')
|
logger.error(" ")
|
||||||
logger.error(' capa supports running under Python 2.7 using Vivisect for binary analysis.')
|
logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
|
||||||
logger.error(' It can also run within IDA Pro, using either Python 2.7 or 3.5+.')
|
logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
|
||||||
logger.error(' ')
|
logger.error(" ")
|
||||||
logger.error(' If you\'re seeing this message on the command line, please ensure you\'re running Python 2.7.')
|
logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
|
||||||
logger.error('-' * 80)
|
logger.error("-" * 80)
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
capabilities = find_capabilities(rules, extractor)
|
capabilities = find_capabilities(rules, extractor)
|
||||||
@@ -462,7 +471,7 @@ def main(argv=None):
|
|||||||
print(capa.render.render_default(rules, capabilities))
|
print(capa.render.render_default(rules, capabilities))
|
||||||
colorama.deinit()
|
colorama.deinit()
|
||||||
|
|
||||||
logger.info('done.')
|
logger.info("done.")
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
@@ -472,34 +481,37 @@ def ida_main():
|
|||||||
logging.getLogger().setLevel(logging.INFO)
|
logging.getLogger().setLevel(logging.INFO)
|
||||||
|
|
||||||
import capa.ida.helpers
|
import capa.ida.helpers
|
||||||
|
|
||||||
if not capa.ida.helpers.is_supported_file_type():
|
if not capa.ida.helpers.is_supported_file_type():
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
logger.info('-' * 80)
|
logger.info("-" * 80)
|
||||||
logger.info(' Using default embedded rules.')
|
logger.info(" Using default embedded rules.")
|
||||||
logger.info(' ')
|
logger.info(" ")
|
||||||
logger.info(' You can see the current default rule set here:')
|
logger.info(" You can see the current default rule set here:")
|
||||||
logger.info(' https://github.com/fireeye/capa-rules')
|
logger.info(" https://github.com/fireeye/capa-rules")
|
||||||
logger.info('-' * 80)
|
logger.info("-" * 80)
|
||||||
|
|
||||||
if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
|
if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
|
||||||
logger.debug('detected running under PyInstaller')
|
logger.debug("detected running under PyInstaller")
|
||||||
rules_path = os.path.join(sys._MEIPASS, 'rules')
|
rules_path = os.path.join(sys._MEIPASS, "rules")
|
||||||
logger.debug('default rule path (PyInstaller method): %s', rules_path)
|
logger.debug("default rule path (PyInstaller method): %s", rules_path)
|
||||||
else:
|
else:
|
||||||
logger.debug('detected running from source')
|
logger.debug("detected running from source")
|
||||||
rules_path = os.path.join(os.path.dirname(__file__), '..', 'rules')
|
rules_path = os.path.join(os.path.dirname(__file__), "..", "rules")
|
||||||
logger.debug('default rule path (source method): %s', rules_path)
|
logger.debug("default rule path (source method): %s", rules_path)
|
||||||
|
|
||||||
rules = get_rules(rules_path)
|
rules = get_rules(rules_path)
|
||||||
import capa.rules
|
import capa.rules
|
||||||
|
|
||||||
rules = capa.rules.RuleSet(rules)
|
rules = capa.rules.RuleSet(rules)
|
||||||
|
|
||||||
import capa.features.extractors.ida
|
import capa.features.extractors.ida
|
||||||
|
|
||||||
capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
|
capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
|
||||||
|
|
||||||
if has_file_limitation(rules, capabilities, is_standalone=False):
|
if has_file_limitation(rules, capabilities, is_standalone=False):
|
||||||
capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
|
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
|
||||||
|
|
||||||
render_capabilities_default(rules, capabilities)
|
render_capabilities_default(rules, capabilities)
|
||||||
|
|
||||||
@@ -513,7 +525,7 @@ def is_runtime_ida():
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
if is_runtime_ida():
|
if is_runtime_ida():
|
||||||
ida_main()
|
ida_main()
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -18,43 +18,41 @@ def convert_statement_to_result_document(statement):
|
|||||||
"""
|
"""
|
||||||
if isinstance(statement, capa.engine.And):
|
if isinstance(statement, capa.engine.And):
|
||||||
return {
|
return {
|
||||||
'type': 'and',
|
"type": "and",
|
||||||
}
|
}
|
||||||
elif isinstance(statement, capa.engine.Or):
|
elif isinstance(statement, capa.engine.Or):
|
||||||
return {
|
return {
|
||||||
'type': 'or',
|
"type": "or",
|
||||||
}
|
}
|
||||||
elif isinstance(statement, capa.engine.Not):
|
elif isinstance(statement, capa.engine.Not):
|
||||||
return {
|
return {
|
||||||
'type': 'not',
|
"type": "not",
|
||||||
}
|
}
|
||||||
elif isinstance(statement, capa.engine.Some) and statement.count == 0:
|
elif isinstance(statement, capa.engine.Some) and statement.count == 0:
|
||||||
return {
|
return {"type": "optional"}
|
||||||
'type': 'optional'
|
|
||||||
}
|
|
||||||
elif isinstance(statement, capa.engine.Some) and statement.count > 0:
|
elif isinstance(statement, capa.engine.Some) and statement.count > 0:
|
||||||
return {
|
return {
|
||||||
'type': 'some',
|
"type": "some",
|
||||||
'count': statement.count,
|
"count": statement.count,
|
||||||
}
|
}
|
||||||
elif isinstance(statement, capa.engine.Range):
|
elif isinstance(statement, capa.engine.Range):
|
||||||
return {
|
return {
|
||||||
'type': 'range',
|
"type": "range",
|
||||||
'min': statement.min,
|
"min": statement.min,
|
||||||
'max': statement.max,
|
"max": statement.max,
|
||||||
'child': convert_feature_to_result_document(statement.child),
|
"child": convert_feature_to_result_document(statement.child),
|
||||||
}
|
}
|
||||||
elif isinstance(statement, capa.engine.Regex):
|
elif isinstance(statement, capa.engine.Regex):
|
||||||
return {
|
return {
|
||||||
'type': 'regex',
|
"type": "regex",
|
||||||
'pattern': statement.pattern,
|
"pattern": statement.pattern,
|
||||||
# the string that was matched
|
# the string that was matched
|
||||||
'match': statement.match,
|
"match": statement.match,
|
||||||
}
|
}
|
||||||
elif isinstance(statement, capa.engine.Subscope):
|
elif isinstance(statement, capa.engine.Subscope):
|
||||||
return {
|
return {
|
||||||
'type': 'subscope',
|
"type": "subscope",
|
||||||
'subscope': statement.scope,
|
"subscope": statement.scope,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
raise RuntimeError("unexpected match statement type: " + str(statement))
|
raise RuntimeError("unexpected match statement type: " + str(statement))
|
||||||
@@ -89,8 +87,8 @@ def convert_feature_to_result_document(feature):
|
|||||||
|
|
||||||
# make the terms pretty
|
# make the terms pretty
|
||||||
name = name.lower()
|
name = name.lower()
|
||||||
if name == 'matchedrule':
|
if name == "matchedrule":
|
||||||
name = 'match'
|
name = "match"
|
||||||
|
|
||||||
# in the common case, there's a single argument
|
# in the common case, there's a single argument
|
||||||
# so use it directly.
|
# so use it directly.
|
||||||
@@ -99,7 +97,7 @@ def convert_feature_to_result_document(feature):
|
|||||||
value = value[0]
|
value = value[0]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'type': name,
|
"type": name,
|
||||||
name: value,
|
name: value,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -119,13 +117,13 @@ def convert_node_to_result_document(node):
|
|||||||
|
|
||||||
if isinstance(node, capa.engine.Statement):
|
if isinstance(node, capa.engine.Statement):
|
||||||
return {
|
return {
|
||||||
'type': 'statement',
|
"type": "statement",
|
||||||
'statement': convert_statement_to_result_document(node),
|
"statement": convert_statement_to_result_document(node),
|
||||||
}
|
}
|
||||||
elif isinstance(node, capa.features.Feature):
|
elif isinstance(node, capa.features.Feature):
|
||||||
return {
|
return {
|
||||||
'type': 'feature',
|
"type": "feature",
|
||||||
'feature': convert_feature_to_result_document(node),
|
"feature": convert_feature_to_result_document(node),
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
raise RuntimeError("unexpected match node type")
|
raise RuntimeError("unexpected match node type")
|
||||||
@@ -137,19 +135,16 @@ def convert_match_to_result_document(rules, capabilities, result):
|
|||||||
this will become part of the "result document" format that can be emitted to JSON.
|
this will become part of the "result document" format that can be emitted to JSON.
|
||||||
"""
|
"""
|
||||||
doc = {
|
doc = {
|
||||||
'success': bool(result.success),
|
"success": bool(result.success),
|
||||||
'node': convert_node_to_result_document(result.statement),
|
"node": convert_node_to_result_document(result.statement),
|
||||||
'children': [
|
"children": [convert_match_to_result_document(rules, capabilities, child) for child in result.children],
|
||||||
convert_match_to_result_document(rules, capabilities, child)
|
|
||||||
for child in result.children
|
|
||||||
],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# logic expression, like `and`, don't have locations - their children do.
|
# logic expression, like `and`, don't have locations - their children do.
|
||||||
# so only add `locations` to feature nodes.
|
# so only add `locations` to feature nodes.
|
||||||
if isinstance(result.statement, capa.features.Feature):
|
if isinstance(result.statement, capa.features.Feature):
|
||||||
if bool(result.success):
|
if bool(result.success):
|
||||||
doc['locations'] = result.locations
|
doc["locations"] = result.locations
|
||||||
|
|
||||||
# if we have a `match` statement, then we're referencing another rule.
|
# if we have a `match` statement, then we're referencing another rule.
|
||||||
# this could an external rule (written by a human), or
|
# this could an external rule (written by a human), or
|
||||||
@@ -159,31 +154,30 @@ def convert_match_to_result_document(rules, capabilities, result):
|
|||||||
# so, we need to lookup the other rule results
|
# so, we need to lookup the other rule results
|
||||||
# and then filter those down to the address used here.
|
# and then filter those down to the address used here.
|
||||||
# finally, splice that logic into this tree.
|
# finally, splice that logic into this tree.
|
||||||
if (doc['node']['type'] == 'feature'
|
if (
|
||||||
and doc['node']['feature']['type'] == 'match'
|
doc["node"]["type"] == "feature"
|
||||||
# only add subtree on success,
|
and doc["node"]["feature"]["type"] == "match"
|
||||||
# because there won't be results for the other rule on failure.
|
# only add subtree on success,
|
||||||
and doc['success']):
|
# because there won't be results for the other rule on failure.
|
||||||
|
and doc["success"]
|
||||||
|
):
|
||||||
|
|
||||||
rule_name = doc['node']['feature']['match']
|
rule_name = doc["node"]["feature"]["match"]
|
||||||
rule = rules[rule_name]
|
rule = rules[rule_name]
|
||||||
rule_matches = {address: result for (address, result) in capabilities[rule_name]}
|
rule_matches = {address: result for (address, result) in capabilities[rule_name]}
|
||||||
|
|
||||||
if rule.meta.get('capa/subscope-rule'):
|
if rule.meta.get("capa/subscope-rule"):
|
||||||
# for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
|
# for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
|
||||||
#
|
#
|
||||||
# e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
|
# e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
|
||||||
scope = rule.meta['scope']
|
scope = rule.meta["scope"]
|
||||||
doc['node'] = {
|
doc["node"] = {
|
||||||
'type': 'statement',
|
"type": "statement",
|
||||||
'statement': {
|
"statement": {"type": "subscope", "subscope": scope,},
|
||||||
'type': 'subscope',
|
|
||||||
'subscope': scope,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for location in doc['locations']:
|
for location in doc["locations"]:
|
||||||
doc['children'].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
|
doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
|
||||||
|
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
@@ -220,15 +214,14 @@ def convert_capabilities_to_result_document(rules, capabilities):
|
|||||||
for rule_name, matches in capabilities.items():
|
for rule_name, matches in capabilities.items():
|
||||||
rule = rules[rule_name]
|
rule = rules[rule_name]
|
||||||
|
|
||||||
if rule.meta.get('capa/subscope-rule'):
|
if rule.meta.get("capa/subscope-rule"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
doc[rule_name] = {
|
doc[rule_name] = {
|
||||||
'meta': dict(rule.meta),
|
"meta": dict(rule.meta),
|
||||||
'source': rule.definition,
|
"source": rule.definition,
|
||||||
'matches': {
|
"matches": {
|
||||||
addr: convert_match_to_result_document(rules, capabilities, match)
|
addr: convert_match_to_result_document(rules, capabilities, match) for (addr, match) in matches
|
||||||
for (addr, match) in matches
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -241,6 +234,7 @@ def render_vverbose(rules, capabilities):
|
|||||||
# and capa.render.vverbose import capa.render (implicitly, as a submodule)
|
# and capa.render.vverbose import capa.render (implicitly, as a submodule)
|
||||||
# so, defer the import until routine is called, breaking the import loop.
|
# so, defer the import until routine is called, breaking the import loop.
|
||||||
import capa.render.vverbose
|
import capa.render.vverbose
|
||||||
|
|
||||||
doc = convert_capabilities_to_result_document(rules, capabilities)
|
doc = convert_capabilities_to_result_document(rules, capabilities)
|
||||||
return capa.render.vverbose.render_vverbose(doc)
|
return capa.render.vverbose.render_vverbose(doc)
|
||||||
|
|
||||||
@@ -248,6 +242,7 @@ def render_vverbose(rules, capabilities):
|
|||||||
def render_verbose(rules, capabilities):
|
def render_verbose(rules, capabilities):
|
||||||
# break import loop
|
# break import loop
|
||||||
import capa.render.verbose
|
import capa.render.verbose
|
||||||
|
|
||||||
doc = convert_capabilities_to_result_document(rules, capabilities)
|
doc = convert_capabilities_to_result_document(rules, capabilities)
|
||||||
return capa.render.verbose.render_verbose(doc)
|
return capa.render.verbose.render_verbose(doc)
|
||||||
|
|
||||||
@@ -256,6 +251,7 @@ def render_default(rules, capabilities):
|
|||||||
# break import loop
|
# break import loop
|
||||||
import capa.render.verbose
|
import capa.render.verbose
|
||||||
import capa.render.default
|
import capa.render.default
|
||||||
|
|
||||||
doc = convert_capabilities_to_result_document(rules, capabilities)
|
doc = convert_capabilities_to_result_document(rules, capabilities)
|
||||||
return capa.render.default.render_default(doc)
|
return capa.render.default.render_default(doc)
|
||||||
|
|
||||||
@@ -273,7 +269,5 @@ class CapaJsonObjectEncoder(json.JSONEncoder):
|
|||||||
|
|
||||||
def render_json(rules, capabilities):
|
def render_json(rules, capabilities):
|
||||||
return json.dumps(
|
return json.dumps(
|
||||||
convert_capabilities_to_result_document(rules, capabilities),
|
convert_capabilities_to_result_document(rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True,
|
||||||
cls=CapaJsonObjectEncoder,
|
|
||||||
sort_keys=True,
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import capa.render.utils as rutils
|
|||||||
def width(s, character_count):
|
def width(s, character_count):
|
||||||
"""pad the given string to at least `character_count`"""
|
"""pad the given string to at least `character_count`"""
|
||||||
if len(s) < character_count:
|
if len(s) < character_count:
|
||||||
return s + ' ' * (character_count - len(s))
|
return s + " " * (character_count - len(s))
|
||||||
else:
|
else:
|
||||||
return s
|
return s
|
||||||
|
|
||||||
@@ -28,15 +28,15 @@ def render_capabilities(doc, ostream):
|
|||||||
"""
|
"""
|
||||||
rows = []
|
rows = []
|
||||||
for rule in rutils.capability_rules(doc):
|
for rule in rutils.capability_rules(doc):
|
||||||
count = len(rule['matches'])
|
count = len(rule["matches"])
|
||||||
if count == 1:
|
if count == 1:
|
||||||
capability = rutils.bold(rule['meta']['name'])
|
capability = rutils.bold(rule["meta"]["name"])
|
||||||
else:
|
else:
|
||||||
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
|
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
|
||||||
rows.append((capability, rule['meta']['namespace']))
|
rows.append((capability, rule["meta"]["namespace"]))
|
||||||
|
|
||||||
ostream.write(tabulate.tabulate(rows, headers=[width('CAPABILITY', 40), width('NAMESPACE', 40)], tablefmt='psql'))
|
ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 40), width("NAMESPACE", 40)], tablefmt="psql"))
|
||||||
ostream.write('\n')
|
ostream.write("\n")
|
||||||
|
|
||||||
|
|
||||||
def render_attack(doc, ostream):
|
def render_attack(doc, ostream):
|
||||||
@@ -57,17 +57,17 @@ def render_attack(doc, ostream):
|
|||||||
"""
|
"""
|
||||||
tactics = collections.defaultdict(set)
|
tactics = collections.defaultdict(set)
|
||||||
for rule in rutils.capability_rules(doc):
|
for rule in rutils.capability_rules(doc):
|
||||||
if not rule['meta'].get('att&ck'):
|
if not rule["meta"].get("att&ck"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for attack in rule['meta']['att&ck']:
|
for attack in rule["meta"]["att&ck"]:
|
||||||
tactic, _, rest = attack.partition('::')
|
tactic, _, rest = attack.partition("::")
|
||||||
if '::' in rest:
|
if "::" in rest:
|
||||||
technique, _, rest = rest.partition('::')
|
technique, _, rest = rest.partition("::")
|
||||||
subtechnique, _, id = rest.rpartition(' ')
|
subtechnique, _, id = rest.rpartition(" ")
|
||||||
tactics[tactic].add((technique, subtechnique, id))
|
tactics[tactic].add((technique, subtechnique, id))
|
||||||
else:
|
else:
|
||||||
technique, _, id = rest.rpartition(' ')
|
technique, _, id = rest.rpartition(" ")
|
||||||
tactics[tactic].add((technique, id))
|
tactics[tactic].add((technique, id))
|
||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
@@ -76,15 +76,17 @@ def render_attack(doc, ostream):
|
|||||||
for spec in sorted(techniques):
|
for spec in sorted(techniques):
|
||||||
if len(spec) == 2:
|
if len(spec) == 2:
|
||||||
technique, id = spec
|
technique, id = spec
|
||||||
inner_rows.append('%s %s' % (rutils.bold(technique), id))
|
inner_rows.append("%s %s" % (rutils.bold(technique), id))
|
||||||
elif len(spec) == 3:
|
elif len(spec) == 3:
|
||||||
technique, subtechnique, id = spec
|
technique, subtechnique, id = spec
|
||||||
inner_rows.append('%s::%s %s' % (rutils.bold(technique), subtechnique, id))
|
inner_rows.append("%s::%s %s" % (rutils.bold(technique), subtechnique, id))
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected ATT&CK spec format')
|
raise RuntimeError("unexpected ATT&CK spec format")
|
||||||
rows.append((rutils.bold(tactic.upper()), '\n'.join(inner_rows), ))
|
rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),))
|
||||||
ostream.write(tabulate.tabulate(rows, headers=[width('ATT&CK Tactic', 20), width('ATT&CK Technique', 60)], tablefmt='psql'))
|
ostream.write(
|
||||||
ostream.write('\n')
|
tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 60)], tablefmt="psql")
|
||||||
|
)
|
||||||
|
ostream.write("\n")
|
||||||
|
|
||||||
|
|
||||||
def render_default(doc):
|
def render_default(doc):
|
||||||
|
|||||||
@@ -4,38 +4,40 @@ import termcolor
|
|||||||
|
|
||||||
def bold(s):
|
def bold(s):
|
||||||
"""draw attention to the given string"""
|
"""draw attention to the given string"""
|
||||||
return termcolor.colored(s, 'blue')
|
return termcolor.colored(s, "blue")
|
||||||
|
|
||||||
|
|
||||||
def bold2(s):
|
def bold2(s):
|
||||||
"""draw attention to the given string, within a `bold` section"""
|
"""draw attention to the given string, within a `bold` section"""
|
||||||
return termcolor.colored(s, 'green')
|
return termcolor.colored(s, "green")
|
||||||
|
|
||||||
|
|
||||||
def hex(n):
|
def hex(n):
|
||||||
"""render the given number using upper case hex, like: 0x123ABC"""
|
"""render the given number using upper case hex, like: 0x123ABC"""
|
||||||
return '0x%X' % n
|
return "0x%X" % n
|
||||||
|
|
||||||
|
|
||||||
def hex_string(h):
|
def hex_string(h):
|
||||||
""" render hex string e.g. "0a40b1" as "0A 40 B1" """
|
""" render hex string e.g. "0a40b1" as "0A 40 B1" """
|
||||||
return ' '.join(h[i:i + 2] for i in range(0, len(h), 2)).upper()
|
return " ".join(h[i : i + 2] for i in range(0, len(h), 2)).upper()
|
||||||
|
|
||||||
|
|
||||||
def capability_rules(doc):
|
def capability_rules(doc):
|
||||||
"""enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
|
"""enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
|
||||||
for (_, _, rule) in sorted(map(lambda rule: (rule['meta'].get('namespace', ''), rule['meta']['name'], rule), doc.values())):
|
for (_, _, rule) in sorted(
|
||||||
if rule['meta'].get('lib'):
|
map(lambda rule: (rule["meta"].get("namespace", ""), rule["meta"]["name"], rule), doc.values())
|
||||||
|
):
|
||||||
|
if rule["meta"].get("lib"):
|
||||||
continue
|
continue
|
||||||
if rule['meta'].get('capa/subscope'):
|
if rule["meta"].get("capa/subscope"):
|
||||||
continue
|
continue
|
||||||
if rule['meta'].get('maec/analysis-conclusion'):
|
if rule["meta"].get("maec/analysis-conclusion"):
|
||||||
continue
|
continue
|
||||||
if rule['meta'].get('maec/analysis-conclusion-ov'):
|
if rule["meta"].get("maec/analysis-conclusion-ov"):
|
||||||
continue
|
continue
|
||||||
if rule['meta'].get('maec/malware-category'):
|
if rule["meta"].get("maec/malware-category"):
|
||||||
continue
|
continue
|
||||||
if rule['meta'].get('maec/malware-category-ov'):
|
if rule["meta"].get("maec/malware-category-ov"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
yield rule
|
yield rule
|
||||||
@@ -44,4 +46,4 @@ def capability_rules(doc):
|
|||||||
class StringIO(six.StringIO):
|
class StringIO(six.StringIO):
|
||||||
def writeln(self, s):
|
def writeln(self, s):
|
||||||
self.write(s)
|
self.write(s)
|
||||||
self.write('\n')
|
self.write("\n")
|
||||||
|
|||||||
@@ -24,29 +24,29 @@ def render_verbose(doc):
|
|||||||
ostream = rutils.StringIO()
|
ostream = rutils.StringIO()
|
||||||
|
|
||||||
for rule in rutils.capability_rules(doc):
|
for rule in rutils.capability_rules(doc):
|
||||||
count = len(rule['matches'])
|
count = len(rule["matches"])
|
||||||
if count == 1:
|
if count == 1:
|
||||||
capability = rutils.bold(rule['meta']['name'])
|
capability = rutils.bold(rule["meta"]["name"])
|
||||||
else:
|
else:
|
||||||
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
|
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
|
||||||
|
|
||||||
ostream.writeln(capability)
|
ostream.writeln(capability)
|
||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
for key in ('namespace', 'description', 'scope'):
|
for key in ("namespace", "description", "scope"):
|
||||||
if key == 'name' or key not in rule['meta']:
|
if key == "name" or key not in rule["meta"]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
v = rule['meta'][key]
|
v = rule["meta"][key]
|
||||||
if isinstance(v, list) and len(v) == 1:
|
if isinstance(v, list) and len(v) == 1:
|
||||||
v = v[0]
|
v = v[0]
|
||||||
rows.append((key, v))
|
rows.append((key, v))
|
||||||
|
|
||||||
if rule['meta']['scope'] != capa.rules.FILE_SCOPE:
|
if rule["meta"]["scope"] != capa.rules.FILE_SCOPE:
|
||||||
locations = doc[rule['meta']['name']]['matches'].keys()
|
locations = doc[rule["meta"]["name"]]["matches"].keys()
|
||||||
rows.append(('matches', '\n'.join(map(rutils.hex, locations))))
|
rows.append(("matches", "\n".join(map(rutils.hex, locations))))
|
||||||
|
|
||||||
ostream.writeln(tabulate.tabulate(rows, tablefmt='plain'))
|
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
|
||||||
ostream.write('\n')
|
ostream.write("\n")
|
||||||
|
|
||||||
return ostream.getvalue()
|
return ostream.getvalue()
|
||||||
|
|||||||
@@ -5,145 +5,147 @@ import capa.render.utils as rutils
|
|||||||
|
|
||||||
|
|
||||||
def render_statement(ostream, statement, indent=0):
|
def render_statement(ostream, statement, indent=0):
|
||||||
ostream.write(' ' * indent)
|
ostream.write(" " * indent)
|
||||||
if statement['type'] in ('and', 'or', 'optional'):
|
if statement["type"] in ("and", "or", "optional"):
|
||||||
ostream.write(statement['type'])
|
ostream.write(statement["type"])
|
||||||
ostream.writeln(':')
|
ostream.writeln(":")
|
||||||
elif statement['type'] == 'not':
|
elif statement["type"] == "not":
|
||||||
# this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
|
# this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
|
||||||
ostream.writeln('not:')
|
ostream.writeln("not:")
|
||||||
elif statement['type'] == 'some':
|
elif statement["type"] == "some":
|
||||||
ostream.write(statement['count'] + ' or more')
|
ostream.write(statement["count"] + " or more")
|
||||||
ostream.writeln(':')
|
ostream.writeln(":")
|
||||||
elif statement['type'] == 'range':
|
elif statement["type"] == "range":
|
||||||
# `range` is a weird node, its almost a hybrid of statement+feature.
|
# `range` is a weird node, its almost a hybrid of statement+feature.
|
||||||
# it is a specific feature repeated multiple times.
|
# it is a specific feature repeated multiple times.
|
||||||
# there's no additional logic in the feature part, just the existence of a feature.
|
# there's no additional logic in the feature part, just the existence of a feature.
|
||||||
# so, we have to inline some of the feature rendering here.
|
# so, we have to inline some of the feature rendering here.
|
||||||
|
|
||||||
child = statement['child']
|
child = statement["child"]
|
||||||
if child['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'):
|
if child["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
|
||||||
feature = '%s(%s)' % (child['type'], rutils.bold2(child[child['type']]))
|
feature = "%s(%s)" % (child["type"], rutils.bold2(child[child["type"]]))
|
||||||
elif child['type'] in ('number', 'offset'):
|
elif child["type"] in ("number", "offset"):
|
||||||
feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex(child[child['type']])))
|
feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex(child[child["type"]])))
|
||||||
elif child['type'] == 'bytes':
|
elif child["type"] == "bytes":
|
||||||
feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex_string(child[child['type']])))
|
feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex_string(child[child["type"]])))
|
||||||
elif child['type'] == 'characteristic':
|
elif child["type"] == "characteristic":
|
||||||
feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'][0]))
|
feature = "characteristic(%s)" % (rutils.bold2(child["characteristic"][0]))
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected feature type: ' + str(child))
|
raise RuntimeError("unexpected feature type: " + str(child))
|
||||||
|
|
||||||
ostream.write('count(%s): ' % feature)
|
ostream.write("count(%s): " % feature)
|
||||||
|
|
||||||
if statement['max'] == statement['min']:
|
if statement["max"] == statement["min"]:
|
||||||
ostream.writeln('%d' % (statement['min']))
|
ostream.writeln("%d" % (statement["min"]))
|
||||||
elif statement['min'] == 0:
|
elif statement["min"] == 0:
|
||||||
ostream.writeln('%d or fewer' % (statement['max']))
|
ostream.writeln("%d or fewer" % (statement["max"]))
|
||||||
elif statement['max'] == (1 << 64 - 1):
|
elif statement["max"] == (1 << 64 - 1):
|
||||||
ostream.writeln('%d or more' % (statement['min']))
|
ostream.writeln("%d or more" % (statement["min"]))
|
||||||
else:
|
else:
|
||||||
ostream.writeln('between %d and %d' % (statement['min'], statement['max']))
|
ostream.writeln("between %d and %d" % (statement["min"], statement["max"]))
|
||||||
elif statement['type'] == 'subscope':
|
elif statement["type"] == "subscope":
|
||||||
ostream.write(statement['subscope'])
|
ostream.write(statement["subscope"])
|
||||||
ostream.writeln(':')
|
ostream.writeln(":")
|
||||||
elif statement['type'] == 'regex':
|
elif statement["type"] == "regex":
|
||||||
# regex is a `Statement` not a `Feature`
|
# regex is a `Statement` not a `Feature`
|
||||||
# this is because it doesn't get extracted, but applies to all strings in scope.
|
# this is because it doesn't get extracted, but applies to all strings in scope.
|
||||||
# so we have to handle it here
|
# so we have to handle it here
|
||||||
ostream.writeln('string: %s' % (statement['match']))
|
ostream.writeln("string: %s" % (statement["match"]))
|
||||||
else:
|
else:
|
||||||
raise RuntimeError("unexpected match statement type: " + str(statement))
|
raise RuntimeError("unexpected match statement type: " + str(statement))
|
||||||
|
|
||||||
|
|
||||||
def render_feature(ostream, match, feature, indent=0):
|
def render_feature(ostream, match, feature, indent=0):
|
||||||
ostream.write(' ' * indent)
|
ostream.write(" " * indent)
|
||||||
|
|
||||||
if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'):
|
if feature["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
|
||||||
ostream.write(feature['type'])
|
ostream.write(feature["type"])
|
||||||
ostream.write(': ')
|
ostream.write(": ")
|
||||||
ostream.write(rutils.bold2(feature[feature['type']]))
|
ostream.write(rutils.bold2(feature[feature["type"]]))
|
||||||
elif feature['type'] in ('number', 'offset'):
|
elif feature["type"] in ("number", "offset"):
|
||||||
ostream.write(feature['type'])
|
ostream.write(feature["type"])
|
||||||
ostream.write(': ')
|
ostream.write(": ")
|
||||||
ostream.write(rutils.bold2(rutils.hex(feature[feature['type']])))
|
ostream.write(rutils.bold2(rutils.hex(feature[feature["type"]])))
|
||||||
elif feature['type'] == 'bytes':
|
elif feature["type"] == "bytes":
|
||||||
ostream.write('bytes: ')
|
ostream.write("bytes: ")
|
||||||
# bytes is the uppercase, hex-encoded string.
|
# bytes is the uppercase, hex-encoded string.
|
||||||
# it should always be an even number of characters (its hex).
|
# it should always be an even number of characters (its hex).
|
||||||
ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
|
ostream.write(rutils.bold2(rutils.hex_string(feature[feature["type"]])))
|
||||||
elif feature['type'] == 'characteristic':
|
elif feature["type"] == "characteristic":
|
||||||
ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0])))
|
ostream.write("characteristic(%s)" % (rutils.bold2(feature["characteristic"][0])))
|
||||||
# note that regex is found in `render_statement`
|
# note that regex is found in `render_statement`
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected feature type: ' + str(feature))
|
raise RuntimeError("unexpected feature type: " + str(feature))
|
||||||
|
|
||||||
# its possible to have an empty locations array here,
|
# its possible to have an empty locations array here,
|
||||||
# such as when we're in MODE_FAILURE and showing the logic
|
# such as when we're in MODE_FAILURE and showing the logic
|
||||||
# under a `not` statement (which will have no matched locations).
|
# under a `not` statement (which will have no matched locations).
|
||||||
locations = list(sorted(match.get('locations', [])))
|
locations = list(sorted(match.get("locations", [])))
|
||||||
if len(locations) == 1:
|
if len(locations) == 1:
|
||||||
ostream.write(' @ ')
|
ostream.write(" @ ")
|
||||||
ostream.write(rutils.hex(locations[0]))
|
ostream.write(rutils.hex(locations[0]))
|
||||||
elif len(locations) > 1:
|
elif len(locations) > 1:
|
||||||
ostream.write(' @ ')
|
ostream.write(" @ ")
|
||||||
if len(locations) > 4:
|
if len(locations) > 4:
|
||||||
# don't display too many locations, because it becomes very noisy.
|
# don't display too many locations, because it becomes very noisy.
|
||||||
# probably only the first handful of locations will be useful for inspection.
|
# probably only the first handful of locations will be useful for inspection.
|
||||||
ostream.write(', '.join(map(rutils.hex, locations[0:4])))
|
ostream.write(", ".join(map(rutils.hex, locations[0:4])))
|
||||||
ostream.write(', and %d more...' % (len(locations) - 4))
|
ostream.write(", and %d more..." % (len(locations) - 4))
|
||||||
else:
|
else:
|
||||||
ostream.write(', '.join(map(rutils.hex, locations)))
|
ostream.write(", ".join(map(rutils.hex, locations)))
|
||||||
|
|
||||||
ostream.write('\n')
|
ostream.write("\n")
|
||||||
|
|
||||||
|
|
||||||
def render_node(ostream, match, node, indent=0):
|
def render_node(ostream, match, node, indent=0):
|
||||||
if node['type'] == 'statement':
|
if node["type"] == "statement":
|
||||||
render_statement(ostream, node['statement'], indent=indent)
|
render_statement(ostream, node["statement"], indent=indent)
|
||||||
elif node['type'] == 'feature':
|
elif node["type"] == "feature":
|
||||||
render_feature(ostream, match, node['feature'], indent=indent)
|
render_feature(ostream, match, node["feature"], indent=indent)
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected node type: ' + str(node))
|
raise RuntimeError("unexpected node type: " + str(node))
|
||||||
|
|
||||||
|
|
||||||
# display nodes that successfully evaluated against the sample.
|
# display nodes that successfully evaluated against the sample.
|
||||||
MODE_SUCCESS = 'success'
|
MODE_SUCCESS = "success"
|
||||||
|
|
||||||
# display nodes that did not evaluate to True against the sample.
|
# display nodes that did not evaluate to True against the sample.
|
||||||
# this is useful when rendering the logic tree under a `not` node.
|
# this is useful when rendering the logic tree under a `not` node.
|
||||||
MODE_FAILURE = 'failure'
|
MODE_FAILURE = "failure"
|
||||||
|
|
||||||
|
|
||||||
def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
|
def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
|
||||||
child_mode = mode
|
child_mode = mode
|
||||||
if mode == MODE_SUCCESS:
|
if mode == MODE_SUCCESS:
|
||||||
# display only nodes that evaluated successfully.
|
# display only nodes that evaluated successfully.
|
||||||
if not match['success']:
|
if not match["success"]:
|
||||||
return
|
return
|
||||||
# optional statement with no successful children is empty
|
# optional statement with no successful children is empty
|
||||||
if (match['node'].get('statement', {}).get('type') == 'optional'
|
if match["node"].get("statement", {}).get("type") == "optional" and not any(
|
||||||
and not any(map(lambda m: m['success'], match['children']))):
|
map(lambda m: m["success"], match["children"])
|
||||||
|
):
|
||||||
return
|
return
|
||||||
# not statement, so invert the child mode to show failed evaluations
|
# not statement, so invert the child mode to show failed evaluations
|
||||||
if match['node'].get('statement', {}).get('type') == 'not':
|
if match["node"].get("statement", {}).get("type") == "not":
|
||||||
child_mode = MODE_FAILURE
|
child_mode = MODE_FAILURE
|
||||||
elif mode == MODE_FAILURE:
|
elif mode == MODE_FAILURE:
|
||||||
# display only nodes that did not evaluate to True
|
# display only nodes that did not evaluate to True
|
||||||
if match['success']:
|
if match["success"]:
|
||||||
return
|
return
|
||||||
# optional statement with successful children is not relevant
|
# optional statement with successful children is not relevant
|
||||||
if (match['node'].get('statement', {}).get('type') == 'optional'
|
if match["node"].get("statement", {}).get("type") == "optional" and any(
|
||||||
and any(map(lambda m: m['success'], match['children']))):
|
map(lambda m: m["success"], match["children"])
|
||||||
|
):
|
||||||
return
|
return
|
||||||
# not statement, so invert the child mode to show successful evaluations
|
# not statement, so invert the child mode to show successful evaluations
|
||||||
if match['node'].get('statement', {}).get('type') == 'not':
|
if match["node"].get("statement", {}).get("type") == "not":
|
||||||
child_mode = MODE_SUCCESS
|
child_mode = MODE_SUCCESS
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('unexpected mode: ' + mode)
|
raise RuntimeError("unexpected mode: " + mode)
|
||||||
|
|
||||||
render_node(ostream, match, match['node'], indent=indent)
|
render_node(ostream, match, match["node"], indent=indent)
|
||||||
|
|
||||||
for child in match['children']:
|
for child in match["children"]:
|
||||||
render_match(ostream, child, indent=indent + 1, mode=child_mode)
|
render_match(ostream, child, indent=indent + 1, mode=child_mode)
|
||||||
|
|
||||||
|
|
||||||
@@ -151,44 +153,44 @@ def render_vverbose(doc):
|
|||||||
ostream = rutils.StringIO()
|
ostream = rutils.StringIO()
|
||||||
|
|
||||||
for rule in rutils.capability_rules(doc):
|
for rule in rutils.capability_rules(doc):
|
||||||
count = len(rule['matches'])
|
count = len(rule["matches"])
|
||||||
if count == 1:
|
if count == 1:
|
||||||
capability = rutils.bold(rule['meta']['name'])
|
capability = rutils.bold(rule["meta"]["name"])
|
||||||
else:
|
else:
|
||||||
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
|
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
|
||||||
|
|
||||||
ostream.writeln(capability)
|
ostream.writeln(capability)
|
||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
for key in capa.rules.META_KEYS:
|
for key in capa.rules.META_KEYS:
|
||||||
if key == 'name' or key not in rule['meta']:
|
if key == "name" or key not in rule["meta"]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
v = rule['meta'][key]
|
v = rule["meta"][key]
|
||||||
if isinstance(v, list) and len(v) == 1:
|
if isinstance(v, list) and len(v) == 1:
|
||||||
v = v[0]
|
v = v[0]
|
||||||
elif isinstance(v, list) and len(v) > 1:
|
elif isinstance(v, list) and len(v) > 1:
|
||||||
v = ', '.join(v)
|
v = ", ".join(v)
|
||||||
rows.append((key, v))
|
rows.append((key, v))
|
||||||
|
|
||||||
ostream.writeln(tabulate.tabulate(rows, tablefmt='plain'))
|
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
|
||||||
|
|
||||||
if rule['meta']['scope'] == capa.rules.FILE_SCOPE:
|
if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
|
||||||
matches = list(doc[rule['meta']['name']]['matches'].values())
|
matches = list(doc[rule["meta"]["name"]]["matches"].values())
|
||||||
if len(matches) != 1:
|
if len(matches) != 1:
|
||||||
# i think there should only ever be one match per file-scope rule,
|
# i think there should only ever be one match per file-scope rule,
|
||||||
# because we do the file-scope evaluation a single time.
|
# because we do the file-scope evaluation a single time.
|
||||||
# but i'm not 100% sure if this is/will always be true.
|
# but i'm not 100% sure if this is/will always be true.
|
||||||
# so, lets be explicit about our assumptions and raise an exception if they fail.
|
# so, lets be explicit about our assumptions and raise an exception if they fail.
|
||||||
raise RuntimeError('unexpected file scope match count: ' + len(matches))
|
raise RuntimeError("unexpected file scope match count: " + len(matches))
|
||||||
render_match(ostream, matches[0], indent=0)
|
render_match(ostream, matches[0], indent=0)
|
||||||
else:
|
else:
|
||||||
for location, match in sorted(doc[rule['meta']['name']]['matches'].items()):
|
for location, match in sorted(doc[rule["meta"]["name"]]["matches"].items()):
|
||||||
ostream.write(rule['meta']['scope'])
|
ostream.write(rule["meta"]["scope"])
|
||||||
ostream.write(' @ ')
|
ostream.write(" @ ")
|
||||||
ostream.writeln(rutils.hex(location))
|
ostream.writeln(rutils.hex(location))
|
||||||
render_match(ostream, match, indent=1)
|
render_match(ostream, match, indent=1)
|
||||||
|
|
||||||
ostream.write('\n')
|
ostream.write("\n")
|
||||||
|
|
||||||
return ostream.getvalue()
|
return ostream.getvalue()
|
||||||
|
|||||||
352
capa/rules.py
352
capa/rules.py
@@ -22,32 +22,32 @@ logger = logging.getLogger(__name__)
|
|||||||
# these are the standard metadata fields, in the preferred order.
|
# these are the standard metadata fields, in the preferred order.
|
||||||
# when reformatted, any custom keys will come after these.
|
# when reformatted, any custom keys will come after these.
|
||||||
META_KEYS = (
|
META_KEYS = (
|
||||||
'name',
|
"name",
|
||||||
'namespace',
|
"namespace",
|
||||||
'rule-category',
|
"rule-category",
|
||||||
'maec/analysis-conclusion',
|
"maec/analysis-conclusion",
|
||||||
'maec/analysis-conclusion-ov',
|
"maec/analysis-conclusion-ov",
|
||||||
'maec/malware-category',
|
"maec/malware-category",
|
||||||
'maec/malware-category-ov',
|
"maec/malware-category-ov",
|
||||||
'author',
|
"author",
|
||||||
'description',
|
"description",
|
||||||
'lib',
|
"lib",
|
||||||
'scope',
|
"scope",
|
||||||
'att&ck',
|
"att&ck",
|
||||||
'mbc',
|
"mbc",
|
||||||
'references',
|
"references",
|
||||||
'examples'
|
"examples",
|
||||||
)
|
)
|
||||||
# these are meta fields that are internal to capa,
|
# these are meta fields that are internal to capa,
|
||||||
# and added during rule reading/construction.
|
# and added during rule reading/construction.
|
||||||
# they may help use manipulate or index rules,
|
# they may help use manipulate or index rules,
|
||||||
# but should not be exposed to clients.
|
# but should not be exposed to clients.
|
||||||
HIDDEN_META_KEYS = ('capa/nursery', 'capa/path')
|
HIDDEN_META_KEYS = ("capa/nursery", "capa/path")
|
||||||
|
|
||||||
|
|
||||||
FILE_SCOPE = 'file'
|
FILE_SCOPE = "file"
|
||||||
FUNCTION_SCOPE = 'function'
|
FUNCTION_SCOPE = "function"
|
||||||
BASIC_BLOCK_SCOPE = 'basic block'
|
BASIC_BLOCK_SCOPE = "basic block"
|
||||||
|
|
||||||
|
|
||||||
SUPPORTED_FEATURES = {
|
SUPPORTED_FEATURES = {
|
||||||
@@ -56,7 +56,7 @@ SUPPORTED_FEATURES = {
|
|||||||
capa.features.file.Export,
|
capa.features.file.Export,
|
||||||
capa.features.file.Import,
|
capa.features.file.Import,
|
||||||
capa.features.file.Section,
|
capa.features.file.Section,
|
||||||
capa.features.Characteristic('embedded pe'),
|
capa.features.Characteristic("embedded pe"),
|
||||||
capa.features.String,
|
capa.features.String,
|
||||||
},
|
},
|
||||||
FUNCTION_SCOPE: {
|
FUNCTION_SCOPE: {
|
||||||
@@ -68,18 +68,18 @@ SUPPORTED_FEATURES = {
|
|||||||
capa.features.insn.Offset,
|
capa.features.insn.Offset,
|
||||||
capa.features.insn.Mnemonic,
|
capa.features.insn.Mnemonic,
|
||||||
capa.features.basicblock.BasicBlock,
|
capa.features.basicblock.BasicBlock,
|
||||||
capa.features.Characteristic('switch'),
|
capa.features.Characteristic("switch"),
|
||||||
capa.features.Characteristic('nzxor'),
|
capa.features.Characteristic("nzxor"),
|
||||||
capa.features.Characteristic('peb access'),
|
capa.features.Characteristic("peb access"),
|
||||||
capa.features.Characteristic('fs access'),
|
capa.features.Characteristic("fs access"),
|
||||||
capa.features.Characteristic('gs access'),
|
capa.features.Characteristic("gs access"),
|
||||||
capa.features.Characteristic('cross section flow'),
|
capa.features.Characteristic("cross section flow"),
|
||||||
capa.features.Characteristic('stack string'),
|
capa.features.Characteristic("stack string"),
|
||||||
capa.features.Characteristic('calls from'),
|
capa.features.Characteristic("calls from"),
|
||||||
capa.features.Characteristic('calls to'),
|
capa.features.Characteristic("calls to"),
|
||||||
capa.features.Characteristic('indirect call'),
|
capa.features.Characteristic("indirect call"),
|
||||||
capa.features.Characteristic('loop'),
|
capa.features.Characteristic("loop"),
|
||||||
capa.features.Characteristic('recursive call')
|
capa.features.Characteristic("recursive call"),
|
||||||
},
|
},
|
||||||
BASIC_BLOCK_SCOPE: {
|
BASIC_BLOCK_SCOPE: {
|
||||||
capa.features.MatchedRule,
|
capa.features.MatchedRule,
|
||||||
@@ -89,14 +89,14 @@ SUPPORTED_FEATURES = {
|
|||||||
capa.features.Bytes,
|
capa.features.Bytes,
|
||||||
capa.features.insn.Offset,
|
capa.features.insn.Offset,
|
||||||
capa.features.insn.Mnemonic,
|
capa.features.insn.Mnemonic,
|
||||||
capa.features.Characteristic('nzxor'),
|
capa.features.Characteristic("nzxor"),
|
||||||
capa.features.Characteristic('peb access'),
|
capa.features.Characteristic("peb access"),
|
||||||
capa.features.Characteristic('fs access'),
|
capa.features.Characteristic("fs access"),
|
||||||
capa.features.Characteristic('gs access'),
|
capa.features.Characteristic("gs access"),
|
||||||
capa.features.Characteristic('cross section flow'),
|
capa.features.Characteristic("cross section flow"),
|
||||||
capa.features.Characteristic('tight loop'),
|
capa.features.Characteristic("tight loop"),
|
||||||
capa.features.Characteristic('stack string'),
|
capa.features.Characteristic("stack string"),
|
||||||
capa.features.Characteristic('indirect call')
|
capa.features.Characteristic("indirect call"),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -107,7 +107,7 @@ class InvalidRule(ValueError):
|
|||||||
self.msg = msg
|
self.msg = msg
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'invalid rule: %s' % (self.msg)
|
return "invalid rule: %s" % (self.msg)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self)
|
return str(self)
|
||||||
@@ -121,7 +121,7 @@ class InvalidRuleWithPath(InvalidRule):
|
|||||||
self.__cause__ = None
|
self.__cause__ = None
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'invalid rule: %s: %s' % (self.path, self.msg)
|
return "invalid rule: %s: %s" % (self.path, self.msg)
|
||||||
|
|
||||||
|
|
||||||
class InvalidRuleSet(ValueError):
|
class InvalidRuleSet(ValueError):
|
||||||
@@ -130,7 +130,7 @@ class InvalidRuleSet(ValueError):
|
|||||||
self.msg = msg
|
self.msg = msg
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'invalid rule set: %s' % (self.msg)
|
return "invalid rule set: %s" % (self.msg)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self)
|
return str(self)
|
||||||
@@ -139,111 +139,112 @@ class InvalidRuleSet(ValueError):
|
|||||||
def ensure_feature_valid_for_scope(scope, feature):
|
def ensure_feature_valid_for_scope(scope, feature):
|
||||||
if isinstance(feature, capa.features.Characteristic):
|
if isinstance(feature, capa.features.Characteristic):
|
||||||
if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
|
if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
|
||||||
raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
|
raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
|
||||||
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
|
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
|
||||||
raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
|
raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
|
||||||
|
|
||||||
|
|
||||||
def parse_int(s):
|
def parse_int(s):
|
||||||
if s.startswith('0x'):
|
if s.startswith("0x"):
|
||||||
return int(s, 0x10)
|
return int(s, 0x10)
|
||||||
else:
|
else:
|
||||||
return int(s, 10)
|
return int(s, 10)
|
||||||
|
|
||||||
|
|
||||||
def parse_range(s):
|
def parse_range(s):
|
||||||
'''
|
"""
|
||||||
parse a string "(0, 1)" into a range (min, max).
|
parse a string "(0, 1)" into a range (min, max).
|
||||||
min and/or max may by None to indicate an unbound range.
|
min and/or max may by None to indicate an unbound range.
|
||||||
'''
|
"""
|
||||||
# we want to use `{` characters, but this is a dict in yaml.
|
# we want to use `{` characters, but this is a dict in yaml.
|
||||||
if not s.startswith('('):
|
if not s.startswith("("):
|
||||||
raise InvalidRule('invalid range: %s' % (s))
|
raise InvalidRule("invalid range: %s" % (s))
|
||||||
|
|
||||||
if not s.endswith(')'):
|
if not s.endswith(")"):
|
||||||
raise InvalidRule('invalid range: %s' % (s))
|
raise InvalidRule("invalid range: %s" % (s))
|
||||||
|
|
||||||
s = s[len('('):-len(')')]
|
s = s[len("(") : -len(")")]
|
||||||
min, _, max = s.partition(',')
|
min, _, max = s.partition(",")
|
||||||
min = min.strip()
|
min = min.strip()
|
||||||
max = max.strip()
|
max = max.strip()
|
||||||
|
|
||||||
if min:
|
if min:
|
||||||
min = parse_int(min.strip())
|
min = parse_int(min.strip())
|
||||||
if min < 0:
|
if min < 0:
|
||||||
raise InvalidRule('range min less than zero')
|
raise InvalidRule("range min less than zero")
|
||||||
else:
|
else:
|
||||||
min = None
|
min = None
|
||||||
|
|
||||||
if max:
|
if max:
|
||||||
max = parse_int(max.strip())
|
max = parse_int(max.strip())
|
||||||
if max < 0:
|
if max < 0:
|
||||||
raise InvalidRule('range max less than zero')
|
raise InvalidRule("range max less than zero")
|
||||||
else:
|
else:
|
||||||
max = None
|
max = None
|
||||||
|
|
||||||
if min is not None and max is not None:
|
if min is not None and max is not None:
|
||||||
if max < min:
|
if max < min:
|
||||||
raise InvalidRule('range max less than min')
|
raise InvalidRule("range max less than min")
|
||||||
|
|
||||||
return min, max
|
return min, max
|
||||||
|
|
||||||
|
|
||||||
def parse_feature(key):
|
def parse_feature(key):
|
||||||
# keep this in sync with supported features
|
# keep this in sync with supported features
|
||||||
if key == 'api':
|
if key == "api":
|
||||||
return capa.features.insn.API
|
return capa.features.insn.API
|
||||||
elif key == 'string':
|
elif key == "string":
|
||||||
return capa.features.String
|
return capa.features.String
|
||||||
elif key == 'bytes':
|
elif key == "bytes":
|
||||||
return capa.features.Bytes
|
return capa.features.Bytes
|
||||||
elif key == 'number':
|
elif key == "number":
|
||||||
return capa.features.insn.Number
|
return capa.features.insn.Number
|
||||||
elif key == 'offset':
|
elif key == "offset":
|
||||||
return capa.features.insn.Offset
|
return capa.features.insn.Offset
|
||||||
elif key == 'mnemonic':
|
elif key == "mnemonic":
|
||||||
return capa.features.insn.Mnemonic
|
return capa.features.insn.Mnemonic
|
||||||
elif key == 'basic blocks':
|
elif key == "basic blocks":
|
||||||
return capa.features.basicblock.BasicBlock
|
return capa.features.basicblock.BasicBlock
|
||||||
elif key.startswith('characteristic(') and key.endswith(')'):
|
elif key.startswith("characteristic(") and key.endswith(")"):
|
||||||
characteristic = key[len('characteristic('):-len(')')]
|
characteristic = key[len("characteristic(") : -len(")")]
|
||||||
return lambda v: capa.features.Characteristic(characteristic, v)
|
return lambda v: capa.features.Characteristic(characteristic, v)
|
||||||
elif key == 'export':
|
elif key == "export":
|
||||||
return capa.features.file.Export
|
return capa.features.file.Export
|
||||||
elif key == 'import':
|
elif key == "import":
|
||||||
return capa.features.file.Import
|
return capa.features.file.Import
|
||||||
elif key == 'section':
|
elif key == "section":
|
||||||
return capa.features.file.Section
|
return capa.features.file.Section
|
||||||
elif key == 'match':
|
elif key == "match":
|
||||||
return capa.features.MatchedRule
|
return capa.features.MatchedRule
|
||||||
else:
|
else:
|
||||||
raise InvalidRule('unexpected statement: %s' % key)
|
raise InvalidRule("unexpected statement: %s" % key)
|
||||||
|
|
||||||
|
|
||||||
def parse_symbol(s, value_type):
|
def parse_symbol(s, value_type):
|
||||||
'''
|
"""
|
||||||
s can be an int or a string
|
s can be an int or a string
|
||||||
'''
|
"""
|
||||||
if isinstance(s, str) and '=' in s:
|
if isinstance(s, str) and "=" in s:
|
||||||
value, symbol = s.split('=', 1)
|
value, symbol = s.split("=", 1)
|
||||||
symbol = symbol.strip()
|
symbol = symbol.strip()
|
||||||
if symbol == '':
|
if symbol == "":
|
||||||
raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
|
raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
|
||||||
else:
|
else:
|
||||||
value = s
|
value = s
|
||||||
symbol = None
|
symbol = None
|
||||||
|
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
if value_type == 'bytes':
|
if value_type == "bytes":
|
||||||
try:
|
try:
|
||||||
value = codecs.decode(value.replace(' ', ''), 'hex')
|
value = codecs.decode(value.replace(" ", ""), "hex")
|
||||||
# TODO: Remove TypeError when Python2 is not used anymore
|
# TODO: Remove TypeError when Python2 is not used anymore
|
||||||
except (TypeError, binascii.Error):
|
except (TypeError, binascii.Error):
|
||||||
raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value)
|
raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value)
|
||||||
|
|
||||||
if len(value) > MAX_BYTES_FEATURE_SIZE:
|
if len(value) > MAX_BYTES_FEATURE_SIZE:
|
||||||
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
|
raise InvalidRule(
|
||||||
MAX_BYTES_FEATURE_SIZE)
|
"unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
value = parse_int(value)
|
value = parse_int(value)
|
||||||
@@ -255,54 +256,54 @@ def parse_symbol(s, value_type):
|
|||||||
|
|
||||||
def build_statements(d, scope):
|
def build_statements(d, scope):
|
||||||
if len(d.keys()) != 1:
|
if len(d.keys()) != 1:
|
||||||
raise InvalidRule('too many statements')
|
raise InvalidRule("too many statements")
|
||||||
|
|
||||||
key = list(d.keys())[0]
|
key = list(d.keys())[0]
|
||||||
if key == 'and':
|
if key == "and":
|
||||||
return And(*[build_statements(dd, scope) for dd in d[key]])
|
return And(*[build_statements(dd, scope) for dd in d[key]])
|
||||||
elif key == 'or':
|
elif key == "or":
|
||||||
return Or(*[build_statements(dd, scope) for dd in d[key]])
|
return Or(*[build_statements(dd, scope) for dd in d[key]])
|
||||||
elif key == 'not':
|
elif key == "not":
|
||||||
if len(d[key]) != 1:
|
if len(d[key]) != 1:
|
||||||
raise InvalidRule('not statement must have exactly one child statement')
|
raise InvalidRule("not statement must have exactly one child statement")
|
||||||
return Not(*[build_statements(dd, scope) for dd in d[key]])
|
return Not(*[build_statements(dd, scope) for dd in d[key]])
|
||||||
elif key.endswith(' or more'):
|
elif key.endswith(" or more"):
|
||||||
count = int(key[:-len('or more')])
|
count = int(key[: -len("or more")])
|
||||||
return Some(count, *[build_statements(dd, scope) for dd in d[key]])
|
return Some(count, *[build_statements(dd, scope) for dd in d[key]])
|
||||||
elif key == 'optional':
|
elif key == "optional":
|
||||||
# `optional` is an alias for `0 or more`
|
# `optional` is an alias for `0 or more`
|
||||||
# which is useful for documenting behaviors,
|
# which is useful for documenting behaviors,
|
||||||
# like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
|
# like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
|
||||||
return Some(0, *[build_statements(dd, scope) for dd in d[key]])
|
return Some(0, *[build_statements(dd, scope) for dd in d[key]])
|
||||||
|
|
||||||
elif key == 'function':
|
elif key == "function":
|
||||||
if scope != FILE_SCOPE:
|
if scope != FILE_SCOPE:
|
||||||
raise InvalidRule('function subscope supported only for file scope')
|
raise InvalidRule("function subscope supported only for file scope")
|
||||||
|
|
||||||
if len(d[key]) != 1:
|
if len(d[key]) != 1:
|
||||||
raise InvalidRule('subscope must have exactly one child statement')
|
raise InvalidRule("subscope must have exactly one child statement")
|
||||||
|
|
||||||
return Subscope(FUNCTION_SCOPE, *[build_statements(dd, FUNCTION_SCOPE) for dd in d[key]])
|
return Subscope(FUNCTION_SCOPE, *[build_statements(dd, FUNCTION_SCOPE) for dd in d[key]])
|
||||||
|
|
||||||
elif key == 'basic block':
|
elif key == "basic block":
|
||||||
if scope != FUNCTION_SCOPE:
|
if scope != FUNCTION_SCOPE:
|
||||||
raise InvalidRule('basic block subscope supported only for function scope')
|
raise InvalidRule("basic block subscope supported only for function scope")
|
||||||
|
|
||||||
if len(d[key]) != 1:
|
if len(d[key]) != 1:
|
||||||
raise InvalidRule('subscope must have exactly one child statement')
|
raise InvalidRule("subscope must have exactly one child statement")
|
||||||
|
|
||||||
return Subscope(BASIC_BLOCK_SCOPE, *[build_statements(dd, BASIC_BLOCK_SCOPE) for dd in d[key]])
|
return Subscope(BASIC_BLOCK_SCOPE, *[build_statements(dd, BASIC_BLOCK_SCOPE) for dd in d[key]])
|
||||||
|
|
||||||
elif key.startswith('count(') and key.endswith(')'):
|
elif key.startswith("count(") and key.endswith(")"):
|
||||||
# e.g.:
|
# e.g.:
|
||||||
#
|
#
|
||||||
# count(basic block)
|
# count(basic block)
|
||||||
# count(mnemonic(mov))
|
# count(mnemonic(mov))
|
||||||
# count(characteristic(nzxor))
|
# count(characteristic(nzxor))
|
||||||
|
|
||||||
term = key[len('count('):-len(')')]
|
term = key[len("count(") : -len(")")]
|
||||||
|
|
||||||
if term.startswith('characteristic('):
|
if term.startswith("characteristic("):
|
||||||
# characteristic features are specified a bit specially:
|
# characteristic features are specified a bit specially:
|
||||||
# they simply indicate the presence of something unusual/interesting,
|
# they simply indicate the presence of something unusual/interesting,
|
||||||
# and we embed the name in the feature name, like `characteristic(nzxor)`.
|
# and we embed the name in the feature name, like `characteristic(nzxor)`.
|
||||||
@@ -320,18 +321,18 @@ def build_statements(d, scope):
|
|||||||
# - mnemonic: mov
|
# - mnemonic: mov
|
||||||
#
|
#
|
||||||
# but here we deal with the form: `mnemonic(mov)`.
|
# but here we deal with the form: `mnemonic(mov)`.
|
||||||
term, _, arg = term.partition('(')
|
term, _, arg = term.partition("(")
|
||||||
Feature = parse_feature(term)
|
Feature = parse_feature(term)
|
||||||
|
|
||||||
if arg:
|
if arg:
|
||||||
arg = arg[:-len(')')]
|
arg = arg[: -len(")")]
|
||||||
# can't rely on yaml parsing ints embedded within strings
|
# can't rely on yaml parsing ints embedded within strings
|
||||||
# like:
|
# like:
|
||||||
#
|
#
|
||||||
# count(offset(0xC))
|
# count(offset(0xC))
|
||||||
# count(number(0x11223344))
|
# count(number(0x11223344))
|
||||||
# count(number(0x100 = symbol name))
|
# count(number(0x100 = symbol name))
|
||||||
if term in ('number', 'offset', 'bytes'):
|
if term in ("number", "offset", "bytes"):
|
||||||
value, symbol = parse_symbol(arg, term)
|
value, symbol = parse_symbol(arg, term)
|
||||||
feature = Feature(value, symbol)
|
feature = Feature(value, symbol)
|
||||||
else:
|
else:
|
||||||
@@ -348,29 +349,31 @@ def build_statements(d, scope):
|
|||||||
count = d[key]
|
count = d[key]
|
||||||
if isinstance(count, int):
|
if isinstance(count, int):
|
||||||
return Range(feature, min=count, max=count)
|
return Range(feature, min=count, max=count)
|
||||||
elif count.endswith(' or more'):
|
elif count.endswith(" or more"):
|
||||||
min = parse_int(count[:-len(' or more')])
|
min = parse_int(count[: -len(" or more")])
|
||||||
max = None
|
max = None
|
||||||
return Range(feature, min=min, max=max)
|
return Range(feature, min=min, max=max)
|
||||||
elif count.endswith(' or fewer'):
|
elif count.endswith(" or fewer"):
|
||||||
min = None
|
min = None
|
||||||
max = parse_int(count[:-len(' or fewer')])
|
max = parse_int(count[: -len(" or fewer")])
|
||||||
return Range(feature, min=min, max=max)
|
return Range(feature, min=min, max=max)
|
||||||
elif count.startswith('('):
|
elif count.startswith("("):
|
||||||
min, max = parse_range(count)
|
min, max = parse_range(count)
|
||||||
return Range(feature, min=min, max=max)
|
return Range(feature, min=min, max=max)
|
||||||
else:
|
else:
|
||||||
raise InvalidRule('unexpected range: %s' % (count))
|
raise InvalidRule("unexpected range: %s" % (count))
|
||||||
elif key == 'string' and d[key].startswith('/') and (d[key].endswith('/') or d[key].endswith('/i')):
|
elif key == "string" and d[key].startswith("/") and (d[key].endswith("/") or d[key].endswith("/i")):
|
||||||
try:
|
try:
|
||||||
return Regex(d[key])
|
return Regex(d[key])
|
||||||
except re.error:
|
except re.error:
|
||||||
if d[key].endswith('/i'):
|
if d[key].endswith("/i"):
|
||||||
d[key] = d[key][:-len('i')]
|
d[key] = d[key][: -len("i")]
|
||||||
raise InvalidRule('invalid regular expression: %s it should use Python syntax, try it at https://pythex.org' % d[key])
|
raise InvalidRule(
|
||||||
|
"invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % d[key]
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
Feature = parse_feature(key)
|
Feature = parse_feature(key)
|
||||||
if key in ('number', 'offset', 'bytes'):
|
if key in ("number", "offset", "bytes"):
|
||||||
# parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
|
# parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
|
||||||
# or regular numbers, e.g. 37
|
# or regular numbers, e.g. 37
|
||||||
value, symbol = parse_symbol(d[key], key)
|
value, symbol = parse_symbol(d[key], key)
|
||||||
@@ -390,7 +393,7 @@ def second(s):
|
|||||||
|
|
||||||
|
|
||||||
# we use the ruamel.yaml parser because it supports roundtripping of documents with comments.
|
# we use the ruamel.yaml parser because it supports roundtripping of documents with comments.
|
||||||
yaml = ruamel.yaml.YAML(typ='rt')
|
yaml = ruamel.yaml.YAML(typ="rt")
|
||||||
|
|
||||||
|
|
||||||
# use block mode, not inline json-like mode
|
# use block mode, not inline json-like mode
|
||||||
@@ -410,7 +413,7 @@ yaml.width = 4096
|
|||||||
|
|
||||||
|
|
||||||
class Rule(object):
|
class Rule(object):
|
||||||
def __init__(self, name, scope, statement, meta, definition=''):
|
def __init__(self, name, scope, statement, meta, definition=""):
|
||||||
super(Rule, self).__init__()
|
super(Rule, self).__init__()
|
||||||
self.name = name
|
self.name = name
|
||||||
self.scope = scope
|
self.scope = scope
|
||||||
@@ -419,13 +422,13 @@ class Rule(object):
|
|||||||
self.definition = definition
|
self.definition = definition
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'Rule(name=%s)' % (self.name)
|
return "Rule(name=%s)" % (self.name)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return 'Rule(scope=%s, name=%s)' % (self.scope, self.name)
|
return "Rule(scope=%s, name=%s)" % (self.scope, self.name)
|
||||||
|
|
||||||
def get_dependencies(self, namespaces):
|
def get_dependencies(self, namespaces):
|
||||||
'''
|
"""
|
||||||
fetch the names of rules this rule relies upon.
|
fetch the names of rules this rule relies upon.
|
||||||
these are only the direct dependencies; a user must
|
these are only the direct dependencies; a user must
|
||||||
compute the transitive dependency graph themself, if they want it.
|
compute the transitive dependency graph themself, if they want it.
|
||||||
@@ -436,7 +439,7 @@ class Rule(object):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[str]: names of rules upon which this rule depends.
|
List[str]: names of rules upon which this rule depends.
|
||||||
'''
|
"""
|
||||||
deps = set([])
|
deps = set([])
|
||||||
|
|
||||||
def rec(statement):
|
def rec(statement):
|
||||||
@@ -469,24 +472,31 @@ class Rule(object):
|
|||||||
def _extract_subscope_rules_rec(self, statement):
|
def _extract_subscope_rules_rec(self, statement):
|
||||||
if isinstance(statement, Statement):
|
if isinstance(statement, Statement):
|
||||||
# for each child that is a subscope,
|
# for each child that is a subscope,
|
||||||
for subscope in filter(lambda statement: isinstance(statement, capa.engine.Subscope), statement.get_children()):
|
for subscope in filter(
|
||||||
|
lambda statement: isinstance(statement, capa.engine.Subscope), statement.get_children()
|
||||||
|
):
|
||||||
|
|
||||||
# create a new rule from it.
|
# create a new rule from it.
|
||||||
# the name is a randomly generated, hopefully unique value.
|
# the name is a randomly generated, hopefully unique value.
|
||||||
# ideally, this won't ever be rendered to a user.
|
# ideally, this won't ever be rendered to a user.
|
||||||
name = self.name + '/' + uuid.uuid4().hex
|
name = self.name + "/" + uuid.uuid4().hex
|
||||||
new_rule = Rule(name, subscope.scope, subscope.child, {
|
new_rule = Rule(
|
||||||
'name': name,
|
name,
|
||||||
'scope': subscope.scope,
|
subscope.scope,
|
||||||
# these derived rules are never meant to be inspected separately,
|
subscope.child,
|
||||||
# they are dependencies for the parent rule,
|
{
|
||||||
# so mark it as such.
|
"name": name,
|
||||||
'lib': True,
|
"scope": subscope.scope,
|
||||||
# metadata that indicates this is derived from a subscope statement
|
# these derived rules are never meant to be inspected separately,
|
||||||
'capa/subscope-rule': True,
|
# they are dependencies for the parent rule,
|
||||||
# metadata that links the child rule the parent rule
|
# so mark it as such.
|
||||||
'capa/parent': self.name,
|
"lib": True,
|
||||||
})
|
# metadata that indicates this is derived from a subscope statement
|
||||||
|
"capa/subscope-rule": True,
|
||||||
|
# metadata that links the child rule the parent rule
|
||||||
|
"capa/parent": self.name,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
# update the existing statement to `match` the new rule
|
# update the existing statement to `match` the new rule
|
||||||
new_node = capa.features.MatchedRule(name)
|
new_node = capa.features.MatchedRule(name)
|
||||||
@@ -503,7 +513,7 @@ class Rule(object):
|
|||||||
yield new_rule
|
yield new_rule
|
||||||
|
|
||||||
def extract_subscope_rules(self):
|
def extract_subscope_rules(self):
|
||||||
'''
|
"""
|
||||||
scan through the statements of this rule,
|
scan through the statements of this rule,
|
||||||
replacing subscope statements with `match` references to a newly created rule,
|
replacing subscope statements with `match` references to a newly created rule,
|
||||||
which are yielded from this routine.
|
which are yielded from this routine.
|
||||||
@@ -514,7 +524,7 @@ class Rule(object):
|
|||||||
|
|
||||||
for derived_rule in rule.extract_subscope_rules():
|
for derived_rule in rule.extract_subscope_rules():
|
||||||
assert derived_rule.meta['capa/parent'] == rule.name
|
assert derived_rule.meta['capa/parent'] == rule.name
|
||||||
'''
|
"""
|
||||||
|
|
||||||
# recurse through statements
|
# recurse through statements
|
||||||
# when encounter Subscope statement
|
# when encounter Subscope statement
|
||||||
@@ -531,27 +541,21 @@ class Rule(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dict(cls, d, s):
|
def from_dict(cls, d, s):
|
||||||
name = d['rule']['meta']['name']
|
name = d["rule"]["meta"]["name"]
|
||||||
# if scope is not specified, default to function scope.
|
# if scope is not specified, default to function scope.
|
||||||
# this is probably the mode that rule authors will start with.
|
# this is probably the mode that rule authors will start with.
|
||||||
scope = d['rule']['meta'].get('scope', FUNCTION_SCOPE)
|
scope = d["rule"]["meta"].get("scope", FUNCTION_SCOPE)
|
||||||
statements = d['rule']['features']
|
statements = d["rule"]["features"]
|
||||||
|
|
||||||
# the rule must start with a single logic node.
|
# the rule must start with a single logic node.
|
||||||
# doing anything else is too implicit and difficult to remove (AND vs OR ???).
|
# doing anything else is too implicit and difficult to remove (AND vs OR ???).
|
||||||
if len(statements) != 1:
|
if len(statements) != 1:
|
||||||
raise InvalidRule('rule must begin with a single top level statement')
|
raise InvalidRule("rule must begin with a single top level statement")
|
||||||
|
|
||||||
if isinstance(statements[0], capa.engine.Subscope):
|
if isinstance(statements[0], capa.engine.Subscope):
|
||||||
raise InvalidRule('top level statement may not be a subscope')
|
raise InvalidRule("top level statement may not be a subscope")
|
||||||
|
|
||||||
return cls(
|
return cls(name, scope, build_statements(statements[0], scope), d["rule"]["meta"], s)
|
||||||
name,
|
|
||||||
scope,
|
|
||||||
build_statements(statements[0], scope),
|
|
||||||
d['rule']['meta'],
|
|
||||||
s
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_yaml(cls, s):
|
def from_yaml(cls, s):
|
||||||
@@ -559,9 +563,9 @@ class Rule(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_yaml_file(cls, path):
|
def from_yaml_file(cls, path):
|
||||||
with open(path, 'rb') as f:
|
with open(path, "rb") as f:
|
||||||
try:
|
try:
|
||||||
return cls.from_yaml(f.read().decode('utf-8'))
|
return cls.from_yaml(f.read().decode("utf-8"))
|
||||||
except InvalidRule as e:
|
except InvalidRule as e:
|
||||||
raise InvalidRuleWithPath(path, str(e))
|
raise InvalidRuleWithPath(path, str(e))
|
||||||
|
|
||||||
@@ -578,11 +582,11 @@ class Rule(object):
|
|||||||
definition = yaml.load(self.definition)
|
definition = yaml.load(self.definition)
|
||||||
# definition retains a reference to `meta`,
|
# definition retains a reference to `meta`,
|
||||||
# so we're updating that in place.
|
# so we're updating that in place.
|
||||||
definition['rule']['meta'] = self.meta
|
definition["rule"]["meta"] = self.meta
|
||||||
meta = self.meta
|
meta = self.meta
|
||||||
|
|
||||||
meta['name'] = self.name
|
meta["name"] = self.name
|
||||||
meta['scope'] = self.scope
|
meta["scope"] = self.scope
|
||||||
|
|
||||||
def move_to_end(m, k):
|
def move_to_end(m, k):
|
||||||
# ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap).
|
# ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap).
|
||||||
@@ -592,8 +596,8 @@ class Rule(object):
|
|||||||
del m[k]
|
del m[k]
|
||||||
m[k] = v
|
m[k] = v
|
||||||
|
|
||||||
move_to_end(definition['rule'], 'meta')
|
move_to_end(definition["rule"], "meta")
|
||||||
move_to_end(definition['rule'], 'features')
|
move_to_end(definition["rule"], "features")
|
||||||
|
|
||||||
for key in META_KEYS:
|
for key in META_KEYS:
|
||||||
if key in meta:
|
if key in meta:
|
||||||
@@ -624,11 +628,11 @@ class Rule(object):
|
|||||||
continue
|
continue
|
||||||
meta[key] = value
|
meta[key] = value
|
||||||
|
|
||||||
return ostream.getvalue().decode('utf-8').rstrip('\n') + '\n'
|
return ostream.getvalue().decode("utf-8").rstrip("\n") + "\n"
|
||||||
|
|
||||||
|
|
||||||
def get_rules_with_scope(rules, scope):
|
def get_rules_with_scope(rules, scope):
|
||||||
'''
|
"""
|
||||||
from the given collection of rules, select those with the given scope.
|
from the given collection of rules, select those with the given scope.
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -637,12 +641,12 @@ def get_rules_with_scope(rules, scope):
|
|||||||
|
|
||||||
returns:
|
returns:
|
||||||
List[capa.rules.Rule]:
|
List[capa.rules.Rule]:
|
||||||
'''
|
"""
|
||||||
return list(rule for rule in rules if rule.scope == scope)
|
return list(rule for rule in rules if rule.scope == scope)
|
||||||
|
|
||||||
|
|
||||||
def get_rules_and_dependencies(rules, rule_name):
|
def get_rules_and_dependencies(rules, rule_name):
|
||||||
'''
|
"""
|
||||||
from the given collection of rules, select a rule and its dependencies (transitively).
|
from the given collection of rules, select a rule and its dependencies (transitively).
|
||||||
|
|
||||||
args:
|
args:
|
||||||
@@ -651,7 +655,7 @@ def get_rules_and_dependencies(rules, rule_name):
|
|||||||
|
|
||||||
yields:
|
yields:
|
||||||
Rule:
|
Rule:
|
||||||
'''
|
"""
|
||||||
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
|
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
|
||||||
rules = list(rules)
|
rules = list(rules)
|
||||||
namespaces = index_rules_by_namespace(rules)
|
namespaces = index_rules_by_namespace(rules)
|
||||||
@@ -674,17 +678,17 @@ def ensure_rules_are_unique(rules):
|
|||||||
seen = set([])
|
seen = set([])
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
if rule.name in seen:
|
if rule.name in seen:
|
||||||
raise InvalidRule('duplicate rule name: ' + rule.name)
|
raise InvalidRule("duplicate rule name: " + rule.name)
|
||||||
seen.add(rule.name)
|
seen.add(rule.name)
|
||||||
|
|
||||||
|
|
||||||
def ensure_rule_dependencies_are_met(rules):
|
def ensure_rule_dependencies_are_met(rules):
|
||||||
'''
|
"""
|
||||||
raise an exception if a rule dependency does not exist.
|
raise an exception if a rule dependency does not exist.
|
||||||
|
|
||||||
raises:
|
raises:
|
||||||
InvalidRule: if a dependency is not met.
|
InvalidRule: if a dependency is not met.
|
||||||
'''
|
"""
|
||||||
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
|
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
|
||||||
rules = list(rules)
|
rules = list(rules)
|
||||||
namespaces = index_rules_by_namespace(rules)
|
namespaces = index_rules_by_namespace(rules)
|
||||||
@@ -696,7 +700,7 @@ def ensure_rule_dependencies_are_met(rules):
|
|||||||
|
|
||||||
|
|
||||||
def index_rules_by_namespace(rules):
|
def index_rules_by_namespace(rules):
|
||||||
'''
|
"""
|
||||||
compute the rules that fit into each namespace found within the given rules.
|
compute the rules that fit into each namespace found within the given rules.
|
||||||
|
|
||||||
for example, given:
|
for example, given:
|
||||||
@@ -714,23 +718,23 @@ def index_rules_by_namespace(rules):
|
|||||||
rules (List[Rule]):
|
rules (List[Rule]):
|
||||||
|
|
||||||
Returns: Dict[str, List[Rule]]
|
Returns: Dict[str, List[Rule]]
|
||||||
'''
|
"""
|
||||||
namespaces = collections.defaultdict(list)
|
namespaces = collections.defaultdict(list)
|
||||||
|
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
namespace = rule.meta.get('namespace')
|
namespace = rule.meta.get("namespace")
|
||||||
if not namespace:
|
if not namespace:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
while namespace:
|
while namespace:
|
||||||
namespaces[namespace].append(rule)
|
namespaces[namespace].append(rule)
|
||||||
namespace, _, _ = namespace.rpartition('/')
|
namespace, _, _ = namespace.rpartition("/")
|
||||||
|
|
||||||
return dict(namespaces)
|
return dict(namespaces)
|
||||||
|
|
||||||
|
|
||||||
class RuleSet(object):
|
class RuleSet(object):
|
||||||
'''
|
"""
|
||||||
a ruleset is initialized with a collection of rules, which it verifies and sorts into scopes.
|
a ruleset is initialized with a collection of rules, which it verifies and sorts into scopes.
|
||||||
each set of scoped rules is sorted topologically, which enables rules to match on past rule matches.
|
each set of scoped rules is sorted topologically, which enables rules to match on past rule matches.
|
||||||
|
|
||||||
@@ -742,7 +746,7 @@ class RuleSet(object):
|
|||||||
...
|
...
|
||||||
])
|
])
|
||||||
capa.engine.match(ruleset.file_rules, ...)
|
capa.engine.match(ruleset.file_rules, ...)
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def __init__(self, rules):
|
def __init__(self, rules):
|
||||||
super(RuleSet, self).__init__()
|
super(RuleSet, self).__init__()
|
||||||
@@ -754,7 +758,7 @@ class RuleSet(object):
|
|||||||
ensure_rule_dependencies_are_met(rules)
|
ensure_rule_dependencies_are_met(rules)
|
||||||
|
|
||||||
if len(rules) == 0:
|
if len(rules) == 0:
|
||||||
raise InvalidRuleSet('no rules selected')
|
raise InvalidRuleSet("no rules selected")
|
||||||
|
|
||||||
self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
|
self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
|
||||||
self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
|
self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
|
||||||
@@ -769,12 +773,12 @@ class RuleSet(object):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_rules_for_scope(rules, scope):
|
def _get_rules_for_scope(rules, scope):
|
||||||
'''
|
"""
|
||||||
given a collection of rules, collect the rules that are needed at the given scope.
|
given a collection of rules, collect the rules that are needed at the given scope.
|
||||||
these rules are ordered topologically.
|
these rules are ordered topologically.
|
||||||
|
|
||||||
don't include "lib" rules, unless they are dependencies of other rules.
|
don't include "lib" rules, unless they are dependencies of other rules.
|
||||||
'''
|
"""
|
||||||
scope_rules = set([])
|
scope_rules = set([])
|
||||||
|
|
||||||
# we need to process all rules, not just rules with the given scope.
|
# we need to process all rules, not just rules with the given scope.
|
||||||
@@ -782,7 +786,7 @@ class RuleSet(object):
|
|||||||
# at lower scope, e.g. function scope.
|
# at lower scope, e.g. function scope.
|
||||||
# so, we find all dependencies of all rules, and later will filter them down.
|
# so, we find all dependencies of all rules, and later will filter them down.
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
if rule.meta.get('lib', False):
|
if rule.meta.get("lib", False):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
scope_rules.update(get_rules_and_dependencies(rules, rule.name))
|
scope_rules.update(get_rules_and_dependencies(rules, rule.name))
|
||||||
@@ -790,7 +794,7 @@ class RuleSet(object):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_subscope_rules(rules):
|
def _extract_subscope_rules(rules):
|
||||||
'''
|
"""
|
||||||
process the given sequence of rules.
|
process the given sequence of rules.
|
||||||
for each one, extract any embedded subscope rules into their own rule.
|
for each one, extract any embedded subscope rules into their own rule.
|
||||||
process these recursively.
|
process these recursively.
|
||||||
@@ -798,7 +802,7 @@ class RuleSet(object):
|
|||||||
|
|
||||||
note: this operation mutates the rules passed in - they may now have `match` statements
|
note: this operation mutates the rules passed in - they may now have `match` statements
|
||||||
for the extracted subscope rules.
|
for the extracted subscope rules.
|
||||||
'''
|
"""
|
||||||
done = []
|
done = []
|
||||||
|
|
||||||
# use a queue of rules, because we'll be modifying the list (appending new items) as we go.
|
# use a queue of rules, because we'll be modifying the list (appending new items) as we go.
|
||||||
@@ -811,14 +815,14 @@ class RuleSet(object):
|
|||||||
return done
|
return done
|
||||||
|
|
||||||
def filter_rules_by_meta(self, tag):
|
def filter_rules_by_meta(self, tag):
|
||||||
'''
|
"""
|
||||||
return new rule set with rules filtered based on all meta field values, adds all dependency rules
|
return new rule set with rules filtered based on all meta field values, adds all dependency rules
|
||||||
apply tag-based rule filter assuming that all required rules are loaded
|
apply tag-based rule filter assuming that all required rules are loaded
|
||||||
can be used to specify selected rules vs. providing a rules child directory where capa cannot resolve
|
can be used to specify selected rules vs. providing a rules child directory where capa cannot resolve
|
||||||
dependencies from unknown paths
|
dependencies from unknown paths
|
||||||
TODO handle circular dependencies?
|
TODO handle circular dependencies?
|
||||||
TODO support -t=metafield <k>
|
TODO support -t=metafield <k>
|
||||||
'''
|
"""
|
||||||
rules = self.rules.values()
|
rules = self.rules.values()
|
||||||
rules_filtered = set([])
|
rules_filtered = set([])
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
__version__ = '0.0.0'
|
__version__ = "0.0.0"
|
||||||
__commit__ = '00000000'
|
__commit__ = "00000000"
|
||||||
|
|||||||
Reference in New Issue
Block a user