William Ballenthin
2020-07-01 12:43:12 -06:00
parent 23e70b4e85
commit b9d017ad10
39 changed files with 1368 additions and 1333 deletions

View File

@@ -7,23 +7,24 @@ import capa.features
class Statement(object): class Statement(object):
''' """
superclass for structural nodes, such as and/or/not. superclass for structural nodes, such as and/or/not.
this exists to provide a default impl for `__str__` and `__repr__`, this exists to provide a default impl for `__str__` and `__repr__`,
and to declare the interface method `evaluate` and to declare the interface method `evaluate`
''' """
def __init__(self): def __init__(self):
super(Statement, self).__init__() super(Statement, self).__init__()
self.name = self.__class__.__name__ self.name = self.__class__.__name__
def __str__(self): def __str__(self):
return '%s(%s)' % (self.name.lower(), ','.join(map(str, self.get_children()))) return "%s(%s)" % (self.name.lower(), ",".join(map(str, self.get_children())))
def __repr__(self): def __repr__(self):
return str(self) return str(self)
def evaluate(self, ctx): def evaluate(self, ctx):
''' """
classes that inherit `Statement` must implement `evaluate` classes that inherit `Statement` must implement `evaluate`
args: args:
@@ -31,30 +32,30 @@ class Statement(object):
returns: returns:
Result Result
''' """
raise NotImplementedError() raise NotImplementedError()
def get_children(self): def get_children(self):
if hasattr(self, 'child'): if hasattr(self, "child"):
yield self.child yield self.child
if hasattr(self, 'children'): if hasattr(self, "children"):
for child in self.children: for child in self.children:
yield child yield child
def replace_child(self, existing, new): def replace_child(self, existing, new):
if hasattr(self, 'child'): if hasattr(self, "child"):
if self.child is existing: if self.child is existing:
self.child = new self.child = new
if hasattr(self, 'children'): if hasattr(self, "children"):
for i, child in enumerate(self.children): for i, child in enumerate(self.children):
if child is existing: if child is existing:
self.children[i] = new self.children[i] = new
class Result(object): class Result(object):
''' """
represents the results of an evaluation of statements against features. represents the results of an evaluation of statements against features.
instances of this class should behave like a bool, instances of this class should behave like a bool,
@@ -65,15 +66,16 @@ class Result(object):
as well as the children Result instances. as well as the children Result instances.
we need this so that we can render the tree of expressions and their results. we need this so that we can render the tree of expressions and their results.
''' """
def __init__(self, success, statement, children, locations=None): def __init__(self, success, statement, children, locations=None):
''' """
args: args:
success (bool) success (bool)
statement (capa.engine.Statement or capa.features.Feature) statement (capa.engine.Statement or capa.features.Feature)
children (list[Result]) children (list[Result])
locations (iterable[VA]) locations (iterable[VA])
''' """
super(Result, self).__init__() super(Result, self).__init__()
self.success = success self.success = success
self.statement = statement self.statement = statement
@@ -93,7 +95,8 @@ class Result(object):
class And(Statement): class And(Statement):
'''match if all of the children evaluate to True.''' """match if all of the children evaluate to True."""
def __init__(self, *children): def __init__(self, *children):
super(And, self).__init__() super(And, self).__init__()
self.children = list(children) self.children = list(children)
@@ -105,7 +108,8 @@ class And(Statement):
class Or(Statement): class Or(Statement):
'''match if any of the children evaluate to True.''' """match if any of the children evaluate to True."""
def __init__(self, *children): def __init__(self, *children):
super(Or, self).__init__() super(Or, self).__init__()
self.children = list(children) self.children = list(children)
@@ -117,7 +121,8 @@ class Or(Statement):
class Not(Statement): class Not(Statement):
'''match only if the child evaluates to False.''' """match only if the child evaluates to False."""
def __init__(self, child): def __init__(self, child):
super(Not, self).__init__() super(Not, self).__init__()
self.child = child self.child = child
@@ -129,7 +134,8 @@ class Not(Statement):
class Some(Statement): class Some(Statement):
'''match if at least N of the children evaluate to True.''' """match if at least N of the children evaluate to True."""
def __init__(self, count, *children): def __init__(self, count, *children):
super(Some, self).__init__() super(Some, self).__init__()
self.count = count self.count = count
@@ -146,7 +152,8 @@ class Some(Statement):
class Range(Statement): class Range(Statement):
'''match if the child is contained in the ctx set with a count in the given range.''' """match if the child is contained in the ctx set with a count in the given range."""
def __init__(self, child, min=None, max=None): def __init__(self, child, min=None, max=None):
super(Range, self).__init__() super(Range, self).__init__()
self.child = child self.child = child
@@ -162,27 +169,28 @@ class Range(Statement):
def __str__(self): def __str__(self):
if self.max == (1 << 64 - 1): if self.max == (1 << 64 - 1):
return 'range(%s, min=%d, max=infinity)' % (str(self.child), self.min) return "range(%s, min=%d, max=infinity)" % (str(self.child), self.min)
else: else:
return 'range(%s, min=%d, max=%d)' % (str(self.child), self.min, self.max) return "range(%s, min=%d, max=%d)" % (str(self.child), self.min, self.max)
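For orientation, here is how these structural nodes compose — a minimal sketch only, using a hypothetical stand-in for capa.features.Feature (not shown in this diff), and assuming And/Not evaluate their children as their docstrings above describe:

class _DemoFeature(object):
    # hypothetical stand-in for capa.features.Feature, for illustration only
    def __init__(self, value):
        self.value = value

    def __hash__(self):
        return hash((type(self), self.value))

    def __eq__(self, other):
        return isinstance(other, _DemoFeature) and self.value == other.value

    def evaluate(self, ctx):
        # a feature matches when it appears in the feature-to-locations mapping
        return Result(self in ctx, self, [], locations=ctx.get(self, set()))

ctx = {_DemoFeature("api(CreateFileA)"): {0x401000}}
rule = And(_DemoFeature("api(CreateFileA)"), Not(_DemoFeature("api(DeleteFileA)")))
# assuming Result is truthy exactly when success is True, per its docstring above:
# bool(rule.evaluate(ctx)) -> True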
class Regex(Statement): class Regex(Statement):
'''match if the given pattern matches a String feature.''' """match if the given pattern matches a String feature."""
def __init__(self, pattern): def __init__(self, pattern):
super(Regex, self).__init__() super(Regex, self).__init__()
self.pattern = pattern self.pattern = pattern
pat = self.pattern[len('/'):-len('/')] pat = self.pattern[len("/") : -len("/")]
flags = re.DOTALL flags = re.DOTALL
if pattern.endswith('/i'): if pattern.endswith("/i"):
pat = self.pattern[len('/'):-len('/i')] pat = self.pattern[len("/") : -len("/i")]
flags |= re.IGNORECASE flags |= re.IGNORECASE
self.re = re.compile(pat, flags) self.re = re.compile(pat, flags)
self.match = '' self.match = ""
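A standalone check of the delimiter slicing above, for both pattern forms:

pattern = "/VirtualAlloc/i"
print(pattern[len("/") : -len("/i")])  # VirtualAlloc  (case-insensitive form)
pattern = "/VirtualAlloc/"
print(pattern[len("/") : -len("/")])   # VirtualAlloc  (plain form)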
def evaluate(self, ctx): def evaluate(self, ctx):
for feature, locations in ctx.items(): for feature, locations in ctx.items():
if not isinstance(feature, (capa.features.String, )): if not isinstance(feature, (capa.features.String,)):
continue continue
# `re.search` finds a match anywhere in the given string # `re.search` finds a match anywhere in the given string
@@ -200,27 +208,28 @@ class Regex(Statement):
class Subscope(Statement): class Subscope(Statement):
''' """
a subscope element is a placeholder in a rule - it should not be evaluated directly. a subscope element is a placeholder in a rule - it should not be evaluated directly.
the engine should preprocess rules to extract subscope statements into their own rules. the engine should preprocess rules to extract subscope statements into their own rules.
''' """
def __init__(self, scope, child): def __init__(self, scope, child):
super(Subscope, self).__init__() super(Subscope, self).__init__()
self.scope = scope self.scope = scope
self.child = child self.child = child
def evaluate(self, ctx): def evaluate(self, ctx):
raise ValueError('cannot evaluate a subscope directly!') raise ValueError("cannot evaluate a subscope directly!")
def topologically_order_rules(rules): def topologically_order_rules(rules):
''' """
order the given rules such that dependencies show up before dependents. order the given rules such that dependencies show up before dependents.
this means that as we match rules, we can add features for the matches, and these this means that as we match rules, we can add features for the matches, and these
will be matched by subsequent rules if they follow this order. will be matched by subsequent rules if they follow this order.
assumes that the rule dependency graph is a DAG. assumes that the rule dependency graph is a DAG.
''' """
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list. # we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
rules = list(rules) rules = list(rules)
namespaces = capa.rules.index_rules_by_namespace(rules) namespaces = capa.rules.index_rules_by_namespace(rules)
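The ordering pass itself is outside this hunk; for reference, a generic sketch of the same idea (Kahn's algorithm over a hypothetical name-to-dependencies mapping — not capa's actual implementation, which also resolves namespace matches):

def _topo_order_sketch(deps):
    # deps: name -> set of dependency names (hypothetical shape)
    indegree = {name: len(ds) for name, ds in deps.items()}
    dependents = {}
    for name, ds in deps.items():
        for d in ds:
            dependents.setdefault(d, set()).add(name)
    ready = [name for name, n in indegree.items() if n == 0]
    order = []
    while ready:
        name = ready.pop()
        order.append(name)
        for dependent in dependents.get(name, ()):
            indegree[dependent] -= 1
            if indegree[dependent] == 0:
                ready.append(dependent)
    return order  # dependencies before dependents, assuming a DAG

print(_topo_order_sketch({"a": set(), "b": {"a"}, "c": {"a", "b"}}))  # ['a', 'b', 'c']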
@@ -245,7 +254,7 @@ def topologically_order_rules(rules):
def match(rules, features, va): def match(rules, features, va):
''' """
Args: Args:
rules (List[capa.rules.Rule]): these must already be ordered topologically by dependency. rules (List[capa.rules.Rule]): these must already be ordered topologically by dependency.
features (Mapping[capa.features.Feature, int]): features (Mapping[capa.features.Feature, int]):
@@ -255,7 +264,7 @@ def match(rules, features, va):
Tuple[List[capa.features.Feature], Dict[str, Tuple[int, capa.engine.Result]]]: two-tuple with entries: Tuple[List[capa.features.Feature], Dict[str, Tuple[int, capa.engine.Result]]]: two-tuple with entries:
- list of features used for matching (which may be greater than argument, due to rule match features), and - list of features used for matching (which may be greater than argument, due to rule match features), and
- mapping from rule name to (location of match, result object) - mapping from rule name to (location of match, result object)
''' """
results = collections.defaultdict(list) results = collections.defaultdict(list)
# copy features so that we can modify it # copy features so that we can modify it
@@ -270,10 +279,10 @@ def match(rules, features, va):
results[rule.name].append((va, res)) results[rule.name].append((va, res))
features[capa.features.MatchedRule(rule.name)].add(va) features[capa.features.MatchedRule(rule.name)].add(va)
namespace = rule.meta.get('namespace') namespace = rule.meta.get("namespace")
if namespace: if namespace:
while namespace: while namespace:
features[capa.features.MatchedRule(namespace)].add(va) features[capa.features.MatchedRule(namespace)].add(va)
namespace, _, _ = namespace.rpartition('/') namespace, _, _ = namespace.rpartition("/")
return (features, results) return (features, results)
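The rpartition loop above credits a rule match to every ancestor namespace; run standalone with a hypothetical namespace:

namespace = "communication/http/client"
while namespace:
    print(namespace)
    namespace, _, _ = namespace.rpartition("/")
# communication/http/client
# communication/http
# communication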

View File

@@ -11,9 +11,9 @@ MAX_BYTES_FEATURE_SIZE = 0x100
def bytes_to_str(b): def bytes_to_str(b):
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
return str(codecs.encode(b, 'hex').decode('utf-8')) return str(codecs.encode(b, "hex").decode("utf-8"))
else: else:
return codecs.encode(b, 'hex') return codecs.encode(b, "hex")
class Feature(object): class Feature(object):
@@ -29,7 +29,7 @@ class Feature(object):
return self.name == other.name and self.args == other.args return self.name == other.name and self.args == other.args
def __str__(self): def __str__(self):
return '%s(%s)' % (self.name.lower(), ','.join(self.args)) return "%s(%s)" % (self.name.lower(), ",".join(self.args))
def __repr__(self): def __repr__(self):
return str(self) return str(self)
@@ -41,8 +41,7 @@ class Feature(object):
return self.__dict__ return self.__dict__
def freeze_serialize(self): def freeze_serialize(self):
return (self.__class__.__name__,
self.args)
return (self.__class__.__name__, self.args)
@classmethod @classmethod
def freeze_deserialize(cls, args): def freeze_deserialize(cls, args):
@@ -55,30 +54,30 @@ class MatchedRule(Feature):
self.rule_name = rule_name self.rule_name = rule_name
def __str__(self): def __str__(self):
return 'match(%s)' % (self.rule_name) return "match(%s)" % (self.rule_name)
class Characteristic(Feature): class Characteristic(Feature):
def __init__(self, name, value=None): def __init__(self, name, value=None):
''' """
when `value` is not provided, this serves as a descriptor for a class of characteristics. when `value` is not provided, this serves as a descriptor for a class of characteristics.
this is only used internally, such as in `rules.py` when checking if a statement is this is only used internally, such as in `rules.py` when checking if a statement is
supported by a given scope. supported by a given scope.
''' """
super(Characteristic, self).__init__([name, value]) super(Characteristic, self).__init__([name, value])
self.name = name self.name = name
self.value = value self.value = value
def evaluate(self, ctx): def evaluate(self, ctx):
if self.value is None: if self.value is None:
raise ValueError('cannot evaluate characteristic %s with empty value' % (str(self))) raise ValueError("cannot evaluate characteristic %s with empty value" % (str(self)))
return super(Characteristic, self).evaluate(ctx) return super(Characteristic, self).evaluate(ctx)
def __str__(self): def __str__(self):
if self.value is None: if self.value is None:
return 'characteristic(%s)' % (self.name) return "characteristic(%s)" % (self.name)
else: else:
return 'characteristic(%s(%s))' % (self.name, self.value) return "characteristic(%s(%s))" % (self.name, self.value)
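Putting the two forms above together (illustrative usage):

# str(Characteristic("peb access"))        -> "characteristic(peb access)"    (descriptor form)
# str(Characteristic("peb access", True))  -> "characteristic(peb access(True))"
# evaluating the descriptor form raises ValueError, per evaluate() above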
class String(Feature): class String(Feature):
@@ -98,7 +97,7 @@ class Bytes(Feature):
def evaluate(self, ctx): def evaluate(self, ctx):
for feature, locations in ctx.items(): for feature, locations in ctx.items():
if not isinstance(feature, (capa.features.Bytes, )): if not isinstance(feature, (capa.features.Bytes,)):
continue continue
if feature.value.startswith(self.value): if feature.value.startswith(self.value):
@@ -108,14 +107,13 @@ class Bytes(Feature):
def __str__(self): def __str__(self):
if self.symbol: if self.symbol:
return 'bytes(0x%s = %s)' % (bytes_to_str(self.value).upper(), self.symbol) return "bytes(0x%s = %s)" % (bytes_to_str(self.value).upper(), self.symbol)
else: else:
return 'bytes(0x%s)' % (bytes_to_str(self.value).upper()) return "bytes(0x%s)" % (bytes_to_str(self.value).upper())
def freeze_serialize(self): def freeze_serialize(self):
return (self.__class__.__name__,
[bytes_to_str(x).upper() for x in self.args])
return (self.__class__.__name__, [bytes_to_str(x).upper() for x in self.args])
@classmethod @classmethod
def freeze_deserialize(cls, args): def freeze_deserialize(cls, args):
return cls(*[codecs.decode(x, 'hex') for x in args]) return cls(*[codecs.decode(x, "hex") for x in args])
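The freeze round trip above is plain hex encoding; a standalone check:

import codecs

blob = b"MZ\x90\x00"
frozen = str(codecs.encode(blob, "hex").decode("utf-8")).upper()  # what bytes_to_str() does on Python 3
print(frozen)  # 4D5A9000
print(codecs.decode(frozen, "hex") == blob)  # True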

View File

@@ -6,4 +6,4 @@ class BasicBlock(Feature):
super(BasicBlock, self).__init__([]) super(BasicBlock, self).__init__([])
def __str__(self): def __str__(self):
return 'basic block' return "basic block"

View File

@@ -10,11 +10,11 @@ try:
except (ImportError, SyntaxError): except (ImportError, SyntaxError):
pass pass
__all__ = ['ida', 'viv'] __all__ = ["ida", "viv"]
class FeatureExtractor(object): class FeatureExtractor(object):
''' """
FeatureExtractor defines the interface for fetching features from a sample. FeatureExtractor defines the interface for fetching features from a sample.
There may be multiple backends that support fetching features for capa. There may be multiple backends that support fetching features for capa.
@@ -27,7 +27,8 @@ class FeatureExtractor(object):
Also, this provides a way to hook in an IDA backend. Also, this provides a way to hook in an IDA backend.
This class is not instantiated directly; it is the base class for other implementations. This class is not instantiated directly; it is the base class for other implementations.
''' """
__metaclass__ = abc.ABCMeta __metaclass__ = abc.ABCMeta
def __init__(self): def __init__(self):
@@ -40,7 +41,7 @@ class FeatureExtractor(object):
@abc.abstractmethod @abc.abstractmethod
def extract_file_features(self): def extract_file_features(self):
''' """
extract file-scope features. extract file-scope features.
example:: example::
@@ -51,12 +52,12 @@ class FeatureExtractor(object):
yields: yields:
Tuple[capa.features.Feature, int]: feature and its location Tuple[capa.features.Feature, int]: feature and its location
''' """
raise NotImplementedError() raise NotImplementedError()
@abc.abstractmethod @abc.abstractmethod
def get_functions(self): def get_functions(self):
''' """
enumerate the functions and provide opaque values that will enumerate the functions and provide opaque values that will
subsequently be provided to `.extract_function_features()`, etc. subsequently be provided to `.extract_function_features()`, etc.
@@ -67,12 +68,12 @@ class FeatureExtractor(object):
yields: yields:
any: the opaque function value. any: the opaque function value.
''' """
raise NotImplementedError() raise NotImplementedError()
@abc.abstractmethod @abc.abstractmethod
def extract_function_features(self, f): def extract_function_features(self, f):
''' """
extract function-scope features. extract function-scope features.
the arguments are opaque values previously provided by `.get_functions()`, etc. the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -88,12 +89,12 @@ class FeatureExtractor(object):
yields: yields:
Tuple[capa.features.Feature, int]: feature and its location Tuple[capa.features.Feature, int]: feature and its location
''' """
raise NotImplementedError() raise NotImplementedError()
@abc.abstractmethod @abc.abstractmethod
def get_basic_blocks(self, f): def get_basic_blocks(self, f):
''' """
enumerate the basic blocks in the given function and provide opaque values that will enumerate the basic blocks in the given function and provide opaque values that will
subsequently be provided to `.extract_basic_block_features()`, etc. subsequently be provided to `.extract_basic_block_features()`, etc.
@@ -104,12 +105,12 @@ class FeatureExtractor(object):
yields: yields:
any: the opaque basic block value. any: the opaque basic block value.
''' """
raise NotImplementedError() raise NotImplementedError()
@abc.abstractmethod @abc.abstractmethod
def extract_basic_block_features(self, f, bb): def extract_basic_block_features(self, f, bb):
''' """
extract basic block-scope features. extract basic block-scope features.
the arguments are opaque values previously provided by `.get_functions()`, etc. the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -127,12 +128,12 @@ class FeatureExtractor(object):
yields: yields:
Tuple[capa.features.Feature, int]: feature and its location Tuple[capa.features.Feature, int]: feature and its location
''' """
raise NotImplementedError() raise NotImplementedError()
@abc.abstractmethod @abc.abstractmethod
def get_instructions(self, f, bb): def get_instructions(self, f, bb):
''' """
enumerate the instructions in the given basic block and provide opaque values that will enumerate the instructions in the given basic block and provide opaque values that will
subsequently be provided to `.extract_insn_features()`, etc. subsequently be provided to `.extract_insn_features()`, etc.
@@ -143,12 +144,12 @@ class FeatureExtractor(object):
yields: yields:
any: the opaque function value. any: the opaque function value.
''' """
raise NotImplementedError() raise NotImplementedError()
@abc.abstractmethod @abc.abstractmethod
def extract_insn_features(self, f, bb, insn): def extract_insn_features(self, f, bb, insn):
''' """
extract instruction-scope features. extract instruction-scope features.
the arguments are opaque values previously provided by `.get_functions()`, etc. the arguments are opaque values previously provided by `.get_functions()`, etc.
@@ -168,12 +169,12 @@ class FeatureExtractor(object):
yields: yields:
Tuple[capa.features.Feature, int]: feature and its location Tuple[capa.features.Feature, int]: feature and its location
''' """
raise NotImplementedError() raise NotImplementedError()
class NullFeatureExtractor(FeatureExtractor): class NullFeatureExtractor(FeatureExtractor):
''' """
An extractor that extracts some user-provided features. An extractor that extracts some user-provided features.
The structure of the single parameter is demonstrated in the example below. The structure of the single parameter is demonstrated in the example below.
@@ -211,64 +212,66 @@ class NullFeatureExtractor(FeatureExtractor):
0x40200: ... 0x40200: ...
} }
) )
''' """
def __init__(self, features): def __init__(self, features):
super(NullFeatureExtractor, self).__init__() super(NullFeatureExtractor, self).__init__()
self.features = features self.features = features
def extract_file_features(self): def extract_file_features(self):
for p in self.features.get('file features', []): for p in self.features.get("file features", []):
va, feature = p va, feature = p
yield feature, va yield feature, va
def get_functions(self): def get_functions(self):
for va in sorted(self.features['functions'].keys()): for va in sorted(self.features["functions"].keys()):
yield va yield va
def extract_function_features(self, f): def extract_function_features(self, f):
for p in (self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('features', [])):
for p in self.features.get("functions", {}).get(f, {}).get("features", []): # noqa: E127 line over-indented
va, feature = p va, feature = p
yield feature, va yield feature, va
def get_basic_blocks(self, f): def get_basic_blocks(self, f):
for va in sorted(self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('basic blocks', {})
.keys()):
for va in sorted(
self.features.get("functions", {}) # noqa: E127 line over-indented
.get(f, {})
.get("basic blocks", {})
.keys()
):
yield va yield va
def extract_basic_block_features(self, f, bb): def extract_basic_block_features(self, f, bb):
for p in (self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('basic blocks', {})
.get(bb, {})
.get('features', [])):
for p in (
self.features.get("functions", {}) # noqa: E127 line over-indented
.get(f, {})
.get("basic blocks", {})
.get(bb, {})
.get("features", [])
):
va, feature = p va, feature = p
yield feature, va yield feature, va
def get_instructions(self, f, bb): def get_instructions(self, f, bb):
for va in sorted(self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('basic blocks', {})
.get(bb, {})
.get('instructions', {})
.keys()):
for va in sorted(
self.features.get("functions", {}) # noqa: E127 line over-indented
.get(f, {})
.get("basic blocks", {})
.get(bb, {})
.get("instructions", {})
.keys()
):
yield va yield va
def extract_insn_features(self, f, bb, insn): def extract_insn_features(self, f, bb, insn):
for p in (self.features # noqa: E127 line over-indented
.get('functions', {})
.get(f, {})
.get('basic blocks', {})
.get(bb, {})
.get('instructions', {})
.get(insn, {})
.get('features', [])):
for p in (
self.features.get("functions", {}) # noqa: E127 line over-indented
.get(f, {})
.get("basic blocks", {})
.get(bb, {})
.get("instructions", {})
.get(insn, {})
.get("features", [])
):
va, feature = p va, feature = p
yield feature, va yield feature, va
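Reconstructed from the .get() chains above, the nested mapping NullFeatureExtractor expects has this shape (placeholder strings stand in for real capa.features.Feature instances; addresses are hypothetical):

features = {
    "file features": [(0x402345, "a file-scope feature")],
    "functions": {
        0x401000: {
            "features": [(0x401000, "a function-scope feature")],
            "basic blocks": {
                0x401000: {
                    "features": [(0x401002, "a basic-block-scope feature")],
                    "instructions": {
                        0x401004: {"features": [(0x401004, "an instruction-scope feature")]},
                    },
                },
            },
        },
    },
}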

View File

@@ -10,27 +10,27 @@ def xor_static(data, i):
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
return bytes(c ^ i for c in data) return bytes(c ^ i for c in data)
else: else:
return ''.join(chr(ord(c) ^ i) for c in data) return "".join(chr(ord(c) ^ i) for c in data)
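Given the definition above, on Python 3:

print(xor_static(b"MZ", 0x00))  # b'MZ'
print(xor_static(b"MZ", 0x41))  # b'\x0c\x1b'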
def is_aw_function(function_name): def is_aw_function(function_name):
''' """
is the given function name an A/W function? is the given function name an A/W function?
these are variants of functions that, on Windows, accept either a narrow or wide string. these are variants of functions that, on Windows, accept either a narrow or wide string.
''' """
if len(function_name) < 2: if len(function_name) < 2:
return False return False
# last character should be 'A' or 'W' # last character should be 'A' or 'W'
if function_name[-1] not in ('A', 'W'): if function_name[-1] not in ("A", "W"):
return False return False
# second to last character should be lowercase letter # second to last character should be lowercase letter
return 'a' <= function_name[-2] <= 'z' or '0' <= function_name[-2] <= '9' return "a" <= function_name[-2] <= "z" or "0" <= function_name[-2] <= "9"
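A few concrete cases for the checks above:

assert is_aw_function("CreateFileA")
assert is_aw_function("RegSetValueExW")
assert not is_aw_function("CreateFile")  # last character is not 'A' or 'W'
assert not is_aw_function("W")           # too short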
def generate_api_features(apiname, va): def generate_api_features(apiname, va):
''' """
for a given function name and address, generate API names. for a given function name and address, generate API names.
we over-generate features to make matching easier. we over-generate features to make matching easier.
these include: these include:
@@ -38,7 +38,7 @@ def generate_api_features(apiname, va):
- kernel32.CreateFile - kernel32.CreateFile
- CreateFileA - CreateFileA
- CreateFile - CreateFile
''' """
# (kernel32.CreateFileA, 0x401000) # (kernel32.CreateFileA, 0x401000)
yield API(apiname), va yield API(apiname), va
@@ -46,8 +46,8 @@ def generate_api_features(apiname, va):
# (kernel32.CreateFile, 0x401000) # (kernel32.CreateFile, 0x401000)
yield API(apiname[:-1]), va yield API(apiname[:-1]), va
if '.' in apiname: if "." in apiname:
modname, impname = apiname.split('.') modname, impname = apiname.split(".")
# strip modname to support importname-only matching # strip modname to support importname-only matching
# (CreateFileA, 0x401000) # (CreateFileA, 0x401000)
yield API(impname), va yield API(impname), va
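Per the docstring above, the complete set of yields for a dotted, A/W-suffixed name looks like:

# generate_api_features("kernel32.CreateFileA", 0x401000) yields:
#   API("kernel32.CreateFileA"), 0x401000
#   API("kernel32.CreateFile"),  0x401000   (A/W suffix stripped)
#   API("CreateFileA"),          0x401000   (module stripped)
#   API("CreateFile"),           0x401000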

View File

@@ -26,17 +26,17 @@ def get_va(self):
def add_va_int_cast(o): def add_va_int_cast(o):
''' """
dynamically add a cast-to-int (`__int__`) method to the given object dynamically add a cast-to-int (`__int__`) method to the given object
that returns the value of the `.va` property. that returns the value of the `.va` property.
this bit of skullduggery lets us cast viv-utils objects as ints. this bit of skullduggery lets us cast viv-utils objects as ints.
the correct way of doing this is to update viv-utils (or subclass the objects here). the correct way of doing this is to update viv-utils (or subclass the objects here).
''' """
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
setattr(o, '__int__', types.MethodType(get_va, o)) setattr(o, "__int__", types.MethodType(get_va, o))
else: else:
setattr(o, '__int__', types.MethodType(get_va, o, type(o))) setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
return o return o

View File

@@ -15,23 +15,23 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
def _ida_get_printable_len(op): def _ida_get_printable_len(op):
''' Return string length if all operand bytes are ascii or utf16-le printable """ Return string length if all operand bytes are ascii or utf16-le printable
args: args:
op (IDA op_t) op (IDA op_t)
''' """
op_val = helpers.mask_op_val(op) op_val = helpers.mask_op_val(op)
if op.dtype == idaapi.dt_byte: if op.dtype == idaapi.dt_byte:
chars = struct.pack('<B', op_val) chars = struct.pack("<B", op_val)
elif op.dtype == idaapi.dt_word: elif op.dtype == idaapi.dt_word:
chars = struct.pack('<H', op_val) chars = struct.pack("<H", op_val)
elif op.dtype == idaapi.dt_dword: elif op.dtype == idaapi.dt_dword:
chars = struct.pack('<I', op_val) chars = struct.pack("<I", op_val)
elif op.dtype == idaapi.dt_qword: elif op.dtype == idaapi.dt_qword:
chars = struct.pack('<Q', op_val) chars = struct.pack("<Q", op_val)
else: else:
raise ValueError('Unhandled operand data type 0x%x.' % op.dtype) raise ValueError("Unhandled operand data type 0x%x." % op.dtype)
def _is_printable_ascii(chars): def _is_printable_ascii(chars):
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
@@ -44,7 +44,7 @@ def _ida_get_printable_len(op):
if all(c == 0x00 for c in chars[1::2]): if all(c == 0x00 for c in chars[1::2]):
return _is_printable_ascii(chars[::2]) return _is_printable_ascii(chars[::2])
else: else:
if all(c == '\x00' for c in chars[1::2]): if all(c == "\x00" for c in chars[1::2]):
return _is_printable_ascii(chars[::2]) return _is_printable_ascii(chars[::2])
if _is_printable_ascii(chars): if _is_printable_ascii(chars):
@@ -57,32 +57,32 @@ def _ida_get_printable_len(op):
def _is_mov_imm_to_stack(insn): def _is_mov_imm_to_stack(insn):
''' verify instruction moves immediate onto stack """ verify instruction moves immediate onto stack
args: args:
insn (IDA insn_t) insn (IDA insn_t)
''' """
if insn.Op2.type != idaapi.o_imm: if insn.Op2.type != idaapi.o_imm:
return False return False
if not helpers.is_op_stack_var(insn.ea, 0): if not helpers.is_op_stack_var(insn.ea, 0):
return False return False
if not insn.get_canon_mnem().startswith('mov'): if not insn.get_canon_mnem().startswith("mov"):
return False return False
return True return True
def _ida_bb_contains_stackstring(f, bb): def _ida_bb_contains_stackstring(f, bb):
''' check basic block for stackstring indicators """ check basic block for stackstring indicators
true if basic block contains enough moves of constant bytes to the stack true if basic block contains enough moves of constant bytes to the stack
args: args:
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
''' """
count = 0 count = 0
for insn in helpers.get_instructions_in_range(bb.start_ea, bb.end_ea): for insn in helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
@@ -96,25 +96,25 @@ def _ida_bb_contains_stackstring(f, bb):
def extract_bb_stackstring(f, bb): def extract_bb_stackstring(f, bb):
''' extract stackstring indicators from basic block """ extract stackstring indicators from basic block
args: args:
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
''' """
if _ida_bb_contains_stackstring(f, bb): if _ida_bb_contains_stackstring(f, bb):
yield Characteristic('stack string', True), bb.start_ea yield Characteristic("stack string", True), bb.start_ea
def _ida_bb_contains_tight_loop(f, bb): def _ida_bb_contains_tight_loop(f, bb):
''' check basic block for tight loop indicators """ check basic block for tight loop indicators
true if last instruction in basic block branches to basic block start true if last instruction in basic block branches to basic block start
args: args:
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
''' """
bb_end = idc.prev_head(bb.end_ea) bb_end = idc.prev_head(bb.end_ea)
if bb.start_ea < bb_end: if bb.start_ea < bb_end:
@@ -126,23 +126,23 @@ def _ida_bb_contains_tight_loop(f, bb):
def extract_bb_tight_loop(f, bb): def extract_bb_tight_loop(f, bb):
''' extract tight loop indicators from a basic block """ extract tight loop indicators from a basic block
args: args:
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
''' """
if _ida_bb_contains_tight_loop(f, bb): if _ida_bb_contains_tight_loop(f, bb):
yield Characteristic('tight loop', True), bb.start_ea yield Characteristic("tight loop", True), bb.start_ea
def extract_features(f, bb): def extract_features(f, bb):
''' extract basic block features """ extract basic block features
args: args:
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
''' """
yield BasicBlock(), bb.start_ea yield BasicBlock(), bb.start_ea
for bb_handler in BASIC_BLOCK_HANDLERS: for bb_handler in BASIC_BLOCK_HANDLERS:
@@ -166,5 +166,5 @@ def main():
pprint.pprint(features) pprint.pprint(features)
if __name__ == '__main__': if __name__ == "__main__":
main() main()

View File

@@ -16,32 +16,39 @@ import capa.features.extractors.ida.helpers
def _ida_check_segment_for_pe(seg): def _ida_check_segment_for_pe(seg):
''' check segment for embedded PE """ check segment for embedded PE
adapted for IDA from: adapted for IDA from:
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19 https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
args: args:
seg (IDA segment_t) seg (IDA segment_t)
''' """
seg_max = seg.end_ea seg_max = seg.end_ea
mz_xor = [(capa.features.extractors.helpers.xor_static(b'MZ', i),
capa.features.extractors.helpers.xor_static(b'PE', i),
i)
for i in range(256)]
todo = [(capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i) for mzx, pex, i in mz_xor]
mz_xor = [
(
capa.features.extractors.helpers.xor_static(b"MZ", i),
capa.features.extractors.helpers.xor_static(b"PE", i),
i,
)
for i in range(256)
]
todo = [
(capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i)
for mzx, pex, i in mz_xor
]
todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != idaapi.BADADDR] todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != idaapi.BADADDR]
while len(todo): while len(todo):
off, mzx, pex, i = todo.pop() off, mzx, pex, i = todo.pop()
# The MZ header has one field we will check: e_lfanew is at 0x3c # The MZ header has one field we will check: e_lfanew is at 0x3c
e_lfanew = off + 0x3c e_lfanew = off + 0x3C
if seg_max < (e_lfanew + 4): if seg_max < (e_lfanew + 4):
continue continue
newoff = struct.unpack('<I', capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0] newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
peoff = off + newoff peoff = off + newoff
if seg_max < (peoff + 2): if seg_max < (peoff + 2):
@@ -56,29 +63,29 @@ def _ida_check_segment_for_pe(seg):
def extract_file_embedded_pe(): def extract_file_embedded_pe():
''' extract embedded PE features """ extract embedded PE features
IDA must load resource sections for this to be complete IDA must load resource sections for this to be complete
- '-R' from console - '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually - Check 'Load resource sections' when opening binary in IDA manually
''' """
for seg in capa.features.extractors.ida.helpers.get_segments(): for seg in capa.features.extractors.ida.helpers.get_segments():
if seg.is_header_segm(): if seg.is_header_segm():
# IDA may load header segments, skip if present # IDA may load header segments, skip if present
continue continue
for ea, _ in _ida_check_segment_for_pe(seg): for ea, _ in _ida_check_segment_for_pe(seg):
yield Characteristic('embedded pe', True), ea yield Characteristic("embedded pe", True), ea
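For intuition, the carving idea above as a standalone sketch over a plain byte buffer, independent of IDA (Python 3; buffer and offsets are contrived for demonstration):

import struct

def _find_embedded_pe_sketch(buf):
    # brute-force all 256 single-byte XOR keys: find the encoded 'MZ',
    # decode the e_lfanew dword at +0x3C, and expect the encoded 'PE' there
    for i in range(256):
        mzx = bytes(c ^ i for c in b"MZ")
        pex = bytes(c ^ i for c in b"PE")
        off = buf.find(mzx)
        while off != -1:
            if off + 0x40 <= len(buf):
                e_lfanew = struct.unpack("<I", bytes(c ^ i for c in buf[off + 0x3C : off + 0x40]))[0]
                if buf[off + e_lfanew : off + e_lfanew + 2] == pex:
                    yield off, i
            off = buf.find(mzx, off + 1)

buf = b"\x00" * 7 + b"MZ" + b"\x00" * 0x3A + struct.pack("<I", 0x80) + b"\x00" * 0x40 + b"PE\x00\x00"
print(list(_find_embedded_pe_sketch(buf)))  # [(7, 0)]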
def extract_file_export_names(): def extract_file_export_names():
''' extract function exports ''' """ extract function exports """
for _, _, ea, name in idautils.Entries(): for _, _, ea, name in idautils.Entries():
yield Export(name), ea yield Export(name), ea
def extract_file_import_names(): def extract_file_import_names():
''' extract function imports """ extract function imports
1. imports by ordinal: 1. imports by ordinal:
- modulename.#ordinal - modulename.#ordinal
@@ -87,25 +94,25 @@ def extract_file_import_names():
matching: matching:
- modulename.importname - modulename.importname
- importname - importname
''' """
for ea, imp_info in capa.features.extractors.ida.helpers.get_file_imports().items(): for ea, imp_info in capa.features.extractors.ida.helpers.get_file_imports().items():
dllname, name, ordi = imp_info dllname, name, ordi = imp_info
if name: if name:
yield Import('%s.%s' % (dllname, name)), ea yield Import("%s.%s" % (dllname, name)), ea
yield Import(name), ea yield Import(name), ea
if ordi: if ordi:
yield Import('%s.#%s' % (dllname, str(ordi))), ea yield Import("%s.#%s" % (dllname, str(ordi))), ea
def extract_file_section_names(): def extract_file_section_names():
''' extract section names """ extract section names
IDA must load resource sections for this to be complete IDA must load resource sections for this to be complete
- '-R' from console - '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually - Check 'Load resource sections' when opening binary in IDA manually
''' """
for seg in capa.features.extractors.ida.helpers.get_segments(): for seg in capa.features.extractors.ida.helpers.get_segments():
if seg.is_header_segm(): if seg.is_header_segm():
# IDA may load header segments, skip if present # IDA may load header segments, skip if present
@@ -115,12 +122,12 @@ def extract_file_section_names():
def extract_file_strings(): def extract_file_strings():
''' extract ASCII and UTF-16 LE strings """ extract ASCII and UTF-16 LE strings
IDA must load resource sections for this to be complete IDA must load resource sections for this to be complete
- '-R' from console - '-R' from console
- Check 'Load resource sections' when opening binary in IDA manually - Check 'Load resource sections' when opening binary in IDA manually
''' """
for seg in capa.features.extractors.ida.helpers.get_segments(): for seg in capa.features.extractors.ida.helpers.get_segments():
seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg) seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)
@@ -132,7 +139,7 @@ def extract_file_strings():
def extract_features(): def extract_features():
''' extract file features ''' """ extract file features """
for file_handler in FILE_HANDLERS: for file_handler in FILE_HANDLERS:
for feature, va in file_handler(): for feature, va in file_handler():
yield feature, va yield feature, va
@@ -151,5 +158,5 @@ def main():
pprint.pprint(list(extract_features())) pprint.pprint(list(extract_features()))
if __name__ == '__main__': if __name__ == "__main__":
main() main()

View File

@@ -6,14 +6,14 @@ from capa.features.extractors import loops
def _ida_function_contains_switch(f): def _ida_function_contains_switch(f):
''' check a function for switch statement indicators """ check a function for switch statement indicators
adapted from: adapted from:
https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1 https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1
arg: arg:
f (IDA func_t) f (IDA func_t)
''' """
for start, end in idautils.Chunks(f.start_ea): for start, end in idautils.Chunks(f.start_ea):
for head in idautils.Heads(start, end): for head in idautils.Heads(start, end):
if idaapi.get_switch_info(head): if idaapi.get_switch_info(head):
@@ -23,68 +23,63 @@ def _ida_function_contains_switch(f):
def extract_function_switch(f): def extract_function_switch(f):
''' extract switch indicators from a function """ extract switch indicators from a function
arg: arg:
f (IDA func_t) f (IDA func_t)
''' """
if _ida_function_contains_switch(f): if _ida_function_contains_switch(f):
yield Characteristic('switch', True), f.start_ea yield Characteristic("switch", True), f.start_ea
def extract_function_calls_to(f): def extract_function_calls_to(f):
''' extract callers to a function """ extract callers to a function
args: args:
f (IDA func_t) f (IDA func_t)
''' """
for ea in idautils.CodeRefsTo(f.start_ea, True): for ea in idautils.CodeRefsTo(f.start_ea, True):
yield Characteristic('calls to', True), ea yield Characteristic("calls to", True), ea
def extract_function_loop(f): def extract_function_loop(f):
''' extract loop indicators from a function """ extract loop indicators from a function
args: args:
f (IDA func_t) f (IDA func_t)
''' """
edges = [] edges = []
for bb in idaapi.FlowChart(f): for bb in idaapi.FlowChart(f):
map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs()) map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs())
if edges and loops.has_loop(edges): if edges and loops.has_loop(edges):
yield Characteristic('loop', True), f.start_ea yield Characteristic("loop", True), f.start_ea
def extract_recursive_call(f): def extract_recursive_call(f):
''' extract recursive function call """ extract recursive function call
args: args:
f (IDA func_t) f (IDA func_t)
''' """
for ref in idautils.CodeRefsTo(f.start_ea, True): for ref in idautils.CodeRefsTo(f.start_ea, True):
if f.contains(ref): if f.contains(ref):
yield Characteristic('recursive call', True), f.start_ea yield Characteristic("recursive call", True), f.start_ea
break break
def extract_features(f): def extract_features(f):
''' extract function features """ extract function features
arg: arg:
f (IDA func_t) f (IDA func_t)
''' """
for func_handler in FUNCTION_HANDLERS: for func_handler in FUNCTION_HANDLERS:
for feature, va in func_handler(f): for feature, va in func_handler(f):
yield feature, va yield feature, va
FUNCTION_HANDLERS = (
extract_function_calls_to,
extract_function_switch,
extract_function_loop,
extract_recursive_call
)
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_switch, extract_function_loop, extract_recursive_call)
def main(): def main():
@@ -96,5 +91,5 @@ def main():
pprint.pprint(features) pprint.pprint(features)
if __name__ == '__main__': if __name__ == "__main__":
main() main()

View File

@@ -7,21 +7,21 @@ import idc
def find_byte_sequence(start, end, seq): def find_byte_sequence(start, end, seq):
''' find byte sequence """ find byte sequence
args: args:
start: min virtual address start: min virtual address
end: max virtual address end: max virtual address
seq: bytes to search e.g. b'\x01\x03' seq: bytes to search e.g. b'\x01\x03'
''' """
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
return idaapi.find_binary(start, end, ' '.join(['%02x' % b for b in seq]), 0, idaapi.SEARCH_DOWN) return idaapi.find_binary(start, end, " ".join(["%02x" % b for b in seq]), 0, idaapi.SEARCH_DOWN)
else: else:
return idaapi.find_binary(start, end, ' '.join(['%02x' % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN) return idaapi.find_binary(start, end, " ".join(["%02x" % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN)
def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False): def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
''' get functions, range optional """ get functions, range optional
args: args:
start: min virtual address start: min virtual address
@@ -29,7 +29,7 @@ def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
ret: ret:
yield func_t* yield func_t*
''' """
for ea in idautils.Functions(start=start, end=end): for ea in idautils.Functions(start=start, end=end):
f = idaapi.get_func(ea) f = idaapi.get_func(ea)
@@ -43,7 +43,7 @@ def get_functions(start=None, end=None, ignore_thunks=False, ignore_libs=False):
def get_segments(): def get_segments():
''' Get list of segments (sections) in the binary image ''' """ Get list of segments (sections) in the binary image """
for n in range(idaapi.get_segm_qty()): for n in range(idaapi.get_segm_qty()):
seg = idaapi.getnseg(n) seg = idaapi.getnseg(n)
if seg: if seg:
@@ -51,11 +51,11 @@ def get_segments():
def get_segment_buffer(seg): def get_segment_buffer(seg):
''' return bytes stored in a given segment """ return bytes stored in a given segment
decrease buffer size until IDA is able to read bytes from the segment decrease buffer size until IDA is able to read bytes from the segment
''' """
buff = b'' buff = b""
sz = seg.end_ea - seg.start_ea sz = seg.end_ea - seg.start_ea
while sz > 0: while sz > 0:
@@ -65,11 +65,11 @@ def get_segment_buffer(seg):
sz -= 0x1000 sz -= 0x1000
# IDA returns None if get_bytes fails, so convert for consistent return type # IDA returns None if get_bytes fails, so convert for consistent return type
return buff if buff else b'' return buff if buff else b""
def get_file_imports(): def get_file_imports():
''' get file imports ''' """ get file imports """
_imports = {} _imports = {}
for idx in range(idaapi.get_import_module_qty()): for idx in range(idaapi.get_import_module_qty()):
@@ -79,9 +79,9 @@ def get_file_imports():
continue continue
def _inspect_import(ea, name, ordi): def _inspect_import(ea, name, ordi):
if name and name.startswith('__imp_'): if name and name.startswith("__imp_"):
# handle mangled names starting # handle mangled names starting
name = name[len('__imp_'):] name = name[len("__imp_") :]
_imports[ea] = (dllname.lower(), name, ordi) _imports[ea] = (dllname.lower(), name, ordi)
return True return True
@@ -91,14 +91,14 @@ def get_file_imports():
def get_instructions_in_range(start, end): def get_instructions_in_range(start, end):
''' yield instructions in range """ yield instructions in range
args: args:
start: virtual address (inclusive) start: virtual address (inclusive)
end: virtual address (exclusive) end: virtual address (exclusive)
yield: yield:
(insn_t*) (insn_t*)
''' """
for head in idautils.Heads(start, end): for head in idautils.Heads(start, end):
inst = idautils.DecodeInstruction(head) inst = idautils.DecodeInstruction(head)
if inst: if inst:
@@ -106,7 +106,7 @@ def get_instructions_in_range(start, end):
def is_operand_equal(op1, op2): def is_operand_equal(op1, op2):
''' compare two IDA op_t ''' """ compare two IDA op_t """
if op1.flags != op2.flags: if op1.flags != op2.flags:
return False return False
@@ -132,14 +132,12 @@ def is_operand_equal(op1, op2):
def is_basic_block_equal(bb1, bb2): def is_basic_block_equal(bb1, bb2):
''' compare two IDA BasicBlock ''' """ compare two IDA BasicBlock """
return bb1.start_ea == bb2.start_ea \
and bb1.end_ea == bb2.end_ea \
and bb1.type == bb2.type
return bb1.start_ea == bb2.start_ea and bb1.end_ea == bb2.end_ea and bb1.type == bb2.type
def basic_block_size(bb): def basic_block_size(bb):
''' calculate size of basic block ''' """ calculate size of basic block """
return bb.end_ea - bb.start_ea return bb.end_ea - bb.start_ea
@@ -152,11 +150,11 @@ def read_bytes_at(ea, count):
def find_string_at(ea, min=4): def find_string_at(ea, min=4):
''' check if ASCII string exists at a given virtual address ''' """ check if ASCII string exists at a given virtual address """
found = idaapi.get_strlit_contents(ea, -1, idaapi.STRTYPE_C) found = idaapi.get_strlit_contents(ea, -1, idaapi.STRTYPE_C)
if found and len(found) > min: if found and len(found) > min:
try: try:
found = found.decode('ascii') found = found.decode("ascii")
# hacky check for IDA bug; get_strlit_contents also reads Unicode as # hacky check for IDA bug; get_strlit_contents also reads Unicode as
# myy__uunniiccoodde when searching in ASCII mode so we check for that here # myy__uunniiccoodde when searching in ASCII mode so we check for that here
# and return the fixed up value # and return the fixed up value
@@ -169,11 +167,11 @@ def find_string_at(ea, min=4):
def get_op_phrase_info(op): def get_op_phrase_info(op):
''' parse phrase features from operand """ parse phrase features from operand
Pretty much dup of sark's implementation: Pretty much dup of sark's implementation:
https://github.com/tmr232/Sark/blob/master/sark/code/instruction.py#L28-L73 https://github.com/tmr232/Sark/blob/master/sark/code/instruction.py#L28-L73
''' """
if op.type not in (idaapi.o_phrase, idaapi.o_displ): if op.type not in (idaapi.o_phrase, idaapi.o_displ):
return return
@@ -202,21 +200,21 @@ def get_op_phrase_info(op):
# This is only relevant to Intel architectures. # This is only relevant to Intel architectures.
index = None index = None
return {'base': base, 'index': index, 'scale': scale, 'offset': offset} return {"base": base, "index": index, "scale": scale, "offset": offset}
def is_op_write(insn, op): def is_op_write(insn, op):
''' Check if an operand is written to (destination operand) ''' """ Check if an operand is written to (destination operand) """
return idaapi.has_cf_chg(insn.get_canon_feature(), op.n) return idaapi.has_cf_chg(insn.get_canon_feature(), op.n)
def is_op_read(insn, op): def is_op_read(insn, op):
''' Check if an operand is read from (source operand) ''' """ Check if an operand is read from (source operand) """
return idaapi.has_cf_use(insn.get_canon_feature(), op.n) return idaapi.has_cf_use(insn.get_canon_feature(), op.n)
def is_sp_modified(insn): def is_sp_modified(insn):
''' determine if instruction modifies SP, ESP, RSP ''' """ determine if instruction modifies SP, ESP, RSP """
for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)): for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
if op.reg != idautils.procregs.sp.reg: if op.reg != idautils.procregs.sp.reg:
continue continue
@@ -228,7 +226,7 @@ def is_sp_modified(insn):
def is_bp_modified(insn): def is_bp_modified(insn):
''' check if instruction modifies BP, EBP, RBP ''' """ check if instruction modifies BP, EBP, RBP """
for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)): for op in get_insn_ops(insn, op_type=(idaapi.o_reg,)):
if op.reg != idautils.procregs.bp.reg: if op.reg != idautils.procregs.bp.reg:
continue continue
@@ -240,12 +238,12 @@ def is_bp_modified(insn):
def is_frame_register(reg): def is_frame_register(reg):
''' check if register is sp or bp ''' """ check if register is sp or bp """
return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg) return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
def get_insn_ops(insn, op_type=None): def get_insn_ops(insn, op_type=None):
''' yield op_t for instruction, filter on type if specified ''' """ yield op_t for instruction, filter on type if specified """
for op in insn.ops: for op in insn.ops:
if op.type == idaapi.o_void: if op.type == idaapi.o_void:
# avoid looping all 6 ops if only subset exists # avoid looping all 6 ops if only subset exists
@@ -258,17 +256,17 @@ def get_insn_ops(insn, op_type=None):
def ea_flags(ea): def ea_flags(ea):
''' retrieve processor flags for a given address ''' """ retrieve processor flags for a given address """
return idaapi.get_flags(ea) return idaapi.get_flags(ea)
def is_op_stack_var(ea, n): def is_op_stack_var(ea, n):
''' check if operand is a stack variable ''' """ check if operand is a stack variable """
return idaapi.is_stkvar(ea_flags(ea), n) return idaapi.is_stkvar(ea_flags(ea), n)
def mask_op_val(op): def mask_op_val(op):
''' mask off a value based on data type """ mask off a value based on data type
necessary due to a bug in 64-bit necessary due to a bug in 64-bit
@@ -277,22 +275,22 @@ def mask_op_val(op):
insn.Op2.dtype == idaapi.dt_dword insn.Op2.dtype == idaapi.dt_dword
insn.Op2.value == 0xffffffffffffffff insn.Op2.value == 0xffffffffffffffff
''' """
masks = { masks = {
idaapi.dt_byte: 0xFF, idaapi.dt_byte: 0xFF,
idaapi.dt_word: 0xFFFF, idaapi.dt_word: 0xFFFF,
idaapi.dt_dword: 0xFFFFFFFF, idaapi.dt_dword: 0xFFFFFFFF,
idaapi.dt_qword: 0xFFFFFFFFFFFFFFFF idaapi.dt_qword: 0xFFFFFFFFFFFFFFFF,
} }
mask = masks.get(op.dtype, None) mask = masks.get(op.dtype, None)
if not mask: if not mask:
raise ValueError('No support for operand data type 0x%x' % op.dtype) raise ValueError("No support for operand data type 0x%x" % op.dtype)
return mask & op.value return mask & op.value
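The bug the mask above works around, in miniature:

reported = 0xFFFFFFFFFFFFFFFF       # a dt_dword operand value, sign-extended to 64 bits
print(hex(0xFFFFFFFF & reported))   # 0xffffffff -- the true 32-bit value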
def ea_to_offset(ea): def ea_to_offset(ea):
''' convert virtual address to file offset ''' """ convert virtual address to file offset """
return idaapi.get_fileregion_offset(ea) return idaapi.get_fileregion_offset(ea)

View File

@@ -26,7 +26,7 @@ def get_imports():
def _check_for_api_call(insn): def _check_for_api_call(insn):
''' check instruction for API call ''' """ check instruction for API call """
if not idaapi.is_call_insn(insn): if not idaapi.is_call_insn(insn):
return return
@@ -34,7 +34,7 @@ def _check_for_api_call(insn):
imp = get_imports().get(call_ref, None) imp = get_imports().get(call_ref, None)
if imp: if imp:
yield '%s.%s' % (imp[0], imp[1]) yield "%s.%s" % (imp[0], imp[1])
else: else:
f = idaapi.get_func(call_ref) f = idaapi.get_func(call_ref)
@@ -46,11 +46,11 @@ def _check_for_api_call(insn):
imp = get_imports().get(thunk_ref, None) imp = get_imports().get(thunk_ref, None)
if imp: if imp:
yield '%s.%s' % (imp[0], imp[1]) yield "%s.%s" % (imp[0], imp[1])
def extract_insn_api_features(f, bb, insn): def extract_insn_api_features(f, bb, insn):
''' parse instruction API features """ parse instruction API features
args: args:
f (IDA func_t) f (IDA func_t)
@@ -59,14 +59,14 @@ def extract_insn_api_features(f, bb, insn):
example: example:
call dword [0x00473038] call dword [0x00473038]
''' """
for api_name in _check_for_api_call(insn): for api_name in _check_for_api_call(insn):
for feature, va in capa.features.extractors.helpers.generate_api_features(api_name, insn.ea): for feature, va in capa.features.extractors.helpers.generate_api_features(api_name, insn.ea):
yield feature, va yield feature, va
def extract_insn_number_features(f, bb, insn): def extract_insn_number_features(f, bb, insn):
''' parse instruction number features """ parse instruction number features
args: args:
f (IDA func_t) f (IDA func_t)
@@ -75,7 +75,7 @@ def extract_insn_number_features(f, bb, insn):
example: example:
push 3136B0h ; dwControlCode push 3136B0h ; dwControlCode
''' """
if idaapi.is_ret_insn(insn): if idaapi.is_ret_insn(insn):
# skip things like: # skip things like:
# .text:0042250E retn 8 # .text:0042250E retn 8
@@ -97,7 +97,7 @@ def extract_insn_number_features(f, bb, insn):
def extract_insn_bytes_features(f, bb, insn): def extract_insn_bytes_features(f, bb, insn):
''' parse referenced byte sequences """ parse referenced byte sequences
args: args:
f (IDA func_t) f (IDA func_t)
@@ -106,7 +106,7 @@ def extract_insn_bytes_features(f, bb, insn):
example: example:
push offset iid_004118d4_IShellLinkA ; riid push offset iid_004118d4_IShellLinkA ; riid
''' """
if idaapi.is_call_insn(insn): if idaapi.is_call_insn(insn):
# ignore call instructions # ignore call instructions
return return
@@ -119,7 +119,7 @@ def extract_insn_bytes_features(f, bb, insn):
def extract_insn_string_features(f, bb, insn): def extract_insn_string_features(f, bb, insn):
''' parse instruction string features """ parse instruction string features
args: args:
f (IDA func_t) f (IDA func_t)
@@ -128,7 +128,7 @@ def extract_insn_string_features(f, bb, insn):
example: example:
push offset aAcr ; "ACR > " push offset aAcr ; "ACR > "
''' """
for ref in idautils.DataRefsFrom(insn.ea): for ref in idautils.DataRefsFrom(insn.ea):
found = capa.features.extractors.ida.helpers.find_string_at(ref) found = capa.features.extractors.ida.helpers.find_string_at(ref)
if found: if found:
@@ -136,7 +136,7 @@ def extract_insn_string_features(f, bb, insn):
def extract_insn_offset_features(f, bb, insn): def extract_insn_offset_features(f, bb, insn):
''' parse instruction structure offset features """ parse instruction structure offset features
args: args:
f (IDA func_t) f (IDA func_t)
@@ -145,7 +145,7 @@ def extract_insn_offset_features(f, bb, insn):
example: example:
.text:0040112F cmp [esi+4], ebx .text:0040112F cmp [esi+4], ebx
''' """
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_phrase, idaapi.o_displ)): for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_phrase, idaapi.o_displ)):
if capa.features.extractors.ida.helpers.is_op_stack_var(insn.ea, op.n): if capa.features.extractors.ida.helpers.is_op_stack_var(insn.ea, op.n):
# skip stack offsets # skip stack offsets
@@ -156,7 +156,7 @@ def extract_insn_offset_features(f, bb, insn):
if not p_info: if not p_info:
continue continue
op_off = p_info['offset'] op_off = p_info["offset"]
if 0 == op_off: if 0 == op_off:
# TODO: Do we want to record offset of zero? # TODO: Do we want to record offset of zero?
@@ -172,26 +172,26 @@ def extract_insn_offset_features(f, bb, insn):
def _contains_stack_cookie_keywords(s): def _contains_stack_cookie_keywords(s):
''' check if string contains stack cookie keywords """ check if string contains stack cookie keywords
Examples: Examples:
xor ecx, ebp ; StackCookie xor ecx, ebp ; StackCookie
mov eax, ___security_cookie mov eax, ___security_cookie
''' """
if not s: if not s:
return False return False
s = s.strip().lower() s = s.strip().lower()
if 'cookie' not in s: if "cookie" not in s:
return False return False
return any(keyword in s for keyword in ('stack', 'security')) return any(keyword in s for keyword in ("stack", "security"))
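The docstring's own examples, run through the check above:

assert _contains_stack_cookie_keywords("xor ecx, ebp ; StackCookie")
assert _contains_stack_cookie_keywords("mov eax, ___security_cookie")
assert not _contains_stack_cookie_keywords("mov eax, cookie_jar")  # 'cookie' alone is not enough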
def _bb_stack_cookie_registers(bb): def _bb_stack_cookie_registers(bb):
''' scan basic block for stack cookie operations """ scan basic block for stack cookie operations
yield registers ids that may have been used for stack cookie operations yield registers ids that may have been used for stack cookie operations
@@ -215,7 +215,7 @@ def _bb_stack_cookie_registers(bb):
.text:004062FA jnz loc_40639D .text:004062FA jnz loc_40639D
TODO: this is expensive, but necessary?... TODO: this is expensive, but necessary?...
''' """
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea): for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
if _contains_stack_cookie_keywords(idc.GetDisasm(insn.ea)): if _contains_stack_cookie_keywords(idc.GetDisasm(insn.ea)):
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_reg,)): for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, op_type=(idaapi.o_reg,)):
@@ -225,7 +225,7 @@ def _bb_stack_cookie_registers(bb):
def _is_nzxor_stack_cookie(f, bb, insn): def _is_nzxor_stack_cookie(f, bb, insn):
''' check if nzxor is related to stack cookie ''' """ check if nzxor is related to stack cookie """
if _contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)): if _contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)):
# Example: # Example:
# xor ecx, ebp ; StackCookie # xor ecx, ebp ; StackCookie
@@ -241,7 +241,7 @@ def _is_nzxor_stack_cookie(f, bb, insn):
def extract_insn_nzxor_characteristic_features(f, bb, insn): def extract_insn_nzxor_characteristic_features(f, bb, insn):
''' parse instruction non-zeroing XOR instruction """ parse instruction non-zeroing XOR instruction
ignore expected non-zeroing XORs, e.g. security cookies ignore expected non-zeroing XORs, e.g. security cookies
@@ -249,7 +249,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
insn (IDA insn_t) insn (IDA insn_t)
''' """
if insn.itype != idaapi.NN_xor: if insn.itype != idaapi.NN_xor:
return return
@@ -259,28 +259,28 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
if _is_nzxor_stack_cookie(f, bb, insn): if _is_nzxor_stack_cookie(f, bb, insn):
return return
yield Characteristic('nzxor', True), insn.ea yield Characteristic("nzxor", True), insn.ea
def extract_insn_mnemonic_features(f, bb, insn): def extract_insn_mnemonic_features(f, bb, insn):
''' parse instruction mnemonic features """ parse instruction mnemonic features
args: args:
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
insn (IDA insn_t) insn (IDA insn_t)
''' """
yield Mnemonic(insn.get_canon_mnem()), insn.ea yield Mnemonic(insn.get_canon_mnem()), insn.ea
def extract_insn_peb_access_characteristic_features(f, bb, insn): def extract_insn_peb_access_characteristic_features(f, bb, insn):
''' parse instruction peb access """ parse instruction peb access
fs:[0x30] on x86, gs:[0x60] on x64 fs:[0x30] on x86, gs:[0x60] on x64
TODO: TODO:
IDA should be able to do this.. IDA should be able to do this..
''' """
if insn.itype not in (idaapi.NN_push, idaapi.NN_mov): if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
return return
@@ -290,40 +290,40 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
disasm = idc.GetDisasm(insn.ea) disasm = idc.GetDisasm(insn.ea)
if ' fs:30h' in disasm or ' gs:60h' in disasm: if " fs:30h" in disasm or " gs:60h" in disasm:
# TODO: replace above with proper IDA # TODO: replace above with proper IDA
yield Characteristic('peb access', True), insn.ea yield Characteristic("peb access", True), insn.ea
def extract_insn_segment_access_features(f, bb, insn): def extract_insn_segment_access_features(f, bb, insn):
''' parse instruction fs or gs access """ parse instruction fs or gs access
TODO: TODO:
IDA should be able to do this... IDA should be able to do this...
''' """
if all(map(lambda op: op.type != idaapi.o_mem, insn.ops)): if all(map(lambda op: op.type != idaapi.o_mem, insn.ops)):
# try to optimize for only memory references # try to optimize for only memory references
return return
disasm = idc.GetDisasm(insn.ea) disasm = idc.GetDisasm(insn.ea)
if ' fs:' in disasm: if " fs:" in disasm:
# TODO: replace above with proper IDA # TODO: replace above with proper IDA
yield Characteristic('fs access', True), insn.ea yield Characteristic("fs access", True), insn.ea
if ' gs:' in disasm: if " gs:" in disasm:
# TODO: replace above with proper IDA # TODO: replace above with proper IDA
yield Characteristic('gs access', True), insn.ea yield Characteristic("gs access", True), insn.ea
def extract_insn_cross_section_cflow(f, bb, insn): def extract_insn_cross_section_cflow(f, bb, insn):
''' inspect the instruction for a CALL or JMP that crosses section boundaries """ inspect the instruction for a CALL or JMP that crosses section boundaries
args: args:
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
insn (IDA insn_t) insn (IDA insn_t)
''' """
for ref in idautils.CodeRefsFrom(insn.ea, False): for ref in idautils.CodeRefsFrom(insn.ea, False):
if ref in get_imports().keys(): if ref in get_imports().keys():
# ignore API calls # ignore API calls
@@ -336,11 +336,11 @@ def extract_insn_cross_section_cflow(f, bb, insn):
if idaapi.getseg(ref) == idaapi.getseg(insn.ea): if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
continue continue
yield Characteristic('cross section flow', True), insn.ea yield Characteristic("cross section flow", True), insn.ea
def extract_function_calls_from(f, bb, insn): def extract_function_calls_from(f, bb, insn):
''' extract functions calls from features """ extract functions calls from features
most relevant at the function scope, however, it's most efficient to extract at the instruction scope most relevant at the function scope, however, it's most efficient to extract at the instruction scope
@@ -348,17 +348,17 @@ def extract_function_calls_from(f, bb, insn):
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
insn (IDA insn_t) insn (IDA insn_t)
''' """
if not idaapi.is_call_insn(insn): if not idaapi.is_call_insn(insn):
# ignore jmp, etc. # ignore jmp, etc.
return return
for ref in idautils.CodeRefsFrom(insn.ea, False): for ref in idautils.CodeRefsFrom(insn.ea, False):
yield Characteristic('calls from', True), ref yield Characteristic("calls from", True), ref
def extract_function_indirect_call_characteristic_features(f, bb, insn): def extract_function_indirect_call_characteristic_features(f, bb, insn):
''' extract indirect function calls (e.g., call eax or call dword ptr [edx+4]) """ extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
does not include calls like => call ds:dword_ABD4974 does not include calls like => call ds:dword_ABD4974
most relevant at the function or basic block scope; most relevant at the function or basic block scope;
@@ -368,22 +368,22 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
insn (IDA insn_t) insn (IDA insn_t)
''' """
if not idaapi.is_call_insn(insn): if not idaapi.is_call_insn(insn):
return return
if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ): if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
yield Characteristic('indirect call', True), insn.ea yield Characteristic("indirect call", True), insn.ea
def extract_features(f, bb, insn): def extract_features(f, bb, insn):
''' extract instruction features """ extract instruction features
args: args:
f (IDA func_t) f (IDA func_t)
bb (IDA BasicBlock) bb (IDA BasicBlock)
insn (IDA insn_t) insn (IDA insn_t)
''' """
for inst_handler in INSTRUCTION_HANDLERS: for inst_handler in INSTRUCTION_HANDLERS:
for feature, va in inst_handler(f, bb, insn): for feature, va in inst_handler(f, bb, insn):
yield feature, va yield feature, va
@@ -401,7 +401,7 @@ INSTRUCTION_HANDLERS = (
extract_insn_cross_section_cflow, extract_insn_cross_section_cflow,
extract_insn_segment_access_features, extract_insn_segment_access_features,
extract_function_calls_from, extract_function_calls_from,
extract_function_indirect_call_characteristic_features extract_function_indirect_call_characteristic_features,
) )
@@ -416,5 +416,5 @@ def main():
pprint.pprint(features) pprint.pprint(features)
if __name__ == '__main__': if __name__ == "__main__":
main() main()
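A minimal driver for the instruction handlers above could look like the following sketch. The wiring is illustrative and not part of the commit; idautils.Functions, idaapi.get_func, and idaapi.FlowChart are standard IDA APIs, and get_instructions_in_range is the helper already used in this file:

    import idaapi
    import idautils

    import capa.features.extractors.ida.helpers

    def extract_all_insn_features():
        # walk every function, basic block, and instruction in the IDB
        for fva in idautils.Functions():
            f = idaapi.get_func(fva)
            for bb in idaapi.FlowChart(f):
                for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
                    for feature, va in extract_features(f, bb, insn):
                        yield feature, va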
View File
@@ -3,7 +3,7 @@ from networkx import nx
def has_loop(edges, threshold=2): def has_loop(edges, threshold=2):
''' check if a list of edges representing a directed graph contains a loop """ check if a list of edges representing a directed graph contains a loop
args: args:
edges: list of edge tuples representing a directed graph, e.g. [(1, 2), (2, 1)] edges: list of edge tuples representing a directed graph, e.g. [(1, 2), (2, 1)]
@@ -11,7 +11,7 @@ def has_loop(edges, threshold=2):
returns: returns:
bool bool
''' """
g = nx.DiGraph() g = nx.DiGraph()
g.add_edges_from(edges) g.add_edges_from(edges)
return any(len(comp) >= threshold for comp in strongly_connected_components(g)) return any(len(comp) >= threshold for comp in strongly_connected_components(g))
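Expected behavior, matching the docstring's example (illustrative doctest, not part of the commit):

    >>> has_loop([(1, 2), (2, 1)])
    True
    >>> has_loop([(1, 2), (2, 3)])
    False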
View File
@@ -7,26 +7,28 @@ import re
from collections import namedtuple from collections import namedtuple
ASCII_BYTE = r' !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t'.encode('ascii') ASCII_BYTE = r" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t".encode("ascii")
ASCII_RE_4 = re.compile(b'([%s]{%d,})' % (ASCII_BYTE, 4)) ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4))
UNICODE_RE_4 = re.compile(b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, 4)) UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
REPEATS = [b'A', b'\x00', b'\xfe', b'\xff'] REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
SLICE_SIZE = 4096 SLICE_SIZE = 4096
String = namedtuple('String', ['s', 'offset']) String = namedtuple("String", ["s", "offset"])
def buf_filled_with(buf, character): def buf_filled_with(buf, character):
dupe_chunk = character * SLICE_SIZE dupe_chunk = character * SLICE_SIZE
for offset in range(0, len(buf), SLICE_SIZE): for offset in range(0, len(buf), SLICE_SIZE):
new_chunk = buf[offset: offset + SLICE_SIZE] new_chunk = buf[offset : offset + SLICE_SIZE]
if dupe_chunk[:len(new_chunk)] != new_chunk: if dupe_chunk[: len(new_chunk)] != new_chunk:
return False return False
return True return True
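buf_filled_with compares the buffer chunk-by-chunk against a repeated character, so for example (illustrative):

    >>> buf_filled_with(b"\x00" * 8192, b"\x00")
    True
    >>> buf_filled_with(b"\x00" * 8191 + b"A", b"\x00")
    False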
def extract_ascii_strings(buf, n=4): def extract_ascii_strings(buf, n=4):
''' """
Extract ASCII strings from the given binary data. Extract ASCII strings from the given binary data.
:param buf: A bytestring. :param buf: A bytestring.
@@ -34,7 +36,7 @@ def extract_ascii_strings(buf, n=4):
:param n: The minimum length of strings to extract. :param n: The minimum length of strings to extract.
:type n: int :type n: int
:rtype: Sequence[String] :rtype: Sequence[String]
''' """
if not buf: if not buf:
return return
@@ -46,14 +48,14 @@ def extract_ascii_strings(buf, n=4):
if n == 4: if n == 4:
r = ASCII_RE_4 r = ASCII_RE_4
else: else:
reg = b'([%s]{%d,})' % (ASCII_BYTE, n) reg = b"([%s]{%d,})" % (ASCII_BYTE, n)
r = re.compile(reg) r = re.compile(reg)
for match in r.finditer(buf): for match in r.finditer(buf):
yield String(match.group().decode('ascii'), match.start()) yield String(match.group().decode("ascii"), match.start())
def extract_unicode_strings(buf, n=4): def extract_unicode_strings(buf, n=4):
''' """
Extract naive UTF-16 strings from the given binary data. Extract naive UTF-16 strings from the given binary data.
:param buf: A bytestring. :param buf: A bytestring.
@@ -61,7 +63,7 @@ def extract_unicode_strings(buf, n=4):
:param n: The minimum length of strings to extract. :param n: The minimum length of strings to extract.
:type n: int :type n: int
:rtype: Sequence[String] :rtype: Sequence[String]
''' """
if not buf: if not buf:
return return
@@ -72,11 +74,11 @@ def extract_unicode_strings(buf, n=4):
if n == 4: if n == 4:
r = UNICODE_RE_4 r = UNICODE_RE_4
else: else:
reg = b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, n) reg = b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n)
r = re.compile(reg) r = re.compile(reg)
for match in r.finditer(buf): for match in r.finditer(buf):
try: try:
yield String(match.group().decode('utf-16'), match.start()) yield String(match.group().decode("utf-16"), match.start())
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
@@ -84,15 +86,15 @@ def extract_unicode_strings(buf, n=4):
def main(): def main():
import sys import sys
with open(sys.argv[1], 'rb') as f: with open(sys.argv[1], "rb") as f:
b = f.read() b = f.read()
for s in extract_ascii_strings(b): for s in extract_ascii_strings(b):
print('0x{:x}: {:s}'.format(s.offset, s.s)) print("0x{:x}: {:s}".format(s.offset, s.s))
for s in extract_unicode_strings(b): for s in extract_unicode_strings(b):
print('0x{:x}: {:s}'.format(s.offset, s.s)) print("0x{:x}: {:s}".format(s.offset, s.s))
if __name__ == '__main__': if __name__ == "__main__":
main() main()
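For example, the ASCII extractor yields one String per printable run of at least n characters (illustrative; the surrounding non-ASCII bytes terminate the run):

    >>> list(extract_ascii_strings(b"\x00\x01hello world\x00"))
    [String(s='hello world', offset=2)]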
View File
@@ -13,7 +13,8 @@ import file
import function import function
import basicblock import basicblock
import insn import insn
__all__ = ['file', 'function', 'basicblock', 'insn'] __all__ = ["file", "function", "basicblock", "insn"]
def get_va(self): def get_va(self):
@@ -27,14 +28,14 @@ def get_va(self):
def add_va_int_cast(o): def add_va_int_cast(o):
''' """
dynamically add a cast-to-int (`__int__`) method to the given object dynamically add a cast-to-int (`__int__`) method to the given object
that returns the value of the `.va` property. that returns the value of the `.va` property.
this bit of skullduggery lets us cast viv-utils objects as ints. this bit of skullduggery lets us cast viv-utils objects as ints.
the correct way of doing this is to update viv-utils (or subclass the objects here). the correct way of doing this is to update viv-utils (or subclass the objects here).
''' """
setattr(o, '__int__', types.MethodType(get_va, o, type(o))) setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
return o return o
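A minimal sketch of the cast in action; this assumes Python 2 (where types.MethodType takes three arguments) and that get_va simply returns self.va. The stand-in class is hypothetical:

    class _FakeLoc:  # old-style class, so the per-instance __int__ is honored under py2
        va = 0x401000

    loc = add_va_int_cast(_FakeLoc())
    print(int(loc))  # 4198400 == 0x401000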
View File
@@ -10,7 +10,7 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
def interface_extract_basic_block_XXX(f, bb): def interface_extract_basic_block_XXX(f, bb):
''' """
parse features from the given basic block. parse features from the given basic block.
args: args:
@@ -19,14 +19,14 @@ def interface_extract_basic_block_XXX(f, bb):
yields: yields:
(Feature, int): the feature and the address at which it's found. (Feature, int): the feature and the address at which it's found.
''' """
yield NotImplementedError('feature'), NotImplementedError('virtual address') yield NotImplementedError("feature"), NotImplementedError("virtual address")
def _bb_has_tight_loop(f, bb): def _bb_has_tight_loop(f, bb):
''' """
parse tight loops, true if last instruction in basic block branches to bb start parse tight loops, true if last instruction in basic block branches to bb start
''' """
if len(bb.instructions) > 0: if len(bb.instructions) > 0:
for bva, bflags in bb.instructions[-1].getBranches(): for bva, bflags in bb.instructions[-1].getBranches():
if bflags & vivisect.envi.BR_COND: if bflags & vivisect.envi.BR_COND:
@@ -37,16 +37,16 @@ def _bb_has_tight_loop(f, bb):
def extract_bb_tight_loop(f, bb): def extract_bb_tight_loop(f, bb):
''' check basic block for tight loop indicators ''' """ check basic block for tight loop indicators """
if _bb_has_tight_loop(f, bb): if _bb_has_tight_loop(f, bb):
yield Characteristic('tight loop', True), bb.va yield Characteristic("tight loop", True), bb.va
def _bb_has_stackstring(f, bb): def _bb_has_stackstring(f, bb):
''' """
extract potential stackstring creation, using the following heuristics: extract potential stackstring creation, using the following heuristics:
- basic block contains enough moves of constant bytes to the stack - basic block contains enough moves of constant bytes to the stack
''' """
count = 0 count = 0
for instr in bb.instructions: for instr in bb.instructions:
if is_mov_imm_to_stack(instr): if is_mov_imm_to_stack(instr):
@@ -60,16 +60,16 @@ def _bb_has_stackstring(f, bb):
def extract_stackstring(f, bb): def extract_stackstring(f, bb):
''' check basic block for stackstring indicators ''' """ check basic block for stackstring indicators """
if _bb_has_stackstring(f, bb): if _bb_has_stackstring(f, bb):
yield Characteristic('stack string', True), bb.va yield Characteristic("stack string", True), bb.va
def is_mov_imm_to_stack(instr): def is_mov_imm_to_stack(instr):
''' """
Return if instruction moves immediate onto stack Return if instruction moves immediate onto stack
''' """
if not instr.mnem.startswith('mov'): if not instr.mnem.startswith("mov"):
return False return False
try: try:
@@ -82,32 +82,33 @@ def is_mov_imm_to_stack(instr):
return False return False
# TODO what about 64-bit operands? # TODO what about 64-bit operands?
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance(dst, envi.archs.i386.disasm.i386RegMemOper): if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance(dst, envi.archs.i386.disasm.i386RegMemOper):
return False return False
if not dst.reg: if not dst.reg:
return False return False
rname = dst._dis_regctx.getRegisterName(dst.reg) rname = dst._dis_regctx.getRegisterName(dst.reg)
if rname not in ['ebp', 'rbp', 'esp', 'rsp']: if rname not in ["ebp", "rbp", "esp", "rsp"]:
return False return False
return True return True
def get_printable_len(oper): def get_printable_len(oper):
''' """
Return string length if all operand bytes are ascii or utf16-le printable Return string length if all operand bytes are ascii or utf16-le printable
''' """
if oper.tsize == 1: if oper.tsize == 1:
chars = struct.pack('<B', oper.imm) chars = struct.pack("<B", oper.imm)
elif oper.tsize == 2: elif oper.tsize == 2:
chars = struct.pack('<H', oper.imm) chars = struct.pack("<H", oper.imm)
elif oper.tsize == 4: elif oper.tsize == 4:
chars = struct.pack('<I', oper.imm) chars = struct.pack("<I", oper.imm)
elif oper.tsize == 8: elif oper.tsize == 8:
chars = struct.pack('<Q', oper.imm) chars = struct.pack("<Q", oper.imm)
if is_printable_ascii(chars): if is_printable_ascii(chars):
return oper.tsize return oper.tsize
if is_printable_utf16le(chars): if is_printable_utf16le(chars):
@@ -120,12 +121,12 @@ def is_printable_ascii(chars):
def is_printable_utf16le(chars): def is_printable_utf16le(chars):
if all(c == '\x00' for c in chars[1::2]): if all(c == "\x00" for c in chars[1::2]):
return is_printable_ascii(chars[::2]) return is_printable_ascii(chars[::2])
def extract_features(f, bb): def extract_features(f, bb):
''' """
extract features from the given basic block. extract features from the given basic block.
args: args:
@@ -134,7 +135,7 @@ def extract_features(f, bb):
yields: yields:
Feature, set[VA]: the features and their location found in this basic block. Feature, set[VA]: the features and their location found in this basic block.
''' """
yield BasicBlock(), bb.va yield BasicBlock(), bb.va
for bb_handler in BASIC_BLOCK_HANDLERS: for bb_handler in BASIC_BLOCK_HANDLERS:
for feature, va in bb_handler(f, bb): for feature, va in bb_handler(f, bb):
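To illustrate get_printable_len on an immediate operand (the _Oper stand-in is hypothetical, but real vivisect immediate operands carry imm and tsize the same way; assumes is_printable_ascii accepts plain ASCII bytes):

    class _Oper(object):  # hypothetical stand-in for a vivisect immediate operand
        def __init__(self, imm, tsize):
            self.imm = imm
            self.tsize = tsize

    print(get_printable_len(_Oper(0x41414141, 4)))  # packs to b"AAAA" -> 4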
View File
@@ -9,11 +9,11 @@ import capa.features.extractors.strings
def extract_file_embedded_pe(vw, file_path): def extract_file_embedded_pe(vw, file_path):
with open(file_path, 'rb') as f: with open(file_path, "rb") as f:
fbytes = f.read() fbytes = f.read()
for offset, i in pe_carve.carve(fbytes, 1): for offset, i in pe_carve.carve(fbytes, 1):
yield Characteristic('embedded pe', True), offset yield Characteristic("embedded pe", True), offset
def extract_file_export_names(vw, file_path): def extract_file_export_names(vw, file_path):
@@ -22,21 +22,21 @@ def extract_file_export_names(vw, file_path):
def extract_file_import_names(vw, file_path): def extract_file_import_names(vw, file_path):
''' """
extract imported function names extract imported function names
1. imports by ordinal: 1. imports by ordinal:
- modulename.#ordinal - modulename.#ordinal
2. imports by name, results in two features to support importname-only matching: 2. imports by name, results in two features to support importname-only matching:
- modulename.importname - modulename.importname
- importname - importname
''' """
for va, _, _, tinfo in vw.getImports(): for va, _, _, tinfo in vw.getImports():
# vivisect source: tinfo = "%s.%s" % (libname, impname) # vivisect source: tinfo = "%s.%s" % (libname, impname)
modname, impname = tinfo.split('.') modname, impname = tinfo.split(".")
if is_viv_ord_impname(impname): if is_viv_ord_impname(impname):
# replace ord prefix with # # replace ord prefix with #
impname = '#%s' % impname[len('ord'):] impname = "#%s" % impname[len("ord") :]
tinfo = '%s.%s' % (modname, impname) tinfo = "%s.%s" % (modname, impname)
yield Import(tinfo), va yield Import(tinfo), va
else: else:
yield Import(tinfo), va yield Import(tinfo), va
@@ -44,13 +44,13 @@ def extract_file_import_names(vw, file_path):
def is_viv_ord_impname(impname): def is_viv_ord_impname(impname):
''' """
return if import name matches vivisect's ordinal naming scheme `'ord%d' % ord` return if import name matches vivisect's ordinal naming scheme `'ord%d' % ord`
''' """
if not impname.startswith('ord'): if not impname.startswith("ord"):
return False return False
try: try:
int(impname[len('ord'):]) int(impname[len("ord") :])
except ValueError: except ValueError:
return False return False
else: else:
@@ -63,10 +63,10 @@ def extract_file_section_names(vw, file_path):
def extract_file_strings(vw, file_path): def extract_file_strings(vw, file_path):
''' """
extract ASCII and UTF-16 LE strings from file extract ASCII and UTF-16 LE strings from file
''' """
with open(file_path, 'rb') as f: with open(file_path, "rb") as f:
b = f.read() b = f.read()
for s in capa.features.extractors.strings.extract_ascii_strings(b): for s in capa.features.extractors.strings.extract_ascii_strings(b):
@@ -77,7 +77,7 @@ def extract_file_strings(vw, file_path):
def extract_features(vw, file_path): def extract_features(vw, file_path):
''' """
extract file features from given workspace extract file features from given workspace
args: args:
@@ -86,7 +86,7 @@ def extract_features(vw, file_path):
yields: yields:
Tuple[Feature, VA]: a feature and its location. Tuple[Feature, VA]: a feature and its location.
''' """
for file_handler in FILE_HANDLERS: for file_handler in FILE_HANDLERS:
for feature, va in file_handler(vw, file_path): for feature, va in file_handler(vw, file_path):
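Expected behavior of the ordinal-import helpers above (illustrative; per the docstring, is_viv_ord_impname matches vivisect's `'ord%d' % ord` naming):

    >>> is_viv_ord_impname("ord100")
    True
    >>> is_viv_ord_impname("CreateFileA")
    False
    >>> "#%s" % "ord100"[len("ord"):]  # the ordinal rewrite applied above
    '#100'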
View File
@@ -5,7 +5,7 @@ from capa.features.extractors import loops
def interface_extract_function_XXX(f): def interface_extract_function_XXX(f):
''' """
parse features from the given function. parse features from the given function.
args: args:
@@ -13,58 +13,58 @@ def interface_extract_function_XXX(f):
yields: yields:
(Feature, int): the feature and the address at which it's found. (Feature, int): the feature and the address at which it's found.
''' """
yield NotImplementedError('feature'), NotImplementedError('virtual address') yield NotImplementedError("feature"), NotImplementedError("virtual address")
def get_switches(vw): def get_switches(vw):
''' """
caching accessor to vivisect workspace switch constructs. caching accessor to vivisect workspace switch constructs.
''' """
if 'switches' in vw.metadata: if "switches" in vw.metadata:
return vw.metadata['switches'] return vw.metadata["switches"]
else: else:
# addresses of switches in the program # addresses of switches in the program
switches = set() switches = set()
for case_va, _ in filter(lambda t: 'case' in t[1], vw.getNames()): for case_va, _ in filter(lambda t: "case" in t[1], vw.getNames()):
# assume that the xref to a case location is a switch construct # assume that the xref to a case location is a switch construct
for switch_va, _, _, _ in vw.getXrefsTo(case_va): for switch_va, _, _, _ in vw.getXrefsTo(case_va):
switches.add(switch_va) switches.add(switch_va)
vw.metadata['switches'] = switches vw.metadata["switches"] = switches
return switches return switches
def get_functions_with_switch(vw): def get_functions_with_switch(vw):
if 'functions_with_switch' in vw.metadata: if "functions_with_switch" in vw.metadata:
return vw.metadata['functions_with_switch'] return vw.metadata["functions_with_switch"]
else: else:
functions = set() functions = set()
for switch in get_switches(vw): for switch in get_switches(vw):
functions.add(vw.getFunction(switch)) functions.add(vw.getFunction(switch))
vw.metadata['functions_with_switch'] = functions vw.metadata["functions_with_switch"] = functions
return functions return functions
def extract_function_switch(f): def extract_function_switch(f):
''' """
parse if a function contains a switch statement based on location names parse if a function contains a switch statement based on location names
method can be optimized method can be optimized
''' """
if f.va in get_functions_with_switch(f.vw): if f.va in get_functions_with_switch(f.vw):
yield Characteristic('switch', True), f.va yield Characteristic("switch", True), f.va
def extract_function_calls_to(f): def extract_function_calls_to(f):
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE): for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
yield Characteristic('calls to', True), src yield Characteristic("calls to", True), src
def extract_function_loop(f): def extract_function_loop(f):
''' """
parse if a function has a loop parse if a function has a loop
''' """
edges = [] edges = []
for bb in f.basic_blocks: for bb in f.basic_blocks:
@@ -74,11 +74,11 @@ def extract_function_loop(f):
edges.append((bb.va, bva)) edges.append((bb.va, bva))
if edges and loops.has_loop(edges): if edges and loops.has_loop(edges):
yield Characteristic('loop', True), f.va yield Characteristic("loop", True), f.va
def extract_features(f): def extract_features(f):
''' """
extract features from the given function. extract features from the given function.
args: args:
@@ -86,14 +86,10 @@ def extract_features(f):
yields: yields:
Feature, set[VA]: the features and their location found in this function. Feature, set[VA]: the features and their location found in this function.
''' """
for func_handler in FUNCTION_HANDLERS: for func_handler in FUNCTION_HANDLERS:
for feature, va in func_handler(f): for feature, va in func_handler(f):
yield feature, va yield feature, va
FUNCTION_HANDLERS = (extract_function_switch, extract_function_calls_to, extract_function_loop) FUNCTION_HANDLERS = (extract_function_switch, extract_function_calls_to, extract_function_loop)
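A possible driver for these function-scope handlers (illustrative wiring, not part of the commit; vw.getFunctions() is vivisect's function listing and viv_utils.Function is the wrapper capa uses elsewhere):

    import viv_utils

    def extract_all_function_features(vw):
        for fva in sorted(vw.getFunctions()):
            f = viv_utils.Function(vw, fva)
            for feature, va in extract_features(f):
                yield feature, va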
View File
@@ -14,13 +14,13 @@ Amd64RipRelOper = envi.archs.amd64.disasm.Amd64RipRelOper
LOC_OP = vivisect.const.LOC_OP LOC_OP = vivisect.const.LOC_OP
IF_NOFALL = envi.IF_NOFALL IF_NOFALL = envi.IF_NOFALL
REF_CODE = vivisect.const.REF_CODE REF_CODE = vivisect.const.REF_CODE
FAR_BRANCH_MASK = (envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH) FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH
DESTRUCTIVE_MNEMONICS = ('mov', 'lea', 'pop', 'xor') DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
def get_previous_instructions(vw, va): def get_previous_instructions(vw, va):
''' """
collect the instructions that flow to the given address, local to the current function. collect the instructions that flow to the given address, local to the current function.
args: args:
@@ -29,7 +29,7 @@ def get_previous_instructions(vw, va):
returns: returns:
List[int]: the prior instructions, which may fallthrough and/or jump here List[int]: the prior instructions, which may fallthrough and/or jump here
''' """
ret = [] ret = []
# find the immediate prior instruction. # find the immediate prior instruction.
@@ -61,7 +61,7 @@ class NotFoundError(Exception):
def find_definition(vw, va, reg): def find_definition(vw, va, reg):
''' """
scan backwards from the given address looking for assignments to the given register. scan backwards from the given address looking for assignments to the given register.
if a constant, return that value. if a constant, return that value.
@@ -75,7 +75,7 @@ def find_definition(vw, va, reg):
raises: raises:
NotFoundError: when the definition cannot be found. NotFoundError: when the definition cannot be found.
''' """
q = collections.deque() q = collections.deque()
seen = set([]) seen = set([])
@@ -95,10 +95,7 @@ def find_definition(vw, va, reg):
continue continue
opnd0 = insn.opers[0] opnd0 = insn.opers[0]
if not (isinstance(opnd0, i386RegOper) and opnd0.reg == reg and insn.mnem in DESTRUCTIVE_MNEMONICS): if not (isinstance(opnd0, i386RegOper) and opnd0.reg == reg and insn.mnem in DESTRUCTIVE_MNEMONICS):
q.extend(get_previous_instructions(vw, cur)) q.extend(get_previous_instructions(vw, cur))
continue continue
@@ -107,7 +104,7 @@ def find_definition(vw, va, reg):
# we currently only support extracting the constant from something like: `mov $reg, IAT` # we currently only support extracting the constant from something like: `mov $reg, IAT`
# so, any other pattern results in an unknown value, represented by None. # so, any other pattern results in an unknown value, represented by None.
# this is a good place to extend in the future, if we need more robust support. # this is a good place to extend in the future, if we need more robust support.
if insn.mnem != 'mov': if insn.mnem != "mov":
return (cur, None) return (cur, None)
else: else:
opnd1 = insn.opers[1] opnd1 = insn.opers[1]
@@ -128,12 +125,11 @@ def is_indirect_call(vw, va, insn=None):
if insn is None: if insn is None:
insn = vw.parseOpcode(va) insn = vw.parseOpcode(va)
return (insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)) return insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
def resolve_indirect_call(vw, va, insn=None): def resolve_indirect_call(vw, va, insn=None):
''' """
inspect the given indirect call instruction and attempt to resolve the target address. inspect the given indirect call instruction and attempt to resolve the target address.
args: args:
@@ -145,7 +141,7 @@ def resolve_indirect_call(vw, va, insn=None):
raises: raises:
NotFoundError: when the definition cannot be found. NotFoundError: when the definition cannot be found.
''' """
if insn is None: if insn is None:
insn = vw.parseOpcode(va) insn = vw.parseOpcode(va)
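Putting the two helpers together might look like this sketch (the (va, value) return shape is assumed to mirror find_definition, as documented above):

    insn = vw.parseOpcode(va)
    if is_indirect_call(vw, va, insn=insn):
        try:
            # target may be None when the definition is not a constant
            defva, target = resolve_indirect_call(vw, va, insn=insn)
        except NotFoundError:
            target = None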
View File
@@ -15,7 +15,7 @@ from capa.features.extractors.viv.indirect_calls import resolve_indirect_call
def interface_extract_instruction_XXX(f, bb, insn): def interface_extract_instruction_XXX(f, bb, insn):
''' """
parse features from the given instruction. parse features from the given instruction.
args: args:
@@ -25,31 +25,31 @@ def interface_extract_instruction_XXX(f, bb, insn):
yields: yields:
(Feature, int): the feature and the address at which it's found. (Feature, int): the feature and the address at which it's found.
''' """
yield NotImplementedError('feature'), NotImplementedError('virtual address') yield NotImplementedError("feature"), NotImplementedError("virtual address")
def get_imports(vw): def get_imports(vw):
''' """
caching accessor to vivisect workspace imports caching accessor to vivisect workspace imports
avoids performance issues in vivisect when collecting locations avoids performance issues in vivisect when collecting locations
''' """
if 'imports' in vw.metadata: if "imports" in vw.metadata:
return vw.metadata['imports'] return vw.metadata["imports"]
else: else:
imports = {p[0]: p[3] for p in vw.getImports()} imports = {p[0]: p[3] for p in vw.getImports()}
vw.metadata['imports'] = imports vw.metadata["imports"] = imports
return imports return imports
def extract_insn_api_features(f, bb, insn): def extract_insn_api_features(f, bb, insn):
'''parse API features from the given instruction.''' """parse API features from the given instruction."""
# example: # example:
# #
# call dword [0x00473038] # call dword [0x00473038]
if insn.mnem != 'call': if insn.mnem != "call":
return return
# traditional call via IAT # traditional call via IAT
@@ -71,7 +71,7 @@ def extract_insn_api_features(f, bb, insn):
target = insn.opers[0].getOperValue(insn) target = insn.opers[0].getOperValue(insn)
try: try:
thunk = f.vw.getFunctionMeta(target, 'Thunk') thunk = f.vw.getFunctionMeta(target, "Thunk")
except vivisect.exc.InvalidFunction: except vivisect.exc.InvalidFunction:
return return
else: else:
@@ -108,7 +108,7 @@ def extract_insn_api_features(f, bb, insn):
def extract_insn_number_features(f, bb, insn): def extract_insn_number_features(f, bb, insn):
'''parse number features from the given instruction.''' """parse number features from the given instruction."""
# example: # example:
# #
# push 3136B0h ; dwControlCode # push 3136B0h ; dwControlCode
@@ -124,9 +124,7 @@ def extract_insn_number_features(f, bb, insn):
# assume it's not also a constant. # assume it's not also a constant.
continue continue
if insn.mnem == 'add' and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.disasm.REG_ESP: if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.disasm.REG_ESP:
# skip things like: # skip things like:
# #
# .text:00401140 call sub_407E2B # .text:00401140 call sub_407E2B
@@ -137,13 +135,13 @@ def extract_insn_number_features(f, bb, insn):
def extract_insn_bytes_features(f, bb, insn): def extract_insn_bytes_features(f, bb, insn):
''' """
parse byte sequence features from the given instruction. parse byte sequence features from the given instruction.
example: example:
# push offset iid_004118d4_IShellLinkA ; riid # push offset iid_004118d4_IShellLinkA ; riid
''' """
for oper in insn.opers: for oper in insn.opers:
if insn.mnem == 'call': if insn.mnem == "call":
# ignore call instructions # ignore call instructions
continue continue
@@ -184,7 +182,7 @@ def read_string(vw, offset):
pass pass
else: else:
if alen > 0: if alen > 0:
return vw.readMemory(offset, alen).decode('utf-8') return vw.readMemory(offset, alen).decode("utf-8")
try: try:
ulen = vw.detectUnicode(offset) ulen = vw.detectUnicode(offset)
@@ -199,13 +197,13 @@ def read_string(vw, offset):
# vivisect seems to mis-detect the end of unicode strings # vivisect seems to mis-detect the end of unicode strings
# off by one, too short # off by one, too short
ulen += 1 ulen += 1
return vw.readMemory(offset, ulen).decode('utf-16') return vw.readMemory(offset, ulen).decode("utf-16")
raise ValueError('not a string', offset) raise ValueError("not a string", offset)
def extract_insn_string_features(f, bb, insn): def extract_insn_string_features(f, bb, insn):
'''parse string features from the given instruction.''' """parse string features from the given instruction."""
# example: # example:
# #
# push offset aAcr ; "ACR > " # push offset aAcr ; "ACR > "
@@ -222,11 +220,11 @@ def extract_insn_string_features(f, bb, insn):
except ValueError: except ValueError:
continue continue
else: else:
yield String(s.rstrip('\x00')), insn.va yield String(s.rstrip("\x00")), insn.va
def extract_insn_offset_features(f, bb, insn): def extract_insn_offset_features(f, bb, insn):
'''parse structure offset features from the given instruction.''' """parse structure offset features from the given instruction."""
# example: # example:
# #
# .text:0040112F cmp [esi+4], ebx # .text:0040112F cmp [esi+4], ebx
@@ -249,15 +247,18 @@ def extract_insn_offset_features(f, bb, insn):
def is_security_cookie(f, bb, insn): def is_security_cookie(f, bb, insn):
''' """
check if an instruction is related to security cookie checks check if an instruction is related to security cookie checks
''' """
# security cookie check should use SP or BP # security cookie check should use SP or BP
oper = insn.opers[1] oper = insn.opers[1]
# TODO: do x64 support for real. # TODO: do x64 support for real.
if oper.isReg() and oper.reg not in [envi.archs.i386.disasm.REG_ESP, envi.archs.i386.disasm.REG_EBP, envi.archs.amd64.disasm.REG_RBP, envi.archs.amd64.disasm.REG_RSP]: if oper.isReg() and oper.reg not in [envi.archs.i386.disasm.REG_ESP, envi.archs.i386.disasm.REG_EBP, envi.archs.amd64.disasm.REG_RBP, envi.archs.amd64.disasm.REG_RSP]:
return False return False
# expect security cookie init in first basic block within first bytes (instructions) # expect security cookie init in first basic block within first bytes (instructions)
@@ -273,11 +274,11 @@ def is_security_cookie(f, bb, insn):
def extract_insn_nzxor_characteristic_features(f, bb, insn): def extract_insn_nzxor_characteristic_features(f, bb, insn):
''' """
parse non-zeroing XOR instruction from the given instruction. parse non-zeroing XOR instruction from the given instruction.
ignore expected non-zeroing XORs, e.g. security cookies. ignore expected non-zeroing XORs, e.g. security cookies.
''' """
if insn.mnem != 'xor': if insn.mnem != "xor":
return return
if insn.opers[0] == insn.opers[1]: if insn.opers[0] == insn.opers[1]:
@@ -286,24 +287,24 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
if is_security_cookie(f, bb, insn): if is_security_cookie(f, bb, insn):
return return
yield Characteristic('nzxor', True), insn.va yield Characteristic("nzxor", True), insn.va
def extract_insn_mnemonic_features(f, bb, insn): def extract_insn_mnemonic_features(f, bb, insn):
'''parse mnemonic features from the given instruction.''' """parse mnemonic features from the given instruction."""
yield Mnemonic(insn.mnem), insn.va yield Mnemonic(insn.mnem), insn.va
def extract_insn_peb_access_characteristic_features(f, bb, insn): def extract_insn_peb_access_characteristic_features(f, bb, insn):
''' """
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64 parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
''' """
# TODO handle where fs/gs are loaded into a register or onto the stack and used later # TODO handle where fs/gs are loaded into a register or onto the stack and used later
if insn.mnem not in ['push', 'mov']: if insn.mnem not in ["push", "mov"]:
return return
if 'fs' in insn.getPrefixName(): if "fs" in insn.getPrefixName():
for oper in insn.opers: for oper in insn.opers:
# examples # examples
# #
@@ -312,27 +313,29 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
# IDA: push large dword ptr fs:30h # IDA: push large dword ptr fs:30h
# viv: fs: push dword [0x00000030] # viv: fs: push dword [0x00000030]
# fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0 # fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30): if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30):
yield Characteristic('peb access', True), insn.va yield Characteristic("peb access", True), insn.va
elif 'gs' in insn.getPrefixName(): elif "gs" in insn.getPrefixName():
for oper in insn.opers: for oper in insn.opers:
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60): if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60):
yield Characteristic('peb access', True), insn.va yield Characteristic("peb access", True), insn.va
else: else:
pass pass
def extract_insn_segment_access_features(f, bb, insn): def extract_insn_segment_access_features(f, bb, insn):
''' parse the instruction for access to fs or gs ''' """ parse the instruction for access to fs or gs """
prefix = insn.getPrefixName() prefix = insn.getPrefixName()
if prefix == 'fs': if prefix == "fs":
yield Characteristic('fs access', True), insn.va yield Characteristic("fs access", True), insn.va
if prefix == 'gs': if prefix == "gs":
yield Characteristic('gs access', True), insn.va yield Characteristic("gs access", True), insn.va
def get_section(vw, va): def get_section(vw, va):
@@ -344,16 +347,16 @@ def get_section(vw, va):
def extract_insn_cross_section_cflow(f, bb, insn): def extract_insn_cross_section_cflow(f, bb, insn):
''' """
inspect the instruction for a CALL or JMP that crosses section boundaries. inspect the instruction for a CALL or JMP that crosses section boundaries.
''' """
for va, flags in insn.getBranches(): for va, flags in insn.getBranches():
if flags & envi.BR_FALL: if flags & envi.BR_FALL:
continue continue
try: try:
# skip 32-bit calls to imports # skip 32-bit calls to imports
if insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper): if insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
oper = insn.opers[0] oper = insn.opers[0]
target = oper.getOperAddr(insn) target = oper.getOperAddr(insn)
@@ -361,7 +364,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
continue continue
# skip 64-bit calls to imports # skip 64-bit calls to imports
elif insn.mnem == 'call' and isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper): elif insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
op = insn.opers[0] op = insn.opers[0]
target = op.getOperAddr(insn) target = op.getOperAddr(insn)
@@ -369,7 +372,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
continue continue
if get_section(f.vw, insn.va) != get_section(f.vw, va): if get_section(f.vw, insn.va) != get_section(f.vw, va):
yield Characteristic('cross section flow', True), insn.va yield Characteristic("cross section flow", True), insn.va
except KeyError: except KeyError:
continue continue
@@ -378,7 +381,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
# this is a feature that's most relevant at the function scope, # this is a feature that's most relevant at the function scope,
# however, it's most efficient to extract at the instruction scope. # however, it's most efficient to extract at the instruction scope.
def extract_function_calls_from(f, bb, insn): def extract_function_calls_from(f, bb, insn):
if insn.mnem != 'call': if insn.mnem != "call":
return return
target = None target = None
@@ -387,7 +390,7 @@ def extract_function_calls_from(f, bb, insn):
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper): if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
oper = insn.opers[0] oper = insn.opers[0]
target = oper.getOperAddr(insn) target = oper.getOperAddr(insn)
yield Characteristic('calls from', True), target yield Characteristic("calls from", True), target
# call via thunk on x86, # call via thunk on x86,
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985 # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
@@ -396,44 +399,44 @@ def extract_function_calls_from(f, bb, insn):
# see Lab21-01.exe_:0x140001178 # see Lab21-01.exe_:0x140001178
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper): elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
target = insn.opers[0].getOperValue(insn) target = insn.opers[0].getOperValue(insn)
yield Characteristic('calls from', True), target yield Characteristic("calls from", True), target
# call via IAT, x64 # call via IAT, x64
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper): elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
op = insn.opers[0] op = insn.opers[0]
target = op.getOperAddr(insn) target = op.getOperAddr(insn)
yield Characteristic('calls from', True), target yield Characteristic("calls from", True), target
if target and target == f.va: if target and target == f.va:
# if we found a jump target and it's the function address # if we found a jump target and it's the function address
# mark as recursive # mark as recursive
yield Characteristic('recursive call', True), target yield Characteristic("recursive call", True), target
# this is a feature that's most relevant at the function or basic block scope, # this is a feature that's most relevant at the function or basic block scope,
# however, it's most efficient to extract at the instruction scope. # however, it's most efficient to extract at the instruction scope.
def extract_function_indirect_call_characteristic_features(f, bb, insn): def extract_function_indirect_call_characteristic_features(f, bb, insn):
''' """
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4]) extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
does not include calls like => call ds:dword_ABD4974 does not include calls like => call ds:dword_ABD4974
''' """
if insn.mnem != 'call': if insn.mnem != "call":
return return
# Checks below work for x86 and x64 # Checks below work for x86 and x64
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper): if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
# call edx # call edx
yield Characteristic('indirect call', True), insn.va yield Characteristic("indirect call", True), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper): elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper):
# call dword ptr [eax+50h] # call dword ptr [eax+50h]
yield Characteristic('indirect call', True), insn.va yield Characteristic("indirect call", True), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper): elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper):
# call qword ptr [rsp+78h] # call qword ptr [rsp+78h]
yield Characteristic('indirect call', True), insn.va yield Characteristic("indirect call", True), insn.va
def extract_features(f, bb, insn): def extract_features(f, bb, insn):
''' """
extract features from the given insn. extract features from the given insn.
args: args:
@@ -443,7 +446,7 @@ def extract_features(f, bb, insn):
yields: yields:
Feature, set[VA]: the features and their location found in this insn. Feature, set[VA]: the features and their location found in this insn.
''' """
for insn_handler in INSTRUCTION_HANDLERS: for insn_handler in INSTRUCTION_HANDLERS:
for feature, va in insn_handler(f, bb, insn): for feature, va in insn_handler(f, bb, insn):
yield feature, va yield feature, va
@@ -461,5 +464,5 @@ INSTRUCTION_HANDLERS = (
extract_insn_cross_section_cflow, extract_insn_cross_section_cflow,
extract_insn_segment_access_features, extract_insn_segment_access_features,
extract_function_calls_from, extract_function_calls_from,
extract_function_indirect_call_characteristic_features extract_function_indirect_call_characteristic_features,
) )
View File
@@ -8,7 +8,7 @@ class Export(Feature):
self.value = value self.value = value
def __str__(self): def __str__(self):
return 'Export(%s)' % (self.value) return "Export(%s)" % (self.value)
class Import(Feature): class Import(Feature):
@@ -18,7 +18,7 @@ class Import(Feature):
self.value = value self.value = value
def __str__(self): def __str__(self):
return 'Import(%s)' % (self.value) return "Import(%s)" % (self.value)
class Section(Feature): class Section(Feature):
@@ -28,4 +28,4 @@ class Section(Feature):
self.value = value self.value = value
def __str__(self): def __str__(self):
return 'Section(%s)' % (self.value) return "Section(%s)" % (self.value)
View File
@@ -1,4 +1,4 @@
''' """
capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))` capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`
json format: json format:
@@ -39,7 +39,7 @@ json format:
], ],
} }
} }
''' """
import json import json
import zlib import zlib
import logging import logging
@@ -61,10 +61,7 @@ def serialize_feature(feature):
return feature.freeze_serialize() return feature.freeze_serialize()
KNOWN_FEATURES = {F.__name__: F for F in capa.features.Feature.__subclasses__()} KNOWN_FEATURES = {F.__name__: F for F in capa.features.Feature.__subclasses__()}
def deserialize_feature(doc): def deserialize_feature(doc):
@@ -73,7 +70,7 @@ def deserialize_feature(doc):
def dumps(extractor): def dumps(extractor):
''' """
serialize the given extractor to a string serialize the given extractor to a string
args: args:
@@ -81,79 +78,64 @@ def dumps(extractor):
returns: returns:
str: the serialized features. str: the serialized features.
''' """
ret = {'version': 1, 'functions': {}, 'scopes': {'file': [], 'function': [], 'basic block': [], 'instruction': []}} ret = {"version": 1, "functions": {}, "scopes": {"file": [], "function": [], "basic block": [], "instruction": [],}}
for feature, va in extractor.extract_file_features(): for feature, va in extractor.extract_file_features():
ret['scopes']['file'].append(serialize_feature(feature) + (hex(va), ())) ret["scopes"]["file"].append(serialize_feature(feature) + (hex(va), ()))
for f in extractor.get_functions(): for f in extractor.get_functions():
ret['functions'][hex(f)] = {} ret["functions"][hex(f)] = {}
for feature, va in extractor.extract_function_features(f): for feature, va in extractor.extract_function_features(f):
ret['scopes']['function'].append(serialize_feature(feature) + (hex(va), (hex(f),))) ret["scopes"]["function"].append(serialize_feature(feature) + (hex(va), (hex(f),)))
for bb in extractor.get_basic_blocks(f): for bb in extractor.get_basic_blocks(f):
ret['functions'][hex(f)][hex(bb)] = [] ret["functions"][hex(f)][hex(bb)] = []
for feature, va in extractor.extract_basic_block_features(f, bb): for feature, va in extractor.extract_basic_block_features(f, bb):
ret['scopes']['basic block'].append(serialize_feature(feature) + (hex(va), (hex(f), hex(bb),))) ret["scopes"]["basic block"].append(serialize_feature(feature) + (hex(va), (hex(f), hex(bb),)))
for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]): for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]):
ret['functions'][hex(f)][hex(bb)].append(hex(insnva)) ret["functions"][hex(f)][hex(bb)].append(hex(insnva))
for feature, va in extractor.extract_insn_features(f, bb, insn): for feature, va in extractor.extract_insn_features(f, bb, insn):
ret['scopes']['instruction'].append( ret["scopes"]["instruction"].append(
serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva), )) serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva),))
) )
return json.dumps(ret) return json.dumps(ret)
def loads(s): def loads(s):
'''deserialize a set of features (as a NullFeatureExtractor) from a string.''' """deserialize a set of features (as a NullFeatureExtractor) from a string."""
doc = json.loads(s) doc = json.loads(s)
if doc.get('version') != 1: if doc.get("version") != 1:
raise ValueError('unsupported freeze format version: %d' % (doc.get('version'))) raise ValueError("unsupported freeze format version: %d" % (doc.get("version")))
features = { features = {
'file features': [], "file features": [],
'functions': {}, "functions": {},
} }
for fva, function in doc.get('functions', {}).items(): for fva, function in doc.get("functions", {}).items():
fva = int(fva, 0x10) fva = int(fva, 0x10)
features['functions'][fva] = { features["functions"][fva] = {
'features': [], "features": [],
'basic blocks': {}, "basic blocks": {},
} }
for bbva, bb in function.items(): for bbva, bb in function.items():
bbva = int(bbva, 0x10) bbva = int(bbva, 0x10)
features['functions'][fva]['basic blocks'][bbva] = { features["functions"][fva]["basic blocks"][bbva] = {
'features': [], "features": [],
'instructions': {}, "instructions": {},
} }
for insnva in bb: for insnva in bb:
insnva = int(insnva, 0x10) insnva = int(insnva, 0x10)
features['functions'][fva]['basic blocks'][bbva]['instructions'][insnva] = { features["functions"][fva]["basic blocks"][bbva]["instructions"][insnva] = {
'features': [], "features": [],
} }
# in the following blocks, each entry looks like: # in the following blocks, each entry looks like:
@@ -161,13 +143,13 @@ def loads(s):
# ('MatchedRule', ('foo', ), '0x401000', ('0x401000', )) # ('MatchedRule', ('foo', ), '0x401000', ('0x401000', ))
# ^^^^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^ ^^^^^^^^^^^^^^ # ^^^^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^ ^^^^^^^^^^^^^^
# feature name args addr func/bb/insn # feature name args addr func/bb/insn
for feature in doc.get('scopes', {}).get('file', []): for feature in doc.get("scopes", {}).get("file", []):
va, loc = feature[2:] va, loc = feature[2:]
va = int(va, 0x10) va = int(va, 0x10)
feature = deserialize_feature(feature[:2]) feature = deserialize_feature(feature[:2])
features['file features'].append((va, feature)) features["file features"].append((va, feature))
for feature in doc.get('scopes', {}).get('function', []): for feature in doc.get("scopes", {}).get("function", []):
# fetch the pair like: # fetch the pair like:
# #
# ('0x401000', ('0x401000', )) # ('0x401000', ('0x401000', ))
@@ -183,42 +165,42 @@ def loads(s):
# ^^^^^^^^^^^^^ ^^^^^^^^^ # ^^^^^^^^^^^^^ ^^^^^^^^^
# feature name args # feature name args
feature = deserialize_feature(feature[:2]) feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['features'].append((va, feature)) features["functions"][loc[0]]["features"].append((va, feature))
for feature in doc.get('scopes', {}).get('basic block', []): for feature in doc.get("scopes", {}).get("basic block", []):
va, loc = feature[2:] va, loc = feature[2:]
va = int(va, 0x10) va = int(va, 0x10)
loc = [int(lo, 0x10) for lo in loc] loc = [int(lo, 0x10) for lo in loc]
feature = deserialize_feature(feature[:2]) feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['basic blocks'][loc[1]]['features'].append((va, feature)) features["functions"][loc[0]]["basic blocks"][loc[1]]["features"].append((va, feature))
for feature in doc.get('scopes', {}).get('instruction', []): for feature in doc.get("scopes", {}).get("instruction", []):
va, loc = feature[2:] va, loc = feature[2:]
va = int(va, 0x10) va = int(va, 0x10)
loc = [int(lo, 0x10) for lo in loc] loc = [int(lo, 0x10) for lo in loc]
feature = deserialize_feature(feature[:2]) feature = deserialize_feature(feature[:2])
features['functions'][loc[0]]['basic blocks'][loc[1]]['instructions'][loc[2]]['features'].append((va, feature)) features["functions"][loc[0]]["basic blocks"][loc[1]]["instructions"][loc[2]]["features"].append((va, feature))
return capa.features.extractors.NullFeatureExtractor(features) return capa.features.extractors.NullFeatureExtractor(features)
MAGIC = 'capa0000'.encode('ascii') MAGIC = "capa0000".encode("ascii")
def dump(extractor): def dump(extractor):
'''serialize the given extractor to a byte array.''' """serialize the given extractor to a byte array."""
return MAGIC + zlib.compress(dumps(extractor).encode('utf-8')) return MAGIC + zlib.compress(dumps(extractor).encode("utf-8"))
def is_freeze(buf): def is_freeze(buf):
return buf[:len(MAGIC)] == MAGIC return buf[: len(MAGIC)] == MAGIC
def load(buf): def load(buf):
'''deserialize a set of features (as a NullFeatureExtractor) from a byte array.''' """deserialize a set of features (as a NullFeatureExtractor) from a byte array."""
if not is_freeze(buf): if not is_freeze(buf):
raise ValueError('missing magic header') raise ValueError("missing magic header")
return loads(zlib.decompress(buf[len(MAGIC):]).decode('utf-8')) return loads(zlib.decompress(buf[len(MAGIC) :]).decode("utf-8"))
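A round trip through the freeze format then looks like this sketch, given some feature extractor instance:

    buf = dump(extractor)   # b"capa0000" + zlib(utf-8(json(...)))
    assert is_freeze(buf)
    extractor2 = load(buf)  # NullFeatureExtractor replaying the recorded features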
def main(argv=None): def main(argv=None):
@@ -230,24 +212,21 @@ def main(argv=None):
argv = sys.argv[1:] argv = sys.argv[1:]
formats = [ formats = [
('auto', '(default) detect file type automatically'), ("auto", "(default) detect file type automatically"),
('pe', 'Windows PE file'), ("pe", "Windows PE file"),
('sc32', '32-bit shellcode'), ("sc32", "32-bit shellcode"),
('sc64', '64-bit shellcode'), ("sc64", "64-bit shellcode"),
] ]
format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats]) format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
parser = argparse.ArgumentParser(description='save capa features to a file') parser = argparse.ArgumentParser(description="save capa features to a file")
parser.add_argument('sample', type=str, help='Path to sample to analyze') parser.add_argument("sample", type=str, help="Path to sample to analyze")
parser.add_argument('output', type=str, help='Path to output file') parser.add_argument("output", type=str, help="Path to output file")
parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
parser.add_argument('-q', '--quiet', action='store_true', help='Disable all output but errors') parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto', help='Select sample format, %s' % format_help) parser.add_argument("-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help)
args = parser.parse_args(args=argv) args = parser.parse_args(args=argv)
if args.quiet: if args.quiet:
@@ -264,13 +243,15 @@ def main(argv=None):
# don't import this at top level to support ida/py3 backend # don't import this at top level to support ida/py3 backend
import capa.features.extractors.viv import capa.features.extractors.viv
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample) extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
with open(args.output, 'wb') as f: with open(args.output, "wb") as f:
f.write(dump(extractor)) f.write(dump(extractor))
return 0 return 0
if __name__ == '__main__': if __name__ == "__main__":
import sys import sys
sys.exit(main()) sys.exit(main())
View File
@@ -4,9 +4,9 @@ from capa.features import Feature
 class API(Feature):
     def __init__(self, name):
         # Downcase library name if given
-        if '.' in name:
-            modname, impname = name.split('.')
-            name = modname.lower() + '.' + impname
+        if "." in name:
+            modname, impname = name.split(".")
+            name = modname.lower() + "." + impname

         super(API, self).__init__([name])
@@ -19,9 +19,9 @@ class Number(Feature):
     def __str__(self):
         if self.symbol:
-            return 'number(0x%x = %s)' % (self.value, self.symbol)
+            return "number(0x%x = %s)" % (self.value, self.symbol)
         else:
-            return 'number(0x%x)' % (self.value)
+            return "number(0x%x)" % (self.value)


 class Offset(Feature):
@@ -32,9 +32,9 @@ class Offset(Feature):
     def __str__(self):
         if self.symbol:
-            return 'offset(0x%x = %s)' % (self.value, self.symbol)
+            return "offset(0x%x = %s)" % (self.value, self.symbol)
         else:
-            return 'offset(0x%x)' % (self.value)
+            return "offset(0x%x)" % (self.value)


 class Mnemonic(Feature):
@@ -43,4 +43,4 @@ class Mnemonic(Feature):
         self.value = value

     def __str__(self):
-        return 'mnemonic(%s)' % (self.value)
+        return "mnemonic(%s)" % (self.value)
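API.__init__ above normalizes the module half of an import name to lowercase while leaving the import's own casing intact. A standalone sketch of that normalization, assuming names arrive as "module.import" with a single dot:

def normalize_api_name(name):
    # downcase the module name, keep the import name's casing
    if "." in name:
        modname, impname = name.split(".")
        name = modname.lower() + "." + impname
    return name

assert normalize_api_name("KERNEL32.CreateFileA") == "kernel32.CreateFileA"
assert normalize_api_name("strlen") == "strlen"  # no module prefix: unchanged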
View File
@@ -4,7 +4,7 @@ _hex = hex
 def hex(i):
     # under py2.7, long integers get formatted with a trailing `L`
     # and this is not pretty. so strip it out.
-    return _hex(oint(i)).rstrip('L')
+    return _hex(oint(i)).rstrip("L")


 def oint(i):
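For context on the rstrip above: only Python 2 longs format with a trailing `L`; Python 3's hex() output is lowercase, so rstrip("L") can never remove real digits and the wrapper behaves the same on both versions:

# Python 2.7:  hex(2 ** 32)  ->  '0x100000000L'
# Python 3:    hex(2 ** 32)  ->  '0x100000000'
assert hex(2 ** 32).rstrip("L") == "0x100000000"  # same result either way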
View File
@@ -15,14 +15,14 @@ def info_to_name(display):
         e.g. function(my_function) => my_function
     """
     try:
-        return display.split('(')[1].rstrip(')')
+        return display.split("(")[1].rstrip(")")
     except IndexError:
-        return ''
+        return ""


 def location_to_hex(location):
     """ convert location to hex for display """
-    return '%08X' % location
+    return "%08X" % location


 class CapaExplorerDataItem(object):
@@ -35,7 +35,12 @@ class CapaExplorerDataItem(object):
         self.children = []
         self._checked = False
-        self.flags = (QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsTristate | QtCore.Qt.ItemIsUserCheckable)
+        self.flags = (
+            QtCore.Qt.ItemIsEnabled
+            | QtCore.Qt.ItemIsSelectable
+            | QtCore.Qt.ItemIsTristate
+            | QtCore.Qt.ItemIsUserCheckable
+        )

         if self.pred:
             self.pred.appendChild(self)
@@ -109,7 +114,7 @@ class CapaExplorerDataItem(object):
     def __str__(self):
         """ get string representation of columns """
-        return ' '.join([data for data in self._data if data])
+        return " ".join([data for data in self._data if data])

     @property
     def info(self):
@@ -133,12 +138,12 @@ class CapaExplorerDataItem(object):
 class CapaExplorerRuleItem(CapaExplorerDataItem):
     """ store data relevant to capa function result """

-    fmt = '%s (%d matches)'
+    fmt = "%s (%d matches)"

     def __init__(self, parent, display, count, source):
         """ """
         display = self.fmt % (display, count) if count > 1 else display
-        super(CapaExplorerRuleItem, self).__init__(parent, [display, '', ''])
+        super(CapaExplorerRuleItem, self).__init__(parent, [display, "", ""])
         self._source = source

     @property
@@ -150,9 +155,9 @@ class CapaExplorerRuleItem(CapaExplorerDataItem):
 class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
     """ store data relevant to capa function match result """

-    def __init__(self, parent, display, source=''):
+    def __init__(self, parent, display, source=""):
         """ """
-        super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, '', ''])
+        super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, "", ""])
         self._source = source

     @property
@@ -164,12 +169,13 @@ class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
 class CapaExplorerFunctionItem(CapaExplorerDataItem):
     """ store data relevant to capa function result """

-    fmt = 'function(%s)'
+    fmt = "function(%s)"

     def __init__(self, parent, location):
         """ """
-        super(CapaExplorerFunctionItem, self).__init__(parent, [self.fmt % idaapi.get_name(location),
-                                                                location_to_hex(location), ''])
+        super(CapaExplorerFunctionItem, self).__init__(
+            parent, [self.fmt % idaapi.get_name(location), location_to_hex(location), ""]
+        )

     @property
     def info(self):
@@ -187,32 +193,31 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):
 class CapaExplorerBlockItem(CapaExplorerDataItem):
     """ store data relevant to capa basic block result """

-    fmt = 'basic block(loc_%08X)'
+    fmt = "basic block(loc_%08X)"

     def __init__(self, parent, location):
         """ """
-        super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ''])
+        super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ""])


 class CapaExplorerDefaultItem(CapaExplorerDataItem):
     """ store data relevant to capa default result """

-    def __init__(self, parent, display, details='', location=None):
+    def __init__(self, parent, display, details="", location=None):
         """ """
-        location = location_to_hex(location) if location else ''
+        location = location_to_hex(location) if location else ""
         super(CapaExplorerDefaultItem, self).__init__(parent, [display, location, details])


 class CapaExplorerFeatureItem(CapaExplorerDataItem):
     """ store data relevant to capa feature result """

-    def __init__(self, parent, display, location='', details=''):
-        location = location_to_hex(location) if location else ''
+    def __init__(self, parent, display, location="", details=""):
+        location = location_to_hex(location) if location else ""
         super(CapaExplorerFeatureItem, self).__init__(parent, [display, location, details])


 class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
     def __init__(self, parent, display, location):
         """ """
         details = capa.ida.helpers.get_disasm_line(location)
@@ -221,26 +226,24 @@ class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
 class CapaExplorerByteViewItem(CapaExplorerFeatureItem):
     def __init__(self, parent, display, location):
         """ """
         byte_snap = idaapi.get_bytes(location, 32)

         if byte_snap:
-            byte_snap = codecs.encode(byte_snap, 'hex').upper()
+            byte_snap = codecs.encode(byte_snap, "hex").upper()
             if sys.version_info >= (3, 0):
-                details = ' '.join([byte_snap[i:i + 2].decode() for i in range(0, len(byte_snap), 2)])
+                details = " ".join([byte_snap[i : i + 2].decode() for i in range(0, len(byte_snap), 2)])
             else:
-                details = ' '.join([byte_snap[i:i + 2] for i in range(0, len(byte_snap), 2)])
+                details = " ".join([byte_snap[i : i + 2] for i in range(0, len(byte_snap), 2)])
         else:
-            details = ''
+            details = ""

         super(CapaExplorerByteViewItem, self).__init__(parent, display, location=location, details=details)
         self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)


 class CapaExplorerStringViewItem(CapaExplorerFeatureItem):
     def __init__(self, parent, display, location):
         """ """
         super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location)
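The hex rendering in CapaExplorerByteViewItem can be exercised on its own; a py3-only sketch of the same logic (the original keeps an extra branch for py2, where the slices are already str):

import codecs

def render_byte_snap(byte_snap):
    # b"\x41\x0a\xb4" -> "41 0A B4": hexlify, uppercase, then space-separate byte pairs
    hexed = codecs.encode(byte_snap, "hex").upper()
    return " ".join(hexed[i : i + 2].decode() for i in range(0, len(hexed), 2))

assert render_byte_snap(b"\x41\x0a\xb4") == "41 0A B4"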
View File
@@ -16,7 +16,7 @@ from capa.ida.explorer.item import (
     CapaExplorerByteViewItem,
     CapaExplorerBlockItem,
     CapaExplorerRuleMatchItem,
-    CapaExplorerFeatureItem
+    CapaExplorerFeatureItem,
 )

 import capa.ida.helpers
@@ -37,7 +37,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
     def __init__(self, parent=None):
         """ """
         super(CapaExplorerDataModel, self).__init__(parent)
-        self.root_node = CapaExplorerDataItem(None, ['Rule Information', 'Address', 'Details'])
+        self.root_node = CapaExplorerDataItem(None, ["Rule Information", "Address", "Details"])

     def reset(self):
         """ """
@@ -86,8 +86,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
             # display data in corresponding column
             return item.data(column)

-        if role == QtCore.Qt.ToolTipRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem)) and \
-                CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION == column:
+        if (
+            role == QtCore.Qt.ToolTipRole
+            and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem))
+            and CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION == column
+        ):
             # show tooltip containing rule source
             return item.source
@@ -95,18 +98,30 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
             # inform view how to display content of checkbox - un/checked
             return QtCore.Qt.Checked if item.isChecked() else QtCore.Qt.Unchecked

-        if role == QtCore.Qt.FontRole and column in (CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
-                                                     CapaExplorerDataModel.COLUMN_INDEX_DETAILS):
+        if role == QtCore.Qt.FontRole and column in (
+            CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
+            CapaExplorerDataModel.COLUMN_INDEX_DETAILS,
+        ):
             # set font for virtual address and details columns
-            font = QtGui.QFont('Courier', weight=QtGui.QFont.Medium)
+            font = QtGui.QFont("Courier", weight=QtGui.QFont.Medium)
             if column == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS:
                 font.setBold(True)
             return font

-        if role == QtCore.Qt.FontRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem,
-                                                            CapaExplorerBlockItem, CapaExplorerFunctionItem,
-                                                            CapaExplorerFeatureItem)) and \
-                column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
+        if (
+            role == QtCore.Qt.FontRole
+            and isinstance(
+                item,
+                (
+                    CapaExplorerRuleItem,
+                    CapaExplorerRuleMatchItem,
+                    CapaExplorerBlockItem,
+                    CapaExplorerFunctionItem,
+                    CapaExplorerFeatureItem,
+                ),
+            )
+            and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
+        ):
             # set bold font for top-level rules
             font = QtGui.QFont()
             font.setBold(True)
@@ -116,8 +131,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
             # set color for virtual address column
             return QtGui.QColor(88, 139, 174)

-        if role == QtCore.Qt.ForegroundRole and isinstance(item, CapaExplorerFeatureItem) and column == \
-                CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
+        if (
+            role == QtCore.Qt.ForegroundRole
+            and isinstance(item, CapaExplorerFeatureItem)
+            and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
+        ):
             # set color for feature items
             return QtGui.QColor(79, 121, 66)
@@ -222,8 +240,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
     def util_reset_ida_highlighting(self, item, checked):
         """ """
-        if not isinstance(item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem,
-                                 CapaExplorerByteViewItem)):
+        if not isinstance(
+            item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem, CapaExplorerByteViewItem)
+        ):
             # ignore other item types
             return
@@ -254,8 +273,10 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
         if not model_index.isValid():
             return False

-        if role == QtCore.Qt.CheckStateRole and model_index.column() ==\
-                CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
+        if (
+            role == QtCore.Qt.CheckStateRole
+            and model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
+        ):
             # user un/checked box - un/check parent and children
             for child_index in self.iterateChildrenIndexFromRootIndex(model_index, ignore_root=False):
                 child_index.internalPointer().setChecked(value)
@@ -263,9 +284,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                 self.dataChanged.emit(child_index, child_index)
             return True

-        if role == QtCore.Qt.EditRole and value and \
-                model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION and \
-                isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
+        if (
+            role == QtCore.Qt.EditRole
+            and value
+            and model_index.column() == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
+            and isinstance(model_index.internalPointer(), CapaExplorerFunctionItem)
+        ):
             # user renamed function - update IDA database and data model
             old_name = model_index.internalPointer().info
             new_name = str(value)
@@ -309,39 +333,39 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                 "type": "or"
             },
         """
-        if statement['type'] in ('and', 'or', 'optional'):
-            return CapaExplorerDefaultItem(parent, statement['type'])
-        elif statement['type'] == 'not':
+        if statement["type"] in ("and", "or", "optional"):
+            return CapaExplorerDefaultItem(parent, statement["type"])
+        elif statement["type"] == "not":
             # TODO: do we display 'not'
             pass
-        elif statement['type'] == 'some':
-            return CapaExplorerDefaultItem(parent, statement['count'] + ' or more')
-        elif statement['type'] == 'range':
+        elif statement["type"] == "some":
+            return CapaExplorerDefaultItem(parent, statement["count"] + " or more")
+        elif statement["type"] == "range":
             # `range` is a weird node, its almost a hybrid of statement + feature.
             # it is a specific feature repeated multiple times.
             # there's no additional logic in the feature part, just the existence of a feature.
             # so, we have to inline some of the feature rendering here.
-            display = 'count(%s): ' % self.capa_doc_feature_to_display(statement['child'])
+            display = "count(%s): " % self.capa_doc_feature_to_display(statement["child"])

-            if statement['max'] == statement['min']:
-                display += '%d' % (statement['min'])
-            elif statement['min'] == 0:
-                display += '%d or fewer' % (statement['max'])
-            elif statement['max'] == (1 << 64 - 1):
-                display += '%d or more' % (statement['min'])
+            if statement["max"] == statement["min"]:
+                display += "%d" % (statement["min"])
+            elif statement["min"] == 0:
+                display += "%d or fewer" % (statement["max"])
+            elif statement["max"] == (1 << 64 - 1):
+                display += "%d or more" % (statement["min"])
             else:
-                display += 'between %d and %d' % (statement['min'], statement['max'])
+                display += "between %d and %d" % (statement["min"], statement["max"])

             return CapaExplorerFeatureItem(parent, display=display)
-        elif statement['type'] == 'subscope':
-            return CapaExplorerFeatureItem(parent, 'subscope(%s)' % statement['subscope'])
-        elif statement['type'] == 'regex':
+        elif statement["type"] == "subscope":
+            return CapaExplorerFeatureItem(parent, "subscope(%s)" % statement["subscope"])
+        elif statement["type"] == "regex":
             # regex is a `Statement` not a `Feature`
             # this is because it doesn't get extracted, but applies to all strings in scope.
             # so we have to handle it here
-            return CapaExplorerFeatureItem(parent, 'regex(%s)' % statement['pattern'], details=statement['match'])
+            return CapaExplorerFeatureItem(parent, "regex(%s)" % statement["pattern"], details=statement["match"])
         else:
-            raise RuntimeError('unexpected match statement type: ' + str(statement))
+            raise RuntimeError("unexpected match statement type: " + str(statement))

     def render_capa_doc_match(self, parent, match, doc):
         """ render capa match read from doc
@@ -367,23 +391,24 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                 }
             },
         """
-        if not match['success']:
+        if not match["success"]:
             # TODO: display failed branches at some point? Help with debugging rules?
             return

         # optional statement with no successful children is empty
-        if (match['node'].get('statement', {}).get('type') == 'optional'
-                and not any(map(lambda m: m['success'], match['children']))):
+        if match["node"].get("statement", {}).get("type") == "optional" and not any(
+            map(lambda m: m["success"], match["children"])
+        ):
             return

-        if match['node']['type'] == 'statement':
-            parent2 = self.render_capa_doc_statement_node(parent, match['node']['statement'], doc)
-        elif match['node']['type'] == 'feature':
-            parent2 = self.render_capa_doc_feature_node(parent, match['node']['feature'], match['locations'], doc)
+        if match["node"]["type"] == "statement":
+            parent2 = self.render_capa_doc_statement_node(parent, match["node"]["statement"], doc)
+        elif match["node"]["type"] == "feature":
+            parent2 = self.render_capa_doc_feature_node(parent, match["node"]["feature"], match["locations"], doc)
         else:
-            raise RuntimeError('unexpected node type: ' + str(match['node']['type']))
+            raise RuntimeError("unexpected node type: " + str(match["node"]["type"]))

-        for child in match['children']:
+        for child in match["children"]:
             self.render_capa_doc_match(parent2, child, doc)

     def render_capa_doc(self, doc):
@@ -394,17 +419,17 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
         self.beginResetModel()

         for rule in rutils.capability_rules(doc):
-            parent = CapaExplorerRuleItem(self.root_node, rule['meta']['name'], len(rule['matches']), rule['source'])
+            parent = CapaExplorerRuleItem(self.root_node, rule["meta"]["name"], len(rule["matches"]), rule["source"])

-            for (location, match) in doc[rule['meta']['name']]['matches'].items():
-                if rule['meta']['scope'] == capa.rules.FILE_SCOPE:
+            for (location, match) in doc[rule["meta"]["name"]]["matches"].items():
+                if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
                     parent2 = parent
-                elif rule['meta']['scope'] == capa.rules.FUNCTION_SCOPE:
+                elif rule["meta"]["scope"] == capa.rules.FUNCTION_SCOPE:
                     parent2 = CapaExplorerFunctionItem(parent, location)
-                elif rule['meta']['scope'] == capa.rules.BASIC_BLOCK_SCOPE:
+                elif rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
                     parent2 = CapaExplorerBlockItem(parent, location)
                 else:
-                    raise RuntimeError('unexpected rule scope: ' + str(rule['meta']['scope']))
+                    raise RuntimeError("unexpected rule scope: " + str(rule["meta"]["scope"]))

                 self.render_capa_doc_match(parent2, match, doc)
@@ -421,20 +446,20 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
             },
         """
         mapping = {
-            'string': 'string(%s)',
-            'bytes': 'bytes(%s)',
-            'api': 'api(%s)',
-            'mnemonic': 'mnemonic(%s)',
-            'export': 'export(%s)',
-            'import': 'import(%s)',
-            'section': 'section(%s)',
-            'number': 'number(0x%X)',
-            'offset': 'offset(0x%X)',
-            'characteristic': 'characteristic(%s)',
-            'match': 'rule match(%s)'
+            "string": "string(%s)",
+            "bytes": "bytes(%s)",
+            "api": "api(%s)",
+            "mnemonic": "mnemonic(%s)",
+            "export": "export(%s)",
+            "import": "import(%s)",
+            "section": "section(%s)",
+            "number": "number(0x%X)",
+            "offset": "offset(0x%X)",
+            "characteristic": "characteristic(%s)",
+            "match": "rule match(%s)",
         }
-        '''
+        """
             "feature": {
                 "characteristic": [
                     "loop",
@@ -442,21 +467,23 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                 ],
                 "type": "characteristic"
             },
-        '''
+        """
-        if feature['type'] == 'characteristic':
-            return mapping['characteristic'] % feature['characteristic'][0]
+        if feature["type"] == "characteristic":
+            return mapping["characteristic"] % feature["characteristic"][0]

         # convert bytes feature from "410ab4" to "41 0A B4"
-        if feature['type'] == 'bytes':
-            return mapping['bytes'] % ' '.join(feature['bytes'][i:i + 2] for i in
-                                               range(0, len(feature['bytes']), 2)).upper()
+        if feature["type"] == "bytes":
+            return (
+                mapping["bytes"]
+                % " ".join(feature["bytes"][i : i + 2] for i in range(0, len(feature["bytes"]), 2)).upper()
+            )

         try:
-            fmt = mapping[feature['type']]
+            fmt = mapping[feature["type"]]
         except KeyError:
-            raise RuntimeError('unexpected doc type: ' + str(feature['type']))
+            raise RuntimeError("unexpected doc type: " + str(feature["type"]))

-        return fmt % feature[feature['type']]
+        return fmt % feature[feature["type"]]

     def render_capa_doc_feature_node(self, parent, feature, locations, doc):
         """ """
@@ -473,7 +500,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
         return parent2

-    def render_capa_doc_feature(self, parent, feature, location, doc, display='-'):
+    def render_capa_doc_feature(self, parent, feature, location, doc, display="-"):
         """ render capa feature read from doc

         @param parent: parent node to which new child is assigned
@@ -491,51 +518,38 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
         @param location: address of feature
         @param display: text to display in plugin ui
         """
-        instruction_view = (
-            'bytes',
-            'api',
-            'mnemonic',
-            'number',
-            'offset'
-        )
-        byte_view = (
-            'section',
-        )
-        string_view = (
-            'string',
-        )
-        default_feature_view = (
-            'import',
-            'export'
-        )
+        instruction_view = ("bytes", "api", "mnemonic", "number", "offset")
+        byte_view = ("section",)
+        string_view = ("string",)
+        default_feature_view = ("import", "export")

         # special handling for characteristic pending type
-        if feature['type'] == 'characteristic':
-            if feature['characteristic'][0] in ('embedded pe',):
+        if feature["type"] == "characteristic":
+            if feature["characteristic"][0] in ("embedded pe",):
                 return CapaExplorerByteViewItem(parent, display, location)

-            if feature['characteristic'][0] in ('loop', 'recursive call', 'tight loop', 'switch'):
+            if feature["characteristic"][0] in ("loop", "recursive call", "tight loop", "switch"):
                 return CapaExplorerFeatureItem(parent, display=display)

             # default to instruction view
             return CapaExplorerInstructionViewItem(parent, display, location)

-        if feature['type'] == 'match':
-            return CapaExplorerRuleMatchItem(parent, display, source=doc.get(feature['match'], {}).get('source', ''))
+        if feature["type"] == "match":
+            return CapaExplorerRuleMatchItem(parent, display, source=doc.get(feature["match"], {}).get("source", ""))

-        if feature['type'] in instruction_view:
+        if feature["type"] in instruction_view:
             return CapaExplorerInstructionViewItem(parent, display, location)

-        if feature['type'] in byte_view:
+        if feature["type"] in byte_view:
             return CapaExplorerByteViewItem(parent, display, location)

-        if feature['type'] in string_view:
+        if feature["type"] in string_view:
             return CapaExplorerStringViewItem(parent, display, location)

-        if feature['type'] in default_feature_view:
+        if feature["type"] in default_feature_view:
             return CapaExplorerFeatureItem(parent, display=display)

-        raise RuntimeError('unexpected feature type: ' + str(feature['type']))
+        raise RuntimeError("unexpected feature type: " + str(feature["type"]))

     def update_function_name(self, old_name, new_name):
         """ update all instances of function name
@@ -548,8 +562,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
         # convert name to view format for matching
         old_name = CapaExplorerFunctionItem.fmt % old_name

-        for model_index in self.match(root_index, QtCore.Qt.DisplayRole, old_name, hits=-1,
-                                      flags=QtCore.Qt.MatchRecursive):
+        for model_index in self.match(
+            root_index, QtCore.Qt.DisplayRole, old_name, hits=-1, flags=QtCore.Qt.MatchRecursive
+        ):
             if not isinstance(model_index.internalPointer(), CapaExplorerFunctionItem):
                 continue
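The `range` display logic is easy to sanity-check in isolation. A standalone sketch, assuming a doc statement shaped like {"type": "range", "min": ..., "max": ..., "child": ...}; note that `(1 << 64 - 1)` parses as `1 << 63`, since `-` binds tighter than `<<`:

def render_range(min_, max_, child_display):
    display = "count(%s): " % child_display
    if max_ == min_:
        display += "%d" % min_
    elif min_ == 0:
        display += "%d or fewer" % max_
    elif max_ == (1 << 64 - 1):  # the "unbounded" sentinel
        display += "%d or more" % min_
    else:
        display += "between %d and %d" % (min_, max_)
    return display

assert render_range(2, 2, "mnemonic(xor)") == "count(mnemonic(xor)): 2"
assert render_range(2, (1 << 64 - 1), "mnemonic(xor)") == "count(mnemonic(xor)): 2 or more"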
View File
@@ -4,7 +4,6 @@ from capa.ida.explorer.model import CapaExplorerDataModel
 class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
     def __init__(self, parent=None):
         """ """
         super(CapaExplorerSortFilterProxyModel, self).__init__(parent)
@@ -20,8 +19,12 @@ class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
         ldata = left.internalPointer().data(left.column())
         rdata = right.internalPointer().data(right.column())

-        if ldata and rdata and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS and left.column() \
-                == right.column():
+        if (
+            ldata
+            and rdata
+            and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS
+            and left.column() == right.column()
+        ):
             # convert virtual address before compare
             return int(ldata, 16) < int(rdata, 16)
         else:
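The int(..., 16) conversion matters once addresses render with different digit counts, e.g. a 64-bit address next to a 32-bit one; plain string comparison then sorts them the wrong way:

# "%08X" pads to eight digits, but a 64-bit address renders wider:
assert ("FFFFFFFF" < "100000000") is False          # lexicographic: wrong order
assert int("FFFFFFFF", 16) < int("100000000", 16)   # numeric: correct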
View File
@@ -55,7 +55,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
         self.doubleClicked.connect(self.slot_double_click)
         # self.clicked.connect(self.slot_click)

-        self.setStyleSheet('QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}')
+        self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")

     def reset(self):
         """ reset user interface changes
@@ -114,8 +114,8 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
         @yield QAction*
         """
         default_actions = [
-            ('Copy column', data, self.slot_copy_column),
-            ('Copy row', data, self.slot_copy_row),
+            ("Copy column", data, self.slot_copy_column),
+            ("Copy row", data, self.slot_copy_row),
         ]

         # add default actions
@@ -130,7 +130,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
         @yield QAction*
         """
         function_actions = [
-            ('Rename function', data, self.slot_rename_function),
+            ("Rename function", data, self.slot_rename_function),
         ]

         # add function actions
View File
@@ -3,47 +3,48 @@ import logging
 import idaapi
 import idc

-logger = logging.getLogger('capa')
+logger = logging.getLogger("capa")

 # file type names as returned by idaapi.get_file_type_name()
 SUPPORTED_FILE_TYPES = [
-    'Portable executable for 80386 (PE)',
-    'Portable executable for AMD64 (PE)',
-    'Binary file'  # x86/AMD64 shellcode support
+    "Portable executable for 80386 (PE)",
+    "Portable executable for AMD64 (PE)",
+    "Binary file",  # x86/AMD64 shellcode support
 ]


 def inform_user_ida_ui(message):
-    idaapi.info('%s. Please refer to IDA Output window for more information.' % message)
+    idaapi.info("%s. Please refer to IDA Output window for more information." % message)


 def is_supported_file_type():
     file_type = idaapi.get_file_type_name()
     if file_type not in SUPPORTED_FILE_TYPES:
-        logger.error('-' * 80)
-        logger.error(' Input file does not appear to be a PE file.')
-        logger.error(' ')
+        logger.error("-" * 80)
+        logger.error(" Input file does not appear to be a PE file.")
+        logger.error(" ")
         logger.error(
-            ' capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA.')
-        logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
-        logger.error('-' * 80)
-        inform_user_ida_ui('capa does not support the format of this file')
+            " capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA."
+        )
+        logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
+        logger.error("-" * 80)
+        inform_user_ida_ui("capa does not support the format of this file")
         return False
     return True


 def get_disasm_line(va):
-    ''' '''
+    """ """
     return idc.generate_disasm_line(va, idc.GENDSM_FORCE_CODE)


 def is_func_start(ea):
-    ''' check if function stat exists at virtual address '''
+    """ check if function stat exists at virtual address """
     f = idaapi.get_func(ea)
     return f and f.start_ea == ea


 def get_func_start_ea(ea):
-    ''' '''
+    """ """
     f = idaapi.get_func(ea)
     return f if f is None else f.start_ea
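is_func_start()/get_func_start_ea() only rely on idaapi.get_func returning an object carrying start_ea (or None). A stubbed sketch of the semantics that runs outside IDA, with a fake one-function database:

class FakeFunc(object):  # stand-in for the object idaapi.get_func returns
    def __init__(self, start_ea):
        self.start_ea = start_ea

def fake_get_func(ea):
    # pretend a single function spans 0x401000-0x40107F
    return FakeFunc(0x401000) if 0x401000 <= ea < 0x401080 else None

def is_func_start(ea, get_func=fake_get_func):
    f = get_func(ea)
    return f and f.start_ea == ea

assert is_func_start(0x401000) is True
assert is_func_start(0x401004) is False
assert not is_func_start(0x500000)  # outside any function: falsy (None)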
View File
@@ -2,11 +2,7 @@ import os
 import logging
 import collections

-from PyQt5 import (
-    QtWidgets,
-    QtGui,
-    QtCore
-)
+from PyQt5 import QtWidgets, QtGui, QtCore

 import idaapi
@@ -20,13 +16,12 @@ from capa.ida.explorer.view import CapaExplorerQtreeView
 from capa.ida.explorer.model import CapaExplorerDataModel
 from capa.ida.explorer.proxy import CapaExplorerSortFilterProxyModel

-PLUGIN_NAME = 'capa explorer'
+PLUGIN_NAME = "capa explorer"

-logger = logging.getLogger('capa')
+logger = logging.getLogger("capa")


 class CapaExplorerIdaHooks(idaapi.UI_Hooks):
     def __init__(self, screen_ea_changed_hook, action_hooks):
         """ facilitate IDA UI hooks
@@ -78,7 +73,6 @@ class CapaExplorerIdaHooks(idaapi.UI_Hooks):
 class CapaExplorerForm(idaapi.PluginForm):
     def __init__(self):
         """ """
         super(CapaExplorerForm, self).__init__()
@@ -109,20 +103,20 @@ class CapaExplorerForm(idaapi.PluginForm):
         self.view_tree.reset()

-        logger.info('form created.')
+        logger.info("form created.")

     def Show(self):
         """ """
-        return idaapi.PluginForm.Show(self, self.form_title, options=(
-            idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER
-        ))
+        return idaapi.PluginForm.Show(
+            self, self.form_title, options=(idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER)
+        )

     def OnClose(self, form):
         """ form is closed """
         self.unload_ida_hooks()
         self.ida_reset()

-        logger.info('form closed.')
+        logger.info("form closed.")

     def load_interface(self):
         """ load user interface """
@@ -165,8 +159,8 @@ class CapaExplorerForm(idaapi.PluginForm):
     def load_view_summary(self):
         """ """
         table_headers = [
-            'Capability',
-            'Namespace',
+            "Capability",
+            "Namespace",
         ]

         table = QtWidgets.QTableWidget()
@@ -180,15 +174,15 @@ class CapaExplorerForm(idaapi.PluginForm):
         table.setHorizontalHeaderLabels(table_headers)
         table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
         table.setShowGrid(False)
-        table.setStyleSheet('QTableWidget::item { padding: 25px; }')
+        table.setStyleSheet("QTableWidget::item { padding: 25px; }")

         self.view_summary = table

     def load_view_attack(self):
         """ """
         table_headers = [
-            'ATT&CK Tactic',
-            'ATT&CK Technique ',
+            "ATT&CK Tactic",
+            "ATT&CK Technique ",
         ]

         table = QtWidgets.QTableWidget()
@@ -202,13 +196,13 @@ class CapaExplorerForm(idaapi.PluginForm):
         table.setHorizontalHeaderLabels(table_headers)
         table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
         table.setShowGrid(False)
-        table.setStyleSheet('QTableWidget::item { padding: 25px; }')
+        table.setStyleSheet("QTableWidget::item { padding: 25px; }")

         self.view_attack = table

     def load_view_checkbox_limit_by(self):
         """ """
-        check = QtWidgets.QCheckBox('Limit results to current function')
+        check = QtWidgets.QCheckBox("Limit results to current function")
         check.setChecked(False)
         check.stateChanged.connect(self.slot_checkbox_limit_by_changed)
@@ -231,7 +225,7 @@ class CapaExplorerForm(idaapi.PluginForm):
         tab = QtWidgets.QWidget()
         tab.setLayout(layout)

-        self.view_tabs.addTab(tab, 'Tree View')
+        self.view_tabs.addTab(tab, "Tree View")

     def load_view_summary_tab(self):
         """ """
@@ -241,7 +235,7 @@ class CapaExplorerForm(idaapi.PluginForm):
         tab = QtWidgets.QWidget()
         tab.setLayout(layout)

-        self.view_tabs.addTab(tab, 'Summary')
+        self.view_tabs.addTab(tab, "Summary")

     def load_view_attack_tab(self):
         """ """
@@ -251,16 +245,16 @@ class CapaExplorerForm(idaapi.PluginForm):
         tab = QtWidgets.QWidget()
         tab.setLayout(layout)

-        self.view_tabs.addTab(tab, 'MITRE')
+        self.view_tabs.addTab(tab, "MITRE")

     def load_file_menu(self):
         """ load file menu actions """
         actions = (
-            ('Reset view', 'Reset plugin view', self.reset),
-            ('Run analysis', 'Run capa analysis on current database', self.reload),
+            ("Reset view", "Reset plugin view", self.reset),
+            ("Run analysis", "Run capa analysis on current database", self.reload),
         )

-        menu = self.view_menu_bar.addMenu('File')
+        menu = self.view_menu_bar.addMenu("File")
         for name, _, handle in actions:
             action = QtWidgets.QAction(name, self.parent)
@@ -271,8 +265,8 @@ class CapaExplorerForm(idaapi.PluginForm):
     def load_ida_hooks(self):
         """ """
         action_hooks = {
-            'MakeName': self.ida_hook_rename,
-            'EditFunction': self.ida_hook_rename,
+            "MakeName": self.ida_hook_rename,
+            "EditFunction": self.ida_hook_rename,
         }

         self.ida_hooks = CapaExplorerIdaHooks(self.ida_hook_screen_ea_changed, action_hooks)
@@ -300,10 +294,10 @@ class CapaExplorerForm(idaapi.PluginForm):
         if post:
             # post action update data model w/ current name
-            self.model_data.update_function_name(meta.get('prev_name', ''), curr_name)
+            self.model_data.update_function_name(meta.get("prev_name", ""), curr_name)
         else:
             # pre action so save current name for replacement later
-            meta['prev_name'] = curr_name
+            meta["prev_name"] = curr_name

     def ida_hook_screen_ea_changed(self, widget, new_ea, old_ea):
         """ """
@@ -328,21 +322,21 @@ class CapaExplorerForm(idaapi.PluginForm):
             match = capa.ida.explorer.item.ea_to_hex_str(new_func_start)
         else:
             # navigated to virtual address not in valid function - clear filter
-            match = ''
+            match = ""

         # filter on virtual address to avoid updating filter string if function name is changed
         self.model_proxy.add_single_string_filter(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS, match)

     def load_capa_results(self):
         """ """
-        logger.info('-' * 80)
-        logger.info(' Using default embedded rules.')
-        logger.info(' ')
-        logger.info(' You can see the current default rule set here:')
-        logger.info(' https://github.com/fireeye/capa-rules')
-        logger.info('-' * 80)
+        logger.info("-" * 80)
+        logger.info(" Using default embedded rules.")
+        logger.info(" ")
+        logger.info(" You can see the current default rule set here:")
+        logger.info(" https://github.com/fireeye/capa-rules")
+        logger.info("-" * 80)

-        rules_path = os.path.join(os.path.dirname(self.file_loc), '../..', 'rules')
+        rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
         rules = capa.main.get_rules(rules_path)
         rules = capa.rules.RuleSet(rules)
         capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
@@ -350,27 +344,30 @@ class CapaExplorerForm(idaapi.PluginForm):
         # support binary files specifically for x86/AMD64 shellcode
         # warn user binary file is loaded but still allow capa to process it
         # TODO: check specific architecture of binary files based on how user configured IDA processors
-        if idaapi.get_file_type_name() == 'Binary file':
-            logger.warning('-' * 80)
-            logger.warning(' Input file appears to be a binary file.')
-            logger.warning(' ')
+        if idaapi.get_file_type_name() == "Binary file":
+            logger.warning("-" * 80)
+            logger.warning(" Input file appears to be a binary file.")
+            logger.warning(" ")
             logger.warning(
-                ' capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA.')
+                " capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA."
+            )
             logger.warning(
-                ' This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64.')
-            logger.warning(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
-            logger.warning('-' * 80)
+                " This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64."
+            )
+            logger.warning(" If you don't know the input file type, you can try using the `file` utility to guess it.")
+            logger.warning("-" * 80)

-            capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
+            capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")

         if capa.main.has_file_limitation(rules, capabilities, is_standalone=False):
-            capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
+            capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")

-        logger.info('analysis completed.')
+        logger.info("analysis completed.")

         doc = capa.render.convert_capabilities_to_result_document(rules, capabilities)

         import json

         with open("C:\\Users\\spring\\Desktop\\hmm.json", "w") as twitter_data_file:
             json.dump(doc, twitter_data_file, indent=4, sort_keys=True, cls=capa.render.CapaJsonObjectEncoder)
@@ -380,22 +377,22 @@ class CapaExplorerForm(idaapi.PluginForm):
         self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)

-        logger.info('render views completed.')
+        logger.info("render views completed.")

     def render_capa_doc_summary(self, doc):
         """ """
         for (row, rule) in enumerate(rutils.capability_rules(doc)):
-            count = len(rule['matches'])
+            count = len(rule["matches"])

             if count == 1:
-                capability = rule['meta']['name']
+                capability = rule["meta"]["name"]
             else:
-                capability = '%s (%d matches)' % (rule['meta']['name'], count)
+                capability = "%s (%d matches)" % (rule["meta"]["name"], count)

             self.view_summary.setRowCount(row + 1)

             self.view_summary.setItem(row, 0, self.render_new_table_header_item(capability))
-            self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule['meta']['namespace']))
+            self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule["meta"]["namespace"]))

         # resize columns to content
         self.view_summary.resizeColumnsToContents()
@@ -404,17 +401,17 @@ class CapaExplorerForm(idaapi.PluginForm):
         """ """
         tactics = collections.defaultdict(set)

         for rule in rutils.capability_rules(doc):
-            if not rule['meta'].get('att&ck'):
+            if not rule["meta"].get("att&ck"):
                 continue

-            for attack in rule['meta']['att&ck']:
-                tactic, _, rest = attack.partition('::')
-                if '::' in rest:
-                    technique, _, rest = rest.partition('::')
-                    subtechnique, _, id = rest.rpartition(' ')
+            for attack in rule["meta"]["att&ck"]:
+                tactic, _, rest = attack.partition("::")
+                if "::" in rest:
+                    technique, _, rest = rest.partition("::")
+                    subtechnique, _, id = rest.rpartition(" ")
                     tactics[tactic].add((technique, subtechnique, id))
                 else:
-                    technique, _, id = rest.rpartition(' ')
+                    technique, _, id = rest.rpartition(" ")
                     tactics[tactic].add((technique, id))

         column_one = []
@@ -422,17 +419,17 @@ class CapaExplorerForm(idaapi.PluginForm):
         for tactic, techniques in sorted(tactics.items()):
             column_one.append(tactic.upper())
-            column_one.extend(['' for i in range(len(techniques) - 1)])
+            column_one.extend(["" for i in range(len(techniques) - 1)])

             for spec in sorted(techniques):
                 if len(spec) == 2:
                     technique, id = spec
-                    column_two.append('%s %s' % (technique, id))
+                    column_two.append("%s %s" % (technique, id))
                 elif len(spec) == 3:
                     technique, subtechnique, id = spec
-                    column_two.append('%s::%s %s' % (technique, subtechnique, id))
+                    column_two.append("%s::%s %s" % (technique, subtechnique, id))
                 else:
-                    raise RuntimeError('unexpected ATT&CK spec format')
+                    raise RuntimeError("unexpected ATT&CK spec format")

         self.view_attack.setRowCount(max(len(column_one), len(column_two)))
@@ -471,8 +468,8 @@ class CapaExplorerForm(idaapi.PluginForm):
         self.view_summary.setRowCount(0)

         self.load_capa_results()

-        logger.info('reload complete.')
-        idaapi.info('%s reload completed.' % PLUGIN_NAME)
+        logger.info("reload complete.")
+        idaapi.info("%s reload completed." % PLUGIN_NAME)

     def reset(self):
         """ reset user interface elements
@@ -481,8 +478,8 @@ class CapaExplorerForm(idaapi.PluginForm):
         """
         self.ida_reset()

-        logger.info('reset completed.')
-        idaapi.info('%s reset completed.' % PLUGIN_NAME)
+        logger.info("reset completed.")
+        idaapi.info("%s reset completed." % PLUGIN_NAME)

     def slot_menu_bar_hovered(self, action):
         """ display menu action tooltip
@@ -491,7 +488,9 @@ class CapaExplorerForm(idaapi.PluginForm):
         @reference: https://stackoverflow.com/questions/21725119/why-wont-qtooltips-appear-on-qactions-within-a-qmenu
         """
-        QtWidgets.QToolTip.showText(QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action))
+        QtWidgets.QToolTip.showText(
+            QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action)
+        )

     def slot_checkbox_limit_by_changed(self):
         """ slot activated if checkbox clicked
@@ -499,7 +498,7 @@ class CapaExplorerForm(idaapi.PluginForm):
         if checked, configure function filter if screen location is located
         in function, otherwise clear filter
         """
-        match = ''
+        match = ""
         if self.view_checkbox_limit_by.isChecked():
             location = capa.ida.helpers.get_func_start_ea(idaapi.get_screen_ea())
             if location:
@@ -530,5 +529,5 @@ def main():
     CAPA_EXPLORER_FORM.Show()


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
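render_capa_doc_attack above assumes tags shaped like "Tactic::Technique ID" or "Tactic::Technique::Sub-technique ID". A walk-through of the partition/rpartition logic on one representative (hypothetical) tag:

attack = "Defense Evasion::Obfuscated Files or Information::Indicator Removal from Tools T1027.005"

tactic, _, rest = attack.partition("::")
if "::" in rest:
    technique, _, rest = rest.partition("::")
    subtechnique, _, id = rest.rpartition(" ")
    spec = (technique, subtechnique, id)
else:
    technique, _, id = rest.rpartition(" ")
    spec = (technique, id)

assert tactic == "Defense Evasion"
assert spec == ("Obfuscated Files or Information", "Indicator Removal from Tools", "T1027.005")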
View File
@@ -19,10 +19,10 @@ from capa.ida import plugin_helpers

 import capa.features.extractors.ida.helpers

-logger = logging.getLogger('rulegen')
+logger = logging.getLogger("rulegen")

-AUTHOR_NAME = ''
+AUTHOR_NAME = ""

 COLOR_HIGHLIGHT = 0xD096FF
@@ -35,11 +35,11 @@ def get_func_start(ea):
 class Hooks(idaapi.UI_Hooks):
-    '''
+    """
     Notifies the plugin when navigating to another function

     NOTE: it uses the global variable FLEX to access the
     PluginForm object. This looks nasty, maybe there is a better way?
-    '''
+    """

     def screen_ea_changed(self, ea, prev_ea):
         widget = idaapi.get_current_widget()
@@ -55,14 +55,13 @@ class Hooks(idaapi.UI_Hooks):
                 # changed to another function
                 RULE_GEN_FORM.reload_features_tree()
         except Exception as e:
-            logger.warn('exception: %s', e)
+            logger.warn("exception: %s", e)


 class RuleGeneratorForm(idaapi.PluginForm):
     def __init__(self):
         super(RuleGeneratorForm, self).__init__()

-        self.title = 'capa rule generator'
+        self.title = "capa rule generator"
         self.parent = None
         self.parent_items = {}
@@ -70,7 +69,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
         self.hooks = Hooks()  # dirty?
         if self.hooks.hook():
-            logger.info('UI notification hook installed successfully')
+            logger.info("UI notification hook installed successfully")

     def init_ui(self):
         self.tree = QTreeWidget()
@@ -79,7 +78,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
         self.reload_features_tree()

-        button_reset = QtWidgets.QPushButton('&Reset')
+        button_reset = QtWidgets.QPushButton("&Reset")
         button_reset.clicked.connect(self.reset)

         h_layout = QtWidgets.QHBoxLayout()
@@ -96,7 +95,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
     def reset(self):
         plugin_helpers.reset_selection(self.tree)
         plugin_helpers.reset_colors(self.orig_colors)
-        self.rule_text.setText('')
+        self.rule_text.setText("")

     def reload_features_tree(self):
         self.reset()
@@ -119,7 +118,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
         extractor = capa.features.extractors.ida.IdaFeatureExtractor()
         f = idaapi.get_func(idaapi.get_screen_ea())
         if not f:
-            logger.info('function does not exist at 0x%x', idaapi.get_screen_ea())
+            logger.info("function does not exist at 0x%x", idaapi.get_screen_ea())
             return
         return self.extract_function_features(f)
@@ -137,7 +136,7 @@ class RuleGeneratorForm(idaapi.PluginForm):
     def create_tree(self, features):
         self.tree.setMinimumWidth(400)
         # self.tree.setMinimumHeight(300)
-        self.tree.setHeaderLabels(['Feature', 'Virtual Address', 'Disassembly'])
+        self.tree.setHeaderLabels(["Feature", "Virtual Address", "Disassembly"])
         # auto resize columns
         self.tree.header().setSectionResizeMode(QHeaderView.ResizeToContents)
         self.tree.itemClicked.connect(self.on_item_clicked)
@@ -151,16 +150,22 @@ class RuleGeneratorForm(idaapi.PluginForm):
             # level 1
             if feature not in self.parent_items:
-                self.parent_items[feature] = plugin_helpers.add_child_item(self.parent_items[type(feature)], [str(feature)])
+                self.parent_items[feature] = plugin_helpers.add_child_item(
+                    self.parent_items[type(feature)], [str(feature)]
+                )

             # level n > 1
             if len(vas) > 1:
                 for va in sorted(vas):
-                    plugin_helpers.add_child_item(self.parent_items[feature], [str(feature), '0x%X' % va, plugin_helpers.get_disasm_line(va)], feature)
+                    plugin_helpers.add_child_item(
+                        self.parent_items[feature],
+                        [str(feature), "0x%X" % va, plugin_helpers.get_disasm_line(va)],
+                        feature,
+                    )
             else:
                 va = vas.pop()
                 self.parent_items[feature].setText(0, str(feature))
-                self.parent_items[feature].setText(1, '0x%X' % va)
+                self.parent_items[feature].setText(1, "0x%X" % va)
                 self.parent_items[feature].setText(2, plugin_helpers.get_disasm_line(va))
                 self.parent_items[feature].setData(0, 0x100, feature)
@@ -188,29 +193,31 @@ class RuleGeneratorForm(idaapi.PluginForm):
     def get_rule_from_features(self, features):
         rule_parts = []

-        counted = zip(Counter(features).keys(),    # equals to list(set(words))
-                      Counter(features).values())  # counts the elements' frequency
+        counted = zip(
+            Counter(features).keys(), Counter(features).values()  # equals to list(set(words))
+        )  # counts the elements' frequency

         # single features
         for k, v in filter(lambda t: t[1] == 1, counted):
             # TODO args to hex if int
-            if k.name.lower() == 'bytes':
+            if k.name.lower() == "bytes":
                 # Convert raw bytes to uppercase hex representation (e.g., '12 34 56')
                 upper_hex_bytes = binascii.hexlify(args_to_str(k.args)).upper()
-                rule_value_str = ''
+                rule_value_str = ""
                 for i in range(0, len(upper_hex_bytes), 2):
-                    rule_value_str += upper_hex_bytes[i:i + 2] + ' '
-                r = ' - %s: %s' % (k.name.lower(), rule_value_str)
+                    rule_value_str += upper_hex_bytes[i : i + 2] + " "
+                r = " - %s: %s" % (k.name.lower(), rule_value_str)
             else:
-                r = ' - %s: %s' % (k.name.lower(), args_to_str(k.args))
+                r = " - %s: %s" % (k.name.lower(), args_to_str(k.args))
             rule_parts.append(r)

         # counted features
         for k, v in filter(lambda t: t[1] > 1, counted):
-            r = ' - count(%s): %d' % (str(k), v)
+            r = " - count(%s): %d" % (str(k), v)
             rule_parts.append(r)

-        rule_prefix = textwrap.dedent('''
+        rule_prefix = textwrap.dedent(
+            """
             rule:
               meta:
                 name:
@@ -219,8 +226,10 @@ class RuleGeneratorForm(idaapi.PluginForm):
                 examples:
                   - %s:0x%X
              features:
-        ''' % (AUTHOR_NAME, idc.retrieve_input_file_md5(), get_func_start(idc.here()))).strip()
-        return '%s\n%s' % (rule_prefix, '\n'.join(sorted(rule_parts)))
+        """
+            % (AUTHOR_NAME, idc.retrieve_input_file_md5(), get_func_start(idc.here()))
+        ).strip()
+        return "%s\n%s" % (rule_prefix, "\n".join(sorted(rule_parts)))

     # TODO merge into capa_idautils, get feature data
     def get_selected_items(self):
@@ -242,26 +251,25 @@ class RuleGeneratorForm(idaapi.PluginForm):
         self.init_ui()

     def Show(self):
-        return idaapi.PluginForm.Show(self, self.title, options=(
-            idaapi.PluginForm.WOPN_RESTORE
-            | idaapi.PluginForm.WOPN_PERSIST
-        ))
+        return idaapi.PluginForm.Show(
+            self, self.title, options=(idaapi.PluginForm.WOPN_RESTORE | idaapi.PluginForm.WOPN_PERSIST)
+        )

     def OnClose(self, form):
         self.reset()
         if self.hooks.unhook():
-            logger.info('UI notification hook uninstalled successfully')
-        logger.info('RuleGeneratorForm closed')
+            logger.info("UI notification hook uninstalled successfully")
+        logger.info("RuleGeneratorForm closed")


 def args_to_str(args):
     a = []
     for arg in args:
         if (isinstance(arg, int) or isinstance(arg, long)) and arg > 10:
-            a.append('0x%X' % arg)
+            a.append("0x%X" % arg)
         else:
             a.append(str(arg))
-    return ','.join(a)
+    return ",".join(a)


 def main():
@@ -280,5 +288,5 @@ def main():
     RULE_GEN_FORM.Show()


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
View File
@@ -8,34 +8,34 @@ import idc
 import idaapi

-CAPA_EXTENSION = '.capas'
-logger = logging.getLogger('capa_ida')
+CAPA_EXTENSION = ".capas"
+logger = logging.getLogger("capa_ida")


 def get_input_file(freeze=True):
-    '''
+    """
     get input file path
     freeze (bool): if True, get freeze file if it exists
-    '''
+    """
     # try original file in same directory as idb/i64 without idb/i64 file extension
     input_file = idc.get_idb_path()[:-4]
     if freeze:
         # use frozen file if it exists
-        freeze_file_cand = '%s%s' % (input_file, CAPA_EXTENSION)
+        freeze_file_cand = "%s%s" % (input_file, CAPA_EXTENSION)
         if os.path.isfile(freeze_file_cand):
             return freeze_file_cand
     if not os.path.isfile(input_file):
         # TM naming
-        input_file = '%s.mal_' % idc.get_idb_path()[:-4]
+        input_file = "%s.mal_" % idc.get_idb_path()[:-4]
     if not os.path.isfile(input_file):
-        input_file = idaapi.ask_file(0, '*.*', 'Please specify input file.')
+        input_file = idaapi.ask_file(0, "*.*", "Please specify input file.")
     if not input_file:
-        raise ValueError('could not find input file')
+        raise ValueError("could not find input file")
     return input_file
View File
@@ -1,7 +1,7 @@
 #!/usr/bin/env python2
-'''
+"""
 capa - detect capabilities in programs.
-'''
+"""
 import os
 import os.path
 import sys
@@ -23,16 +23,16 @@ import capa.features.extractors
 from capa.helpers import oint

-SUPPORTED_FILE_MAGIC = set(['MZ'])
-logger = logging.getLogger('capa')
+SUPPORTED_FILE_MAGIC = set(["MZ"])
+logger = logging.getLogger("capa")


 def set_vivisect_log_level(level):
-    logging.getLogger('vivisect').setLevel(level)
-    logging.getLogger('vtrace').setLevel(level)
-    logging.getLogger('envi').setLevel(level)
+    logging.getLogger("vivisect").setLevel(level)
+    logging.getLogger("vtrace").setLevel(level)
+    logging.getLogger("envi").setLevel(level)


 def find_function_capabilities(ruleset, extractor, f):
@@ -83,7 +83,7 @@ def find_file_capabilities(ruleset, extractor, function_features):
         if feature not in file_features:
             file_features[feature] = set()

-    logger.info('analyzed file and extracted %d features', len(file_features))
+    logger.info("analyzed file and extracted %d features", len(file_features))

     file_features.update(function_features)
@@ -95,7 +95,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
     all_function_matches = collections.defaultdict(list)
     all_bb_matches = collections.defaultdict(list)

-    for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=' functions'):
+    for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
         function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
         for rule_name, res in function_matches.items():
             all_function_matches[rule_name].extend(res)
@@ -104,8 +104,10 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
     # mapping from matched rule feature to set of addresses at which it matched.
     # type: Dict[MatchedRule, Set[int]]
-    function_features = {capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
-                         for rule_name, results in all_function_matches.items()}
+    function_features = {
+        capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
+        for rule_name, results in all_function_matches.items()
+    }

     all_file_matches = find_file_capabilities(ruleset, extractor, function_features)
@@ -119,7 +121,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
 def has_rule_with_namespace(rules, capabilities, rule_cat):
     for rule_name in capabilities.keys():
-        if rules.rules[rule_name].meta.get('namespace', '').startswith(rule_cat):
+        if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
             return True
     return False
@@ -128,61 +130,61 @@ def has_file_limitation(rules, capabilities, is_standalone=True):
     file_limitations = {
         # capa will likely detect installer specific functionality.
         # this is probably not what the user wants.
-        'executable/installer': [
-            ' This sample appears to be an installer.',
-            ' ',
-            ' capa cannot handle installers well. This means the results may be misleading or incomplete.'
-            ' You should try to understand the install mechanism and analyze created files with capa.'
-        ],
+        "executable/installer": [
+            " This sample appears to be an installer.",
+            " ",
+            " capa cannot handle installers well. This means the results may be misleading or incomplete."
+            " You should try to understand the install mechanism and analyze created files with capa.",
+        ],
         # capa won't detect much in .NET samples.
         # it might match some file-level things.
         # for consistency, bail on things that we don't support.
-        'runtime/dotnet': [
-            ' This sample appears to be a .NET module.',
-            ' ',
-            ' .NET is a cross-platform framework for running managed applications.',
-            ' capa cannot handle non-native files. This means that the results may be misleading or incomplete.',
-            ' You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.'
-        ],
+        "runtime/dotnet": [
+            " This sample appears to be a .NET module.",
+            " ",
+            " .NET is a cross-platform framework for running managed applications.",
+            " capa cannot handle non-native files. This means that the results may be misleading or incomplete.",
+            " You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.",
+        ],
         # capa will detect dozens of capabilities for AutoIt samples,
         # but these are due to the AutoIt runtime, not the payload script.
         # so, don't confuse the user with FP matches - bail instead
-        'compiler/autoit': [
-            ' This sample appears to be compiled with AutoIt.',
-            ' ',
-            ' AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.',
-            ' capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.',
-            ' You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.'
-        ],
+        "compiler/autoit": [
+            " This sample appears to be compiled with AutoIt.",
+            " ",
+            " AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.",
+            " capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.",
+            " You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.",
+        ],
         # capa won't detect much in packed samples
-        'anti-analysis/packer/': [
-            ' This sample appears to be packed.',
-            ' ',
-            ' Packed samples have often been obfuscated to hide their logic.',
-            ' capa cannot handle obfuscation well. This means the results may be misleading or incomplete.',
-            ' If possible, you should try to unpack this input file before analyzing it with capa.'
-        ]
+        "anti-analysis/packer/": [
+            " This sample appears to be packed.",
+            " ",
+            " Packed samples have often been obfuscated to hide their logic.",
+            " capa cannot handle obfuscation well. This means the results may be misleading or incomplete.",
+            " If possible, you should try to unpack this input file before analyzing it with capa.",
+        ],
     }
     for category, dialogue in file_limitations.items():
         if not has_rule_with_namespace(rules, capabilities, category):
             continue
-        logger.warning('-' * 80)
+        logger.warning("-" * 80)
         for line in dialogue:
             logger.warning(line)
         if is_standalone:
-            logger.warning(' ')
-            logger.warning(' Use -v or -vv if you really want to see the capabilities identified by capa.')
-        logger.warning('-' * 80)
+            logger.warning(" ")
+            logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
+        logger.warning("-" * 80)
         return True
     return False


 def is_supported_file_type(sample):
-    '''
+    """
     Return if this is a supported file based on magic header values
-    '''
-    with open(sample, 'rb') as f:
+    """
+    with open(sample, "rb") as f:
         magic = f.read(2)
     if magic in SUPPORTED_FILE_MAGIC:
         return True
@@ -190,36 +192,37 @@ def is_supported_file_type(sample):
     return False
-def get_shellcode_vw(sample, arch='auto'):
-    '''
+def get_shellcode_vw(sample, arch="auto"):
+    """
     Return shellcode workspace using explicit arch or via auto detect
-    '''
+    """
     import viv_utils
-    with open(sample, 'rb') as f:
+
+    with open(sample, "rb") as f:
         sample_bytes = f.read()
-    if arch == 'auto':
+    if arch == "auto":
         # choose arch with most functions, idea by Jay G.
         vw_cands = []
-        for arch in ['i386', 'amd64']:
+        for arch in ["i386", "amd64"]:
             vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch))
         if not vw_cands:
-            raise ValueError('could not generate vivisect workspace')
+            raise ValueError("could not generate vivisect workspace")
         vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
     else:
         vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch)
-    vw.setMeta('Format', 'blob')  # TODO fix in viv_utils
+    vw.setMeta("Format", "blob")  # TODO fix in viv_utils
     return vw


 def get_meta_str(vw):
-    '''
+    """
     Return workspace meta information string
-    '''
+    """
     meta = []
-    for k in ['Format', 'Platform', 'Architecture']:
+    for k in ["Format", "Platform", "Architecture"]:
         if k in vw.metadata:
-            meta.append('%s: %s' % (k.lower(), vw.metadata[k]))
-    return '%s, number of functions: %d' % (', '.join(meta), len(vw.getFunctions()))
+            meta.append("%s: %s" % (k.lower(), vw.metadata[k]))
+    return "%s, number of functions: %d" % (", ".join(meta), len(vw.getFunctions()))


 class UnsupportedFormatError(ValueError):
@@ -228,23 +231,25 @@ class UnsupportedFormatError(ValueError):
 def get_workspace(path, format):
     import viv_utils
-    logger.info('generating vivisect workspace for: %s', path)
-    if format == 'auto':
+
+    logger.info("generating vivisect workspace for: %s", path)
+    if format == "auto":
         if not is_supported_file_type(path):
             raise UnsupportedFormatError()
         vw = viv_utils.getWorkspace(path)
-    elif format == 'pe':
+    elif format == "pe":
         vw = viv_utils.getWorkspace(path)
-    elif format == 'sc32':
-        vw = get_shellcode_vw(path, arch='i386')
-    elif format == 'sc64':
-        vw = get_shellcode_vw(path, arch='amd64')
-    logger.info('%s', get_meta_str(vw))
+    elif format == "sc32":
+        vw = get_shellcode_vw(path, arch="i386")
+    elif format == "sc64":
+        vw = get_shellcode_vw(path, arch="amd64")
+    logger.info("%s", get_meta_str(vw))
     return vw


 def get_extractor_py2(path, format):
     import capa.features.extractors.viv

     vw = get_workspace(path, format)
     return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
@@ -258,10 +263,10 @@ def get_extractor_py3(path, format):
 def get_extractor(path, format):
-    '''
+    """
     raises:
       UnsupportedFormatError:
-    '''
+    """
     if sys.version_info >= (3, 0):
         return get_extractor_py3(path, format)
     else:
@@ -269,7 +274,7 @@ def get_extractor(path, format):
 def is_nursery_rule_path(path):
-    '''
+    """
     The nursery is a spot for rules that have not yet been fully polished.
     For example, they may not have references to public example of a technique.
     Yet, we still want to capture and report on their matches.
@@ -277,23 +282,23 @@ def is_nursery_rule_path(path):
     When nursery rules are loaded, their metadata section should be updated with:
     `nursery=True`.
-    '''
-    return 'nursery' in path
+    """
+    return "nursery" in path


 def get_rules(rule_path):
     if not os.path.exists(rule_path):
-        raise IOError('%s does not exist or cannot be accessed' % rule_path)
+        raise IOError("%s does not exist or cannot be accessed" % rule_path)

     rule_paths = []
     if os.path.isfile(rule_path):
         rule_paths.append(rule_path)
     elif os.path.isdir(rule_path):
-        logger.debug('reading rules from directory %s', rule_path)
+        logger.debug("reading rules from directory %s", rule_path)
         for root, dirs, files in os.walk(rule_path):
             for file in files:
-                if not file.endswith('.yml'):
-                    logger.warning('skipping non-.yml file: %s', file)
+                if not file.endswith(".yml"):
+                    logger.warning("skipping non-.yml file: %s", file)
                     continue
                 rule_path = os.path.join(root, file)
@@ -301,18 +306,18 @@ def get_rules(rule_path):
     rules = []
     for rule_path in rule_paths:
-        logger.debug('reading rule file: %s', rule_path)
+        logger.debug("reading rule file: %s", rule_path)
         try:
             rule = capa.rules.Rule.from_yaml_file(rule_path)
         except capa.rules.InvalidRule:
             raise
         else:
-            rule.meta['capa/path'] = rule_path
+            rule.meta["capa/path"] = rule_path
             if is_nursery_rule_path(rule_path):
-                rule.meta['capa/nursery'] = True
+                rule.meta["capa/nursery"] = True
             rules.append(rule)
-            logger.debug('rule: %s scope: %s', rule.name, rule.scope)
+            logger.debug("rule: %s scope: %s", rule.name, rule.scope)
     return rules
@@ -322,35 +327,37 @@ def main(argv=None):
         argv = sys.argv[1:]

     formats = [
-        ('auto', '(default) detect file type automatically'),
-        ('pe', 'Windows PE file'),
-        ('sc32', '32-bit shellcode'),
-        ('sc64', '64-bit shellcode'),
-        ('freeze', 'features previously frozen by capa'),
+        ("auto", "(default) detect file type automatically"),
+        ("pe", "Windows PE file"),
+        ("sc32", "32-bit shellcode"),
+        ("sc64", "64-bit shellcode"),
+        ("freeze", "features previously frozen by capa"),
     ]
-    format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])
-    parser = argparse.ArgumentParser(description='detect capabilities in programs.')
-    parser.add_argument('sample', type=str,
-                        help='Path to sample to analyze')
-    parser.add_argument('-r', '--rules', type=str, default='(embedded rules)',
-                        help='Path to rule file or directory, use embedded rules by default')
-    parser.add_argument('-t', '--tag', type=str,
-                        help='Filter on rule meta field values')
-    parser.add_argument('--version', action='store_true',
-                        help='Print the executable version and exit')
-    parser.add_argument('-j', '--json', action='store_true',
-                        help='Emit JSON instead of text')
-    parser.add_argument('-v', '--verbose', action='store_true',
-                        help='Enable verbose result document (no effect with --json)')
-    parser.add_argument('-vv', '--vverbose', action='store_true',
-                        help='Enable very verbose result document (no effect with --json)')
-    parser.add_argument('-d', '--debug', action='store_true',
-                        help='Enable debugging output on STDERR')
-    parser.add_argument('-q', '--quiet', action='store_true',
-                        help='Disable all output but errors')
-    parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
-                        help='Select sample format, %s' % format_help)
+    format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
+    parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+    parser.add_argument("sample", type=str, help="Path to sample to analyze")
+    parser.add_argument(
+        "-r",
+        "--rules",
+        type=str,
+        default="(embedded rules)",
+        help="Path to rule file or directory, use embedded rules by default",
+    )
+    parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values")
+    parser.add_argument("--version", action="store_true", help="Print the executable version and exit")
+    parser.add_argument("-j", "--json", action="store_true", help="Emit JSON instead of text")
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable verbose result document (no effect with --json)"
+    )
+    parser.add_argument(
+        "-vv", "--vverbose", action="store_true", help="Enable very verbose result document (no effect with --json)"
+    )
+    parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
+    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
+    parser.add_argument(
+        "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
+    )
     args = parser.parse_args(args=argv)

     if args.version:
@@ -375,68 +382,70 @@ def main(argv=None):
     # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
     # see #380 and: https://stackoverflow.com/a/3259271/87207
     import codecs
-    codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
+
+    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)

-    if args.rules == '(embedded rules)':
-        logger.info('-' * 80)
-        logger.info(' Using default embedded rules.')
-        logger.info(' To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.')
-        logger.info(' You can see the current default rule set here:')
-        logger.info(' https://github.com/fireeye/capa-rules')
-        logger.info('-' * 80)
+    if args.rules == "(embedded rules)":
+        logger.info("-" * 80)
+        logger.info(" Using default embedded rules.")
+        logger.info(" To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.")
+        logger.info(" You can see the current default rule set here:")
+        logger.info(" https://github.com/fireeye/capa-rules")
+        logger.info("-" * 80)

-        if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
-            logger.debug('detected running under PyInstaller')
-            args.rules = os.path.join(sys._MEIPASS, 'rules')
-            logger.debug('default rule path (PyInstaller method): %s', args.rules)
+        if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
+            logger.debug("detected running under PyInstaller")
+            args.rules = os.path.join(sys._MEIPASS, "rules")
+            logger.debug("default rule path (PyInstaller method): %s", args.rules)
         else:
-            logger.debug('detected running from source')
-            args.rules = os.path.join(os.path.dirname(__file__), '..', 'rules')
-            logger.debug('default rule path (source method): %s', args.rules)
+            logger.debug("detected running from source")
+            args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
+            logger.debug("default rule path (source method): %s", args.rules)
     else:
-        logger.info('using rules path: %s', args.rules)
+        logger.info("using rules path: %s", args.rules)

     try:
         rules = get_rules(args.rules)
         rules = capa.rules.RuleSet(rules)
-        logger.info('successfully loaded %s rules', len(rules))
+        logger.info("successfully loaded %s rules", len(rules))
         if args.tag:
             rules = rules.filter_rules_by_meta(args.tag)
-            logger.info('selected %s rules', len(rules))
+            logger.info("selected %s rules", len(rules))
             for i, r in enumerate(rules.rules, 1):
                 # TODO don't display subscope rules?
-                logger.debug(' %d. %s', i, r)
+                logger.debug(" %d. %s", i, r)
     except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
-        logger.error('%s', str(e))
+        logger.error("%s", str(e))
         return -1

-    with open(args.sample, 'rb') as f:
+    with open(args.sample, "rb") as f:
         taste = f.read(8)

-    if ((args.format == 'freeze')
-            or (args.format == 'auto' and capa.features.freeze.is_freeze(taste))):
-        with open(args.sample, 'rb') as f:
+    if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
+        with open(args.sample, "rb") as f:
             extractor = capa.features.freeze.load(f.read())
     else:
         try:
             extractor = get_extractor(args.sample, args.format)
         except UnsupportedFormatError:
-            logger.error('-' * 80)
-            logger.error(' Input file does not appear to be a PE file.')
-            logger.error(' ')
-            logger.error(' capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).')
-            logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
-            logger.error('-' * 80)
+            logger.error("-" * 80)
+            logger.error(" Input file does not appear to be a PE file.")
+            logger.error(" ")
+            logger.error(
+                " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
+            )
+            logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
+            logger.error("-" * 80)
             return -1
         except UnsupportedRuntimeError:
-            logger.error('-' * 80)
-            logger.error(' Unsupported runtime or Python interpreter.')
-            logger.error(' ')
-            logger.error(' capa supports running under Python 2.7 using Vivisect for binary analysis.')
-            logger.error(' It can also run within IDA Pro, using either Python 2.7 or 3.5+.')
-            logger.error(' ')
-            logger.error(' If you\'re seeing this message on the command line, please ensure you\'re running Python 2.7.')
-            logger.error('-' * 80)
+            logger.error("-" * 80)
+            logger.error(" Unsupported runtime or Python interpreter.")
+            logger.error(" ")
+            logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
+            logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
+            logger.error(" ")
+            logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
+            logger.error("-" * 80)
             return -1

     capabilities = find_capabilities(rules, extractor)
@@ -462,7 +471,7 @@ def main(argv=None):
         print(capa.render.render_default(rules, capabilities))
     colorama.deinit()

-    logger.info('done.')
+    logger.info("done.")
     return 0
@@ -472,34 +481,37 @@ def ida_main():
     logging.getLogger().setLevel(logging.INFO)

     import capa.ida.helpers

     if not capa.ida.helpers.is_supported_file_type():
         return -1

-    logger.info('-' * 80)
-    logger.info(' Using default embedded rules.')
-    logger.info(' ')
-    logger.info(' You can see the current default rule set here:')
-    logger.info(' https://github.com/fireeye/capa-rules')
-    logger.info('-' * 80)
+    logger.info("-" * 80)
+    logger.info(" Using default embedded rules.")
+    logger.info(" ")
+    logger.info(" You can see the current default rule set here:")
+    logger.info(" https://github.com/fireeye/capa-rules")
+    logger.info("-" * 80)

-    if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
-        logger.debug('detected running under PyInstaller')
-        rules_path = os.path.join(sys._MEIPASS, 'rules')
-        logger.debug('default rule path (PyInstaller method): %s', rules_path)
+    if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
+        logger.debug("detected running under PyInstaller")
+        rules_path = os.path.join(sys._MEIPASS, "rules")
+        logger.debug("default rule path (PyInstaller method): %s", rules_path)
     else:
-        logger.debug('detected running from source')
-        rules_path = os.path.join(os.path.dirname(__file__), '..', 'rules')
-        logger.debug('default rule path (source method): %s', rules_path)
+        logger.debug("detected running from source")
+        rules_path = os.path.join(os.path.dirname(__file__), "..", "rules")
+        logger.debug("default rule path (source method): %s", rules_path)

     rules = get_rules(rules_path)
     import capa.rules

     rules = capa.rules.RuleSet(rules)
     import capa.features.extractors.ida

     capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())

     if has_file_limitation(rules, capabilities, is_standalone=False):
-        capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')
+        capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")

     render_capabilities_default(rules, capabilities)
@@ -513,7 +525,7 @@ def is_runtime_ida():
     return True


-if __name__ == '__main__':
+if __name__ == "__main__":
     if is_runtime_ida():
         ida_main()
     else:
View File
@@ -18,43 +18,41 @@ def convert_statement_to_result_document(statement):
""" """
if isinstance(statement, capa.engine.And): if isinstance(statement, capa.engine.And):
return { return {
'type': 'and', "type": "and",
} }
elif isinstance(statement, capa.engine.Or): elif isinstance(statement, capa.engine.Or):
return { return {
'type': 'or', "type": "or",
} }
elif isinstance(statement, capa.engine.Not): elif isinstance(statement, capa.engine.Not):
return { return {
'type': 'not', "type": "not",
} }
elif isinstance(statement, capa.engine.Some) and statement.count == 0: elif isinstance(statement, capa.engine.Some) and statement.count == 0:
return { return {"type": "optional"}
'type': 'optional'
}
elif isinstance(statement, capa.engine.Some) and statement.count > 0: elif isinstance(statement, capa.engine.Some) and statement.count > 0:
return { return {
'type': 'some', "type": "some",
'count': statement.count, "count": statement.count,
} }
elif isinstance(statement, capa.engine.Range): elif isinstance(statement, capa.engine.Range):
return { return {
'type': 'range', "type": "range",
'min': statement.min, "min": statement.min,
'max': statement.max, "max": statement.max,
'child': convert_feature_to_result_document(statement.child), "child": convert_feature_to_result_document(statement.child),
} }
elif isinstance(statement, capa.engine.Regex): elif isinstance(statement, capa.engine.Regex):
return { return {
'type': 'regex', "type": "regex",
'pattern': statement.pattern, "pattern": statement.pattern,
# the string that was matched # the string that was matched
'match': statement.match, "match": statement.match,
} }
elif isinstance(statement, capa.engine.Subscope): elif isinstance(statement, capa.engine.Subscope):
return { return {
'type': 'subscope', "type": "subscope",
'subscope': statement.scope, "subscope": statement.scope,
} }
else: else:
raise RuntimeError("unexpected match statement type: " + str(statement)) raise RuntimeError("unexpected match statement type: " + str(statement))
@@ -89,8 +87,8 @@ def convert_feature_to_result_document(feature):
     # make the terms pretty
     name = name.lower()
-    if name == 'matchedrule':
-        name = 'match'
+    if name == "matchedrule":
+        name = "match"

     # in the common case, there's a single argument
     # so use it directly.
@@ -99,7 +97,7 @@ def convert_feature_to_result_document(feature):
         value = value[0]

     return {
-        'type': name,
+        "type": name,
         name: value,
     }
@@ -119,13 +117,13 @@ def convert_node_to_result_document(node):
     if isinstance(node, capa.engine.Statement):
         return {
-            'type': 'statement',
-            'statement': convert_statement_to_result_document(node),
+            "type": "statement",
+            "statement": convert_statement_to_result_document(node),
         }
     elif isinstance(node, capa.features.Feature):
         return {
-            'type': 'feature',
-            'feature': convert_feature_to_result_document(node),
+            "type": "feature",
+            "feature": convert_feature_to_result_document(node),
         }
     else:
         raise RuntimeError("unexpected match node type")
@@ -137,19 +135,16 @@ def convert_match_to_result_document(rules, capabilities, result):
     this will become part of the "result document" format that can be emitted to JSON.
     """
     doc = {
-        'success': bool(result.success),
-        'node': convert_node_to_result_document(result.statement),
-        'children': [
-            convert_match_to_result_document(rules, capabilities, child)
-            for child in result.children
-        ],
+        "success": bool(result.success),
+        "node": convert_node_to_result_document(result.statement),
+        "children": [convert_match_to_result_document(rules, capabilities, child) for child in result.children],
     }

     # logic expressions, like `and`, don't have locations - their children do.
     # so only add `locations` to feature nodes.
     if isinstance(result.statement, capa.features.Feature):
         if bool(result.success):
-            doc['locations'] = result.locations
+            doc["locations"] = result.locations

     # if we have a `match` statement, then we're referencing another rule.
     # this could be an external rule (written by a human), or
@@ -159,31 +154,30 @@ def convert_match_to_result_document(rules, capabilities, result):
     # so, we need to lookup the other rule results
     # and then filter those down to the address used here.
     # finally, splice that logic into this tree.
-    if (doc['node']['type'] == 'feature'
-            and doc['node']['feature']['type'] == 'match'
-            # only add subtree on success,
-            # because there won't be results for the other rule on failure.
-            and doc['success']):
-        rule_name = doc['node']['feature']['match']
+    if (
+        doc["node"]["type"] == "feature"
+        and doc["node"]["feature"]["type"] == "match"
+        # only add subtree on success,
+        # because there won't be results for the other rule on failure.
+        and doc["success"]
+    ):
+        rule_name = doc["node"]["feature"]["match"]
         rule = rules[rule_name]
         rule_matches = {address: result for (address, result) in capabilities[rule_name]}

-        if rule.meta.get('capa/subscope-rule'):
+        if rule.meta.get("capa/subscope-rule"):
             # for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
             #
             # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
-            scope = rule.meta['scope']
-            doc['node'] = {
-                'type': 'statement',
-                'statement': {
-                    'type': 'subscope',
-                    'subscope': scope,
-                },
+            scope = rule.meta["scope"]
+            doc["node"] = {
+                "type": "statement",
+                "statement": {"type": "subscope", "subscope": scope,},
             }

-        for location in doc['locations']:
-            doc['children'].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
+        for location in doc["locations"]:
+            doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))

     return doc
@@ -220,15 +214,14 @@ def convert_capabilities_to_result_document(rules, capabilities):
     for rule_name, matches in capabilities.items():
         rule = rules[rule_name]
-        if rule.meta.get('capa/subscope-rule'):
+        if rule.meta.get("capa/subscope-rule"):
             continue
         doc[rule_name] = {
-            'meta': dict(rule.meta),
-            'source': rule.definition,
-            'matches': {
-                addr: convert_match_to_result_document(rules, capabilities, match)
-                for (addr, match) in matches
+            "meta": dict(rule.meta),
+            "source": rule.definition,
+            "matches": {
+                addr: convert_match_to_result_document(rules, capabilities, match) for (addr, match) in matches
             },
         }
@@ -241,6 +234,7 @@ def render_vverbose(rules, capabilities):
     # and capa.render.vverbose import capa.render (implicitly, as a submodule)
     # so, defer the import until routine is called, breaking the import loop.
     import capa.render.vverbose
+
     doc = convert_capabilities_to_result_document(rules, capabilities)
     return capa.render.vverbose.render_vverbose(doc)
@@ -248,6 +242,7 @@ def render_vverbose(rules, capabilities):
 def render_verbose(rules, capabilities):
     # break import loop
     import capa.render.verbose
+
     doc = convert_capabilities_to_result_document(rules, capabilities)
     return capa.render.verbose.render_verbose(doc)
@@ -256,6 +251,7 @@ def render_default(rules, capabilities):
     # break import loop
     import capa.render.verbose
     import capa.render.default
+
     doc = convert_capabilities_to_result_document(rules, capabilities)
     return capa.render.default.render_default(doc)
@@ -273,7 +269,5 @@ class CapaJsonObjectEncoder(json.JSONEncoder):
 def render_json(rules, capabilities):
     return json.dumps(
-        convert_capabilities_to_result_document(rules, capabilities),
-        cls=CapaJsonObjectEncoder,
-        sort_keys=True,
+        convert_capabilities_to_result_document(rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True,
     )
View File
@@ -9,7 +9,7 @@ import capa.render.utils as rutils
 def width(s, character_count):
     """pad the given string to at least `character_count`"""
     if len(s) < character_count:
-        return s + ' ' * (character_count - len(s))
+        return s + " " * (character_count - len(s))
     else:
         return s
@@ -28,15 +28,15 @@ def render_capabilities(doc, ostream):
""" """
rows = [] rows = []
for rule in rutils.capability_rules(doc): for rule in rutils.capability_rules(doc):
count = len(rule['matches']) count = len(rule["matches"])
if count == 1: if count == 1:
capability = rutils.bold(rule['meta']['name']) capability = rutils.bold(rule["meta"]["name"])
else: else:
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count) capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
rows.append((capability, rule['meta']['namespace'])) rows.append((capability, rule["meta"]["namespace"]))
ostream.write(tabulate.tabulate(rows, headers=[width('CAPABILITY', 40), width('NAMESPACE', 40)], tablefmt='psql')) ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 40), width("NAMESPACE", 40)], tablefmt="psql"))
ostream.write('\n') ostream.write("\n")
def render_attack(doc, ostream): def render_attack(doc, ostream):
@@ -57,17 +57,17 @@ def render_attack(doc, ostream):
""" """
tactics = collections.defaultdict(set) tactics = collections.defaultdict(set)
for rule in rutils.capability_rules(doc): for rule in rutils.capability_rules(doc):
if not rule['meta'].get('att&ck'): if not rule["meta"].get("att&ck"):
continue continue
for attack in rule['meta']['att&ck']: for attack in rule["meta"]["att&ck"]:
tactic, _, rest = attack.partition('::') tactic, _, rest = attack.partition("::")
if '::' in rest: if "::" in rest:
technique, _, rest = rest.partition('::') technique, _, rest = rest.partition("::")
subtechnique, _, id = rest.rpartition(' ') subtechnique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, subtechnique, id)) tactics[tactic].add((technique, subtechnique, id))
else: else:
technique, _, id = rest.rpartition(' ') technique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, id)) tactics[tactic].add((technique, id))
rows = [] rows = []
@@ -76,15 +76,17 @@ def render_attack(doc, ostream):
         for spec in sorted(techniques):
             if len(spec) == 2:
                 technique, id = spec
-                inner_rows.append('%s %s' % (rutils.bold(technique), id))
+                inner_rows.append("%s %s" % (rutils.bold(technique), id))
             elif len(spec) == 3:
                 technique, subtechnique, id = spec
-                inner_rows.append('%s::%s %s' % (rutils.bold(technique), subtechnique, id))
+                inner_rows.append("%s::%s %s" % (rutils.bold(technique), subtechnique, id))
             else:
-                raise RuntimeError('unexpected ATT&CK spec format')
-        rows.append((rutils.bold(tactic.upper()), '\n'.join(inner_rows), ))
+                raise RuntimeError("unexpected ATT&CK spec format")
+        rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),))

-    ostream.write(tabulate.tabulate(rows, headers=[width('ATT&CK Tactic', 20), width('ATT&CK Technique', 60)], tablefmt='psql'))
-    ostream.write('\n')
+    ostream.write(
+        tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 60)], tablefmt="psql")
+    )
+    ostream.write("\n")


 def render_default(doc):
View File
@@ -4,38 +4,40 @@ import termcolor
 def bold(s):
     """draw attention to the given string"""
-    return termcolor.colored(s, 'blue')
+    return termcolor.colored(s, "blue")


 def bold2(s):
     """draw attention to the given string, within a `bold` section"""
-    return termcolor.colored(s, 'green')
+    return termcolor.colored(s, "green")


 def hex(n):
     """render the given number using upper case hex, like: 0x123ABC"""
-    return '0x%X' % n
+    return "0x%X" % n


 def hex_string(h):
     """ render hex string e.g. "0a40b1" as "0A 40 B1" """
-    return ' '.join(h[i:i + 2] for i in range(0, len(h), 2)).upper()
+    return " ".join(h[i : i + 2] for i in range(0, len(h), 2)).upper()


 def capability_rules(doc):
     """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
-    for (_, _, rule) in sorted(map(lambda rule: (rule['meta'].get('namespace', ''), rule['meta']['name'], rule), doc.values())):
-        if rule['meta'].get('lib'):
+    for (_, _, rule) in sorted(
+        map(lambda rule: (rule["meta"].get("namespace", ""), rule["meta"]["name"], rule), doc.values())
+    ):
+        if rule["meta"].get("lib"):
             continue
-        if rule['meta'].get('capa/subscope'):
+        if rule["meta"].get("capa/subscope"):
             continue
-        if rule['meta'].get('maec/analysis-conclusion'):
+        if rule["meta"].get("maec/analysis-conclusion"):
             continue
-        if rule['meta'].get('maec/analysis-conclusion-ov'):
+        if rule["meta"].get("maec/analysis-conclusion-ov"):
             continue
-        if rule['meta'].get('maec/malware-category'):
+        if rule["meta"].get("maec/malware-category"):
             continue
-        if rule['meta'].get('maec/malware-category-ov'):
+        if rule["meta"].get("maec/malware-category-ov"):
             continue
         yield rule
@@ -44,4 +46,4 @@ def capability_rules(doc):
 class StringIO(six.StringIO):
     def writeln(self, s):
         self.write(s)
-        self.write('\n')
+        self.write("\n")
View File
@@ -24,29 +24,29 @@ def render_verbose(doc):
     ostream = rutils.StringIO()

     for rule in rutils.capability_rules(doc):
-        count = len(rule['matches'])
+        count = len(rule["matches"])
         if count == 1:
-            capability = rutils.bold(rule['meta']['name'])
+            capability = rutils.bold(rule["meta"]["name"])
         else:
-            capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count)
+            capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)

         ostream.writeln(capability)

         rows = []
-        for key in ('namespace', 'description', 'scope'):
-            if key == 'name' or key not in rule['meta']:
+        for key in ("namespace", "description", "scope"):
+            if key == "name" or key not in rule["meta"]:
                 continue
-            v = rule['meta'][key]
+            v = rule["meta"][key]
             if isinstance(v, list) and len(v) == 1:
                 v = v[0]
             rows.append((key, v))

-        if rule['meta']['scope'] != capa.rules.FILE_SCOPE:
-            locations = doc[rule['meta']['name']]['matches'].keys()
-            rows.append(('matches', '\n'.join(map(rutils.hex, locations))))
+        if rule["meta"]["scope"] != capa.rules.FILE_SCOPE:
+            locations = doc[rule["meta"]["name"]]["matches"].keys()
+            rows.append(("matches", "\n".join(map(rutils.hex, locations))))

-        ostream.writeln(tabulate.tabulate(rows, tablefmt='plain'))
-        ostream.write('\n')
+        ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
+        ostream.write("\n")

     return ostream.getvalue()
View File
@@ -5,145 +5,147 @@ import capa.render.utils as rutils
 def render_statement(ostream, statement, indent=0):
-    ostream.write(' ' * indent)
-    if statement['type'] in ('and', 'or', 'optional'):
-        ostream.write(statement['type'])
-        ostream.writeln(':')
-    elif statement['type'] == 'not':
+    ostream.write(" " * indent)
+    if statement["type"] in ("and", "or", "optional"):
+        ostream.write(statement["type"])
+        ostream.writeln(":")
+    elif statement["type"] == "not":
         # this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
-        ostream.writeln('not:')
-    elif statement['type'] == 'some':
-        ostream.write(statement['count'] + ' or more')
-        ostream.writeln(':')
-    elif statement['type'] == 'range':
+        ostream.writeln("not:")
+    elif statement["type"] == "some":
+        ostream.write(statement["count"] + " or more")
+        ostream.writeln(":")
+    elif statement["type"] == "range":
         # `range` is a weird node, it's almost a hybrid of statement+feature.
         # it is a specific feature repeated multiple times.
         # there's no additional logic in the feature part, just the existence of a feature.
         # so, we have to inline some of the feature rendering here.
-        child = statement['child']
+        child = statement["child"]

-        if child['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'):
-            feature = '%s(%s)' % (child['type'], rutils.bold2(child[child['type']]))
-        elif child['type'] in ('number', 'offset'):
-            feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex(child[child['type']])))
-        elif child['type'] == 'bytes':
-            feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex_string(child[child['type']])))
-        elif child['type'] == 'characteristic':
-            feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'][0]))
+        if child["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
+            feature = "%s(%s)" % (child["type"], rutils.bold2(child[child["type"]]))
+        elif child["type"] in ("number", "offset"):
+            feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex(child[child["type"]])))
+        elif child["type"] == "bytes":
+            feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex_string(child[child["type"]])))
+        elif child["type"] == "characteristic":
+            feature = "characteristic(%s)" % (rutils.bold2(child["characteristic"][0]))
         else:
-            raise RuntimeError('unexpected feature type: ' + str(child))
+            raise RuntimeError("unexpected feature type: " + str(child))

-        ostream.write('count(%s): ' % feature)
+        ostream.write("count(%s): " % feature)

-        if statement['max'] == statement['min']:
-            ostream.writeln('%d' % (statement['min']))
-        elif statement['min'] == 0:
-            ostream.writeln('%d or fewer' % (statement['max']))
-        elif statement['max'] == (1 << 64 - 1):
-            ostream.writeln('%d or more' % (statement['min']))
+        if statement["max"] == statement["min"]:
+            ostream.writeln("%d" % (statement["min"]))
+        elif statement["min"] == 0:
+            ostream.writeln("%d or fewer" % (statement["max"]))
+        elif statement["max"] == (1 << 64 - 1):
+            ostream.writeln("%d or more" % (statement["min"]))
         else:
-            ostream.writeln('between %d and %d' % (statement['min'], statement['max']))
-    elif statement['type'] == 'subscope':
-        ostream.write(statement['subscope'])
-        ostream.writeln(':')
-    elif statement['type'] == 'regex':
+            ostream.writeln("between %d and %d" % (statement["min"], statement["max"]))
+    elif statement["type"] == "subscope":
+        ostream.write(statement["subscope"])
+        ostream.writeln(":")
+    elif statement["type"] == "regex":
         # regex is a `Statement` not a `Feature`
         # this is because it doesn't get extracted, but applies to all strings in scope.
         # so we have to handle it here
-        ostream.writeln('string: %s' % (statement['match']))
+        ostream.writeln("string: %s" % (statement["match"]))
     else:
         raise RuntimeError("unexpected match statement type: " + str(statement))


 def render_feature(ostream, match, feature, indent=0):
-    ostream.write(' ' * indent)
+    ostream.write(" " * indent)

-    if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'):
-        ostream.write(feature['type'])
-        ostream.write(': ')
-        ostream.write(rutils.bold2(feature[feature['type']]))
-    elif feature['type'] in ('number', 'offset'):
-        ostream.write(feature['type'])
-        ostream.write(': ')
-        ostream.write(rutils.bold2(rutils.hex(feature[feature['type']])))
-    elif feature['type'] == 'bytes':
-        ostream.write('bytes: ')
+    if feature["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
+        ostream.write(feature["type"])
+        ostream.write(": ")
+        ostream.write(rutils.bold2(feature[feature["type"]]))
+    elif feature["type"] in ("number", "offset"):
+        ostream.write(feature["type"])
+        ostream.write(": ")
+        ostream.write(rutils.bold2(rutils.hex(feature[feature["type"]])))
+    elif feature["type"] == "bytes":
+        ostream.write("bytes: ")
         # bytes is the uppercase, hex-encoded string.
         # it should always be an even number of characters (it's hex).
-        ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
-    elif feature['type'] == 'characteristic':
-        ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0])))
+        ostream.write(rutils.bold2(rutils.hex_string(feature[feature["type"]])))
+    elif feature["type"] == "characteristic":
+        ostream.write("characteristic(%s)" % (rutils.bold2(feature["characteristic"][0])))
     # note that regex is found in `render_statement`
     else:
-        raise RuntimeError('unexpected feature type: ' + str(feature))
+        raise RuntimeError("unexpected feature type: " + str(feature))

     # it's possible to have an empty locations array here,
     # such as when we're in MODE_FAILURE and showing the logic
     # under a `not` statement (which will have no matched locations).
-    locations = list(sorted(match.get('locations', [])))
+    locations = list(sorted(match.get("locations", [])))

     if len(locations) == 1:
-        ostream.write(' @ ')
+        ostream.write(" @ ")
         ostream.write(rutils.hex(locations[0]))
     elif len(locations) > 1:
-        ostream.write(' @ ')
+        ostream.write(" @ ")
         if len(locations) > 4:
             # don't display too many locations, because it becomes very noisy.
             # probably only the first handful of locations will be useful for inspection.
-            ostream.write(', '.join(map(rutils.hex, locations[0:4])))
-            ostream.write(', and %d more...' % (len(locations) - 4))
+            ostream.write(", ".join(map(rutils.hex, locations[0:4])))
+            ostream.write(", and %d more..." % (len(locations) - 4))
         else:
-            ostream.write(', '.join(map(rutils.hex, locations)))
+            ostream.write(", ".join(map(rutils.hex, locations)))

-    ostream.write('\n')
+    ostream.write("\n")


 def render_node(ostream, match, node, indent=0):
-    if node['type'] == 'statement':
-        render_statement(ostream, node['statement'], indent=indent)
-    elif node['type'] == 'feature':
-        render_feature(ostream, match, node['feature'], indent=indent)
+    if node["type"] == "statement":
+        render_statement(ostream, node["statement"], indent=indent)
+    elif node["type"] == "feature":
+        render_feature(ostream, match, node["feature"], indent=indent)
     else:
-        raise RuntimeError('unexpected node type: ' + str(node))
+        raise RuntimeError("unexpected node type: " + str(node))


 # display nodes that successfully evaluated against the sample.
-MODE_SUCCESS = 'success'
+MODE_SUCCESS = "success"

 # display nodes that did not evaluate to True against the sample.
 # this is useful when rendering the logic tree under a `not` node.
-MODE_FAILURE = 'failure'
+MODE_FAILURE = "failure"


 def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
     child_mode = mode
     if mode == MODE_SUCCESS:
         # display only nodes that evaluated successfully.
-        if not match['success']:
+        if not match["success"]:
             return
         # optional statement with no successful children is empty
-        if (match['node'].get('statement', {}).get('type') == 'optional'
-                and not any(map(lambda m: m['success'], match['children']))):
+        if match["node"].get("statement", {}).get("type") == "optional" and not any(
+            map(lambda m: m["success"], match["children"])
+        ):
             return
         # not statement, so invert the child mode to show failed evaluations
-        if match['node'].get('statement', {}).get('type') == 'not':
+        if match["node"].get("statement", {}).get("type") == "not":
             child_mode = MODE_FAILURE
     elif mode == MODE_FAILURE:
         # display only nodes that did not evaluate to True
-        if match['success']:
+        if match["success"]:
             return
         # optional statement with successful children is not relevant
-        if (match['node'].get('statement', {}).get('type') == 'optional'
-                and any(map(lambda m: m['success'], match['children']))):
+        if match["node"].get("statement", {}).get("type") == "optional" and any(
+            map(lambda m: m["success"], match["children"])
+        ):
             return
         # not statement, so invert the child mode to show successful evaluations
-        if match['node'].get('statement', {}).get('type') == 'not':
+        if match["node"].get("statement", {}).get("type") == "not":
             child_mode = MODE_SUCCESS
     else:
-        raise RuntimeError('unexpected mode: ' + mode)
+        raise RuntimeError("unexpected mode: " + mode)

-    render_node(ostream, match, match['node'], indent=indent)
+    render_node(ostream, match, match["node"], indent=indent)

-    for child in match['children']:
+    for child in match["children"]:
         render_match(ostream, child, indent=indent + 1, mode=child_mode)
@@ -151,44 +153,44 @@ def render_vverbose(doc):
ostream = rutils.StringIO() ostream = rutils.StringIO()
for rule in rutils.capability_rules(doc): for rule in rutils.capability_rules(doc):
count = len(rule['matches']) count = len(rule["matches"])
if count == 1: if count == 1:
capability = rutils.bold(rule['meta']['name']) capability = rutils.bold(rule["meta"]["name"])
else: else:
capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count) capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
ostream.writeln(capability) ostream.writeln(capability)
rows = [] rows = []
for key in capa.rules.META_KEYS: for key in capa.rules.META_KEYS:
if key == 'name' or key not in rule['meta']: if key == "name" or key not in rule["meta"]:
continue continue
v = rule['meta'][key] v = rule["meta"][key]
if isinstance(v, list) and len(v) == 1: if isinstance(v, list) and len(v) == 1:
v = v[0] v = v[0]
elif isinstance(v, list) and len(v) > 1: elif isinstance(v, list) and len(v) > 1:
v = ', '.join(v) v = ", ".join(v)
rows.append((key, v)) rows.append((key, v))
ostream.writeln(tabulate.tabulate(rows, tablefmt='plain')) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
if rule['meta']['scope'] == capa.rules.FILE_SCOPE: if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
matches = list(doc[rule['meta']['name']]['matches'].values()) matches = list(doc[rule["meta"]["name"]]["matches"].values())
if len(matches) != 1: if len(matches) != 1:
# i think there should only ever be one match per file-scope rule, # i think there should only ever be one match per file-scope rule,
# because we do the file-scope evaluation a single time. # because we do the file-scope evaluation a single time.
# but i'm not 100% sure if this is/will always be true. # but i'm not 100% sure if this is/will always be true.
# so, let's be explicit about our assumptions and raise an exception if they fail. # so, let's be explicit about our assumptions and raise an exception if they fail.
raise RuntimeError('unexpected file scope match count: %d' % len(matches)) raise RuntimeError("unexpected file scope match count: %d" % len(matches))
render_match(ostream, matches[0], indent=0) render_match(ostream, matches[0], indent=0)
else: else:
for location, match in sorted(doc[rule['meta']['name']]['matches'].items()): for location, match in sorted(doc[rule["meta"]["name"]]["matches"].items()):
ostream.write(rule['meta']['scope']) ostream.write(rule["meta"]["scope"])
ostream.write(' @ ') ostream.write(" @ ")
ostream.writeln(rutils.hex(location)) ostream.writeln(rutils.hex(location))
render_match(ostream, match, indent=1) render_match(ostream, match, indent=1)
ostream.write('\n') ostream.write("\n")
return ostream.getvalue() return ostream.getvalue()
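
The meta table above relies on tabulate's `plain` format, which pads columns with spaces and adds no borders. A quick illustration with hypothetical values:

    import tabulate

    rows = [("author", "analyst@example.com"), ("scope", "function")]
    print(tabulate.tabulate(rows, tablefmt="plain"))
    # author  analyst@example.com
    # scope   function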


@@ -22,32 +22,32 @@ logger = logging.getLogger(__name__)
# these are the standard metadata fields, in the preferred order. # these are the standard metadata fields, in the preferred order.
# when reformatted, any custom keys will come after these. # when reformatted, any custom keys will come after these.
META_KEYS = ( META_KEYS = (
'name', "name",
'namespace', "namespace",
'rule-category', "rule-category",
'maec/analysis-conclusion', "maec/analysis-conclusion",
'maec/analysis-conclusion-ov', "maec/analysis-conclusion-ov",
'maec/malware-category', "maec/malware-category",
'maec/malware-category-ov', "maec/malware-category-ov",
'author', "author",
'description', "description",
'lib', "lib",
'scope', "scope",
'att&ck', "att&ck",
'mbc', "mbc",
'references', "references",
'examples' "examples",
) )
# these are meta fields that are internal to capa, # these are meta fields that are internal to capa,
# and added during rule reading/construction. # and added during rule reading/construction.
# they may help us manipulate or index rules, # they may help us manipulate or index rules,
# but should not be exposed to clients. # but should not be exposed to clients.
HIDDEN_META_KEYS = ('capa/nursery', 'capa/path') HIDDEN_META_KEYS = ("capa/nursery", "capa/path")
FILE_SCOPE = 'file' FILE_SCOPE = "file"
FUNCTION_SCOPE = 'function' FUNCTION_SCOPE = "function"
BASIC_BLOCK_SCOPE = 'basic block' BASIC_BLOCK_SCOPE = "basic block"
SUPPORTED_FEATURES = { SUPPORTED_FEATURES = {
@@ -56,7 +56,7 @@ SUPPORTED_FEATURES = {
capa.features.file.Export, capa.features.file.Export,
capa.features.file.Import, capa.features.file.Import,
capa.features.file.Section, capa.features.file.Section,
capa.features.Characteristic('embedded pe'), capa.features.Characteristic("embedded pe"),
capa.features.String, capa.features.String,
}, },
FUNCTION_SCOPE: { FUNCTION_SCOPE: {
@@ -68,18 +68,18 @@ SUPPORTED_FEATURES = {
capa.features.insn.Offset, capa.features.insn.Offset,
capa.features.insn.Mnemonic, capa.features.insn.Mnemonic,
capa.features.basicblock.BasicBlock, capa.features.basicblock.BasicBlock,
capa.features.Characteristic('switch'), capa.features.Characteristic("switch"),
capa.features.Characteristic('nzxor'), capa.features.Characteristic("nzxor"),
capa.features.Characteristic('peb access'), capa.features.Characteristic("peb access"),
capa.features.Characteristic('fs access'), capa.features.Characteristic("fs access"),
capa.features.Characteristic('gs access'), capa.features.Characteristic("gs access"),
capa.features.Characteristic('cross section flow'), capa.features.Characteristic("cross section flow"),
capa.features.Characteristic('stack string'), capa.features.Characteristic("stack string"),
capa.features.Characteristic('calls from'), capa.features.Characteristic("calls from"),
capa.features.Characteristic('calls to'), capa.features.Characteristic("calls to"),
capa.features.Characteristic('indirect call'), capa.features.Characteristic("indirect call"),
capa.features.Characteristic('loop'), capa.features.Characteristic("loop"),
capa.features.Characteristic('recursive call') capa.features.Characteristic("recursive call"),
}, },
BASIC_BLOCK_SCOPE: { BASIC_BLOCK_SCOPE: {
capa.features.MatchedRule, capa.features.MatchedRule,
@@ -89,14 +89,14 @@ SUPPORTED_FEATURES = {
capa.features.Bytes, capa.features.Bytes,
capa.features.insn.Offset, capa.features.insn.Offset,
capa.features.insn.Mnemonic, capa.features.insn.Mnemonic,
capa.features.Characteristic('nzxor'), capa.features.Characteristic("nzxor"),
capa.features.Characteristic('peb access'), capa.features.Characteristic("peb access"),
capa.features.Characteristic('fs access'), capa.features.Characteristic("fs access"),
capa.features.Characteristic('gs access'), capa.features.Characteristic("gs access"),
capa.features.Characteristic('cross section flow'), capa.features.Characteristic("cross section flow"),
capa.features.Characteristic('tight loop'), capa.features.Characteristic("tight loop"),
capa.features.Characteristic('stack string'), capa.features.Characteristic("stack string"),
capa.features.Characteristic('indirect call') capa.features.Characteristic("indirect call"),
}, },
} }
@@ -107,7 +107,7 @@ class InvalidRule(ValueError):
self.msg = msg self.msg = msg
def __str__(self): def __str__(self):
return 'invalid rule: %s' % (self.msg) return "invalid rule: %s" % (self.msg)
def __repr__(self): def __repr__(self):
return str(self) return str(self)
@@ -121,7 +121,7 @@ class InvalidRuleWithPath(InvalidRule):
self.__cause__ = None self.__cause__ = None
def __str__(self): def __str__(self):
return 'invalid rule: %s: %s' % (self.path, self.msg) return "invalid rule: %s: %s" % (self.path, self.msg)
class InvalidRuleSet(ValueError): class InvalidRuleSet(ValueError):
@@ -130,7 +130,7 @@ class InvalidRuleSet(ValueError):
self.msg = msg self.msg = msg
def __str__(self): def __str__(self):
return 'invalid rule set: %s' % (self.msg) return "invalid rule set: %s" % (self.msg)
def __repr__(self): def __repr__(self):
return str(self) return str(self)
@@ -139,111 +139,112 @@ class InvalidRuleSet(ValueError):
def ensure_feature_valid_for_scope(scope, feature): def ensure_feature_valid_for_scope(scope, feature):
if isinstance(feature, capa.features.Characteristic): if isinstance(feature, capa.features.Characteristic):
if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]: if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
raise InvalidRule('feature %s not supported for scope %s' % (feature, scope)) raise InvalidRule("feature %s not supported for scope %s" % (feature, scope))
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))): elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
raise InvalidRule('feature %s not supported for scope %s' % (feature, scope)) raise InvalidRule("feature %s not supported for scope %s" % (feature, scope))
def parse_int(s): def parse_int(s):
if s.startswith('0x'): if s.startswith("0x"):
return int(s, 0x10) return int(s, 0x10)
else: else:
return int(s, 10) return int(s, 10)
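
A sketch of the intended behavior, assuming `capa.rules` is importable: the `0x` prefix selects base 16, everything else parses as base 10.

    import capa.rules

    assert capa.rules.parse_int("0x10") == 16  # hex literal
    assert capa.rules.parse_int("16") == 16    # decimal literal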
def parse_range(s): def parse_range(s):
''' """
parse a string "(0, 1)" into a range (min, max). parse a string "(0, 1)" into a range (min, max).
min and/or max may be None to indicate an unbound range. min and/or max may be None to indicate an unbound range.
''' """
# we'd like to use `{` characters, but YAML would parse that as a dict. # we'd like to use `{` characters, but YAML would parse that as a dict.
if not s.startswith('('): if not s.startswith("("):
raise InvalidRule('invalid range: %s' % (s)) raise InvalidRule("invalid range: %s" % (s))
if not s.endswith(')'): if not s.endswith(")"):
raise InvalidRule('invalid range: %s' % (s)) raise InvalidRule("invalid range: %s" % (s))
s = s[len('('):-len(')')] s = s[len("(") : -len(")")]
min, _, max = s.partition(',') min, _, max = s.partition(",")
min = min.strip() min = min.strip()
max = max.strip() max = max.strip()
if min: if min:
min = parse_int(min.strip()) min = parse_int(min.strip())
if min < 0: if min < 0:
raise InvalidRule('range min less than zero') raise InvalidRule("range min less than zero")
else: else:
min = None min = None
if max: if max:
max = parse_int(max.strip()) max = parse_int(max.strip())
if max < 0: if max < 0:
raise InvalidRule('range max less than zero') raise InvalidRule("range max less than zero")
else: else:
max = None max = None
if min is not None and max is not None: if min is not None and max is not None:
if max < min: if max < min:
raise InvalidRule('range max less than min') raise InvalidRule("range max less than min")
return min, max return min, max
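
Either bound may be omitted inside the parentheses to leave that side unbounded; a sketch, again assuming `capa.rules` is importable:

    import capa.rules

    assert capa.rules.parse_range("(0, 10)") == (0, 10)
    assert capa.rules.parse_range("(10, )") == (10, None)  # no upper bound
    assert capa.rules.parse_range("(, 10)") == (None, 10)  # no lower bound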
def parse_feature(key): def parse_feature(key):
# keep this in sync with supported features # keep this in sync with supported features
if key == 'api': if key == "api":
return capa.features.insn.API return capa.features.insn.API
elif key == 'string': elif key == "string":
return capa.features.String return capa.features.String
elif key == 'bytes': elif key == "bytes":
return capa.features.Bytes return capa.features.Bytes
elif key == 'number': elif key == "number":
return capa.features.insn.Number return capa.features.insn.Number
elif key == 'offset': elif key == "offset":
return capa.features.insn.Offset return capa.features.insn.Offset
elif key == 'mnemonic': elif key == "mnemonic":
return capa.features.insn.Mnemonic return capa.features.insn.Mnemonic
elif key == 'basic blocks': elif key == "basic blocks":
return capa.features.basicblock.BasicBlock return capa.features.basicblock.BasicBlock
elif key.startswith('characteristic(') and key.endswith(')'): elif key.startswith("characteristic(") and key.endswith(")"):
characteristic = key[len('characteristic('):-len(')')] characteristic = key[len("characteristic(") : -len(")")]
return lambda v: capa.features.Characteristic(characteristic, v) return lambda v: capa.features.Characteristic(characteristic, v)
elif key == 'export': elif key == "export":
return capa.features.file.Export return capa.features.file.Export
elif key == 'import': elif key == "import":
return capa.features.file.Import return capa.features.file.Import
elif key == 'section': elif key == "section":
return capa.features.file.Section return capa.features.file.Section
elif key == 'match': elif key == "match":
return capa.features.MatchedRule return capa.features.MatchedRule
else: else:
raise InvalidRule('unexpected statement: %s' % key) raise InvalidRule("unexpected statement: %s" % key)
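
Most keys map directly to a feature class; `characteristic(...)` instead returns a constructor closed over the embedded name. A sketch under the same import assumption:

    import capa.features
    import capa.features.insn
    import capa.rules

    assert capa.rules.parse_feature("api") is capa.features.insn.API
    make = capa.rules.parse_feature("characteristic(nzxor)")
    # `make` builds a Characteristic carrying the embedded name
    feature = make(True)  # capa.features.Characteristic("nzxor", True)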
def parse_symbol(s, value_type): def parse_symbol(s, value_type):
''' """
s can be an int or a string s can be an int or a string
''' """
if isinstance(s, str) and '=' in s: if isinstance(s, str) and "=" in s:
value, symbol = s.split('=', 1) value, symbol = s.split("=", 1)
symbol = symbol.strip() symbol = symbol.strip()
if symbol == '': if symbol == "":
raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s) raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
else: else:
value = s value = s
symbol = None symbol = None
if isinstance(value, str): if isinstance(value, str):
if value_type == 'bytes': if value_type == "bytes":
try: try:
value = codecs.decode(value.replace(' ', ''), 'hex') value = codecs.decode(value.replace(" ", ""), "hex")
# TODO: Remove TypeError when Python2 is not used anymore # TODO: Remove TypeError when Python2 is not used anymore
except (TypeError, binascii.Error): except (TypeError, binascii.Error):
raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value) raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value)
if len(value) > MAX_BYTES_FEATURE_SIZE: if len(value) > MAX_BYTES_FEATURE_SIZE:
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' % raise InvalidRule(
MAX_BYTES_FEATURE_SIZE) "unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE
)
else: else:
try: try:
value = parse_int(value) value = parse_int(value)
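
Using the `0x4550 = IMAGE_DOS_SIGNATURE` convention mentioned later in this file, and assuming the function returns the `(value, symbol)` pair (the return statement falls outside this hunk):

    import capa.rules

    assert capa.rules.parse_symbol("0x4550 = IMAGE_DOS_SIGNATURE", "number") == (0x4550, "IMAGE_DOS_SIGNATURE")
    assert capa.rules.parse_symbol("37", "number") == (37, None)  # no symbol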
@@ -255,54 +256,54 @@ def parse_symbol(s, value_type):
def build_statements(d, scope): def build_statements(d, scope):
if len(d.keys()) != 1: if len(d.keys()) != 1:
raise InvalidRule('too many statements') raise InvalidRule("too many statements")
key = list(d.keys())[0] key = list(d.keys())[0]
if key == 'and': if key == "and":
return And(*[build_statements(dd, scope) for dd in d[key]]) return And(*[build_statements(dd, scope) for dd in d[key]])
elif key == 'or': elif key == "or":
return Or(*[build_statements(dd, scope) for dd in d[key]]) return Or(*[build_statements(dd, scope) for dd in d[key]])
elif key == 'not': elif key == "not":
if len(d[key]) != 1: if len(d[key]) != 1:
raise InvalidRule('not statement must have exactly one child statement') raise InvalidRule("not statement must have exactly one child statement")
return Not(*[build_statements(dd, scope) for dd in d[key]]) return Not(*[build_statements(dd, scope) for dd in d[key]])
elif key.endswith(' or more'): elif key.endswith(" or more"):
count = int(key[:-len('or more')]) count = int(key[: -len("or more")])
return Some(count, *[build_statements(dd, scope) for dd in d[key]]) return Some(count, *[build_statements(dd, scope) for dd in d[key]])
elif key == 'optional': elif key == "optional":
# `optional` is an alias for `0 or more` # `optional` is an alias for `0 or more`
# which is useful for documenting behaviors, # which is useful for documenting behaviors,
# like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`. # like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
return Some(0, *[build_statements(dd, scope) for dd in d[key]]) return Some(0, *[build_statements(dd, scope) for dd in d[key]])
elif key == 'function': elif key == "function":
if scope != FILE_SCOPE: if scope != FILE_SCOPE:
raise InvalidRule('function subscope supported only for file scope') raise InvalidRule("function subscope supported only for file scope")
if len(d[key]) != 1: if len(d[key]) != 1:
raise InvalidRule('subscope must have exactly one child statement') raise InvalidRule("subscope must have exactly one child statement")
return Subscope(FUNCTION_SCOPE, *[build_statements(dd, FUNCTION_SCOPE) for dd in d[key]]) return Subscope(FUNCTION_SCOPE, *[build_statements(dd, FUNCTION_SCOPE) for dd in d[key]])
elif key == 'basic block': elif key == "basic block":
if scope != FUNCTION_SCOPE: if scope != FUNCTION_SCOPE:
raise InvalidRule('basic block subscope supported only for function scope') raise InvalidRule("basic block subscope supported only for function scope")
if len(d[key]) != 1: if len(d[key]) != 1:
raise InvalidRule('subscope must have exactly one child statement') raise InvalidRule("subscope must have exactly one child statement")
return Subscope(BASIC_BLOCK_SCOPE, *[build_statements(dd, BASIC_BLOCK_SCOPE) for dd in d[key]]) return Subscope(BASIC_BLOCK_SCOPE, *[build_statements(dd, BASIC_BLOCK_SCOPE) for dd in d[key]])
elif key.startswith('count(') and key.endswith(')'): elif key.startswith("count(") and key.endswith(")"):
# e.g.: # e.g.:
# #
# count(basic block) # count(basic block)
# count(mnemonic(mov)) # count(mnemonic(mov))
# count(characteristic(nzxor)) # count(characteristic(nzxor))
term = key[len('count('):-len(')')] term = key[len("count(") : -len(")")]
if term.startswith('characteristic('): if term.startswith("characteristic("):
# characteristic features are specified a bit specially: # characteristic features are specified a bit specially:
# they simply indicate the presence of something unusual/interesting, # they simply indicate the presence of something unusual/interesting,
# and we embed the name in the feature name, like `characteristic(nzxor)`. # and we embed the name in the feature name, like `characteristic(nzxor)`.
@@ -320,18 +321,18 @@ def build_statements(d, scope):
# - mnemonic: mov # - mnemonic: mov
# #
# but here we deal with the form: `mnemonic(mov)`. # but here we deal with the form: `mnemonic(mov)`.
term, _, arg = term.partition('(') term, _, arg = term.partition("(")
Feature = parse_feature(term) Feature = parse_feature(term)
if arg: if arg:
arg = arg[:-len(')')] arg = arg[: -len(")")]
# can't rely on yaml parsing ints embedded within strings # can't rely on yaml parsing ints embedded within strings
# like: # like:
# #
# count(offset(0xC)) # count(offset(0xC))
# count(number(0x11223344)) # count(number(0x11223344))
# count(number(0x100 = symbol name)) # count(number(0x100 = symbol name))
if term in ('number', 'offset', 'bytes'): if term in ("number", "offset", "bytes"):
value, symbol = parse_symbol(arg, term) value, symbol = parse_symbol(arg, term)
feature = Feature(value, symbol) feature = Feature(value, symbol)
else: else:
@@ -348,29 +349,31 @@ def build_statements(d, scope):
count = d[key] count = d[key]
if isinstance(count, int): if isinstance(count, int):
return Range(feature, min=count, max=count) return Range(feature, min=count, max=count)
elif count.endswith(' or more'): elif count.endswith(" or more"):
min = parse_int(count[:-len(' or more')]) min = parse_int(count[: -len(" or more")])
max = None max = None
return Range(feature, min=min, max=max) return Range(feature, min=min, max=max)
elif count.endswith(' or fewer'): elif count.endswith(" or fewer"):
min = None min = None
max = parse_int(count[:-len(' or fewer')]) max = parse_int(count[: -len(" or fewer")])
return Range(feature, min=min, max=max) return Range(feature, min=min, max=max)
elif count.startswith('('): elif count.startswith("("):
min, max = parse_range(count) min, max = parse_range(count)
return Range(feature, min=min, max=max) return Range(feature, min=min, max=max)
else: else:
raise InvalidRule('unexpected range: %s' % (count)) raise InvalidRule("unexpected range: %s" % (count))
elif key == 'string' and d[key].startswith('/') and (d[key].endswith('/') or d[key].endswith('/i')): elif key == "string" and d[key].startswith("/") and (d[key].endswith("/") or d[key].endswith("/i")):
try: try:
return Regex(d[key]) return Regex(d[key])
except re.error: except re.error:
if d[key].endswith('/i'): if d[key].endswith("/i"):
d[key] = d[key][:-len('i')] d[key] = d[key][: -len("i")]
raise InvalidRule('invalid regular expression: %s; it should use Python syntax, try it at https://pythex.org' % d[key]) raise InvalidRule(
"invalid regular expression: %s; it should use Python syntax, try it at https://pythex.org" % d[key]
)
else: else:
Feature = parse_feature(key) Feature = parse_feature(key)
if key in ('number', 'offset', 'bytes'): if key in ("number", "offset", "bytes"):
# parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE # parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
# or regular numbers, e.g. 37 # or regular numbers, e.g. 37
value, symbol = parse_symbol(d[key], key) value, symbol = parse_symbol(d[key], key)
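
Putting the `count(...)` and regex forms together, a sketch of how the parsed YAML dicts map onto engine statements (assuming `capa.rules` is importable; the comments name the resulting Range bounds):

    import capa.rules

    # exact count -> Range(min=5, max=5)
    capa.rules.build_statements({"count(mnemonic(mov))": 5}, capa.rules.FUNCTION_SCOPE)
    # lower bound only -> Range(min=2, max=None)
    capa.rules.build_statements({"count(characteristic(nzxor))": "2 or more"}, capa.rules.FUNCTION_SCOPE)
    # inclusive range -> Range(min=3, max=10)
    capa.rules.build_statements({"count(basic block)": "(3, 10)"}, capa.rules.FUNCTION_SCOPE)
    # string value delimited by `/` (optionally with trailing `i`) -> Regex
    capa.rules.build_statements({"string": "/VirtualAlloc(Ex)?/i"}, capa.rules.FUNCTION_SCOPE)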
@@ -390,7 +393,7 @@ def second(s):
# we use the ruamel.yaml parser because it supports roundtripping of documents with comments. # we use the ruamel.yaml parser because it supports roundtripping of documents with comments.
yaml = ruamel.yaml.YAML(typ='rt') yaml = ruamel.yaml.YAML(typ="rt")
# use block mode, not inline json-like mode # use block mode, not inline json-like mode
@@ -410,7 +413,7 @@ yaml.width = 4096
class Rule(object): class Rule(object):
def __init__(self, name, scope, statement, meta, definition=''): def __init__(self, name, scope, statement, meta, definition=""):
super(Rule, self).__init__() super(Rule, self).__init__()
self.name = name self.name = name
self.scope = scope self.scope = scope
@@ -419,13 +422,13 @@ class Rule(object):
self.definition = definition self.definition = definition
def __str__(self): def __str__(self):
return 'Rule(name=%s)' % (self.name) return "Rule(name=%s)" % (self.name)
def __repr__(self): def __repr__(self):
return 'Rule(scope=%s, name=%s)' % (self.scope, self.name) return "Rule(scope=%s, name=%s)" % (self.scope, self.name)
def get_dependencies(self, namespaces): def get_dependencies(self, namespaces):
''' """
fetch the names of rules this rule relies upon. fetch the names of rules this rule relies upon.
these are only the direct dependencies; a user must these are only the direct dependencies; a user must
compute the transitive dependency graph themselves, if they want it. compute the transitive dependency graph themselves, if they want it.
@@ -436,7 +439,7 @@ class Rule(object):
Returns: Returns:
List[str]: names of rules upon which this rule depends. List[str]: names of rules upon which this rule depends.
''' """
deps = set([]) deps = set([])
def rec(statement): def rec(statement):
@@ -469,24 +472,31 @@ class Rule(object):
def _extract_subscope_rules_rec(self, statement): def _extract_subscope_rules_rec(self, statement):
if isinstance(statement, Statement): if isinstance(statement, Statement):
# for each child that is a subscope, # for each child that is a subscope,
for subscope in filter(lambda statement: isinstance(statement, capa.engine.Subscope), statement.get_children()): for subscope in filter(
lambda statement: isinstance(statement, capa.engine.Subscope), statement.get_children()
):
# create a new rule from it. # create a new rule from it.
# the name is a randomly generated, hopefully unique value. # the name is a randomly generated, hopefully unique value.
# ideally, this won't ever be rendered to a user. # ideally, this won't ever be rendered to a user.
name = self.name + '/' + uuid.uuid4().hex name = self.name + "/" + uuid.uuid4().hex
new_rule = Rule(name, subscope.scope, subscope.child, { new_rule = Rule(
'name': name, name,
'scope': subscope.scope, subscope.scope,
# these derived rules are never meant to be inspected separately, subscope.child,
# they are dependencies for the parent rule, {
# so mark it as such. "name": name,
'lib': True, "scope": subscope.scope,
# metadata that indicates this is derived from a subscope statement # these derived rules are never meant to be inspected separately,
'capa/subscope-rule': True, # they are dependencies for the parent rule,
# metadata that links the child rule to the parent rule # so mark it as such.
'capa/parent': self.name, "lib": True,
}) # metadata that indicates this is derived from a subscope statement
"capa/subscope-rule": True,
# metadata that links the child rule to the parent rule
"capa/parent": self.name,
},
)
# update the existing statement to `match` the new rule # update the existing statement to `match` the new rule
new_node = capa.features.MatchedRule(name) new_node = capa.features.MatchedRule(name)
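
A sketch of the observable effects, given some hypothetical `parent_rule` containing a subscope and the generator defined just below (the meta values mirror the dict constructed above):

    derived = list(parent_rule.extract_subscope_rules())[0]
    assert derived.name.startswith(parent_rule.name + "/")  # "<parent>/<uuid4 hex>"
    assert derived.meta["lib"] is True                      # dependency-only rule
    assert derived.meta["capa/subscope-rule"] is True
    assert derived.meta["capa/parent"] == parent_rule.name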
@@ -503,7 +513,7 @@ class Rule(object):
yield new_rule yield new_rule
def extract_subscope_rules(self): def extract_subscope_rules(self):
''' """
scan through the statements of this rule, scan through the statements of this rule,
replacing subscope statements with `match` references to a newly created rule, replacing subscope statements with `match` references to a newly created rule,
which are yielded from this routine. which are yielded from this routine.
@@ -514,7 +524,7 @@ class Rule(object):
for derived_rule in rule.extract_subscope_rules(): for derived_rule in rule.extract_subscope_rules():
assert derived_rule.meta['capa/parent'] == rule.name assert derived_rule.meta['capa/parent'] == rule.name
''' """
# recurse through statements # recurse through statements
# when we encounter a Subscope statement # when we encounter a Subscope statement
@@ -531,27 +541,21 @@ class Rule(object):
@classmethod @classmethod
def from_dict(cls, d, s): def from_dict(cls, d, s):
name = d['rule']['meta']['name'] name = d["rule"]["meta"]["name"]
# if scope is not specified, default to function scope. # if scope is not specified, default to function scope.
# this is probably the mode that rule authors will start with. # this is probably the mode that rule authors will start with.
scope = d['rule']['meta'].get('scope', FUNCTION_SCOPE) scope = d["rule"]["meta"].get("scope", FUNCTION_SCOPE)
statements = d['rule']['features'] statements = d["rule"]["features"]
# the rule must start with a single logic node. # the rule must start with a single logic node.
# doing anything else is too implicit and difficult to remove (AND vs OR ???). # doing anything else is too implicit and difficult to remove (AND vs OR ???).
if len(statements) != 1: if len(statements) != 1:
raise InvalidRule('rule must begin with a single top level statement') raise InvalidRule("rule must begin with a single top level statement")
if isinstance(statements[0], capa.engine.Subscope): if isinstance(statements[0], capa.engine.Subscope):
raise InvalidRule('top level statement may not be a subscope') raise InvalidRule("top level statement may not be a subscope")
return cls( return cls(name, scope, build_statements(statements[0], scope), d["rule"]["meta"], s)
name,
scope,
build_statements(statements[0], scope),
d['rule']['meta'],
s
)
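
A minimal, hypothetical round-trip through this constructor path (scope falls back to function scope when the meta omits it):

    import capa.rules

    rule = capa.rules.Rule.from_yaml(
        "rule:\n"
        "  meta:\n"
        "    name: example rule\n"
        "  features:\n"
        "    - and:\n"
        "      - mnemonic: xor\n"
    )
    assert rule.name == "example rule"
    assert rule.scope == capa.rules.FUNCTION_SCOPE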
@classmethod @classmethod
def from_yaml(cls, s): def from_yaml(cls, s):
@@ -559,9 +563,9 @@ class Rule(object):
@classmethod @classmethod
def from_yaml_file(cls, path): def from_yaml_file(cls, path):
with open(path, 'rb') as f: with open(path, "rb") as f:
try: try:
return cls.from_yaml(f.read().decode('utf-8')) return cls.from_yaml(f.read().decode("utf-8"))
except InvalidRule as e: except InvalidRule as e:
raise InvalidRuleWithPath(path, str(e)) raise InvalidRuleWithPath(path, str(e))
@@ -578,11 +582,11 @@ class Rule(object):
definition = yaml.load(self.definition) definition = yaml.load(self.definition)
# definition retains a reference to `meta`, # definition retains a reference to `meta`,
# so we're updating that in place. # so we're updating that in place.
definition['rule']['meta'] = self.meta definition["rule"]["meta"] = self.meta
meta = self.meta meta = self.meta
meta['name'] = self.name meta["name"] = self.name
meta['scope'] = self.scope meta["scope"] = self.scope
def move_to_end(m, k): def move_to_end(m, k):
# ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap). # ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap).
@@ -592,8 +596,8 @@ class Rule(object):
del m[k] del m[k]
m[k] = v m[k] = v
move_to_end(definition['rule'], 'meta') move_to_end(definition["rule"], "meta")
move_to_end(definition['rule'], 'features') move_to_end(definition["rule"], "features")
for key in META_KEYS: for key in META_KEYS:
if key in meta: if key in meta:
@@ -624,11 +628,11 @@ class Rule(object):
continue continue
meta[key] = value meta[key] = value
return ostream.getvalue().decode('utf-8').rstrip('\n') + '\n' return ostream.getvalue().decode("utf-8").rstrip("\n") + "\n"
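
The reformatting path can be exercised roughly like this; `to_yaml` is a hypothetical method name, since the signature sits outside these hunks, but its body (loading `self.definition`, reordering `meta` per META_KEYS, re-emitting via ruamel round-tripping) is excerpted above:

    import capa.rules

    rule = capa.rules.Rule.from_yaml_file("rules/example.yml")  # hypothetical path
    print(rule.to_yaml())  # comments in the original document are preserved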
def get_rules_with_scope(rules, scope): def get_rules_with_scope(rules, scope):
''' """
from the given collection of rules, select those with the given scope. from the given collection of rules, select those with the given scope.
args: args:
@@ -637,12 +641,12 @@ def get_rules_with_scope(rules, scope):
returns: returns:
List[capa.rules.Rule]: List[capa.rules.Rule]:
''' """
return list(rule for rule in rules if rule.scope == scope) return list(rule for rule in rules if rule.scope == scope)
def get_rules_and_dependencies(rules, rule_name): def get_rules_and_dependencies(rules, rule_name):
''' """
from the given collection of rules, select a rule and its dependencies (transitively). from the given collection of rules, select a rule and its dependencies (transitively).
args: args:
@@ -651,7 +655,7 @@ def get_rules_and_dependencies(rules, rule_name):
yields: yields:
Rule: Rule:
''' """
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list. # we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
rules = list(rules) rules = list(rules)
namespaces = index_rules_by_namespace(rules) namespaces = index_rules_by_namespace(rules)
@@ -674,17 +678,17 @@ def ensure_rules_are_unique(rules):
seen = set([]) seen = set([])
for rule in rules: for rule in rules:
if rule.name in seen: if rule.name in seen:
raise InvalidRule('duplicate rule name: ' + rule.name) raise InvalidRule("duplicate rule name: " + rule.name)
seen.add(rule.name) seen.add(rule.name)
def ensure_rule_dependencies_are_met(rules): def ensure_rule_dependencies_are_met(rules):
''' """
raise an exception if a rule dependency does not exist. raise an exception if a rule dependency does not exist.
raises: raises:
InvalidRule: if a dependency is not met. InvalidRule: if a dependency is not met.
''' """
# we evaluate `rules` multiple times, so if it's a generator, realize it into a list. # we evaluate `rules` multiple times, so if it's a generator, realize it into a list.
rules = list(rules) rules = list(rules)
namespaces = index_rules_by_namespace(rules) namespaces = index_rules_by_namespace(rules)
@@ -696,7 +700,7 @@ def ensure_rule_dependencies_are_met(rules):
def index_rules_by_namespace(rules): def index_rules_by_namespace(rules):
''' """
compute the rules that fit into each namespace found within the given rules. compute the rules that fit into each namespace found within the given rules.
for example, given: for example, given:
@@ -714,23 +718,23 @@ def index_rules_by_namespace(rules):
rules (List[Rule]): rules (List[Rule]):
Returns: Dict[str, List[Rule]] Returns: Dict[str, List[Rule]]
''' """
namespaces = collections.defaultdict(list) namespaces = collections.defaultdict(list)
for rule in rules: for rule in rules:
namespace = rule.meta.get('namespace') namespace = rule.meta.get("namespace")
if not namespace: if not namespace:
continue continue
while namespace: while namespace:
namespaces[namespace].append(rule) namespaces[namespace].append(rule)
namespace, _, _ = namespace.rpartition('/') namespace, _, _ = namespace.rpartition("/")
return dict(namespaces) return dict(namespaces)
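
Because of the `rpartition` loop, a rule is indexed under its own namespace and every ancestor namespace. A sketch reusing the hypothetical `rule` from the earlier sketch, with `namespace: c2/shell` added to its meta:

    import capa.rules

    index = capa.rules.index_rules_by_namespace([rule])
    assert set(index.keys()) == {"c2/shell", "c2"}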
class RuleSet(object): class RuleSet(object):
''' """
a ruleset is initialized with a collection of rules, which it verifies and sorts into scopes. a ruleset is initialized with a collection of rules, which it verifies and sorts into scopes.
each set of scoped rules is sorted topologically, which enables rules to match on past rule matches. each set of scoped rules is sorted topologically, which enables rules to match on past rule matches.
@@ -742,7 +746,7 @@ class RuleSet(object):
... ...
]) ])
capa.engine.match(ruleset.file_rules, ...) capa.engine.match(ruleset.file_rules, ...)
''' """
def __init__(self, rules): def __init__(self, rules):
super(RuleSet, self).__init__() super(RuleSet, self).__init__()
@@ -754,7 +758,7 @@ class RuleSet(object):
ensure_rule_dependencies_are_met(rules) ensure_rule_dependencies_are_met(rules)
if len(rules) == 0: if len(rules) == 0:
raise InvalidRuleSet('no rules selected') raise InvalidRuleSet("no rules selected")
self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE) self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE) self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
@@ -769,12 +773,12 @@ class RuleSet(object):
@staticmethod @staticmethod
def _get_rules_for_scope(rules, scope): def _get_rules_for_scope(rules, scope):
''' """
given a collection of rules, collect the rules that are needed at the given scope. given a collection of rules, collect the rules that are needed at the given scope.
these rules are ordered topologically. these rules are ordered topologically.
don't include "lib" rules, unless they are dependencies of other rules. don't include "lib" rules, unless they are dependencies of other rules.
''' """
scope_rules = set([]) scope_rules = set([])
# we need to process all rules, not just rules with the given scope. # we need to process all rules, not just rules with the given scope.
@@ -782,7 +786,7 @@ class RuleSet(object):
# at lower scope, e.g. function scope. # at lower scope, e.g. function scope.
# so, we find all dependencies of all rules, and later will filter them down. # so, we find all dependencies of all rules, and later will filter them down.
for rule in rules: for rule in rules:
if rule.meta.get('lib', False): if rule.meta.get("lib", False):
continue continue
scope_rules.update(get_rules_and_dependencies(rules, rule.name)) scope_rules.update(get_rules_and_dependencies(rules, rule.name))
@@ -790,7 +794,7 @@ class RuleSet(object):
@staticmethod @staticmethod
def _extract_subscope_rules(rules): def _extract_subscope_rules(rules):
''' """
process the given sequence of rules. process the given sequence of rules.
for each one, extract any embedded subscope rules into their own rule. for each one, extract any embedded subscope rules into their own rule.
process these recursively. process these recursively.
@@ -798,7 +802,7 @@ class RuleSet(object):
note: this operation mutates the rules passed in - they may now have `match` statements note: this operation mutates the rules passed in - they may now have `match` statements
for the extracted subscope rules. for the extracted subscope rules.
''' """
done = [] done = []
# use a queue of rules, because we'll be modifying the list (appending new items) as we go. # use a queue of rules, because we'll be modifying the list (appending new items) as we go.
@@ -811,14 +815,14 @@ class RuleSet(object):
return done return done
def filter_rules_by_meta(self, tag): def filter_rules_by_meta(self, tag):
''' """
return a new rule set with rules filtered based on all meta field values; adds all dependency rules. return a new rule set with rules filtered based on all meta field values; adds all dependency rules.
applies the tag-based rule filter, assuming that all required rules are loaded. applies the tag-based rule filter, assuming that all required rules are loaded.
can be used to select specific rules, vs. providing a rules child directory where capa cannot resolve can be used to select specific rules, vs. providing a rules child directory where capa cannot resolve
dependencies from unknown paths. dependencies from unknown paths.
TODO handle circular dependencies? TODO handle circular dependencies?
TODO support -t=metafield <k> TODO support -t=metafield <k>
''' """
rules = self.rules.values() rules = self.rules.values()
rules_filtered = set([]) rules_filtered = set([])
for rule in rules: for rule in rules:


@@ -1,2 +1,2 @@
__version__ = '0.0.0' __version__ = "0.0.0"
__commit__ = '00000000' __commit__ = "00000000"