diff --git a/README.md b/README.md index b8647373..bbc8fc0d 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,7 @@ Download capa from the [Releases](/releases) page or get the nightly builds here - [section](#section) - [counting](#counting) - [matching prior rule matches](#matching-prior-rule-matches) + - [description](#description) - [limitations](#Limitations) # installation @@ -339,8 +340,9 @@ For example, a crypto constant. The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format. -To associate context with a number, e.g. for constant definitions, append an equal sign and the respective name to -the number definition. This helps with documenting rules and provides context in capa's output. +To help humans understand the meaning of a number, for example that the constant `0x40` means `PAGE_EXECUTE_READWRITE`, you may provide a description alongside the definition. +Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`. +Check the [description section](#description) for more details. Examples: @@ -362,20 +364,31 @@ Regexes should be surrounded with `/` characters. By default, capa uses case-sensitive matching and assumes leading and trailing wildcards. To perform case-insensitive matching append an `i`. To anchor the regex at the start or end of a string, use `^` and/or `$`. +To add context to a string use the two-line syntax, using the `description` tag: `description: DESCRIPTION STRING`. +The inline syntax is not supported. +Check the [description section](#description) for more details. + Examples: - string: This program cannot be run in DOS mode. - string: Firefox 64.0 - string: /SELECT.*FROM.*WHERE/ - string: /Hardware\\Description\\System\\CentralProcessor/i - +``` +- string: This program cannot be run in DOS mode.
+ description: MS-DOS stub message +- string: '{3E5FC7F9-9A51-4367-9063-A120244FBEC7}' + description: CLSID_CMSTPLUA +- string: Firefox 64.0 +- string: /SELECT.*FROM.*WHERE/ +- string: /Hardware\\Description\\System\\CentralProcessor/i +``` + Note that regex matching is expensive (`O(features)` rather than `O(1)`) so they should be used sparingly. ### bytes A sequence of bytes referenced by the logic of the program. The provided sequence must match from the beginning of the referenced bytes and be no more than `0x100` bytes. -The parameter is a sequence of hexadecimal bytes followed by an optional description. - +The parameter is a sequence of hexadecimal bytes. +To help humans understand the meaning of the byte sequence, you may provide a description. +Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`. +Check the [description section](#description) for more details. The example below illustrates byte matching given a COM CLSID pushed onto the stack prior to `CoCreateInstance`. @@ -397,6 +410,7 @@ A structure offset referenced by the logic of the program. This should not be a stack offset. The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format. +It can be followed by an optional description. Examples: @@ -453,6 +467,7 @@ These are the features supported at the file-scope: - [import](#import) - [section](#section) + ### file string An ASCII or UTF-16 LE string present in the file. @@ -511,6 +526,10 @@ These rules can be expressed like: count(mnemonic(mov)): 3 count(basic block): 4 +`count` supports inline descriptions, except for [strings](#string), using the following syntax: + + count(number(2 = AF_INET/SOCK_DGRAM)): 2 + ## matching prior rule matches capa rules can specify logic for matching on other rule matches. @@ -532,6 +551,28 @@ By default, library rules will not be output to the user as a rule match, but can be matched by other rules.
When no active rules depend on a library rule, these the library rules will not be evaluated - maintaining performance. +## description + +All features support an optional description which helps with documenting rules and provides context in capa's output. +For all features except for [strings](#string), the description can be specified inline preceded by ` = `: ` = DESCRIPTION STRING`. +For example: + +``` +- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ) +``` + +The inline syntax is preferred. +For [strings](#string) or if the description is long or contains newlines, use the two-line syntax. +It uses the `description` tag in the following way: `description: DESCRIPTION STRING` +For example: + +``` +- string: This program cannot be run in DOS mode. + description: MS-DOS stub message +- number: 0x4550 + description: IMAGE_DOS_SIGNATURE (MZ) +``` + # limitations To learn more about capa's current limitations see [here](doc/limitations.md). diff --git a/capa/engine.py b/capa/engine.py index 9ce4397a..a1d53b1a 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -161,11 +161,11 @@ class Range(Statement): self.max = max if max is not None else (1 << 64 - 1) def evaluate(self, ctx): - if self.child not in ctx: - return Result(False, self, []) + count = len(ctx.get(self.child, [])) + if self.min == 0 and count == 0: + return Result(True, self, []) - count = len(ctx[self.child]) - return Result(self.min <= count <= self.max, self, [], locations=ctx[self.child]) + return Result(self.min <= count <= self.max, self, [], locations=ctx.get(self.child)) def __str__(self): if self.max == (1 << 64 - 1): diff --git a/capa/features/__init__.py b/capa/features/__init__.py index 3e71481e..04ddc1b7 100644 --- a/capa/features/__init__.py +++ b/capa/features/__init__.py @@ -17,10 +17,11 @@ def bytes_to_str(b): class Feature(object): - def __init__(self, args): + def __init__(self, args, description=None): super(Feature, self).__init__() - self.name = self.__class__.__name__ + self.name = 
self.__class__.__name__.lower() self.args = args + self.description = description def __hash__(self): return hash((self.name, tuple(self.args))) @@ -28,8 +29,16 @@ class Feature(object): def __eq__(self, other): return self.name == other.name and self.args == other.args + # Used to overwrite the rendering of the feature args in `__str__` and the + # json output + def get_args_str(self): + return ','.join(self.args) + def __str__(self): - return "%s(%s)" % (self.name.lower(), ",".join(self.args)) + if self.description: + return '%s(%s = %s)' % (self.name, self.get_args_str(), self.description) + else: + return '%s(%s)' % (self.name, self.get_args_str()) def __repr__(self): return str(self) @@ -49,51 +58,41 @@ class Feature(object): class MatchedRule(Feature): - def __init__(self, rule_name): - super(MatchedRule, self).__init__([rule_name]) + def __init__(self, rule_name, description=None): + super(MatchedRule, self).__init__([rule_name], description) + self.name = 'match' self.rule_name = rule_name - def __str__(self): - return "match(%s)" % (self.rule_name) - class Characteristic(Feature): - def __init__(self, name, value=None): - """ - when `value` is not provided, this serves as descriptor for a class of characteristics. - this is only used internally, such as in `rules.py` when checking if a statement is - supported by a given scope. - """ - super(Characteristic, self).__init__([name, value]) - self.name = name + def __init__(self, value, description=None): + super(Characteristic, self).__init__([value], description) self.value = value - def evaluate(self, ctx): - if self.value is None: - raise ValueError("cannot evaluate characteristc %s with empty value" % (str(self))) - return super(Characteristic, self).evaluate(ctx) + def freeze_serialize(self): + # in an older version of capa, characteristics could theoretically match non-existence (value=False). + # but we found this was never used (and better expressed with `not: characteristic: ...`). 
+ # this was represented using an additional parameter for Characteristic. + # its been removed, but we keep it around in the freeze format to maintain backwards compatibility. + # this value is ignored, however. + return (self.__class__.__name__, [self.value, True]) - def __str__(self): - if self.value is None: - return "characteristic(%s)" % (self.name) - else: - return "characteristic(%s(%s))" % (self.name, self.value) + @classmethod + def freeze_deserialize(cls, args): + # see above. we ignore the second element in the 2-tuple here. + return cls(args[0]) class String(Feature): - def __init__(self, value): - super(String, self).__init__([value]) + def __init__(self, value, description=None): + super(String, self).__init__([value], description) self.value = value - def __str__(self): - return 'string("%s")' % (self.value) - class Bytes(Feature): - def __init__(self, value, symbol=None): - super(Bytes, self).__init__([value]) + def __init__(self, value, description=None): + super(Bytes, self).__init__([value], description) self.value = value - self.symbol = symbol def evaluate(self, ctx): for feature, locations in ctx.items(): @@ -105,11 +104,8 @@ class Bytes(Feature): return capa.engine.Result(False, self, []) - def __str__(self): - if self.symbol: - return "bytes(0x%s = %s)" % (bytes_to_str(self.value).upper(), self.symbol) - else: - return "bytes(0x%s)" % (bytes_to_str(self.value).upper()) + def get_args_str(self): + return bytes_to_str(self.value).upper() def freeze_serialize(self): return (self.__class__.__name__, [bytes_to_str(x).upper() for x in self.args]) diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py index eb0f0b10..b44adbc6 100644 --- a/capa/features/extractors/__init__.py +++ b/capa/features/extractors/__init__.py @@ -185,22 +185,22 @@ class NullFeatureExtractor(FeatureExtractor): extractor = NullFeatureExtractor({ 'file features': [ - (0x402345, capa.features.Characteristic('embedded pe', True)), + (0x402345, 
capa.features.Characteristic('embedded pe')), ], 'functions': { 0x401000: { 'features': [ - (0x401000, capa.features.Characteristic('switch', True)), + (0x401000, capa.features.Characteristic('switch')), ], 'basic blocks': { 0x401000: { 'features': [ - (0x401000, capa.features.Characteristic('tight-loop', True)), + (0x401000, capa.features.Characteristic('tight-loop')), ], 'instructions': { 0x401000: { 'features': [ - (0x401000, capa.features.Characteristic('nzxor', True)), + (0x401000, capa.features.Characteristic('nzxor')), ], }, 0x401002: ... diff --git a/capa/features/extractors/ida/basicblock.py b/capa/features/extractors/ida/basicblock.py index 41e67eb4..82d77f90 100644 --- a/capa/features/extractors/ida/basicblock.py +++ b/capa/features/extractors/ida/basicblock.py @@ -103,7 +103,7 @@ def extract_bb_stackstring(f, bb): bb (IDA BasicBlock) """ if _ida_bb_contains_stackstring(f, bb): - yield Characteristic("stack string", True), bb.start_ea + yield Characteristic('stack string'), bb.start_ea def _ida_bb_contains_tight_loop(f, bb): @@ -133,7 +133,7 @@ def extract_bb_tight_loop(f, bb): bb (IDA BasicBlock) """ if _ida_bb_contains_tight_loop(f, bb): - yield Characteristic("tight loop", True), bb.start_ea + yield Characteristic('tight loop'), bb.start_ea def extract_features(f, bb): diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index b38f5ffb..15564e04 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -75,7 +75,7 @@ def extract_file_embedded_pe(): continue for ea, _ in _ida_check_segment_for_pe(seg): - yield Characteristic("embedded pe", True), ea + yield Characteristic('embedded pe'), ea def extract_file_export_names(): diff --git a/capa/features/extractors/ida/function.py b/capa/features/extractors/ida/function.py index a64e1016..06dfefde 100644 --- a/capa/features/extractors/ida/function.py +++ b/capa/features/extractors/ida/function.py @@ -29,7 +29,7 @@ def 
extract_function_switch(f): f (IDA func_t) """ if _ida_function_contains_switch(f): - yield Characteristic("switch", True), f.start_ea + yield Characteristic('switch'), f.start_ea def extract_function_calls_to(f): @@ -39,7 +39,7 @@ def extract_function_calls_to(f): f (IDA func_t) """ for ea in idautils.CodeRefsTo(f.start_ea, True): - yield Characteristic("calls to", True), ea + yield Characteristic('calls to'), ea def extract_function_loop(f): @@ -53,7 +53,7 @@ def extract_function_loop(f): map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs()) if edges and loops.has_loop(edges): - yield Characteristic("loop", True), f.start_ea + yield Characteristic('loop'), f.start_ea def extract_recursive_call(f): @@ -64,7 +64,7 @@ def extract_recursive_call(f): """ for ref in idautils.CodeRefsTo(f.start_ea, True): if f.contains(ref): - yield Characteristic("recursive call", True), f.start_ea + yield Characteristic('recursive call'), f.start_ea break diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index ff8bdf41..ed2f7548 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -259,7 +259,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn): if _is_nzxor_stack_cookie(f, bb, insn): return - yield Characteristic("nzxor", True), insn.ea + yield Characteristic('nzxor'), insn.ea def extract_insn_mnemonic_features(f, bb, insn): @@ -292,7 +292,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn): if " fs:30h" in disasm or " gs:60h" in disasm: # TODO: replace above with proper IDA - yield Characteristic("peb access", True), insn.ea + yield Characteristic('peb access'), insn.ea def extract_insn_segment_access_features(f, bb, insn): @@ -309,11 +309,11 @@ def extract_insn_segment_access_features(f, bb, insn): if " fs:" in disasm: # TODO: replace above with proper IDA - yield Characteristic("fs access", True), insn.ea + yield Characteristic('fs access'), insn.ea if " 
gs:" in disasm: # TODO: replace above with proper IDA - yield Characteristic("gs access", True), insn.ea + yield Characteristic('gs access'), insn.ea def extract_insn_cross_section_cflow(f, bb, insn): @@ -336,7 +336,7 @@ def extract_insn_cross_section_cflow(f, bb, insn): if idaapi.getseg(ref) == idaapi.getseg(insn.ea): continue - yield Characteristic("cross section flow", True), insn.ea + yield Characteristic('cross section flow'), insn.ea def extract_function_calls_from(f, bb, insn): @@ -354,7 +354,7 @@ def extract_function_calls_from(f, bb, insn): return for ref in idautils.CodeRefsFrom(insn.ea, False): - yield Characteristic("calls from", True), ref + yield Characteristic('calls from'), ref def extract_function_indirect_call_characteristic_features(f, bb, insn): @@ -373,7 +373,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn): return if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ): - yield Characteristic("indirect call", True), insn.ea + yield Characteristic('indirect call'), insn.ea def extract_features(f, bb, insn): diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py index 7847c6d4..2501fa9f 100644 --- a/capa/features/extractors/viv/basicblock.py +++ b/capa/features/extractors/viv/basicblock.py @@ -39,7 +39,7 @@ def _bb_has_tight_loop(f, bb): def extract_bb_tight_loop(f, bb): """ check basic block for tight loop indicators """ if _bb_has_tight_loop(f, bb): - yield Characteristic("tight loop", True), bb.va + yield Characteristic('tight loop'), bb.va def _bb_has_stackstring(f, bb): @@ -62,7 +62,7 @@ def _bb_has_stackstring(f, bb): def extract_stackstring(f, bb): """ check basic block for stackstring indicators """ if _bb_has_stackstring(f, bb): - yield Characteristic("stack string", True), bb.va + yield Characteristic('stack string'), bb.va def is_mov_imm_to_stack(instr): diff --git a/capa/features/extractors/viv/file.py 
b/capa/features/extractors/viv/file.py index c4050531..545bf613 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -13,7 +13,7 @@ def extract_file_embedded_pe(vw, file_path): fbytes = f.read() for offset, i in pe_carve.carve(fbytes, 1): - yield Characteristic("embedded pe", True), offset + yield Characteristic('embedded pe'), offset def extract_file_export_names(vw, file_path): diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index 495c0de2..ed37516d 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -53,12 +53,12 @@ def extract_function_switch(f): method can be optimized """ if f.va in get_functions_with_switch(f.vw): - yield Characteristic("switch", True), f.va + yield Characteristic('switch'), f.va def extract_function_calls_to(f): for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE): - yield Characteristic("calls to", True), src + yield Characteristic('calls to'), src def extract_function_loop(f): @@ -74,7 +74,7 @@ def extract_function_loop(f): edges.append((bb.va, bva)) if edges and loops.has_loop(edges): - yield Characteristic("loop", True), f.va + yield Characteristic('loop'), f.va def extract_features(f): diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 656c1047..257240a8 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -287,7 +287,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn): if is_security_cookie(f, bb, insn): return - yield Characteristic("nzxor", True), insn.va + yield Characteristic('nzxor'), insn.va def extract_insn_mnemonic_features(f, bb, insn): @@ -313,16 +313,14 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn): # IDA: push large dword ptr fs:30h # viv: fs: push dword [0x00000030] # fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0 - if 
(isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or ( - isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30 - ): - yield Characteristic("peb access", True), insn.va - elif "gs" in insn.getPrefixName(): + if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or \ + (isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30): + yield Characteristic('peb access'), insn.va + elif 'gs' in insn.getPrefixName(): for oper in insn.opers: - if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or ( - isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60 - ): - yield Characteristic("peb access", True), insn.va + if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or \ + (isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60): + yield Characteristic('peb access'), insn.va else: pass @@ -331,11 +329,11 @@ def extract_insn_segment_access_features(f, bb, insn): """ parse the instruction for access to fs or gs """ prefix = insn.getPrefixName() - if prefix == "fs": - yield Characteristic("fs access", True), insn.va + if prefix == 'fs': + yield Characteristic('fs access'), insn.va - if prefix == "gs": - yield Characteristic("gs access", True), insn.va + if prefix == 'gs': + yield Characteristic('gs access'), insn.va def get_section(vw, va): @@ -372,7 +370,7 @@ def extract_insn_cross_section_cflow(f, bb, insn): continue if get_section(f.vw, insn.va) != get_section(f.vw, va): - yield Characteristic("cross section flow", True), insn.va + yield Characteristic('cross section flow'), insn.va except KeyError: continue @@ -390,7 +388,7 @@ def extract_function_calls_from(f, bb, insn): if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper): oper = insn.opers[0] target = oper.getOperAddr(insn) - yield Characteristic("calls from", True), target + yield 
Characteristic('calls from'), target # call via thunk on x86, # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985 @@ -399,18 +397,18 @@ def extract_function_calls_from(f, bb, insn): # see Lab21-01.exe_:0x140001178 elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper): target = insn.opers[0].getOperValue(insn) - yield Characteristic("calls from", True), target + yield Characteristic('calls from'), target # call via IAT, x64 elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper): op = insn.opers[0] target = op.getOperAddr(insn) - yield Characteristic("calls from", True), target + yield Characteristic('calls from'), target if target and target == f.va: # if we found a jump target and it's the function address # mark as recursive - yield Characteristic("recursive call", True), target + yield Characteristic('recursive call'), target # this is a feature that's most relevant at the function or basic block scope, @@ -426,13 +424,13 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn): # Checks below work for x86 and x64 if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper): # call edx - yield Characteristic("indirect call", True), insn.va + yield Characteristic('indirect call'), insn.va elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper): # call dword ptr [eax+50h] - yield Characteristic("indirect call", True), insn.va + yield Characteristic('indirect call'), insn.va elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper): # call qword ptr [rsp+78h] - yield Characteristic("indirect call", True), insn.va + yield Characteristic('indirect call'), insn.va def extract_features(f, bb, insn): diff --git a/capa/features/file.py b/capa/features/file.py index c5d16879..396edd1f 100644 --- a/capa/features/file.py +++ b/capa/features/file.py @@ -2,30 +2,21 @@ from capa.features import Feature class Export(Feature): - def __init__(self, value): + def __init__(self, value, description=None): # 
value is export name - super(Export, self).__init__([value]) + super(Export, self).__init__([value], description) self.value = value - def __str__(self): - return "Export(%s)" % (self.value) - class Import(Feature): - def __init__(self, value): + def __init__(self, value, description=None): # value is import name - super(Import, self).__init__([value]) + super(Import, self).__init__([value], description) self.value = value - def __str__(self): - return "Import(%s)" % (self.value) - class Section(Feature): - def __init__(self, value): + def __init__(self, value, description=None): # value is section name - super(Section, self).__init__([value]) + super(Section, self).__init__([value], description) self.value = value - - def __str__(self): - return "Section(%s)" % (self.value) diff --git a/capa/features/insn.py b/capa/features/insn.py index 63abae98..937211f8 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -2,45 +2,34 @@ from capa.features import Feature class API(Feature): - def __init__(self, name): + def __init__(self, name, description=None): # Downcase library name if given if "." in name: modname, impname = name.split(".") name = modname.lower() + "." 
+ impname - super(API, self).__init__([name]) + super(API, self).__init__([name], description) class Number(Feature): - def __init__(self, value, symbol=None): - super(Number, self).__init__([value]) + def __init__(self, value, description=None): + super(Number, self).__init__([value], description) self.value = value - self.symbol = symbol - def __str__(self): - if self.symbol: - return "number(0x%x = %s)" % (self.value, self.symbol) - else: - return "number(0x%x)" % (self.value) + def get_args_str(self): + return '0x%X' % self.value class Offset(Feature): - def __init__(self, value, symbol=None): + def __init__(self, value, description=None): - super(Offset, self).__init__([value]) + super(Offset, self).__init__([value], description) self.value = value - self.symbol = symbol - def __str__(self): - if self.symbol: - return "offset(0x%x = %s)" % (self.value, self.symbol) - else: - return "offset(0x%x)" % (self.value) + def get_args_str(self): + return '0x%X' % self.value class Mnemonic(Feature): - def __init__(self, value): - super(Mnemonic, self).__init__([value]) + def __init__(self, value, description=None): + super(Mnemonic, self).__init__([value], description) self.value = value - - def __str__(self): - return "mnemonic(%s)" % (self.value) diff --git a/capa/ida/explorer/item.py b/capa/ida/explorer/item.py index 35d8b54c..8bc9dca0 100644 --- a/capa/ida/explorer/item.py +++ b/capa/ida/explorer/item.py @@ -190,6 +190,14 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem): self._data[0] = self.fmt % display +class CapaExplorerSubscopeItem(CapaExplorerDataItem): + + fmt = 'subscope(%s)' + + def __init__(self, parent, scope): + super(CapaExplorerSubscopeItem, self).__init__(parent, [self.fmt % scope, '', '']) + + class CapaExplorerBlockItem(CapaExplorerDataItem): """ store data relevant to capa basic block result """ diff --git a/capa/ida/explorer/model.py b/capa/ida/explorer/model.py index ce55a039..f06d95fa 100644 --- a/capa/ida/explorer/model.py +++ b/capa/ida/explorer/model.py @@ -17,6 +17,7 @@ from
capa.ida.explorer.item import ( CapaExplorerBlockItem, CapaExplorerRuleMatchItem, CapaExplorerFeatureItem, + CapaExplorerSubscopeItem ) import capa.ida.helpers @@ -108,20 +109,10 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel): font.setBold(True) return font - if ( - role == QtCore.Qt.FontRole - and isinstance( - item, - ( - CapaExplorerRuleItem, - CapaExplorerRuleMatchItem, - CapaExplorerBlockItem, - CapaExplorerFunctionItem, - CapaExplorerFeatureItem, - ), - ) - and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION - ): + if role == QtCore.Qt.FontRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem, + CapaExplorerBlockItem, CapaExplorerFunctionItem, + CapaExplorerFeatureItem, CapaExplorerSubscopeItem)) and \ + column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION: # set bold font for top-level rules font = QtGui.QFont() font.setBold(True) @@ -322,11 +313,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel): return item.childCount() - def render_capa_doc_statement_node(self, parent, statement, doc): + def render_capa_doc_statement_node(self, parent, statement, locations, doc): """ render capa statement read from doc @param parent: parent to which new child is assigned @param statement: statement read from doc + @param locations: locations of children (applies to range only?) 
@param doc: capa result doc "statement": { @@ -356,10 +348,16 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel): else: display += "between %d and %d" % (statement["min"], statement["max"]) - return CapaExplorerFeatureItem(parent, display=display) - elif statement["type"] == "subscope": - return CapaExplorerFeatureItem(parent, "subscope(%s)" % statement["subscope"]) - elif statement["type"] == "regex": + parent2 = CapaExplorerFeatureItem(parent, display=display) + + for location in locations: + # for each location render child node for range statement + self.render_capa_doc_feature(parent2, statement['child'], location, doc) + + return parent2 + elif statement['type'] == 'subscope': + return CapaExplorerSubscopeItem(parent, statement['subscope']) + elif statement['type'] == 'regex': # regex is a `Statement` not a `Feature` # this is because it doesn't get extracted, but applies to all strings in scope. # so we have to handle it here @@ -401,10 +399,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel): ): return - if match["node"]["type"] == "statement": - parent2 = self.render_capa_doc_statement_node(parent, match["node"]["statement"], doc) - elif match["node"]["type"] == "feature": - parent2 = self.render_capa_doc_feature_node(parent, match["node"]["feature"], match["locations"], doc) + if match['node']['type'] == 'statement': + parent2 = self.render_capa_doc_statement_node(parent, match['node']['statement'], + match.get('locations', []), doc) + elif match['node']['type'] == 'feature': + parent2 = self.render_capa_doc_feature_node(parent, match['node']['feature'], match['locations'], doc) else: raise RuntimeError("unexpected node type: " + str(match["node"]["type"])) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index 1c6f263e..0735bd00 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -375,10 +375,14 @@ class CapaExplorerForm(idaapi.PluginForm): self.render_capa_doc_summary(doc) 
self.render_capa_doc_mitre_summary(doc) - self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder) + self.set_view_tree_default_sort_order() logger.info("render views completed.") + def set_view_tree_default_sort_order(self): + """ """ + self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder) + def render_capa_doc_summary(self, doc): """ """ for (row, rule) in enumerate(rutils.capability_rules(doc)): @@ -459,6 +463,7 @@ class CapaExplorerForm(idaapi.PluginForm): self.model_data.reset() self.view_tree.reset() self.view_checkbox_limit_by.setChecked(False) + self.set_view_tree_default_sort_order() def reload(self): """ reload views and re-run capa analysis """ diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 1f8dbb55..b9880fb8 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -1,6 +1,7 @@ import json import six +import capa.rules import capa.engine @@ -83,23 +84,11 @@ def convert_feature_to_result_document(feature): "type": "characteristic" }, """ - name, value = feature.freeze_serialize() + result = {'type': feature.name, feature.name: feature.get_args_str()} + if feature.description: + result['description'] = feature.description - # make the terms pretty - name = name.lower() - if name == "matchedrule": - name = "match" - - # in the common case, there's a single argument - # so use it directly. - # like: name=number value=1 - if isinstance(value, list) and len(value) == 1: - value = value[0] - - return { - "type": name, - name: value, - } + return result def convert_node_to_result_document(node): @@ -144,7 +133,10 @@ def convert_match_to_result_document(rules, capabilities, result): # so only add `locations` to feature nodes. 
if isinstance(result.statement, capa.features.Feature): if bool(result.success): - doc["locations"] = result.locations + doc['locations'] = result.locations + elif isinstance(result.statement, capa.rules.Range): + if bool(result.success): + doc['locations'] = result.locations # if we have a `match` statement, then we're referencing another rule. # this could an external rule (written by a human), or diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 7dd8174b..9b02fbe5 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -4,12 +4,31 @@ import capa.rules import capa.render.utils as rutils -def render_statement(ostream, statement, indent=0): - ostream.write(" " * indent) - if statement["type"] in ("and", "or", "optional"): - ostream.write(statement["type"]) - ostream.writeln(":") - elif statement["type"] == "not": +def render_locations(ostream, match): + # its possible to have an empty locations array here, + # such as when we're in MODE_FAILURE and showing the logic + # under a `not` statement (which will have no matched locations). + locations = list(sorted(match.get('locations', []))) + if len(locations) == 1: + ostream.write(' @ ') + ostream.write(rutils.hex(locations[0])) + elif len(locations) > 1: + ostream.write(' @ ') + if len(locations) > 4: + # don't display too many locations, because it becomes very noisy. + # probably only the first handful of locations will be useful for inspection. + ostream.write(', '.join(map(rutils.hex, locations[0:4]))) + ostream.write(', and %d more...' % (len(locations) - 4)) + else: + ostream.write(', '.join(map(rutils.hex, locations))) + + +def render_statement(ostream, match, statement, indent=0): + ostream.write(' ' * indent) + if statement['type'] in ('and', 'or', 'optional'): + ostream.write(statement['type']) + ostream.writeln(':') + elif statement['type'] == 'not': # this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags. 
ostream.writeln("not:") elif statement["type"] == "some": @@ -21,32 +40,36 @@ def render_statement(ostream, statement, indent=0): # there's no additional logic in the feature part, just the existence of a feature. # so, we have to inline some of the feature rendering here. - child = statement["child"] - if child["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"): - feature = "%s(%s)" % (child["type"], rutils.bold2(child[child["type"]])) - elif child["type"] in ("number", "offset"): - feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex(child[child["type"]]))) - elif child["type"] == "bytes": - feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex_string(child[child["type"]]))) - elif child["type"] == "characteristic": - feature = "characteristic(%s)" % (rutils.bold2(child["characteristic"][0])) + child = statement['child'] + if child['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match', 'characteristic'): + value = rutils.bold2(child[child['type']]) + elif child['type'] in ('number', 'offset'): + value = rutils.bold2(rutils.hex(child[child['type']])) + elif child['type'] == 'bytes': + value = rutils.bold2(rutils.hex_string(child[child['type']])) else: raise RuntimeError("unexpected feature type: " + str(child)) - ostream.write("count(%s): " % feature) - - if statement["max"] == statement["min"]: - ostream.writeln("%d" % (statement["min"])) - elif statement["min"] == 0: - ostream.writeln("%d or fewer" % (statement["max"])) - elif statement["max"] == (1 << 64 - 1): - ostream.writeln("%d or more" % (statement["min"])) + if child.get('description'): + ostream.write('count(%s(%s = %s)): ' % (child['type'], value, child['description'])) else: - ostream.writeln("between %d and %d" % (statement["min"], statement["max"])) - elif statement["type"] == "subscope": - ostream.write(statement["subscope"]) - ostream.writeln(":") - elif statement["type"] == "regex": +
ostream.write('count(%s(%s)): ' % (child['type'], value)) + + if statement['max'] == statement['min']: + ostream.write('%d' % (statement['min'])) + elif statement['min'] == 0: + ostream.write('%d or fewer' % (statement['max'])) + elif statement['max'] == (1 << 64 - 1): + ostream.write('%d or more' % (statement['min'])) + else: + ostream.write('between %d and %d' % (statement['min'], statement['max'])) + + render_locations(ostream, match) + ostream.write('\n') + elif statement['type'] == 'subscope': + ostream.write(statement['subscope']) + ostream.writeln(':') + elif statement['type'] == 'regex': # regex is a `Statement` not a `Feature` # this is because it doesn't get extracted, but applies to all strings in scope. # so we have to handle it here @@ -56,52 +79,38 @@ def render_statement(ostream, statement, indent=0): def render_feature(ostream, match, feature, indent=0): - ostream.write(" " * indent) + ostream.write(' ' * indent) - if feature["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"): - ostream.write(feature["type"]) - ostream.write(": ") - ostream.write(rutils.bold2(feature[feature["type"]])) - elif feature["type"] in ("number", "offset"): - ostream.write(feature["type"]) - ostream.write(": ") - ostream.write(rutils.bold2(rutils.hex(feature[feature["type"]]))) - elif feature["type"] == "bytes": - ostream.write("bytes: ") + if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match', 'characteristic'): + ostream.write(feature['type']) + ostream.write(': ') + ostream.write(rutils.bold2(feature[feature['type']])) + elif feature['type'] in ('number', 'offset'): + ostream.write(feature['type']) + ostream.write(': ') + ostream.write(rutils.bold2(rutils.hex(feature[feature['type']]))) + elif feature['type'] == 'bytes': + ostream.write('bytes: ') # bytes is the uppercase, hex-encoded string. # it should always be an even number of characters (its hex). 
- ostream.write(rutils.bold2(rutils.hex_string(feature[feature["type"]]))) - elif feature["type"] == "characteristic": - ostream.write("characteristic(%s)" % (rutils.bold2(feature["characteristic"][0]))) + ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']]))) # note that regex is found in `render_statement` else: raise RuntimeError("unexpected feature type: " + str(feature)) - # its possible to have an empty locations array here, - # such as when we're in MODE_FAILURE and showing the logic - # under a `not` statement (which will have no matched locations). - locations = list(sorted(match.get("locations", []))) - if len(locations) == 1: - ostream.write(" @ ") - ostream.write(rutils.hex(locations[0])) - elif len(locations) > 1: - ostream.write(" @ ") - if len(locations) > 4: - # don't display too many locations, because it becomes very noisy. - # probably only the first handful of locations will be useful for inspection. - ostream.write(", ".join(map(rutils.hex, locations[0:4]))) - ostream.write(", and %d more..." 
% (len(locations) - 4)) - else: - ostream.write(", ".join(map(rutils.hex, locations))) + if 'description' in feature: + ostream.write(' = ') + ostream.write(feature['description']) - ostream.write("\n") + render_locations(ostream, match) + ostream.write('\n') def render_node(ostream, match, node, indent=0): - if node["type"] == "statement": - render_statement(ostream, node["statement"], indent=indent) - elif node["type"] == "feature": - render_feature(ostream, match, node["feature"], indent=indent) + if node['type'] == 'statement': + render_statement(ostream, match, node['statement'], indent=indent) + elif node['type'] == 'feature': + render_feature(ostream, match, node['feature'], indent=indent) else: raise RuntimeError("unexpected node type: " + str(node)) diff --git a/capa/rules.py b/capa/rules.py index 8b307175..5f3da602 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -138,8 +138,8 @@ class InvalidRuleSet(ValueError): def ensure_feature_valid_for_scope(scope, feature): if isinstance(feature, capa.features.Characteristic): - if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]: - raise InvalidRule("feature %s not support for scope %s" % (feature, scope)) + if capa.features.Characteristic(feature.value) not in SUPPORTED_FEATURES[scope]: + raise InvalidRule('feature %s not support for scope %s' % (feature, scope)) elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))): raise InvalidRule("feature %s not support for scope %s" % (feature, scope)) @@ -205,10 +205,9 @@ def parse_feature(key): return capa.features.insn.Mnemonic elif key == "basic blocks": return capa.features.basicblock.BasicBlock - elif key.startswith("characteristic(") and key.endswith(")"): - characteristic = key[len("characteristic(") : -len(")")] - return lambda v: capa.features.Characteristic(characteristic, v) - elif key == "export": + elif key == 'characteristic': + return capa.features.Characteristic + elif key == 
'export': return capa.features.file.Export elif key == "import": return capa.features.file.Import @@ -220,18 +219,18 @@ def parse_feature(key): raise InvalidRule("unexpected statement: %s" % key) -def parse_symbol(s, value_type): - """ +def parse_description(s, value_type, description=None): + ''' s can be an int or a string - """ - if isinstance(s, str) and "=" in s: - value, symbol = s.split("=", 1) - symbol = symbol.strip() - if symbol == "": - raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s) + ''' + if value_type != 'string' and isinstance(s, str) and ' = ' in s: + if description: + raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with ` = `)' % s) + value, description = s.split(' = ', 1) + if description == '': + raise InvalidRule('unexpected value: "%s", description cannot be empty' % s) else: value = s - symbol = None if isinstance(value, str): if value_type == "bytes": @@ -242,21 +241,20 @@ def parse_symbol(s, value_type): raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value) if len(value) > MAX_BYTES_FEATURE_SIZE: - raise InvalidRule( - "unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE - ) - else: + raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' % + MAX_BYTES_FEATURE_SIZE) + elif value_type in {'number', 'offset'}: try: value = parse_int(value) except ValueError: raise InvalidRule('unexpected value: "%s", must begin with numerical value' % value) - return value, symbol + return value, description def build_statements(d, scope): - if len(d.keys()) != 1: - raise InvalidRule("too many statements") + if len(d.keys()) > 2: + raise InvalidRule('too many statements') key = list(d.keys())[0] if key == "and": @@ -303,48 +301,33 @@ def build_statements(d, scope): term = key[len("count(") : -len(")")] - if term.startswith("characteristic("): - # characteristic features are 
specified a bit specially: - # they simply indicate the presence of something unusual/interesting, - # and we embed the name in the feature name, like `characteristic(nzxor)`. - # - # when we're dealing with counts, like `count(characteristic(nzxor))`, - # we can simply extract the feature and assume we're looking for `True` values. - Feature = parse_feature(term) - feature = Feature(True) - ensure_feature_valid_for_scope(scope, feature) - else: - # however, for remaining counted features, like `count(mnemonic(mov))`, - # we have to jump through hoops. - # - # when looking for the existance of such a feature, our rule might look like: - # - mnemonic: mov - # - # but here we deal with the form: `mnemonic(mov)`. - term, _, arg = term.partition("(") - Feature = parse_feature(term) + # when looking for the existence of such a feature, our rule might look like: + # - mnemonic: mov + # + # but here we deal with the form: `mnemonic(mov)`. + term, _, arg = term.partition('(') + Feature = parse_feature(term) - if arg: - arg = arg[: -len(")")] - # can't rely on yaml parsing ints embedded within strings - # like: - # - # count(offset(0xC)) - # count(number(0x11223344)) - # count(number(0x100 = symbol name)) - if term in ("number", "offset", "bytes"): - value, symbol = parse_symbol(arg, term) - feature = Feature(value, symbol) - else: - # arg is string, like: - # - # count(mnemonic(mov)) - # count(string(error)) - # TODO: what about embedded newlines? 
- feature = Feature(arg) + if arg: + arg = arg[:-len(')')] + # can't rely on yaml parsing ints embedded within strings + # like: + # + # count(offset(0xC)) + # count(number(0x11223344)) + # count(number(0x100 = description)) + if term != 'string': + value, description = parse_description(arg, term) + feature = Feature(value, description) else: - feature = Feature() - ensure_feature_valid_for_scope(scope, feature) + # arg is string (which doesn't support inline descriptions), like: + # + # count(string(error)) + # TODO: what about embedded newlines? + feature = Feature(arg) + else: + feature = Feature() + ensure_feature_valid_for_scope(scope, feature) count = d[key] if isinstance(count, int): @@ -373,13 +356,8 @@ def build_statements(d, scope): ) else: Feature = parse_feature(key) - if key in ("number", "offset", "bytes"): - # parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE - # or regular numbers, e.g. 37 - value, symbol = parse_symbol(d[key], key) - feature = Feature(value, symbol) - else: - feature = Feature(d[key]) + value, description = parse_description(d[key], key, d.get('description')) + feature = Feature(value, description) ensure_feature_valid_for_scope(scope, feature) return feature diff --git a/ci/hooks/hook-vivisect.py b/ci/hooks/hook-vivisect.py new file mode 100644 index 00000000..3714cfb0 --- /dev/null +++ b/ci/hooks/hook-vivisect.py @@ -0,0 +1,13 @@ +from PyInstaller.utils.hooks import copy_metadata + +# in order for viv-utils to use pkg_resources to fetch +# the installed version of vivisect, +# we need to instruct pyinstaller to embed this metadata. +# +# so we set the pyinstaller.spec/hookspath to reference +# the directory with this hook. +# +# this hook runs at analysis time and updates the embedded metadata. 
+# +# ref: https://github.com/pyinstaller/pyinstaller/issues/1713#issuecomment-162682084 +datas = copy_metadata('vivisect') diff --git a/ci/pyinstaller.spec b/ci/pyinstaller.spec new file mode 100644 index 00000000..785db528 --- /dev/null +++ b/ci/pyinstaller.spec @@ -0,0 +1,193 @@ +# -*- mode: python -*- +import os.path +import subprocess + +import wcwidth + + +with open('./capa/version.py', 'wb') as f: + f.write("__version__ = '%s'" + % subprocess.check_output(["git", "describe", "--always"]).strip()) + +a = Analysis( + ['../capa/main.py'], + pathex=['capa'], + binaries=None, + datas=[ + ('../rules', 'rules'), + # capa.render.default uses tabulate that depends on wcwidth. + # it seems wcwidth uses a json file `version.json` + # and this doesn't get picked up by pyinstaller automatically. + # so we manually embed the wcwidth resources here. + # + # ref: https://stackoverflow.com/a/62278462/87207 + (os.path.dirname(wcwidth.__file__), 'wcwidth') + ], + hiddenimports=[ + # vivisect does manual/runtime importing of its modules, + # so declare the things that could be imported here. 
+ "pycparser", + "vivisect", + "vivisect.analysis", + "vivisect.analysis.amd64", + "vivisect.analysis.amd64", + "vivisect.analysis.amd64.emulation", + "vivisect.analysis.amd64.golang", + "vivisect.analysis.crypto", + "vivisect.analysis.crypto", + "vivisect.analysis.crypto.constants", + "vivisect.analysis.elf", + "vivisect.analysis.elf", + "vivisect.analysis.elf.elfplt", + "vivisect.analysis.elf.libc_start_main", + "vivisect.analysis.generic", + "vivisect.analysis.generic", + "vivisect.analysis.generic.codeblocks", + "vivisect.analysis.generic.emucode", + "vivisect.analysis.generic.entrypoints", + "vivisect.analysis.generic.funcentries", + "vivisect.analysis.generic.impapi", + "vivisect.analysis.generic.mkpointers", + "vivisect.analysis.generic.pointers", + "vivisect.analysis.generic.pointertables", + "vivisect.analysis.generic.relocations", + "vivisect.analysis.generic.strconst", + "vivisect.analysis.generic.switchcase", + "vivisect.analysis.generic.thunks", + "vivisect.analysis.i386", + "vivisect.analysis.i386", + "vivisect.analysis.i386.calling", + "vivisect.analysis.i386.golang", + "vivisect.analysis.i386.importcalls", + "vivisect.analysis.i386.instrhook", + "vivisect.analysis.i386.thunk_bx", + "vivisect.analysis.ms", + "vivisect.analysis.ms", + "vivisect.analysis.ms.hotpatch", + "vivisect.analysis.ms.localhints", + "vivisect.analysis.ms.msvc", + "vivisect.analysis.ms.msvcfunc", + "vivisect.analysis.ms.vftables", + "vivisect.analysis.pe", + "vivisect.impapi.posix.amd64", + "vivisect.impapi.posix.i386", + "vivisect.impapi.windows", + "vivisect.impapi.windows.amd64", + "vivisect.impapi.windows.i386", + "vivisect.parsers.blob", + "vivisect.parsers.elf", + "vivisect.parsers.ihex", + "vivisect.parsers.macho", + "vivisect.parsers.parse_pe", + "vivisect.parsers.utils", + "vivisect.storage", + "vivisect.storage.basicfile", + "vstruct.constants", + "vstruct.constants.ntstatus", + "vstruct.defs", + "vstruct.defs.arm7", + "vstruct.defs.bmp", + "vstruct.defs.dns", + 
"vstruct.defs.elf", + "vstruct.defs.gif", + "vstruct.defs.ihex", + "vstruct.defs.inet", + "vstruct.defs.java", + "vstruct.defs.kdcom", + "vstruct.defs.macho", + "vstruct.defs.macho.const", + "vstruct.defs.macho.fat", + "vstruct.defs.macho.loader", + "vstruct.defs.macho.stabs", + "vstruct.defs.minidump", + "vstruct.defs.pcap", + "vstruct.defs.pe", + "vstruct.defs.pptp", + "vstruct.defs.rar", + "vstruct.defs.swf", + "vstruct.defs.win32", + "vstruct.defs.windows", + "vstruct.defs.windows.win_5_1_i386", + "vstruct.defs.windows.win_5_1_i386.ntdll", + "vstruct.defs.windows.win_5_1_i386.ntoskrnl", + "vstruct.defs.windows.win_5_1_i386.win32k", + "vstruct.defs.windows.win_5_2_i386", + "vstruct.defs.windows.win_5_2_i386.ntdll", + "vstruct.defs.windows.win_5_2_i386.ntoskrnl", + "vstruct.defs.windows.win_5_2_i386.win32k", + "vstruct.defs.windows.win_6_1_amd64", + "vstruct.defs.windows.win_6_1_amd64.ntdll", + "vstruct.defs.windows.win_6_1_amd64.ntoskrnl", + "vstruct.defs.windows.win_6_1_amd64.win32k", + "vstruct.defs.windows.win_6_1_i386", + "vstruct.defs.windows.win_6_1_i386.ntdll", + "vstruct.defs.windows.win_6_1_i386.ntoskrnl", + "vstruct.defs.windows.win_6_1_i386.win32k", + "vstruct.defs.windows.win_6_1_wow64", + "vstruct.defs.windows.win_6_1_wow64.ntdll", + "vstruct.defs.windows.win_6_2_amd64", + "vstruct.defs.windows.win_6_2_amd64.ntdll", + "vstruct.defs.windows.win_6_2_amd64.ntoskrnl", + "vstruct.defs.windows.win_6_2_amd64.win32k", + "vstruct.defs.windows.win_6_2_i386", + "vstruct.defs.windows.win_6_2_i386.ntdll", + "vstruct.defs.windows.win_6_2_i386.ntoskrnl", + "vstruct.defs.windows.win_6_2_i386.win32k", + "vstruct.defs.windows.win_6_2_wow64", + "vstruct.defs.windows.win_6_2_wow64.ntdll", + "vstruct.defs.windows.win_6_3_amd64", + "vstruct.defs.windows.win_6_3_amd64.ntdll", + "vstruct.defs.windows.win_6_3_amd64.ntoskrnl", + "vstruct.defs.windows.win_6_3_i386", + "vstruct.defs.windows.win_6_3_i386.ntdll", + "vstruct.defs.windows.win_6_3_i386.ntoskrnl", + 
"vstruct.defs.windows.win_6_3_wow64", + "vstruct.defs.windows.win_6_3_wow64.ntdll", + ], + hookspath=['ci/hooks'], + runtime_hooks=None, + excludes=[ + # ignore packages that would otherwise be bundled with the .exe. + # review: build/pyinstaller/xref-pyinstaller.html + + # we don't do any GUI stuff, so ignore these modules + "tkinter", + "_tkinter", + "Tkinter", + # tqdm provides renderers for ipython, + # however, this drags in a lot of dependencies. + # since we don't spawn a notebook, we can safely remove these. + "IPython", + "ipywidgets", + ]) + +a.binaries = a.binaries - TOC([ + ('tcl85.dll', None, None), + ('tk85.dll', None, None), + ('_tkinter', None, None)]) + +pyz = PYZ(a.pure, a.zipped_data) + +exe = EXE(pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + exclude_binaries=False, + name='capa', + icon='logo.ico', + debug=False, + strip=None, + upx=True, + console=True ) + +# enable the following to debug the contents of the .exe +# +#coll = COLLECT(exe, +# a.binaries, +# a.zipfiles, +# a.datas, +# strip=None, +# upx=True, +# name='capa-dat') + diff --git a/rules b/rules index e5db2268..bb1df027 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit e5db22684432c7fb951cd2bf4cde921f90e62f68 +Subproject commit bb1df0277d5823179626388d3a2da81a03d6a723 diff --git a/scripts/lint.py b/scripts/lint.py index 1a1aee9f..e79627c6 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -323,7 +323,8 @@ def lint_features(ctx, rule): def get_features(ctx, rule): # get features from rule and all dependencies including subscopes and matched rules features = [] - deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies()] + namespaces = capa.rules.index_rules_by_namespace([rule]) + deps = [ctx['rules'].rules[dep] for dep in rule.get_dependencies(namespaces)] for r in [rule] + deps: features.extend(get_rule_features(r)) return features diff --git a/tests/test_engine.py b/tests/test_engine.py index 3f2ff7e0..5034d18f 100644 --- a/tests/test_engine.py +++ 
b/tests/test_engine.py @@ -86,7 +86,8 @@ def test_complex(): def test_range(): # unbounded range, but no matching feature - assert Range(Number(1)).evaluate({Number(2): {}}) == False + # since the lower bound is zero, and there are zero matches, ok + assert Range(Number(1)).evaluate({Number(2): {}}) == True # unbounded range with matching feature should always match assert Range(Number(1)).evaluate({Number(1): {}}) == True @@ -117,6 +118,103 @@ def test_range(): assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3, 4}}) == False +def test_range_exact(): + rule = textwrap.dedent(''' + rule: + meta: + name: test rule + features: + - count(number(100)): 2 + ''') + r = capa.rules.Rule.from_yaml(rule) + + # just enough matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) + assert 'test rule' in matches + + # not enough matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0) + assert 'test rule' not in matches + + # too many matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0) + assert 'test rule' not in matches + + +def test_range_range(): + rule = textwrap.dedent(''' + rule: + meta: + name: test rule + features: + - count(number(100)): (2, 3) + ''') + r = capa.rules.Rule.from_yaml(rule) + + # just enough matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) + assert 'test rule' in matches + + # enough matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0) + assert 'test rule' in matches + + # not enough matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0) + assert 'test rule' not in matches + + # too many matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3, 4}}, 0x0) + assert 'test rule' not in matches + + +def test_range_exact_zero(): + 
rule = textwrap.dedent(''' + rule: + meta: + name: test rule + features: + - count(number(100)): 0 + ''') + r = capa.rules.Rule.from_yaml(rule) + + # feature isn't indexed - good. + features, matches = capa.engine.match([r], {}, 0x0) + assert 'test rule' in matches + + # feature is indexed, but no matches. + # i don't think we should ever really have this case, but good to check anyways. + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {}}, 0x0) + assert 'test rule' in matches + + # too many matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0) + assert 'test rule' not in matches + + +def test_range_with_zero(): + rule = textwrap.dedent(''' + rule: + meta: + name: test rule + features: + - count(number(100)): (0, 1) + ''') + r = capa.rules.Rule.from_yaml(rule) + + # ok + features, matches = capa.engine.match([r], {}, 0x0) + assert 'test rule' in matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {}}, 0x0) + assert 'test rule' in matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0) + assert 'test rule' in matches + + # too many matches + features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0) + assert 'test rule' not in matches + + def test_match_adds_matched_rule_feature(): """show that using `match` adds a feature for matched rules.""" rule = textwrap.dedent( diff --git a/tests/test_freeze.py b/tests/test_freeze.py index a78333b9..42562179 100644 --- a/tests/test_freeze.py +++ b/tests/test_freeze.py @@ -10,37 +10,26 @@ import capa.features.freeze from fixtures import * -EXTRACTOR = capa.features.extractors.NullFeatureExtractor( - { - "file features": [ - (0x402345, capa.features.Characteristic("embedded pe", True)), - ], - "functions": { - 0x401000: { - "features": [(0x401000, capa.features.Characteristic("switch", True)),], - "basic blocks": { - 0x401000: { - "features": [ - ( - 
0x401000, - capa.features.Characteristic("tight loop", True), - ), - ], - "instructions": { - 0x401000: { - "features": [ - (0x401000, capa.features.insn.Mnemonic("xor")), - ( - 0x401000, - capa.features.Characteristic("nzxor", True), - ), - ], - }, - 0x401002: { - "features": [ - (0x401002, capa.features.insn.Mnemonic("mov")), - ] - }, +EXTRACTOR = capa.features.extractors.NullFeatureExtractor({ + 'file features': [ + (0x402345, capa.features.Characteristic('embedded pe')), + ], + 'functions': { + 0x401000: { + 'features': [ + (0x401000, capa.features.Characteristic('switch')), + ], + 'basic blocks': { + 0x401000: { + 'features': [ + (0x401000, capa.features.Characteristic('tight loop')), + ], + 'instructions': { + 0x401000: { + 'features': [ + (0x401000, capa.features.insn.Mnemonic('xor')), + (0x401000, capa.features.Characteristic('nzxor')), + ], }, }, }, @@ -55,25 +44,19 @@ def test_null_feature_extractor(): assert list(EXTRACTOR.get_basic_blocks(0x401000)) == [0x401000] assert list(EXTRACTOR.get_instructions(0x401000, 0x0401000)) == [0x401000, 0x401002] - rules = capa.rules.RuleSet( - [ - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: xor loop - scope: basic block - features: - - and: - - characteristic(tight loop): true - - mnemonic: xor - - characteristic(nzxor): true - """ - ) - ), - ] - ) + rules = capa.rules.RuleSet([ + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: xor loop + scope: basic block + features: + - and: + - characteristic: tight loop + - mnemonic: xor + - characteristic: nzxor + ''')), + ]) capabilities = capa.main.find_capabilities(rules, EXTRACTOR) assert "xor loop" in capabilities @@ -178,9 +161,9 @@ def test_serialize_features(): roundtrip_feature(capa.features.String("SCardControl")) roundtrip_feature(capa.features.insn.Number(0xFF)) roundtrip_feature(capa.features.insn.Offset(0x0)) - roundtrip_feature(capa.features.insn.Mnemonic("push")) - 
roundtrip_feature(capa.features.file.Section(".rsrc")) - roundtrip_feature(capa.features.Characteristic("tight loop", True)) + roundtrip_feature(capa.features.insn.Mnemonic('push')) + roundtrip_feature(capa.features.file.Section('.rsrc')) + roundtrip_feature(capa.features.Characteristic('tight loop')) roundtrip_feature(capa.features.basicblock.BasicBlock()) roundtrip_feature(capa.features.file.Export("BaseThreadInitThunk")) roundtrip_feature(capa.features.file.Import("kernel32.IsWow64Process")) diff --git a/tests/test_main.py b/tests/test_main.py index 8640c4f1..4592a092 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -52,46 +52,33 @@ def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32): def test_ruleset(): - rules = capa.rules.RuleSet( - [ - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: file rule - scope: file - features: - - characteristic(embedded pe): y - """ - ) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: function rule - scope: function - features: - - characteristic(switch): y - """ - ) - ), - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: basic block rule - scope: basic block - features: - - characteristic(nzxor): y - """ - ) - ), - ] - ) + rules = capa.rules.RuleSet([ + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: file rule + scope: file + features: + - characteristic: embedded pe + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: function rule + scope: function + features: + - characteristic: switch + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: basic block rule + scope: basic block + features: + - characteristic: nzxor + ''')), + + ]) assert len(rules.file_rules) == 1 assert len(rules.function_rules) == 1 assert len(rules.basic_block_rules) == 1 @@ -165,65 +152,48 @@ def 
test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970 def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a): - rules = capa.rules.RuleSet( - [ - # this rule should match on a basic block (including at least 0x403685) - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: tight loop - scope: basic block - examples: - - 9324d1a8ae37a36ae560c37448c9705a:0x403685 - features: - - characteristic(tight loop): true - """ - ) - ), - # this rule should match on a function (0x403660) - # based on API, as well as prior basic block rule match - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: kill thread loop - scope: function - examples: - - 9324d1a8ae37a36ae560c37448c9705a:0x403660 - features: - - and: - - api: kernel32.TerminateThread - - api: kernel32.CloseHandle - - match: tight loop - """ - ) - ), - # this rule should match on a file feature and a prior function rule match - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: kill thread program - scope: file - examples: - - 9324d1a8ae37a36ae560c37448c9705a - features: - - and: - - section: .text - - match: kill thread loop - """ - ) - ), - ] - ) - extractor = capa.features.extractors.viv.VivisectFeatureExtractor( - sample_9324d1a8ae37a36ae560c37448c9705a.vw, - sample_9324d1a8ae37a36ae560c37448c9705a.path, - ) + rules = capa.rules.RuleSet([ + # this rule should match on a basic block (including at least 0x403685) + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: tight loop + scope: basic block + examples: + - 9324d1a8ae37a36ae560c37448c9705a:0x403685 + features: + - characteristic: tight loop + ''')), + # this rule should match on a function (0x403660) + # based on API, as well as prior basic block rule match + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: kill thread loop + scope: function + examples: + - 9324d1a8ae37a36ae560c37448c9705a:0x403660 + features: + - 
and: + - api: kernel32.TerminateThread + - api: kernel32.CloseHandle + - match: tight loop + ''')), + # this rule should match on a file feature and a prior function rule match + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: kill thread program + scope: file + examples: + - 9324d1a8ae37a36ae560c37448c9705a + features: + - and: + - section: .text + - match: kill thread loop + ''')), + ]) + extractor = capa.features.extractors.viv.VivisectFeatureExtractor(sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path) capabilities = capa.main.find_capabilities(rules, extractor) assert "tight loop" in capabilities assert "kill thread loop" in capabilities @@ -231,24 +201,18 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a): def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a): - rules = capa.rules.RuleSet( - [ - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: test rule - scope: function - features: - - and: - - basic block: - - characteristic(tight loop): true - """ - ) - ) - ] - ) + rules = capa.rules.RuleSet([ + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: test rule + scope: function + features: + - and: + - basic block: + - characteristic: tight loop + ''')) + ]) # tight loop at 0x403685 extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, diff --git a/tests/test_rules.py b/tests/test_rules.py index 95b88d24..039edb64 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -4,6 +4,7 @@ import pytest import capa.rules from capa.features.insn import Number, Offset +from capa.features import String def test_rule_ctor(): @@ -66,6 +67,22 @@ def test_rule_yaml_complex(): assert r.evaluate({Number(6): {1}, Number(7): {1}, Number(8): {1}}) == False +def test_rule_yaml_descriptions(): + rule = textwrap.dedent(''' + rule: + meta: + name: test rule + features: + - and: + - 
number: 1 = This is the number 1 + - string: This program cannot be run in DOS mode. + description: MS-DOS stub message + - count(number(2 = AF_INET/SOCK_DGRAM)): 2 + ''') + r = capa.rules.Rule.from_yaml(rule) + assert r.evaluate({Number(1): {1}, Number(2): {2, 3}, String('This program cannot be run in DOS mode.'): {4}}) == True + + def test_rule_yaml_not(): rule = textwrap.dedent( """ @@ -132,37 +149,47 @@ def test_invalid_rule_feature(): ) with pytest.raises(capa.rules.InvalidRule): - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: test rule - scope: file - features: - - characteristic(nzxor): true - """ - ) - ) + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: test rule + scope: file + features: + - characteristic: nzxor + ''')) with pytest.raises(capa.rules.InvalidRule): - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: test rule - scope: function - features: - - characteristic(embedded pe): true - """ - ) - ) + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: test rule + scope: function + features: + - characteristic: embedded pe + ''')) with pytest.raises(capa.rules.InvalidRule): - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: test rule + scope: basic block + features: + - characteristic: embedded pe + ''')) + + +def test_lib_rules(): + rules = capa.rules.RuleSet([ + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: a lib rule + lib: true + features: + - api: CreateFileA + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' rule: meta: name: test rule @@ -207,27 +234,21 @@ def test_lib_rules(): def test_subscope_rules(): - rules = capa.rules.RuleSet( - [ - capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: test rule - scope: file - features: + rules = capa.rules.RuleSet([ + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + 
name: test rule + scope: file + features: + - and: + - characteristic: embedded pe + - function: - and: - - characteristic(embedded pe): true - - function: - - and: - - characteristic(nzxor): true - - characteristic(switch): true - """ - ) - ) - ] - ) + - characteristic: nzxor + - characteristic: switch + ''')) + ]) # the file rule scope will have one rules: # - `test rule` assert len(rules.file_rules) == 1 @@ -295,10 +316,8 @@ def test_invalid_rules(): meta: name: test rule features: - - characteristic(number(1)): True - """ - ) - ) + - characteristic: number(1) + ''')) with pytest.raises(capa.rules.InvalidRule): r = capa.rules.Rule.from_yaml( @@ -308,10 +327,8 @@ def test_invalid_rules(): meta: name: test rule features: - - characteristic(count(number(100))): True - """ - ) - ) + - characteristic: count(number(100)) + ''')) def test_number_symbol(): diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index c32606aa..2d57eada 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -130,7 +130,7 @@ def test_offset_features(mimikatz): def test_nzxor_features(mimikatz): features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x410DFC)) - assert capa.features.Characteristic("nzxor", True) in features # 0x0410F0B + assert capa.features.Characteristic('nzxor') in features # 0x0410F0B def get_bb_insn(f, va): @@ -169,10 +169,8 @@ def test_mnemonic_features(mimikatz): def test_peb_access_features(sample_a933a1a402775cfa94b6bee0963f4b46): - features = extract_function_features( - viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC) - ) - assert capa.features.Characteristic("peb access", True) in features + features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC)) + assert capa.features.Characteristic('peb access') in features def test_file_section_name_features(mimikatz): @@ -188,7 +186,7 @@ def test_tight_loop_features(mimikatz): if bb.va != 
0x402F8E: continue features = extract_basic_block_features(f, bb) - assert capa.features.Characteristic("tight loop", True) in features + assert capa.features.Characteristic('tight loop') in features assert capa.features.basicblock.BasicBlock() in features @@ -198,7 +196,7 @@ def test_tight_loop_bb_features(mimikatz): if bb.va != 0x402F8E: continue features = extract_basic_block_features(f, bb) - assert capa.features.Characteristic("tight loop", True) in features + assert capa.features.Characteristic('tight loop') in features assert capa.features.basicblock.BasicBlock() in features @@ -219,24 +217,18 @@ def test_file_import_name_features(mimikatz): def test_cross_section_flow_features(sample_a198216798ca38f280dc413f8c57f2c2): - features = extract_function_features( - viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0) - ) - assert capa.features.Characteristic("cross section flow", True) in features + features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0)) + assert capa.features.Characteristic('cross section flow') in features # this function has calls to some imports, # which should not trigger cross-section flow characteristic - features = extract_function_features( - viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563) - ) - assert capa.features.Characteristic("cross section flow", True) not in features + features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563)) + assert capa.features.Characteristic('cross section flow') not in features def test_segment_access_features(sample_a933a1a402775cfa94b6bee0963f4b46): - features = extract_function_features( - viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC) - ) - assert capa.features.Characteristic("fs access", True) in features + features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC)) + assert 
capa.features.Characteristic('fs access') in features def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a): @@ -249,60 +241,36 @@ def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a): def test_file_embedded_pe(pma_lab_12_04): features = extract_file_features(pma_lab_12_04.vw, pma_lab_12_04.path) - assert capa.features.Characteristic("embedded pe", True) in features + assert capa.features.Characteristic('embedded pe') in features def test_stackstring_features(mimikatz): features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4556E5)) - assert capa.features.Characteristic("stack string", True) in features + assert capa.features.Characteristic('stack string') in features def test_switch_features(mimikatz): features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409411)) - assert capa.features.Characteristic("switch", True) in features + assert capa.features.Characteristic('switch') in features features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409393)) - assert capa.features.Characteristic("switch", True) not in features + assert capa.features.Characteristic('switch') not in features -def test_recursive_call_feature( - sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41, -): - features = extract_function_features( - viv_utils.Function( - sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, - 0x10003100, - ) - ) - assert capa.features.Characteristic("recursive call", True) in features +def test_recursive_call_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41): + features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003100)) + assert capa.features.Characteristic('recursive call') in features - features = extract_function_features( - viv_utils.Function( - sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, - 0x10007B00, - 
) - ) - assert capa.features.Characteristic("recursive call", True) not in features + features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007B00)) + assert capa.features.Characteristic('recursive call') not in features -def test_loop_feature( - sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41, -): - features = extract_function_features( - viv_utils.Function( - sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, - 0x10003D30, - ) - ) - assert capa.features.Characteristic("loop", True) in features +def test_loop_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41): + features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003D30)) + assert capa.features.Characteristic('loop') in features - features = extract_function_features( - viv_utils.Function( - sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, - 0x10007250, - ) - ) - assert capa.features.Characteristic("loop", True) not in features + features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007250)) + assert capa.features.Characteristic('loop') not in features def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5): @@ -315,27 +283,21 @@ def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5): def test_function_calls_to(sample_9324d1a8ae37a36ae560c37448c9705a): - features = extract_function_features( - viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60) - ) - assert capa.features.Characteristic("calls to", True) in features - assert len(features[capa.features.Characteristic("calls to", True)]) == 1 + features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)) + assert 
capa.features.Characteristic('calls to') in features + assert len(features[capa.features.Characteristic('calls to')]) == 1 def test_function_calls_to64(sample_lab21_01): - features = extract_function_features( - viv_utils.Function(sample_lab21_01.vw, 0x1400052D0) - ) # memcpy - assert capa.features.Characteristic("calls to", True) in features - assert len(features[capa.features.Characteristic("calls to", True)]) == 8 + features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400052D0)) # memcpy + assert capa.features.Characteristic('calls to') in features + assert len(features[capa.features.Characteristic('calls to')]) == 8 def test_function_calls_from(sample_9324d1a8ae37a36ae560c37448c9705a): - features = extract_function_features( - viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60) - ) - assert capa.features.Characteristic("calls from", True) in features - assert len(features[capa.features.Characteristic("calls from", True)]) == 23 + features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)) + assert capa.features.Characteristic('calls from') in features + assert len(features[capa.features.Characteristic('calls from')]) == 23 def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a): @@ -346,11 +308,9 @@ def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a): def test_indirect_call_features(sample_a933a1a402775cfa94b6bee0963f4b46): - features = extract_function_features( - viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA68A0) - ) - assert capa.features.Characteristic("indirect call", True) in features - assert len(features[capa.features.Characteristic("indirect call", True)]) == 3 + features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA68A0)) + assert capa.features.Characteristic('indirect call') in features + assert len(features[capa.features.Characteristic('indirect 
call')]) == 3 def test_indirect_calls_resolved(sample_c91887d861d9bd4a5872249b641bc9f9):