Merge branch 'master' into fmt-black

2025-12-21 23:00:29 -08:00 · 2020-07-02 10:25:24 -06:00
parent 92bea58347 acbcd0c4b0
commit db2b1caeae
29 changed files with 847 additions and 612 deletions
--- a/README.md
+++ b/README.md
@@ -84,6 +84,7 @@ Download capa from the [Releases](/releases) page or get the nightly builds here
    - [section](#section)
  - [counting](#counting)
  - [matching prior rule matches](#matching-prior-rule-matches)
  - [descriptions](#description)
 - [limitations](#limitations)
 # installation
@@ -339,8 +340,9 @@ For example, a crypto constant.
 The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
-To associate context with a number, e.g. for constant definitions, append an equal sign and the respective name to
+To help humans understand the meaning of a number, e.g. that the constant `0x40` means `PAGE_EXECUTE_READWRITE`, you may provide a description alongside the definition.
-the number definition. This helps with documenting rules and provides context in capa's output.
+Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`.
 Check the [description section](#description) for more details.
 Examples:
@@ -362,20 +364,31 @@ Regexes should be surrounded with `/` characters.
 By default, capa uses case-sensitive matching and assumes leading and trailing wildcards.
 To perform case-insensitive matching append an `i`. To anchor the regex at the start or end of a string, use `^` and/or `$`.
 To add context to a string, use the two-line syntax with the `description` tag: `description: DESCRIPTION STRING`.
 The inline syntax is not supported.
 Check the [description section](#description) for more details.
 Examples:
-    string: This program cannot be run in DOS mode.
+```
-    string: Firefox 64.0
+- string: This program cannot be run in DOS mode.
-    string: /SELECT.*FROM.*WHERE/
+  description: MS-DOS stub message
-    string: /Hardware\\Description\\System\\CentralProcessor/i
+- string: '{3E5FC7F9-9A51-4367-9063-A120244FBEC7}'
-    
+  description: CLSID_CMSTPLUA
 - string: Firefox 64.0
 - string: /SELECT.*FROM.*WHERE/
 - string: /Hardware\\Description\\System\\CentralProcessor/i
 ```
 Note that regex matching is expensive (`O(features)` rather than `O(1)`) so they should be used sparingly.
 ### bytes
 A sequence of bytes referenced by the logic of the program. 
 The provided sequence must match from the beginning of the referenced bytes and be no more than `0x100` bytes.
-The parameter is a sequence of hexadecimal bytes followed by an optional description.
+The parameter is a sequence of hexadecimal bytes.
- 
+To help humans understand the meaning of the byte sequence, you may provide a description.
 Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`.
 Check the [description section](#description) for more details.
 The example below illustrates byte matching given a COM CLSID pushed onto the stack prior to `CoCreateInstance`.
@@ -397,6 +410,7 @@ A structure offset referenced by the logic of the program.
 This should not be a stack offset.
 The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
 It can be followed by an optional description.
 Examples:
@@ -453,6 +467,7 @@ These are the features supported at the file-scope:
  - [import](#import)
  - [section](#section)
 ### file string
 An ASCII or UTF-16 LE string present in the file.
@@ -511,6 +526,10 @@ These rules can be expressed like:
    count(mnemonic(mov)): 3
    count(basic block): 4
 `count` supports inline descriptions, except for [strings](#string), using the following syntax:
    count(number(2 = AF_INET/SOCK_DGRAM)): 2
 ## matching prior rule matches
 capa rules can specify logic for matching on other rule matches.
@@ -532,6 +551,28 @@ By default, library rules will not be output to the user as a rule match,
 but can be matched by other rules.
 When no active rules depend on a library rule, that library rule will not be evaluated - maintaining performance.
 ## description
 All features support an optional description which helps with documenting rules and provides context in capa's output.
 For all features except for [strings](#string), the description can be specified inline preceded by ` = `: ` = DESCRIPTION STRING`.
 For example:
 ```
 - number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ)
 ```
 The inline syntax is preferred.
 For [strings](#string) or if the description is long or contains newlines, use the two-line syntax.
 It uses the `description` tag in the following way: `description: DESCRIPTION STRING`
 For example:
 ```
 - string: This program cannot be run in DOS mode.
  description: MS-DOS stub message
 - number: 0x4550
  description: IMAGE_DOS_SIGNATURE (MZ)
 ```
 # limitations
 To learn more about capa's current limitations see [here](doc/limitations.md).
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -161,11 +161,11 @@ class Range(Statement):
        self.max = max if max is not None else (1 << 64 - 1)
    def evaluate(self, ctx):
-        if self.child not in ctx:
+        count = len(ctx.get(self.child, []))
-            return Result(False, self, [])
+        if self.min == 0 and count == 0:
            return Result(True, self, [])
-        count = len(ctx[self.child])
+        return Result(self.min <= count <= self.max, self, [], locations=ctx.get(self.child))
        return Result(self.min <= count <= self.max, self, [], locations=ctx[self.child])
    def __str__(self):
        if self.max == (1 << 64 - 1):
--- a/capa/features/init.py
+++ b/capa/features/init.py
@@ -17,10 +17,11 @@ def bytes_to_str(b):
 class Feature(object):
-    def __init__(self, args):
+    def __init__(self, args, description=None):
        super(Feature, self).__init__()
-        self.name = self.__class__.__name__
+        self.name = self.__class__.__name__.lower()
        self.args = args
        self.description = description
    def __hash__(self):
        return hash((self.name, tuple(self.args)))
@@ -28,8 +29,16 @@ class Feature(object):
    def __eq__(self, other):
        return self.name == other.name and self.args == other.args
    # Used to overwrite the rendering of the feature args in `__str__` and the
    # json output
    def get_args_str(self):
        return ','.join(self.args)
    def __str__(self):
-        return "%s(%s)" % (self.name.lower(), ",".join(self.args))
+        if self.description:
            return '%s(%s = %s)' % (self.name, self.get_args_str(), self.description)
        else:
            return '%s(%s)' % (self.name, self.get_args_str())
    def __repr__(self):
        return str(self)
@@ -49,51 +58,41 @@ class Feature(object):
 class MatchedRule(Feature):
-    def __init__(self, rule_name):
+    def __init__(self, rule_name, description=None):
-        super(MatchedRule, self).__init__([rule_name])
+        super(MatchedRule, self).__init__([rule_name], description)
        self.name = 'match'
        self.rule_name = rule_name
    def __str__(self):
        return "match(%s)" % (self.rule_name)
 class Characteristic(Feature):
-    def __init__(self, name, value=None):
+    def __init__(self, value, description=None):
-        """
+        super(Characteristic, self).__init__([value], description)
        when `value` is not provided, this serves as descriptor for a class of characteristics.
        this is only used internally, such as in `rules.py` when checking if a statement is
          supported by a given scope.
        """
        super(Characteristic, self).__init__([name, value])
        self.name = name
        self.value = value
-    def evaluate(self, ctx):
+    def freeze_serialize(self):
-        if self.value is None:
+        # in an older version of capa, characteristics could theoretically match non-existence (value=False).
-            raise ValueError("cannot evaluate characteristc %s with empty value" % (str(self)))
+        # but we found this was never used (and better expressed with `not: characteristic: ...`).
-        return super(Characteristic, self).evaluate(ctx)
+        # this was represented using an additional parameter for Characteristic.
        # its been removed, but we keep it around in the freeze format to maintain backwards compatibility.
        # this value is ignored, however.
        return (self.__class__.__name__, [self.value, True])
-    def __str__(self):
+    @classmethod
-        if self.value is None:
+    def freeze_deserialize(cls, args):
-            return "characteristic(%s)" % (self.name)
+        # see above. we ignore the second element in the 2-tuple here.
-        else:
+        return cls(args[0])
            return "characteristic(%s(%s))" % (self.name, self.value)
 class String(Feature):
-    def __init__(self, value):
+    def __init__(self, value, description=None):
-        super(String, self).__init__([value])
+        super(String, self).__init__([value], description)
        self.value = value
    def __str__(self):
        return 'string("%s")' % (self.value)
 class Bytes(Feature):
-    def __init__(self, value, symbol=None):
+    def __init__(self, value, description=None):
-        super(Bytes, self).__init__([value])
+        super(Bytes, self).__init__([value], description)
        self.value = value
        self.symbol = symbol
    def evaluate(self, ctx):
        for feature, locations in ctx.items():
@@ -105,11 +104,8 @@ class Bytes(Feature):
        return capa.engine.Result(False, self, [])
-    def __str__(self):
+    def get_args_str(self):
-        if self.symbol:
+        return bytes_to_str(self.value).upper()
            return "bytes(0x%s = %s)" % (bytes_to_str(self.value).upper(), self.symbol)
        else:
            return "bytes(0x%s)" % (bytes_to_str(self.value).upper())
    def freeze_serialize(self):
        return (self.__class__.__name__, [bytes_to_str(x).upper() for x in self.args])
--- a/capa/features/extractors/init.py
+++ b/capa/features/extractors/init.py
@@ -185,22 +185,22 @@ class NullFeatureExtractor(FeatureExtractor):
        extractor = NullFeatureExtractor({
            'file features': [
-                (0x402345, capa.features.Characteristic('embedded pe', True)),
+                (0x402345, capa.features.Characteristic('embedded pe')),
            ],
            'functions': {
                0x401000: {
                    'features': [
-                        (0x401000, capa.features.Characteristic('switch', True)),
+                        (0x401000, capa.features.Characteristic('switch')),
                    ],
                    'basic blocks': {
                        0x401000: {
                            'features': [
-                                (0x401000, capa.features.Characteristic('tight-loop', True)),
+                                (0x401000, capa.features.Characteristic('tight-loop')),
                            ],
                            'instructions': {
                                0x401000: {
                                    'features': [
-                                        (0x401000, capa.features.Characteristic('nzxor', True)),
+                                        (0x401000, capa.features.Characteristic('nzxor')),
                                    ],
                                },
                                0x401002: ...
--- a/capa/features/extractors/ida/basicblock.py
+++ b/capa/features/extractors/ida/basicblock.py
@@ -103,7 +103,7 @@ def extract_bb_stackstring(f, bb):
            bb (IDA BasicBlock)
    """
    if _ida_bb_contains_stackstring(f, bb):
-        yield Characteristic("stack string", True), bb.start_ea
+        yield Characteristic('stack string'), bb.start_ea
 def _ida_bb_contains_tight_loop(f, bb):
@@ -133,7 +133,7 @@ def extract_bb_tight_loop(f, bb):
            bb (IDA BasicBlock)
    """
    if _ida_bb_contains_tight_loop(f, bb):
-        yield Characteristic("tight loop", True), bb.start_ea
+        yield Characteristic('tight loop'), bb.start_ea
 def extract_features(f, bb):
--- a/capa/features/extractors/ida/file.py
+++ b/capa/features/extractors/ida/file.py
@@ -75,7 +75,7 @@ def extract_file_embedded_pe():
            continue
        for ea, _ in _ida_check_segment_for_pe(seg):
-            yield Characteristic("embedded pe", True), ea
+            yield Characteristic('embedded pe'), ea
 def extract_file_export_names():
--- a/capa/features/extractors/ida/function.py
+++ b/capa/features/extractors/ida/function.py
@@ -29,7 +29,7 @@ def extract_function_switch(f):
            f (IDA func_t)
    """
    if _ida_function_contains_switch(f):
-        yield Characteristic("switch", True), f.start_ea
+        yield Characteristic('switch'), f.start_ea
 def extract_function_calls_to(f):
@@ -39,7 +39,7 @@ def extract_function_calls_to(f):
            f (IDA func_t)
    """
    for ea in idautils.CodeRefsTo(f.start_ea, True):
-        yield Characteristic("calls to", True), ea
+        yield Characteristic('calls to'), ea
 def extract_function_loop(f):
@@ -53,7 +53,7 @@ def extract_function_loop(f):
        map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs())
    if edges and loops.has_loop(edges):
-        yield Characteristic("loop", True), f.start_ea
+        yield Characteristic('loop'), f.start_ea
 def extract_recursive_call(f):
@@ -64,7 +64,7 @@ def extract_recursive_call(f):
    """
    for ref in idautils.CodeRefsTo(f.start_ea, True):
        if f.contains(ref):
-            yield Characteristic("recursive call", True), f.start_ea
+            yield Characteristic('recursive call'), f.start_ea
            break
--- a/capa/features/extractors/ida/insn.py
+++ b/capa/features/extractors/ida/insn.py
@@ -259,7 +259,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
    if _is_nzxor_stack_cookie(f, bb, insn):
        return
-    yield Characteristic("nzxor", True), insn.ea
+    yield Characteristic('nzxor'), insn.ea
 def extract_insn_mnemonic_features(f, bb, insn):
@@ -292,7 +292,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
    if " fs:30h" in disasm or " gs:60h" in disasm:
        # TODO: replace above with proper IDA
-        yield Characteristic("peb access", True), insn.ea
+        yield Characteristic('peb access'), insn.ea
 def extract_insn_segment_access_features(f, bb, insn):
@@ -309,11 +309,11 @@ def extract_insn_segment_access_features(f, bb, insn):
    if " fs:" in disasm:
        # TODO: replace above with proper IDA
-        yield Characteristic("fs access", True), insn.ea
+        yield Characteristic('fs access'), insn.ea
    if " gs:" in disasm:
        # TODO: replace above with proper IDA
-        yield Characteristic("gs access", True), insn.ea
+        yield Characteristic('gs access'), insn.ea
 def extract_insn_cross_section_cflow(f, bb, insn):
@@ -336,7 +336,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
        if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
            continue
-        yield Characteristic("cross section flow", True), insn.ea
+        yield Characteristic('cross section flow'), insn.ea
 def extract_function_calls_from(f, bb, insn):
@@ -354,7 +354,7 @@ def extract_function_calls_from(f, bb, insn):
        return
    for ref in idautils.CodeRefsFrom(insn.ea, False):
-        yield Characteristic("calls from", True), ref
+        yield Characteristic('calls from'), ref
 def extract_function_indirect_call_characteristic_features(f, bb, insn):
@@ -373,7 +373,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
        return
    if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
-        yield Characteristic("indirect call", True), insn.ea
+        yield Characteristic('indirect call'), insn.ea
 def extract_features(f, bb, insn):
--- a/capa/features/extractors/viv/basicblock.py
+++ b/capa/features/extractors/viv/basicblock.py
@@ -39,7 +39,7 @@ def _bb_has_tight_loop(f, bb):
 def extract_bb_tight_loop(f, bb):
    """ check basic block for tight loop indicators """
    if _bb_has_tight_loop(f, bb):
-        yield Characteristic("tight loop", True), bb.va
+        yield Characteristic('tight loop'), bb.va
 def _bb_has_stackstring(f, bb):
@@ -62,7 +62,7 @@ def _bb_has_stackstring(f, bb):
 def extract_stackstring(f, bb):
    """ check basic block for stackstring indicators """
    if _bb_has_stackstring(f, bb):
-        yield Characteristic("stack string", True), bb.va
+        yield Characteristic('stack string'), bb.va
 def is_mov_imm_to_stack(instr):
--- a/capa/features/extractors/viv/file.py
+++ b/capa/features/extractors/viv/file.py
@@ -13,7 +13,7 @@ def extract_file_embedded_pe(vw, file_path):
        fbytes = f.read()
    for offset, i in pe_carve.carve(fbytes, 1):
-        yield Characteristic("embedded pe", True), offset
+        yield Characteristic('embedded pe'), offset
 def extract_file_export_names(vw, file_path):
--- a/capa/features/extractors/viv/function.py
+++ b/capa/features/extractors/viv/function.py
@@ -53,12 +53,12 @@ def extract_function_switch(f):
    method can be optimized
    """
    if f.va in get_functions_with_switch(f.vw):
-        yield Characteristic("switch", True), f.va
+        yield Characteristic('switch'), f.va
 def extract_function_calls_to(f):
    for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
-        yield Characteristic("calls to", True), src
+        yield Characteristic('calls to'), src
 def extract_function_loop(f):
@@ -74,7 +74,7 @@ def extract_function_loop(f):
                    edges.append((bb.va, bva))
    if edges and loops.has_loop(edges):
-        yield Characteristic("loop", True), f.va
+        yield Characteristic('loop'), f.va
 def extract_features(f):
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -287,7 +287,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
    if is_security_cookie(f, bb, insn):
        return
-    yield Characteristic("nzxor", True), insn.va
+    yield Characteristic('nzxor'), insn.va
 def extract_insn_mnemonic_features(f, bb, insn):
@@ -313,16 +313,14 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
            #     IDA: push    large dword ptr fs:30h
            #     viv: fs: push dword [0x00000030]
            #     fs: push dword [eax + 0x30]  ; i386RegMemOper, with eax = 0
-            if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (
+            if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or \
-                isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
+                    (isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30):
-            ):
+                yield Characteristic('peb access'), insn.va
-                yield Characteristic("peb access", True), insn.va
+    elif 'gs' in insn.getPrefixName():
    elif "gs" in insn.getPrefixName():
        for oper in insn.opers:
-            if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
+            if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or \
-                isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
+                    (isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60):
-            ):
+                yield Characteristic('peb access'), insn.va
                yield Characteristic("peb access", True), insn.va
    else:
        pass
@@ -331,11 +329,11 @@ def extract_insn_segment_access_features(f, bb, insn):
    """ parse the instruction for access to fs or gs """
    prefix = insn.getPrefixName()
-    if prefix == "fs":
+    if prefix == 'fs':
-        yield Characteristic("fs access", True), insn.va
+        yield Characteristic('fs access'), insn.va
-    if prefix == "gs":
+    if prefix == 'gs':
-        yield Characteristic("gs access", True), insn.va
+        yield Characteristic('gs access'), insn.va
 def get_section(vw, va):
@@ -372,7 +370,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
                    continue
            if get_section(f.vw, insn.va) != get_section(f.vw, va):
-                yield Characteristic("cross section flow", True), insn.va
+                yield Characteristic('cross section flow'), insn.va
        except KeyError:
            continue
@@ -390,7 +388,7 @@ def extract_function_calls_from(f, bb, insn):
    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
        oper = insn.opers[0]
        target = oper.getOperAddr(insn)
-        yield Characteristic("calls from", True), target
+        yield Characteristic('calls from'), target
    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
@@ -399,18 +397,18 @@ def extract_function_calls_from(f, bb, insn):
    # see Lab21-01.exe_:0x140001178
    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
        target = insn.opers[0].getOperValue(insn)
-        yield Characteristic("calls from", True), target
+        yield Characteristic('calls from'), target
    # call via IAT, x64
    elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
        op = insn.opers[0]
        target = op.getOperAddr(insn)
-        yield Characteristic("calls from", True), target
+        yield Characteristic('calls from'), target
    if target and target == f.va:
        # if we found a jump target and it's the function address
        # mark as recursive
-        yield Characteristic("recursive call", True), target
+        yield Characteristic('recursive call'), target
 # this is a feature that's most relevant at the function or basic block scope,
@@ -426,13 +424,13 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
    # Checks below work for x86 and x64
    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
        # call edx
-        yield Characteristic("indirect call", True), insn.va
+        yield Characteristic('indirect call'), insn.va
    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper):
        # call dword ptr [eax+50h]
-        yield Characteristic("indirect call", True), insn.va
+        yield Characteristic('indirect call'), insn.va
    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper):
        # call qword ptr [rsp+78h]
-        yield Characteristic("indirect call", True), insn.va
+        yield Characteristic('indirect call'), insn.va
 def extract_features(f, bb, insn):
--- a/capa/features/file.py
+++ b/capa/features/file.py
@@ -2,30 +2,21 @@ from capa.features import Feature
 class Export(Feature):
-    def __init__(self, value):
+    def __init__(self, value, description=None):
        # value is export name
-        super(Export, self).__init__([value])
+        super(Export, self).__init__([value], description)
        self.value = value
    def __str__(self):
        return "Export(%s)" % (self.value)
 class Import(Feature):
-    def __init__(self, value):
+    def __init__(self, value, description=None):
        # value is import name
-        super(Import, self).__init__([value])
+        super(Import, self).__init__([value], description)
        self.value = value
    def __str__(self):
        return "Import(%s)" % (self.value)
 class Section(Feature):
-    def __init__(self, value):
+    def __init__(self, value, description=None):
        # value is section name
-        super(Section, self).__init__([value])
+        super(Section, self).__init__([value], description)
        self.value = value
    def __str__(self):
        return "Section(%s)" % (self.value)
--- a/capa/features/insn.py
+++ b/capa/features/insn.py
@@ -2,45 +2,34 @@ from capa.features import Feature
 class API(Feature):
-    def __init__(self, name):
+    def __init__(self, name, description=None):
        # Downcase library name if given
        if "." in name:
            modname, impname = name.split(".")
            name = modname.lower() + "." + impname
-        super(API, self).__init__([name])
+        super(API, self).__init__([name], description)
 class Number(Feature):
-    def __init__(self, value, symbol=None):
+    def __init__(self, value, description=None):
-        super(Number, self).__init__([value])
+        super(Number, self).__init__([value], description)
        self.value = value
        self.symbol = symbol
-    def __str__(self):
+    def get_args_str(self):
-        if self.symbol:
+        return '0x%X' % self.value
            return "number(0x%x = %s)" % (self.value, self.symbol)
        else:
            return "number(0x%x)" % (self.value)
 class Offset(Feature):
-    def __init__(self, value, symbol=None):
+    def __init__(self, value, description=None):
        super(Offset, self).__init__([value])
        self.value = value
        self.symbol = symbol
-    def __str__(self):
+    def get_args_str(self):
-        if self.symbol:
+        return '0x%X' % self.value
            return "offset(0x%x = %s)" % (self.value, self.symbol)
        else:
            return "offset(0x%x)" % (self.value)
 class Mnemonic(Feature):
-    def __init__(self, value):
+    def __init__(self, value, description=None):
-        super(Mnemonic, self).__init__([value])
+        super(Mnemonic, self).__init__([value], description)
        self.value = value
    def __str__(self):
        return "mnemonic(%s)" % (self.value)
--- a/capa/ida/explorer/item.py
+++ b/capa/ida/explorer/item.py
@@ -190,6 +190,14 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):
        self._data[0] = self.fmt % display
 class CapaExplorerSubscopeItem(CapaExplorerDataItem):
    fmt = 'subscope(%s)'
    def __init__(self, parent, scope):
        super(CapaExplorerSubscopeItem, self).__init__(parent, [self.fmt % scope, '', ''])
 class CapaExplorerBlockItem(CapaExplorerDataItem):
    """ store data relevant to capa basic block result """
--- a/capa/ida/explorer/model.py
+++ b/capa/ida/explorer/model.py
@@ -17,6 +17,7 @@ from capa.ida.explorer.item import (
    CapaExplorerBlockItem,
    CapaExplorerRuleMatchItem,
    CapaExplorerFeatureItem,
    CapaExplorerSubscopeItem
 )
 import capa.ida.helpers
@@ -108,20 +109,10 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                font.setBold(True)
            return font
-        if (
+        if role == QtCore.Qt.FontRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem,
-            role == QtCore.Qt.FontRole
+                                                            CapaExplorerBlockItem, CapaExplorerFunctionItem,
-            and isinstance(
+                                                            CapaExplorerFeatureItem, CapaExplorerSubscopeItem)) and \
-                item,
+                column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
                (
                    CapaExplorerRuleItem,
                    CapaExplorerRuleMatchItem,
                    CapaExplorerBlockItem,
                    CapaExplorerFunctionItem,
                    CapaExplorerFeatureItem,
                ),
            )
            and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
        ):
            # set bold font for top-level rules
            font = QtGui.QFont()
            font.setBold(True)
@@ -322,11 +313,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        return item.childCount()
-    def render_capa_doc_statement_node(self, parent, statement, doc):
+    def render_capa_doc_statement_node(self, parent, statement, locations, doc):
        """ render capa statement read from doc
            @param parent: parent to which new child is assigned
            @param statement: statement read from doc
            @param locations: locations of children (applies to range only?)
            @param doc: capa result doc
            "statement": {
@@ -356,10 +348,16 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            else:
                display += "between %d and %d" % (statement["min"], statement["max"])
-            return CapaExplorerFeatureItem(parent, display=display)
+            parent2 = CapaExplorerFeatureItem(parent, display=display)
-        elif statement["type"] == "subscope":
+
-            return CapaExplorerFeatureItem(parent, "subscope(%s)" % statement["subscope"])
+            for location in locations:
-        elif statement["type"] == "regex":
+                # for each location render child node for range statement
                self.render_capa_doc_feature(parent2, statement['child'], location, doc)
            return parent2
        elif statement['type'] == 'subscope':
            return CapaExplorerSubscopeItem(parent, statement['subscope'])
        elif statement['type'] == 'regex':
            # regex is a `Statement` not a `Feature`
            # this is because it doesn't get extracted, but applies to all strings in scope.
            # so we have to handle it here
@@ -401,10 +399,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        ):
            return
-        if match["node"]["type"] == "statement":
+        if match['node']['type'] == 'statement':
-            parent2 = self.render_capa_doc_statement_node(parent, match["node"]["statement"], doc)
+            parent2 = self.render_capa_doc_statement_node(parent, match['node']['statement'],
-        elif match["node"]["type"] == "feature":
+                                                          match.get('locations', []), doc)
-            parent2 = self.render_capa_doc_feature_node(parent, match["node"]["feature"], match["locations"], doc)
+        elif match['node']['type'] == 'feature':
            parent2 = self.render_capa_doc_feature_node(parent, match['node']['feature'], match['locations'], doc)
        else:
            raise RuntimeError("unexpected node type: " + str(match["node"]["type"]))
--- a/capa/ida/ida_capa_explorer.py
+++ b/capa/ida/ida_capa_explorer.py
@@ -375,10 +375,14 @@ class CapaExplorerForm(idaapi.PluginForm):
        self.render_capa_doc_summary(doc)
        self.render_capa_doc_mitre_summary(doc)
-        self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)
+        self.set_view_tree_default_sort_order()
        logger.info("render views completed.")
    def set_view_tree_default_sort_order(self):
        """ """
        self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)
    def render_capa_doc_summary(self, doc):
        """ """
        for (row, rule) in enumerate(rutils.capability_rules(doc)):
@@ -459,6 +463,7 @@ class CapaExplorerForm(idaapi.PluginForm):
        self.model_data.reset()
        self.view_tree.reset()
        self.view_checkbox_limit_by.setChecked(False)
        self.set_view_tree_default_sort_order()
    def reload(self):
        """ reload views and re-run capa analysis """
--- a/capa/render/init.py
+++ b/capa/render/init.py
@@ -1,6 +1,7 @@
 import json
 import six
 import capa.rules
 import capa.engine
@@ -83,23 +84,11 @@ def convert_feature_to_result_document(feature):
            "type": "characteristic"
        },
    """
-    name, value = feature.freeze_serialize()
+    result = {'type': feature.name, feature.name: feature.get_args_str()}
    if feature.description:
        result['description'] = feature.description
-    # make the terms pretty
+    return result
    name = name.lower()
    if name == "matchedrule":
        name = "match"
    # in the common case, there's a single argument
    # so use it directly.
    # like: name=number value=1
    if isinstance(value, list) and len(value) == 1:
        value = value[0]
    return {
        "type": name,
        name: value,
    }
 def convert_node_to_result_document(node):
@@ -144,7 +133,10 @@ def convert_match_to_result_document(rules, capabilities, result):
    # so only add `locations` to feature nodes.
    if isinstance(result.statement, capa.features.Feature):
        if bool(result.success):
-            doc["locations"] = result.locations
+            doc['locations'] = result.locations
    elif isinstance(result.statement, capa.rules.Range):
        if bool(result.success):
            doc['locations'] = result.locations
    # if we have a `match` statement, then we're referencing another rule.
    # this could be an external rule (written by a human), or
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -4,12 +4,31 @@ import capa.rules
 import capa.render.utils as rutils
-def render_statement(ostream, statement, indent=0):
+def render_locations(ostream, match):
-    ostream.write("  " * indent)
+    # it's possible to have an empty locations array here,
-    if statement["type"] in ("and", "or", "optional"):
+    # such as when we're in MODE_FAILURE and showing the logic
-        ostream.write(statement["type"])
+    # under a `not` statement (which will have no matched locations).
-        ostream.writeln(":")
+    locations = list(sorted(match.get('locations', [])))
-    elif statement["type"] == "not":
+    if len(locations) == 1:
        ostream.write(' @ ')
        ostream.write(rutils.hex(locations[0]))
    elif len(locations) > 1:
        ostream.write(' @ ')
        if len(locations) > 4:
            # don't display too many locations, because it becomes very noisy.
            # probably only the first handful of locations will be useful for inspection.
            ostream.write(', '.join(map(rutils.hex, locations[0:4])))
            ostream.write(', and %d more...' % (len(locations) - 4))
        else:
            ostream.write(', '.join(map(rutils.hex, locations)))
 def render_statement(ostream, match, statement, indent=0):
    ostream.write('  ' * indent)
    if statement['type'] in ('and', 'or', 'optional'):
        ostream.write(statement['type'])
        ostream.writeln(':')
    elif statement['type'] == 'not':
        # this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
        ostream.writeln("not:")
    elif statement["type"] == "some":
@@ -21,32 +40,36 @@ def render_statement(ostream, statement, indent=0):
        # there's no additional logic in the feature part, just the existence of a feature.
        # so, we have to inline some of the feature rendering here.
-        child = statement["child"]
+        child = statement['child']
-        if child["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
+        if child['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match', 'characteristic'):
-            feature = "%s(%s)" % (child["type"], rutils.bold2(child[child["type"]]))
+            value = rutils.bold2(child[child['type']])
-        elif child["type"] in ("number", "offset"):
+        elif child['type'] in ('number', 'offset'):
-            feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex(child[child["type"]])))
+            value = rutils.bold2(rutils.hex(child[child['type']]))
-        elif child["type"] == "bytes":
+        elif child['type'] == 'bytes':
-            feature = "%s(%s)" % (child["type"], rutils.bold2(rutils.hex_string(child[child["type"]])))
+            value = rutils.bold2(rutils.hex_string(child[child['type']]))
        elif child["type"] == "characteristic":
            feature = "characteristic(%s)" % (rutils.bold2(child["characteristic"][0]))
        else:
            raise RuntimeError("unexpected feature type: " + str(child))
-        ostream.write("count(%s): " % feature)
+        if child['description']:
-
+            ostream.write('count(%s(%s = %s)): ' % (child['type'], value, child['description']))
        if statement["max"] == statement["min"]:
            ostream.writeln("%d" % (statement["min"]))
        elif statement["min"] == 0:
            ostream.writeln("%d or fewer" % (statement["max"]))
        elif statement["max"] == (1 << 64 - 1):
            ostream.writeln("%d or more" % (statement["min"]))
        else:
-            ostream.writeln("between %d and %d" % (statement["min"], statement["max"]))
+            ostream.write('count(%s(%s)): ' % (child['type'], value))
-    elif statement["type"] == "subscope":
+
-        ostream.write(statement["subscope"])
+        if statement['max'] == statement['min']:
-        ostream.writeln(":")
+            ostream.write('%d' % (statement['min']))
-    elif statement["type"] == "regex":
+        elif statement['min'] == 0:
            ostream.write('%d or fewer' % (statement['max']))
        elif statement['max'] == (1 << 64 - 1):
            ostream.write('%d or more' % (statement['min']))
        else:
            ostream.write('between %d and %d' % (statement['min'], statement['max']))
        render_locations(ostream, match)
        ostream.write('\n')
    elif statement['type'] == 'subscope':
        ostream.write(statement['subscope'])
        ostream.writeln(':')
    elif statement['type'] == 'regex':
        # regex is a `Statement` not a `Feature`
        # this is because it doesn't get extracted, but applies to all strings in scope.
        # so we have to handle it here
@@ -56,52 +79,38 @@ def render_statement(ostream, statement, indent=0):
 def render_feature(ostream, match, feature, indent=0):
-    ostream.write("  " * indent)
+    ostream.write('  ' * indent)
-    if feature["type"] in ("string", "api", "mnemonic", "basic block", "export", "import", "section", "match"):
+    if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match', 'characteristic'):
-        ostream.write(feature["type"])
+        ostream.write(feature['type'])
-        ostream.write(": ")
+        ostream.write(': ')
-        ostream.write(rutils.bold2(feature[feature["type"]]))
+        ostream.write(rutils.bold2(feature[feature['type']]))
-    elif feature["type"] in ("number", "offset"):
+    elif feature['type'] in ('number', 'offset'):
-        ostream.write(feature["type"])
+        ostream.write(feature['type'])
-        ostream.write(": ")
+        ostream.write(': ')
-        ostream.write(rutils.bold2(rutils.hex(feature[feature["type"]])))
+        ostream.write(rutils.bold2(rutils.hex(feature[feature['type']])))
-    elif feature["type"] == "bytes":
+    elif feature['type'] == 'bytes':
-        ostream.write("bytes: ")
+        ostream.write('bytes: ')
        # bytes is the uppercase, hex-encoded string.
        # it should always be an even number of characters (it's hex).
-        ostream.write(rutils.bold2(rutils.hex_string(feature[feature["type"]])))
+        ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
    elif feature["type"] == "characteristic":
        ostream.write("characteristic(%s)" % (rutils.bold2(feature["characteristic"][0])))
    # note that regex is found in `render_statement`
    else:
        raise RuntimeError("unexpected feature type: " + str(feature))
-    # its possible to have an empty locations array here,
+    if 'description' in feature:
-    # such as when we're in MODE_FAILURE and showing the logic
+        ostream.write(' = ')
-    # under a `not` statement (which will have no matched locations).
+        ostream.write(feature['description'])
    locations = list(sorted(match.get("locations", [])))
    if len(locations) == 1:
        ostream.write(" @ ")
        ostream.write(rutils.hex(locations[0]))
    elif len(locations) > 1:
        ostream.write(" @ ")
        if len(locations) > 4:
            # don't display too many locations, because it becomes very noisy.
            # probably only the first handful of locations will be useful for inspection.
            ostream.write(", ".join(map(rutils.hex, locations[0:4])))
            ostream.write(", and %d more..." % (len(locations) - 4))
        else:
            ostream.write(", ".join(map(rutils.hex, locations)))
-    ostream.write("\n")
+    render_locations(ostream, match)
    ostream.write('\n')
 def render_node(ostream, match, node, indent=0):
-    if node["type"] == "statement":
+    if node['type'] == 'statement':
-        render_statement(ostream, node["statement"], indent=indent)
+        render_statement(ostream, match, node['statement'], indent=indent)
-    elif node["type"] == "feature":
+    elif node['type'] == 'feature':
-        render_feature(ostream, match, node["feature"], indent=indent)
+        render_feature(ostream, match, node['feature'], indent=indent)
    else:
        raise RuntimeError("unexpected node type: " + str(node))
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -138,8 +138,8 @@ class InvalidRuleSet(ValueError):
 def ensure_feature_valid_for_scope(scope, feature):
    if isinstance(feature, capa.features.Characteristic):
-        if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
+        if capa.features.Characteristic(feature.value) not in SUPPORTED_FEATURES[scope]:
-            raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
+            raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
    elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
        raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
@@ -205,10 +205,9 @@ def parse_feature(key):
        return capa.features.insn.Mnemonic
    elif key == "basic blocks":
        return capa.features.basicblock.BasicBlock
-    elif key.startswith("characteristic(") and key.endswith(")"):
+    elif key == 'characteristic':
-        characteristic = key[len("characteristic(") : -len(")")]
+        return capa.features.Characteristic
-        return lambda v: capa.features.Characteristic(characteristic, v)
+    elif key == 'export':
    elif key == "export":
        return capa.features.file.Export
    elif key == "import":
        return capa.features.file.Import
@@ -220,18 +219,18 @@ def parse_feature(key):
        raise InvalidRule("unexpected statement: %s" % key)
-def parse_symbol(s, value_type):
+def parse_description(s, value_type, description=None):
-    """
+    '''
    s can be an int or a string
-    """
+    '''
-    if isinstance(s, str) and "=" in s:
+    if value_type != 'string' and isinstance(s, str) and ' = ' in s:
-        value, symbol = s.split("=", 1)
+        if description:
-        symbol = symbol.strip()
+            raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with ` = `)' % s)
-        if symbol == "":
+        value, description = s.split(' = ', 1)
-            raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
+        if description == '':
            raise InvalidRule('unexpected value: "%s", description cannot be empty' % s)
    else:
        value = s
        symbol = None
    if isinstance(value, str):
        if value_type == "bytes":
@@ -242,21 +241,20 @@ def parse_symbol(s, value_type):
                raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value)
            if len(value) > MAX_BYTES_FEATURE_SIZE:
-                raise InvalidRule(
+                raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
-                    "unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE
+                                  MAX_BYTES_FEATURE_SIZE)
-                )
+        elif value_type in {'number', 'offset'}:
        else:
            try:
                value = parse_int(value)
            except ValueError:
                raise InvalidRule('unexpected value: "%s", must begin with numerical value' % value)
-    return value, symbol
+    return value, description
 def build_statements(d, scope):
-    if len(d.keys()) != 1:
+    if len(d.keys()) > 2:
-        raise InvalidRule("too many statements")
+        raise InvalidRule('too many statements')
    key = list(d.keys())[0]
    if key == "and":
@@ -303,48 +301,33 @@ def build_statements(d, scope):
        term = key[len("count(") : -len(")")]
-        if term.startswith("characteristic("):
+        # when looking for the existence of such a feature, our rule might look like:
-            # characteristic features are specified a bit specially:
+        #     - mnemonic: mov
-            # they simply indicate the presence of something unusual/interesting,
+        #
-            # and we embed the name in the feature name, like `characteristic(nzxor)`.
+        # but here we deal with the form: `mnemonic(mov)`.
-            #
+        term, _, arg = term.partition('(')
-            # when we're dealing with counts, like `count(characteristic(nzxor))`,
+        Feature = parse_feature(term)
            # we can simply extract the feature and assume we're looking for `True` values.
            Feature = parse_feature(term)
            feature = Feature(True)
            ensure_feature_valid_for_scope(scope, feature)
        else:
            # however, for remaining counted features, like `count(mnemonic(mov))`,
            # we have to jump through hoops.
            #
            # when looking for the existence of such a feature, our rule might look like:
            #     - mnemonic: mov
            #
            # but here we deal with the form: `mnemonic(mov)`.
            term, _, arg = term.partition("(")
            Feature = parse_feature(term)
-            if arg:
+        if arg:
-                arg = arg[: -len(")")]
+            arg = arg[:-len(')')]
-                # can't rely on yaml parsing ints embedded within strings
+            # can't rely on yaml parsing ints embedded within strings
-                # like:
+            # like:
-                #
+            #
-                #     count(offset(0xC))
+            #     count(offset(0xC))
-                #     count(number(0x11223344))
+            #     count(number(0x11223344))
-                #     count(number(0x100 = symbol name))
+            #     count(number(0x100 = description))
-                if term in ("number", "offset", "bytes"):
+            if term != 'string':
-                    value, symbol = parse_symbol(arg, term)
+                value, description = parse_description(arg, term)
-                    feature = Feature(value, symbol)
+                feature = Feature(value, description)
                else:
                    # arg is string, like:
                    #
                    #     count(mnemonic(mov))
                    #     count(string(error))
                    # TODO: what about embedded newlines?
                    feature = Feature(arg)
            else:
-                feature = Feature()
+                # arg is string (which doesn't support inline descriptions), like:
-            ensure_feature_valid_for_scope(scope, feature)
+                #
                #     count(string(error))
                # TODO: what about embedded newlines?
                feature = Feature(arg)
        else:
            feature = Feature()
        ensure_feature_valid_for_scope(scope, feature)
        count = d[key]
        if isinstance(count, int):
@@ -373,13 +356,8 @@ def build_statements(d, scope):
            )
    else:
        Feature = parse_feature(key)
-        if key in ("number", "offset", "bytes"):
+        value, description = parse_description(d[key], key, d.get('description'))
-            # parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
+        feature = Feature(value, description)
            # or regular numbers, e.g. 37
            value, symbol = parse_symbol(d[key], key)
            feature = Feature(value, symbol)
        else:
            feature = Feature(d[key])
        ensure_feature_valid_for_scope(scope, feature)
        return feature
--- a/ci/hooks/hook-vivisect.py
+++ b/ci/hooks/hook-vivisect.py
@@ -0,0 +1,13 @@
 from PyInstaller.utils.hooks import copy_metadata
 # in order for viv-utils to use pkg_resources to fetch
 # the installed version of vivisect,
 # we need to instruct pyinstaller to embed this metadata.
 #
 # so we set the pyinstaller.spec/hookspath to reference
 #  the directory with this hook.
 #
 # this hook runs at analysis time and updates the embedded metadata.
 #
 # ref: https://github.com/pyinstaller/pyinstaller/issues/1713#issuecomment-162682084
 datas = copy_metadata('vivisect')
--- a/ci/pyinstaller.spec
+++ b/ci/pyinstaller.spec
@@ -0,0 +1,193 @@
 # -*- mode: python -*-
 import os.path
 import subprocess
 import wcwidth
 # record the output of `git describe --always` as `__version__` in capa/version.py.
 #
 # `subprocess.check_output` returns bytes on Python 3, and the file is opened in binary mode,
 # so format and write bytes throughout. writing a `str` here raises TypeError on Python 3,
 # and formatting bytes into a `str` would embed the literal "b'...'".
 # a bytes literal with `%` formatting behaves the same on Python 2.
 with open('./capa/version.py', 'wb') as f:
    f.write(b"__version__ = '%s'"
            % subprocess.check_output(["git", "describe", "--always"]).strip())
 a = Analysis(
    ['../capa/main.py'],
    pathex=['capa'],
    binaries=None,
    datas=[
        ('../rules', 'rules'),
        # capa.render.default uses tabulate that depends on wcwidth.
        # it seems wcwidth uses a json file `version.json`
        # and this doesn't get picked up by pyinstaller automatically.
        # so we manually embed the wcwidth resources here.
        #
        # ref: https://stackoverflow.com/a/62278462/87207
        (os.path.dirname(wcwidth.__file__), 'wcwidth')
    ],
    hiddenimports=[
        # vivisect does manual/runtime importing of its modules,
        # so declare the things that could be imported here.
        "pycparser",
        "vivisect",
        "vivisect.analysis",
        "vivisect.analysis.amd64",
        "vivisect.analysis.amd64",
        "vivisect.analysis.amd64.emulation",
        "vivisect.analysis.amd64.golang",
        "vivisect.analysis.crypto",
        "vivisect.analysis.crypto",
        "vivisect.analysis.crypto.constants",
        "vivisect.analysis.elf",
        "vivisect.analysis.elf",
        "vivisect.analysis.elf.elfplt",
        "vivisect.analysis.elf.libc_start_main",
        "vivisect.analysis.generic",
        "vivisect.analysis.generic",
        "vivisect.analysis.generic.codeblocks",
        "vivisect.analysis.generic.emucode",
        "vivisect.analysis.generic.entrypoints",
        "vivisect.analysis.generic.funcentries",
        "vivisect.analysis.generic.impapi",
        "vivisect.analysis.generic.mkpointers",
        "vivisect.analysis.generic.pointers",
        "vivisect.analysis.generic.pointertables",
        "vivisect.analysis.generic.relocations",
        "vivisect.analysis.generic.strconst",
        "vivisect.analysis.generic.switchcase",
        "vivisect.analysis.generic.thunks",
        "vivisect.analysis.i386",
        "vivisect.analysis.i386",
        "vivisect.analysis.i386.calling",
        "vivisect.analysis.i386.golang",
        "vivisect.analysis.i386.importcalls",
        "vivisect.analysis.i386.instrhook",
        "vivisect.analysis.i386.thunk_bx",
        "vivisect.analysis.ms",
        "vivisect.analysis.ms",
        "vivisect.analysis.ms.hotpatch",
        "vivisect.analysis.ms.localhints",
        "vivisect.analysis.ms.msvc",
        "vivisect.analysis.ms.msvcfunc",
        "vivisect.analysis.ms.vftables",
        "vivisect.analysis.pe",
        "vivisect.impapi.posix.amd64",
        "vivisect.impapi.posix.i386",
        "vivisect.impapi.windows",
        "vivisect.impapi.windows.amd64",
        "vivisect.impapi.windows.i386",
        "vivisect.parsers.blob",
        "vivisect.parsers.elf",
        "vivisect.parsers.ihex",
        "vivisect.parsers.macho",
        "vivisect.parsers.parse_pe",
        "vivisect.parsers.utils",
        "vivisect.storage",
        "vivisect.storage.basicfile",
        "vstruct.constants",
        "vstruct.constants.ntstatus",
        "vstruct.defs",
        "vstruct.defs.arm7",
        "vstruct.defs.bmp",
        "vstruct.defs.dns",
        "vstruct.defs.elf",
        "vstruct.defs.gif",
        "vstruct.defs.ihex",
        "vstruct.defs.inet",
        "vstruct.defs.java",
        "vstruct.defs.kdcom",
        "vstruct.defs.macho",
        "vstruct.defs.macho.const",
        "vstruct.defs.macho.fat",
        "vstruct.defs.macho.loader",
        "vstruct.defs.macho.stabs",
        "vstruct.defs.minidump",
        "vstruct.defs.pcap",
        "vstruct.defs.pe",
        "vstruct.defs.pptp",
        "vstruct.defs.rar",
        "vstruct.defs.swf",
        "vstruct.defs.win32",
        "vstruct.defs.windows",
        "vstruct.defs.windows.win_5_1_i386",
        "vstruct.defs.windows.win_5_1_i386.ntdll",
        "vstruct.defs.windows.win_5_1_i386.ntoskrnl",
        "vstruct.defs.windows.win_5_1_i386.win32k",
        "vstruct.defs.windows.win_5_2_i386",
        "vstruct.defs.windows.win_5_2_i386.ntdll",
        "vstruct.defs.windows.win_5_2_i386.ntoskrnl",
        "vstruct.defs.windows.win_5_2_i386.win32k",
        "vstruct.defs.windows.win_6_1_amd64",
        "vstruct.defs.windows.win_6_1_amd64.ntdll",
        "vstruct.defs.windows.win_6_1_amd64.ntoskrnl",
        "vstruct.defs.windows.win_6_1_amd64.win32k",
        "vstruct.defs.windows.win_6_1_i386",
        "vstruct.defs.windows.win_6_1_i386.ntdll",
        "vstruct.defs.windows.win_6_1_i386.ntoskrnl",
        "vstruct.defs.windows.win_6_1_i386.win32k",
        "vstruct.defs.windows.win_6_1_wow64",
        "vstruct.defs.windows.win_6_1_wow64.ntdll",
        "vstruct.defs.windows.win_6_2_amd64",
        "vstruct.defs.windows.win_6_2_amd64.ntdll",
        "vstruct.defs.windows.win_6_2_amd64.ntoskrnl",
        "vstruct.defs.windows.win_6_2_amd64.win32k",
        "vstruct.defs.windows.win_6_2_i386",
        "vstruct.defs.windows.win_6_2_i386.ntdll",
        "vstruct.defs.windows.win_6_2_i386.ntoskrnl",
        "vstruct.defs.windows.win_6_2_i386.win32k",
        "vstruct.defs.windows.win_6_2_wow64",
        "vstruct.defs.windows.win_6_2_wow64.ntdll",
        "vstruct.defs.windows.win_6_3_amd64",
        "vstruct.defs.windows.win_6_3_amd64.ntdll",
        "vstruct.defs.windows.win_6_3_amd64.ntoskrnl",
        "vstruct.defs.windows.win_6_3_i386",
        "vstruct.defs.windows.win_6_3_i386.ntdll",
        "vstruct.defs.windows.win_6_3_i386.ntoskrnl",
        "vstruct.defs.windows.win_6_3_wow64",
        "vstruct.defs.windows.win_6_3_wow64.ntdll",
    ],
    hookspath=['ci/hooks'],
    runtime_hooks=None,
    excludes=[
        # ignore packages that would otherwise be bundled with the .exe.
        # review: build/pyinstaller/xref-pyinstaller.html
        # we don't do any GUI stuff, so ignore these modules
        "tkinter",
        "_tkinter",
        "Tkinter",
        # tqdm provides renderers for ipython,
        # however, this drags in a lot of dependencies.
        # since we don't spawn a notebook, we can safely remove these.
        "IPython",
        "ipywidgets",
    ])
 # drop the Tcl/Tk binaries from the collected binaries:
 # the tkinter modules are already listed under `excludes` in the Analysis above
 # ("we don't do any GUI stuff"), so these are not needed in the bundle.
 a.binaries = a.binaries - TOC([
 ('tcl85.dll', None, None),
 ('tk85.dll', None, None),
 ('_tkinter', None, None)])
 pyz = PYZ(a.pure, a.zipped_data)
 exe = EXE(pyz,
          a.scripts,
          a.binaries,
          a.zipfiles,
          a.datas,
          exclude_binaries=False,
          name='capa',
          icon='logo.ico',
          debug=False,
          strip=None,
          upx=True,
          console=True )
 # enable the following to debug the contents of the .exe
 #
 #coll = COLLECT(exe,
 #               a.binaries,
 #               a.zipfiles,
 #               a.datas,
 #               strip=None,
 #               upx=True,
 #               name='capa-dat')
--- a/2
+++ b/2
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -323,7 +323,8 @@ def lint_features(ctx, rule):
 def get_features(ctx, rule):
    # get features from rule and all dependencies including subscopes and matched rules
    features = []
-    deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies()]
+    namespaces = capa.rules.index_rules_by_namespace([rule])
    deps = [ctx['rules'].rules[dep] for dep in rule.get_dependencies(namespaces)]
    for r in [rule] + deps:
        features.extend(get_rule_features(r))
    return features
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -86,7 +86,8 @@ def test_complex():
 def test_range():
    # unbounded range, but no matching feature
-    assert Range(Number(1)).evaluate({Number(2): {}}) == False
+    # since the lower bound is zero, and there are zero matches, ok
    assert Range(Number(1)).evaluate({Number(2): {}}) == True
    # unbounded range with matching feature should always match
    assert Range(Number(1)).evaluate({Number(1): {}}) == True
@@ -117,6 +118,103 @@ def test_range():
    assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3, 4}}) == False
 def test_range_exact():
    """an integer count, like `count(number(100)): 2`, matches only with exactly that many locations."""
    rule = capa.rules.Rule.from_yaml(textwrap.dedent('''
        rule:
            meta:
                name: test rule
            features:
                - count(number(100)): 2
    '''))
    number = capa.features.insn.Number(100)
    # pairs of: (locations of the feature, whether the rule is expected to match)
    cases = [
        # just enough matches
        ({1, 2}, True),
        # not enough matches
        ({1}, False),
        # too many matches
        ({1, 2, 3}, False),
    ]
    for locations, expected in cases:
        _, matches = capa.engine.match([rule], {number: locations}, 0x0)
        assert ('test rule' in matches) == expected
 def test_range_range():
    """a tuple count, like `count(number(100)): (2, 3)`, matches when the location count is within the bounds."""
    rule = capa.rules.Rule.from_yaml(textwrap.dedent('''
         rule:
             meta:
                 name: test rule
             features:
                 - count(number(100)): (2, 3)
     '''))
    number = capa.features.insn.Number(100)
    # pairs of: (locations of the feature, whether the rule is expected to match)
    cases = [
        # just enough matches
        ({1, 2}, True),
        # enough matches
        ({1, 2, 3}, True),
        # not enough matches
        ({1}, False),
        # too many matches
        ({1, 2, 3, 4}, False),
    ]
    for locations, expected in cases:
        _, matches = capa.engine.match([rule], {number: locations}, 0x0)
        assert ('test rule' in matches) == expected
 def test_range_exact_zero():
    """`count(number(100)): 0` matches when the feature has no locations, indexed or not."""
    rule = capa.rules.Rule.from_yaml(textwrap.dedent('''
        rule:
            meta:
                name: test rule
            features:
                - count(number(100)): 0
    '''))
    number = capa.features.insn.Number(100)
    # pairs of: (feature set handed to the engine, whether the rule is expected to match)
    cases = [
        # feature isn't indexed - good.
        ({}, True),
        # feature is indexed, but no matches.
        # i don't think we should ever really have this case, but good to check anyways.
        ({number: {}}, True),
        # too many matches
        ({number: {1}}, False),
    ]
    for feature_set, expected in cases:
        _, matches = capa.engine.match([rule], feature_set, 0x0)
        assert ('test rule' in matches) == expected
 def test_range_with_zero():
    """`count(number(100)): (0, 1)` matches with zero or one location, and fails with two."""
    rule = capa.rules.Rule.from_yaml(textwrap.dedent('''
         rule:
             meta:
                 name: test rule
             features:
                 - count(number(100)): (0, 1)
     '''))
    number = capa.features.insn.Number(100)
    # pairs of: (feature set handed to the engine, whether the rule is expected to match)
    cases = [
        # ok
        ({}, True),
        ({number: {}}, True),
        ({number: {1}}, True),
        # too many matches
        ({number: {1, 2}}, False),
    ]
    for feature_set, expected in cases:
        _, matches = capa.engine.match([rule], feature_set, 0x0)
        assert ('test rule' in matches) == expected
 def test_match_adds_matched_rule_feature():
    """show that using `match` adds a feature for matched rules."""
    rule = textwrap.dedent(
--- a/tests/test_freeze.py
+++ b/tests/test_freeze.py
@@ -10,37 +10,26 @@ import capa.features.freeze
 from fixtures import *
-EXTRACTOR = capa.features.extractors.NullFeatureExtractor(
+EXTRACTOR = capa.features.extractors.NullFeatureExtractor({
-    {
+    'file features': [
-        "file features": [
+        (0x402345, capa.features.Characteristic('embedded pe')),
-            (0x402345, capa.features.Characteristic("embedded pe", True)),
+    ],
-        ],
+    'functions': {
-        "functions": {
+        0x401000: {
-            0x401000: {
+            'features': [
-                "features": [(0x401000, capa.features.Characteristic("switch", True)),],
+                (0x401000, capa.features.Characteristic('switch')),
-                "basic blocks": {
+            ],
-                    0x401000: {
+            'basic blocks': {
-                        "features": [
+                0x401000: {
-                            (
+                    'features': [
-                                0x401000,
+                        (0x401000, capa.features.Characteristic('tight loop')),
-                                capa.features.Characteristic("tight loop", True),
+                    ],
-                            ),
+                    'instructions': {
-                        ],
+                        0x401000: {
-                        "instructions": {
+                            'features': [
-                            0x401000: {
+                                (0x401000, capa.features.insn.Mnemonic('xor')),
-                                "features": [
+                                (0x401000, capa.features.Characteristic('nzxor')),
-                                    (0x401000, capa.features.insn.Mnemonic("xor")),
+                            ],
                                    (
                                        0x401000,
                                        capa.features.Characteristic("nzxor", True),
                                    ),
                                ],
                            },
                            0x401002: {
                                "features": [
                                    (0x401002, capa.features.insn.Mnemonic("mov")),
                                ]
                            },
                        },
                    },
                },
@@ -55,25 +44,19 @@ def test_null_feature_extractor():
    assert list(EXTRACTOR.get_basic_blocks(0x401000)) == [0x401000]
    assert list(EXTRACTOR.get_instructions(0x401000, 0x0401000)) == [0x401000, 0x401002]
-    rules = capa.rules.RuleSet(
+    rules = capa.rules.RuleSet([
-        [
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-            capa.rules.Rule.from_yaml(
+            rule:
-                textwrap.dedent(
+                meta:
-                    """
+                    name: xor loop
-                    rule:
+                    scope: basic block
-                        meta:
+                features:
-                            name: xor loop
+                    - and:
-                            scope: basic block
+                        - characteristic: tight loop
-                        features:
+                        - mnemonic: xor
-                            - and:
+                        - characteristic: nzxor
-                                - characteristic(tight loop): true
+        ''')),
-                                - mnemonic: xor
+    ])
                                - characteristic(nzxor): true
                    """
                )
            ),
        ]
    )
    capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
    assert "xor loop" in capabilities
@@ -178,9 +161,9 @@ def test_serialize_features():
    roundtrip_feature(capa.features.String("SCardControl"))
    roundtrip_feature(capa.features.insn.Number(0xFF))
    roundtrip_feature(capa.features.insn.Offset(0x0))
-    roundtrip_feature(capa.features.insn.Mnemonic("push"))
+    roundtrip_feature(capa.features.insn.Mnemonic('push'))
-    roundtrip_feature(capa.features.file.Section(".rsrc"))
+    roundtrip_feature(capa.features.file.Section('.rsrc'))
-    roundtrip_feature(capa.features.Characteristic("tight loop", True))
+    roundtrip_feature(capa.features.Characteristic('tight loop'))
    roundtrip_feature(capa.features.basicblock.BasicBlock())
    roundtrip_feature(capa.features.file.Export("BaseThreadInitThunk"))
    roundtrip_feature(capa.features.file.Import("kernel32.IsWow64Process"))
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -52,46 +52,33 @@ def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32):
 def test_ruleset():
-    rules = capa.rules.RuleSet(
+    rules = capa.rules.RuleSet([
-        [
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-            capa.rules.Rule.from_yaml(
+            rule:
-                textwrap.dedent(
+                meta:
-                    """
+                    name: file rule
-                    rule:
+                    scope: file
-                        meta:
+                features:
-                            name: file rule
+                  - characteristic: embedded pe
-                            scope: file
+        ''')),
-                        features:
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-                          - characteristic(embedded pe): y
+            rule:
-                    """
+                meta:
-                )
+                    name: function rule
-            ),
+                    scope: function
-            capa.rules.Rule.from_yaml(
+                features:
-                textwrap.dedent(
+                  - characteristic: switch
-                    """
+        ''')),
-                    rule:
+         capa.rules.Rule.from_yaml(textwrap.dedent('''
-                        meta:
+            rule:
-                            name: function rule
+                meta:
-                            scope: function
+                    name: basic block rule
-                        features:
+                    scope: basic block
-                          - characteristic(switch): y
+                features:
-                    """
+                  - characteristic: nzxor
-                )
+        ''')),
-            ),
+
-            capa.rules.Rule.from_yaml(
+    ])
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: basic block rule
                            scope: basic block
                        features:
                          - characteristic(nzxor): y
                    """
                )
            ),
        ]
    )
    assert len(rules.file_rules) == 1
    assert len(rules.function_rules) == 1
    assert len(rules.basic_block_rules) == 1
@@ -165,65 +152,48 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
 def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
-    rules = capa.rules.RuleSet(
+    rules = capa.rules.RuleSet([
-        [
+        # this rule should match on a basic block (including at least 0x403685)
-            # this rule should match on a basic block (including at least 0x403685)
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-            capa.rules.Rule.from_yaml(
+            rule:
-                textwrap.dedent(
+                meta:
-                    """
+                    name: tight loop
-                    rule:
+                    scope: basic block
-                        meta:
+                    examples:
-                            name: tight loop
+                      - 9324d1a8ae37a36ae560c37448c9705a:0x403685
-                            scope: basic block
+                features:
-                            examples:
+                  - characteristic: tight loop
-                              - 9324d1a8ae37a36ae560c37448c9705a:0x403685
+        ''')),
-                        features:
+        # this rule should match on a function (0x403660)
-                          - characteristic(tight loop): true
+        # based on API, as well as prior basic block rule match
-                    """
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-                )
+            rule:
-            ),
+                meta:
-            # this rule should match on a function (0x403660)
+                    name: kill thread loop
-            # based on API, as well as prior basic block rule match
+                    scope: function
-            capa.rules.Rule.from_yaml(
+                    examples:
-                textwrap.dedent(
+                      - 9324d1a8ae37a36ae560c37448c9705a:0x403660
-                    """
+                features:
-                    rule:
+                  - and:
-                        meta:
+                    - api: kernel32.TerminateThread
-                            name: kill thread loop
+                    - api: kernel32.CloseHandle
-                            scope: function
+                    - match: tight loop
-                            examples:
+        ''')),
-                              - 9324d1a8ae37a36ae560c37448c9705a:0x403660
+        # this rule should match on a file feature and a prior function rule match
-                        features:
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-                          - and:
+            rule:
-                            - api: kernel32.TerminateThread
+                meta:
-                            - api: kernel32.CloseHandle
+                    name: kill thread program
-                            - match: tight loop
+                    scope: file
-                    """
+                    examples:
-                )
+                      - 9324d1a8ae37a36ae560c37448c9705a
-            ),
+                features:
-            # this rule should match on a file feature and a prior function rule match
+                  - and:
-            capa.rules.Rule.from_yaml(
+                    - section: .text
-                textwrap.dedent(
+                    - match: kill thread loop
-                    """
+        ''')),
-                    rule:
+    ])
-                        meta:
+    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path)
                            name: kill thread program
                            scope: file
                            examples:
                              - 9324d1a8ae37a36ae560c37448c9705a
                        features:
                          - and:
                            - section: .text
                            - match: kill thread loop
                    """
                )
            ),
        ]
    )
    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
        sample_9324d1a8ae37a36ae560c37448c9705a.vw,
        sample_9324d1a8ae37a36ae560c37448c9705a.path,
    )
    capabilities = capa.main.find_capabilities(rules, extractor)
    assert "tight loop" in capabilities
    assert "kill thread loop" in capabilities
@@ -231,24 +201,18 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
 def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
-    rules = capa.rules.RuleSet(
+    rules = capa.rules.RuleSet([
-        [
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-            capa.rules.Rule.from_yaml(
+             rule:
-                textwrap.dedent(
+                 meta:
-                    """
+                     name: test rule
-                    rule:
+                     scope: function
-                        meta:
+                 features:
-                            name: test rule
+                     - and:
-                            scope: function
+                         - basic block:
-                        features:
+                             - characteristic: tight loop
-                            - and:
+         '''))
-                                - basic block:
+    ])
                                    - characteristic(tight loop): true
                    """
                )
            )
        ]
    )
    # tight loop at 0x403685
    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
        sample_9324d1a8ae37a36ae560c37448c9705a.vw,
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -4,6 +4,7 @@ import pytest
 import capa.rules
 from capa.features.insn import Number, Offset
 from capa.features import String
 def test_rule_ctor():
@@ -66,6 +67,22 @@ def test_rule_yaml_complex():
    assert r.evaluate({Number(6): {1}, Number(7): {1}, Number(8): {1}}) == False
 def test_rule_yaml_descriptions():
    rule = textwrap.dedent('''
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1 = This is the number 1
                    - string: This program cannot be run in DOS mode.
                      description: MS-DOS stub message
                    - count(number(2 = AF_INET/SOCK_DGRAM)): 2
    ''')
    r = capa.rules.Rule.from_yaml(rule)
    assert r.evaluate({Number(1): {1}, Number(2): {2, 3}, String('This program cannot be run in DOS mode.'): {4}}) == True
 def test_rule_yaml_not():
    rule = textwrap.dedent(
        """
@@ -132,37 +149,47 @@ def test_invalid_rule_feature():
        )
    with pytest.raises(capa.rules.InvalidRule):
-        capa.rules.Rule.from_yaml(
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-            textwrap.dedent(
+            rule:
-                """
+                meta:
-                rule:
+                    name: test rule
-                    meta:
+                    scope: file
-                        name: test rule
+                features:
-                        scope: file
+                    - characteristic: nzxor
-                    features:
+        '''))
                        - characteristic(nzxor): true
                """
            )
        )
    with pytest.raises(capa.rules.InvalidRule):
-        capa.rules.Rule.from_yaml(
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-            textwrap.dedent(
+            rule:
-                """
+                meta:
-                rule:
+                    name: test rule
-                    meta:
+                    scope: function
-                        name: test rule
+                features:
-                        scope: function
+                    - characteristic: embedded pe
-                    features:
+        '''))
                        - characteristic(embedded pe): true
                """
            )
        )
    with pytest.raises(capa.rules.InvalidRule):
-        capa.rules.Rule.from_yaml(
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-            textwrap.dedent(
+            rule:
-                """
+                meta:
                    name: test rule
                    scope: basic block
                features:
                    - characteristic: embedded pe
        '''))
 def test_lib_rules():
    rules = capa.rules.RuleSet([
        capa.rules.Rule.from_yaml(textwrap.dedent('''
            rule:
                meta:
                    name: a lib rule
                    lib: true
                features:
                    - api: CreateFileA
        ''')),
        capa.rules.Rule.from_yaml(textwrap.dedent('''
                rule:
                    meta:
                        name: test rule
@@ -207,27 +234,21 @@ def test_lib_rules():
 def test_subscope_rules():
-    rules = capa.rules.RuleSet(
+    rules = capa.rules.RuleSet([
-        [
+        capa.rules.Rule.from_yaml(textwrap.dedent('''
-            capa.rules.Rule.from_yaml(
+            rule:
-                textwrap.dedent(
+                meta:
-                    """
+                    name: test rule
-                    rule:
+                    scope: file
-                        meta:
+                features:
-                            name: test rule
+                    - and:
-                            scope: file
+                        - characteristic: embedded pe
-                        features:
+                        - function:
                            - and:
-                                - characteristic(embedded pe): true
+                                - characteristic: nzxor
-                                - function:
+                                - characteristic: switch
-                                    - and:
+        '''))
-                                        - characteristic(nzxor): true
+    ])
                                        - characteristic(switch): true
                    """
                )
            )
        ]
    )
    # the file rule scope will have one rules:
    #  - `test rule`
    assert len(rules.file_rules) == 1
@@ -295,10 +316,8 @@ def test_invalid_rules():
                    meta:
                        name: test rule
                    features:
-                        - characteristic(number(1)): True
+                        - characteristic: number(1)
-                """
+            '''))
            )
        )
    with pytest.raises(capa.rules.InvalidRule):
        r = capa.rules.Rule.from_yaml(
@@ -308,10 +327,8 @@ def test_invalid_rules():
                    meta:
                        name: test rule
                    features:
-                        - characteristic(count(number(100))): True
+                        - characteristic: count(number(100))
-                """
+            '''))
            )
        )
 def test_number_symbol():
--- a/tests/test_viv_features.py
+++ b/tests/test_viv_features.py
@@ -130,7 +130,7 @@ def test_offset_features(mimikatz):
 def test_nzxor_features(mimikatz):
    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x410DFC))
-    assert capa.features.Characteristic("nzxor", True) in features  # 0x0410F0B
+    assert capa.features.Characteristic('nzxor') in features  # 0x0410F0B
 def get_bb_insn(f, va):
@@ -169,10 +169,8 @@ def test_mnemonic_features(mimikatz):
 def test_peb_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
-        viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC)
+    assert capa.features.Characteristic('peb access') in features
    )
    assert capa.features.Characteristic("peb access", True) in features
 def test_file_section_name_features(mimikatz):
@@ -188,7 +186,7 @@ def test_tight_loop_features(mimikatz):
        if bb.va != 0x402F8E:
            continue
        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop", True) in features
+        assert capa.features.Characteristic('tight loop') in features
        assert capa.features.basicblock.BasicBlock() in features
@@ -198,7 +196,7 @@ def test_tight_loop_bb_features(mimikatz):
        if bb.va != 0x402F8E:
            continue
        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop", True) in features
+        assert capa.features.Characteristic('tight loop') in features
        assert capa.features.basicblock.BasicBlock() in features
@@ -219,24 +217,18 @@ def test_file_import_name_features(mimikatz):
 def test_cross_section_flow_features(sample_a198216798ca38f280dc413f8c57f2c2):
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0))
-        viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0)
+    assert capa.features.Characteristic('cross section flow') in features
    )
    assert capa.features.Characteristic("cross section flow", True) in features
    # this function has calls to some imports,
    # which should not trigger cross-section flow characteristic
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563))
-        viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563)
+    assert capa.features.Characteristic('cross section flow') not in features
    )
    assert capa.features.Characteristic("cross section flow", True) not in features
 def test_segment_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
-        viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC)
+    assert capa.features.Characteristic('fs access') in features
    )
    assert capa.features.Characteristic("fs access", True) in features
 def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a):
@@ -249,60 +241,36 @@ def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a):
 def test_file_embedded_pe(pma_lab_12_04):
    features = extract_file_features(pma_lab_12_04.vw, pma_lab_12_04.path)
-    assert capa.features.Characteristic("embedded pe", True) in features
+    assert capa.features.Characteristic('embedded pe') in features
 def test_stackstring_features(mimikatz):
    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4556E5))
-    assert capa.features.Characteristic("stack string", True) in features
+    assert capa.features.Characteristic('stack string') in features
 def test_switch_features(mimikatz):
    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409411))
-    assert capa.features.Characteristic("switch", True) in features
+    assert capa.features.Characteristic('switch') in features
    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409393))
-    assert capa.features.Characteristic("switch", True) not in features
+    assert capa.features.Characteristic('switch') not in features
-def test_recursive_call_feature(
+def test_recursive_call_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
-    sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41,
+    features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003100))
-):
+    assert capa.features.Characteristic('recursive call') in features
    features = extract_function_features(
        viv_utils.Function(
            sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw,
            0x10003100,
        )
    )
    assert capa.features.Characteristic("recursive call", True) in features
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007B00))
-        viv_utils.Function(
+    assert capa.features.Characteristic('recursive call') not in features
            sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw,
            0x10007B00,
        )
    )
    assert capa.features.Characteristic("recursive call", True) not in features
-def test_loop_feature(
+def test_loop_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
-    sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41,
+    features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003D30))
-):
+    assert capa.features.Characteristic('loop') in features
    features = extract_function_features(
        viv_utils.Function(
            sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw,
            0x10003D30,
        )
    )
    assert capa.features.Characteristic("loop", True) in features
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007250))
-        viv_utils.Function(
+    assert capa.features.Characteristic('loop') not in features
            sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw,
            0x10007250,
        )
    )
    assert capa.features.Characteristic("loop", True) not in features
 def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
@@ -315,27 +283,21 @@ def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
 def test_function_calls_to(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-        viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)
+    assert capa.features.Characteristic('calls to') in features
-    )
+    assert len(features[capa.features.Characteristic('calls to')]) == 1
    assert capa.features.Characteristic("calls to", True) in features
    assert len(features[capa.features.Characteristic("calls to", True)]) == 1
 def test_function_calls_to64(sample_lab21_01):
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400052D0))  # memcpy
-        viv_utils.Function(sample_lab21_01.vw, 0x1400052D0)
+    assert capa.features.Characteristic('calls to') in features
-    )  # memcpy
+    assert len(features[capa.features.Characteristic('calls to')]) == 8
    assert capa.features.Characteristic("calls to", True) in features
    assert len(features[capa.features.Characteristic("calls to", True)]) == 8
 def test_function_calls_from(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-        viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)
+    assert capa.features.Characteristic('calls from') in features
-    )
+    assert len(features[capa.features.Characteristic('calls from')]) == 23
    assert capa.features.Characteristic("calls from", True) in features
    assert len(features[capa.features.Characteristic("calls from", True)]) == 23
 def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a):
@@ -346,11 +308,9 @@ def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a):
 def test_indirect_call_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(
+    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA68A0))
-        viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA68A0)
+    assert capa.features.Characteristic('indirect call') in features
-    )
+    assert len(features[capa.features.Characteristic('indirect call')]) == 3
    assert capa.features.Characteristic("indirect call", True) in features
    assert len(features[capa.features.Characteristic("indirect call", True)]) == 3
 def test_indirect_calls_resolved(sample_c91887d861d9bd4a5872249b641bc9f9):