This commit is contained in:
William Ballenthin
2020-07-02 11:08:21 -06:00
parent 8f7cb6dad0
commit 612eefe2e8
9 changed files with 1074 additions and 1074 deletions

View File

@@ -1,149 +1,149 @@
import collections import collections
import envi import envi
import vivisect.const import vivisect.const
import envi.archs.i386.disasm import envi.archs.i386.disasm
import envi.archs.amd64.disasm import envi.archs.amd64.disasm
# pull out consts for lookup performance
i386RegOper = envi.archs.i386.disasm.i386RegOper
i386ImmOper = envi.archs.i386.disasm.i386ImmOper
i386ImmMemOper = envi.archs.i386.disasm.i386ImmMemOper
Amd64RipRelOper = envi.archs.amd64.disasm.Amd64RipRelOper
LOC_OP = vivisect.const.LOC_OP
IF_NOFALL = envi.IF_NOFALL
REF_CODE = vivisect.const.REF_CODE
# any "far" branch flavor: procedure calls, dereferenced calls, arch-switch branches.
# used to ignore call-like xrefs when walking intra-function flow.
FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH
# mnemonics that overwrite their first (destination) operand,
# i.e. they "define" the register found there.
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
def get_previous_instructions(vw, va):
    """
    collect the instructions that flow to the given address, local to the current function.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address to inspect

    returns:
      List[int]: the prior instructions, which may fallthrough and/or jump here
    """
    ret = []

    # find the immediate prior instruction.
    # ensure that it fallsthrough to this one.
    #
    # from vivisect.const:
    #   location: (L_VA, L_SIZE, L_LTYPE, L_TINFO)
    loc = vw.getPrevLocation(va, adjacent=True)
    if loc is not None:
        # fix: the original fetched the location a second time here;
        # reuse the result of the lookup above.
        (pva, _, ptype, pinfo) = loc
        if ptype == LOC_OP and not (pinfo & IF_NOFALL):
            ret.append(pva)

    # find any code refs, e.g. jmp, to this location.
    # ignore any calls (far branches).
    #
    # from vivisect.const:
    #   xref: (XR_FROM, XR_TO, XR_RTYPE, XR_RFLAG)
    for (xfrom, _, _, xflag) in vw.getXrefsTo(va, REF_CODE):
        if (xflag & FAR_BRANCH_MASK) != 0:
            continue
        ret.append(xfrom)

    return ret
class NotFoundError(Exception):
    """raised when the definition of a register cannot be located."""
def find_definition(vw, va, reg):
    """
    scan backwards from the given address looking for assignments to the given register.
    if a constant, return that value.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address at which to start analysis
      reg (int): the vivisect register to study

    returns:
      (va: int, value?: int|None): the address of the assignment and the value, if a constant.

    raises:
      NotFoundError: when the definition cannot be found.
    """
    seen = set()
    pending = collections.deque(get_previous_instructions(vw, va))

    while pending:
        here = pending.popleft()

        # don't revisit instructions, or loops would spin forever.
        if here in seen:
            continue
        seen.add(here)

        insn = vw.parseOpcode(here)
        opers = insn.opers

        # keep walking backwards unless this instruction writes our target
        # register via one of the destructive mnemonics.
        writes_reg = (
            len(opers) > 0
            and isinstance(opers[0], i386RegOper)
            and opers[0].reg == reg
            and insn.mnem in DESTRUCTIVE_MNEMONICS
        )
        if not writes_reg:
            pending.extend(get_previous_instructions(vw, here))
            continue

        # the instruction is destructive to our target register.
        # we currently only support extracting the constant from something like: `mov $reg, IAT`
        # so, any other pattern results in an unknown value, represented by None.
        # this is a good place to extend in the future, if we need more robust support.
        if insn.mnem != "mov":
            return (here, None)

        src = opers[1]
        if isinstance(src, i386ImmOper):
            return (here, src.getOperValue(src))
        elif isinstance(src, i386ImmMemOper):
            return (here, src.getOperAddr(src))
        elif isinstance(src, Amd64RipRelOper):
            return (here, src.getOperAddr(insn))
        else:
            # might be something like: `mov $reg, dword_401000[eax]`
            return (here, None)

    raise NotFoundError()
def is_indirect_call(vw, va, insn=None):
    """
    return True if the instruction at the given address is a `call` through a register,
    e.g. `call eax`.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address of the instruction
      insn: optional, the already-parsed instruction at `va`

    returns:
      bool
    """
    if insn is None:
        insn = vw.parseOpcode(va)

    # fix: use the module-level `i386RegOper` alias, which this module pulls out
    # for lookup performance, instead of re-resolving the dotted path on each call.
    return insn.mnem == "call" and isinstance(insn.opers[0], i386RegOper)
def resolve_indirect_call(vw, va, insn=None):
    """
    inspect the given indirect call instruction and attempt to resolve the target address.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address at which to start analysis
      insn: optional, the already-parsed instruction at `va`

    returns:
      (va: int, value?: int|None): the address of the assignment and the value, if a constant.

    raises:
      NotFoundError: when the definition cannot be found.
    """
    op = vw.parseOpcode(va) if insn is None else insn

    assert is_indirect_call(vw, va, insn=op)

    # trace backwards for the definition of the register used by the call.
    return find_definition(vw, va, op.opers[0].reg)

View File

@@ -1,266 +1,266 @@
import json import json
import six import six
import capa.rules import capa.rules
import capa.engine import capa.engine
def convert_statement_to_result_document(statement):
    """
    convert a capa statement node into its plain-dict representation.

    example output::

        "statement": {
            "type": "or"
        },

        "statement": {
            "max": 9223372036854775808,
            "min": 2,
            "type": "range"
        },
    """
    if isinstance(statement, capa.engine.And):
        return {"type": "and"}
    if isinstance(statement, capa.engine.Or):
        return {"type": "or"}
    if isinstance(statement, capa.engine.Not):
        return {"type": "not"}
    if isinstance(statement, capa.engine.Some) and statement.count == 0:
        # zero-or-more is rendered as "optional"
        return {"type": "optional"}
    if isinstance(statement, capa.engine.Some) and statement.count > 0:
        return {"type": "some", "count": statement.count}
    if isinstance(statement, capa.engine.Range):
        return {
            "type": "range",
            "min": statement.min,
            "max": statement.max,
            "child": convert_feature_to_result_document(statement.child),
        }
    if isinstance(statement, capa.engine.Regex):
        return {
            "type": "regex",
            "pattern": statement.pattern,
            # the string that was matched
            "match": statement.match,
        }
    if isinstance(statement, capa.engine.Subscope):
        return {"type": "subscope", "subscope": statement.scope}
    raise RuntimeError("unexpected match statement type: " + str(statement))
def convert_feature_to_result_document(feature):
    """
    convert a capa feature into its plain-dict representation.

    example output::

        "feature": {
            "number": 6,
            "type": "number"
        },

        "feature": {
            "api": "ws2_32.WSASocket",
            "type": "api"
        },

        "feature": {
            "match": "create TCP socket",
            "type": "match"
        },

        "feature": {
            "characteristic": [
                "loop",
                true
            ],
            "type": "characteristic"
        },
    """
    name = feature.name
    doc = {"type": name, name: feature.get_args_str()}

    # only emit a description when the rule author provided one.
    description = feature.description
    if description:
        doc["description"] = description

    return doc
def convert_node_to_result_document(node):
    """
    convert a capa logic-tree node (statement or feature) into its plain-dict form.

    example output::

        "node": {
            "type": "statement",
            "statement": { ... }
        },

        "node": {
            "type": "feature",
            "feature": { ... }
        },
    """
    if isinstance(node, capa.engine.Statement):
        return {"type": "statement", "statement": convert_statement_to_result_document(node)}
    # NOTE(review): `capa.features` is not imported by this module directly;
    # presumably it is reachable as an attribute via `capa.rules`/`capa.engine` — confirm.
    if isinstance(node, capa.features.Feature):
        return {"type": "feature", "feature": convert_feature_to_result_document(node)}
    raise RuntimeError("unexpected match node type")
def convert_match_to_result_document(rules, capabilities, result):
    """
    convert the given Result instance into a common, Python-native data structure.
    this will become part of the "result document" format that can be emitted to JSON.

    args:
      rules: mapping of rule name -> rule, used to resolve `match` features.
      capabilities: mapping of rule name -> list of (address, Result) pairs.
      result: the Result instance to convert.

    returns:
      dict: the document node for this match, with children converted recursively.
    """
    doc = {
        "success": bool(result.success),
        "node": convert_node_to_result_document(result.statement),
        "children": [convert_match_to_result_document(rules, capabilities, child) for child in result.children],
    }

    # logic expression, like `and`, don't have locations - their children do.
    # so only add `locations` to feature nodes.
    if isinstance(result.statement, capa.features.Feature):
        if bool(result.success):
            doc["locations"] = result.locations
    elif isinstance(result.statement, capa.rules.Range):
        if bool(result.success):
            doc["locations"] = result.locations

    # if we have a `match` statement, then we're referencing another rule.
    # this could an external rule (written by a human), or
    # rule generated to support a subscope (basic block, etc.)
    # we still want to include the matching logic in this tree.
    #
    # so, we need to lookup the other rule results
    # and then filter those down to the address used here.
    # finally, splice that logic into this tree.
    if (
        doc["node"]["type"] == "feature"
        and doc["node"]["feature"]["type"] == "match"
        # only add subtree on success,
        # because there won't be results for the other rule on failure.
        and doc["success"]
    ):
        rule_name = doc["node"]["feature"]["match"]
        rule = rules[rule_name]
        rule_matches = {address: result for (address, result) in capabilities[rule_name]}
        if rule.meta.get("capa/subscope-rule"):
            # for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
            #
            # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
            scope = rule.meta["scope"]
            doc["node"] = {
                "type": "statement",
                "statement": {"type": "subscope", "subscope": scope,},
            }
        # splice the referenced rule's logic in as children,
        # one subtree per address at which it matched here.
        for location in doc["locations"]:
            doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))

    return doc
def convert_capabilities_to_result_document(rules, capabilities):
    """
    convert the given rule set and capabilities result to a common, Python-native data structure.
    this format can be directly emitted to JSON, or passed to the other `render_*` routines
    to render as text.

    see examples of substructures in above routines.

    schema:

    ```json
    {
      $rule-name: {
        "meta": {...copied from rule.meta...},
        "matches": {
          $address: {...match details...},
          ...
        }
      },
      ...
    }
    ```

    Args:
      rules (RuleSet):
      capabilities (Dict[str, List[Tuple[int, Result]]]):
    """
    doc = {}

    for rule_name, matches in capabilities.items():
        rule = rules[rule_name]

        # subscope rules are an implementation detail: don't expose them in the document.
        if rule.meta.get("capa/subscope-rule"):
            continue

        converted_matches = {}
        for (addr, match) in matches:
            converted_matches[addr] = convert_match_to_result_document(rules, capabilities, match)

        doc[rule_name] = {
            "meta": dict(rule.meta),
            "source": rule.definition,
            "matches": converted_matches,
        }

    return doc
def render_vverbose(rules, capabilities):
    """render the capabilities as very-verbose text."""
    # there's an import loop here:
    # if capa.render imports capa.render.vverbose
    # and capa.render.vverbose imports capa.render (implicitly, as a submodule).
    # so, defer the import until this routine is called, breaking the loop.
    import capa.render.vverbose

    return capa.render.vverbose.render_vverbose(convert_capabilities_to_result_document(rules, capabilities))
def render_verbose(rules, capabilities):
    """render the capabilities as verbose text."""
    # deferred import to break the import loop (see render_vverbose).
    import capa.render.verbose

    return capa.render.verbose.render_verbose(convert_capabilities_to_result_document(rules, capabilities))
def render_default(rules, capabilities):
    """render the capabilities using the default (tabular) renderer."""
    # deferred import to break the import loop.
    # fix: dropped the unused `import capa.render.verbose` - only the default
    # renderer is used here.
    import capa.render.default

    doc = convert_capabilities_to_result_document(rules, capabilities)
    return capa.render.default.render_default(doc)
class CapaJsonObjectEncoder(json.JSONEncoder):
    """JSON encoder that flattens sets into sorted lists.

    everything else falls through to the base class, which raises TypeError.
    note: the original's first branch (primitives and strings) was redundant -
    `default()` is only invoked for objects the encoder cannot already
    serialize, and that branch delegated to the base class exactly like the
    final fallback did. collapsing it also drops the `six` dependency here.
    """

    def default(self, obj):
        if isinstance(obj, set):
            # sorted for deterministic output
            return list(sorted(obj))
        # probably will TypeError
        return json.JSONEncoder.default(self, obj)
def render_json(rules, capabilities):
    """serialize the capabilities result document to a JSON string."""
    doc = convert_capabilities_to_result_document(rules, capabilities)
    return json.dumps(doc, cls=CapaJsonObjectEncoder, sort_keys=True)

View File

@@ -1,99 +1,99 @@
import collections import collections
import six import six
import tabulate import tabulate
import capa.render.utils as rutils import capa.render.utils as rutils
def width(s, character_count):
    """pad the given string with trailing spaces to at least `character_count` characters."""
    return s.ljust(character_count)
def render_capabilities(doc, ostream):
    """
    write a two-column table of capability name vs. rule namespace.

    example::

        +-------------------------------------------------------+-------------------------------------------------+
        | CAPABILITY                                            | NAMESPACE                                       |
        |-------------------------------------------------------+-------------------------------------------------|
        | check for OutputDebugString error (2 matches)         | anti-analysis/anti-debugging/debugger-detection |
        | read and send data from client to server              | c2/file-transfer                                |
        | ...                                                   | ...                                             |
        +-------------------------------------------------------+-------------------------------------------------+
    """
    rows = []
    for rule in rutils.capability_rules(doc):
        name = rutils.bold(rule["meta"]["name"])
        match_count = len(rule["matches"])
        # annotate the capability with the match count when it matched more than once.
        capability = name if match_count == 1 else "%s (%d matches)" % (name, match_count)
        rows.append((capability, rule["meta"]["namespace"]))

    ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 40), width("NAMESPACE", 40)], tablefmt="psql"))
    ostream.write("\n")
def render_attack(doc, ostream):
    """
    write a table of ATT&CK tactics and the techniques found for each.

    example::

        +------------------------+----------------------------------------------------------------------+
        | ATT&CK Tactic          | ATT&CK Technique                                                     |
        |------------------------+----------------------------------------------------------------------|
        | DEFENSE EVASION        | Obfuscated Files or Information [T1027]                              |
        | DISCOVERY              | Query Registry [T1012]                                               |
        |                        | System Information Discovery [T1082]                                 |
        | EXECUTION              | Command and Scripting Interpreter::Windows Command Shell [T1059.003] |
        |                        | Shared Modules [T1129]                                               |
        | EXFILTRATION           | Exfiltration Over C2 Channel [T1041]                                 |
        | PERSISTENCE            | Create or Modify System Process::Windows Service [T1543.003]         |
        +------------------------+----------------------------------------------------------------------+
    """
    tactics = collections.defaultdict(set)
    for rule in rutils.capability_rules(doc):
        if not rule["meta"].get("att&ck"):
            continue
        for attack in rule["meta"]["att&ck"]:
            # entries look like "Tactic::Technique[::Subtechnique] Txxxx[.yyy]"
            tactic, _, rest = attack.partition("::")
            if "::" in rest:
                technique, _, rest = rest.partition("::")
                # fix: renamed `id` -> `attack_id` to avoid shadowing the builtin.
                subtechnique, _, attack_id = rest.rpartition(" ")
                tactics[tactic].add((technique, subtechnique, attack_id))
            else:
                technique, _, attack_id = rest.rpartition(" ")
                tactics[tactic].add((technique, attack_id))

    rows = []
    for tactic, techniques in sorted(tactics.items()):
        inner_rows = []
        for spec in sorted(techniques):
            if len(spec) == 2:
                technique, attack_id = spec
                inner_rows.append("%s %s" % (rutils.bold(technique), attack_id))
            elif len(spec) == 3:
                technique, subtechnique, attack_id = spec
                inner_rows.append("%s::%s %s" % (rutils.bold(technique), subtechnique, attack_id))
            else:
                raise RuntimeError("unexpected ATT&CK spec format")
        rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),))

    ostream.write(
        tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 60)], tablefmt="psql")
    )
    ostream.write("\n")
def render_default(doc):
    """render the ATT&CK table followed by the capabilities table, returning the text."""
    out = six.StringIO()

    render_attack(doc, out)
    out.write("\n")
    render_capabilities(doc, out)

    return out.getvalue()

View File

@@ -1,44 +1,44 @@
import six import six
import termcolor import termcolor
def bold(s):
    """draw attention to the given string (rendered blue via termcolor)"""
    return termcolor.colored(s, "blue")
def bold2(s):
    """draw attention to the given string, within a `bold` section (rendered green via termcolor)"""
    return termcolor.colored(s, "green")
def hex(n):
    """render the given number using upper case hex, like: 0x123ABC"""
    # note: intentionally shadows the builtin; callers use it as `rutils.hex`.
    return "0x{:X}".format(n)
def capability_rules(doc):
    """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
    # any of these meta keys marks a rule as administrative rather than a capability.
    skip_keys = (
        "lib",
        "capa/subscope",
        "maec/analysis-conclusion",
        "maec/analysis-conclusion-ov",
        "maec/malware-category",
        "maec/malware-category-ov",
    )

    decorated = [(rule["meta"].get("namespace", ""), rule["meta"]["name"], rule) for rule in doc.values()]
    for (_, _, rule) in sorted(decorated):
        if any(rule["meta"].get(key) for key in skip_keys):
            continue
        yield rule
class StringIO(six.StringIO):
    """a StringIO with a `writeln` convenience that appends a newline."""

    def writeln(self, s):
        # single write keeps the line and its terminator together.
        self.write(s + "\n")

View File

@@ -1,52 +1,52 @@
""" """
example:: example::
send data send data
namespace communication namespace communication
author william.ballenthin@fireeye.com author william.ballenthin@fireeye.com
description all known techniques for sending data to a potential C2 server description all known techniques for sending data to a potential C2 server
scope function scope function
examples BFB9B5391A13D0AFD787E87AB90F14F5:0x13145D60 examples BFB9B5391A13D0AFD787E87AB90F14F5:0x13145D60
matches 0x10004363 matches 0x10004363
0x100046c9 0x100046c9
0x1000454e 0x1000454e
0x10003a13 0x10003a13
0x10003415 0x10003415
0x10003797 0x10003797
""" """
import tabulate import tabulate
import capa.rules import capa.rules
import capa.render.utils as rutils import capa.render.utils as rutils
def render_verbose(doc):
    """
    render a text report: one section per capability rule, with its meta
    table and (for non-file scopes) the list of matched addresses.

    args:
      doc: the result document produced by `convert_capabilities_to_result_document`.

    returns:
      str: the rendered report.
    """
    ostream = rutils.StringIO()

    for rule in rutils.capability_rules(doc):
        count = len(rule["matches"])
        if count == 1:
            capability = rutils.bold(rule["meta"]["name"])
        else:
            capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)

        ostream.writeln(capability)

        rows = []
        # fix: dropped the dead `key == "name"` check - "name" is never in this tuple.
        for key in ("namespace", "description", "scope"):
            if key not in rule["meta"]:
                continue

            v = rule["meta"][key]
            # unwrap single-element lists for cleaner display.
            if isinstance(v, list) and len(v) == 1:
                v = v[0]
            rows.append((key, v))

        # file-scope rules match the whole file; listing addresses is not meaningful.
        if rule["meta"]["scope"] != capa.rules.FILE_SCOPE:
            locations = doc[rule["meta"]["name"]]["matches"].keys()
            rows.append(("matches", "\n".join(map(rutils.hex, locations))))

        ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
        ostream.write("\n")

    return ostream.getvalue()

View File

@@ -1,185 +1,185 @@
import tabulate import tabulate
import capa.rules import capa.rules
import capa.render.utils as rutils import capa.render.utils as rutils
def render_locations(ostream, match):
    """write the match's locations (if any) to the stream, like ` @ 0x401000, 0x402000`."""
    # the locations array may legitimately be empty, such as when we're in
    # MODE_FAILURE and showing the logic under a `not` statement
    # (which has no matched locations).
    locations = sorted(match.get("locations", []))

    if not locations:
        return

    ostream.write(" @ ")
    if len(locations) == 1:
        ostream.write(rutils.hex(locations[0]))
    elif len(locations) <= 4:
        ostream.write(", ".join(map(rutils.hex, locations)))
    else:
        # cap the display at four locations so the output doesn't get noisy;
        # the first handful is usually enough for inspection.
        ostream.write(", ".join(map(rutils.hex, locations[:4])))
        ostream.write(", and %d more..." % (len(locations) - 4))
def render_statement(ostream, match, statement, indent=0):
    """
    render a single statement node from a match tree to the stream.

    args:
      ostream: output stream with `write`/`writeln` methods
      match (Dict[str, Any]): the match entry containing this statement
      statement (Dict[str, Any]): the statement node to render
      indent (int): nesting depth (one space per level)

    raises:
      RuntimeError: on an unrecognized statement type
    """
    ostream.write(" " * indent)
    if statement["type"] in ("and", "or", "optional"):
        ostream.write(statement["type"])
        ostream.writeln(":")
    elif statement["type"] == "not":
        # this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
        ostream.writeln("not:")
    elif statement["type"] == "some":
        # bugfix: `count` is an integer, so `statement["count"] + " or more"`
        # raised TypeError; use string formatting instead.
        ostream.write("%d or more" % (statement["count"]))
        ostream.writeln(":")
    elif statement["type"] == "range":
        # `range` is a weird node, its almost a hybrid of statement+feature.
        # it is a specific feature repeated multiple times.
        # there's no additional logic in the feature part, just the existence of a feature.
        # so, we have to inline some of the feature rendering here.
        child = statement["child"]
        value = rutils.bold2(child[child["type"]])
        if child.get("description"):
            ostream.write("count(%s(%s = %s)): " % (child["type"], value, child["description"]))
        else:
            ostream.write("count(%s(%s)): " % (child["type"], value))
        if statement["max"] == statement["min"]:
            ostream.write("%d" % (statement["min"]))
        elif statement["min"] == 0:
            ostream.write("%d or fewer" % (statement["max"]))
        elif statement["max"] == (1 << 64 - 1):
            # NOTE(review): due to precedence this is 1 << 63, not (1 << 64) - 1;
            # it must agree with the "unbounded" sentinel used by the rule parser -- confirm there.
            ostream.write("%d or more" % (statement["min"]))
        else:
            ostream.write("between %d and %d" % (statement["min"], statement["max"]))
        render_locations(ostream, match)
        ostream.write("\n")
    elif statement["type"] == "subscope":
        ostream.write(statement["subscope"])
        ostream.writeln(":")
    elif statement["type"] == "regex":
        # regex is a `Statement`, not a `Feature`:
        # it doesn't get extracted, but applies to all strings in scope,
        # so it is rendered here rather than in `render_feature`.
        ostream.writeln("string: %s" % (statement["match"]))
    else:
        raise RuntimeError("unexpected match statement type: " + str(statement))
def render_feature(ostream, match, feature, indent=0):
    """render a single feature node, e.g. `number: 0x10 = description @ 0x401000`."""
    pieces = [" " * indent, feature["type"], ": ", rutils.bold2(feature[feature["type"]])]
    if "description" in feature:
        pieces.append(" = ")
        pieces.append(feature["description"])
    for piece in pieces:
        ostream.write(piece)
    render_locations(ostream, match)
    ostream.write("\n")
def render_node(ostream, match, node, indent=0):
    """dispatch rendering of a node to the statement or feature renderer."""
    node_type = node["type"]
    if node_type == "statement":
        render_statement(ostream, match, node["statement"], indent=indent)
        return
    if node_type == "feature":
        render_feature(ostream, match, node["feature"], indent=indent)
        return
    raise RuntimeError("unexpected node type: " + str(node))
# display nodes that successfully evaluated against the sample.
MODE_SUCCESS = "success"

# display nodes that did not evaluate to True against the sample.
# this is useful when rendering the logic tree under a `not` node.
MODE_FAILURE = "failure"


def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
    """
    recursively render a match tree to the stream.

    in MODE_SUCCESS only nodes that evaluated True are shown;
    in MODE_FAILURE only nodes that did not evaluate to True are shown.
    a `not` statement inverts the mode for its children.

    raises:
      RuntimeError: on an unrecognized mode
    """
    child_mode = mode

    if mode == MODE_SUCCESS:
        # show only nodes that evaluated successfully.
        if not match["success"]:
            return
        statement_type = match["node"].get("statement", {}).get("type")
        # an optional statement with no successful children renders as empty noise.
        if statement_type == "optional" and not any(c["success"] for c in match["children"]):
            return
        # under a `not`, the interesting children are the failing ones.
        if statement_type == "not":
            child_mode = MODE_FAILURE
    elif mode == MODE_FAILURE:
        # show only nodes that did not evaluate to True.
        if match["success"]:
            return
        statement_type = match["node"].get("statement", {}).get("type")
        # an optional statement with successful children is not relevant here.
        if statement_type == "optional" and any(c["success"] for c in match["children"]):
            return
        # under a `not`, flip back to showing successful evaluations.
        if statement_type == "not":
            child_mode = MODE_SUCCESS
    else:
        raise RuntimeError("unexpected mode: " + mode)

    render_node(ostream, match, match["node"], indent=indent)
    for child in match["children"]:
        render_match(ostream, child, indent=indent + 1, mode=child_mode)
def render_vverbose(doc):
    """
    render the result document as very-verbose text:
    full rule metadata plus the per-match logic trees.

    args:
      doc (Dict[str, Any]): the result document to render

    returns:
      str: the rendered text

    raises:
      RuntimeError: if a file-scope rule has other than exactly one match
    """
    ostream = rutils.StringIO()

    for rule in rutils.capability_rules(doc):
        count = len(rule["matches"])
        if count == 1:
            capability = rutils.bold(rule["meta"]["name"])
        else:
            capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
        ostream.writeln(capability)

        rows = []
        for key in capa.rules.META_KEYS:
            if key == "name" or key not in rule["meta"]:
                continue
            v = rule["meta"][key]
            # render lists as a scalar (one element) or comma-joined (many elements)
            if isinstance(v, list) and len(v) == 1:
                v = v[0]
            elif isinstance(v, list) and len(v) > 1:
                v = ", ".join(v)
            rows.append((key, v))
        ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))

        if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
            matches = list(doc[rule["meta"]["name"]]["matches"].values())
            if len(matches) != 1:
                # i think there should only ever be one match per file-scope rule,
                # because we do the file-scope evaluation a single time.
                # but i'm not 100% sure if this is/will always be true.
                # so, lets be explicit about our assumptions and raise an exception if they fail.
                #
                # bugfix: concatenating the int count raised TypeError and masked this
                # message; convert it to str first.
                raise RuntimeError("unexpected file scope match count: " + str(len(matches)))
            render_match(ostream, matches[0], indent=0)
        else:
            for location, match in sorted(doc[rule["meta"]["name"]]["matches"].items()):
                ostream.write(rule["meta"]["scope"])
                ostream.write(" @ ")
                ostream.writeln(rutils.hex(location))
                render_match(ostream, match, indent=1)
        ostream.write("\n")

    return ostream.getvalue()

View File

@@ -1,13 +1,13 @@
from PyInstaller.utils.hooks import copy_metadata

# PyInstaller hook: embed the installed "vivisect" package metadata into the frozen app.
#
# in order for viv-utils to use pkg_resources to fetch
# the installed version of vivisect,
# we need to instruct pyinstaller to embed this metadata.
#
# so we set the pyinstaller.spec/hookspath to reference
# the directory with this hook.
#
# this hook runs at analysis time and updates the embedded metadata.
#
# ref: https://github.com/pyinstaller/pyinstaller/issues/1713#issuecomment-162682084
datas = copy_metadata("vivisect")

View File

@@ -1,160 +1,160 @@
#!/usr/bin/env python #!/usr/bin/env python
""" """
migrate rules and their namespaces. migrate rules and their namespaces.
example: example:
$ python scripts/migrate-rules.py migration.csv ./rules ./new-rules $ python scripts/migrate-rules.py migration.csv ./rules ./new-rules
""" """
import os import os
import csv import csv
import sys import sys
import logging import logging
import os.path import os.path
import collections import collections
import argparse import argparse
import capa.rules import capa.rules
logger = logging.getLogger("migrate-rules") logger = logging.getLogger("migrate-rules")
def read_plan(plan_path):
    """
    parse the migration-plan CSV into a list of row dicts.

    args:
      plan_path (str): path to the CSV file describing the migration

    returns:
      List[Dict[str, str]]: one dict per row, keyed by the field names below;
      any extra columns are collected under the "other" key.
    """
    # bugfix: the file was opened in binary mode ("rb"), but Python 3's csv
    # module requires text mode; per the csv docs, open with newline="".
    with open(plan_path, "r", newline="") as f:
        return list(
            csv.DictReader(
                f,
                restkey="other",
                fieldnames=(
                    "existing path",
                    "existing name",
                    "existing rule-category",
                    "proposed name",
                    "proposed namespace",
                    "ATT&CK",
                    "MBC",
                    "comment1",
                ),
            )
        )
def read_rules(rule_directory):
    """
    load every capa rule (*.yml) found under the given directory, keyed by rule name.

    rules found under a "nursery" path are tagged with meta["capa/nursery"] = True.
    """
    rules = {}
    for root, _dirs, filenames in os.walk(rule_directory):
        for filename in filenames:
            path = os.path.join(root, filename)
            if not path.endswith(".yml"):
                logger.info("skipping file: %s", path)
                continue
            rule = capa.rules.Rule.from_yaml_file(path)
            rules[rule.name] = rule
            if "nursery" in path:
                rule.meta["capa/nursery"] = True
    return rules
def main(argv=None):
    """
    migrate rules per the plan: rename, re-namespace, tag with ATT&CK/MBC,
    and write them into the destination directory tree.

    args:
      argv (Optional[List[str]]): CLI args; defaults to sys.argv[1:]

    returns:
      int: 0 on success, -1 when the plan doesn't account for every rule
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="migrate rules.")
    parser.add_argument("plan", type=str, help="Path to CSV describing migration")
    parser.add_argument("source", type=str, help="Source directory of rules")
    parser.add_argument("destination", type=str, help="Destination directory of rules")
    args = parser.parse_args(args=argv)
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)
    plan = read_plan(args.plan)
    logger.info("read %d plan entries", len(plan))
    rules = read_rules(args.source)
    logger.info("read %d rules", len(rules))
    # every existing rule must appear in the plan; otherwise refuse to migrate.
    planned_rules = set([row["existing name"] for row in plan])
    unplanned_rules = [rule for (name, rule) in rules.items() if name not in planned_rules]
    if unplanned_rules:
        logger.error("plan does not account for %d rules:" % (len(unplanned_rules)))
        for rule in unplanned_rules:
            logger.error(" " + rule.name)
        return -1
    # pairs of strings (needle, replacement)
    # used below to keep cross-rule `match:` references in sync with renames.
    match_translations = []
    for row in plan:
        if not row["existing name"]:
            continue
        rule = rules[row["existing name"]]
        if rule.meta["name"] != row["proposed name"]:
            logger.info("renaming rule '%s' -> '%s'", rule.meta["name"], row["proposed name"])
            # assume the yaml is formatted like `- match: $rule-name`.
            # but since its been linted, this should be ok.
            match_translations.append(("- match: " + rule.meta["name"], "- match: " + row["proposed name"]))
            rule.meta["name"] = row["proposed name"]
            rule.name = row["proposed name"]
        if "rule-category" in rule.meta:
            # rule-category is superseded by namespaces; drop it.
            logger.info("deleting rule category '%s'", rule.meta["rule-category"])
            del rule.meta["rule-category"]
        rule.meta["namespace"] = row["proposed namespace"]
        if row["ATT&CK"] != "n/a" and row["ATT&CK"] != "":
            tag = row["ATT&CK"]
            # reformat "Technique Name T1234" as "Technique Name [T1234]"
            name, _, id = tag.rpartition(" ")
            tag = "%s [%s]" % (name, id)
            rule.meta["att&ck"] = [tag]
        if row["MBC"] != "n/a" and row["MBC"] != "":
            tag = row["MBC"]
            rule.meta["mbc"] = [tag]
    for rule in rules.values():
        # derive a filesystem-friendly filename from the rule name.
        filename = rule.name
        filename = filename.lower()
        filename = filename.replace(" ", "-")
        filename = filename.replace("(", "")
        filename = filename.replace(")", "")
        filename = filename.replace("+", "")
        filename = filename.replace("/", "")
        filename = filename + ".yml"
        try:
            # nursery/lib rules have fixed directories; otherwise file under the namespace.
            if rule.meta.get("capa/nursery"):
                directory = os.path.join(args.destination, "nursery")
            elif rule.meta.get("lib"):
                directory = os.path.join(args.destination, "lib")
            else:
                directory = os.path.join(args.destination, rule.meta.get("namespace"))
            os.makedirs(directory)
        except OSError:
            # directory already exists
            pass
        else:
            logger.info("created namespace: %s", directory)
        path = os.path.join(directory, filename)
        logger.info("writing rule %s", path)
        doc = rule.to_yaml().decode("utf-8")
        # apply the rename translations so `match:` references to renamed rules stay valid.
        for (needle, replacement) in match_translations:
            doc = doc.replace(needle, replacement)
        with open(path, "wb") as f:
            f.write(doc.encode("utf-8"))
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -1,106 +1,106 @@
import textwrap import textwrap
import capa.rules import capa.rules
# the canonical serialization that each reformatting test below must produce.
EXPECTED = textwrap.dedent(
    """\
    rule:
      meta:
        name: test rule
        author: user@domain.com
        scope: function
        examples:
          - foo1234
          - bar5678
      features:
        - and:
          - number: 1
          - number: 2
    """
)
def test_rule_reformat_top_level_elements():
    """`features` appears before `meta` here; reformatting must emit `meta` first."""
    rule = textwrap.dedent(
        """
        rule:
          features:
            - and:
              - number: 1
              - number: 2
          meta:
            name: test rule
            author: user@domain.com
            scope: function
            examples:
              - foo1234
              - bar5678
        """
    )
    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
def test_rule_reformat_indentation():
    """non-canonical YAML indentation must be normalized on reformat."""
    # NOTE(review): the original used deliberately odd indentation here; the exact
    # column counts were lost in transit, but any valid YAML indentation that
    # differs from EXPECTED exercises the same behavior -- confirm against VCS.
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                author: user@domain.com
                scope: function
                examples:
                    - foo1234
                    - bar5678
            features:
                - and:
                    - number: 1
                    - number: 2
        """
    )
    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
def test_rule_reformat_order():
    """meta keys given out of order must be restored to canonical order on reformat."""
    rule = textwrap.dedent(
        """
        rule:
          meta:
            author: user@domain.com
            examples:
              - foo1234
              - bar5678
            scope: function
            name: test rule
          features:
            - and:
              - number: 1
              - number: 2
        """
    )
    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
def test_rule_reformat_meta_update():
    """renaming a rule via its `name` attribute must be reflected in the serialized meta."""
    rule = textwrap.dedent(
        """
        rule:
          meta:
            author: user@domain.com
            examples:
              - foo1234
              - bar5678
            scope: function
            name: AAAA
          features:
            - and:
              - number: 1
              - number: 2
        """
    )
    rule = capa.rules.Rule.from_yaml(rule)
    # rename after parsing; serialization should emit "test rule", not "AAAA".
    rule.name = "test rule"
    assert rule.to_yaml() == EXPECTED