This commit is contained in:
William Ballenthin
2020-07-02 11:08:21 -06:00
parent 8f7cb6dad0
commit 612eefe2e8
9 changed files with 1074 additions and 1074 deletions

View File

@@ -1,149 +1,149 @@
import collections import collections
import envi import envi
import vivisect.const import vivisect.const
import envi.archs.i386.disasm import envi.archs.i386.disasm
import envi.archs.amd64.disasm import envi.archs.amd64.disasm
# pull out consts for lookup performance
i386RegOper = envi.archs.i386.disasm.i386RegOper
i386ImmOper = envi.archs.i386.disasm.i386ImmOper
i386ImmMemOper = envi.archs.i386.disasm.i386ImmMemOper
Amd64RipRelOper = envi.archs.amd64.disasm.Amd64RipRelOper
LOC_OP = vivisect.const.LOC_OP
IF_NOFALL = envi.IF_NOFALL
REF_CODE = vivisect.const.REF_CODE
# any "far" branch flavor: procedure calls, dereferenced calls, arch-switch branches.
# used to ignore call-like xrefs when walking intra-function flow.
FAR_BRANCH_MASK = envi.BR_PROC | envi.BR_DEREF | envi.BR_ARCH
# mnemonics that overwrite their first (destination) operand,
# i.e. they "define" the register found there.
DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
def get_previous_instructions(vw, va):
    """
    collect the instructions that flow to the given address, local to the current function.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address to inspect

    returns:
      List[int]: the prior instructions, which may fallthrough and/or jump here
    """
    ret = []

    # find the immediate prior instruction.
    # ensure that it fallsthrough to this one.
    #
    # from vivisect.const:
    #   location: (L_VA, L_SIZE, L_LTYPE, L_TINFO)
    loc = vw.getPrevLocation(va, adjacent=True)
    if loc is not None:
        # fix: the original fetched the location a second time here;
        # reuse the result of the lookup above.
        (pva, _, ptype, pinfo) = loc
        if ptype == LOC_OP and not (pinfo & IF_NOFALL):
            ret.append(pva)

    # find any code refs, e.g. jmp, to this location.
    # ignore any calls (far branches).
    #
    # from vivisect.const:
    #   xref: (XR_FROM, XR_TO, XR_RTYPE, XR_RFLAG)
    for (xfrom, _, _, xflag) in vw.getXrefsTo(va, REF_CODE):
        if (xflag & FAR_BRANCH_MASK) != 0:
            continue
        ret.append(xfrom)

    return ret
class NotFoundError(Exception):
    """raised when the definition of a register cannot be located."""
def find_definition(vw, va, reg):
    """
    scan backwards from the given address looking for assignments to the given register.
    if a constant, return that value.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address at which to start analysis
      reg (int): the vivisect register to study

    returns:
      (va: int, value?: int|None): the address of the assignment and the value, if a constant.

    raises:
      NotFoundError: when the definition cannot be found.
    """
    seen = set()
    pending = collections.deque(get_previous_instructions(vw, va))

    while pending:
        here = pending.popleft()

        # don't revisit instructions, or loops would spin forever.
        if here in seen:
            continue
        seen.add(here)

        insn = vw.parseOpcode(here)
        opers = insn.opers

        # keep walking backwards unless this instruction writes our target
        # register via one of the destructive mnemonics.
        writes_reg = (
            len(opers) > 0
            and isinstance(opers[0], i386RegOper)
            and opers[0].reg == reg
            and insn.mnem in DESTRUCTIVE_MNEMONICS
        )
        if not writes_reg:
            pending.extend(get_previous_instructions(vw, here))
            continue

        # the instruction is destructive to our target register.
        # we currently only support extracting the constant from something like: `mov $reg, IAT`
        # so, any other pattern results in an unknown value, represented by None.
        # this is a good place to extend in the future, if we need more robust support.
        if insn.mnem != "mov":
            return (here, None)

        src = opers[1]
        if isinstance(src, i386ImmOper):
            return (here, src.getOperValue(src))
        elif isinstance(src, i386ImmMemOper):
            return (here, src.getOperAddr(src))
        elif isinstance(src, Amd64RipRelOper):
            return (here, src.getOperAddr(insn))
        else:
            # might be something like: `mov $reg, dword_401000[eax]`
            return (here, None)

    raise NotFoundError()
def is_indirect_call(vw, va, insn=None):
    """
    return True if the instruction at the given address is a `call` through a register,
    e.g. `call eax`.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address of the instruction
      insn: optional, the already-parsed instruction at `va`

    returns:
      bool
    """
    if insn is None:
        insn = vw.parseOpcode(va)

    # fix: use the module-level `i386RegOper` alias, which this module pulls out
    # for lookup performance, instead of re-resolving the dotted path on each call.
    return insn.mnem == "call" and isinstance(insn.opers[0], i386RegOper)
def resolve_indirect_call(vw, va, insn=None):
    """
    inspect the given indirect call instruction and attempt to resolve the target address.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address at which to start analysis
      insn: optional, the already-parsed instruction at `va`

    returns:
      (va: int, value?: int|None): the address of the assignment and the value, if a constant.

    raises:
      NotFoundError: when the definition cannot be found.
    """
    op = vw.parseOpcode(va) if insn is None else insn

    assert is_indirect_call(vw, va, insn=op)

    # trace backwards for the definition of the register used by the call.
    return find_definition(vw, va, op.opers[0].reg)

View File

@@ -1,266 +1,266 @@
import json import json
import six import six
import capa.rules import capa.rules
import capa.engine import capa.engine
def convert_statement_to_result_document(statement):
    """
    convert a capa statement node into its plain-dict representation.

    example output::

        "statement": {
            "type": "or"
        },

        "statement": {
            "max": 9223372036854775808,
            "min": 2,
            "type": "range"
        },
    """
    if isinstance(statement, capa.engine.And):
        return {"type": "and"}
    if isinstance(statement, capa.engine.Or):
        return {"type": "or"}
    if isinstance(statement, capa.engine.Not):
        return {"type": "not"}
    if isinstance(statement, capa.engine.Some) and statement.count == 0:
        # zero-or-more is rendered as "optional"
        return {"type": "optional"}
    if isinstance(statement, capa.engine.Some) and statement.count > 0:
        return {"type": "some", "count": statement.count}
    if isinstance(statement, capa.engine.Range):
        return {
            "type": "range",
            "min": statement.min,
            "max": statement.max,
            "child": convert_feature_to_result_document(statement.child),
        }
    if isinstance(statement, capa.engine.Regex):
        return {
            "type": "regex",
            "pattern": statement.pattern,
            # the string that was matched
            "match": statement.match,
        }
    if isinstance(statement, capa.engine.Subscope):
        return {"type": "subscope", "subscope": statement.scope}
    raise RuntimeError("unexpected match statement type: " + str(statement))
def convert_feature_to_result_document(feature):
    """
    convert a capa feature into its plain-dict representation.

    example output::

        "feature": {
            "number": 6,
            "type": "number"
        },

        "feature": {
            "api": "ws2_32.WSASocket",
            "type": "api"
        },

        "feature": {
            "match": "create TCP socket",
            "type": "match"
        },

        "feature": {
            "characteristic": [
                "loop",
                true
            ],
            "type": "characteristic"
        },
    """
    name = feature.name
    doc = {"type": name, name: feature.get_args_str()}

    # only emit a description when the rule author provided one.
    description = feature.description
    if description:
        doc["description"] = description

    return doc
def convert_node_to_result_document(node):
    """
    convert a capa logic-tree node (statement or feature) into its plain-dict form.

    example output::

        "node": {
            "type": "statement",
            "statement": { ... }
        },

        "node": {
            "type": "feature",
            "feature": { ... }
        },
    """
    if isinstance(node, capa.engine.Statement):
        return {"type": "statement", "statement": convert_statement_to_result_document(node)}
    # NOTE(review): `capa.features` is not imported by this module directly;
    # presumably it is reachable as an attribute via `capa.rules`/`capa.engine` — confirm.
    if isinstance(node, capa.features.Feature):
        return {"type": "feature", "feature": convert_feature_to_result_document(node)}
    raise RuntimeError("unexpected match node type")
def convert_match_to_result_document(rules, capabilities, result):
    """
    convert the given Result instance into a common, Python-native data structure.
    this will become part of the "result document" format that can be emitted to JSON.

    args:
      rules: mapping of rule name -> rule, used to resolve `match` features.
      capabilities: mapping of rule name -> list of (address, Result) pairs.
      result: the Result instance to convert.

    returns:
      dict: the document node for this match, with children converted recursively.
    """
    doc = {
        "success": bool(result.success),
        "node": convert_node_to_result_document(result.statement),
        "children": [convert_match_to_result_document(rules, capabilities, child) for child in result.children],
    }

    # logic expression, like `and`, don't have locations - their children do.
    # so only add `locations` to feature nodes.
    if isinstance(result.statement, capa.features.Feature):
        if bool(result.success):
            doc["locations"] = result.locations
    elif isinstance(result.statement, capa.rules.Range):
        if bool(result.success):
            doc["locations"] = result.locations

    # if we have a `match` statement, then we're referencing another rule.
    # this could an external rule (written by a human), or
    # rule generated to support a subscope (basic block, etc.)
    # we still want to include the matching logic in this tree.
    #
    # so, we need to lookup the other rule results
    # and then filter those down to the address used here.
    # finally, splice that logic into this tree.
    if (
        doc["node"]["type"] == "feature"
        and doc["node"]["feature"]["type"] == "match"
        # only add subtree on success,
        # because there won't be results for the other rule on failure.
        and doc["success"]
    ):
        rule_name = doc["node"]["feature"]["match"]
        rule = rules[rule_name]
        rule_matches = {address: result for (address, result) in capabilities[rule_name]}
        if rule.meta.get("capa/subscope-rule"):
            # for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
            #
            # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
            scope = rule.meta["scope"]
            doc["node"] = {
                "type": "statement",
                "statement": {"type": "subscope", "subscope": scope,},
            }
        # splice the referenced rule's logic in as children,
        # one subtree per address at which it matched here.
        for location in doc["locations"]:
            doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))

    return doc
def convert_capabilities_to_result_document(rules, capabilities):
    """
    convert the given rule set and capabilities result to a common, Python-native data structure.
    this format can be directly emitted to JSON, or passed to the other `render_*` routines
    to render as text.

    see examples of substructures in above routines.

    schema:

    ```json
    {
      $rule-name: {
        "meta": {...copied from rule.meta...},
        "matches": {
          $address: {...match details...},
          ...
        }
      },
      ...
    }
    ```

    Args:
      rules (RuleSet):
      capabilities (Dict[str, List[Tuple[int, Result]]]):
    """
    doc = {}

    for rule_name, matches in capabilities.items():
        rule = rules[rule_name]

        # subscope rules are an implementation detail: don't expose them in the document.
        if rule.meta.get("capa/subscope-rule"):
            continue

        converted_matches = {}
        for (addr, match) in matches:
            converted_matches[addr] = convert_match_to_result_document(rules, capabilities, match)

        doc[rule_name] = {
            "meta": dict(rule.meta),
            "source": rule.definition,
            "matches": converted_matches,
        }

    return doc
def render_vverbose(rules, capabilities):
    """render the capabilities as very-verbose text."""
    # there's an import loop here:
    # if capa.render imports capa.render.vverbose
    # and capa.render.vverbose imports capa.render (implicitly, as a submodule).
    # so, defer the import until this routine is called, breaking the loop.
    import capa.render.vverbose

    return capa.render.vverbose.render_vverbose(convert_capabilities_to_result_document(rules, capabilities))
def render_verbose(rules, capabilities):
    """render the capabilities as verbose text."""
    # deferred import to break the import loop (see render_vverbose).
    import capa.render.verbose

    return capa.render.verbose.render_verbose(convert_capabilities_to_result_document(rules, capabilities))
def render_default(rules, capabilities):
    """render the capabilities using the default (tabular) renderer."""
    # deferred import to break the import loop.
    # fix: dropped the unused `import capa.render.verbose` - only the default
    # renderer is used here.
    import capa.render.default

    doc = convert_capabilities_to_result_document(rules, capabilities)
    return capa.render.default.render_default(doc)
class CapaJsonObjectEncoder(json.JSONEncoder):
    """JSON encoder that flattens sets into sorted lists.

    everything else falls through to the base class, which raises TypeError.
    note: the original's first branch (primitives and strings) was redundant -
    `default()` is only invoked for objects the encoder cannot already
    serialize, and that branch delegated to the base class exactly like the
    final fallback did. collapsing it also drops the `six` dependency here.
    """

    def default(self, obj):
        if isinstance(obj, set):
            # sorted for deterministic output
            return list(sorted(obj))
        # probably will TypeError
        return json.JSONEncoder.default(self, obj)
def render_json(rules, capabilities):
    """serialize the capabilities result document to a JSON string."""
    doc = convert_capabilities_to_result_document(rules, capabilities)
    return json.dumps(doc, cls=CapaJsonObjectEncoder, sort_keys=True)

View File

@@ -1,99 +1,99 @@
import collections import collections
import six import six
import tabulate import tabulate
import capa.render.utils as rutils import capa.render.utils as rutils
def width(s, character_count):
    """pad the given string with trailing spaces to at least `character_count` characters."""
    return s.ljust(character_count)
def render_capabilities(doc, ostream):
    """
    write a two-column table of capability name vs. rule namespace.

    example::

        +-------------------------------------------------------+-------------------------------------------------+
        | CAPABILITY                                            | NAMESPACE                                       |
        |-------------------------------------------------------+-------------------------------------------------|
        | check for OutputDebugString error (2 matches)         | anti-analysis/anti-debugging/debugger-detection |
        | read and send data from client to server              | c2/file-transfer                                |
        | ...                                                   | ...                                             |
        +-------------------------------------------------------+-------------------------------------------------+
    """
    rows = []
    for rule in rutils.capability_rules(doc):
        name = rutils.bold(rule["meta"]["name"])
        match_count = len(rule["matches"])
        # annotate the capability with the match count when it matched more than once.
        capability = name if match_count == 1 else "%s (%d matches)" % (name, match_count)
        rows.append((capability, rule["meta"]["namespace"]))

    ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 40), width("NAMESPACE", 40)], tablefmt="psql"))
    ostream.write("\n")
def render_attack(doc, ostream):
    """
    write a table of ATT&CK tactics and the techniques found for each.

    example::

        +------------------------+----------------------------------------------------------------------+
        | ATT&CK Tactic          | ATT&CK Technique                                                     |
        |------------------------+----------------------------------------------------------------------|
        | DEFENSE EVASION        | Obfuscated Files or Information [T1027]                              |
        | DISCOVERY              | Query Registry [T1012]                                               |
        |                        | System Information Discovery [T1082]                                 |
        | EXECUTION              | Command and Scripting Interpreter::Windows Command Shell [T1059.003] |
        |                        | Shared Modules [T1129]                                               |
        | EXFILTRATION           | Exfiltration Over C2 Channel [T1041]                                 |
        | PERSISTENCE            | Create or Modify System Process::Windows Service [T1543.003]         |
        +------------------------+----------------------------------------------------------------------+
    """
    tactics = collections.defaultdict(set)
    for rule in rutils.capability_rules(doc):
        if not rule["meta"].get("att&ck"):
            continue
        for attack in rule["meta"]["att&ck"]:
            # entries look like "Tactic::Technique[::Subtechnique] Txxxx[.yyy]"
            tactic, _, rest = attack.partition("::")
            if "::" in rest:
                technique, _, rest = rest.partition("::")
                # fix: renamed `id` -> `attack_id` to avoid shadowing the builtin.
                subtechnique, _, attack_id = rest.rpartition(" ")
                tactics[tactic].add((technique, subtechnique, attack_id))
            else:
                technique, _, attack_id = rest.rpartition(" ")
                tactics[tactic].add((technique, attack_id))

    rows = []
    for tactic, techniques in sorted(tactics.items()):
        inner_rows = []
        for spec in sorted(techniques):
            if len(spec) == 2:
                technique, attack_id = spec
                inner_rows.append("%s %s" % (rutils.bold(technique), attack_id))
            elif len(spec) == 3:
                technique, subtechnique, attack_id = spec
                inner_rows.append("%s::%s %s" % (rutils.bold(technique), subtechnique, attack_id))
            else:
                raise RuntimeError("unexpected ATT&CK spec format")
        rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),))

    ostream.write(
        tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 60)], tablefmt="psql")
    )
    ostream.write("\n")
def render_default(doc):
    """render the ATT&CK table followed by the capabilities table, returning the text."""
    out = six.StringIO()

    render_attack(doc, out)
    out.write("\n")
    render_capabilities(doc, out)

    return out.getvalue()

View File

@@ -1,44 +1,44 @@
import six import six
import termcolor import termcolor
def bold(s):
    """draw attention to the given string (rendered blue via termcolor)"""
    return termcolor.colored(s, "blue")
def bold2(s):
    """draw attention to the given string, within a `bold` section (rendered green via termcolor)"""
    return termcolor.colored(s, "green")
def hex(n):
    """render the given number using upper case hex, like: 0x123ABC"""
    # note: intentionally shadows the builtin; callers use it as `rutils.hex`.
    return "0x{:X}".format(n)
def capability_rules(doc):
    """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
    # any of these meta keys marks a rule as administrative rather than a capability.
    skip_keys = (
        "lib",
        "capa/subscope",
        "maec/analysis-conclusion",
        "maec/analysis-conclusion-ov",
        "maec/malware-category",
        "maec/malware-category-ov",
    )

    decorated = [(rule["meta"].get("namespace", ""), rule["meta"]["name"], rule) for rule in doc.values()]
    for (_, _, rule) in sorted(decorated):
        if any(rule["meta"].get(key) for key in skip_keys):
            continue
        yield rule
class StringIO(six.StringIO):
    """a StringIO with a `writeln` convenience that appends a newline."""

    def writeln(self, s):
        # single write keeps the line and its terminator together.
        self.write(s + "\n")

View File

@@ -1,52 +1,52 @@
""" """
example:: example::
send data send data
namespace communication namespace communication
author william.ballenthin@fireeye.com author william.ballenthin@fireeye.com
description all known techniques for sending data to a potential C2 server description all known techniques for sending data to a potential C2 server
scope function scope function
examples BFB9B5391A13D0AFD787E87AB90F14F5:0x13145D60 examples BFB9B5391A13D0AFD787E87AB90F14F5:0x13145D60
matches 0x10004363 matches 0x10004363
0x100046c9 0x100046c9
0x1000454e 0x1000454e
0x10003a13 0x10003a13
0x10003415 0x10003415
0x10003797 0x10003797
""" """
import tabulate import tabulate
import capa.rules import capa.rules
import capa.render.utils as rutils import capa.render.utils as rutils
def render_verbose(doc):
    """
    render a text report: one section per capability rule, with its meta
    table and (for non-file scopes) the list of matched addresses.

    args:
      doc: the result document produced by `convert_capabilities_to_result_document`.

    returns:
      str: the rendered report.
    """
    ostream = rutils.StringIO()

    for rule in rutils.capability_rules(doc):
        count = len(rule["matches"])
        if count == 1:
            capability = rutils.bold(rule["meta"]["name"])
        else:
            capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)

        ostream.writeln(capability)

        rows = []
        # fix: dropped the dead `key == "name"` check - "name" is never in this tuple.
        for key in ("namespace", "description", "scope"):
            if key not in rule["meta"]:
                continue

            v = rule["meta"][key]
            # unwrap single-element lists for cleaner display.
            if isinstance(v, list) and len(v) == 1:
                v = v[0]
            rows.append((key, v))

        # file-scope rules match the whole file; listing addresses is not meaningful.
        if rule["meta"]["scope"] != capa.rules.FILE_SCOPE:
            locations = doc[rule["meta"]["name"]]["matches"].keys()
            rows.append(("matches", "\n".join(map(rutils.hex, locations))))

        ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
        ostream.write("\n")

    return ostream.getvalue()

View File

@@ -1,185 +1,185 @@
import tabulate import tabulate
import capa.rules import capa.rules
import capa.render.utils as rutils import capa.render.utils as rutils
def render_locations(ostream, match):
    """write the match's locations (if any) to the stream, like ` @ 0x401000, 0x402000`."""
    # the locations array may legitimately be empty, such as when we're in
    # MODE_FAILURE and showing the logic under a `not` statement
    # (which has no matched locations).
    locations = sorted(match.get("locations", []))

    if not locations:
        return

    ostream.write(" @ ")
    if len(locations) == 1:
        ostream.write(rutils.hex(locations[0]))
    elif len(locations) <= 4:
        ostream.write(", ".join(map(rutils.hex, locations)))
    else:
        # cap the display at four locations so the output doesn't get noisy;
        # the first handful is usually enough for inspection.
        ostream.write(", ".join(map(rutils.hex, locations[:4])))
        ostream.write(", and %d more..." % (len(locations) - 4))
def render_statement(ostream, match, statement, indent=0):
    """
    render a single statement node from a match tree to the stream.

    args:
      ostream: output stream with `write`/`writeln` methods
      match (Dict[str, Any]): the match entry containing this statement
      statement (Dict[str, Any]): the statement node to render
      indent (int): nesting depth (one space per level)

    raises:
      RuntimeError: on an unrecognized statement type
    """
    ostream.write(" " * indent)
    if statement["type"] in ("and", "or", "optional"):
        ostream.write(statement["type"])
        ostream.writeln(":")
    elif statement["type"] == "not":
        # this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
        ostream.writeln("not:")
    elif statement["type"] == "some":
        # bugfix: `count` is an integer, so `statement["count"] + " or more"`
        # raised TypeError; use string formatting instead.
        ostream.write("%d or more" % (statement["count"]))
        ostream.writeln(":")
    elif statement["type"] == "range":
        # `range` is a weird node, its almost a hybrid of statement+feature.
        # it is a specific feature repeated multiple times.
        # there's no additional logic in the feature part, just the existence of a feature.
        # so, we have to inline some of the feature rendering here.
        child = statement["child"]
        value = rutils.bold2(child[child["type"]])
        if child.get("description"):
            ostream.write("count(%s(%s = %s)): " % (child["type"], value, child["description"]))
        else:
            ostream.write("count(%s(%s)): " % (child["type"], value))
        if statement["max"] == statement["min"]:
            ostream.write("%d" % (statement["min"]))
        elif statement["min"] == 0:
            ostream.write("%d or fewer" % (statement["max"]))
        elif statement["max"] == (1 << 64 - 1):
            # NOTE(review): due to precedence this is 1 << 63, not (1 << 64) - 1;
            # it must agree with the "unbounded" sentinel used by the rule parser -- confirm there.
            ostream.write("%d or more" % (statement["min"]))
        else:
            ostream.write("between %d and %d" % (statement["min"], statement["max"]))
        render_locations(ostream, match)
        ostream.write("\n")
    elif statement["type"] == "subscope":
        ostream.write(statement["subscope"])
        ostream.writeln(":")
    elif statement["type"] == "regex":
        # regex is a `Statement`, not a `Feature`:
        # it doesn't get extracted, but applies to all strings in scope,
        # so it is rendered here rather than in `render_feature`.
        ostream.writeln("string: %s" % (statement["match"]))
    else:
        raise RuntimeError("unexpected match statement type: " + str(statement))
def render_feature(ostream, match, feature, indent=0):
    """render a single feature node, e.g. `number: 0x10 = description @ 0x401000`."""
    pieces = [" " * indent, feature["type"], ": ", rutils.bold2(feature[feature["type"]])]
    if "description" in feature:
        pieces.append(" = ")
        pieces.append(feature["description"])
    for piece in pieces:
        ostream.write(piece)
    render_locations(ostream, match)
    ostream.write("\n")
def render_node(ostream, match, node, indent=0):
    """dispatch rendering of a node to the statement or feature renderer."""
    node_type = node["type"]
    if node_type == "statement":
        render_statement(ostream, match, node["statement"], indent=indent)
        return
    if node_type == "feature":
        render_feature(ostream, match, node["feature"], indent=indent)
        return
    raise RuntimeError("unexpected node type: " + str(node))
# display nodes that successfully evaluated against the sample.
MODE_SUCCESS = "success"

# display nodes that did not evaluate to True against the sample.
# this is useful when rendering the logic tree under a `not` node.
MODE_FAILURE = "failure"


def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
    """
    recursively render a match tree to the stream.

    in MODE_SUCCESS only nodes that evaluated True are shown;
    in MODE_FAILURE only nodes that did not evaluate to True are shown.
    a `not` statement inverts the mode for its children.

    raises:
      RuntimeError: on an unrecognized mode
    """
    child_mode = mode

    if mode == MODE_SUCCESS:
        # show only nodes that evaluated successfully.
        if not match["success"]:
            return
        statement_type = match["node"].get("statement", {}).get("type")
        # an optional statement with no successful children renders as empty noise.
        if statement_type == "optional" and not any(c["success"] for c in match["children"]):
            return
        # under a `not`, the interesting children are the failing ones.
        if statement_type == "not":
            child_mode = MODE_FAILURE
    elif mode == MODE_FAILURE:
        # show only nodes that did not evaluate to True.
        if match["success"]:
            return
        statement_type = match["node"].get("statement", {}).get("type")
        # an optional statement with successful children is not relevant here.
        if statement_type == "optional" and any(c["success"] for c in match["children"]):
            return
        # under a `not`, flip back to showing successful evaluations.
        if statement_type == "not":
            child_mode = MODE_SUCCESS
    else:
        raise RuntimeError("unexpected mode: " + mode)

    render_node(ostream, match, match["node"], indent=indent)
    for child in match["children"]:
        render_match(ostream, child, indent=indent + 1, mode=child_mode)
def render_vverbose(doc):
    """
    render the result document as very-verbose text:
    full rule metadata plus the per-match logic trees.

    args:
      doc (Dict[str, Any]): the result document to render

    returns:
      str: the rendered text

    raises:
      RuntimeError: if a file-scope rule has other than exactly one match
    """
    ostream = rutils.StringIO()

    for rule in rutils.capability_rules(doc):
        count = len(rule["matches"])
        if count == 1:
            capability = rutils.bold(rule["meta"]["name"])
        else:
            capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
        ostream.writeln(capability)

        rows = []
        for key in capa.rules.META_KEYS:
            if key == "name" or key not in rule["meta"]:
                continue
            v = rule["meta"][key]
            # render lists as a scalar (one element) or comma-joined (many elements)
            if isinstance(v, list) and len(v) == 1:
                v = v[0]
            elif isinstance(v, list) and len(v) > 1:
                v = ", ".join(v)
            rows.append((key, v))
        ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))

        if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
            matches = list(doc[rule["meta"]["name"]]["matches"].values())
            if len(matches) != 1:
                # i think there should only ever be one match per file-scope rule,
                # because we do the file-scope evaluation a single time.
                # but i'm not 100% sure if this is/will always be true.
                # so, lets be explicit about our assumptions and raise an exception if they fail.
                #
                # bugfix: concatenating the int count raised TypeError and masked this
                # message; convert it to str first.
                raise RuntimeError("unexpected file scope match count: " + str(len(matches)))
            render_match(ostream, matches[0], indent=0)
        else:
            for location, match in sorted(doc[rule["meta"]["name"]]["matches"].items()):
                ostream.write(rule["meta"]["scope"])
                ostream.write(" @ ")
                ostream.writeln(rutils.hex(location))
                render_match(ostream, match, indent=1)
        ostream.write("\n")

    return ostream.getvalue()

View File

@@ -1,13 +1,13 @@
from PyInstaller.utils.hooks import copy_metadata

# PyInstaller hook: embed the installed "vivisect" package metadata into the frozen app.
#
# in order for viv-utils to use pkg_resources to fetch
# the installed version of vivisect,
# we need to instruct pyinstaller to embed this metadata.
#
# so we set the pyinstaller.spec/hookspath to reference
# the directory with this hook.
#
# this hook runs at analysis time and updates the embedded metadata.
#
# ref: https://github.com/pyinstaller/pyinstaller/issues/1713#issuecomment-162682084
datas = copy_metadata("vivisect")

View File

@@ -1,160 +1,160 @@
#!/usr/bin/env python #!/usr/bin/env python
""" """
migrate rules and their namespaces. migrate rules and their namespaces.
example: example:
$ python scripts/migrate-rules.py migration.csv ./rules ./new-rules $ python scripts/migrate-rules.py migration.csv ./rules ./new-rules
""" """
import os import os
import csv import csv
import sys import sys
import logging import logging
import os.path import os.path
import collections import collections
import argparse import argparse
import capa.rules import capa.rules
logger = logging.getLogger("migrate-rules") logger = logging.getLogger("migrate-rules")
def read_plan(plan_path):
    """
    parse the migration-plan CSV into a list of row dicts.

    args:
      plan_path (str): path to the CSV file describing the migration

    returns:
      List[Dict[str, str]]: one dict per row, keyed by the field names below;
      any extra columns are collected under the "other" key.
    """
    # bugfix: the file was opened in binary mode ("rb"), but Python 3's csv
    # module requires text mode; per the csv docs, open with newline="".
    with open(plan_path, "r", newline="") as f:
        return list(
            csv.DictReader(
                f,
                restkey="other",
                fieldnames=(
                    "existing path",
                    "existing name",
                    "existing rule-category",
                    "proposed name",
                    "proposed namespace",
                    "ATT&CK",
                    "MBC",
                    "comment1",
                ),
            )
        )
def read_rules(rule_directory):
    """
    load every capa rule (*.yml) found under the given directory, keyed by rule name.

    rules found under a "nursery" path are tagged with meta["capa/nursery"] = True.
    """
    rules = {}
    for root, _dirs, filenames in os.walk(rule_directory):
        for filename in filenames:
            path = os.path.join(root, filename)
            if not path.endswith(".yml"):
                logger.info("skipping file: %s", path)
                continue
            rule = capa.rules.Rule.from_yaml_file(path)
            rules[rule.name] = rule
            if "nursery" in path:
                rule.meta["capa/nursery"] = True
    return rules
def main(argv=None):
    """
    migrate rules per the plan: rename, re-namespace, tag with ATT&CK/MBC,
    and write them into the destination directory tree.

    args:
      argv (Optional[List[str]]): CLI args; defaults to sys.argv[1:]

    returns:
      int: 0 on success, -1 when the plan doesn't account for every rule
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="migrate rules.")
    parser.add_argument("plan", type=str, help="Path to CSV describing migration")
    parser.add_argument("source", type=str, help="Source directory of rules")
    parser.add_argument("destination", type=str, help="Destination directory of rules")
    args = parser.parse_args(args=argv)
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)
    plan = read_plan(args.plan)
    logger.info("read %d plan entries", len(plan))
    rules = read_rules(args.source)
    logger.info("read %d rules", len(rules))
    # every existing rule must appear in the plan; otherwise refuse to migrate.
    planned_rules = set([row["existing name"] for row in plan])
    unplanned_rules = [rule for (name, rule) in rules.items() if name not in planned_rules]
    if unplanned_rules:
        logger.error("plan does not account for %d rules:" % (len(unplanned_rules)))
        for rule in unplanned_rules:
            logger.error(" " + rule.name)
        return -1
    # pairs of strings (needle, replacement)
    # used below to keep cross-rule `match:` references in sync with renames.
    match_translations = []
    for row in plan:
        if not row["existing name"]:
            continue
        rule = rules[row["existing name"]]
        if rule.meta["name"] != row["proposed name"]:
            logger.info("renaming rule '%s' -> '%s'", rule.meta["name"], row["proposed name"])
            # assume the yaml is formatted like `- match: $rule-name`.
            # but since its been linted, this should be ok.
            match_translations.append(("- match: " + rule.meta["name"], "- match: " + row["proposed name"]))
            rule.meta["name"] = row["proposed name"]
            rule.name = row["proposed name"]
        if "rule-category" in rule.meta:
            # rule-category is superseded by namespaces; drop it.
            logger.info("deleting rule category '%s'", rule.meta["rule-category"])
            del rule.meta["rule-category"]
        rule.meta["namespace"] = row["proposed namespace"]
        if row["ATT&CK"] != "n/a" and row["ATT&CK"] != "":
            tag = row["ATT&CK"]
            # reformat "Technique Name T1234" as "Technique Name [T1234]"
            name, _, id = tag.rpartition(" ")
            tag = "%s [%s]" % (name, id)
            rule.meta["att&ck"] = [tag]
        if row["MBC"] != "n/a" and row["MBC"] != "":
            tag = row["MBC"]
            rule.meta["mbc"] = [tag]
    for rule in rules.values():
        # derive a filesystem-friendly filename from the rule name.
        filename = rule.name
        filename = filename.lower()
        filename = filename.replace(" ", "-")
        filename = filename.replace("(", "")
        filename = filename.replace(")", "")
        filename = filename.replace("+", "")
        filename = filename.replace("/", "")
        filename = filename + ".yml"
        try:
            # nursery/lib rules have fixed directories; otherwise file under the namespace.
            if rule.meta.get("capa/nursery"):
                directory = os.path.join(args.destination, "nursery")
            elif rule.meta.get("lib"):
                directory = os.path.join(args.destination, "lib")
            else:
                directory = os.path.join(args.destination, rule.meta.get("namespace"))
            os.makedirs(directory)
        except OSError:
            # directory already exists
            pass
        else:
            logger.info("created namespace: %s", directory)
        path = os.path.join(directory, filename)
        logger.info("writing rule %s", path)
        doc = rule.to_yaml().decode("utf-8")
        # apply the rename translations so `match:` references to renamed rules stay valid.
        for (needle, replacement) in match_translations:
            doc = doc.replace(needle, replacement)
        with open(path, "wb") as f:
            f.write(doc.encode("utf-8"))
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -1,106 +1,106 @@
import textwrap import textwrap
import capa.rules import capa.rules
# the canonical serialization that each reformatting test below must produce.
EXPECTED = textwrap.dedent(
    """\
    rule:
      meta:
        name: test rule
        author: user@domain.com
        scope: function
        examples:
          - foo1234
          - bar5678
      features:
        - and:
          - number: 1
          - number: 2
    """
)
def test_rule_reformat_top_level_elements():
    """`features` appears before `meta` here; reformatting must emit `meta` first."""
    rule = textwrap.dedent(
        """
        rule:
          features:
            - and:
              - number: 1
              - number: 2
          meta:
            name: test rule
            author: user@domain.com
            scope: function
            examples:
              - foo1234
              - bar5678
        """
    )
    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
def test_rule_reformat_indentation():
    """non-canonical YAML indentation must be normalized on reformat."""
    # NOTE(review): the original used deliberately odd indentation here; the exact
    # column counts were lost in transit, but any valid YAML indentation that
    # differs from EXPECTED exercises the same behavior -- confirm against VCS.
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                author: user@domain.com
                scope: function
                examples:
                    - foo1234
                    - bar5678
            features:
                - and:
                    - number: 1
                    - number: 2
        """
    )
    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
def test_rule_reformat_order():
    """meta keys given out of order must be restored to canonical order on reformat."""
    rule = textwrap.dedent(
        """
        rule:
          meta:
            author: user@domain.com
            examples:
              - foo1234
              - bar5678
            scope: function
            name: test rule
          features:
            - and:
              - number: 1
              - number: 2
        """
    )
    assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED
def test_rule_reformat_meta_update():
    """renaming a rule via its `name` attribute must be reflected in the serialized meta."""
    rule = textwrap.dedent(
        """
        rule:
          meta:
            author: user@domain.com
            examples:
              - foo1234
              - bar5678
            scope: function
            name: AAAA
          features:
            - and:
              - number: 1
              - number: 2
        """
    )
    rule = capa.rules.Rule.from_yaml(rule)
    # rename after parsing; serialization should emit "test rule", not "AAAA".
    rule.name = "test rule"
    assert rule.to_yaml() == EXPECTED