mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 07:10:29 -08:00
pep8: black
This commit is contained in:
@@ -32,13 +32,13 @@ class Feature(object):
|
||||
# Used to overwrite the rendering of the feature args in `__str__` and the
|
||||
# json output
|
||||
def get_args_str(self):
|
||||
return ','.join(self.args)
|
||||
return ",".join(self.args)
|
||||
|
||||
def __str__(self):
|
||||
if self.description:
|
||||
return '%s(%s = %s)' % (self.name, self.get_args_str(), self.description)
|
||||
return "%s(%s = %s)" % (self.name, self.get_args_str(), self.description)
|
||||
else:
|
||||
return '%s(%s)' % (self.name, self.get_args_str())
|
||||
return "%s(%s)" % (self.name, self.get_args_str())
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
@@ -60,7 +60,7 @@ class Feature(object):
|
||||
class MatchedRule(Feature):
|
||||
def __init__(self, rule_name, description=None):
|
||||
super(MatchedRule, self).__init__([rule_name], description)
|
||||
self.name = 'match'
|
||||
self.name = "match"
|
||||
self.rule_name = rule_name
|
||||
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ def extract_bb_stackstring(f, bb):
|
||||
bb (IDA BasicBlock)
|
||||
"""
|
||||
if _ida_bb_contains_stackstring(f, bb):
|
||||
yield Characteristic('stack string'), bb.start_ea
|
||||
yield Characteristic("stack string"), bb.start_ea
|
||||
|
||||
|
||||
def _ida_bb_contains_tight_loop(f, bb):
|
||||
@@ -133,7 +133,7 @@ def extract_bb_tight_loop(f, bb):
|
||||
bb (IDA BasicBlock)
|
||||
"""
|
||||
if _ida_bb_contains_tight_loop(f, bb):
|
||||
yield Characteristic('tight loop'), bb.start_ea
|
||||
yield Characteristic("tight loop"), bb.start_ea
|
||||
|
||||
|
||||
def extract_features(f, bb):
|
||||
|
||||
@@ -75,7 +75,7 @@ def extract_file_embedded_pe():
|
||||
continue
|
||||
|
||||
for ea, _ in _ida_check_segment_for_pe(seg):
|
||||
yield Characteristic('embedded pe'), ea
|
||||
yield Characteristic("embedded pe"), ea
|
||||
|
||||
|
||||
def extract_file_export_names():
|
||||
|
||||
@@ -29,7 +29,7 @@ def extract_function_switch(f):
|
||||
f (IDA func_t)
|
||||
"""
|
||||
if _ida_function_contains_switch(f):
|
||||
yield Characteristic('switch'), f.start_ea
|
||||
yield Characteristic("switch"), f.start_ea
|
||||
|
||||
|
||||
def extract_function_calls_to(f):
|
||||
@@ -39,7 +39,7 @@ def extract_function_calls_to(f):
|
||||
f (IDA func_t)
|
||||
"""
|
||||
for ea in idautils.CodeRefsTo(f.start_ea, True):
|
||||
yield Characteristic('calls to'), ea
|
||||
yield Characteristic("calls to"), ea
|
||||
|
||||
|
||||
def extract_function_loop(f):
|
||||
@@ -53,7 +53,7 @@ def extract_function_loop(f):
|
||||
map(lambda s: edges.append((bb.start_ea, s.start_ea)), bb.succs())
|
||||
|
||||
if edges and loops.has_loop(edges):
|
||||
yield Characteristic('loop'), f.start_ea
|
||||
yield Characteristic("loop"), f.start_ea
|
||||
|
||||
|
||||
def extract_recursive_call(f):
|
||||
@@ -64,7 +64,7 @@ def extract_recursive_call(f):
|
||||
"""
|
||||
for ref in idautils.CodeRefsTo(f.start_ea, True):
|
||||
if f.contains(ref):
|
||||
yield Characteristic('recursive call'), f.start_ea
|
||||
yield Characteristic("recursive call"), f.start_ea
|
||||
break
|
||||
|
||||
|
||||
|
||||
@@ -259,7 +259,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
if _is_nzxor_stack_cookie(f, bb, insn):
|
||||
return
|
||||
|
||||
yield Characteristic('nzxor'), insn.ea
|
||||
yield Characteristic("nzxor"), insn.ea
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(f, bb, insn):
|
||||
@@ -292,7 +292,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
|
||||
if " fs:30h" in disasm or " gs:60h" in disasm:
|
||||
# TODO: replace above with proper IDA
|
||||
yield Characteristic('peb access'), insn.ea
|
||||
yield Characteristic("peb access"), insn.ea
|
||||
|
||||
|
||||
def extract_insn_segment_access_features(f, bb, insn):
|
||||
@@ -309,11 +309,11 @@ def extract_insn_segment_access_features(f, bb, insn):
|
||||
|
||||
if " fs:" in disasm:
|
||||
# TODO: replace above with proper IDA
|
||||
yield Characteristic('fs access'), insn.ea
|
||||
yield Characteristic("fs access"), insn.ea
|
||||
|
||||
if " gs:" in disasm:
|
||||
# TODO: replace above with proper IDA
|
||||
yield Characteristic('gs access'), insn.ea
|
||||
yield Characteristic("gs access"), insn.ea
|
||||
|
||||
|
||||
def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
@@ -336,7 +336,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
|
||||
continue
|
||||
|
||||
yield Characteristic('cross section flow'), insn.ea
|
||||
yield Characteristic("cross section flow"), insn.ea
|
||||
|
||||
|
||||
def extract_function_calls_from(f, bb, insn):
|
||||
@@ -354,7 +354,7 @@ def extract_function_calls_from(f, bb, insn):
|
||||
return
|
||||
|
||||
for ref in idautils.CodeRefsFrom(insn.ea, False):
|
||||
yield Characteristic('calls from'), ref
|
||||
yield Characteristic("calls from"), ref
|
||||
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||
@@ -373,7 +373,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||
return
|
||||
|
||||
if idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
|
||||
yield Characteristic('indirect call'), insn.ea
|
||||
yield Characteristic("indirect call"), insn.ea
|
||||
|
||||
|
||||
def extract_features(f, bb, insn):
|
||||
|
||||
@@ -39,7 +39,7 @@ def _bb_has_tight_loop(f, bb):
|
||||
def extract_bb_tight_loop(f, bb):
|
||||
""" check basic block for tight loop indicators """
|
||||
if _bb_has_tight_loop(f, bb):
|
||||
yield Characteristic('tight loop'), bb.va
|
||||
yield Characteristic("tight loop"), bb.va
|
||||
|
||||
|
||||
def _bb_has_stackstring(f, bb):
|
||||
@@ -62,7 +62,7 @@ def _bb_has_stackstring(f, bb):
|
||||
def extract_stackstring(f, bb):
|
||||
""" check basic block for stackstring indicators """
|
||||
if _bb_has_stackstring(f, bb):
|
||||
yield Characteristic('stack string'), bb.va
|
||||
yield Characteristic("stack string"), bb.va
|
||||
|
||||
|
||||
def is_mov_imm_to_stack(instr):
|
||||
|
||||
@@ -13,7 +13,7 @@ def extract_file_embedded_pe(vw, file_path):
|
||||
fbytes = f.read()
|
||||
|
||||
for offset, i in pe_carve.carve(fbytes, 1):
|
||||
yield Characteristic('embedded pe'), offset
|
||||
yield Characteristic("embedded pe"), offset
|
||||
|
||||
|
||||
def extract_file_export_names(vw, file_path):
|
||||
|
||||
@@ -53,12 +53,12 @@ def extract_function_switch(f):
|
||||
method can be optimized
|
||||
"""
|
||||
if f.va in get_functions_with_switch(f.vw):
|
||||
yield Characteristic('switch'), f.va
|
||||
yield Characteristic("switch"), f.va
|
||||
|
||||
|
||||
def extract_function_calls_to(f):
|
||||
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
||||
yield Characteristic('calls to'), src
|
||||
yield Characteristic("calls to"), src
|
||||
|
||||
|
||||
def extract_function_loop(f):
|
||||
@@ -74,7 +74,7 @@ def extract_function_loop(f):
|
||||
edges.append((bb.va, bva))
|
||||
|
||||
if edges and loops.has_loop(edges):
|
||||
yield Characteristic('loop'), f.va
|
||||
yield Characteristic("loop"), f.va
|
||||
|
||||
|
||||
def extract_features(f):
|
||||
|
||||
@@ -287,7 +287,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
if is_security_cookie(f, bb, insn):
|
||||
return
|
||||
|
||||
yield Characteristic('nzxor'), insn.va
|
||||
yield Characteristic("nzxor"), insn.va
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(f, bb, insn):
|
||||
@@ -313,14 +313,16 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
# IDA: push large dword ptr fs:30h
|
||||
# viv: fs: push dword [0x00000030]
|
||||
# fs: push dword [eax + 0x30] ; i386RegMemOper, with eax = 0
|
||||
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or \
|
||||
(isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30):
|
||||
yield Characteristic('peb access'), insn.va
|
||||
elif 'gs' in insn.getPrefixName():
|
||||
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (
|
||||
isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
|
||||
):
|
||||
yield Characteristic("peb access"), insn.va
|
||||
elif "gs" in insn.getPrefixName():
|
||||
for oper in insn.opers:
|
||||
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or \
|
||||
(isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60):
|
||||
yield Characteristic('peb access'), insn.va
|
||||
if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
|
||||
isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
|
||||
):
|
||||
yield Characteristic("peb access"), insn.va
|
||||
else:
|
||||
pass
|
||||
|
||||
@@ -329,11 +331,11 @@ def extract_insn_segment_access_features(f, bb, insn):
|
||||
""" parse the instruction for access to fs or gs """
|
||||
prefix = insn.getPrefixName()
|
||||
|
||||
if prefix == 'fs':
|
||||
yield Characteristic('fs access'), insn.va
|
||||
if prefix == "fs":
|
||||
yield Characteristic("fs access"), insn.va
|
||||
|
||||
if prefix == 'gs':
|
||||
yield Characteristic('gs access'), insn.va
|
||||
if prefix == "gs":
|
||||
yield Characteristic("gs access"), insn.va
|
||||
|
||||
|
||||
def get_section(vw, va):
|
||||
@@ -370,7 +372,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
continue
|
||||
|
||||
if get_section(f.vw, insn.va) != get_section(f.vw, va):
|
||||
yield Characteristic('cross section flow'), insn.va
|
||||
yield Characteristic("cross section flow"), insn.va
|
||||
|
||||
except KeyError:
|
||||
continue
|
||||
@@ -388,7 +390,7 @@ def extract_function_calls_from(f, bb, insn):
|
||||
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
|
||||
oper = insn.opers[0]
|
||||
target = oper.getOperAddr(insn)
|
||||
yield Characteristic('calls from'), target
|
||||
yield Characteristic("calls from"), target
|
||||
|
||||
# call via thunk on x86,
|
||||
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
|
||||
@@ -397,18 +399,18 @@ def extract_function_calls_from(f, bb, insn):
|
||||
# see Lab21-01.exe_:0x140001178
|
||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
|
||||
target = insn.opers[0].getOperValue(insn)
|
||||
yield Characteristic('calls from'), target
|
||||
yield Characteristic("calls from"), target
|
||||
|
||||
# call via IAT, x64
|
||||
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
|
||||
op = insn.opers[0]
|
||||
target = op.getOperAddr(insn)
|
||||
yield Characteristic('calls from'), target
|
||||
yield Characteristic("calls from"), target
|
||||
|
||||
if target and target == f.va:
|
||||
# if we found a jump target and it's the function address
|
||||
# mark as recursive
|
||||
yield Characteristic('recursive call'), target
|
||||
yield Characteristic("recursive call"), target
|
||||
|
||||
|
||||
# this is a feature that's most relevant at the function or basic block scope,
|
||||
@@ -424,13 +426,13 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||
# Checks below work for x86 and x64
|
||||
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
|
||||
# call edx
|
||||
yield Characteristic('indirect call'), insn.va
|
||||
yield Characteristic("indirect call"), insn.va
|
||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegMemOper):
|
||||
# call dword ptr [eax+50h]
|
||||
yield Characteristic('indirect call'), insn.va
|
||||
yield Characteristic("indirect call"), insn.va
|
||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386SibOper):
|
||||
# call qword ptr [rsp+78h]
|
||||
yield Characteristic('indirect call'), insn.va
|
||||
yield Characteristic("indirect call"), insn.va
|
||||
|
||||
|
||||
def extract_features(f, bb, insn):
|
||||
|
||||
@@ -17,7 +17,7 @@ class Number(Feature):
|
||||
self.value = value
|
||||
|
||||
def get_args_str(self):
|
||||
return '0x%X' % self.value
|
||||
return "0x%X" % self.value
|
||||
|
||||
|
||||
class Offset(Feature):
|
||||
@@ -26,7 +26,7 @@ class Offset(Feature):
|
||||
self.value = value
|
||||
|
||||
def get_args_str(self):
|
||||
return '0x%X' % self.value
|
||||
return "0x%X" % self.value
|
||||
|
||||
|
||||
class Mnemonic(Feature):
|
||||
|
||||
@@ -192,10 +192,10 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):
|
||||
|
||||
class CapaExplorerSubscopeItem(CapaExplorerDataItem):
|
||||
|
||||
fmt = 'subscope(%s)'
|
||||
fmt = "subscope(%s)"
|
||||
|
||||
def __init__(self, parent, scope):
|
||||
super(CapaExplorerSubscopeItem, self).__init__(parent, [self.fmt % scope, '', ''])
|
||||
super(CapaExplorerSubscopeItem, self).__init__(parent, [self.fmt % scope, "", ""])
|
||||
|
||||
|
||||
class CapaExplorerBlockItem(CapaExplorerDataItem):
|
||||
|
||||
@@ -17,7 +17,7 @@ from capa.ida.explorer.item import (
|
||||
CapaExplorerBlockItem,
|
||||
CapaExplorerRuleMatchItem,
|
||||
CapaExplorerFeatureItem,
|
||||
CapaExplorerSubscopeItem
|
||||
CapaExplorerSubscopeItem,
|
||||
)
|
||||
|
||||
import capa.ida.helpers
|
||||
@@ -109,10 +109,21 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
font.setBold(True)
|
||||
return font
|
||||
|
||||
if role == QtCore.Qt.FontRole and isinstance(item, (CapaExplorerRuleItem, CapaExplorerRuleMatchItem,
|
||||
CapaExplorerBlockItem, CapaExplorerFunctionItem,
|
||||
CapaExplorerFeatureItem, CapaExplorerSubscopeItem)) and \
|
||||
column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
|
||||
if (
|
||||
role == QtCore.Qt.FontRole
|
||||
and isinstance(
|
||||
item,
|
||||
(
|
||||
CapaExplorerRuleItem,
|
||||
CapaExplorerRuleMatchItem,
|
||||
CapaExplorerBlockItem,
|
||||
CapaExplorerFunctionItem,
|
||||
CapaExplorerFeatureItem,
|
||||
CapaExplorerSubscopeItem,
|
||||
),
|
||||
)
|
||||
and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
|
||||
):
|
||||
# set bold font for top-level rules
|
||||
font = QtGui.QFont()
|
||||
font.setBold(True)
|
||||
@@ -352,12 +363,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
|
||||
for location in locations:
|
||||
# for each location render child node for range statement
|
||||
self.render_capa_doc_feature(parent2, statement['child'], location, doc)
|
||||
self.render_capa_doc_feature(parent2, statement["child"], location, doc)
|
||||
|
||||
return parent2
|
||||
elif statement['type'] == 'subscope':
|
||||
return CapaExplorerSubscopeItem(parent, statement['subscope'])
|
||||
elif statement['type'] == 'regex':
|
||||
elif statement["type"] == "subscope":
|
||||
return CapaExplorerSubscopeItem(parent, statement["subscope"])
|
||||
elif statement["type"] == "regex":
|
||||
# regex is a `Statement` not a `Feature`
|
||||
# this is because it doesn't get extracted, but applies to all strings in scope.
|
||||
# so we have to handle it here
|
||||
@@ -399,11 +410,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
):
|
||||
return
|
||||
|
||||
if match['node']['type'] == 'statement':
|
||||
parent2 = self.render_capa_doc_statement_node(parent, match['node']['statement'],
|
||||
match.get('locations', []), doc)
|
||||
elif match['node']['type'] == 'feature':
|
||||
parent2 = self.render_capa_doc_feature_node(parent, match['node']['feature'], match['locations'], doc)
|
||||
if match["node"]["type"] == "statement":
|
||||
parent2 = self.render_capa_doc_statement_node(
|
||||
parent, match["node"]["statement"], match.get("locations", []), doc
|
||||
)
|
||||
elif match["node"]["type"] == "feature":
|
||||
parent2 = self.render_capa_doc_feature_node(parent, match["node"]["feature"], match["locations"], doc)
|
||||
else:
|
||||
raise RuntimeError("unexpected node type: " + str(match["node"]["type"]))
|
||||
|
||||
|
||||
@@ -84,9 +84,9 @@ def convert_feature_to_result_document(feature):
|
||||
"type": "characteristic"
|
||||
},
|
||||
"""
|
||||
result = {'type': feature.name, feature.name: feature.get_args_str()}
|
||||
result = {"type": feature.name, feature.name: feature.get_args_str()}
|
||||
if feature.description:
|
||||
result['description'] = feature.description
|
||||
result["description"] = feature.description
|
||||
|
||||
return result
|
||||
|
||||
@@ -133,10 +133,10 @@ def convert_match_to_result_document(rules, capabilities, result):
|
||||
# so only add `locations` to feature nodes.
|
||||
if isinstance(result.statement, capa.features.Feature):
|
||||
if bool(result.success):
|
||||
doc['locations'] = result.locations
|
||||
doc["locations"] = result.locations
|
||||
elif isinstance(result.statement, capa.rules.Range):
|
||||
if bool(result.success):
|
||||
doc['locations'] = result.locations
|
||||
doc["locations"] = result.locations
|
||||
|
||||
# if we have a `match` statement, then we're referencing another rule.
|
||||
# this could an external rule (written by a human), or
|
||||
|
||||
@@ -8,27 +8,27 @@ def render_locations(ostream, match):
|
||||
# its possible to have an empty locations array here,
|
||||
# such as when we're in MODE_FAILURE and showing the logic
|
||||
# under a `not` statement (which will have no matched locations).
|
||||
locations = list(sorted(match.get('locations', [])))
|
||||
locations = list(sorted(match.get("locations", [])))
|
||||
if len(locations) == 1:
|
||||
ostream.write(' @ ')
|
||||
ostream.write(" @ ")
|
||||
ostream.write(rutils.hex(locations[0]))
|
||||
elif len(locations) > 1:
|
||||
ostream.write(' @ ')
|
||||
ostream.write(" @ ")
|
||||
if len(locations) > 4:
|
||||
# don't display too many locations, because it becomes very noisy.
|
||||
# probably only the first handful of locations will be useful for inspection.
|
||||
ostream.write(', '.join(map(rutils.hex, locations[0:4])))
|
||||
ostream.write(', and %d more...' % (len(locations) - 4))
|
||||
ostream.write(", ".join(map(rutils.hex, locations[0:4])))
|
||||
ostream.write(", and %d more..." % (len(locations) - 4))
|
||||
else:
|
||||
ostream.write(', '.join(map(rutils.hex, locations)))
|
||||
ostream.write(", ".join(map(rutils.hex, locations)))
|
||||
|
||||
|
||||
def render_statement(ostream, match, statement, indent=0):
|
||||
ostream.write(' ' * indent)
|
||||
if statement['type'] in ('and', 'or', 'optional'):
|
||||
ostream.write(statement['type'])
|
||||
ostream.writeln(':')
|
||||
elif statement['type'] == 'not':
|
||||
ostream.write(" " * indent)
|
||||
if statement["type"] in ("and", "or", "optional"):
|
||||
ostream.write(statement["type"])
|
||||
ostream.writeln(":")
|
||||
elif statement["type"] == "not":
|
||||
# this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags.
|
||||
ostream.writeln("not:")
|
||||
elif statement["type"] == "some":
|
||||
@@ -40,36 +40,46 @@ def render_statement(ostream, match, statement, indent=0):
|
||||
# there's no additional logic in the feature part, just the existence of a feature.
|
||||
# so, we have to inline some of the feature rendering here.
|
||||
|
||||
child = statement['child']
|
||||
if child['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match', 'characteristic'):
|
||||
value = rutils.bold2(child[child['type']])
|
||||
elif child['type'] in ('number', 'offset'):
|
||||
value = rutils.bold2(rutils.hex(child[child['type']]))
|
||||
elif child['type'] == 'bytes':
|
||||
value = rutils.bold2(rutils.hex_string(child[child['type']]))
|
||||
child = statement["child"]
|
||||
if child["type"] in (
|
||||
"string",
|
||||
"api",
|
||||
"mnemonic",
|
||||
"basic block",
|
||||
"export",
|
||||
"import",
|
||||
"section",
|
||||
"match",
|
||||
"characteristic",
|
||||
):
|
||||
value = rutils.bold2(child[child["type"]])
|
||||
elif child["type"] in ("number", "offset"):
|
||||
value = rutils.bold2(rutils.hex(child[child["type"]]))
|
||||
elif child["type"] == "bytes":
|
||||
value = rutils.bold2(rutils.hex_string(child[child["type"]]))
|
||||
else:
|
||||
raise RuntimeError("unexpected feature type: " + str(child))
|
||||
|
||||
if child['description']:
|
||||
ostream.write('count(%s(%s = %s)): ' % (child['type'], value, child['description']))
|
||||
if child["description"]:
|
||||
ostream.write("count(%s(%s = %s)): " % (child["type"], value, child["description"]))
|
||||
else:
|
||||
ostream.write('count(%s(%s)): ' % (child['type'], value))
|
||||
ostream.write("count(%s(%s)): " % (child["type"], value))
|
||||
|
||||
if statement['max'] == statement['min']:
|
||||
ostream.write('%d' % (statement['min']))
|
||||
elif statement['min'] == 0:
|
||||
ostream.write('%d or fewer' % (statement['max']))
|
||||
elif statement['max'] == (1 << 64 - 1):
|
||||
ostream.write('%d or more' % (statement['min']))
|
||||
if statement["max"] == statement["min"]:
|
||||
ostream.write("%d" % (statement["min"]))
|
||||
elif statement["min"] == 0:
|
||||
ostream.write("%d or fewer" % (statement["max"]))
|
||||
elif statement["max"] == (1 << 64 - 1):
|
||||
ostream.write("%d or more" % (statement["min"]))
|
||||
else:
|
||||
ostream.write('between %d and %d' % (statement['min'], statement['max']))
|
||||
ostream.write("between %d and %d" % (statement["min"], statement["max"]))
|
||||
|
||||
render_locations(ostream, match)
|
||||
ostream.write('\n')
|
||||
elif statement['type'] == 'subscope':
|
||||
ostream.write(statement['subscope'])
|
||||
ostream.writeln(':')
|
||||
elif statement['type'] == 'regex':
|
||||
ostream.write("\n")
|
||||
elif statement["type"] == "subscope":
|
||||
ostream.write(statement["subscope"])
|
||||
ostream.writeln(":")
|
||||
elif statement["type"] == "regex":
|
||||
# regex is a `Statement` not a `Feature`
|
||||
# this is because it doesn't get extracted, but applies to all strings in scope.
|
||||
# so we have to handle it here
|
||||
@@ -79,38 +89,48 @@ def render_statement(ostream, match, statement, indent=0):
|
||||
|
||||
|
||||
def render_feature(ostream, match, feature, indent=0):
|
||||
ostream.write(' ' * indent)
|
||||
ostream.write(" " * indent)
|
||||
|
||||
if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match', 'characteristic'):
|
||||
ostream.write(feature['type'])
|
||||
ostream.write(': ')
|
||||
ostream.write(rutils.bold2(feature[feature['type']]))
|
||||
elif feature['type'] in ('number', 'offset'):
|
||||
ostream.write(feature['type'])
|
||||
ostream.write(': ')
|
||||
ostream.write(rutils.bold2(rutils.hex(feature[feature['type']])))
|
||||
elif feature['type'] == 'bytes':
|
||||
ostream.write('bytes: ')
|
||||
if feature["type"] in (
|
||||
"string",
|
||||
"api",
|
||||
"mnemonic",
|
||||
"basic block",
|
||||
"export",
|
||||
"import",
|
||||
"section",
|
||||
"match",
|
||||
"characteristic",
|
||||
):
|
||||
ostream.write(feature["type"])
|
||||
ostream.write(": ")
|
||||
ostream.write(rutils.bold2(feature[feature["type"]]))
|
||||
elif feature["type"] in ("number", "offset"):
|
||||
ostream.write(feature["type"])
|
||||
ostream.write(": ")
|
||||
ostream.write(rutils.bold2(rutils.hex(feature[feature["type"]])))
|
||||
elif feature["type"] == "bytes":
|
||||
ostream.write("bytes: ")
|
||||
# bytes is the uppercase, hex-encoded string.
|
||||
# it should always be an even number of characters (its hex).
|
||||
ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
|
||||
ostream.write(rutils.bold2(rutils.hex_string(feature[feature["type"]])))
|
||||
# note that regex is found in `render_statement`
|
||||
else:
|
||||
raise RuntimeError("unexpected feature type: " + str(feature))
|
||||
|
||||
if 'description' in feature:
|
||||
ostream.write(' = ')
|
||||
ostream.write(feature['description'])
|
||||
if "description" in feature:
|
||||
ostream.write(" = ")
|
||||
ostream.write(feature["description"])
|
||||
|
||||
render_locations(ostream, match)
|
||||
ostream.write('\n')
|
||||
ostream.write("\n")
|
||||
|
||||
|
||||
def render_node(ostream, match, node, indent=0):
|
||||
if node['type'] == 'statement':
|
||||
render_statement(ostream, match, node['statement'], indent=indent)
|
||||
elif node['type'] == 'feature':
|
||||
render_feature(ostream, match, node['feature'], indent=indent)
|
||||
if node["type"] == "statement":
|
||||
render_statement(ostream, match, node["statement"], indent=indent)
|
||||
elif node["type"] == "feature":
|
||||
render_feature(ostream, match, node["feature"], indent=indent)
|
||||
else:
|
||||
raise RuntimeError("unexpected node type: " + str(node))
|
||||
|
||||
|
||||
@@ -139,7 +139,7 @@ class InvalidRuleSet(ValueError):
|
||||
def ensure_feature_valid_for_scope(scope, feature):
|
||||
if isinstance(feature, capa.features.Characteristic):
|
||||
if capa.features.Characteristic(feature.value) not in SUPPORTED_FEATURES[scope]:
|
||||
raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
|
||||
raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
|
||||
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
|
||||
raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
|
||||
|
||||
@@ -205,9 +205,9 @@ def parse_feature(key):
|
||||
return capa.features.insn.Mnemonic
|
||||
elif key == "basic blocks":
|
||||
return capa.features.basicblock.BasicBlock
|
||||
elif key == 'characteristic':
|
||||
elif key == "characteristic":
|
||||
return capa.features.Characteristic
|
||||
elif key == 'export':
|
||||
elif key == "export":
|
||||
return capa.features.file.Export
|
||||
elif key == "import":
|
||||
return capa.features.file.Import
|
||||
@@ -220,14 +220,16 @@ def parse_feature(key):
|
||||
|
||||
|
||||
def parse_description(s, value_type, description=None):
|
||||
'''
|
||||
"""
|
||||
s can be an int or a string
|
||||
'''
|
||||
if value_type != 'string' and isinstance(s, str) and ' = ' in s:
|
||||
"""
|
||||
if value_type != "string" and isinstance(s, str) and " = " in s:
|
||||
if description:
|
||||
raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with ` = `)' % s)
|
||||
value, description = s.split(' = ', 1)
|
||||
if description == '':
|
||||
raise InvalidRule(
|
||||
'unexpected value: "%s", only one description allowed (inline description with ` = `)' % s
|
||||
)
|
||||
value, description = s.split(" = ", 1)
|
||||
if description == "":
|
||||
raise InvalidRule('unexpected value: "%s", description cannot be empty' % s)
|
||||
else:
|
||||
value = s
|
||||
@@ -241,9 +243,10 @@ def parse_description(s, value_type, description=None):
|
||||
raise InvalidRule('unexpected bytes value: "%s", must be a valid hex sequence' % value)
|
||||
|
||||
if len(value) > MAX_BYTES_FEATURE_SIZE:
|
||||
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
|
||||
MAX_BYTES_FEATURE_SIZE)
|
||||
elif value_type in {'number', 'offset'}:
|
||||
raise InvalidRule(
|
||||
"unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE
|
||||
)
|
||||
elif value_type in {"number", "offset"}:
|
||||
try:
|
||||
value = parse_int(value)
|
||||
except ValueError:
|
||||
@@ -254,7 +257,7 @@ def parse_description(s, value_type, description=None):
|
||||
|
||||
def build_statements(d, scope):
|
||||
if len(d.keys()) > 2:
|
||||
raise InvalidRule('too many statements')
|
||||
raise InvalidRule("too many statements")
|
||||
|
||||
key = list(d.keys())[0]
|
||||
if key == "and":
|
||||
@@ -305,18 +308,18 @@ def build_statements(d, scope):
|
||||
# - mnemonic: mov
|
||||
#
|
||||
# but here we deal with the form: `mnemonic(mov)`.
|
||||
term, _, arg = term.partition('(')
|
||||
term, _, arg = term.partition("(")
|
||||
Feature = parse_feature(term)
|
||||
|
||||
if arg:
|
||||
arg = arg[:-len(')')]
|
||||
arg = arg[: -len(")")]
|
||||
# can't rely on yaml parsing ints embedded within strings
|
||||
# like:
|
||||
#
|
||||
# count(offset(0xC))
|
||||
# count(number(0x11223344))
|
||||
# count(number(0x100 = description))
|
||||
if term != 'string':
|
||||
if term != "string":
|
||||
value, description = parse_description(arg, term)
|
||||
feature = Feature(value, description)
|
||||
else:
|
||||
@@ -356,7 +359,7 @@ def build_statements(d, scope):
|
||||
)
|
||||
else:
|
||||
Feature = parse_feature(key)
|
||||
value, description = parse_description(d[key], key, d.get('description'))
|
||||
value, description = parse_description(d[key], key, d.get("description"))
|
||||
feature = Feature(value, description)
|
||||
ensure_feature_valid_for_scope(scope, feature)
|
||||
return feature
|
||||
|
||||
@@ -10,4 +10,4 @@ from PyInstaller.utils.hooks import copy_metadata
|
||||
# this hook runs at analysis time and updates the embedded metadata.
|
||||
#
|
||||
# ref: https://github.com/pyinstaller/pyinstaller/issues/1713#issuecomment-162682084
|
||||
datas = copy_metadata('vivisect')
|
||||
datas = copy_metadata("vivisect")
|
||||
|
||||
@@ -324,7 +324,7 @@ def get_features(ctx, rule):
|
||||
# get features from rule and all dependencies including subscopes and matched rules
|
||||
features = []
|
||||
namespaces = capa.rules.index_rules_by_namespace([rule])
|
||||
deps = [ctx['rules'].rules[dep] for dep in rule.get_dependencies(namespaces)]
|
||||
deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies(namespaces)]
|
||||
for r in [rule] + deps:
|
||||
features.extend(get_rule_features(r))
|
||||
return features
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
'''
|
||||
"""
|
||||
migrate rules and their namespaces.
|
||||
|
||||
example:
|
||||
|
||||
$ python scripts/migrate-rules.py migration.csv ./rules ./new-rules
|
||||
'''
|
||||
"""
|
||||
import os
|
||||
import os.path
|
||||
import sys
|
||||
@@ -18,22 +18,27 @@ import argparse
|
||||
import capa.rules
|
||||
|
||||
|
||||
logger = logging.getLogger('migrate-rules')
|
||||
|
||||
logger = logging.getLogger("migrate-rules")
|
||||
|
||||
|
||||
def read_plan(plan_path):
|
||||
with open(plan_path, 'rb') as f:
|
||||
return list(csv.DictReader(f, restkey='other', fieldnames=(
|
||||
'existing path',
|
||||
'existing name',
|
||||
'existing rule-category',
|
||||
'proposed name',
|
||||
'proposed namespace',
|
||||
'ATT&CK',
|
||||
'MBC',
|
||||
'comment1',
|
||||
)))
|
||||
with open(plan_path, "rb") as f:
|
||||
return list(
|
||||
csv.DictReader(
|
||||
f,
|
||||
restkey="other",
|
||||
fieldnames=(
|
||||
"existing path",
|
||||
"existing name",
|
||||
"existing rule-category",
|
||||
"proposed name",
|
||||
"proposed namespace",
|
||||
"ATT&CK",
|
||||
"MBC",
|
||||
"comment1",
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def read_rules(rule_directory):
|
||||
@@ -41,15 +46,15 @@ def read_rules(rule_directory):
|
||||
for root, dirs, files in os.walk(rule_directory):
|
||||
for file in files:
|
||||
path = os.path.join(root, file)
|
||||
if not path.endswith('.yml'):
|
||||
logger.info('skipping file: %s', path)
|
||||
if not path.endswith(".yml"):
|
||||
logger.info("skipping file: %s", path)
|
||||
continue
|
||||
|
||||
rule = capa.rules.Rule.from_yaml_file(path)
|
||||
rules[rule.name] = rule
|
||||
|
||||
if 'nursery' in path:
|
||||
rule.meta['capa/nursery'] = True
|
||||
if "nursery" in path:
|
||||
rule.meta["capa/nursery"] = True
|
||||
return rules
|
||||
|
||||
|
||||
@@ -57,105 +62,100 @@ def main(argv=None):
|
||||
if argv is None:
|
||||
argv = sys.argv[1:]
|
||||
|
||||
parser = argparse.ArgumentParser(description='migrate rules.')
|
||||
parser.add_argument('plan', type=str,
|
||||
help='Path to CSV describing migration')
|
||||
parser.add_argument('source', type=str,
|
||||
help='Source directory of rules')
|
||||
parser.add_argument('destination', type=str,
|
||||
help='Destination directory of rules')
|
||||
parser = argparse.ArgumentParser(description="migrate rules.")
|
||||
parser.add_argument("plan", type=str, help="Path to CSV describing migration")
|
||||
parser.add_argument("source", type=str, help="Source directory of rules")
|
||||
parser.add_argument("destination", type=str, help="Destination directory of rules")
|
||||
args = parser.parse_args(args=argv)
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
plan = read_plan(args.plan)
|
||||
logger.info('read %d plan entries', len(plan))
|
||||
logger.info("read %d plan entries", len(plan))
|
||||
|
||||
rules = read_rules(args.source)
|
||||
logger.info('read %d rules', len(rules))
|
||||
logger.info("read %d rules", len(rules))
|
||||
|
||||
planned_rules = set([row['existing name'] for row in plan])
|
||||
planned_rules = set([row["existing name"] for row in plan])
|
||||
unplanned_rules = [rule for (name, rule) in rules.items() if name not in planned_rules]
|
||||
|
||||
if unplanned_rules:
|
||||
logger.error('plan does not account for %d rules:' % (len(unplanned_rules)))
|
||||
logger.error("plan does not account for %d rules:" % (len(unplanned_rules)))
|
||||
for rule in unplanned_rules:
|
||||
logger.error(' ' + rule.name)
|
||||
logger.error(" " + rule.name)
|
||||
return -1
|
||||
|
||||
# pairs of strings (needle, replacement)
|
||||
match_translations = []
|
||||
|
||||
for row in plan:
|
||||
if not row['existing name']:
|
||||
if not row["existing name"]:
|
||||
continue
|
||||
|
||||
rule = rules[row['existing name']]
|
||||
rule = rules[row["existing name"]]
|
||||
|
||||
if rule.meta['name'] != row['proposed name']:
|
||||
logger.info("renaming rule '%s' -> '%s'", rule.meta['name'], row['proposed name'])
|
||||
if rule.meta["name"] != row["proposed name"]:
|
||||
logger.info("renaming rule '%s' -> '%s'", rule.meta["name"], row["proposed name"])
|
||||
|
||||
# assume the yaml is formatted like `- match: $rule-name`.
|
||||
# but since its been linted, this should be ok.
|
||||
match_translations.append(
|
||||
('- match: ' + rule.meta['name'],
|
||||
'- match: ' + row['proposed name']))
|
||||
match_translations.append(("- match: " + rule.meta["name"], "- match: " + row["proposed name"]))
|
||||
|
||||
rule.meta['name'] = row['proposed name']
|
||||
rule.name = row['proposed name']
|
||||
rule.meta["name"] = row["proposed name"]
|
||||
rule.name = row["proposed name"]
|
||||
|
||||
if 'rule-category' in rule.meta:
|
||||
logger.info("deleting rule category '%s'", rule.meta['rule-category'])
|
||||
del rule.meta['rule-category']
|
||||
if "rule-category" in rule.meta:
|
||||
logger.info("deleting rule category '%s'", rule.meta["rule-category"])
|
||||
del rule.meta["rule-category"]
|
||||
|
||||
rule.meta['namespace'] = row['proposed namespace']
|
||||
rule.meta["namespace"] = row["proposed namespace"]
|
||||
|
||||
if row['ATT&CK'] != 'n/a' and row['ATT&CK'] != '':
|
||||
tag = row['ATT&CK']
|
||||
name, _, id = tag.rpartition(' ')
|
||||
tag = '%s [%s]' % (name, id)
|
||||
rule.meta['att&ck'] = [tag]
|
||||
if row["ATT&CK"] != "n/a" and row["ATT&CK"] != "":
|
||||
tag = row["ATT&CK"]
|
||||
name, _, id = tag.rpartition(" ")
|
||||
tag = "%s [%s]" % (name, id)
|
||||
rule.meta["att&ck"] = [tag]
|
||||
|
||||
if row['MBC'] != 'n/a' and row['MBC'] != '':
|
||||
tag = row['MBC']
|
||||
rule.meta['mbc'] = [tag]
|
||||
if row["MBC"] != "n/a" and row["MBC"] != "":
|
||||
tag = row["MBC"]
|
||||
rule.meta["mbc"] = [tag]
|
||||
|
||||
for rule in rules.values():
|
||||
filename = rule.name
|
||||
filename = filename.lower()
|
||||
filename = filename.replace(' ', '-')
|
||||
filename = filename.replace('(', '')
|
||||
filename = filename.replace(')', '')
|
||||
filename = filename.replace('+', '')
|
||||
filename = filename.replace('/', '')
|
||||
filename = filename + '.yml'
|
||||
filename = filename.replace(" ", "-")
|
||||
filename = filename.replace("(", "")
|
||||
filename = filename.replace(")", "")
|
||||
filename = filename.replace("+", "")
|
||||
filename = filename.replace("/", "")
|
||||
filename = filename + ".yml"
|
||||
|
||||
try:
|
||||
if rule.meta.get('capa/nursery'):
|
||||
directory = os.path.join(args.destination, 'nursery')
|
||||
elif rule.meta.get('lib'):
|
||||
directory = os.path.join(args.destination, 'lib')
|
||||
if rule.meta.get("capa/nursery"):
|
||||
directory = os.path.join(args.destination, "nursery")
|
||||
elif rule.meta.get("lib"):
|
||||
directory = os.path.join(args.destination, "lib")
|
||||
else:
|
||||
directory = os.path.join(args.destination, rule.meta.get('namespace'))
|
||||
directory = os.path.join(args.destination, rule.meta.get("namespace"))
|
||||
os.makedirs(directory)
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
logger.info('created namespace: %s', directory)
|
||||
logger.info("created namespace: %s", directory)
|
||||
|
||||
path = os.path.join(directory, filename)
|
||||
logger.info('writing rule %s', path)
|
||||
logger.info("writing rule %s", path)
|
||||
|
||||
doc = rule.to_yaml().decode('utf-8')
|
||||
doc = rule.to_yaml().decode("utf-8")
|
||||
for (needle, replacement) in match_translations:
|
||||
doc = doc.replace(needle, replacement)
|
||||
|
||||
with open(path, 'wb') as f:
|
||||
f.write(doc.encode('utf-8'))
|
||||
with open(path, "wb") as f:
|
||||
f.write(doc.encode("utf-8"))
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -1,2 +1,2 @@
|
||||
FNAMES_EXTENSION = '.fnames'
|
||||
FREEZE_EXTENSION = '.frz'
|
||||
FNAMES_EXTENSION = ".fnames"
|
||||
FREEZE_EXTENSION = ".frz"
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
'''
|
||||
"""
|
||||
IDAPython script to dump JSON file of functions names { fva: fname }.
|
||||
Meant to be run on benign files with PDB information. IDA should apply function names from the PDB files automatically.
|
||||
Can also be run on annotated IDA database files.
|
||||
|
||||
Example usage (via IDA autonomous mode):
|
||||
ida.exe -A -S_dump_fnames.py "<output path>" <sample_path>
|
||||
'''
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
@@ -26,7 +26,7 @@ def main():
|
||||
fnames = {}
|
||||
for f in idautils.Functions():
|
||||
fname = idc.get_name(f)
|
||||
if fname.startswith('sub_'):
|
||||
if fname.startswith("sub_"):
|
||||
continue
|
||||
|
||||
name_demangled = idc.demangle_name(fname, INF_SHORT_DN_ATTR)
|
||||
@@ -35,12 +35,12 @@ def main():
|
||||
|
||||
fnames[f] = fname
|
||||
|
||||
with open(idc.ARGV[1], 'w') as f:
|
||||
with open(idc.ARGV[1], "w") as f:
|
||||
json.dump(fnames, f)
|
||||
|
||||
# exit IDA
|
||||
idc.qexit(0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
'''
|
||||
"""
|
||||
IDAPython script to export DOT files of function graphs.
|
||||
|
||||
Example usage (via IDA autonomous mode):
|
||||
ida.exe -A -S_export_fimages.py "<output dir>" <fva1> [<fva2> ...] <sample_path>
|
||||
'''
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
@@ -26,13 +26,19 @@ def main():
|
||||
|
||||
for fva in fvas:
|
||||
fstart = idc.get_func_attr(fva, idc.FUNCATTR_START)
|
||||
name = '%s_0x%x' % (idb_name.replace('.', '_'), fstart)
|
||||
name = "%s_0x%x" % (idb_name.replace(".", "_"), fstart)
|
||||
out_path = os.path.join(out_dir, name)
|
||||
fname = idc.get_name(fstart)
|
||||
|
||||
if not ida_gdl.gen_flow_graph(out_path, '%s (0x%x)' % (fname, fstart), idaapi.get_func(fstart), 0, 0,
|
||||
ida_gdl.CHART_GEN_DOT | ida_gdl.CHART_PRINT_NAMES):
|
||||
print 'IDA error generating flow graph'
|
||||
if not ida_gdl.gen_flow_graph(
|
||||
out_path,
|
||||
"%s (0x%x)" % (fname, fstart),
|
||||
idaapi.get_func(fstart),
|
||||
0,
|
||||
0,
|
||||
ida_gdl.CHART_GEN_DOT | ida_gdl.CHART_PRINT_NAMES,
|
||||
):
|
||||
print "IDA error generating flow graph"
|
||||
# TODO add label to DOT file, see https://stackoverflow.com/a/6452088/10548020
|
||||
# TODO highlight where rule matched
|
||||
|
||||
@@ -40,5 +46,5 @@ def main():
|
||||
idc.qexit(0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
'''
|
||||
"""
|
||||
Freeze capa features.
|
||||
|
||||
Example usage:
|
||||
freeze_features.py <test files dir>
|
||||
freeze_features.py samples\benign
|
||||
'''
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
@@ -18,63 +18,54 @@ from capa.features.freeze import main as freeze_features
|
||||
|
||||
|
||||
# only process files with these extensions
|
||||
TARGET_EXTENSIONS = [
|
||||
'.mal_',
|
||||
'.exe_',
|
||||
'.dll_',
|
||||
'.sys_'
|
||||
]
|
||||
TARGET_EXTENSIONS = [".mal_", ".exe_", ".dll_", ".sys_"]
|
||||
|
||||
|
||||
logger = logging.getLogger('check_rule')
|
||||
logger = logging.getLogger("check_rule")
|
||||
|
||||
|
||||
def freeze(input_path, reprocess):
|
||||
if not os.path.exists(input_path):
|
||||
raise IOError('%s does not exist or cannot be accessed' % input_path)
|
||||
raise IOError("%s does not exist or cannot be accessed" % input_path)
|
||||
|
||||
if os.path.isfile(input_path):
|
||||
outfile = '%s%s' % (input_path, FREEZE_EXTENSION)
|
||||
outfile = "%s%s" % (input_path, FREEZE_EXTENSION)
|
||||
freeze_file(input_path, outfile, reprocess)
|
||||
|
||||
elif os.path.isdir(input_path):
|
||||
logger.info('freezing features of %s files in %s', '|'.join(TARGET_EXTENSIONS), input_path)
|
||||
logger.info("freezing features of %s files in %s", "|".join(TARGET_EXTENSIONS), input_path)
|
||||
for root, dirs, files in os.walk(input_path):
|
||||
for file in files:
|
||||
if not os.path.splitext(file)[1] in TARGET_EXTENSIONS:
|
||||
logger.debug('skipping non-target file: %s', file)
|
||||
logger.debug("skipping non-target file: %s", file)
|
||||
continue
|
||||
path = os.path.join(root, file)
|
||||
outfile = '%s%s' % (path, FREEZE_EXTENSION)
|
||||
outfile = "%s%s" % (path, FREEZE_EXTENSION)
|
||||
freeze_file(path, outfile, reprocess)
|
||||
|
||||
|
||||
def freeze_file(path, output, reprocess=False):
|
||||
logger.info('freezing features of %s', path)
|
||||
logger.info("freezing features of %s", path)
|
||||
|
||||
if os.path.exists(output) and not reprocess:
|
||||
logger.info('%s already exists, provide -r argument to reprocess', output)
|
||||
logger.info("%s already exists, provide -r argument to reprocess", output)
|
||||
return
|
||||
|
||||
try:
|
||||
freeze_features([path, output]) # args: sample, output
|
||||
except Exception as e:
|
||||
logger.error('could not freeze features for %s: %s', path, str(e))
|
||||
logger.error("could not freeze features for %s: %s", path, str(e))
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
if argv is None:
|
||||
argv = sys.argv[1:]
|
||||
|
||||
parser = argparse.ArgumentParser(description='Freeze capa features of a file or of files in a directory')
|
||||
parser.add_argument('file_path', type=str,
|
||||
help='Path to file or directory to analyze')
|
||||
parser.add_argument('-r', '--reprocess', action='store_true', default=False,
|
||||
help='Overwrite existing analysis')
|
||||
parser.add_argument('-v', '--verbose', action='store_true',
|
||||
help='Enable verbose output')
|
||||
parser.add_argument('-q', '--quiet', action='store_true',
|
||||
help='Disable all output but errors')
|
||||
parser = argparse.ArgumentParser(description="Freeze capa features of a file or of files in a directory")
|
||||
parser.add_argument("file_path", type=str, help="Path to file or directory to analyze")
|
||||
parser.add_argument("-r", "--reprocess", action="store_true", default=False, help="Overwrite existing analysis")
|
||||
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
|
||||
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
|
||||
args = parser.parse_args(args=argv)
|
||||
|
||||
if args.quiet:
|
||||
@@ -91,12 +82,12 @@ def main(argv=None):
|
||||
try:
|
||||
freeze(args.file_path, args.reprocess)
|
||||
except IOError as e:
|
||||
logger.error('%s', str(e))
|
||||
logger.error("%s", str(e))
|
||||
return -1
|
||||
|
||||
logger.info('freezing features took %d seconds', time.time() - time0)
|
||||
logger.info("freezing features took %d seconds", time.time() - time0)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
'''
|
||||
"""
|
||||
Run a capa rule file against the testbed (frozen features in a directory).
|
||||
|
||||
Example usage:
|
||||
run_rule_on_testbed.py <path to rules> <rule name> <testbed dir>
|
||||
run_rule_on_testbed.py ..\\rules "create pipe" samples
|
||||
'''
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
@@ -37,8 +37,8 @@ function_names = set([])
|
||||
|
||||
|
||||
CATEGORY = {
|
||||
'malicious': 'MAL',
|
||||
'benign': 'BEN',
|
||||
"malicious": "MAL",
|
||||
"benign": "BEN",
|
||||
}
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ def check_rule(path, rules, rule_name, only_matching, save_image, verbose):
|
||||
try:
|
||||
capabilities = get_capabilities(path, rules)
|
||||
except (ValueError, KeyError) as e:
|
||||
logger.error('cannot load %s due to %s: %s', path, type(e).__name__, str(e))
|
||||
logger.error("cannot load %s due to %s: %s", path, type(e).__name__, str(e))
|
||||
errors += 1
|
||||
return
|
||||
|
||||
@@ -58,12 +58,12 @@ def check_rule(path, rules, rule_name, only_matching, save_image, verbose):
|
||||
if not only_matching:
|
||||
render_no_hit(path)
|
||||
else:
|
||||
print('[x] rule matches %d function(s) in %s (%s)' % (hits, path, get_category(path)))
|
||||
print ("[x] rule matches %d function(s) in %s (%s)" % (hits, path, get_category(path)))
|
||||
|
||||
file_hits += 1
|
||||
function_hits += hits
|
||||
|
||||
if get_category(path) == 'MAL':
|
||||
if get_category(path) == "MAL":
|
||||
mal_hits += 1
|
||||
else:
|
||||
other_hits += 1
|
||||
@@ -72,29 +72,29 @@ def check_rule(path, rules, rule_name, only_matching, save_image, verbose):
|
||||
render_hit_verbose(capabilities, path, verbose > 1)
|
||||
|
||||
if save_image:
|
||||
fvas = ['0x%x' % fva for fva in get_hit_fvas(capabilities)]
|
||||
fvas = ["0x%x" % fva for fva in get_hit_fvas(capabilities)]
|
||||
file_path = get_idb_or_sample_path(path)
|
||||
if file_path:
|
||||
if not export_fimages(file_path, save_image, fvas):
|
||||
logger.warning('exporting images failed')
|
||||
logger.warning("exporting images failed")
|
||||
else:
|
||||
logger.warning('could not get IDB or sample path')
|
||||
logger.warning("could not get IDB or sample path")
|
||||
|
||||
|
||||
def get_idb_or_sample_path(path):
|
||||
exts = ['.idb', '.i64', '.exe_', '.dll_', '.mal_']
|
||||
exts = [".idb", ".i64", ".exe_", ".dll_", ".mal_"]
|
||||
roots = [os.path.splitext(path)[0], path]
|
||||
for e in exts:
|
||||
for r in roots:
|
||||
p = '%s%s' % (r, e)
|
||||
p = "%s%s" % (r, e)
|
||||
if os.path.exists(p):
|
||||
return p
|
||||
return None
|
||||
|
||||
|
||||
def get_capabilities(path, rules):
|
||||
logger.debug('matching rules in %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
logger.debug("matching rules in %s", path)
|
||||
with open(path, "rb") as f:
|
||||
extractor = capa.features.freeze.load(f.read())
|
||||
return capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
||||
|
||||
@@ -107,18 +107,18 @@ def get_category(path):
|
||||
for c in CATEGORY:
|
||||
if c in path:
|
||||
return CATEGORY[c]
|
||||
return 'UNK'
|
||||
return "UNK"
|
||||
|
||||
|
||||
def render_no_hit(path):
|
||||
print('[ ] no match in %s (%s)' % (path, get_category(path)))
|
||||
print ("[ ] no match in %s (%s)" % (path, get_category(path)))
|
||||
|
||||
|
||||
def render_hit_verbose(capabilities, path, vverbose):
|
||||
try:
|
||||
fnames = load_fnames(path)
|
||||
except IOError as e:
|
||||
logger.error('%s', str(e))
|
||||
logger.error("%s", str(e))
|
||||
fnames = None
|
||||
|
||||
for rule, ress in capabilities.items():
|
||||
@@ -127,11 +127,11 @@ def render_hit_verbose(capabilities, path, vverbose):
|
||||
fname = fnames[fva]
|
||||
function_names.add(fname)
|
||||
else:
|
||||
fname = '<name unknown>'
|
||||
print(' - function 0x%x (%s)' % (fva, fname))
|
||||
fname = "<name unknown>"
|
||||
print (" - function 0x%x (%s)" % (fva, fname))
|
||||
|
||||
if vverbose:
|
||||
capa.main.render_result(res, indent=' ')
|
||||
capa.main.render_result(res, indent=" ")
|
||||
|
||||
|
||||
def get_hit_fvas(capabilities):
|
||||
@@ -145,39 +145,39 @@ def get_hit_fvas(capabilities):
|
||||
def load_fnames(path):
|
||||
fnames_path = path.replace(FREEZE_EXTENSION, FNAMES_EXTENSION)
|
||||
if not os.path.exists(fnames_path):
|
||||
raise IOError('%s does not exist' % fnames_path)
|
||||
raise IOError("%s does not exist" % fnames_path)
|
||||
|
||||
logger.debug('fnames path: %s', fnames_path)
|
||||
logger.debug("fnames path: %s", fnames_path)
|
||||
try:
|
||||
# json file with format { fva: fname }
|
||||
fnames = load_json(fnames_path)
|
||||
logger.debug('loaded JSON file')
|
||||
logger.debug("loaded JSON file")
|
||||
except TypeError:
|
||||
# csv file with format idbmd5;md5;fva;fname
|
||||
fnames = load_csv(fnames_path)
|
||||
logger.debug('loaded CSV file')
|
||||
logger.debug("loaded CSV file")
|
||||
fnames = convert_keys_to_int(fnames)
|
||||
logger.debug('read %d function names' % len(fnames))
|
||||
logger.debug("read %d function names" % len(fnames))
|
||||
return fnames
|
||||
|
||||
|
||||
def load_json(path):
|
||||
with open(path, 'r') as f:
|
||||
with open(path, "r") as f:
|
||||
try:
|
||||
funcs = json.load(f)
|
||||
except ValueError as e:
|
||||
logger.debug('not a JSON file, %s', str(e))
|
||||
logger.debug("not a JSON file, %s", str(e))
|
||||
raise TypeError
|
||||
return funcs
|
||||
|
||||
|
||||
def load_csv(path):
|
||||
funcs = defaultdict(str)
|
||||
with open(path, 'r') as f:
|
||||
with open(path, "r") as f:
|
||||
data = f.read().splitlines()
|
||||
for line in data:
|
||||
try:
|
||||
idbmd5, md5, fva, name = line.split(':', 3)
|
||||
idbmd5, md5, fva, name = line.split(":", 3)
|
||||
except ValueError as e:
|
||||
logger.warning('%s: "%s"', str(e), line)
|
||||
funcs[fva] = name
|
||||
@@ -198,42 +198,38 @@ def convert_keys_to_int(funcs_in):
|
||||
def print_summary(verbose, start_time):
|
||||
global file_count, file_hits, function_hits, errors
|
||||
|
||||
print('\n[SUMMARY]')
|
||||
print ("\n[SUMMARY]")
|
||||
m, s = divmod(time.time() - start_time, 60)
|
||||
logger.info('ran for %d:%02d minutes', m, s)
|
||||
ratio = ' (%d%%)' % ((float(file_hits) / file_count) * 100) if file_count else ''
|
||||
print('matched %d function(s) in %d/%d%s sample(s), encountered %d error(s)' % (
|
||||
function_hits, file_hits, file_count, ratio, errors))
|
||||
print('%d hits on (MAL) files; %d hits on other files' % (mal_hits, other_hits))
|
||||
logger.info("ran for %d:%02d minutes", m, s)
|
||||
ratio = " (%d%%)" % ((float(file_hits) / file_count) * 100) if file_count else ""
|
||||
print (
|
||||
"matched %d function(s) in %d/%d%s sample(s), encountered %d error(s)"
|
||||
% (function_hits, file_hits, file_count, ratio, errors)
|
||||
)
|
||||
print ("%d hits on (MAL) files; %d hits on other files" % (mal_hits, other_hits))
|
||||
|
||||
if verbose:
|
||||
if len(function_names) > 0:
|
||||
print('matched function names (unique):')
|
||||
print ("matched function names (unique):")
|
||||
for fname in function_names:
|
||||
print ' - %s' % fname
|
||||
print " - %s" % fname
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
if argv is None:
|
||||
argv = sys.argv[1:]
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run capa rule file against frozen features in a directory')
|
||||
parser.add_argument('rules', type=str,
|
||||
help='Path to directory containing rules')
|
||||
parser.add_argument('rule_name', type=str,
|
||||
help='Name of rule to test')
|
||||
parser.add_argument('frozen_path', type=str,
|
||||
help='Path to frozen feature file or directory')
|
||||
parser.add_argument('-f', '--fast', action='store_true',
|
||||
help='Don't test slow files')
|
||||
parser.add_argument('-o', '--only_matching', action='store_true',
|
||||
help='Print only if rule matches')
|
||||
parser.add_argument('-s', '--save_image', action='store',
|
||||
help='Directory to save exported images of function graphs')
|
||||
parser.add_argument('-v', '--verbose', action='count', default=0,
|
||||
help='Increase output verbosity')
|
||||
parser.add_argument('-q', '--quiet', action='store_true',
|
||||
help='Disable all output but errors')
|
||||
parser = argparse.ArgumentParser(description="Run capa rule file against frozen features in a directory")
|
||||
parser.add_argument("rules", type=str, help="Path to directory containing rules")
|
||||
parser.add_argument("rule_name", type=str, help="Name of rule to test")
|
||||
parser.add_argument("frozen_path", type=str, help="Path to frozen feature file or directory")
|
||||
parser.add_argument("-f", "--fast", action="store_true", help="Don't test slow files")
|
||||
parser.add_argument("-o", "--only_matching", action="store_true", help="Print only if rule matches")
|
||||
parser.add_argument(
|
||||
"-s", "--save_image", action="store", help="Directory to save exported images of function graphs"
|
||||
)
|
||||
parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase output verbosity")
|
||||
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
|
||||
args = parser.parse_args(args=argv)
|
||||
|
||||
if args.quiet:
|
||||
@@ -247,7 +243,7 @@ def main(argv=None):
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
if not os.path.isdir(args.rules):
|
||||
logger.error('%s is not a directory', args.rules)
|
||||
logger.error("%s is not a directory", args.rules)
|
||||
return -1
|
||||
|
||||
# load rule
|
||||
@@ -256,15 +252,15 @@ def main(argv=None):
|
||||
rules = list(capa.rules.get_rules_and_dependencies(rules, args.rule_name))
|
||||
rules = capa.rules.RuleSet(rules)
|
||||
except IOError as e:
|
||||
logger.error('%s', str(e))
|
||||
logger.error("%s", str(e))
|
||||
return -1
|
||||
except capa.rules.InvalidRule as e:
|
||||
logger.error('%s', str(e))
|
||||
logger.error("%s", str(e))
|
||||
return -1
|
||||
|
||||
time0 = time.time()
|
||||
|
||||
print('[RULE %s]' % args.rule_name)
|
||||
print ("[RULE %s]" % args.rule_name)
|
||||
if os.path.isfile(args.frozen_path):
|
||||
check_rule(args.frozen_path, rules, args.rule_name, args.only_matching, args.save_image, args.verbose)
|
||||
|
||||
@@ -277,8 +273,8 @@ def main(argv=None):
|
||||
continue
|
||||
|
||||
path = os.path.join(root, file)
|
||||
if args.fast and 'slow' in path:
|
||||
logger.debug('fast mode skipping %s', path)
|
||||
if args.fast and "slow" in path:
|
||||
logger.debug("fast mode skipping %s", path)
|
||||
continue
|
||||
|
||||
freeze_files.append(path)
|
||||
@@ -286,12 +282,12 @@ def main(argv=None):
|
||||
for path in sorted(freeze_files):
|
||||
sample_time0 = time.time()
|
||||
check_rule(path, rules, args.rule_name, args.only_matching, args.save_image, args.verbose)
|
||||
logger.debug('rule check took %d seconds', time.time() - sample_time0)
|
||||
logger.debug("rule check took %d seconds", time.time() - sample_time0)
|
||||
except KeyboardInterrupt:
|
||||
logger.info('Received keyboard interrupt, terminating')
|
||||
logger.info("Received keyboard interrupt, terminating")
|
||||
|
||||
print_summary(args.verbose, time0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
'''
|
||||
"""
|
||||
Start IDA Pro in autonomous mode to dump JSON file of function names { fva: fname }.
|
||||
Processes a single file or a directory.
|
||||
Only runs on files with supported file extensions.
|
||||
@@ -6,7 +6,7 @@ Only runs on files with supported file extensions.
|
||||
Example usage:
|
||||
start_ida_dump_fnames.py <candidate files dir>
|
||||
start_ida_dump_fnames.py samples\benign
|
||||
'''
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
@@ -19,18 +19,18 @@ import argparse
|
||||
|
||||
from scripts.testbed import FNAMES_EXTENSION
|
||||
|
||||
IDA32_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida.exe'
|
||||
IDA64_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida64.exe'
|
||||
IDA32_PATH = "C:\\Program Files\\IDA Pro 7.3\\ida.exe"
|
||||
IDA64_PATH = "C:\\Program Files\\IDA Pro 7.3\\ida64.exe"
|
||||
|
||||
# expected in same directory as this file
|
||||
DUMP_SCRIPT_PATH = os.path.abspath('_dump_fnames.py')
|
||||
DUMP_SCRIPT_PATH = os.path.abspath("_dump_fnames.py")
|
||||
|
||||
SUPPORTED_EXTENSIONS = [
|
||||
'.exe_',
|
||||
'.dll_',
|
||||
'.sys_',
|
||||
'.idb',
|
||||
'.i64',
|
||||
".exe_",
|
||||
".dll_",
|
||||
".sys_",
|
||||
".idb",
|
||||
".i64",
|
||||
]
|
||||
|
||||
|
||||
@@ -38,40 +38,43 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def call_ida_dump_script(sample_path, reprocess):
|
||||
''' call IDA in autonomous mode and return True if success, False on failure '''
|
||||
logger.info('processing %s (MD5: %s)', sample_path, get_md5_hexdigest(sample_path))
|
||||
""" call IDA in autonomous mode and return True if success, False on failure """
|
||||
logger.info("processing %s (MD5: %s)", sample_path, get_md5_hexdigest(sample_path))
|
||||
|
||||
# TODO detect 64-bit binaries
|
||||
if os.path.splitext(sample_path)[-1] == '.i64':
|
||||
if os.path.splitext(sample_path)[-1] == ".i64":
|
||||
IDA_PATH = IDA64_PATH
|
||||
else:
|
||||
IDA_PATH = IDA32_PATH
|
||||
|
||||
if sample_path.endswith('.idb') or sample_path.endswith('.i64'):
|
||||
if sample_path.endswith(".idb") or sample_path.endswith(".i64"):
|
||||
sample_path = sample_path[:-4]
|
||||
|
||||
fnames = '%s%s' % (sample_path, FNAMES_EXTENSION)
|
||||
fnames = "%s%s" % (sample_path, FNAMES_EXTENSION)
|
||||
if os.path.exists(fnames) and not reprocess:
|
||||
logger.info('%s already exists and contains %d function names, provide -r argument to reprocess',
|
||||
fnames, len(get_function_names(fnames)))
|
||||
logger.info(
|
||||
"%s already exists and contains %d function names, provide -r argument to reprocess",
|
||||
fnames,
|
||||
len(get_function_names(fnames)),
|
||||
)
|
||||
return True
|
||||
|
||||
out_path = os.path.split(fnames)[-1] # relative to IDA database file
|
||||
args = [IDA_PATH, '-A', '-S%s "%s"' % (DUMP_SCRIPT_PATH, out_path), sample_path]
|
||||
logger.debug('calling "%s"' % ' '.join(args))
|
||||
args = [IDA_PATH, "-A", '-S%s "%s"' % (DUMP_SCRIPT_PATH, out_path), sample_path]
|
||||
logger.debug('calling "%s"' % " ".join(args))
|
||||
subprocess.call(args)
|
||||
|
||||
if not os.path.exists(fnames):
|
||||
logger.warning('%s was not created', fnames)
|
||||
logger.warning("%s was not created", fnames)
|
||||
return False
|
||||
|
||||
logger.debug('extracted %d function names to %s', len(get_function_names(fnames)), fnames)
|
||||
logger.debug("extracted %d function names to %s", len(get_function_names(fnames)), fnames)
|
||||
return True
|
||||
|
||||
|
||||
def get_md5_hexdigest(sample_path):
|
||||
m = hashlib.md5()
|
||||
with open(sample_path, 'rb') as f:
|
||||
with open(sample_path, "rb") as f:
|
||||
m.update(f.read())
|
||||
return m.hexdigest()
|
||||
|
||||
@@ -79,19 +82,17 @@ def get_md5_hexdigest(sample_path):
|
||||
def get_function_names(fnames_file):
|
||||
if not os.path.exists(fnames_file):
|
||||
return None
|
||||
with open(fnames_file, 'r') as f:
|
||||
with open(fnames_file, "r") as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Launch IDA Pro in autonomous mode to dump function names of a file or of files in a directory')
|
||||
parser.add_argument('file_path', type=str,
|
||||
help='File or directory path to analyze')
|
||||
parser.add_argument('-r', '--reprocess', action='store_true', default=False,
|
||||
help='Overwrite existing analysis')
|
||||
parser.add_argument('-v', '--verbose', action='store_true',
|
||||
help='Enable verbose output')
|
||||
description="Launch IDA Pro in autonomous mode to dump function names of a file or of files in a directory"
|
||||
)
|
||||
parser.add_argument("file_path", type=str, help="File or directory path to analyze")
|
||||
parser.add_argument("-r", "--reprocess", action="store_true", default=False, help="Overwrite existing analysis")
|
||||
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
|
||||
args = parser.parse_args(args=sys.argv[1:])
|
||||
|
||||
if args.verbose:
|
||||
@@ -102,7 +103,7 @@ def main():
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
if not os.path.exists(args.file_path):
|
||||
logger.warning('%s does not exist', args.file_path)
|
||||
logger.warning("%s does not exist", args.file_path)
|
||||
return -1
|
||||
|
||||
if os.path.isfile(args.file_path):
|
||||
@@ -111,21 +112,21 @@ def main():
|
||||
|
||||
errors = 0
|
||||
|
||||
logger.info('processing files in %s with file extension %s', args.file_path, '|'.join(SUPPORTED_EXTENSIONS))
|
||||
logger.info("processing files in %s with file extension %s", args.file_path, "|".join(SUPPORTED_EXTENSIONS))
|
||||
for root, dirs, files in os.walk(args.file_path):
|
||||
for file in files:
|
||||
if not os.path.splitext(file)[1] in SUPPORTED_EXTENSIONS:
|
||||
logger.debug('%s does not have supported file extension', file)
|
||||
logger.debug("%s does not have supported file extension", file)
|
||||
continue
|
||||
path = os.path.join(root, file)
|
||||
if not call_ida_dump_script(path, args.reprocess):
|
||||
errors += 1
|
||||
|
||||
if errors:
|
||||
logger.warning('encountered %d errors', errors)
|
||||
logger.warning("encountered %d errors", errors)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
'''
|
||||
"""
|
||||
Start IDA Pro in autonomous mode to export images of function graphs.
|
||||
|
||||
Example usage:
|
||||
start_ida_export_fimages.py <target file> <output dir> -f <function list>
|
||||
start_ida_export_fimages.py test.exe imgs -f 0x401000,0x402F90
|
||||
'''
|
||||
"""
|
||||
|
||||
import os
|
||||
import imp
|
||||
@@ -16,34 +16,35 @@ import subprocess
|
||||
import argparse
|
||||
|
||||
try:
|
||||
imp.find_module('graphviz')
|
||||
imp.find_module("graphviz")
|
||||
from graphviz import Source
|
||||
|
||||
graphviz_found = True
|
||||
except ImportError:
|
||||
graphviz_found = False
|
||||
|
||||
|
||||
IDA32_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida.exe'
|
||||
IDA64_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida64.exe'
|
||||
IDA32_PATH = "C:\\Program Files\\IDA Pro 7.3\\ida.exe"
|
||||
IDA64_PATH = "C:\\Program Files\\IDA Pro 7.3\\ida64.exe"
|
||||
|
||||
# expected in same directory as this file
|
||||
EXPORT_SCRIPT_PATH = os.path.abspath('_export_fimages.py')
|
||||
EXPORT_SCRIPT_PATH = os.path.abspath("_export_fimages.py")
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def export_fimages(file_path, out_dir, functions, manual=False):
|
||||
'''
|
||||
"""
|
||||
Export images of function graphs.
|
||||
:param file_path: file to analyze
|
||||
:param out_dir: output directory
|
||||
:param functions: list of strings of hex formatted fvas
|
||||
:param manual: non-autonomous mode
|
||||
:return: True on success, False otherwise
|
||||
'''
|
||||
"""
|
||||
if not graphviz_found:
|
||||
logger.warning('please install graphviz to export images')
|
||||
logger.warning("please install graphviz to export images")
|
||||
return False
|
||||
|
||||
if not os.path.exists(out_dir):
|
||||
@@ -55,37 +56,37 @@ def export_fimages(file_path, out_dir, functions, manual=False):
|
||||
img_count = 0
|
||||
for root, dirs, files in os.walk(out_dir):
|
||||
for file in files:
|
||||
if not file.endswith('.dot'):
|
||||
if not file.endswith(".dot"):
|
||||
continue
|
||||
try:
|
||||
s = Source.from_file(file, directory=out_dir)
|
||||
s.render(file, directory=out_dir, format='png', cleanup=True)
|
||||
s.render(file, directory=out_dir, format="png", cleanup=True)
|
||||
img_count += 1
|
||||
except BaseException:
|
||||
logger.warning('graphviz error rendering file')
|
||||
logger.warning("graphviz error rendering file")
|
||||
if img_count > 0:
|
||||
logger.info('exported %d function graph images to "%s"', img_count, os.path.abspath(out_dir))
|
||||
return True
|
||||
else:
|
||||
logger.warning('failed to export function graph images')
|
||||
logger.warning("failed to export function graph images")
|
||||
return False
|
||||
|
||||
|
||||
def call_ida_script(script_path, script_args, sample_path, manual):
|
||||
logger.info('processing %s (MD5: %s)', sample_path, get_md5_hexdigest(sample_path))
|
||||
logger.info("processing %s (MD5: %s)", sample_path, get_md5_hexdigest(sample_path))
|
||||
|
||||
# TODO detect 64-bit binaries
|
||||
if os.path.splitext(sample_path)[-1] == '.i64':
|
||||
if os.path.splitext(sample_path)[-1] == ".i64":
|
||||
IDA_PATH = IDA64_PATH
|
||||
else:
|
||||
IDA_PATH = IDA32_PATH
|
||||
|
||||
args = [IDA_PATH, '-A', '-S%s %s' % (script_path, ' '.join(script_args)), sample_path]
|
||||
args = [IDA_PATH, "-A", "-S%s %s" % (script_path, " ".join(script_args)), sample_path]
|
||||
|
||||
if manual:
|
||||
args.remove('-A')
|
||||
args.remove("-A")
|
||||
|
||||
logger.debug('calling "%s"' % ' '.join(args))
|
||||
logger.debug('calling "%s"' % " ".join(args))
|
||||
if subprocess.call(args) == 0:
|
||||
return True
|
||||
else:
|
||||
@@ -94,24 +95,20 @@ def call_ida_script(script_path, script_args, sample_path, manual):
|
||||
|
||||
def get_md5_hexdigest(sample_path):
|
||||
m = hashlib.md5()
|
||||
with open(sample_path, 'rb') as f:
|
||||
with open(sample_path, "rb") as f:
|
||||
m.update(f.read())
|
||||
return m.hexdigest()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Launch IDA Pro in autonomous mode to export images of function graphs')
|
||||
parser.add_argument('file_path', type=str,
|
||||
help='File to export from')
|
||||
parser.add_argument('out_dir', type=str,
|
||||
help='Export target directory')
|
||||
parser.add_argument('-f', '--functions', action='store',
|
||||
help='Comma separated list of functions to export')
|
||||
parser.add_argument('-m', '--manual', action='store_true',
|
||||
help='Manual mode: show IDA dialog boxes')
|
||||
parser.add_argument('-v', '--verbose', action='store_true',
|
||||
help='Enable verbose output')
|
||||
description="Launch IDA Pro in autonomous mode to export images of function graphs"
|
||||
)
|
||||
parser.add_argument("file_path", type=str, help="File to export from")
|
||||
parser.add_argument("out_dir", type=str, help="Export target directory")
|
||||
parser.add_argument("-f", "--functions", action="store", help="Comma separated list of functions to export")
|
||||
parser.add_argument("-m", "--manual", action="store_true", help="Manual mode: show IDA dialog boxes")
|
||||
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
|
||||
args = parser.parse_args(args=sys.argv[1:])
|
||||
|
||||
if args.verbose:
|
||||
@@ -122,14 +119,14 @@ def main():
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
if not os.path.isfile(args.file_path):
|
||||
logger.warning('%s is not a file', args.file_path)
|
||||
logger.warning("%s is not a file", args.file_path)
|
||||
return -1
|
||||
|
||||
functions = args.functions.split(',')
|
||||
functions = args.functions.split(",")
|
||||
export_fimages(args.file_path, args.out_dir, functions, args.manual)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
68
setup.py
68
setup.py
@@ -4,69 +4,51 @@ import sys
|
||||
import setuptools
|
||||
|
||||
|
||||
requirements = [
|
||||
"six",
|
||||
"tqdm",
|
||||
"pyyaml",
|
||||
"tabulate",
|
||||
"colorama",
|
||||
"termcolor",
|
||||
"ruamel.yaml"
|
||||
]
|
||||
requirements = ["six", "tqdm", "pyyaml", "tabulate", "colorama", "termcolor", "ruamel.yaml"]
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
# py3
|
||||
requirements.append('networkx')
|
||||
requirements.append("networkx")
|
||||
else:
|
||||
# py2
|
||||
requirements.append('enum34')
|
||||
requirements.append('vivisect')
|
||||
requirements.append('viv-utils')
|
||||
requirements.append('networkx==2.2') # v2.2 is last version supported by Python 2.7
|
||||
requirements.append("enum34")
|
||||
requirements.append("vivisect")
|
||||
requirements.append("viv-utils")
|
||||
requirements.append("networkx==2.2") # v2.2 is last version supported by Python 2.7
|
||||
|
||||
# this sets __version__
|
||||
# via: http://stackoverflow.com/a/7071358/87207
|
||||
# and: http://stackoverflow.com/a/2073599/87207
|
||||
with open(os.path.join('capa', 'version.py'), 'rb') as f:
|
||||
with open(os.path.join("capa", "version.py"), "rb") as f:
|
||||
exec(f.read())
|
||||
|
||||
|
||||
def get_rule_paths():
|
||||
return [os.path.join('..', x[0], '*.yml') for x in os.walk('rules')]
|
||||
return [os.path.join("..", x[0], "*.yml") for x in os.walk("rules")]
|
||||
|
||||
|
||||
setuptools.setup(
|
||||
name='capa',
|
||||
name="capa",
|
||||
version=__version__,
|
||||
description='',
|
||||
long_description='',
|
||||
author='Willi Ballenthin, Moritz Raabe',
|
||||
author_email='william.ballenthin@mandiant.com, moritz.raabe@mandiant.com',
|
||||
url='https://www.github.com/fireeye/capa',
|
||||
packages=setuptools.find_packages(exclude=['tests', 'testbed']),
|
||||
package_dir={'capa': 'capa'},
|
||||
package_data={'capa': get_rule_paths()},
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'capa=capa.main:main',
|
||||
]
|
||||
},
|
||||
description="",
|
||||
long_description="",
|
||||
author="Willi Ballenthin, Moritz Raabe",
|
||||
author_email="william.ballenthin@mandiant.com, moritz.raabe@mandiant.com",
|
||||
url="https://www.github.com/fireeye/capa",
|
||||
packages=setuptools.find_packages(exclude=["tests", "testbed"]),
|
||||
package_dir={"capa": "capa"},
|
||||
package_data={"capa": get_rule_paths()},
|
||||
entry_points={"console_scripts": ["capa=capa.main:main",]},
|
||||
include_package_data=True,
|
||||
install_requires=requirements,
|
||||
extras_require={
|
||||
'dev': [
|
||||
'pytest',
|
||||
'pytest-sugar',
|
||||
'pycodestyle',
|
||||
]
|
||||
},
|
||||
extras_require={"dev": ["pytest", "pytest-sugar", "pycodestyle",]},
|
||||
zip_safe=False,
|
||||
keywords='capa',
|
||||
keywords="capa",
|
||||
classifiers=[
|
||||
'Development Status :: 3 - Alpha',
|
||||
'Intended Audience :: Developers',
|
||||
'Natural Language :: English',
|
||||
'Programming Language :: Python :: 2',
|
||||
'Programming Language :: Python :: 3',
|
||||
"Development Status :: 3 - Alpha",
|
||||
"Intended Audience :: Developers",
|
||||
"Natural Language :: English",
|
||||
"Programming Language :: Python :: 2",
|
||||
"Programming Language :: Python :: 3",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -68,11 +68,7 @@ def sample_c91887d861d9bd4a5872249b641bc9f9():
|
||||
|
||||
@pytest.fixture
|
||||
def sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41():
|
||||
path = os.path.join(
|
||||
CD,
|
||||
"data",
|
||||
"39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_",
|
||||
)
|
||||
path = os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_",)
|
||||
return Sample(viv_utils.getWorkspace(path), path)
|
||||
|
||||
|
||||
|
||||
@@ -41,18 +41,8 @@ def test_some():
|
||||
assert Some(1, Number(1)).evaluate({Number(0): {1}}) == False
|
||||
|
||||
assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}}) == False
|
||||
assert (
|
||||
Some(2, Number(1), Number(2), Number(3)).evaluate(
|
||||
{Number(0): {1}, Number(1): {1}}
|
||||
)
|
||||
== False
|
||||
)
|
||||
assert (
|
||||
Some(2, Number(1), Number(2), Number(3)).evaluate(
|
||||
{Number(0): {1}, Number(1): {1}, Number(2): {1}}
|
||||
)
|
||||
== True
|
||||
)
|
||||
assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}}) == False
|
||||
assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}}) == True
|
||||
assert (
|
||||
Some(2, Number(1), Number(2), Number(3)).evaluate(
|
||||
{Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}}
|
||||
@@ -61,27 +51,20 @@ def test_some():
|
||||
)
|
||||
assert (
|
||||
Some(2, Number(1), Number(2), Number(3)).evaluate(
|
||||
{
|
||||
Number(0): {1},
|
||||
Number(1): {1},
|
||||
Number(2): {1},
|
||||
Number(3): {1},
|
||||
Number(4): {1},
|
||||
}
|
||||
{Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}, Number(4): {1},}
|
||||
)
|
||||
== True
|
||||
)
|
||||
|
||||
|
||||
def test_complex():
|
||||
assert True == Or(
|
||||
And(Number(1), Number(2)),
|
||||
Or(Number(3), Some(2, Number(4), Number(5), Number(6))),
|
||||
).evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}})
|
||||
assert True == Or(And(Number(1), Number(2)), Or(Number(3), Some(2, Number(4), Number(5), Number(6))),).evaluate(
|
||||
{Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}}
|
||||
)
|
||||
|
||||
assert False == Or(
|
||||
And(Number(1), Number(2)), Or(Number(3), Some(2, Number(4), Number(5)))
|
||||
).evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}})
|
||||
assert False == Or(And(Number(1), Number(2)), Or(Number(3), Some(2, Number(4), Number(5)))).evaluate(
|
||||
{Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}}
|
||||
)
|
||||
|
||||
|
||||
def test_range():
|
||||
@@ -119,100 +102,108 @@ def test_range():
|
||||
|
||||
|
||||
def test_range_exact():
|
||||
rule = textwrap.dedent('''
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
features:
|
||||
- count(number(100)): 2
|
||||
''')
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# just enough matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
|
||||
assert 'test rule' in matches
|
||||
assert "test rule" in matches
|
||||
|
||||
# not enough matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
|
||||
assert 'test rule' not in matches
|
||||
assert "test rule" not in matches
|
||||
|
||||
# too many matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
|
||||
assert 'test rule' not in matches
|
||||
assert "test rule" not in matches
|
||||
|
||||
|
||||
def test_range_range():
|
||||
rule = textwrap.dedent('''
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
features:
|
||||
- count(number(100)): (2, 3)
|
||||
''')
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# just enough matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
|
||||
assert 'test rule' in matches
|
||||
assert "test rule" in matches
|
||||
|
||||
# enough matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
|
||||
assert 'test rule' in matches
|
||||
assert "test rule" in matches
|
||||
|
||||
# not enough matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
|
||||
assert 'test rule' not in matches
|
||||
assert "test rule" not in matches
|
||||
|
||||
# too many matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3, 4}}, 0x0)
|
||||
assert 'test rule' not in matches
|
||||
assert "test rule" not in matches
|
||||
|
||||
|
||||
def test_range_exact_zero():
|
||||
rule = textwrap.dedent('''
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
features:
|
||||
- count(number(100)): 0
|
||||
''')
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# feature isn't indexed - good.
|
||||
features, matches = capa.engine.match([r], {}, 0x0)
|
||||
assert 'test rule' in matches
|
||||
assert "test rule" in matches
|
||||
|
||||
# feature is indexed, but no matches.
|
||||
# i don't think we should ever really have this case, but good to check anyways.
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {}}, 0x0)
|
||||
assert 'test rule' in matches
|
||||
assert "test rule" in matches
|
||||
|
||||
# too many matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
|
||||
assert 'test rule' not in matches
|
||||
assert "test rule" not in matches
|
||||
|
||||
|
||||
def test_range_with_zero():
|
||||
rule = textwrap.dedent('''
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
features:
|
||||
- count(number(100)): (0, 1)
|
||||
''')
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
|
||||
# ok
|
||||
features, matches = capa.engine.match([r], {}, 0x0)
|
||||
assert 'test rule' in matches
|
||||
assert "test rule" in matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {}}, 0x0)
|
||||
assert 'test rule' in matches
|
||||
assert "test rule" in matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
|
||||
assert 'test rule' in matches
|
||||
assert "test rule" in matches
|
||||
|
||||
# too many matches
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
|
||||
assert 'test rule' not in matches
|
||||
assert "test rule" not in matches
|
||||
|
||||
|
||||
def test_match_adds_matched_rule_feature():
|
||||
@@ -227,9 +218,7 @@ def test_match_adds_matched_rule_feature():
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
features, matches = capa.engine.match(
|
||||
[r], {capa.features.insn.Number(100): {1}}, 0x0
|
||||
)
|
||||
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
|
||||
assert capa.features.MatchedRule("test rule") in features
|
||||
|
||||
|
||||
@@ -261,9 +250,7 @@ def test_match_matched_rules():
|
||||
]
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(rules),
|
||||
{capa.features.insn.Number(100): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(rules), {capa.features.insn.Number(100): {1}}, 0x0,
|
||||
)
|
||||
assert capa.features.MatchedRule("test rule1") in features
|
||||
assert capa.features.MatchedRule("test rule2") in features
|
||||
@@ -271,9 +258,7 @@ def test_match_matched_rules():
|
||||
# the ordering of the rules must not matter,
|
||||
# the engine should match rules in an appropriate order.
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(reversed(rules)),
|
||||
{capa.features.insn.Number(100): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(reversed(rules)), {capa.features.insn.Number(100): {1}}, 0x0,
|
||||
)
|
||||
assert capa.features.MatchedRule("test rule1") in features
|
||||
assert capa.features.MatchedRule("test rule2") in features
|
||||
@@ -319,30 +304,22 @@ def test_regex():
|
||||
),
|
||||
]
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(rules),
|
||||
{capa.features.insn.Number(100): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(rules), {capa.features.insn.Number(100): {1}}, 0x0,
|
||||
)
|
||||
assert capa.features.MatchedRule("test rule") not in features
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(rules),
|
||||
{capa.features.String("aaaa"): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(rules), {capa.features.String("aaaa"): {1}}, 0x0,
|
||||
)
|
||||
assert capa.features.MatchedRule("test rule") not in features
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(rules),
|
||||
{capa.features.String("aBBBBa"): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(rules), {capa.features.String("aBBBBa"): {1}}, 0x0,
|
||||
)
|
||||
assert capa.features.MatchedRule("test rule") not in features
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(rules),
|
||||
{capa.features.String("abbbba"): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(rules), {capa.features.String("abbbba"): {1}}, 0x0,
|
||||
)
|
||||
assert capa.features.MatchedRule("test rule") in features
|
||||
assert capa.features.MatchedRule("rule with implied wildcards") in features
|
||||
@@ -365,9 +342,7 @@ def test_regex_ignorecase():
|
||||
),
|
||||
]
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(rules),
|
||||
{capa.features.String("aBBBBa"): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(rules), {capa.features.String("aBBBBa"): {1}}, 0x0,
|
||||
)
|
||||
assert capa.features.MatchedRule("test rule") in features
|
||||
|
||||
@@ -446,9 +421,7 @@ def test_match_namespace():
|
||||
]
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(rules),
|
||||
{capa.features.insn.API("CreateFile"): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(rules), {capa.features.insn.API("CreateFile"): {1}}, 0x0,
|
||||
)
|
||||
assert "CreateFile API" in matches
|
||||
assert "file-create" in matches
|
||||
@@ -458,9 +431,7 @@ def test_match_namespace():
|
||||
assert capa.features.MatchedRule("file/create/CreateFile") in features
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.engine.topologically_order_rules(rules),
|
||||
{capa.features.insn.API("WriteFile"): {1}},
|
||||
0x0,
|
||||
capa.engine.topologically_order_rules(rules), {capa.features.insn.API("WriteFile"): {1}}, 0x0,
|
||||
)
|
||||
assert "WriteFile API" in matches
|
||||
assert "file-create" not in matches
|
||||
|
||||
@@ -10,26 +10,22 @@ import capa.features.freeze
|
||||
from fixtures import *
|
||||
|
||||
|
||||
EXTRACTOR = capa.features.extractors.NullFeatureExtractor({
|
||||
'file features': [
|
||||
(0x402345, capa.features.Characteristic('embedded pe')),
|
||||
],
|
||||
'functions': {
|
||||
0x401000: {
|
||||
'features': [
|
||||
(0x401000, capa.features.Characteristic('switch')),
|
||||
],
|
||||
'basic blocks': {
|
||||
0x401000: {
|
||||
'features': [
|
||||
(0x401000, capa.features.Characteristic('tight loop')),
|
||||
],
|
||||
'instructions': {
|
||||
0x401000: {
|
||||
'features': [
|
||||
(0x401000, capa.features.insn.Mnemonic('xor')),
|
||||
(0x401000, capa.features.Characteristic('nzxor')),
|
||||
],
|
||||
EXTRACTOR = capa.features.extractors.NullFeatureExtractor(
|
||||
{
|
||||
"file features": [(0x402345, capa.features.Characteristic("embedded pe")),],
|
||||
"functions": {
|
||||
0x401000: {
|
||||
"features": [(0x401000, capa.features.Characteristic("switch")),],
|
||||
"basic blocks": {
|
||||
0x401000: {
|
||||
"features": [(0x401000, capa.features.Characteristic("tight loop")),],
|
||||
"instructions": {
|
||||
0x401000: {
|
||||
"features": [
|
||||
(0x401000, capa.features.insn.Mnemonic("xor")),
|
||||
(0x401000, capa.features.Characteristic("nzxor")),
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -44,19 +40,25 @@ def test_null_feature_extractor():
|
||||
assert list(EXTRACTOR.get_basic_blocks(0x401000)) == [0x401000]
|
||||
assert list(EXTRACTOR.get_instructions(0x401000, 0x0401000)) == [0x401000, 0x401002]
|
||||
|
||||
rules = capa.rules.RuleSet([
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: xor loop
|
||||
scope: basic block
|
||||
features:
|
||||
- and:
|
||||
- characteristic: tight loop
|
||||
- mnemonic: xor
|
||||
- characteristic: nzxor
|
||||
''')),
|
||||
])
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: xor loop
|
||||
scope: basic block
|
||||
features:
|
||||
- and:
|
||||
- characteristic: tight loop
|
||||
- mnemonic: xor
|
||||
- characteristic: nzxor
|
||||
"""
|
||||
)
|
||||
),
|
||||
]
|
||||
)
|
||||
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
|
||||
assert "xor loop" in capabilities
|
||||
|
||||
@@ -74,20 +76,14 @@ def compare_extractors(a, b):
|
||||
assert list(a.get_functions()) == list(b.get_functions())
|
||||
for f in a.get_functions():
|
||||
assert list(a.get_basic_blocks(f)) == list(b.get_basic_blocks(f))
|
||||
assert list(a.extract_function_features(f)) == list(
|
||||
b.extract_function_features(f)
|
||||
)
|
||||
assert list(a.extract_function_features(f)) == list(b.extract_function_features(f))
|
||||
|
||||
for bb in a.get_basic_blocks(f):
|
||||
assert list(a.get_instructions(f, bb)) == list(b.get_instructions(f, bb))
|
||||
assert list(a.extract_basic_block_features(f, bb)) == list(
|
||||
b.extract_basic_block_features(f, bb)
|
||||
)
|
||||
assert list(a.extract_basic_block_features(f, bb)) == list(b.extract_basic_block_features(f, bb))
|
||||
|
||||
for insn in a.get_instructions(f, bb):
|
||||
assert list(a.extract_insn_features(f, bb, insn)) == list(
|
||||
b.extract_insn_features(f, bb, insn)
|
||||
)
|
||||
assert list(a.extract_insn_features(f, bb, insn)) == list(b.extract_insn_features(f, bb, insn))
|
||||
|
||||
|
||||
def compare_extractors_viv_null(viv_ext, null_ext):
|
||||
@@ -102,17 +98,11 @@ def compare_extractors_viv_null(viv_ext, null_ext):
|
||||
|
||||
# TODO: ordering of these things probably doesn't work yet
|
||||
|
||||
assert list(viv_ext.extract_file_features()) == list(
|
||||
null_ext.extract_file_features()
|
||||
)
|
||||
assert list(viv_ext.extract_file_features()) == list(null_ext.extract_file_features())
|
||||
assert to_int(list(viv_ext.get_functions())) == list(null_ext.get_functions())
|
||||
for f in viv_ext.get_functions():
|
||||
assert to_int(list(viv_ext.get_basic_blocks(f))) == list(
|
||||
null_ext.get_basic_blocks(to_int(f))
|
||||
)
|
||||
assert list(viv_ext.extract_function_features(f)) == list(
|
||||
null_ext.extract_function_features(to_int(f))
|
||||
)
|
||||
assert to_int(list(viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
|
||||
assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(to_int(f)))
|
||||
|
||||
for bb in viv_ext.get_basic_blocks(f):
|
||||
assert to_int(list(viv_ext.get_instructions(f, bb))) == list(
|
||||
@@ -161,9 +151,9 @@ def test_serialize_features():
|
||||
roundtrip_feature(capa.features.String("SCardControl"))
|
||||
roundtrip_feature(capa.features.insn.Number(0xFF))
|
||||
roundtrip_feature(capa.features.insn.Offset(0x0))
|
||||
roundtrip_feature(capa.features.insn.Mnemonic('push'))
|
||||
roundtrip_feature(capa.features.file.Section('.rsrc'))
|
||||
roundtrip_feature(capa.features.Characteristic('tight loop'))
|
||||
roundtrip_feature(capa.features.insn.Mnemonic("push"))
|
||||
roundtrip_feature(capa.features.file.Section(".rsrc"))
|
||||
roundtrip_feature(capa.features.Characteristic("tight loop"))
|
||||
roundtrip_feature(capa.features.basicblock.BasicBlock())
|
||||
roundtrip_feature(capa.features.file.Export("BaseThreadInitThunk"))
|
||||
roundtrip_feature(capa.features.file.Import("kernel32.IsWow64Process"))
|
||||
@@ -173,19 +163,13 @@ def test_serialize_features():
|
||||
def test_freeze_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
# tmpdir fixture handles cleanup
|
||||
o = tmpdir.mkdir("capa").join("test.frz").strpath
|
||||
assert (
|
||||
capa.features.freeze.main(
|
||||
[sample_9324d1a8ae37a36ae560c37448c9705a.path, o, "-v"]
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert capa.features.freeze.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, o, "-v"]) == 0
|
||||
|
||||
|
||||
def test_freeze_load_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
o = tmpdir.mkdir("capa").join("test.frz")
|
||||
viv_extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw,
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
with open(o.strpath, "wb") as f:
|
||||
f.write(capa.features.freeze.dump(viv_extractor))
|
||||
|
||||
@@ -29,56 +29,54 @@ def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
|
||||
)
|
||||
rule_file = tmpdir.mkdir("capa").join("rule.yml")
|
||||
rule_file.write(RULE_CONTENT)
|
||||
assert (
|
||||
capa.main.main(
|
||||
[
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
"-v",
|
||||
"-r",
|
||||
rule_file.strpath,
|
||||
]
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v", "-r", rule_file.strpath,]) == 0
|
||||
|
||||
|
||||
def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32):
|
||||
assert (
|
||||
capa.main.main(
|
||||
[sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-v", "-f", "sc32"]
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-v", "-f", "sc32"]) == 0
|
||||
|
||||
|
||||
def test_ruleset():
|
||||
rules = capa.rules.RuleSet([
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: file rule
|
||||
scope: file
|
||||
features:
|
||||
- characteristic: embedded pe
|
||||
''')),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: function rule
|
||||
scope: function
|
||||
features:
|
||||
- characteristic: switch
|
||||
''')),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: basic block rule
|
||||
scope: basic block
|
||||
features:
|
||||
- characteristic: nzxor
|
||||
''')),
|
||||
|
||||
])
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: file rule
|
||||
scope: file
|
||||
features:
|
||||
- characteristic: embedded pe
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: function rule
|
||||
scope: function
|
||||
features:
|
||||
- characteristic: switch
|
||||
"""
|
||||
)
|
||||
),
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: basic block rule
|
||||
scope: basic block
|
||||
features:
|
||||
- characteristic: nzxor
|
||||
"""
|
||||
)
|
||||
),
|
||||
]
|
||||
)
|
||||
assert len(rules.file_rules) == 1
|
||||
assert len(rules.function_rules) == 1
|
||||
assert len(rules.basic_block_rules) == 1
|
||||
@@ -142,8 +140,7 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
|
||||
]
|
||||
)
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw,
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
||||
assert "install service" in capabilities
|
||||
@@ -152,48 +149,64 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
|
||||
|
||||
|
||||
def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
rules = capa.rules.RuleSet([
|
||||
# this rule should match on a basic block (including at least 0x403685)
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: tight loop
|
||||
scope: basic block
|
||||
examples:
|
||||
- 9324d1a8ae37a36ae560c37448c9705a:0x403685
|
||||
features:
|
||||
- characteristic: tight loop
|
||||
''')),
|
||||
# this rule should match on a function (0x403660)
|
||||
# based on API, as well as prior basic block rule match
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: kill thread loop
|
||||
scope: function
|
||||
examples:
|
||||
- 9324d1a8ae37a36ae560c37448c9705a:0x403660
|
||||
features:
|
||||
- and:
|
||||
- api: kernel32.TerminateThread
|
||||
- api: kernel32.CloseHandle
|
||||
- match: tight loop
|
||||
''')),
|
||||
# this rule should match on a file feature and a prior function rule match
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: kill thread program
|
||||
scope: file
|
||||
examples:
|
||||
- 9324d1a8ae37a36ae560c37448c9705a
|
||||
features:
|
||||
- and:
|
||||
- section: .text
|
||||
- match: kill thread loop
|
||||
''')),
|
||||
])
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path)
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
# this rule should match on a basic block (including at least 0x403685)
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: tight loop
|
||||
scope: basic block
|
||||
examples:
|
||||
- 9324d1a8ae37a36ae560c37448c9705a:0x403685
|
||||
features:
|
||||
- characteristic: tight loop
|
||||
"""
|
||||
)
|
||||
),
|
||||
# this rule should match on a function (0x403660)
|
||||
# based on API, as well as prior basic block rule match
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: kill thread loop
|
||||
scope: function
|
||||
examples:
|
||||
- 9324d1a8ae37a36ae560c37448c9705a:0x403660
|
||||
features:
|
||||
- and:
|
||||
- api: kernel32.TerminateThread
|
||||
- api: kernel32.CloseHandle
|
||||
- match: tight loop
|
||||
"""
|
||||
)
|
||||
),
|
||||
# this rule should match on a file feature and a prior function rule match
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: kill thread program
|
||||
scope: file
|
||||
examples:
|
||||
- 9324d1a8ae37a36ae560c37448c9705a
|
||||
features:
|
||||
- and:
|
||||
- section: .text
|
||||
- match: kill thread loop
|
||||
"""
|
||||
)
|
||||
),
|
||||
]
|
||||
)
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
|
||||
)
|
||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
||||
assert "tight loop" in capabilities
|
||||
assert "kill thread loop" in capabilities
|
||||
@@ -201,22 +214,27 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
|
||||
|
||||
def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
rules = capa.rules.RuleSet([
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: function
|
||||
features:
|
||||
- and:
|
||||
- basic block:
|
||||
- characteristic: tight loop
|
||||
'''))
|
||||
])
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: function
|
||||
features:
|
||||
- and:
|
||||
- basic block:
|
||||
- characteristic: tight loop
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
# tight loop at 0x403685
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw,
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
||||
assert "test rule" in capabilities
|
||||
@@ -242,8 +260,7 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
)
|
||||
|
||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw,
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||
)
|
||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
||||
assert "byte match test" in capabilities
|
||||
|
||||
@@ -34,10 +34,7 @@ def test_rule_yaml():
|
||||
assert r.evaluate({Number(0): {1}}) == False
|
||||
assert r.evaluate({Number(0): {1}, Number(1): {1}}) == False
|
||||
assert r.evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}}) == True
|
||||
assert (
|
||||
r.evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}})
|
||||
== True
|
||||
)
|
||||
assert r.evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}}) == True
|
||||
|
||||
|
||||
def test_rule_yaml_complex():
|
||||
@@ -60,15 +57,13 @@ def test_rule_yaml_complex():
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
assert (
|
||||
r.evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}})
|
||||
== True
|
||||
)
|
||||
assert r.evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}}) == True
|
||||
assert r.evaluate({Number(6): {1}, Number(7): {1}, Number(8): {1}}) == False
|
||||
|
||||
|
||||
def test_rule_yaml_descriptions():
|
||||
rule = textwrap.dedent('''
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
@@ -78,9 +73,12 @@ def test_rule_yaml_descriptions():
|
||||
- string: This program cannot be run in DOS mode.
|
||||
description: MS-DOS stub message
|
||||
- count(number(2 = AF_INET/SOCK_DGRAM)): 2
|
||||
''')
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
assert r.evaluate({Number(1): {1}, Number(2): {2, 3}, String('This program cannot be run in DOS mode.'): {4}}) == True
|
||||
assert (
|
||||
r.evaluate({Number(1): {1}, Number(2): {2, 3}, String("This program cannot be run in DOS mode."): {4}}) == True
|
||||
)
|
||||
|
||||
|
||||
def test_rule_yaml_not():
|
||||
@@ -149,53 +147,43 @@ def test_invalid_rule_feature():
|
||||
)
|
||||
|
||||
with pytest.raises(capa.rules.InvalidRule):
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: file
|
||||
features:
|
||||
- characteristic: nzxor
|
||||
'''))
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: file
|
||||
features:
|
||||
- characteristic: nzxor
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
with pytest.raises(capa.rules.InvalidRule):
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: function
|
||||
features:
|
||||
- characteristic: embedded pe
|
||||
'''))
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: function
|
||||
features:
|
||||
- characteristic: embedded pe
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
with pytest.raises(capa.rules.InvalidRule):
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: basic block
|
||||
features:
|
||||
- characteristic: embedded pe
|
||||
'''))
|
||||
|
||||
|
||||
def test_lib_rules():
|
||||
rules = capa.rules.RuleSet([
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: a lib rule
|
||||
lib: true
|
||||
features:
|
||||
- api: CreateFileA
|
||||
''')),
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: basic block
|
||||
features:
|
||||
- characteristic(embedded pe): true
|
||||
- characteristic: embedded pe
|
||||
"""
|
||||
)
|
||||
)
|
||||
@@ -234,21 +222,27 @@ def test_lib_rules():
|
||||
|
||||
|
||||
def test_subscope_rules():
|
||||
rules = capa.rules.RuleSet([
|
||||
capa.rules.Rule.from_yaml(textwrap.dedent('''
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: file
|
||||
features:
|
||||
- and:
|
||||
- characteristic: embedded pe
|
||||
- function:
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: file
|
||||
features:
|
||||
- and:
|
||||
- characteristic: nzxor
|
||||
- characteristic: switch
|
||||
'''))
|
||||
])
|
||||
- characteristic: embedded pe
|
||||
- function:
|
||||
- and:
|
||||
- characteristic: nzxor
|
||||
- characteristic: switch
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
# the file rule scope will have one rules:
|
||||
# - `test rule`
|
||||
assert len(rules.file_rules) == 1
|
||||
@@ -317,7 +311,9 @@ def test_invalid_rules():
|
||||
name: test rule
|
||||
features:
|
||||
- characteristic: number(1)
|
||||
'''))
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
with pytest.raises(capa.rules.InvalidRule):
|
||||
r = capa.rules.Rule.from_yaml(
|
||||
@@ -328,7 +324,9 @@ def test_invalid_rules():
|
||||
name: test rule
|
||||
features:
|
||||
- characteristic: count(number(100))
|
||||
'''))
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def test_number_symbol():
|
||||
@@ -654,16 +652,12 @@ def test_rules_namespace_dependencies():
|
||||
),
|
||||
]
|
||||
|
||||
r3 = set(
|
||||
map(lambda r: r.name, capa.rules.get_rules_and_dependencies(rules, "rule 3"))
|
||||
)
|
||||
r3 = set(map(lambda r: r.name, capa.rules.get_rules_and_dependencies(rules, "rule 3")))
|
||||
assert "rule 1" in r3
|
||||
assert "rule 2" not in r3
|
||||
assert "rule 4" not in r3
|
||||
|
||||
r4 = set(
|
||||
map(lambda r: r.name, capa.rules.get_rules_and_dependencies(rules, "rule 4"))
|
||||
)
|
||||
r4 = set(map(lambda r: r.name, capa.rules.get_rules_and_dependencies(rules, "rule 4")))
|
||||
assert "rule 1" in r4
|
||||
assert "rule 2" in r4
|
||||
assert "rule 3" not in r4
|
||||
|
||||
@@ -24,13 +24,9 @@ def extract_function_features(f):
|
||||
features = collections.defaultdict(set)
|
||||
for bb in f.basic_blocks:
|
||||
for insn in bb.instructions:
|
||||
for feature, va in capa.features.extractors.viv.insn.extract_features(
|
||||
f, bb, insn
|
||||
):
|
||||
for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
|
||||
features[feature].add(va)
|
||||
for feature, va in capa.features.extractors.viv.basicblock.extract_features(
|
||||
f, bb
|
||||
):
|
||||
for feature, va in capa.features.extractors.viv.basicblock.extract_features(f, bb):
|
||||
features[feature].add(va)
|
||||
for feature, va in capa.features.extractors.viv.function.extract_features(f):
|
||||
features[feature].add(va)
|
||||
@@ -40,9 +36,7 @@ def extract_function_features(f):
|
||||
def extract_basic_block_features(f, bb):
|
||||
features = set({})
|
||||
for insn in bb.instructions:
|
||||
for feature, _ in capa.features.extractors.viv.insn.extract_features(
|
||||
f, bb, insn
|
||||
):
|
||||
for feature, _ in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
|
||||
features.add(feature)
|
||||
for feature, _ in capa.features.extractors.viv.basicblock.extract_features(f, bb):
|
||||
features.add(feature)
|
||||
@@ -64,18 +58,14 @@ def test_api_features(mimikatz):
|
||||
|
||||
|
||||
def test_api_features_64_bit(sample_a198216798ca38f280dc413f8c57f2c2):
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4011B0)
|
||||
)
|
||||
features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4011B0))
|
||||
assert capa.features.insn.API("kernel32.GetStringTypeA") in features
|
||||
assert capa.features.insn.API("kernel32.GetStringTypeW") not in features
|
||||
assert capa.features.insn.API("kernel32.GetStringType") in features
|
||||
assert capa.features.insn.API("GetStringTypeA") in features
|
||||
assert capa.features.insn.API("GetStringType") in features
|
||||
# call via thunk in IDA Pro
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401CB0)
|
||||
)
|
||||
features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401CB0))
|
||||
assert capa.features.insn.API("msvcrt.vfprintf") in features
|
||||
assert capa.features.insn.API("vfprintf") in features
|
||||
|
||||
@@ -90,20 +80,14 @@ def test_string_features(mimikatz):
|
||||
|
||||
|
||||
def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)
|
||||
)
|
||||
wanted = capa.features.Bytes(
|
||||
b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61"
|
||||
)
|
||||
features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
|
||||
wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61")
|
||||
# use `==` rather than `is` because the result is not `True` but a truthy value.
|
||||
assert wanted.evaluate(features) == True
|
||||
|
||||
|
||||
def test_byte_features64(sample_lab21_01):
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_lab21_01.vw, 0x1400010C0)
|
||||
)
|
||||
features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400010C0))
|
||||
wanted = capa.features.Bytes(b"\x32\xA2\xDF\x2D\x99\x2B\x00\x00")
|
||||
# use `==` rather than `is` because the result is not `True` but a truthy value.
|
||||
assert wanted.evaluate(features) == True
|
||||
@@ -130,7 +114,7 @@ def test_offset_features(mimikatz):
|
||||
|
||||
def test_nzxor_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x410DFC))
|
||||
assert capa.features.Characteristic('nzxor') in features # 0x0410F0B
|
||||
assert capa.features.Characteristic("nzxor") in features # 0x0410F0B
|
||||
|
||||
|
||||
def get_bb_insn(f, va):
|
||||
@@ -147,9 +131,7 @@ def test_is_security_cookie(mimikatz):
|
||||
f = viv_utils.Function(mimikatz.vw, 0x410DFC)
|
||||
for va in [0x0410F0B]:
|
||||
bb, insn = get_bb_insn(f, va)
|
||||
assert (
|
||||
capa.features.extractors.viv.insn.is_security_cookie(f, bb, insn) == False
|
||||
)
|
||||
assert capa.features.extractors.viv.insn.is_security_cookie(f, bb, insn) == False
|
||||
|
||||
# security cookie initial set and final check
|
||||
f = viv_utils.Function(mimikatz.vw, 0x46C54A)
|
||||
@@ -170,7 +152,7 @@ def test_mnemonic_features(mimikatz):
|
||||
|
||||
def test_peb_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
|
||||
features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
|
||||
assert capa.features.Characteristic('peb access') in features
|
||||
assert capa.features.Characteristic("peb access") in features
|
||||
|
||||
|
||||
def test_file_section_name_features(mimikatz):
|
||||
@@ -186,7 +168,7 @@ def test_tight_loop_features(mimikatz):
|
||||
if bb.va != 0x402F8E:
|
||||
continue
|
||||
features = extract_basic_block_features(f, bb)
|
||||
assert capa.features.Characteristic('tight loop') in features
|
||||
assert capa.features.Characteristic("tight loop") in features
|
||||
assert capa.features.basicblock.BasicBlock() in features
|
||||
|
||||
|
||||
@@ -196,7 +178,7 @@ def test_tight_loop_bb_features(mimikatz):
|
||||
if bb.va != 0x402F8E:
|
||||
continue
|
||||
features = extract_basic_block_features(f, bb)
|
||||
assert capa.features.Characteristic('tight loop') in features
|
||||
assert capa.features.Characteristic("tight loop") in features
|
||||
assert capa.features.basicblock.BasicBlock() in features
|
||||
|
||||
|
||||
@@ -218,65 +200,70 @@ def test_file_import_name_features(mimikatz):
|
||||
|
||||
def test_cross_section_flow_features(sample_a198216798ca38f280dc413f8c57f2c2):
|
||||
features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0))
|
||||
assert capa.features.Characteristic('cross section flow') in features
|
||||
assert capa.features.Characteristic("cross section flow") in features
|
||||
|
||||
# this function has calls to some imports,
|
||||
# which should not trigger cross-section flow characteristic
|
||||
features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563))
|
||||
assert capa.features.Characteristic('cross section flow') not in features
|
||||
assert capa.features.Characteristic("cross section flow") not in features
|
||||
|
||||
|
||||
def test_segment_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
|
||||
features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
|
||||
assert capa.features.Characteristic('fs access') in features
|
||||
assert capa.features.Characteristic("fs access") in features
|
||||
|
||||
|
||||
def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x407970)
|
||||
)
|
||||
features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x407970))
|
||||
assert capa.features.insn.API("kernel32.CreateToolhelp32Snapshot") in features
|
||||
assert capa.features.insn.API("CreateToolhelp32Snapshot") in features
|
||||
|
||||
|
||||
def test_file_embedded_pe(pma_lab_12_04):
|
||||
features = extract_file_features(pma_lab_12_04.vw, pma_lab_12_04.path)
|
||||
assert capa.features.Characteristic('embedded pe') in features
|
||||
assert capa.features.Characteristic("embedded pe") in features
|
||||
|
||||
|
||||
def test_stackstring_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4556E5))
|
||||
assert capa.features.Characteristic('stack string') in features
|
||||
assert capa.features.Characteristic("stack string") in features
|
||||
|
||||
|
||||
def test_switch_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409411))
|
||||
assert capa.features.Characteristic('switch') in features
|
||||
assert capa.features.Characteristic("switch") in features
|
||||
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409393))
|
||||
assert capa.features.Characteristic('switch') not in features
|
||||
assert capa.features.Characteristic("switch") not in features
|
||||
|
||||
|
||||
def test_recursive_call_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
|
||||
features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003100))
|
||||
assert capa.features.Characteristic('recursive call') in features
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003100)
|
||||
)
|
||||
assert capa.features.Characteristic("recursive call") in features
|
||||
|
||||
features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007B00))
|
||||
assert capa.features.Characteristic('recursive call') not in features
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007B00)
|
||||
)
|
||||
assert capa.features.Characteristic("recursive call") not in features
|
||||
|
||||
|
||||
def test_loop_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
|
||||
features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003D30))
|
||||
assert capa.features.Characteristic('loop') in features
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003D30)
|
||||
)
|
||||
assert capa.features.Characteristic("loop") in features
|
||||
|
||||
features = extract_function_features(viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007250))
|
||||
assert capa.features.Characteristic('loop') not in features
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007250)
|
||||
)
|
||||
assert capa.features.Characteristic("loop") not in features
|
||||
|
||||
|
||||
def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
|
||||
features = extract_file_features(
|
||||
sample_bfb9b5391a13d0afd787e87ab90f14f5.vw,
|
||||
sample_bfb9b5391a13d0afd787e87ab90f14f5.path,
|
||||
sample_bfb9b5391a13d0afd787e87ab90f14f5.vw, sample_bfb9b5391a13d0afd787e87ab90f14f5.path,
|
||||
)
|
||||
assert capa.features.String("WarStop") in features # ASCII, offset 0x40EC
|
||||
assert capa.features.String("cimage/png") in features # UTF-16 LE, offset 0x350E
|
||||
@@ -284,39 +271,35 @@ def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
|
||||
|
||||
def test_function_calls_to(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
|
||||
assert capa.features.Characteristic('calls to') in features
|
||||
assert len(features[capa.features.Characteristic('calls to')]) == 1
|
||||
assert capa.features.Characteristic("calls to") in features
|
||||
assert len(features[capa.features.Characteristic("calls to")]) == 1
|
||||
|
||||
|
||||
def test_function_calls_to64(sample_lab21_01):
|
||||
features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400052D0)) # memcpy
|
||||
assert capa.features.Characteristic('calls to') in features
|
||||
assert len(features[capa.features.Characteristic('calls to')]) == 8
|
||||
assert capa.features.Characteristic("calls to") in features
|
||||
assert len(features[capa.features.Characteristic("calls to")]) == 8
|
||||
|
||||
|
||||
def test_function_calls_from(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
|
||||
assert capa.features.Characteristic('calls from') in features
|
||||
assert len(features[capa.features.Characteristic('calls from')]) == 23
|
||||
assert capa.features.Characteristic("calls from") in features
|
||||
assert len(features[capa.features.Characteristic("calls from")]) == 23
|
||||
|
||||
|
||||
def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)
|
||||
)
|
||||
features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
|
||||
assert len(features[capa.features.basicblock.BasicBlock()]) == 26
|
||||
|
||||
|
||||
def test_indirect_call_features(sample_a933a1a402775cfa94b6bee0963f4b46):
|
||||
features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA68A0))
|
||||
assert capa.features.Characteristic('indirect call') in features
|
||||
assert len(features[capa.features.Characteristic('indirect call')]) == 3
|
||||
assert capa.features.Characteristic("indirect call") in features
|
||||
assert len(features[capa.features.Characteristic("indirect call")]) == 3
|
||||
|
||||
|
||||
def test_indirect_calls_resolved(sample_c91887d861d9bd4a5872249b641bc9f9):
|
||||
features = extract_function_features(
|
||||
viv_utils.Function(sample_c91887d861d9bd4a5872249b641bc9f9.vw, 0x401A77)
|
||||
)
|
||||
features = extract_function_features(viv_utils.Function(sample_c91887d861d9bd4a5872249b641bc9f9.vw, 0x401A77))
|
||||
assert capa.features.insn.API("kernel32.CreatePipe") in features
|
||||
assert capa.features.insn.API("kernel32.SetHandleInformation") in features
|
||||
assert capa.features.insn.API("kernel32.CloseHandle") in features
|
||||
|
||||
Reference in New Issue
Block a user