Merge pull request #274 from fireeye/fix-246

fix 246
This commit is contained in:
Willi Ballenthin
2020-09-01 15:51:58 -06:00
committed by GitHub
6 changed files with 70 additions and 44 deletions

View File

@@ -9,6 +9,7 @@
import sys
import builtins
from capa.features.file import Import
from capa.features.insn import API
MIN_STACKSTRING_LEN = 8
@@ -21,25 +22,32 @@ def xor_static(data, i):
return "".join(chr(ord(c) ^ i) for c in data)
def is_aw_function(symbol):
    """
    is the given symbol name an A/W function?
    these are variants of functions that, on Windows, accept either a narrow or wide string.

    args:
      symbol (str): function name, such as "CreateFileA".

    returns:
      bool: True when the name ends with "A" or "W" and the character before
        that is a lowercase ASCII letter or an ASCII digit.
    """
    # need at least one character in front of the A/W suffix.
    if len(symbol) < 2:
        return False

    # last character should be 'A' or 'W'
    if symbol[-1] not in ("A", "W"):
        return False

    # second to last character should be a lowercase letter or a digit,
    # so a trailing A/W that follows an uppercase letter or punctuation is rejected.
    return "a" <= symbol[-2] <= "z" or "0" <= symbol[-2] <= "9"
def generate_api_features(apiname, va):
def is_ordinal(symbol):
    """
    is the given symbol an ordinal that is prefixed by "#"?

    args:
      symbol (str): symbol name, such as "#11" or "CreateFileA".

    returns:
      bool: True when the symbol starts with "#"; False otherwise,
        including for the empty string.
    """
    # use startswith rather than indexing symbol[0], so that an empty symbol
    # is reported as "not an ordinal" instead of raising IndexError.
    return symbol.startswith("#")
def generate_symbols(dll, symbol):
    """
    for a given dll and symbol name, generate variants.
    we over-generate features to make matching easier.
    these include:
      - kernel32.CreateFileA
      - kernel32.CreateFile
      - CreateFileA
      - CreateFile

    symbols that are ordinals (prefixed by "#") are never emitted without the dll prefix.

    args:
      dll (str): module name, such as "kernel32".
      symbol (str): symbol name, such as "CreateFileA" or "#11".

    yields:
      str: each generated name variant.
    """
    # kernel32.CreateFileA
    yield "%s.%s" % (dll, symbol)

    if not is_ordinal(symbol):
        # CreateFileA
        yield symbol

    if is_aw_function(symbol):
        # strip the trailing A/W to also match the suffix-less name.
        # kernel32.CreateFile
        yield "%s.%s" % (dll, symbol[:-1])

        if not is_ordinal(symbol):
            # CreateFile
            yield symbol[:-1]
def all_zeros(bytez):

View File

@@ -97,10 +97,16 @@ def extract_file_import_names():
"""
for (ea, info) in capa.features.extractors.ida.helpers.get_file_imports().items():
if info[1]:
yield Import("%s.%s" % (info[0], info[1])), ea
yield Import(info[1]), ea
if info[2]:
yield Import("%s.#%s" % (info[0], str(info[2]))), ea
dll = info[0]
symbol = info[1]
elif info[2]:
dll = info[0]
symbol = "#%d" % (info[2])
else:
continue
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield Import(name), ea
def extract_file_section_names():

View File

@@ -13,7 +13,7 @@ import idautils
import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
from capa.features.insn import Number, Offset, Mnemonic
from capa.features.insn import API, Number, Offset, Mnemonic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features
@@ -77,8 +77,9 @@ def extract_insn_api_features(f, bb, insn):
call dword [0x00473038]
"""
for api in check_for_api_call(f.ctx, insn):
for (feature, ea) in capa.features.extractors.helpers.generate_api_features(api, insn.ea):
yield feature, ea
dll, _, symbol = api.rpartition(".")
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name), insn.ea
def extract_insn_number_features(f, bb, insn):

View File

@@ -8,6 +8,7 @@
import PE.carve as pe_carve # vivisect PE
import capa.features.extractors.helpers
import capa.features.extractors.strings
from capa.features import String, Characteristic
from capa.features.file import Export, Import, Section
@@ -41,11 +42,9 @@ def extract_file_import_names(vw, file_path):
if is_viv_ord_impname(impname):
# replace ord prefix with #
impname = "#%s" % impname[len("ord") :]
tinfo = "%s.%s" % (modname, impname)
yield Import(tinfo), va
else:
yield Import(tinfo), va
yield Import(impname), va
for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
yield Import(name), va
def is_viv_ord_impname(impname):

View File

@@ -12,7 +12,7 @@ import envi.archs.i386.disasm
import capa.features.extractors.helpers
from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
from capa.features.insn import Number, Offset, Mnemonic
from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -47,11 +47,15 @@ def get_imports(vw):
"""
caching accessor to vivisect workspace imports
avoids performance issues in vivisect when collecting locations
returns: Dict[int, Tuple[str, str]]
"""
if "imports" in vw.metadata:
return vw.metadata["imports"]
else:
imports = {p[0]: p[3] for p in vw.getImports()}
imports = {
p[0]: (p[3].rpartition(".")[0], p[3].replace(".ord", ".#").rpartition(".")[2]) for p in vw.getImports()
}
vw.metadata["imports"] = imports
return imports
@@ -72,9 +76,10 @@ def extract_insn_api_features(f, bb, insn):
target = oper.getOperAddr(insn)
imports = get_imports(f.vw)
if target in imports.keys():
for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va):
yield feature, va
if target in imports:
dll, symbol = imports[target]
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name), insn.va
# call via thunk on x86,
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
@@ -90,8 +95,11 @@ def extract_insn_api_features(f, bb, insn):
return
else:
if thunk:
for feature, va in capa.features.extractors.helpers.generate_api_features(thunk, insn.va):
yield feature, va
dll, _, symbol = thunk.rpartition(".")
if symbol.startswith("ord"):
symbol = "#" + symbol[len("ord") :]
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name), insn.va
# call via import on x64
# see Lab21-01.exe_:0x14000118C
@@ -100,9 +108,10 @@ def extract_insn_api_features(f, bb, insn):
target = op.getOperAddr(insn)
imports = get_imports(f.vw)
if target in imports.keys():
for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va):
yield feature, va
if target in imports:
dll, symbol = imports[target]
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name), insn.va
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
try:
@@ -116,9 +125,10 @@ def extract_insn_api_features(f, bb, insn):
return
imports = get_imports(f.vw)
if target in imports.keys():
for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va):
yield feature, va
if target in imports:
dll, symbol = imports[target]
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name), insn.va
def extract_insn_number_features(f, bb, insn):

View File

@@ -289,6 +289,10 @@ FEATURE_PRESENCE_TESTS = [
("mimikatz", "file", capa.features.file.Import("#11"), False),
("mimikatz", "file", capa.features.file.Import("#nope"), False),
("mimikatz", "file", capa.features.file.Import("nope"), False),
("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True),
("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True),
("mimikatz", "file", capa.features.file.Import("CryptAcquireContextW"), True),
("mimikatz", "file", capa.features.file.Import("CryptAcquireContext"), True),
# function/characteristic(loop)
("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True),
("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False),