diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index 27fba835..7dcacbba 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -9,6 +9,7 @@ import sys import builtins +from capa.features.file import Import from capa.features.insn import API MIN_STACKSTRING_LEN = 8 @@ -21,25 +22,32 @@ def xor_static(data, i): return "".join(chr(ord(c) ^ i) for c in data) -def is_aw_function(function_name): +def is_aw_function(symbol): """ is the given function name an A/W function? these are variants of functions that, on Windows, accept either a narrow or wide string. """ - if len(function_name) < 2: + if len(symbol) < 2: return False # last character should be 'A' or 'W' - if function_name[-1] not in ("A", "W"): + if symbol[-1] not in ("A", "W"): return False # second to last character should be lowercase letter - return "a" <= function_name[-2] <= "z" or "0" <= function_name[-2] <= "9" + return "a" <= symbol[-2] <= "z" or "0" <= symbol[-2] <= "9" -def generate_api_features(apiname, va): +def is_ordinal(symbol): """ - for a given function name and address, generate API names. + is the given symbol an ordinal that is prefixed by "#"? + """ + return symbol[0] == "#" + + +def generate_symbols(dll, symbol): + """ + for a given dll and symbol name, generate variants. we over-generate features to make matching easier. these include: - kernel32.CreateFileA @@ -47,22 +55,20 @@ def generate_api_features(apiname, va): - CreateFileA - CreateFile """ - # (kernel32.CreateFileA, 0x401000) - yield API(apiname), va + # kernel32.CreateFileA + yield "%s.%s" % (dll, symbol) - if is_aw_function(apiname): - # (kernel32.CreateFile, 0x401000) - yield API(apiname[:-1]), va + if not is_ordinal(symbol): + # CreateFileA + yield symbol - if "." in apiname: - modname, impname = apiname.split(".") - # strip modname to support importname-only matching - # (CreateFileA, 0x401000) - yield API(impname), va + if is_aw_function(symbol): + # kernel32.CreateFile + yield "%s.%s" % (dll, symbol[:-1]) - if is_aw_function(impname): - # (CreateFile, 0x401000) - yield API(impname[:-1]), va + if not is_ordinal(symbol): + # CreateFile + yield symbol[:-1] def all_zeros(bytez): diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 2acc398b..0b8718a5 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -97,10 +97,16 @@ def extract_file_import_names(): """ for (ea, info) in capa.features.extractors.ida.helpers.get_file_imports().items(): if info[1]: - yield Import("%s.%s" % (info[0], info[1])), ea - yield Import(info[1]), ea - if info[2]: - yield Import("%s.#%s" % (info[0], str(info[2]))), ea + dll = info[0] + symbol = info[1] + elif info[2]: + dll = info[0] + symbol = "#%d" % (info[2]) + else: + continue + + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield Import(name), ea def extract_file_section_names(): diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 73f98056..665d24e6 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -13,7 +13,7 @@ import idautils import capa.features.extractors.helpers import capa.features.extractors.ida.helpers from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic -from capa.features.insn import Number, Offset, Mnemonic +from capa.features.insn import API, Number, Offset, Mnemonic # security cookie checks may perform non-zeroing XORs, these are expected within a certain # byte range within the first and returning basic blocks, this helps to reduce FP features @@ -77,8 +77,9 @@ def extract_insn_api_features(f, bb, insn): call dword [0x00473038] """ for api in check_for_api_call(f.ctx, insn): - for (feature, ea) in capa.features.extractors.helpers.generate_api_features(api, insn.ea): - yield feature, ea + dll, _, symbol = api.rpartition(".") + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.ea def extract_insn_number_features(f, bb, insn): diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index f0b4d6db..feb6381d 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -8,6 +8,7 @@ import PE.carve as pe_carve # vivisect PE +import capa.features.extractors.helpers import capa.features.extractors.strings from capa.features import String, Characteristic from capa.features.file import Export, Import, Section @@ -41,11 +42,9 @@ def extract_file_import_names(vw, file_path): if is_viv_ord_impname(impname): # replace ord prefix with # impname = "#%s" % impname[len("ord") :] - tinfo = "%s.%s" % (modname, impname) - yield Import(tinfo), va - else: - yield Import(tinfo), va - yield Import(impname), va + + for name in capa.features.extractors.helpers.generate_symbols(modname, impname): + yield Import(name), va def is_viv_ord_impname(impname): diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 7375bc37..48dbcd8b 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -12,7 +12,7 @@ import envi.archs.i386.disasm import capa.features.extractors.helpers from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic -from capa.features.insn import Number, Offset, Mnemonic +from capa.features.insn import API, Number, Offset, Mnemonic from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call # security cookie checks may perform non-zeroing XORs, these are expected within a certain @@ -47,11 +47,15 @@ def get_imports(vw): """ caching accessor to vivisect workspace imports avoids performance issues in vivisect when collecting locations + + returns: Dict[int, Tuple[str, str]] """ if "imports" in vw.metadata: return vw.metadata["imports"] else: - imports = {p[0]: p[3] for p in vw.getImports()} + imports = { + p[0]: (p[3].rpartition(".")[0], p[3].replace(".ord", ".#").rpartition(".")[2]) for p in vw.getImports() + } vw.metadata["imports"] = imports return imports @@ -72,9 +76,10 @@ def extract_insn_api_features(f, bb, insn): target = oper.getOperAddr(insn) imports = get_imports(f.vw) - if target in imports.keys(): - for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va): - yield feature, va + if target in imports: + dll, symbol = imports[target] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.va # call via thunk on x86, # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985 @@ -90,8 +95,11 @@ def extract_insn_api_features(f, bb, insn): return else: if thunk: - for feature, va in capa.features.extractors.helpers.generate_api_features(thunk, insn.va): - yield feature, va + dll, _, symbol = thunk.rpartition(".") + if symbol.startswith("ord"): + symbol = "#" + symbol[len("ord") :] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.va # call via import on x64 # see Lab21-01.exe_:0x14000118C @@ -100,9 +108,10 @@ def extract_insn_api_features(f, bb, insn): target = op.getOperAddr(insn) imports = get_imports(f.vw) - if target in imports.keys(): - for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va): - yield feature, va + if target in imports: + dll, symbol = imports[target] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.va elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper): try: @@ -116,9 +125,10 @@ def extract_insn_api_features(f, bb, insn): return imports = get_imports(f.vw) - if target in imports.keys(): - for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va): - yield feature, va + if target in imports: + dll, symbol = imports[target] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.va def extract_insn_number_features(f, bb, insn): diff --git a/tests/fixtures.py b/tests/fixtures.py index 8ca5437d..d0754b2d 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -289,6 +289,10 @@ FEATURE_PRESENCE_TESTS = [ ("mimikatz", "file", capa.features.file.Import("#11"), False), ("mimikatz", "file", capa.features.file.Import("#nope"), False), ("mimikatz", "file", capa.features.file.Import("nope"), False), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True), + ("mimikatz", "file", capa.features.file.Import("CryptAcquireContextW"), True), + ("mimikatz", "file", capa.features.file.Import("CryptAcquireContext"), True), # function/characteristic(loop) ("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True), ("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False),