From 5b349c1df88b93225f57fd21bda0c42dc413c200 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 16:59:55 -0600 Subject: [PATCH 1/5] tests: add feature tests for #246 --- tests/fixtures.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index f5eb6e53..e1f33225 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -281,6 +281,8 @@ FEATURE_PRESENCE_TESTS = [ ("mimikatz", "file", capa.features.file.Import("#11"), False), ("mimikatz", "file", capa.features.file.Import("#nope"), False), ("mimikatz", "file", capa.features.file.Import("nope"), False), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True), # function/characteristic(loop) ("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True), ("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False), From 090ec46ca4e6aec6959fc0600f4d6fc687156cec Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 17:13:10 -0600 Subject: [PATCH 2/5] features: extract import A/W variants and their base names closes #246 --- capa/features/extractors/helpers.py | 29 ++++++++++++++++++++++++++++ capa/features/extractors/ida/file.py | 14 ++++++++++---- capa/features/extractors/viv/file.py | 11 +++++------ tests/fixtures.py | 2 ++ 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index 27fba835..2b1c66d6 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -9,6 +9,7 @@ import sys import builtins +from capa.features.file import Import from capa.features.insn import API MIN_STACKSTRING_LEN = 8 @@ -65,6 +66,34 @@ def generate_api_features(apiname, va): yield API(impname[:-1]), va +def is_ordinal(symbol): + return symbol[0] == "#" + + +def generate_import_features(dll, symbol, va): + """ + for a given dll, symbol, and address, generate import features. + we over-generate features to make matching easier. + these include: + - kernel32.CreateFileA + - kernel32.CreateFile + - CreateFileA + - CreateFile + """ + # (kernel32.CreateFileA, 0x401000) + yield Import(dll + "." + symbol), va + # (CreateFileA, 0x401000) + if not is_ordinal(symbol): + yield Import(symbol), va + + if is_aw_function(symbol): + # (kernel32.CreateFile, 0x401000) + yield Import(dll + "." + symbol[:-1]), va + # (CreateFile, 0x401000) + if not is_ordinal(symbol): + yield Import(symbol[:-1]), va + + def all_zeros(bytez): return all(b == 0 for b in builtins.bytes(bytez)) diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 2acc398b..6721344c 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -97,10 +97,16 @@ def extract_file_import_names(): """ for (ea, info) in capa.features.extractors.ida.helpers.get_file_imports().items(): if info[1]: - yield Import("%s.%s" % (info[0], info[1])), ea - yield Import(info[1]), ea - if info[2]: - yield Import("%s.#%s" % (info[0], str(info[2]))), ea + dll = info[0] + symbol = info[1] + elif info[2]: + dll = info[0] + symbol = "#%d" % (info[2]) + else: + continue + + for feature, ea in capa.features.extractors.helpers.generate_import_features(dll, symbol, ea): + yield feature, ea def extract_file_section_names(): diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index f0b4d6db..32086e28 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -9,8 +9,9 @@ import PE.carve as pe_carve # vivisect PE import capa.features.extractors.strings +import capa.features.extractors.helpers from capa.features import String, Characteristic -from capa.features.file import Export, Import, Section +from capa.features.file import Export, Section def extract_file_embedded_pe(vw, file_path): @@ -41,11 +42,9 @@ def extract_file_import_names(vw, file_path): if is_viv_ord_impname(impname): # replace ord prefix with # impname = "#%s" % impname[len("ord") :] - tinfo = "%s.%s" % (modname, impname) - yield Import(tinfo), va - else: - yield Import(tinfo), va - yield Import(impname), va + + for feature, va in capa.features.extractors.helpers.generate_import_features(modname, impname, va): + yield feature, va def is_viv_ord_impname(impname): diff --git a/tests/fixtures.py b/tests/fixtures.py index e1f33225..2351730f 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -283,6 +283,8 @@ FEATURE_PRESENCE_TESTS = [ ("mimikatz", "file", capa.features.file.Import("nope"), False), ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True), ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True), + ("mimikatz", "file", capa.features.file.Import("CryptAcquireContextW"), True), + ("mimikatz", "file", capa.features.file.Import("CryptAcquireContext"), True), # function/characteristic(loop) ("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True), ("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False), From 13abd175aa47f53a0bc42202c14b23715f28ab87 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 17:15:30 -0600 Subject: [PATCH 3/5] pep8 --- capa/features/extractors/viv/file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index 32086e28..eff879db 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -8,8 +8,8 @@ import PE.carve as pe_carve # vivisect PE -import capa.features.extractors.strings import capa.features.extractors.helpers +import capa.features.extractors.strings from capa.features import String, Characteristic from capa.features.file import Export, Section From 2b2656c2a334677ae0ea80800418d1f61cc0233c Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 1 Sep 2020 01:04:51 -0600 Subject: [PATCH 4/5] features: extractors: merge import and API variant generators --- capa/features/extractors/helpers.py | 61 +++++++++------------------- capa/features/extractors/ida/file.py | 4 +- capa/features/extractors/ida/insn.py | 7 ++-- capa/features/extractors/viv/file.py | 6 +-- capa/features/extractors/viv/insn.py | 36 ++++++++++------ 5 files changed, 51 insertions(+), 63 deletions(-) diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index 2b1c66d6..7dcacbba 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -22,57 +22,32 @@ def xor_static(data, i): return "".join(chr(ord(c) ^ i) for c in data) -def is_aw_function(function_name): +def is_aw_function(symbol): """ is the given function name an A/W function? these are variants of functions that, on Windows, accept either a narrow or wide string. """ - if len(function_name) < 2: + if len(symbol) < 2: return False # last character should be 'A' or 'W' - if function_name[-1] not in ("A", "W"): + if symbol[-1] not in ("A", "W"): return False # second to last character should be lowercase letter - return "a" <= function_name[-2] <= "z" or "0" <= function_name[-2] <= "9" - - -def generate_api_features(apiname, va): - """ - for a given function name and address, generate API names. - we over-generate features to make matching easier. - these include: - - kernel32.CreateFileA - - kernel32.CreateFile - - CreateFileA - - CreateFile - """ - # (kernel32.CreateFileA, 0x401000) - yield API(apiname), va - - if is_aw_function(apiname): - # (kernel32.CreateFile, 0x401000) - yield API(apiname[:-1]), va - - if "." in apiname: - modname, impname = apiname.split(".") - # strip modname to support importname-only matching - # (CreateFileA, 0x401000) - yield API(impname), va - - if is_aw_function(impname): - # (CreateFile, 0x401000) - yield API(impname[:-1]), va + return "a" <= symbol[-2] <= "z" or "0" <= symbol[-2] <= "9" def is_ordinal(symbol): + """ + is the given symbol an ordinal that is prefixed by "#"? + """ return symbol[0] == "#" -def generate_import_features(dll, symbol, va): +def generate_symbols(dll, symbol): """ - for a given dll, symbol, and address, generate import features. + for a given dll and symbol name, generate variants. we over-generate features to make matching easier. these include: - kernel32.CreateFileA @@ -80,18 +55,20 @@ def generate_import_features(dll, symbol, va): - CreateFileA - CreateFile """ - # (kernel32.CreateFileA, 0x401000) - yield Import(dll + "." + symbol), va - # (CreateFileA, 0x401000) + # kernel32.CreateFileA + yield "%s.%s" % (dll, symbol) + if not is_ordinal(symbol): - yield Import(symbol), va + # CreateFileA + yield symbol if is_aw_function(symbol): - # (kernel32.CreateFile, 0x401000) - yield Import(dll + "." + symbol[:-1]), va - # (CreateFile, 0x401000) + # kernel32.CreateFile + yield "%s.%s" % (dll, symbol[:-1]) + if not is_ordinal(symbol): - yield Import(symbol[:-1]), va + # CreateFile + yield symbol[:-1] def all_zeros(bytez): diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 6721344c..0b8718a5 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -105,8 +105,8 @@ def extract_file_import_names(): else: continue - for feature, ea in capa.features.extractors.helpers.generate_import_features(dll, symbol, ea): - yield feature, ea + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield Import(name), ea def extract_file_section_names(): diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 73f98056..205cc18c 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -13,7 +13,7 @@ import idautils import capa.features.extractors.helpers import capa.features.extractors.ida.helpers from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic -from capa.features.insn import Number, Offset, Mnemonic +from capa.features.insn import Number, Offset, Mnemonic, API # security cookie checks may perform non-zeroing XORs, these are expected within a certain # byte range within the first and returning basic blocks, this helps to reduce FP features @@ -77,8 +77,9 @@ def extract_insn_api_features(f, bb, insn): call dword [0x00473038] """ for api in check_for_api_call(f.ctx, insn): - for (feature, ea) in capa.features.extractors.helpers.generate_api_features(api, insn.ea): - yield feature, ea + dll, _, symbol = api.rpartition(".") + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.ea def extract_insn_number_features(f, bb, insn): diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index eff879db..feb6381d 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -11,7 +11,7 @@ import PE.carve as pe_carve # vivisect PE import capa.features.extractors.helpers import capa.features.extractors.strings from capa.features import String, Characteristic -from capa.features.file import Export, Section +from capa.features.file import Export, Import, Section def extract_file_embedded_pe(vw, file_path): @@ -43,8 +43,8 @@ def extract_file_import_names(vw, file_path): # replace ord prefix with # impname = "#%s" % impname[len("ord") :] - for feature, va in capa.features.extractors.helpers.generate_import_features(modname, impname, va): - yield feature, va + for name in capa.features.extractors.helpers.generate_symbols(modname, impname): + yield Import(name), va def is_viv_ord_impname(impname): diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 7375bc37..48dbcd8b 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -12,7 +12,7 @@ import envi.archs.i386.disasm import capa.features.extractors.helpers from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic -from capa.features.insn import Number, Offset, Mnemonic +from capa.features.insn import API, Number, Offset, Mnemonic from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call # security cookie checks may perform non-zeroing XORs, these are expected within a certain @@ -47,11 +47,15 @@ def get_imports(vw): """ caching accessor to vivisect workspace imports avoids performance issues in vivisect when collecting locations + + returns: Dict[int, Tuple[str, str]] """ if "imports" in vw.metadata: return vw.metadata["imports"] else: - imports = {p[0]: p[3] for p in vw.getImports()} + imports = { + p[0]: (p[3].rpartition(".")[0], p[3].replace(".ord", ".#").rpartition(".")[2]) for p in vw.getImports() + } vw.metadata["imports"] = imports return imports @@ -72,9 +76,10 @@ def extract_insn_api_features(f, bb, insn): target = oper.getOperAddr(insn) imports = get_imports(f.vw) - if target in imports.keys(): - for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va): - yield feature, va + if target in imports: + dll, symbol = imports[target] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.va # call via thunk on x86, # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985 @@ -90,8 +95,11 @@ def extract_insn_api_features(f, bb, insn): return else: if thunk: - for feature, va in capa.features.extractors.helpers.generate_api_features(thunk, insn.va): - yield feature, va + dll, _, symbol = thunk.rpartition(".") + if symbol.startswith("ord"): + symbol = "#" + symbol[len("ord") :] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.va # call via import on x64 # see Lab21-01.exe_:0x14000118C @@ -100,9 +108,10 @@ def extract_insn_api_features(f, bb, insn): target = op.getOperAddr(insn) imports = get_imports(f.vw) - if target in imports.keys(): - for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va): - yield feature, va + if target in imports: + dll, symbol = imports[target] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.va elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper): try: @@ -116,9 +125,10 @@ def extract_insn_api_features(f, bb, insn): return imports = get_imports(f.vw) - if target in imports.keys(): - for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va): - yield feature, va + if target in imports: + dll, symbol = imports[target] + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name), insn.va def extract_insn_number_features(f, bb, insn): From 99d5f0638323c967f98a63c494fe518568649545 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 1 Sep 2020 15:50:24 -0600 Subject: [PATCH 5/5] pep8 --- capa/features/extractors/ida/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 205cc18c..665d24e6 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -13,7 +13,7 @@ import idautils import capa.features.extractors.helpers import capa.features.extractors.ida.helpers from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic -from capa.features.insn import Number, Offset, Mnemonic, API +from capa.features.insn import API, Number, Offset, Mnemonic # security cookie checks may perform non-zeroing XORs, these are expected within a certain # byte range within the first and returning basic blocks, this helps to reduce FP features