Ignore DLL names for API features (#1824)

* ignore DLL name for api features

* keep DLL name for import features

* fix tests
This commit is contained in:
Moritz
2023-10-20 13:39:15 +02:00
committed by GitHub
parent 62d4b008c5
commit c9df78252a
13 changed files with 88 additions and 47 deletions

View File

@@ -20,6 +20,7 @@
- protobuf: deprecate `RuleMetadata.scope` in favor of `RuleMetadata.scopes` @williballenthin
- protobuf: deprecate `Metadata.analysis` in favor of `Metadata.analysis2` that is dynamic analysis aware @williballenthin
- update freeze format to v3, adding support for dynamic analysis @williballenthin
- extractor: ignore DLL name for api features #1815 @mr-tz
### New Rules (19)

View File

@@ -115,13 +115,13 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
for sym in bv.get_symbols_of_type(SymbolType.ImportAddressSymbol):
lib_name = str(sym.namespace)
addr = AbsoluteVirtualAddress(sym.address)
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name, include_dll=True):
yield Import(name), addr
ordinal = sym.ordinal
if ordinal != 0 and (lib_name != ""):
ordinal_name = f"#{ordinal}"
for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name, include_dll=True):
yield Import(name), addr

View File

@@ -58,7 +58,7 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]
if not function.name:
continue
for name in generate_symbols(library.dll, function.name):
for name in generate_symbols(library.dll, function.name, include_dll=True):
yield Import(name), AbsoluteVirtualAddress(function.address)
@@ -126,6 +126,7 @@ FILE_HANDLERS = (
extract_used_regkeys,
extract_used_files,
extract_used_mutexes,
extract_used_commands,
extract_used_apis,
extract_used_services,
)

View File

@@ -10,7 +10,7 @@ import logging
from typing import Iterator
from capa.features.address import DynamicCallAddress
from capa.features.extractors.helpers import is_aw_function
from capa.features.extractors.helpers import generate_symbols
from capa.features.extractors.cape.models import Process
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
@@ -25,22 +25,8 @@ def get_calls(ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
if call.thread_id != tid:
continue
for symbol in generate_symbols(call.api):
for symbol in generate_symbols("", call.api):
call.api = symbol
addr = DynamicCallAddress(thread=th.address, id=call_index)
yield CallHandle(address=addr, inner=call)
def generate_symbols(symbol: str) -> Iterator[str]:
"""
for a given symbol name, generate variants.
we over-generate features to make matching easier.
"""
# CreateFileA
yield symbol
if is_aw_function(symbol):
# CreateFile
yield symbol[:-1]

View File

@@ -57,7 +57,7 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor
for imp in get_dotnet_unmanaged_imports(pe):
# like kernel32.CreateFileA
for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method):
for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method, include_dll=True):
yield Import(name), DNTokenAddress(imp.token)

View File

@@ -112,7 +112,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
if "Ordinal_" in fstr[1]:
fstr[1] = f"#{fstr[1].split('_')[1]}"
for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1]):
for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1], include_dll=True):
yield Import(name), AbsoluteVirtualAddress(addr)

View File

@@ -41,15 +41,20 @@ def is_ordinal(symbol: str) -> bool:
return False
def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
def generate_symbols(dll: str, symbol: str, include_dll=False) -> Iterator[str]:
"""
for a given dll and symbol name, generate variants.
we over-generate features to make matching easier.
these include:
- kernel32.CreateFileA
- kernel32.CreateFile
- CreateFileA
- CreateFile
- ws2_32.#1
note that since capa v7 only `import` features include DLL names:
- kernel32.CreateFileA
- kernel32.CreateFile
for `api` features dll names are good for documentation but not used during matching
"""
# normalize dll name
dll = dll.lower()
@@ -58,25 +63,27 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
dll = dll[0:-4] if dll.endswith(".dll") else dll
dll = dll[0:-4] if dll.endswith(".drv") else dll
# kernel32.CreateFileA
yield f"{dll}.{symbol}"
if include_dll:
# ws2_32.#1
# kernel32.CreateFileA
yield f"{dll}.{symbol}"
if not is_ordinal(symbol):
# CreateFileA
yield symbol
if is_aw_function(symbol):
# kernel32.CreateFile
yield f"{dll}.{symbol[:-1]}"
if include_dll:
# kernel32.CreateFile
yield f"{dll}.{symbol[:-1]}"
if not is_ordinal(symbol):
if is_aw_function(symbol):
# CreateFile
yield symbol[:-1]
def reformat_forwarded_export_name(forwarded_name: str) -> str:
"""
a forwarded export has a DLL name/path an symbol name.
a forwarded export has a DLL name/path and symbol name.
we want the former to be lowercase, and the latter to be verbatim.
"""

View File

@@ -110,7 +110,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
if info[1] and info[2]:
# e.g. in mimikatz: ('cabinet', 'FCIAddFile', 11L)
# extract by name here and by ordinal below
for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]):
for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1], include_dll=True):
yield Import(name), addr
dll = info[0]
symbol = f"#{info[2]}"
@@ -123,7 +123,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
else:
continue
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol, include_dll=True):
yield Import(name), addr
for ea, info in capa.features.extractors.ida.helpers.get_file_externs().items():

View File

@@ -84,7 +84,7 @@ def extract_file_import_names(pe, **kwargs):
except UnicodeDecodeError:
continue
for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True):
yield Import(name), AbsoluteVirtualAddress(imp.address)

View File

@@ -73,7 +73,7 @@ def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]
impname = "#" + impname[len("ord") :]
addr = AbsoluteVirtualAddress(va)
for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True):
yield Import(name), addr

View File

@@ -595,6 +595,13 @@ def pop_statement_description_entry(d):
return description["description"]
def trim_dll_part(api: str) -> str:
    """
    strip the leading DLL name from an API name, like:

      - kernel32.CreateFileA -> CreateFileA

    the name is returned unchanged when it is not of the form `<dll>.<symbol>`:

      - CreateFileA: there is no DLL part
      - System.IO.File::Delete: more than one dot
      - System.Convert::FromBase64String: the dot is part of a .NET namespace
      - ws2_32.#1: an ordinal does not identify a function without its DLL
      - kernel32.: there is no symbol left after trimming

    args:
      api: the API name, optionally prefixed with a DLL name.

    returns:
      the API name without its DLL part, or `api` itself if nothing was trimmed.
    """
    # .NET names use "::" between class and member,
    # so a single dot here separates namespace and class, not DLL and symbol.
    if "::" in api:
        return api

    # only names with exactly one dot can be `<dll>.<symbol>`
    if api.count(".") != 1:
        return api

    _, _, symbol = api.partition(".")

    # never produce an empty name, and keep the DLL with ordinals like `#1`
    if not symbol or symbol.startswith("#"):
        return api

    return symbol
def build_statements(d, scopes: Scopes):
if len(d.keys()) > 2:
raise InvalidRule("too many statements")
@@ -722,6 +729,10 @@ def build_statements(d, scopes: Scopes):
# count(number(0x100 = description))
if term != "string":
value, description = parse_description(arg, term)
if term == "api":
value = trim_dll_part(value)
feature = Feature(value, description=description)
else:
# arg is string (which doesn't support inline descriptions), like:
@@ -816,6 +827,10 @@ def build_statements(d, scopes: Scopes):
else:
Feature = parse_feature(key)
value, description = parse_description(d[key], key, d.get("description"))
if key == "api":
value = trim_dll_part(value)
try:
feature = Feature(value, description=description)
except ValueError as e:

View File

@@ -779,6 +779,7 @@ FEATURE_PRESENCE_TESTS = sorted(
("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True),
("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True),
("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True),
("mimikatz", "file", capa.features.file.Import("IsWow64Process"), True),
("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True),
("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True),
("mimikatz", "file", capa.features.file.Import("#11"), False),
@@ -859,11 +860,12 @@ FEATURE_PRESENCE_TESTS = sorted(
# .text:004018C0 8D 4B 02 lea ecx, [ebx+2]
("mimikatz", "function=0x401873,bb=0x4018B2,insn=0x4018C0", capa.features.insn.Number(0x2), True),
# insn/api
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True),
# not extracting dll anymore
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True),
@@ -872,7 +874,8 @@ FEATURE_PRESENCE_TESTS = sorted(
("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False),
# insn/api: thunk
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True),
# not extracting dll anymore
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
# insn/api: x64
(
@@ -896,10 +899,15 @@ FEATURE_PRESENCE_TESTS = sorted(
("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
# insn/api: resolve indirect calls
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True),
# not extracting dll anymore
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("SetHandleInformation"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("CloseHandle"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("WriteFile"), True),
# insn/string
("mimikatz", "function=0x40105D", capa.features.common.String("SCardControl"), True),
("mimikatz", "function=0x40105D", capa.features.common.String("SCardTransmit"), True),
@@ -1074,7 +1082,8 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True),
("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False),
("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True),
("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True),
# not extracting dll anymore
("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), False),
("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True),
("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), True),
("_1c444", "token=0x6000018", capa.features.common.Characteristic("calls to"), False),

View File

@@ -16,7 +16,7 @@ import capa.features.common
import capa.features.address
from capa.engine import Or
from capa.features.file import FunctionName
from capa.features.insn import Number, Offset, Property
from capa.features.insn import API, Number, Offset, Property
from capa.features.common import (
OS,
OS_LINUX,
@@ -937,6 +937,28 @@ def test_count_number_symbol():
assert bool(r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}})) is True
def test_count_api():
    # the rule names the API with its DLL; the assertions below check which
    # extracted API features satisfy `count(...) == 1`.
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: thread
            features:
                - or:
                    - count(api(kernel32.CreateFileA)): 1
        """
    )
    r = capa.rules.Rule.from_yaml(rule)

    # apis including their DLL names are not extracted anymore
    assert bool(r.evaluate({API("kernel32.CreateFileA"): set()})) is False
    assert bool(r.evaluate({API("kernel32.CreateFile"): set()})) is False

    # with an empty location set there is nothing to count, whatever the name is,
    # so also check DLL-qualified features that do have a location.
    assert bool(r.evaluate({API("kernel32.CreateFileA"): {ADDR1}})) is False
    assert bool(r.evaluate({API("kernel32.CreateFile"): {ADDR1}})) is False

    # only the exact symbol named in the rule is counted, not its variant without the A suffix
    assert bool(r.evaluate({API("CreateFile"): {ADDR1}})) is False
    assert bool(r.evaluate({API("CreateFileA"): {ADDR1}})) is True
def test_invalid_number():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(