mirror of
https://github.com/mandiant/capa.git
synced 2026-02-04 11:07:53 -08:00
Merge pull request #342 from fireeye/viv/extractor/api-thunk-chains
extract api features for thunk chains
This commit is contained in:
@@ -16,6 +16,9 @@ import capa.engine
|
||||
logger = logging.getLogger(__name__)
|
||||
MAX_BYTES_FEATURE_SIZE = 0x100
|
||||
|
||||
# thunks may be chained so we specify a delta to control the depth to which these chains are explored
|
||||
THUNK_CHAIN_DEPTH_DELTA = 5
|
||||
|
||||
# identifiers for supported architecture names that tweak a feature
|
||||
# for example, offset/x32
|
||||
ARCH_X32 = "x32"
|
||||
|
||||
@@ -12,17 +12,21 @@ import idautils
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
|
||||
from capa.features import (
|
||||
ARCH_X32,
|
||||
ARCH_X64,
|
||||
MAX_BYTES_FEATURE_SIZE,
|
||||
THUNK_CHAIN_DEPTH_DELTA,
|
||||
Bytes,
|
||||
String,
|
||||
Characteristic,
|
||||
)
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
# byte range within the first and returning basic blocks, this helps to reduce FP features
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
# thunks may be chained so we specify a delta here to control the depth to which these chains
|
||||
# are explored
|
||||
THUNK_CHAIN_DEPTH_DELTA = 0x5
|
||||
|
||||
|
||||
def get_arch(ctx):
|
||||
"""
|
||||
@@ -73,7 +77,7 @@ def check_for_api_call(ctx, insn):
|
||||
break
|
||||
|
||||
f = idaapi.get_func(ref)
|
||||
if not (f.flags & idaapi.FUNC_THUNK):
|
||||
if not f or not (f.flags & idaapi.FUNC_THUNK):
|
||||
break
|
||||
|
||||
if info:
|
||||
|
||||
20
capa/features/extractors/viv/helpers.py
Normal file
20
capa/features/extractors/viv/helpers.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from vivisect.const import XR_TO, REF_CODE
|
||||
|
||||
|
||||
def get_coderef_from(vw, va):
    """
    look up the code cross-references that originate at `va` in the workspace `vw`.

    return the destination (the `XR_TO` field) of the first reference reported,
    or None when there is no code reference from `va`.
    """
    code_xrefs = vw.getXrefsFrom(va, REF_CODE)
    # only the first reported xref is considered; any others are ignored
    return code_xrefs[0][XR_TO] if len(code_xrefs) > 0 else None
|
||||
@@ -7,11 +7,19 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import envi.memory
|
||||
import vivisect.const
|
||||
import envi.archs.i386.disasm
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
|
||||
import capa.features.extractors.viv.helpers
|
||||
from capa.features import (
|
||||
ARCH_X32,
|
||||
ARCH_X64,
|
||||
MAX_BYTES_FEATURE_SIZE,
|
||||
THUNK_CHAIN_DEPTH_DELTA,
|
||||
Bytes,
|
||||
String,
|
||||
Characteristic,
|
||||
)
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic
|
||||
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
|
||||
|
||||
@@ -86,21 +94,29 @@ def extract_insn_api_features(f, bb, insn):
|
||||
#
|
||||
# this is also how calls to internal functions may be decoded on x64.
|
||||
# see Lab21-01.exe_:0x140001178
|
||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
|
||||
target = insn.opers[0].getOperValue(insn)
|
||||
#
|
||||
# follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
|
||||
# 0x140059342 (viv) / 0x14005E0C0 (IDA)
|
||||
# 14005E0FF call j_ElfClearEventLogFileW (14005AAF8)
|
||||
# 14005AAF8 jmp ElfClearEventLogFileW (14005E196)
|
||||
# 14005E196 jmp cs:__imp_ElfClearEventLogFileW
|
||||
|
||||
try:
|
||||
thunk = f.vw.getFunctionMeta(target, "Thunk")
|
||||
except vivisect.exc.InvalidFunction:
|
||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
|
||||
imports = get_imports(f.vw)
|
||||
target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, insn.va)
|
||||
if not target:
|
||||
return
|
||||
else:
|
||||
if thunk:
|
||||
dll, _, symbol = thunk.rpartition(".")
|
||||
if symbol.startswith("ord"):
|
||||
symbol = "#" + symbol[len("ord") :]
|
||||
|
||||
for _ in range(THUNK_CHAIN_DEPTH_DELTA):
|
||||
if target in imports:
|
||||
dll, symbol = imports[target]
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield API(name), insn.va
|
||||
|
||||
target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, target)
|
||||
if not target:
|
||||
return
|
||||
|
||||
# call via import on x64
|
||||
# see Lab21-01.exe_:0x14000118C
|
||||
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
#!/usr/bin/env python2
|
||||
"""
|
||||
Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
show-features
|
||||
|
||||
Show the features that capa extracts from the given sample,
|
||||
@@ -55,14 +63,6 @@ Example::
|
||||
insn: 0x10001027: number(0x1)
|
||||
insn: 0x10001027: mnemonic(shl)
|
||||
...
|
||||
|
||||
Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
"""
|
||||
import sys
|
||||
import logging
|
||||
@@ -89,12 +89,12 @@ def main(argv=None):
|
||||
]
|
||||
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
|
||||
|
||||
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
|
||||
parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample")
|
||||
parser.add_argument("sample", type=str, help="Path to sample to analyze")
|
||||
parser.add_argument(
|
||||
"-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
|
||||
)
|
||||
parser.add_argument("-F", "--function", type=lambda x: int(x, 0), help="Show features for specific function")
|
||||
parser.add_argument("-F", "--function", type=lambda x: int(x, 0x10), help="Show features for specific function")
|
||||
args = parser.parse_args(args=argv)
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
@@ -122,6 +122,50 @@ def main(argv=None):
|
||||
else:
|
||||
functions = filter(lambda f: f.va == args.function, functions)
|
||||
|
||||
if args.function not in [f.va for f in functions]:
|
||||
print("0x%X not a function, creating it" % args.function)
|
||||
vw.makeFunction(args.function)
|
||||
functions = extractor.get_functions()
|
||||
functions = filter(lambda f: f.va == args.function, functions)
|
||||
|
||||
if len(functions) == 0:
|
||||
# report which address could not be resolved to a function
print("0x%X not a function" % args.function)
|
||||
return -1
|
||||
|
||||
print_features(functions, extractor)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def ida_main():
    """
    entry point used when the script runs inside IDA Pro.

    resolves the start address of the function at `idc.here()` and prints the
    features extracted from that function. when no function start address is
    resolved, prints the file-level features instead.

    returns 0 after printing function features, -1 when the extractor knows no
    function starting at the resolved address, and None on the file-features path.
    """
    function = idc.get_func_attr(idc.here(), idc.FUNCATTR_START)
    print("getting features for current function 0x%X" % function)

    extractor = capa.features.extractors.ida.IdaFeatureExtractor()

    if not function:
        # no enclosing function: fall back to dumping the file-level features
        for feature, va in extractor.extract_file_features():
            if va:
                print("file: 0x%08x: %s" % (va, feature))
            else:
                print("file: 0x00000000: %s" % (feature))
        return

    # build a real list instead of calling filter(): on Python 3 filter() returns
    # a lazy iterator, so the emptiness check below (previously len()) would raise
    functions = [f for f in extractor.get_functions() if f.start_ea == function]

    if not functions:
        print("0x%X not a function" % function)
        return -1

    print_features(functions, extractor)

    return 0
|
||||
|
||||
|
||||
def print_features(functions, extractor):
|
||||
for f in functions:
|
||||
for feature, va in extractor.extract_function_features(f):
|
||||
print("func: 0x%08x: %s" % (va, feature))
|
||||
@@ -138,8 +182,9 @@ def main(argv=None):
|
||||
# may be an issue while piping to less and encountering non-ascii characters
|
||||
continue
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
if capa.main.is_runtime_ida():
|
||||
ida_main()
|
||||
else:
|
||||
sys.exit(main())
|
||||
|
||||
@@ -144,6 +144,8 @@ def get_data_path_by_name(name):
|
||||
return os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
|
||||
elif name.startswith("64d9f"):
|
||||
return os.path.join(CD, "data", "64d9f7d96b99467f36e22fada623c3bb.dll_")
|
||||
elif name.startswith("82bf6"):
|
||||
return os.path.join(CD, "data", "82BF6347ACF15E5D883715DC289D8A2B.exe_")
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture")
|
||||
|
||||
@@ -180,6 +182,8 @@ def get_sample_md5_by_name(name):
|
||||
return "c91887d861d9bd4a5872249b641bc9f9"
|
||||
elif name.startswith("64d9f"):
|
||||
return "64d9f7d96b99467f36e22fada623c3bb"
|
||||
elif name.startswith("82bf6"):
|
||||
return "82bf6347acf15e5d883715dc289d8a2b"
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture")
|
||||
|
||||
@@ -369,6 +373,8 @@ FEATURE_PRESENCE_TESTS = [
|
||||
True,
|
||||
),
|
||||
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
|
||||
# insn/api: x64 nested thunk
|
||||
("82bf6", "function=0x140059342", capa.features.insn.API("ElfClearEventLogFile"), True),
|
||||
# insn/api: resolve indirect calls
|
||||
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
|
||||
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),
|
||||
|
||||
Reference in New Issue
Block a user