mirror of
https://github.com/mandiant/capa.git
synced 2025-12-09 22:30:36 -08:00
@@ -140,6 +140,33 @@ def extract_insn_number_features(f, bb, insn):
|
||||
yield Number(v), insn.va
|
||||
|
||||
|
||||
def derefs(vw, p):
    """
    recursively follow the given pointer, yielding the valid memory addresses along the way.
    useful when you may have a pointer to string, or pointer to pointer to string, etc.

    this is a "do what i mean" type of helper function.

    args:
      vw: object exposing `isValidPointer(va)` and `readMemoryPtr(va)`.
      p: the candidate pointer at which to start.

    yields:
      each address in the chain accepted by `vw.isValidPointer`, beginning with `p` itself.
      the walk ends at the first invalid address, at a pointer that points to itself,
      when the pointer at an address cannot be read, or once the chain gets unreasonably deep.
    """
    depth = 0
    while True:
        if not vw.isValidPointer(p):
            return
        yield p

        try:
            target = vw.readMemoryPtr(p)
        except Exception:
            # the read may fail even though `p` passed the validity check
            # (presumably when too few bytes are mapped at `p`, e.g. at the end of a segment).
            # the exception type raised by the workspace is not visible here, hence the broad catch.
            # this is a best-effort helper: stop following the chain instead of aborting the caller.
            return

        # sanity: pointer points to self
        if target == p:
            return

        # sanity: avoid chains of pointers that are unreasonably deep
        depth += 1
        if depth > 10:
            return

        p = target
|
||||
|
||||
|
||||
def extract_insn_bytes_features(f, bb, insn):
|
||||
"""
|
||||
parse byte sequence features from the given instruction.
|
||||
@@ -157,28 +184,32 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
# handle case like:
|
||||
# movzx ecx, ds:byte_423258[eax]
|
||||
v = oper.disp
|
||||
elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
|
||||
# like 0x401000 in `mov eax, 0x401000[2 * ebx]`
|
||||
v = oper.imm
|
||||
elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
|
||||
# see: Lab21-01.exe_:0x1400010D3
|
||||
v = oper.getOperAddr(insn)
|
||||
else:
|
||||
continue
|
||||
|
||||
segm = f.vw.getSegment(v)
|
||||
if not segm:
|
||||
continue
|
||||
for v in derefs(f.vw, v):
|
||||
segm = f.vw.getSegment(v)
|
||||
if not segm:
|
||||
continue
|
||||
|
||||
segm_end = segm[0] + segm[1]
|
||||
try:
|
||||
# Do not read beyond the end of a segment
|
||||
if v + MAX_BYTES_FEATURE_SIZE > segm_end:
|
||||
extracted_bytes = f.vw.readMemory(v, segm_end - v)
|
||||
segm_end = segm[0] + segm[1]
|
||||
try:
|
||||
# Do not read beyond the end of a segment
|
||||
if v + MAX_BYTES_FEATURE_SIZE > segm_end:
|
||||
extracted_bytes = f.vw.readMemory(v, segm_end - v)
|
||||
else:
|
||||
extracted_bytes = f.vw.readMemory(v, MAX_BYTES_FEATURE_SIZE)
|
||||
except envi.SegmentationViolation:
|
||||
pass
|
||||
else:
|
||||
extracted_bytes = f.vw.readMemory(v, MAX_BYTES_FEATURE_SIZE)
|
||||
except envi.SegmentationViolation:
|
||||
pass
|
||||
else:
|
||||
if not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||
yield Bytes(extracted_bytes), insn.va
|
||||
if not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||
yield Bytes(extracted_bytes), insn.va
|
||||
|
||||
|
||||
def read_memory(vw, va, size):
|
||||
@@ -229,20 +260,25 @@ def extract_insn_string_features(f, bb, insn):
|
||||
# example:
|
||||
#
|
||||
# push offset aAcr ; "ACR > "
|
||||
|
||||
for oper in insn.opers:
|
||||
if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
|
||||
v = oper.getOperValue(oper)
|
||||
elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
|
||||
# like 0x401000 in `mov eax, 0x401000[2 * ebx]`
|
||||
v = oper.imm
|
||||
elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
|
||||
v = oper.getOperAddr(insn)
|
||||
else:
|
||||
continue
|
||||
|
||||
try:
|
||||
s = read_string(f.vw, v)
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
yield String(s.rstrip("\x00")), insn.va
|
||||
for v in derefs(f.vw, v):
|
||||
try:
|
||||
s = read_string(f.vw, v)
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
yield String(s.rstrip("\x00")), insn.va
|
||||
|
||||
|
||||
def extract_insn_offset_features(f, bb, insn):
|
||||
|
||||
@@ -85,6 +85,15 @@ def test_string_features(mimikatz):
|
||||
assert capa.features.String("bcrypt.dll") not in features
|
||||
|
||||
|
||||
def test_string_pointer_features(mimikatz):
    """Check that String("INPUTEVENT") is among the features extracted from the function at 0x44EE5A."""
    function_va = 0x44EE5A

    # viv doesn't identify this function, because it's only referenced by a vtable,
    # but that's not the point of this test, so create the function explicitly.
    mimikatz.vw.makeFunction(function_va)

    function = viv_utils.Function(mimikatz.vw, function_va)
    extracted = extract_function_features(function)
    assert capa.features.String("INPUTEVENT") in extracted
|
||||
|
||||
|
||||
def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a):
|
||||
features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
|
||||
wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61")
|
||||
@@ -99,6 +108,15 @@ def test_byte_features64(sample_lab21_01):
|
||||
assert wanted.evaluate(features) == True
|
||||
|
||||
|
||||
def test_bytes_pointer_features(mimikatz):
    """Check that the UTF-16LE bytes of "INPUTEVENT" match the features extracted from the function at 0x44EE5A."""
    function_va = 0x44EE5A

    # viv doesn't identify this function, because it's only referenced by a vtable,
    # but that's not the point of this test, so create the function explicitly.
    mimikatz.vw.makeFunction(function_va)

    function = viv_utils.Function(mimikatz.vw, function_va)
    extracted = extract_function_features(function)
    wanted = capa.features.Bytes("INPUTEVENT".encode("utf-16le"))
    assert wanted.evaluate(extracted) == True
|
||||
|
||||
|
||||
def test_number_features(mimikatz):
|
||||
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
|
||||
assert capa.features.insn.Number(0xFF) in features
|
||||
|
||||
Reference in New Issue
Block a user