features: viv: extract strings/bytes from nested pointers

closes #200
This commit is contained in:
William Ballenthin
2020-08-03 17:35:29 -06:00
parent c982c2d04e
commit b09f29a996
2 changed files with 74 additions and 20 deletions

View File

@@ -140,6 +140,33 @@ def extract_insn_number_features(f, bb, insn):
yield Number(v), insn.va
def derefs(vw, p):
    """
    recursively follow the given pointer, yielding the valid memory addresses along the way.
    useful when you may have a pointer to string, or pointer to pointer to string, etc.

    this is a "do what i mean" type of helper function.

    args:
      vw: workspace-like object; only `isValidPointer(va)` and `readMemoryPtr(va)` are used.
      p: the address at which to start following pointers.

    yields:
      each address in the chain for which `vw.isValidPointer` is true, starting with `p`.
      the walk stops at the first invalid address, when the chain loops back onto an
      address that was already yielded, or after at most 11 addresses.
    """
    # sanity: avoid chains of pointers that are unreasonably deep
    max_depth = 10

    # addresses already yielded, so each address is reported at most once
    visited = set()
    depth = 0

    while vw.isValidPointer(p):
        yield p
        visited.add(p)

        target = vw.readMemoryPtr(p)

        # sanity: pointer points to self, or back into the chain we already walked.
        # without this, a cycle like A -> B -> A would re-yield the same
        # addresses over and over until the depth limit is hit.
        if target in visited:
            return

        depth += 1
        if depth > max_depth:
            return

        p = target
def extract_insn_bytes_features(f, bb, insn):
"""
parse byte sequence features from the given instruction.
@@ -157,28 +184,32 @@ def extract_insn_bytes_features(f, bb, insn):
# handle case like:
# movzx ecx, ds:byte_423258[eax]
v = oper.disp
elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
# like 0x401000 in `mov eax, 0x401000[2 * ebx]`
v = oper.imm
elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
# see: Lab21-01.exe_:0x1400010D3
v = oper.getOperAddr(insn)
else:
continue
segm = f.vw.getSegment(v)
if not segm:
continue
for v in derefs(f.vw, v):
segm = f.vw.getSegment(v)
if not segm:
continue
segm_end = segm[0] + segm[1]
try:
# Do not read beyond the end of a segment
if v + MAX_BYTES_FEATURE_SIZE > segm_end:
extracted_bytes = f.vw.readMemory(v, segm_end - v)
segm_end = segm[0] + segm[1]
try:
# Do not read beyond the end of a segment
if v + MAX_BYTES_FEATURE_SIZE > segm_end:
extracted_bytes = f.vw.readMemory(v, segm_end - v)
else:
extracted_bytes = f.vw.readMemory(v, MAX_BYTES_FEATURE_SIZE)
except envi.SegmentationViolation:
pass
else:
extracted_bytes = f.vw.readMemory(v, MAX_BYTES_FEATURE_SIZE)
except envi.SegmentationViolation:
pass
else:
if not capa.features.extractors.helpers.all_zeros(extracted_bytes):
yield Bytes(extracted_bytes), insn.va
if not capa.features.extractors.helpers.all_zeros(extracted_bytes):
yield Bytes(extracted_bytes), insn.va
def read_memory(vw, va, size):
@@ -229,20 +260,25 @@ def extract_insn_string_features(f, bb, insn):
# example:
#
# push offset aAcr ; "ACR > "
for oper in insn.opers:
if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
v = oper.getOperValue(oper)
elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
# like 0x401000 in `mov eax, 0x401000[2 * ebx]`
v = oper.imm
elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper):
v = oper.getOperAddr(insn)
else:
continue
try:
s = read_string(f.vw, v)
except ValueError:
continue
else:
yield String(s.rstrip("\x00")), insn.va
for v in derefs(f.vw, v):
try:
s = read_string(f.vw, v)
except ValueError:
continue
else:
yield String(s.rstrip("\x00")), insn.va
def extract_insn_offset_features(f, bb, insn):

View File

@@ -85,6 +85,15 @@ def test_string_features(mimikatz):
assert capa.features.String("bcrypt.dll") not in features
def test_string_pointer_features(mimikatz):
    """the String feature "INPUTEVENT" is extracted from the function at 0x44EE5A."""
    workspace = mimikatz.vw
    function_va = 0x44EE5A

    # viv doesn't identify this function, because it's only referenced by a vtable,
    # but that's not the point of this test, so define the function by hand.
    workspace.makeFunction(function_va)

    function = viv_utils.Function(workspace, function_va)
    features = extract_function_features(function)

    wanted = capa.features.String("INPUTEVENT")
    assert wanted in features
def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a):
features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61")
@@ -99,6 +108,15 @@ def test_byte_features64(sample_lab21_01):
assert wanted.evaluate(features) == True
def test_bytes_pointer_features(mimikatz):
    """the UTF-16LE bytes of "INPUTEVENT" match the features of the function at 0x44EE5A."""
    workspace = mimikatz.vw
    function_va = 0x44EE5A

    # viv doesn't identify this function, because it's only referenced by a vtable,
    # but that's not the point of this test, so define the function by hand.
    workspace.makeFunction(function_va)

    function = viv_utils.Function(workspace, function_va)
    features = extract_function_features(function)

    wanted = capa.features.Bytes("INPUTEVENT".encode("utf-16le"))
    assert wanted.evaluate(features) == True
def test_number_features(mimikatz):
features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
assert capa.features.insn.Number(0xFF) in features