smda: implement additional offset and number features

This commit is contained in:
Willi Ballenthin
2022-04-07 12:56:24 -06:00
parent 8a2276f398
commit 351d70aafe

View File

@@ -5,7 +5,7 @@ import struct
from smda.common.SmdaReport import SmdaReport
import capa.features.extractors.helpers
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -69,11 +69,20 @@ def extract_insn_number_features(f, bb, insn):
# The result of bitwise operations is calculated as though carried out
# in twos complement with an infinite number of sign bits
value = int(operand, 16) & ((1 << f.smda_report.bitness) - 1)
except ValueError:
continue
else:
yield Number(value), insn.offset
yield OperandNumber(i, value), insn.offset
except:
continue
if insn.mnemonic == "add" and 0 < value < MAX_STRUCTURE_SIZE:
# for pattern like:
#
# add eax, 0x10
#
# assume 0x10 is also an offset (imagine eax is a pointer).
yield Offset(value), insn.offset
yield OperandOffset(i, value), insn.offset
def read_bytes(smda_report, va, num_bytes=None):
@@ -200,10 +209,9 @@ def extract_insn_offset_features(f, bb, insn):
# mov eax, [esi + ecx + 16384]
operands = [o.strip() for o in insn.operands.split(",")]
for i, operand in enumerate(operands):
if "ptr" not in operand:
continue
if "esp" in operand or "ebp" in operand or "rbp" in operand:
continue
number = 0
number_hex = re.search(PATTERN_HEXNUM, operand)
number_int = re.search(PATTERN_SINGLENUM, operand)
@@ -213,6 +221,24 @@ def extract_insn_offset_features(f, bb, insn):
elif number_int:
number = int(number_int.group("num"))
number = -1 * number if number_int.group().startswith("-") else number
if "ptr" not in operand:
if (
insn.mnemonic == "lea"
and i == 1
and (operand.count("+") + operand.count("-")) == 1
and operand.count("*") == 0
):
# for pattern like:
#
# lea eax, [ebx + 1]
#
# assume 1 is also an offset (imagine ebx is a zero register).
yield Number(number), insn.offset
yield OperandNumber(i, number), insn.offset
continue
yield Offset(number), insn.offset
yield OperandOffset(i, number), insn.offset