From 351d70aafe088f878e1c4fc1f4e65cc294eaf181 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Thu, 7 Apr 2022 12:56:24 -0600 Subject: [PATCH] smda: implement additional offset and number features --- capa/features/extractors/smda/insn.py | 38 ++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/smda/insn.py b/capa/features/extractors/smda/insn.py index 37879838..1635d84a 100644 --- a/capa/features/extractors/smda/insn.py +++ b/capa/features/extractors/smda/insn.py @@ -5,7 +5,7 @@ import struct from smda.common.SmdaReport import SmdaReport import capa.features.extractors.helpers -from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset +from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic # security cookie checks may perform non-zeroing XORs, these are expected within a certain @@ -69,11 +69,20 @@ def extract_insn_number_features(f, bb, insn): # The result of bitwise operations is calculated as though carried out # in two’s complement with an infinite number of sign bits value = int(operand, 16) & ((1 << f.smda_report.bitness) - 1) - + except ValueError: + continue + else: yield Number(value), insn.offset yield OperandNumber(i, value), insn.offset - except: - continue + + if insn.mnemonic == "add" and 0 < value < MAX_STRUCTURE_SIZE: + # for pattern like: + # + # add eax, 0x10 + # + # assume 0x10 is also an offset (imagine eax is a pointer). 
+ yield Offset(value), insn.offset + yield OperandOffset(i, value), insn.offset def read_bytes(smda_report, va, num_bytes=None): @@ -200,10 +209,9 @@ def extract_insn_offset_features(f, bb, insn): # mov eax, [esi + ecx + 16384] operands = [o.strip() for o in insn.operands.split(",")] for i, operand in enumerate(operands): - if "ptr" not in operand: - continue if "esp" in operand or "ebp" in operand or "rbp" in operand: continue + number = 0 number_hex = re.search(PATTERN_HEXNUM, operand) number_int = re.search(PATTERN_SINGLENUM, operand) @@ -213,6 +221,24 @@ def extract_insn_offset_features(f, bb, insn): elif number_int: number = int(number_int.group("num")) number = -1 * number if number_int.group().startswith("-") else number + + if "ptr" not in operand: + if ( + insn.mnemonic == "lea" + and i == 1 + and (operand.count("+") + operand.count("-")) == 1 + and operand.count("*") == 0 + ): + # for pattern like: + # + # lea eax, [ebx + 1] + # + # assume 1 is also a number (imagine ebx is a zero register). + yield Number(number), insn.offset + yield OperandNumber(i, number), insn.offset + + continue + yield Offset(number), insn.offset yield OperandOffset(i, number), insn.offset