ida, viv: implement extra offset/number extraction

This commit is contained in:
Willi Ballenthin
2022-04-06 14:57:51 -06:00
parent b843cef986
commit 47dfeafdc8
3 changed files with 42 additions and 2 deletions

View File

@@ -12,7 +12,7 @@ import idautils
import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -135,6 +135,15 @@ def extract_insn_number_features(f, bb, insn):
yield Number(const), insn.ea
yield OperandNumber(i, const), insn.ea
if insn.itype == idaapi.NN_add and 0 < const < MAX_STRUCTURE_SIZE and op.type == idaapi.o_imm:
# for pattern like:
#
# add eax, 0x10
#
# assume 0x10 is also an offset (imagine eax is a pointer).
yield Offset(const), insn.va
yield OperandOffset(i, const), insn.va
def extract_insn_bytes_features(f, bb, insn):
"""parse referenced byte sequences
@@ -209,6 +218,15 @@ def extract_insn_offset_features(f, bb, insn):
yield Offset(op_off), insn.ea
yield OperandOffset(i, op_off), insn.ea
if i == 1 and op.type == idaapi.o_phrase:
# for pattern like:
#
# lea eax, [ebx + 1]
#
# assume 1 is also an offset (imagine ebx is a zero register).
yield Number(op_off), insn.va
yield OperandNumber(i, op_off), insn.va
def contains_stack_cookie_keywords(s):
"""check if string contains stack cookie keywords

View File

@@ -17,7 +17,7 @@ import envi.archs.amd64.disasm
import capa.features.extractors.helpers
import capa.features.extractors.viv.helpers
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
@@ -539,6 +539,15 @@ def extract_op_number_features(f, bb, insn, i, oper):
yield Number(v), insn.va
yield OperandNumber(i, v), insn.va
# an `add` with a small, positive immediate may be pointer arithmetic,
# so emit offset features in addition to the number features yielded above.
# the immediate must be strictly between 0 and MAX_STRUCTURE_SIZE.
if insn.mnem == "add" and 0 < v < MAX_STRUCTURE_SIZE and isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
# for pattern like:
#
# add eax, 0x10
#
# assume 0x10 is also an offset (imagine eax is a pointer).
yield Offset(v), insn.va
yield OperandOffset(i, v), insn.va
def extract_op_offset_features(f, bb, insn, i, oper):
"""parse structure offset features from the given operand."""
@@ -567,6 +576,15 @@ def extract_op_offset_features(f, bb, insn, i, oper):
yield Offset(v), insn.va
yield OperandOffset(i, v), insn.va
# only the second operand (index 1) is considered, and only when the value
# does not probe as readable memory in the workspace
# (presumably to avoid reporting mapped addresses as plain numbers -- TODO confirm).
if i == 1 and not f.vw.probeMemory(v, 1, envi.memory.MM_READ):
# for pattern like:
#
# lea eax, [ebx + 1]
#
# assume 1 is also a number (imagine ebx is a zero register),
# so emit number features alongside the offset features yielded above.
yield Number(v), insn.va
yield OperandNumber(i, v), insn.va
# like: [esi + ecx + 16384]
# reg ^ ^
# index ^

View File

@@ -29,6 +29,10 @@ class Number(Feature):
return capa.render.utils.hex(self.value)
# max recognized structure size (and therefore, offset size).
# used as an exclusive upper bound: the extractors only treat an `add` immediate
# as a potential structure offset when 0 < value < MAX_STRUCTURE_SIZE.
MAX_STRUCTURE_SIZE = 0x10000
class Offset(Feature):
def __init__(self, value: int, description=None):
super(Offset, self).__init__(value, description=description)