From 854e586f4044ead9d7294bd5cef70a16f7388117 Mon Sep 17 00:00:00 2001 From: David Cannings Date: Sat, 5 Sep 2020 16:00:36 +0100 Subject: [PATCH] Fix #280: Test if op is an offset Check whether the auto-analyser (or user) has marked an operand as an offset, instead of checking whether the value is mapped. --- capa/features/extractors/ida/helpers.py | 7 +++++++ capa/features/extractors/ida/insn.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 22a99f71..4eb29be5 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -12,6 +12,7 @@ import string import idc import idaapi import idautils +import ida_bytes def find_byte_sequence(start, end, seq): @@ -229,6 +230,12 @@ def is_op_read(insn, op): return idaapi.has_cf_use(insn.get_canon_feature(), op.n) +def is_op_offset(insn, op): + """ Check if an operand has been marked as an offset (by auto-analysis or manually) """ + flags = idaapi.get_flags(insn.ea) + return ida_bytes.is_off(flags, op.n) + + def is_sp_modified(insn): """ determine if instruction modifies SP, ESP, RSP """ for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,)): diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 665d24e6..0fe4ade7 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -104,13 +104,18 @@ def extract_insn_number_features(f, bb, insn): return for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm, idaapi.o_mem)): + # skip things like: + # .text:00401100 shr eax, offset loc_C + if capa.features.extractors.ida.helpers.is_op_offset(insn, op): + continue + if op.type == idaapi.o_imm: const = capa.features.extractors.ida.helpers.mask_op_val(op) else: const = op.addr - if not idaapi.is_mapped(const): - yield Number(const), insn.ea - yield Number(const, 
arch=get_arch(f.ctx)), insn.ea + + yield Number(const), insn.ea + yield Number(const, arch=get_arch(f.ctx)), insn.ea def extract_insn_bytes_features(f, bb, insn):