From 9b5aaa40dea5102fc19be93bd2f4783d596282bf Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Fri, 29 Jan 2021 11:16:12 +0100 Subject: [PATCH] improve bytes feature extraction --- capa/features/extractors/ida/helpers.py | 4 ++++ capa/features/extractors/ida/insn.py | 3 +++ capa/features/extractors/viv/insn.py | 6 +++--- tests/fixtures.py | 3 +++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 1d996311..487e6686 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -166,6 +166,10 @@ def basic_block_size(bb): def read_bytes_at(ea, count): """ """ + # check if byte has a value, see get_wide_byte doc + if not idc.is_loaded(ea): + return b"" + segm_end = idc.get_segm_end(ea) if ea + count > segm_end: return idc.get_bytes(ea, segm_end - ea) diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 7513cde2..3f5aef52 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -148,6 +148,9 @@ def extract_insn_bytes_features(f, bb, insn): example: push offset iid_004118d4_IShellLinkA ; riid """ + if idaapi.is_call_insn(insn): + return + ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn) if ref != insn.ea: extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 0aadc512..e708ee47 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -258,10 +258,10 @@ def extract_insn_bytes_features(f, bb, insn): example: # push offset iid_004118d4_IShellLinkA ; riid """ - for oper in insn.opers: - if insn.mnem == "call": - continue + if insn.mnem == "call": + return + for oper in insn.opers: if isinstance(oper, envi.archs.i386.disasm.i386ImmOper): v = oper.getOperValue(oper) elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper): diff --git a/tests/fixtures.py b/tests/fixtures.py index a9fc6913..4261408b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -10,6 +10,7 @@ import os import sys import os.path +import binascii import contextlib import collections @@ -444,6 +445,8 @@ FEATURE_PRESENCE_TESTS = [ ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True), ("mimikatz", "function=0x40105D", capa.features.Bytes("ACR > ".encode("utf-16le")), True), ("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False), + # IDA features included byte sequences read from invalid memory, fixed in #409 + ("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False), # insn/bytes, pointer to bytes ("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True), # insn/characteristic(nzxor)