adding IDA extractor code to resolve nested data references for string and bytes features

This commit is contained in:
Michael Hunhoff
2020-08-10 15:01:16 -06:00
parent f7cd52826e
commit 79d94144c6
3 changed files with 43 additions and 6 deletions

View File

@@ -331,3 +331,28 @@ def is_basic_block_tight_loop(bb):
if ref == bb.start_ea:
return True
return False
def find_data_reference_from_insn_helper(ea, max_depth=10):
    """ follow a chain of single data references starting at ea and return the last address reached

    the walk stops, returning the address reached so far, when:
      - max_depth hops have been taken (a max_depth of zero or less allows no hops at all),
      - the current address has no data reference or more than one
        (nested pointers are assumed to have exactly one data reference), or
      - the current address references itself.

    args:
      ea: address to start resolving from
      max_depth: maximum number of references to follow

    returns: the resolved address; equal to ea when nothing could be followed
    """
    # iterate rather than recurse; testing `> 0` (not `== 0`) keeps the walk bounded
    # even when a caller passes a negative depth
    remaining = max_depth
    while remaining > 0:
        data_refs = list(idautils.DataRefsFrom(ea))
        if len(data_refs) != 1:
            # no refs, or ambiguous: stop at the current address
            break
        target = data_refs[0]
        if target == ea:
            # direct self-reference: following it would never make progress
            break
        ea = target
        remaining -= 1
    return ea
def find_data_reference_from_insn(insn):
    """ resolve the data reference of an instruction

    starts at the instruction's own address (insn.ea) and delegates to
    find_data_reference_from_insn_helper using its default depth limit.

    returns: the address produced by the helper; callers in this change treat a result
      equal to insn.ea as "no data reference found"
    """
    start_ea = insn.ea
    return find_data_reference_from_insn_helper(start_ea)

View File

@@ -116,11 +116,8 @@ def extract_insn_bytes_features(f, bb, insn):
example:
push offset iid_004118d4_IShellLinkA ; riid
"""
if idaapi.is_call_insn(insn):
# ignore call instructions
return
for ref in idautils.DataRefsFrom(insn.ea):
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
if ref != insn.ea:
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
yield Bytes(extracted_bytes), insn.ea
@@ -137,7 +134,8 @@ def extract_insn_string_features(f, bb, insn):
example:
push offset aAcr ; "ACR > "
"""
for ref in idautils.DataRefsFrom(insn.ea):
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
if ref != insn.ea:
found = capa.features.extractors.ida.helpers.find_string_at(ref)
if found:
yield String(found), insn.ea

View File

@@ -100,6 +100,13 @@ def test_string_features():
assert capa.features.String("bcrypt.dll") not in features
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_string_pointer_features():
    """ the string "INPUTEVENT" must be among the features extracted from function 0x0044EDEF """
    func = get_extractor().get_function(0x0044EDEF)
    extracted = extract_function_features(func)
    wanted = capa.features.String("INPUTEVENT")
    assert wanted in extracted
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_byte_features():
f = get_extractor().get_function(0x40105D)
@@ -109,6 +116,13 @@ def test_byte_features():
assert wanted.evaluate(features) == True
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_bytes_pointer_features():
    """ the UTF-16LE bytes of "INPUTEVENT" must match the features extracted from function 0x0044EDEF """
    func = get_extractor().get_function(0x0044EDEF)
    extracted = extract_function_features(func)
    # same text as the string pointer test, encoded as wide characters
    wanted = capa.features.Bytes("INPUTEVENT".encode("utf-16le"))
    assert wanted.evaluate(extracted) == True
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_number_features():
f = get_extractor().get_function(0x40105D)