mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 07:10:29 -08:00
adding IDA extractor code to resolve nested data references for string and bytes features
This commit is contained in:
@@ -331,3 +331,28 @@ def is_basic_block_tight_loop(bb):
|
||||
if ref == bb.start_ea:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def find_data_reference_from_insn_helper(ea, max_depth=10):
    """follow a chain of single data references starting at ea and return the address where the walk stops

    args:
      ea: address to start resolving from
      max_depth: maximum number of references to follow before giving up

    returns:
      the last address reached; this is ea itself when no reference is followed
    """
    current = ea
    remaining = max_depth

    # compare against zero exactly (not `> 0`) so only an exhausted budget ends the walk
    while remaining != 0:
        targets = list(idautils.DataRefsFrom(current))

        # stop on zero or several refs (nested pointers are assumed to have exactly one data reference)
        if len(targets) != 1:
            break

        (target,) = targets

        # stop when the address refers to itself
        if target == current:
            break

        # step to the referenced address and spend one unit of depth
        current = target
        remaining -= 1

    return current
|
||||
|
||||
|
||||
def find_data_reference_from_insn(insn):
    """ resolve the data reference made by an instruction; returns the instruction address when none is resolved """
    # resolution starts at the instruction's own address
    start_ea = insn.ea
    return find_data_reference_from_insn_helper(start_ea)
|
||||
|
||||
@@ -116,11 +116,8 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
example:
|
||||
push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
if idaapi.is_call_insn(insn):
|
||||
# ignore call instructions
|
||||
return
|
||||
|
||||
for ref in idautils.DataRefsFrom(insn.ea):
|
||||
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||
if ref != insn.ea:
|
||||
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
||||
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||
yield Bytes(extracted_bytes), insn.ea
|
||||
@@ -137,7 +134,8 @@ def extract_insn_string_features(f, bb, insn):
|
||||
example:
|
||||
push offset aAcr ; "ACR > "
|
||||
"""
|
||||
for ref in idautils.DataRefsFrom(insn.ea):
|
||||
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||
if ref != insn.ea:
|
||||
found = capa.features.extractors.ida.helpers.find_string_at(ref)
|
||||
if found:
|
||||
yield String(found), insn.ea
|
||||
|
||||
@@ -100,6 +100,13 @@ def test_string_features():
|
||||
assert capa.features.String("bcrypt.dll") not in features
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_string_pointer_features():
    """ the string "INPUTEVENT" must be among the features extracted from the function at 0x0044EDEF """
    func = get_extractor().get_function(0x0044EDEF)
    extracted = extract_function_features(func)

    # NOTE(review): presumably this string is only reachable through a pointer - confirm against the sample
    wanted = capa.features.String("INPUTEVENT")
    assert wanted in extracted
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_byte_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
@@ -109,6 +116,13 @@ def test_byte_features():
|
||||
assert wanted.evaluate(features) == True
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_bytes_pointer_features():
    """ the UTF-16LE bytes of "INPUTEVENT" must match the features extracted from the function at 0x0044EDEF """
    func = get_extractor().get_function(0x0044EDEF)
    extracted = extract_function_features(func)

    # NOTE(review): presumably these bytes are only reachable through a pointer - confirm against the sample
    wanted = capa.features.Bytes("INPUTEVENT".encode("utf-16le"))
    assert wanted.evaluate(extracted) == True
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||
def test_number_features():
|
||||
f = get_extractor().get_function(0x40105D)
|
||||
|
||||
Reference in New Issue
Block a user