mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 15:16:22 -08:00
adding IDA extractor code to resolve nested data references for string and bytes features
This commit is contained in:
@@ -331,3 +331,28 @@ def is_basic_block_tight_loop(bb):
|
|||||||
if ref == bb.start_ea:
|
if ref == bb.start_ea:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def find_data_reference_from_insn_helper(ea, max_depth=10):
    """
    follow a chain of nested pointers starting at `ea` and return the last address reached

    args:
      ea: address to start resolving data references from
      max_depth: maximum number of references to follow before giving up

    returns:
      the address at which the walk stopped; this is `ea` itself when no reference was followed
    """
    current = ea
    remaining = max_depth

    # stop once the depth budget is exhausted
    while remaining != 0:
        targets = list(idautils.DataRefsFrom(current))

        # stop on no refs or more than one ref (assume nested pointers only have one data reference)
        if len(targets) != 1:
            break

        target = targets[0]

        # stop if the address references itself (circular reference)
        if target == current:
            break

        # step to the referenced address and keep searching
        current = target
        remaining -= 1

    return current
|
||||||
|
|
||||||
|
|
||||||
|
def find_data_reference_from_insn(insn):
    """
    resolve the data reference made by an instruction

    args:
      insn: instruction object exposing its address as `insn.ea`

    returns:
      address of the resolved data reference if one exists, otherwise the address of the instruction
    """
    insn_ea = insn.ea
    return find_data_reference_from_insn_helper(insn_ea)
|
||||||
|
|||||||
@@ -116,11 +116,8 @@ def extract_insn_bytes_features(f, bb, insn):
|
|||||||
example:
|
example:
|
||||||
push offset iid_004118d4_IShellLinkA ; riid
|
push offset iid_004118d4_IShellLinkA ; riid
|
||||||
"""
|
"""
|
||||||
if idaapi.is_call_insn(insn):
|
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||||
# ignore call instructions
|
if ref != insn.ea:
|
||||||
return
|
|
||||||
|
|
||||||
for ref in idautils.DataRefsFrom(insn.ea):
|
|
||||||
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
||||||
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||||
yield Bytes(extracted_bytes), insn.ea
|
yield Bytes(extracted_bytes), insn.ea
|
||||||
@@ -137,7 +134,8 @@ def extract_insn_string_features(f, bb, insn):
|
|||||||
example:
|
example:
|
||||||
push offset aAcr ; "ACR > "
|
push offset aAcr ; "ACR > "
|
||||||
"""
|
"""
|
||||||
for ref in idautils.DataRefsFrom(insn.ea):
|
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||||
|
if ref != insn.ea:
|
||||||
found = capa.features.extractors.ida.helpers.find_string_at(ref)
|
found = capa.features.extractors.ida.helpers.find_string_at(ref)
|
||||||
if found:
|
if found:
|
||||||
yield String(found), insn.ea
|
yield String(found), insn.ea
|
||||||
|
|||||||
@@ -100,6 +100,13 @@ def test_string_features():
|
|||||||
assert capa.features.String("bcrypt.dll") not in features
|
assert capa.features.String("bcrypt.dll") not in features
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_string_pointer_features():
    """ the string "INPUTEVENT" must be among the features extracted from the function at 0x0044EDEF """
    func = get_extractor().get_function(0x0044EDEF)
    extracted = extract_function_features(func)
    wanted = capa.features.String("INPUTEVENT")
    assert wanted in extracted
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||||
def test_byte_features():
|
def test_byte_features():
|
||||||
f = get_extractor().get_function(0x40105D)
|
f = get_extractor().get_function(0x40105D)
|
||||||
@@ -109,6 +116,13 @@ def test_byte_features():
|
|||||||
assert wanted.evaluate(features) == True
|
assert wanted.evaluate(features) == True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_bytes_pointer_features():
    """ the UTF-16LE bytes of "INPUTEVENT" must match the features extracted from the function at 0x0044EDEF """
    func = get_extractor().get_function(0x0044EDEF)
    extracted = extract_function_features(func)
    wanted = capa.features.Bytes("INPUTEVENT".encode("utf-16le"))
    assert wanted.evaluate(extracted) == True
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
|
||||||
def test_number_features():
|
def test_number_features():
|
||||||
f = get_extractor().get_function(0x40105D)
|
f = get_extractor().get_function(0x40105D)
|
||||||
|
|||||||
Reference in New Issue
Block a user