mirror of
https://github.com/mandiant/capa.git
synced 2025-12-23 15:37:37 -08:00
Fix byte/string extraction and unit tests (#1339)
* Fix wrong expected results in string and bytes tests. Fix https://github.com/mandiant/capa/issues/1336
* Fix IDA insn/bytes extractor checking the wrong address. Fix https://github.com/mandiant/capa/issues/1327
* Fix vivisect string check and tests

---------

Co-authored-by: Xusheng <xusheng@vector35.com>
This commit is contained in:
@@ -25,6 +25,7 @@
|
|||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
- extractor: fix vivisect loop detection corner case #1310 @mr-tz
|
- extractor: fix vivisect loop detection corner case #1310 @mr-tz
|
||||||
- match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff
|
- match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff
|
||||||
|
- extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6
|
||||||
|
|
||||||
### capa explorer IDA Pro plugin
|
### capa explorer IDA Pro plugin
|
||||||
|
|
||||||
|
|||||||
@@ -172,7 +172,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
if ref != insn.ea:
|
if ref != insn.ea:
|
||||||
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
||||||
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||||
if not capa.features.extractors.ida.helpers.find_string_at(insn.ea):
|
if not capa.features.extractors.ida.helpers.find_string_at(ref):
|
||||||
# don't extract byte features for obvious strings
|
# don't extract byte features for obvious strings
|
||||||
yield Bytes(extracted_bytes), ih.address
|
yield Bytes(extracted_bytes), ih.address
|
||||||
|
|
||||||
|
|||||||
@@ -175,8 +175,13 @@ def derefs(vw, p):
|
|||||||
while True:
|
while True:
|
||||||
if not vw.isValidPointer(p):
|
if not vw.isValidPointer(p):
|
||||||
return
|
return
|
||||||
|
|
||||||
yield p
|
yield p
|
||||||
|
|
||||||
|
if vw.isProbablyString(p) or vw.isProbablyUnicode(p):
|
||||||
|
# don't deref strings that coincidentally are pointers
|
||||||
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
next = vw.readMemoryPtr(p)
|
next = vw.readMemoryPtr(p)
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -271,7 +276,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera
|
|||||||
if capa.features.extractors.helpers.all_zeros(buf):
|
if capa.features.extractors.helpers.all_zeros(buf):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if f.vw.isProbablyString(v):
|
if f.vw.isProbablyString(v) or f.vw.isProbablyUnicode(v):
|
||||||
# don't extract byte features for obvious strings
|
# don't extract byte features for obvious strings
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
@@ -337,6 +337,9 @@ def get_sample_md5_by_name(name):
|
|||||||
return "946a99f36a46d335dec080d9a4371940"
|
return "946a99f36a46d335dec080d9a4371940"
|
||||||
elif name.startswith("b9f5b"):
|
elif name.startswith("b9f5b"):
|
||||||
return "b9f5bd514485fb06da39beff051b9fdc"
|
return "b9f5bd514485fb06da39beff051b9fdc"
|
||||||
|
elif name.startswith("294b8d"):
|
||||||
|
# file name is SHA256 hash
|
||||||
|
return "3db3e55b16a7b1b1afb970d5e77c5d98"
|
||||||
else:
|
else:
|
||||||
raise ValueError("unexpected sample fixture: %s" % name)
|
raise ValueError("unexpected sample fixture: %s" % name)
|
||||||
|
|
||||||
@@ -643,14 +646,19 @@ FEATURE_PRESENCE_TESTS = sorted(
|
|||||||
# insn/string, direct memory reference
|
# insn/string, direct memory reference
|
||||||
("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
|
("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
|
||||||
# insn/bytes
|
# insn/bytes
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), True),
|
("mimikatz", "function=0x401517", capa.features.common.Bytes(binascii.unhexlify("CA3B0E000000F8AF47")), True),
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), True),
|
("mimikatz", "function=0x404414", capa.features.common.Bytes(binascii.unhexlify("0180000040EA4700")), True),
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), True),
|
# don't extract byte features for obvious strings
|
||||||
|
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
|
||||||
|
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
|
||||||
|
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False),
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
|
("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
|
||||||
|
# push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
|
||||||
|
("mimikatz", "function=0x401000", capa.features.common.Bytes(binascii.unhexlify("FDFF59F647")), False),
|
||||||
# IDA features included byte sequences read from invalid memory, fixed in #409
|
# IDA features included byte sequences read from invalid memory, fixed in #409
|
||||||
("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False),
|
("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False),
|
||||||
# insn/bytes, pointer to bytes
|
# insn/bytes, pointer to string bytes
|
||||||
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), True),
|
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
|
||||||
# insn/characteristic(nzxor)
|
# insn/characteristic(nzxor)
|
||||||
("mimikatz", "function=0x410DFC", capa.features.common.Characteristic("nzxor"), True),
|
("mimikatz", "function=0x410DFC", capa.features.common.Characteristic("nzxor"), True),
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Characteristic("nzxor"), False),
|
("mimikatz", "function=0x40105D", capa.features.common.Characteristic("nzxor"), False),
|
||||||
|
|||||||
Reference in New Issue
Block a user