mirror of
https://github.com/mandiant/capa.git
synced 2025-12-23 15:37:37 -08:00
Fix byte/string extraction and unit tests (#1339)
* Fix wrong expected results on string and bytes tests. Fixes https://github.com/mandiant/capa/issues/1336
* Fix IDA insn/bytes extractor checking the wrong address. Fixes https://github.com/mandiant/capa/issues/1327
* Fix vivisect string check and tests

---------

Co-authored-by: Xusheng <xusheng@vector35.com>
This commit is contained in:
@@ -25,6 +25,7 @@
|
||||
### Bug Fixes
|
||||
- extractor: fix vivisect loop detection corner case #1310 @mr-tz
|
||||
- match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff
|
||||
- extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6
|
||||
|
||||
### capa explorer IDA Pro plugin
|
||||
|
||||
|
||||
@@ -172,7 +172,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
||||
if ref != insn.ea:
|
||||
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
||||
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||
if not capa.features.extractors.ida.helpers.find_string_at(insn.ea):
|
||||
if not capa.features.extractors.ida.helpers.find_string_at(ref):
|
||||
# don't extract byte features for obvious strings
|
||||
yield Bytes(extracted_bytes), ih.address
|
||||
|
||||
|
||||
@@ -175,8 +175,13 @@ def derefs(vw, p):
|
||||
while True:
|
||||
if not vw.isValidPointer(p):
|
||||
return
|
||||
|
||||
yield p
|
||||
|
||||
if vw.isProbablyString(p) or vw.isProbablyUnicode(p):
|
||||
# don't deref strings that coincidentally are pointers
|
||||
return
|
||||
|
||||
try:
|
||||
next = vw.readMemoryPtr(p)
|
||||
except Exception:
|
||||
@@ -271,7 +276,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera
|
||||
if capa.features.extractors.helpers.all_zeros(buf):
|
||||
continue
|
||||
|
||||
if f.vw.isProbablyString(v):
|
||||
if f.vw.isProbablyString(v) or f.vw.isProbablyUnicode(v):
|
||||
# don't extract byte features for obvious strings
|
||||
continue
|
||||
|
||||
|
||||
@@ -337,6 +337,9 @@ def get_sample_md5_by_name(name):
|
||||
return "946a99f36a46d335dec080d9a4371940"
|
||||
elif name.startswith("b9f5b"):
|
||||
return "b9f5bd514485fb06da39beff051b9fdc"
|
||||
elif name.startswith("294b8d"):
|
||||
# file name is SHA256 hash
|
||||
return "3db3e55b16a7b1b1afb970d5e77c5d98"
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture: %s" % name)
|
||||
|
||||
@@ -643,14 +646,19 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
# insn/string, direct memory reference
|
||||
("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
|
||||
# insn/bytes
|
||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), True),
|
||||
("mimikatz", "function=0x401517", capa.features.common.Bytes(binascii.unhexlify("CA3B0E000000F8AF47")), True),
|
||||
("mimikatz", "function=0x404414", capa.features.common.Bytes(binascii.unhexlify("0180000040EA4700")), True),
|
||||
# don't extract byte features for obvious strings
|
||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
|
||||
# push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
|
||||
("mimikatz", "function=0x401000", capa.features.common.Bytes(binascii.unhexlify("FDFF59F647")), False),
|
||||
# IDA features included byte sequences read from invalid memory, fixed in #409
|
||||
("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False),
|
||||
# insn/bytes, pointer to bytes
|
||||
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), True),
|
||||
# insn/bytes, pointer to string bytes
|
||||
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
|
||||
# insn/characteristic(nzxor)
|
||||
("mimikatz", "function=0x410DFC", capa.features.common.Characteristic("nzxor"), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.common.Characteristic("nzxor"), False),
|
||||
|
||||
Reference in New Issue
Block a user