Fix byte/string extraction and unit tests (#1339)

* Fix wrong expected results on string and bytes tests. Fix https://github.com/mandiant/capa/issues/1336

* Fix IDA insn/byte extractor checks wrong address. Fix https://github.com/mandiant/capa/issues/1327

* fix vivisect string check and tests

---------

Co-authored-by: Xusheng <xusheng@vector35.com>
This commit is contained in:
Moritz
2023-03-02 10:33:14 +01:00
committed by GitHub
parent be6bb879f3
commit 52de09a032
4 changed files with 21 additions and 7 deletions

View File

@@ -25,6 +25,7 @@
### Bug Fixes
- extractor: fix vivisect loop detection corner case #1310 @mr-tz
- match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff
- extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6
### capa explorer IDA Pro plugin

View File

@@ -172,7 +172,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
if ref != insn.ea:
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
if not capa.features.extractors.ida.helpers.find_string_at(insn.ea):
if not capa.features.extractors.ida.helpers.find_string_at(ref):
# don't extract byte features for obvious strings
yield Bytes(extracted_bytes), ih.address

View File

@@ -175,8 +175,13 @@ def derefs(vw, p):
while True:
if not vw.isValidPointer(p):
return
yield p
if vw.isProbablyString(p) or vw.isProbablyUnicode(p):
# don't deref strings that coincidentally are pointers
return
try:
next = vw.readMemoryPtr(p)
except Exception:
@@ -271,7 +276,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera
if capa.features.extractors.helpers.all_zeros(buf):
continue
if f.vw.isProbablyString(v):
if f.vw.isProbablyString(v) or f.vw.isProbablyUnicode(v):
# don't extract byte features for obvious strings
continue

View File

@@ -337,6 +337,9 @@ def get_sample_md5_by_name(name):
return "946a99f36a46d335dec080d9a4371940"
elif name.startswith("b9f5b"):
return "b9f5bd514485fb06da39beff051b9fdc"
elif name.startswith("294b8d"):
# file name is SHA256 hash
return "3db3e55b16a7b1b1afb970d5e77c5d98"
else:
raise ValueError("unexpected sample fixture: %s" % name)
@@ -643,14 +646,19 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/string, direct memory reference
("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
# insn/bytes
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), True),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), True),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), True),
("mimikatz", "function=0x401517", capa.features.common.Bytes(binascii.unhexlify("CA3B0E000000F8AF47")), True),
("mimikatz", "function=0x404414", capa.features.common.Bytes(binascii.unhexlify("0180000040EA4700")), True),
# don't extract byte features for obvious strings
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
# push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
("mimikatz", "function=0x401000", capa.features.common.Bytes(binascii.unhexlify("FDFF59F647")), False),
# IDA features included byte sequences read from invalid memory, fixed in #409
("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False),
# insn/bytes, pointer to bytes
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), True),
# insn/bytes, pointer to string bytes
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
# insn/characteristic(nzxor)
("mimikatz", "function=0x410DFC", capa.features.common.Characteristic("nzxor"), True),
("mimikatz", "function=0x40105D", capa.features.common.Characteristic("nzxor"), False),