diff --git a/README.md b/README.md index 5d0f40b7..1aece2d0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ![capa](.github/logo.png) [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) -[![Number of rules](https://img.shields.io/badge/rules-289-blue.svg)](https://github.com/fireeye/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-290-blue.svg)](https://github.com/fireeye/capa-rules) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) capa detects capabilities in executable files. diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 2629e0dc..a00cfeb7 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -149,45 +149,31 @@ def extract_insn_number_features(f, bb, insn): yield Number(v, arch=get_arch(f.vw)), insn.va -def extract_insn_bytes_features(f, bb, insn): +def derefs(vw, p): """ - parse byte sequence features from the given instruction. - example: - # push offset iid_004118d4_IShellLinkA ; riid + recursively follow the given pointer, yielding the valid memory addresses along the way. + useful when you may have a pointer to string, or pointer to pointer to string, etc. + + this is a "do what i mean" type of helper function. """ - for oper in insn.opers: - if insn.mnem == "call": - # ignore call instructions - continue + depth = 0 + while True: + if not vw.isValidPointer(p): + return + yield p - if isinstance(oper, envi.archs.i386.disasm.i386ImmOper): - v = oper.getOperValue(oper) - elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper): - # handle case like: - # movzx ecx, ds:byte_423258[eax] - v = oper.disp - elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper): - # see: Lab21-01.exe_:0x1400010D3 - v = oper.getOperAddr(insn) - else: - continue + next = vw.readMemoryPtr(p) - segm = f.vw.getSegment(v) - if not segm: - continue + # sanity: pointer points to self + if next == p: + return - segm_end = segm[0] + segm[1] - try: - # Do not read beyond the end of a segment - if v + MAX_BYTES_FEATURE_SIZE > segm_end: - extracted_bytes = f.vw.readMemory(v, segm_end - v) - else: - extracted_bytes = f.vw.readMemory(v, MAX_BYTES_FEATURE_SIZE) - except envi.SegmentationViolation: - pass - else: - if not capa.features.extractors.helpers.all_zeros(extracted_bytes): - yield Bytes(extracted_bytes), insn.va + # sanity: avoid chains of pointers that are unreasonably deep + depth += 1 + if depth > 10: + return + + p = next def read_memory(vw, va, size): @@ -206,6 +192,65 @@ def read_memory(vw, va, size): raise envi.SegmentationViolation(va) +def read_bytes(vw, va): + """ + read up to MAX_BYTES_FEATURE_SIZE from the given address. + + raises: + envi.SegmentationViolation: if the given address is not valid. + """ + segm = vw.getSegment(va) + if not segm: + raise envi.SegmentationViolation() + + segm_end = segm[0] + segm[1] + try: + # Do not read beyond the end of a segment + if va + MAX_BYTES_FEATURE_SIZE > segm_end: + return read_memory(vw, va, segm_end - va) + else: + return read_memory(vw, va, MAX_BYTES_FEATURE_SIZE) + except envi.SegmentationViolation: + raise + + +def extract_insn_bytes_features(f, bb, insn): + """ + parse byte sequence features from the given instruction. + example: + # push offset iid_004118d4_IShellLinkA ; riid + """ + for oper in insn.opers: + if insn.mnem == "call": + continue + + if isinstance(oper, envi.archs.i386.disasm.i386ImmOper): + v = oper.getOperValue(oper) + elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper): + # handle case like: + # movzx ecx, ds:byte_423258[eax] + v = oper.disp + elif isinstance(oper, envi.archs.i386.disasm.i386SibOper): + # like 0x401000 in `mov eax, 0x401000[2 * ebx]` + v = oper.imm + elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper): + # see: Lab21-01.exe_:0x1400010D3 + v = oper.getOperAddr(insn) + else: + continue + + for v in derefs(f.vw, v): + try: + buf = read_bytes(f.vw, v) + except envi.SegmentationViolation: + continue + + if capa.features.extractors.helpers.all_zeros(buf): + continue + + yield Bytes(buf), insn.va + + def read_string(vw, offset): try: alen = vw.detectString(offset) @@ -238,20 +283,25 @@ def extract_insn_string_features(f, bb, insn): # example: # # push offset aAcr ; "ACR > " + for oper in insn.opers: if isinstance(oper, envi.archs.i386.disasm.i386ImmOper): v = oper.getOperValue(oper) + elif isinstance(oper, envi.archs.i386.disasm.i386SibOper): + # like 0x401000 in `mov eax, 0x401000[2 * ebx]` + v = oper.imm elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper): v = oper.getOperAddr(insn) else: continue - try: - s = read_string(f.vw, v) - except ValueError: - continue - else: - yield String(s.rstrip("\x00")), insn.va + for v in derefs(f.vw, v): + try: + s = read_string(f.vw, v) + except ValueError: + continue + else: + yield String(s.rstrip("\x00")), insn.va def extract_insn_offset_features(f, bb, insn): diff --git a/rules b/rules index 3a803f34..b3dfadeb 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 3a803f3401fd30f191792fcb40df8880cf6b7d68 +Subproject commit b3dfadebeaa446c24b9810374c906ea53eb54a71 diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index 67c38ee4..ce480a7e 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -86,6 +86,11 @@ def test_string_features(mimikatz): assert capa.features.String("bcrypt.dll") not in features +def test_string_pointer_features(mimikatz): + features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF)) + assert capa.features.String("INPUTEVENT") in features + + def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a): features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)) wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61") @@ -100,6 +105,11 @@ def test_byte_features64(sample_lab21_01): assert wanted.evaluate(features) == True +def test_bytes_pointer_features(mimikatz): + features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF)) + assert capa.features.Bytes("INPUTEVENT".encode("utf-16le")).evaluate(features) == True + + def test_number_features(mimikatz): features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D)) assert capa.features.insn.Number(0xFF) in features