From b09f29a9961d28fef8df8208e80090f7dfab33da Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 3 Aug 2020 17:35:29 -0600 Subject: [PATCH 1/8] features: viv: extract strings/bytes from nested pointers closes #200 --- capa/features/extractors/viv/insn.py | 76 ++++++++++++++++++++-------- tests/test_viv_features.py | 18 +++++++ 2 files changed, 74 insertions(+), 20 deletions(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index ef545ffd..d69953ae 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -140,6 +140,33 @@ def extract_insn_number_features(f, bb, insn): yield Number(v), insn.va +def derefs(vw, p): + """ + recursively follow the given pointer, yielding the valid memory addresses along the way. + useful when you may have a pointer to string, or pointer to pointer to string, etc. + + this is a "do what i mean" type of helper function. + """ + depth = 0 + while True: + if not vw.isValidPointer(p): + return + yield p + + next = vw.readMemoryPtr(p) + + # sanity: pointer points to self + if next == p: + return + + # sanity: avoid chains of pointers that are unreasonably deep + depth += 1 + if depth > 10: + return + + p = next + + def extract_insn_bytes_features(f, bb, insn): """ parse byte sequence features from the given instruction. @@ -157,28 +184,32 @@ def extract_insn_bytes_features(f, bb, insn): # handle case like: # movzx ecx, ds:byte_423258[eax] v = oper.disp + elif isinstance(oper, envi.archs.i386.disasm.i386SibOper): + # like 0x401000 in `mov eax, 0x401000[2 * ebx]` + v = oper.imm elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper): # see: Lab21-01.exe_:0x1400010D3 v = oper.getOperAddr(insn) else: continue - segm = f.vw.getSegment(v) - if not segm: - continue + for v in derefs(f.vw, v): + segm = f.vw.getSegment(v) + if not segm: + continue - segm_end = segm[0] + segm[1] - try: - # Do not read beyond the end of a segment - if v + MAX_BYTES_FEATURE_SIZE > segm_end: - extracted_bytes = f.vw.readMemory(v, segm_end - v) + segm_end = segm[0] + segm[1] + try: + # Do not read beyond the end of a segment + if v + MAX_BYTES_FEATURE_SIZE > segm_end: + extracted_bytes = f.vw.readMemory(v, segm_end - v) + else: + extracted_bytes = f.vw.readMemory(v, MAX_BYTES_FEATURE_SIZE) + except envi.SegmentationViolation: + pass else: - extracted_bytes = f.vw.readMemory(v, MAX_BYTES_FEATURE_SIZE) - except envi.SegmentationViolation: - pass - else: - if not capa.features.extractors.helpers.all_zeros(extracted_bytes): - yield Bytes(extracted_bytes), insn.va + if not capa.features.extractors.helpers.all_zeros(extracted_bytes): + yield Bytes(extracted_bytes), insn.va def read_memory(vw, va, size): @@ -229,20 +260,25 @@ def extract_insn_string_features(f, bb, insn): # example: # # push offset aAcr ; "ACR > " + for oper in insn.opers: if isinstance(oper, envi.archs.i386.disasm.i386ImmOper): v = oper.getOperValue(oper) + elif isinstance(oper, envi.archs.i386.disasm.i386SibOper): + # like 0x401000 in `mov eax, 0x401000[2 * ebx]` + v = oper.imm elif isinstance(oper, envi.archs.amd64.disasm.Amd64RipRelOper): v = oper.getOperAddr(insn) else: continue - try: - s = read_string(f.vw, v) - except ValueError: - continue - else: - yield String(s.rstrip("\x00")), insn.va + for v in derefs(f.vw, v): + try: + s = read_string(f.vw, v) + except ValueError: + continue + else: + yield String(s.rstrip("\x00")), insn.va def extract_insn_offset_features(f, bb, insn): diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index dae5339b..ba3c5e4b 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -85,6 +85,15 @@ def test_string_features(mimikatz): assert capa.features.String("bcrypt.dll") not in features +def test_string_pointer_features(mimikatz): + # viv doesn't identify this function, because its only referenced by vtable + # but thats not the point of this test + mimikatz.vw.makeFunction(0x44EE5A) + + features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EE5A)) + assert capa.features.String("INPUTEVENT") in features + + def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a): features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)) wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61") @@ -99,6 +108,15 @@ def test_byte_features64(sample_lab21_01): assert wanted.evaluate(features) == True +def test_bytes_pointer_features(mimikatz): + # viv doesn't identify this function, because its only referenced by vtable + # but thats not the point of this test + mimikatz.vw.makeFunction(0x44EE5A) + + features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EE5A)) + assert capa.features.Bytes("INPUTEVENT".encode("utf-16le")).evaluate(features) == True + + def test_number_features(mimikatz): features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D)) assert capa.features.insn.Number(0xFF) in features From 65264f354943f8603705ae1a0cb87b0fa718c003 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 3 Aug 2020 17:44:06 -0600 Subject: [PATCH 2/8] features: viv: refactor reading of byte features --- capa/features/extractors/viv/insn.py | 74 +++++++++++++++++----------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index d69953ae..6126df26 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -167,6 +167,44 @@ def derefs(vw, p): p = next +def read_memory(vw, va, size): + # as documented in #176, vivisect will not readMemory() when the section is not marked readable. + # + # but here, we don't care about permissions. + # so, copy the viv implementation of readMemory and remove the permissions check. + # + # this is derived from: + # https://github.com/vivisect/vivisect/blob/5eb4d237bddd4069449a6bc094d332ceed6f9a96/envi/memory.py#L453-L462 + for mva, mmaxva, mmap, mbytes in vw._map_defs: + if va >= mva and va < mmaxva: + mva, msize, mperms, mfname = mmap + offset = va - mva + return mbytes[offset : offset + size] + raise envi.SegmentationViolation(va) + + +def read_bytes(vw, va): + """ + read up to MAX_BYTES_FEATURE_SIZE from the given address. + + raises: + envi.SegmentationViolation: if the given address is not valid. + """ + segm = vw.getSegment(va) + if not segm: + raise envi.SegmentationViolation() + + segm_end = segm[0] + segm[1] + try: + # Do not read beyond the end of a segment + if va + MAX_BYTES_FEATURE_SIZE > segm_end: + return read_memory(vw, va, segm_end - va) + else: + return read_memory(vw, va, MAX_BYTES_FEATURE_SIZE) + except envi.SegmentationViolation: + raise + + def extract_insn_bytes_features(f, bb, insn): """ parse byte sequence features from the given instruction. @@ -175,7 +213,6 @@ def extract_insn_bytes_features(f, bb, insn): """ for oper in insn.opers: if insn.mnem == "call": - # ignore call instructions continue if isinstance(oper, envi.archs.i386.disasm.i386ImmOper): @@ -194,38 +231,15 @@ def extract_insn_bytes_features(f, bb, insn): continue for v in derefs(f.vw, v): - segm = f.vw.getSegment(v) - if not segm: + try: + buf = read_bytes(f.vw, v) + except envi.SegmentationViolation: continue - segm_end = segm[0] + segm[1] - try: - # Do not read beyond the end of a segment - if v + MAX_BYTES_FEATURE_SIZE > segm_end: - extracted_bytes = f.vw.readMemory(v, segm_end - v) - else: - extracted_bytes = f.vw.readMemory(v, MAX_BYTES_FEATURE_SIZE) - except envi.SegmentationViolation: - pass - else: - if not capa.features.extractors.helpers.all_zeros(extracted_bytes): - yield Bytes(extracted_bytes), insn.va + if capa.features.extractors.helpers.all_zeros(buf): + continue - -def read_memory(vw, va, size): - # as documented in #176, vivisect will not readMemory() when the section is not marked readable. - # - # but here, we don't care about permissions. - # so, copy the viv implementation of readMemory and remove the permissions check. - # - # this is derived from: - # https://github.com/vivisect/vivisect/blob/5eb4d237bddd4069449a6bc094d332ceed6f9a96/envi/memory.py#L453-L462 - for mva, mmaxva, mmap, mbytes in vw._map_defs: - if va >= mva and va < mmaxva: - mva, msize, mperms, mfname = mmap - offset = va - mva - return mbytes[offset : offset + size] - raise envi.SegmentationViolation(va) + yield Bytes(buf), insn.va def read_string(vw, offset): From 7458014b21fa61deb7114b5462ce43e39897c858 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 3 Aug 2020 17:50:21 -0600 Subject: [PATCH 3/8] ci: use pytest-xdist for parallel testing --- .github/workflows/tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d6cdd802..fac4fc9c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -52,6 +52,8 @@ jobs: uses: actions/setup-python@v2 with: python-version: 2.7 + - name: Install pytest-xdist + run: pip install pytest-xdist - name: Install capa run: pip install -e .[dev] - name: Run tests From de0f9043faf45909051a4df1d650e285d61cd3f1 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 3 Aug 2020 17:51:02 -0600 Subject: [PATCH 4/8] ci: use NUM_CPU processes to test --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fac4fc9c..e83bd8bb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -57,5 +57,5 @@ jobs: - name: Install capa run: pip install -e .[dev] - name: Run tests - run: pytest tests/ + run: pytest -n auto tests/ From 8550a8bbe99b35d9bbe7a9ea7ba4b4610ff56587 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 3 Aug 2020 17:56:19 -0600 Subject: [PATCH 5/8] Revert "ci: use NUM_CPU processes to test" This reverts commit de0f9043faf45909051a4df1d650e285d61cd3f1. --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e83bd8bb..fac4fc9c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -57,5 +57,5 @@ jobs: - name: Install capa run: pip install -e .[dev] - name: Run tests - run: pytest -n auto tests/ + run: pytest tests/ From c3b848183dda380c0c5d3a2956ae317ddd2530b2 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 3 Aug 2020 17:56:28 -0600 Subject: [PATCH 6/8] Revert "ci: use pytest-xdist for parallel testing" This reverts commit 7458014b21fa61deb7114b5462ce43e39897c858. --- .github/workflows/tests.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fac4fc9c..d6cdd802 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -52,8 +52,6 @@ jobs: uses: actions/setup-python@v2 with: python-version: 2.7 - - name: Install pytest-xdist - run: pip install pytest-xdist - name: Install capa run: pip install -e .[dev] - name: Run tests From f02412bcc5c75f23a9644c0782e900339ab53a2e Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 3 Aug 2020 19:10:05 -0600 Subject: [PATCH 7/8] tests: fix function address --- tests/test_viv_features.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index ba3c5e4b..9bcddb54 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -86,11 +86,7 @@ def test_string_features(mimikatz): def test_string_pointer_features(mimikatz): - # viv doesn't identify this function, because its only referenced by vtable - # but thats not the point of this test - mimikatz.vw.makeFunction(0x44EE5A) - - features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EE5A)) + features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF)) assert capa.features.String("INPUTEVENT") in features @@ -109,11 +105,7 @@ def test_byte_features64(sample_lab21_01): def test_bytes_pointer_features(mimikatz): - # viv doesn't identify this function, because its only referenced by vtable - # but thats not the point of this test - mimikatz.vw.makeFunction(0x44EE5A) - - features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EE5A)) + features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF)) assert capa.features.Bytes("INPUTEVENT".encode("utf-16le")).evaluate(features) == True From e5921e9267efe4d9c8f5798dceac95587a56c104 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 4 Aug 2020 14:35:08 +0000 Subject: [PATCH 8/8] Sync capa rules submodule --- README.md | 2 +- rules | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5d0f40b7..1aece2d0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ![capa](.github/logo.png) [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) -[![Number of rules](https://img.shields.io/badge/rules-289-blue.svg)](https://github.com/fireeye/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-290-blue.svg)](https://github.com/fireeye/capa-rules) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) capa detects capabilities in executable files. diff --git a/rules b/rules index 096d2b29..0a9c31fd 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 096d2b29dd307f400f04d96084e4823049dcf2ff +Subproject commit 0a9c31fd3a5e62c425261452e480c4d2d9fcbd13