From e4abe46d16f163ea91865ce9b471043ff0211b02 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 11 May 2022 12:53:17 -0600 Subject: [PATCH 01/22] elf: better detect Linux OS closes #988 --- capa/features/extractors/elf.py | 57 ++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 60200a7c..7155773a 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -47,6 +47,19 @@ class OS(str, Enum): NACL = "nacl" +# via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658 +# and here: https://github.com/bminor/binutils-gdb/blob/34c54daa337da9fadf87d2706d6a590ae1f88f4d/include/elf/common.h#L933-L939 +GNU_ABI_TAG = { + 0: OS.LINUX, + 1: OS.HURD, + 2: OS.SOLARIS, + 3: OS.FREEBSD, + 4: OS.NETBSD, + 5: OS.SYLLABLE, + 6: OS.NACL, +} + + def detect_elf_os(f: BinaryIO) -> str: f.seek(0x0) file_header = f.read(0x40) @@ -141,7 +154,7 @@ def detect_elf_os(f: BinaryIO) -> str: PT_NOTE = 0x4 (p_type,) = struct.unpack_from(endian + "I", phent, 0x0) - logger.debug("p_type: 0x%04x", p_type) + logger.debug("ph:p_type: 0x%04x", p_type) if p_type != PT_NOTE: continue @@ -152,7 +165,7 @@ def detect_elf_os(f: BinaryIO) -> str: else: raise NotImplementedError() - logger.debug("p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz) + logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz) f.seek(p_offset) note = f.read(p_filesz) @@ -164,7 +177,7 @@ def detect_elf_os(f: BinaryIO) -> str: name_offset = 0xC desc_offset = name_offset + align(namesz, 0x4) - logger.debug("namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) + logger.debug("ph:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") logger.debug("name: %s", name) @@ -178,17 +191,6 @@ def detect_elf_os(f: BinaryIO) -> str: desc = note[desc_offset : desc_offset + descsz] abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0) - # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658 - # and here: https://github.com/bminor/binutils-gdb/blob/34c54daa337da9fadf87d2706d6a590ae1f88f4d/include/elf/common.h#L933-L939 - GNU_ABI_TAG = { - 0: OS.LINUX, - 1: OS.HURD, - 2: OS.SOLARIS, - 3: OS.FREEBSD, - 4: OS.NETBSD, - 5: OS.SYLLABLE, - 6: OS.NACL, - } logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) if abi_tag in GNU_ABI_TAG: @@ -262,7 +264,7 @@ def detect_elf_os(f: BinaryIO) -> str: if sh_type != SHT_NOTE: continue - logger.debug("sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size) + logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size) f.seek(sh_offset) note = f.read(sh_size) @@ -274,7 +276,7 @@ def detect_elf_os(f: BinaryIO) -> str: name_offset = 0xC desc_offset = name_offset + align(namesz, 0x4) - logger.debug("namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) + logger.debug("sh:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") logger.debug("name: %s", name) @@ -282,6 +284,29 @@ def detect_elf_os(f: BinaryIO) -> str: if name == "Linux": logger.debug("note owner: %s", "LINUX") ret = OS.LINUX if not ret else ret + elif name == "OpenBSD": + logger.debug("note owner: %s", "OPENBSD") + ret = OS.OPENBSD if not ret else ret + elif name == "NetBSD": + logger.debug("note owner: %s", "NETBSD") + ret = OS.NETBSD if not ret else ret + elif name == "FreeBSD": + logger.debug("note owner: %s", "FREEBSD") + ret = OS.FREEBSD if not ret else ret + elif name == "GNU": + if descsz < 16: + continue + + desc = note[desc_offset : desc_offset + descsz] + abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0) + logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) + + if abi_tag in GNU_ABI_TAG: + # update only if not set + # so we can get the debugging output of subsequent strategies + ret = GNU_ABI_TAG[abi_tag] if not ret else ret + logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", ret, kmajor, kminor, kpatch) + return ret.value if ret is not None else "unknown" From c30ce6e73a71b707283cc6010e4e57ea61a9f92c Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 11 May 2022 12:54:30 -0600 Subject: [PATCH 02/22] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5168b849..6c855cf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ ### Bug Fixes - improve handling _ prefix compile/link artifact #924 @mike-hunhoff +- better detect OS in ELF samples #988 @williballenthin ### capa explorer IDA Pro plugin - improve file format extraction #918 @mike-hunhoff From a6427364e0c47f593c842d9a814af9eae85f2835 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 11 May 2022 13:09:12 -0600 Subject: [PATCH 03/22] tests: add test demonstrating elf OS detection --- tests/fixtures.py | 2 ++ tests/test_os_detection.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 tests/test_os_detection.py diff --git a/tests/fixtures.py b/tests/fixtures.py index a23f0c57..bd02cbb6 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -220,6 +220,8 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "79abd17391adc6251ecdc58d13d76baf.dll_") elif name.startswith("946a9"): return os.path.join(CD, "data", "946a99f36a46d335dec080d9a4371940.dll_") + elif name.startswith("2f7f5f"): + return os.path.join(CD, "data", "2f7f5fb5de175e770d7eae87666f9831.elf_") else: raise ValueError("unexpected sample fixture: %s" % name) diff --git a/tests/test_os_detection.py b/tests/test_os_detection.py new file mode 100644 index 00000000..3f8ed3ea --- /dev/null +++ b/tests/test_os_detection.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2022 FireEye, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import pytest + +import capa.features.extractors.elf + +from fixtures import * + + +def test_elf_section_gnu_abi_tag(): + path = get_data_path_by_name("2f7f5f") + with open(path, "rb") as f: + assert capa.features.extractors.elf.detect_elf_os(f) == "linux" + + +def test_elf_program_header_gnu_abi_tag(): + path = get_data_path_by_name("7351f.elf") + with open(path, "rb") as f: + assert capa.features.extractors.elf.detect_elf_os(f) == "linux" + From 521cbf91042aa6adb9fb29fe10c5a26835e1db89 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 11 May 2022 13:10:08 -0600 Subject: [PATCH 04/22] pep8 --- capa/features/extractors/elf.py | 1 - tests/test_os_detection.py | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 7155773a..49d72004 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -306,7 +306,6 @@ def detect_elf_os(f: BinaryIO) -> str: # so we can get the debugging output of subsequent strategies ret = GNU_ABI_TAG[abi_tag] if not ret else ret logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", ret, kmajor, kminor, kpatch) - return ret.value if ret is not None else "unknown" diff --git a/tests/test_os_detection.py b/tests/test_os_detection.py index 3f8ed3ea..e2f850d7 100644 --- a/tests/test_os_detection.py +++ b/tests/test_os_detection.py @@ -9,20 +9,18 @@ # See the License for the specific language governing permissions and limitations under the License. import pytest +from fixtures import * import capa.features.extractors.elf -from fixtures import * - def test_elf_section_gnu_abi_tag(): path = get_data_path_by_name("2f7f5f") with open(path, "rb") as f: assert capa.features.extractors.elf.detect_elf_os(f) == "linux" - + def test_elf_program_header_gnu_abi_tag(): path = get_data_path_by_name("7351f.elf") with open(path, "rb") as f: assert capa.features.extractors.elf.detect_elf_os(f) == "linux" - From cded1d31250d7c8bf7fc2ae8405d86594f9b75ab Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 12 May 2022 06:35:04 +0000 Subject: [PATCH 05/22] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 11ae8d0d..e7ffa167 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 11ae8d0d38b9703b999c988f927198c8fd132ff5 +Subproject commit e7ffa1671778309dfbaed1c9d2389b69addc4e5c From 8f2c9cbd11e22827145005df9f7c34edbb182cbc Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Fri, 20 May 2022 14:36:37 +0000 Subject: [PATCH 06/22] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b52eb948..9a44fea6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ - remove /x32 and /x64 flavors of number and operand features #932 @williballenthin - the tool now accepts multiple paths to rules, and JSON doc updated accordingly @williballenthin -### New Rules (6) +### New Rules (7) - data-manipulation/encryption/aes/manually-build-aes-constants huynh.t.nhan@gmail.com - nursery/get-process-image-filename michael.hunhoff@mandiant.com @@ -29,6 +29,7 @@ - compiler/zig/compiled-with-zig jakub.jozwiak@mandiant.com - anti-analysis/packer/huan/packed-with-huan jakub.jozwiak@mandiant.com - internal/limitation/file/internal-dotnet-file-limitation william.ballenthin@mandiant.com +- nursery/get-os-information-via-kuser_shared_data @mr-tz - ### Bug Fixes diff --git a/README.md b/README.md index f3bcf29c..065169f0 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-665-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-666-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 52ff654c..eaa568ab 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 52ff654ca0a73235df7d2e9bfbd52961f957cbc8 +Subproject commit eaa568abf6c71b306eb66626b584d08ed1f26b64 From ddb6c810eb6c7569f427b980fc6d0f97fb2103aa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 May 2022 14:10:21 +0000 Subject: [PATCH 07/22] build(deps-dev): bump types-requests from 2.27.25 to 2.27.27 Bumps [types-requests](https://github.com/python/typeshed) from 2.27.25 to 2.27.27. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-requests dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6960d9ec..236a6ade 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,7 @@ setuptools.setup( "types-tabulate==0.8.9", "types-termcolor==1.1.4", "types-psutil==5.8.22", - "types_requests==2.27.25", + "types_requests==2.27.27", ], "build": [ "pyinstaller==5.0.1", From 6f234b57fcf967bc7e7bbfa8d09f41649e65f70d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 May 2022 14:10:27 +0000 Subject: [PATCH 08/22] build(deps-dev): bump psutil from 5.9.0 to 5.9.1 Bumps [psutil](https://github.com/giampaolo/psutil) from 5.9.0 to 5.9.1. - [Release notes](https://github.com/giampaolo/psutil/releases) - [Changelog](https://github.com/giampaolo/psutil/blob/master/HISTORY.rst) - [Commits](https://github.com/giampaolo/psutil/compare/release-5.9.0...release-5.9.1) --- updated-dependencies: - dependency-name: psutil dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6960d9ec..a382cbc2 100644 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ setuptools.setup( "black==22.3.0", "isort==5.10.1", "mypy==0.950", - "psutil==5.9.0", + "psutil==5.9.1", "stix2==3.0.1", "requests==2.27.1", # type stubs for mypy From 5a71caf09cc07ca29ef5757f207ae10c5ad35652 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 25 May 2022 14:44:10 +0000 Subject: [PATCH 09/22] add Github Codespaces config --- .devcontainer/Dockerfile | 21 ++++++++++++++ .devcontainer/devcontainer.json | 51 +++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000..33398f53 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,21 @@ +# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile + +# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster +ARG VARIANT="3.10-bullseye" +FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} + +# [Choice] Node.js version: none, lts/*, 16, 14, 12, 10 +ARG NODE_VERSION="none" +RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi + +# [Optional] If your pip requirements rarely change, uncomment this section to add them to the image. +# COPY requirements.txt /tmp/pip-tmp/ +# RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \ +# && rm -rf /tmp/pip-tmp + +# [Optional] Uncomment this section to install additional OS packages. +# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends + +# [Optional] Uncomment this line to install global node packages. +# RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..d76fc748 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,51 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3 +{ + "name": "Python 3", + "build": { + "dockerfile": "Dockerfile", + "context": "..", + "args": { + // Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6 + // Append -bullseye or -buster to pin to an OS version. + // Use -bullseye variants on local on arm64/Apple Silicon. + "VARIANT": "3.10", + // Options + "NODE_VERSION": "none" + } + }, + + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", + "python.formatting.blackPath": "/usr/local/py-utils/bin/black", + "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", + "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", + "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", + "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", + "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", + "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", + "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance" + ], + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev]", + + // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + "remoteUser": "vscode", + "features": { + "git": "latest" + } +} From 3514d5c05cc5e5c6f1e42ac86eb5f791ac72499f Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 26 May 2022 11:19:31 -0600 Subject: [PATCH 10/22] dotnet: support file/function scope class and namespace features (#1030) --- CHANGELOG.md | 1 + capa/features/common.py | 10 + capa/features/extractors/dnfile/file.py | 27 ++- capa/features/extractors/dnfile/helpers.py | 201 +++++++++++++-------- capa/features/extractors/dnfile/insn.py | 92 +++++++--- capa/features/extractors/dotnetfile.py | 60 ++++-- capa/rules.py | 9 +- tests/fixtures.py | 5 + 8 files changed, 283 insertions(+), 122 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6dd85d7d..35e7f1b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - add file function-name extraction for dotnet files #1015 @mike-hunhoff - add unmanaged call characteristic for dotnet files #1023 @mike-hunhoff - add mixed mode characteristic feature extraction for dotnet files #1024 @mike-hunhoff + - emit class and namespace features for dotnet files #1030 @mike-hunhoff ### Breaking Changes diff --git a/capa/features/common.py b/capa/features/common.py index ed2b288e..b22bbd37 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -176,6 +176,16 @@ class String(Feature): super(String, self).__init__(value, description=description) +class Class(Feature): + def __init__(self, value: str, description=None): + super(Class, self).__init__(value, description=description) + + +class Namespace(Feature): + def __init__(self, value: str, description=None): + super(Namespace, self).__init__(value, description=description) + + class Substring(String): def __init__(self, value: str, description=None): super(Substring, self).__init__(value, description=description) diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 248d8108..0774eb96 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -12,10 +12,17 @@ from typing import TYPE_CHECKING, Tuple, Iterator if TYPE_CHECKING: import dnfile - from capa.features.common import Feature, Format, String, Characteristic - from capa.features.file import Import, FunctionName + from capa.features.common import ( + Feature, + Format, + String, + Characteristic, + Namespace, + Class, + ) import capa.features.extractors +from capa.features.file import Import, FunctionName def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: @@ -34,8 +41,16 @@ def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, int]]: yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe) -def extract_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, int]]: - yield from capa.features.extractors.dotnetfile.extract_mixed_mode_characteristic_features(pe=pe) +def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe) + + +def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe) + + +def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe) def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: @@ -49,5 +64,7 @@ FILE_HANDLERS = ( extract_file_function_names, extract_file_strings, extract_file_format, - extract_mixed_mode_characteristic_features, + extract_file_mixed_mode_characteristic_features, + extract_file_namespace_features, + extract_file_class_features, ) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 7c8adcbd..968bbd10 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -41,8 +41,72 @@ class DnfileMethodBodyReader(CilMethodBodyReaderBase): return self.offset -def calculate_dotnet_token_value(table: int, rid: int) -> int: - return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) +class DnClass(object): + def __init__(self, token: int, namespace: str, classname: str): + self.token: int = token + self.namespace: str = namespace + self.classname: str = classname + + def __hash__(self): + return hash((self.token,)) + + def __eq__(self, other): + return self.token == other.token + + def __str__(self): + return DnClass.format_name(self.namespace, self.classname) + + def __repr__(self): + return str(self) + + @staticmethod + def format_name(namespace: str, classname: str): + name: str = classname + if namespace: + # like System.IO.File::OpenRead + name = f"{namespace}.{name}" + return name + + +class DnMethod(DnClass): + def __init__(self, token: int, namespace: str, classname: str, methodname: str): + super(DnMethod, self).__init__(token, namespace, classname) + self.methodname: str = methodname + + def __str__(self): + return DnMethod.format_name(self.namespace, self.classname, self.methodname) + + @staticmethod + def format_name(namespace: str, classname: str, methodname: str): # type: ignore + # like File::OpenRead + name: str = f"{classname}::{methodname}" + if namespace: + # like System.IO.File::OpenRead + name = f"{namespace}.{name}" + return name + + +class DnUnmanagedMethod: + def __init__(self, token: int, modulename: str, methodname: str): + self.token: int = token + self.modulename: str = modulename + self.methodname: str = methodname + + def __hash__(self): + return hash((self.token,)) + + def __eq__(self, other): + return self.token == other.token + + def __str__(self): + return DnUnmanagedMethod.format_name(self.modulename, self.methodname) + + def __repr__(self): + return str(self) + + @staticmethod + def format_name(modulename, methodname): + return f"{modulename}.{methodname}" def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: @@ -86,12 +150,14 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str except UnicodeDecodeError as e: logger.warn("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e)) return None + if user_string is None: return None + return user_string.value -def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: +def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnMethod]: """get managed imports from MemberRef table see https://www.ntcore.com/files/dotnetformat.htm @@ -105,65 +171,34 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: TypeName (index into String heap) TypeNamespace (index into String heap) """ - if not is_dotnet_table_valid(pe, "MemberRef"): - return - - for (rid, row) in enumerate(pe.net.mdtables.MemberRef): - if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): + for (rid, row) in enumerate(iter_dotnet_table(pe, "MemberRef")): + if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow): continue - # like File::OpenRead - name = f"{row.Class.row.TypeName}::{row.Name}" - - # ECMA II.22.38: TypeNamespace can be null or non-null - if row.Class.row.TypeNamespace: - # like System.IO.File::OpenRead - name = f"{row.Class.row.TypeNamespace}.{name}" - token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1) - - yield token, name + yield DnMethod(token, row.Class.row.TypeNamespace, row.Class.row.TypeName, row.Name) -def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: - """get unmanaged imports from ImplMap table +def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnMethod]: + """get managed method names from TypeDef table see https://www.ntcore.com/files/dotnetformat.htm - 28 - ImplMap Table - ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch - MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index) - ImportName (index into the String heap) - ImportScope (index into the ModuleRef table) + 02 - TypeDef Table + Each row represents a class in the current assembly. + TypeName (index into String heap) + TypeNamespace (index into String heap) + MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type) """ - if not is_dotnet_table_valid(pe, "ImplMap"): - return - - for row in pe.net.mdtables.ImplMap: - dll: str = row.ImportScope.row.Name - symbol: str = row.ImportName - - # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the - # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded - # MethodDef table token to help us later record native import method calls made from CIL - token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index) - - # like Kernel32.dll - if dll and "." in dll: - dll = dll.split(".")[0] - - # like kernel32.CreateFileA - name: str = f"{dll}.{symbol}" - - yield token, name + for row in iter_dotnet_table(pe, "TypeDef"): + for index in row.MethodList: + token = calculate_dotnet_token_value(index.table.number, index.row_index) + yield DnMethod(token, row.TypeNamespace, row.TypeName, index.row.Name) def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: """get managed methods from MethodDef table""" - if not is_dotnet_table_valid(pe, "MethodDef"): - return - - for row in pe.net.mdtables.MethodDef: + for row in iter_dotnet_table(pe, "MethodDef"): if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)): # skip methods that do not have a method body continue @@ -175,38 +210,48 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody] yield body +def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]: + """get unmanaged imports from ImplMap table + + see https://www.ntcore.com/files/dotnetformat.htm + + 28 - ImplMap Table + ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch + MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index) + ImportName (index into the String heap) + ImportScope (index into the ModuleRef table) + """ + for row in iter_dotnet_table(pe, "ImplMap"): + modulename: str = row.ImportScope.row.Name + methodname: str = row.ImportName + + # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the + # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded + # MethodDef table token to help us later record native import method calls made from CIL + token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + + # like Kernel32.dll + if modulename and "." in modulename: + modulename = modulename.split(".")[0] + + # like kernel32.CreateFileA + yield DnUnmanagedMethod(token, modulename, methodname) + + +def calculate_dotnet_token_value(table: int, rid: int) -> int: + return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) + + def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: return bool(getattr(pe.net.mdtables, table_name, None)) -def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: - """get managed method names from TypeDef table - - see https://www.ntcore.com/files/dotnetformat.htm - - 02 - TypeDef Table - Each row represents a class in the current assembly. - TypeName (index into String heap) - TypeNamespace (index into String heap) - MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type) - """ - if not is_dotnet_table_valid(pe, "TypeDef"): - return - - for row in pe.net.mdtables.TypeDef: - for index in row.MethodList: - # like File::OpenRead - name = f"{row.TypeName}::{index.row.Name}" - - # ECMA II.22.37: TypeNamespace can be null or non-null - if row.TypeNamespace: - # like System.IO.File::OpenRead - name = f"{row.TypeNamespace}.{name}" - - token = calculate_dotnet_token_value(index.table.number, index.row_index) - - yield token, name - - def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: return not bool(pe.net.Flags.CLR_ILONLY) + + +def iter_dotnet_table(pe: dnfile.dnPE, name: str) -> Iterator[Any]: + if not is_dotnet_table_valid(pe, name): + return + for row in getattr(pe.net.mdtables, name): + yield row diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index 5974cde1..974ffa61 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -8,7 +8,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, Tuple, Iterator, Optional +from typing import TYPE_CHECKING, Any, Dict, Tuple, Union, Iterator, Optional if TYPE_CHECKING: from dncil.cil.instruction import Instruction @@ -16,55 +16,58 @@ if TYPE_CHECKING: from capa.features.common import Feature import dnfile -from dncil.clr.token import StringToken, InvalidToken +from dncil.clr.token import Token, StringToken, InvalidToken from dncil.cil.opcode import OpCodes import capa.features.extractors.helpers from capa.features.insn import API, Number -from capa.features.common import String, Characteristic +from capa.features.common import Class, String, Namespace, Characteristic from capa.features.extractors.dnfile.helpers import ( + DnClass, + DnMethod, + DnUnmanagedMethod, resolve_dotnet_token, read_dotnet_user_string, get_dotnet_managed_imports, + get_dotnet_managed_methods, get_dotnet_unmanaged_imports, - get_dotnet_managed_method_names, ) def get_managed_imports(ctx: Dict) -> Dict: if "managed_imports_cache" not in ctx: ctx["managed_imports_cache"] = {} - for (token, name) in get_dotnet_managed_imports(ctx["pe"]): - ctx["managed_imports_cache"][token] = name + for method in get_dotnet_managed_imports(ctx["pe"]): + ctx["managed_imports_cache"][method.token] = method return ctx["managed_imports_cache"] def get_unmanaged_imports(ctx: Dict) -> Dict: if "unmanaged_imports_cache" not in ctx: ctx["unmanaged_imports_cache"] = {} - for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]): - ctx["unmanaged_imports_cache"][token] = name + for imp in get_dotnet_unmanaged_imports(ctx["pe"]): + ctx["unmanaged_imports_cache"][imp.token] = imp return ctx["unmanaged_imports_cache"] def get_methods(ctx: Dict) -> Dict: if "methods_cache" not in ctx: ctx["methods_cache"] = {} - for (token, name) in get_dotnet_managed_method_names(ctx["pe"]): - ctx["methods_cache"][token] = name + for method in get_dotnet_managed_methods(ctx["pe"]): + ctx["methods_cache"][method.token] = method return ctx["methods_cache"] -def get_callee_name(ctx: Dict, token: int) -> str: - """map dotnet token to method name""" - name: str = get_managed_imports(ctx).get(token, "") - if not name: +def get_callee(ctx: Dict, token: int) -> Union[DnMethod, DnUnmanagedMethod, None]: + """map dotnet token to un/managed method""" + callee: Union[DnMethod, DnUnmanagedMethod, None] = get_managed_imports(ctx).get(token, None) + if not callee: # we must check unmanaged imports before managed methods because we map forwarded managed methods # to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis - name = get_unmanaged_imports(ctx).get(token, "") - if not name: - name = get_methods(ctx).get(token, "") - return name + callee = get_unmanaged_imports(ctx).get(token, None) + if not callee: + callee = get_methods(ctx).get(token, None) + return callee def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]: @@ -72,18 +75,51 @@ def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruc if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return - name: str = get_callee_name(f.ctx, insn.operand.value) - if not name: + callee: Union[DnMethod, DnUnmanagedMethod, None] = get_callee(f.ctx, insn.operand.value) + if callee is None: return - if "::" in name: - # like System.IO.File::OpenRead - yield API(name), insn.offset - else: + if isinstance(callee, DnUnmanagedMethod): # like kernel32.CreateFileA - dll, _, symbol = name.rpartition(".") - for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): - yield API(name_variant), insn.offset + for name in capa.features.extractors.helpers.generate_symbols(callee.modulename, callee.methodname): + yield API(name), insn.offset + else: + # like System.IO.File::Delete + yield API(str(callee)), insn.offset + + +def extract_insn_class_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Class, int]]: + """parse instruction class features""" + if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + return + + row: Any = resolve_dotnet_token(f.ctx["pe"], Token(insn.operand.value)) + + if not isinstance(row, dnfile.mdtable.MemberRefRow): + return + if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): + return + + yield Class(DnClass.format_name(row.Class.row.TypeNamespace, row.Class.row.TypeName)), insn.offset + + +def extract_insn_namespace_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Iterator[Tuple[Namespace, int]]: + """parse instruction namespace features""" + if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + return + + row: Any = resolve_dotnet_token(f.ctx["pe"], Token(insn.operand.value)) + + if not isinstance(row, dnfile.mdtable.MemberRefRow): + return + if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): + return + if not row.Class.row.TypeNamespace: + return + + yield Namespace(row.Class.row.TypeNamespace), insn.offset def extract_insn_number_features( @@ -138,5 +174,7 @@ INSTRUCTION_HANDLERS = ( extract_insn_api_features, extract_insn_number_features, extract_insn_string_features, + extract_insn_namespace_features, + extract_insn_class_features, extract_unmanaged_call_characteristic_features, ) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 965c4ea4..e43d75f3 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -1,4 +1,5 @@ import logging +import itertools from typing import Tuple, Iterator import dnfile @@ -14,18 +15,23 @@ from capa.features.common import ( ARCH_AMD64, FORMAT_DOTNET, Arch, + Class, Format, String, Feature, + Namespace, Characteristic, ) from capa.features.extractors.base_extractor import FeatureExtractor from capa.features.extractors.dnfile.helpers import ( + DnClass, + DnMethod, + iter_dotnet_table, is_dotnet_mixed_mode, get_dotnet_managed_imports, + get_dotnet_managed_methods, calculate_dotnet_token_value, get_dotnet_unmanaged_imports, - get_dotnet_managed_method_names, ) logger = logging.getLogger(__name__) @@ -36,20 +42,50 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]: def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]: - for (token, name) in get_dotnet_managed_imports(pe): + for method in get_dotnet_managed_imports(pe): # like System.IO.File::OpenRead - yield Import(name), token + yield Import(str(method)), method.token - for (token, name) in get_dotnet_unmanaged_imports(pe): + for imp in get_dotnet_unmanaged_imports(pe): # like kernel32.CreateFileA - dll, _, symbol = name.rpartition(".") - for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): - yield Import(name_variant), token + for name in capa.features.extractors.helpers.generate_symbols(imp.modulename, imp.methodname): + yield Import(name), imp.token def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, int]]: - for (token, name) in get_dotnet_managed_method_names(pe): - yield FunctionName(name), token + for method in get_dotnet_managed_methods(pe): + yield FunctionName(str(method)), method.token + + +def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, int]]: + """emit namespace features from TypeRef and TypeDef tables""" + + # namespaces may be referenced multiple times, so we need to filter + namespaces = set() + + for row in iter_dotnet_table(pe, "TypeDef"): + namespaces.add(row.TypeNamespace) + + for row in iter_dotnet_table(pe, "TypeRef"): + namespaces.add(row.TypeNamespace) + + # namespaces may be empty, discard + namespaces.discard("") + + for namespace in namespaces: + # namespace do not have an associated token, so we yield 0x0 + yield Namespace(namespace), 0x0 + + +def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, int]]: + """emit class features from TypeRef and TypeDef tables""" + for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeDef")): + token = calculate_dotnet_token_value(pe.net.mdtables.TypeDef.number, rid + 1) + yield Class(DnClass.format_name(row.TypeNamespace, row.TypeName)), token + + for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeRef")): + token = calculate_dotnet_token_value(pe.net.mdtables.TypeRef.number, rid + 1) + yield Class(DnClass.format_name(row.TypeNamespace, row.TypeName)), token def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]: @@ -71,7 +107,7 @@ def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, in yield from capa.features.extractors.common.extract_file_strings(pe.__data__) -def extract_mixed_mode_characteristic_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Characteristic, int]]: +def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Characteristic, int]]: if is_dotnet_mixed_mode(pe): yield Characteristic("mixed mode"), 0x0 @@ -87,7 +123,9 @@ FILE_HANDLERS = ( extract_file_function_names, extract_file_strings, extract_file_format, - extract_mixed_mode_characteristic_features, + extract_file_mixed_mode_characteristic_features, + extract_file_namespace_features, + extract_file_class_features, ) diff --git a/capa/rules.py b/capa/rules.py index 1421c70a..ca824728 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -101,6 +101,8 @@ SUPPORTED_FEATURES: Dict[str, Set] = { capa.features.common.Characteristic("embedded pe"), capa.features.common.String, capa.features.common.Format, + capa.features.common.Class, + capa.features.common.Namespace, capa.features.common.Characteristic("mixed mode"), }, FUNCTION_SCOPE: { @@ -136,6 +138,8 @@ SUPPORTED_FEATURES: Dict[str, Set] = { capa.features.common.Characteristic("call $+5"), capa.features.common.Characteristic("cross section flow"), capa.features.common.Characteristic("unmanaged call"), + capa.features.common.Class, + capa.features.common.Namespace, }, } @@ -289,8 +293,11 @@ def parse_feature(key: str): elif key == "format": return capa.features.common.Format elif key == "arch": - return capa.features.common.Arch + elif key == "class": + return capa.features.common.Class + elif key == "namespace": + return capa.features.common.Namespace else: raise InvalidRule("unexpected statement: %s" % key) diff --git a/tests/fixtures.py b/tests/fixtures.py index a3ff826e..ca9e8629 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -679,8 +679,13 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.ctor"), True), ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.cctor"), False), ("hello-world", "file", capa.features.common.String("Hello World!"), True), + ("hello-world", "file", capa.features.common.Class("HelloWorld"), True), + ("hello-world", "file", capa.features.common.Class("System.Console"), True), + ("hello-world", "file", capa.features.common.Namespace("System.Diagnostics"), True), ("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True), ("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True), + ("hello-world", "function=0x250, bb=0x250, insn=0x257", capa.features.common.Class("System.Console"), True), + ("hello-world", "function=0x250, bb=0x250, insn=0x257", capa.features.common.Namespace("System"), True), ("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True), ("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True), ("_1c444", "file", capa.features.common.String(r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall"), True), From 02444d801ee4e8fc196695c92424b7aa5a201ce0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 May 2022 14:14:14 +0000 Subject: [PATCH 11/22] build(deps-dev): bump mypy from 0.950 to 0.960 Bumps [mypy](https://github.com/python/mypy) from 0.950 to 0.960. - [Release notes](https://github.com/python/mypy/releases) - [Commits](https://github.com/python/mypy/compare/v0.950...v0.960) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 635e02b9..8ef02774 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,7 @@ setuptools.setup( "pycodestyle==2.8.0", "black==22.3.0", "isort==5.10.1", - "mypy==0.950", + "mypy==0.960", "psutil==5.9.1", "stix2==3.0.1", "requests==2.27.1", From 2f519cba303948ddab515bfb231166ce7c945514 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 May 2022 14:14:17 +0000 Subject: [PATCH 12/22] build(deps-dev): bump types-requests from 2.27.27 to 2.27.29 Bumps [types-requests](https://github.com/python/typeshed) from 2.27.27 to 2.27.29. - [Release notes](https://github.com/python/typeshed/releases) - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-requests dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 635e02b9..7250058f 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,7 @@ setuptools.setup( "types-tabulate==0.8.9", "types-termcolor==1.1.4", "types-psutil==5.8.22", - "types_requests==2.27.27", + "types_requests==2.27.29", ], "build": [ "pyinstaller==5.0.1", From 44a5dc0cd0eb6dd23aea82c6e19bd63e4147d84a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 31 May 2022 09:18:38 +0000 Subject: [PATCH 13/22] build(deps-dev): bump pyinstaller from 5.0.1 to 5.1 Bumps [pyinstaller](https://github.com/pyinstaller/pyinstaller) from 5.0.1 to 5.1. - [Release notes](https://github.com/pyinstaller/pyinstaller/releases) - [Changelog](https://github.com/pyinstaller/pyinstaller/blob/develop/doc/CHANGES.rst) - [Commits](https://github.com/pyinstaller/pyinstaller/compare/v5.0.1...v5.1) --- updated-dependencies: - dependency-name: pyinstaller dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e1849b31..3cc5461f 100644 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ setuptools.setup( "types_requests==2.27.29", ], "build": [ - "pyinstaller==5.0.1", + "pyinstaller==5.1", ], }, zip_safe=False, From 3d0a59cf747b33864344b7ffe0fed45bb8a4959b Mon Sep 17 00:00:00 2001 From: Moritz Date: Tue, 31 May 2022 13:29:04 +0200 Subject: [PATCH 14/22] Update pyinstaller.spec (#1042) * Update pyinstaller.spec --- .github/pyinstaller/pyinstaller.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/pyinstaller/pyinstaller.spec b/.github/pyinstaller/pyinstaller.spec index f1c4fb1b..5cd7b18b 100644 --- a/.github/pyinstaller/pyinstaller.spec +++ b/.github/pyinstaller/pyinstaller.spec @@ -95,7 +95,7 @@ exe = EXE(pyz, a.datas, exclude_binaries=False, name='capa', - # TODO not working anymore for unknown reason icon='logo.ico', + icon='logo.ico', debug=False, strip=None, upx=True, @@ -109,4 +109,4 @@ exe = EXE(pyz, # a.datas, # strip=None, # upx=True, -# name='capa-dat') \ No newline at end of file +# name='capa-dat') From 2f47fddda94f0d48a3732ca45dc41afad55509be Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 31 May 2022 13:42:34 +0000 Subject: [PATCH 15/22] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index eaa568ab..1fee68e7 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit eaa568abf6c71b306eb66626b584d08ed1f26b64 +Subproject commit 1fee68e72e5da3cf05cbf349d8b4df13dcf47a85 From cb703aea18b90c5eb4be90060dee63d796f4273b Mon Sep 17 00:00:00 2001 From: Moritz Date: Tue, 31 May 2022 22:33:53 +0200 Subject: [PATCH 16/22] Update build.yml --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 022fc0ee..ca56d4e3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -38,7 +38,7 @@ jobs: - if: matrix.os == 'ubuntu-18.04' run: sudo apt-get install -y libyaml-dev - name: Upgrade pip, setuptools - run: pip install --upgrade pip setuptools + run: python -m pip install --upgrade pip setuptools - name: Install capa with build requirements run: pip install -e .[build] - name: Build standalone executable From 486ffed4bd0f6e3e800ecde8312ba9bbcfbc34c6 Mon Sep 17 00:00:00 2001 From: Zander Work Date: Tue, 31 May 2022 16:35:28 -0400 Subject: [PATCH 17/22] ignore .git/ when loading capa rules (#1038) * ignore .git/ when loading capa rules * moved check outside of for loop --- capa/ida/plugin/form.py | 4 +++- capa/main.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index d7a2a679..a2cc5beb 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -650,10 +650,12 @@ class CapaExplorerForm(idaapi.PluginForm): rule_paths.append(rule_path) elif os.path.isdir(rule_path): for root, dirs, files in os.walk(rule_path): - if ".github" in root: + if ".git" in root: # the .github directory contains CI config in capa-rules # this includes some .yml files # these are not rules + # additionally, .git has files that are not .yml and generate the warning + # skip those too continue for file in files: if not file.endswith(".yml"): diff --git a/capa/main.py b/capa/main.py index 016f3fd0..8fe4085f 100644 --- a/capa/main.py +++ b/capa/main.py @@ -583,12 +583,13 @@ def get_rules(rule_paths: List[str], disable_progress=False) -> List[Rule]: elif os.path.isdir(rule_path): logger.debug("reading rules from directory %s", rule_path) for root, dirs, files in os.walk(rule_path): - if ".github" in root: + if ".git" in root: # the .github directory contains CI config in capa-rules # this includes some .yml files # these are not rules + # additionally, .git has files that are not .yml and generate the warning + # skip those too continue - for file in files: if not file.endswith(".yml"): if not (file.startswith(".git") or file.endswith((".git", ".md", ".txt"))): @@ -596,7 +597,6 @@ def get_rules(rule_paths: List[str], disable_progress=False) -> List[Rule]: # other things maybe are rules, but are mis-named. logger.warning("skipping non-.yml file: %s", file) continue - rule_path = os.path.join(root, file) rule_file_paths.append(rule_path) From 1df60186f0b1c0795da5ce69adf106a0b2d4972c Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Tue, 31 May 2022 23:05:13 +0200 Subject: [PATCH 18/22] fix!: authors instead of author --- capa/ida/plugin/view.py | 3 ++- capa/rules.py | 8 +++++++- scripts/lint.py | 15 +++++++-------- tests/test_fmt.py | 18 ++++++++++++------ tests/test_rules.py | 9 ++++++--- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index 3a1d324d..e76cc265 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -196,7 +196,8 @@ class CapaExplorerRulgenPreview(QtWidgets.QTextEdit): " meta:", " name: ", " namespace: ", - " author: %s" % author, + " authors:", + " - %s" % author, " scope: %s" % scope, " references: ", " examples:", diff --git a/capa/rules.py b/capa/rules.py index ca824728..9ca630ff 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -54,7 +54,7 @@ META_KEYS = ( "maec/malware-family", "maec/malware-category", "maec/malware-category-ov", - "author", + "authors", "description", "lib", "scope", @@ -1298,6 +1298,12 @@ class RuleSet: logger.debug('using rule "%s" and dependencies, found tag in meta.%s: %s', rule.name, k, v) rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name))) break + if isinstance(v, list): + for vv in v: + if tag in vv: + logger.debug('using rule "%s" and dependencies, found tag in meta.%s: %s', rule.name, k, vv) + rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name))) + break return RuleSet(list(rules_filtered)) def match(self, scope: Scope, features: FeatureSet, va: int) -> Tuple[FeatureSet, ceng.MatchResults]: diff --git a/scripts/lint.py b/scripts/lint.py index 553e9f2e..56dbec2e 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -43,9 +43,8 @@ import capa.rules import capa.engine import capa.helpers import capa.features.insn -import capa.features.common from capa.rules import Rule, RuleSet -from capa.features.common import Feature +from capa.features.common import String, Feature, Substring logger = logging.getLogger("lint") @@ -168,12 +167,12 @@ class InvalidScope(Lint): return rule.meta.get("scope") not in ("file", "function", "basic block", "instruction") -class MissingAuthor(Lint): - name = "missing author" - recommendation = "Add meta.author so that users know who to contact with questions" +class MissingAuthors(Lint): + name = "missing authors" + recommendation = "Add meta.authors so that users know who to contact with questions" def check_rule(self, ctx: Context, rule: Rule): - return "author" not in rule.meta + return "authors" not in rule.meta class MissingExamples(Lint): @@ -490,7 +489,7 @@ class FeatureStringTooShort(Lint): def check_features(self, ctx: Context, features: List[Feature]): for feature in features: - if isinstance(feature, (capa.features.common.String, capa.features.common.Substring)): + if isinstance(feature, (String, Substring)): assert isinstance(feature.value, str) if len(feature.value) < 4: self.recommendation = self.recommendation.format(feature.value) @@ -697,7 +696,7 @@ def lint_scope(ctx: Context, rule: Rule): META_LINTS = ( MissingNamespace(), NamespaceDoesntMatchRulePath(), - MissingAuthor(), + MissingAuthors(), MissingExamples(), MissingExampleOffset(), ExampleFileDNE(), diff --git a/tests/test_fmt.py b/tests/test_fmt.py index 92bd4ffa..de96a1f4 100644 --- a/tests/test_fmt.py +++ b/tests/test_fmt.py @@ -15,7 +15,8 @@ EXPECTED = textwrap.dedent( rule: meta: name: test rule - author: user@domain.com + authors: + - user@domain.com scope: function examples: - foo1234 @@ -38,7 +39,8 @@ def test_rule_reformat_top_level_elements(): - number: 2 meta: name: test rule - author: user@domain.com + authors: + - user@domain.com scope: function examples: - foo1234 @@ -55,7 +57,8 @@ def test_rule_reformat_indentation(): rule: meta: name: test rule - author: user@domain.com + authors: + - user@domain.com scope: function examples: - foo1234 @@ -75,7 +78,8 @@ def test_rule_reformat_order(): """ rule: meta: - author: user@domain.com + authors: + - user@domain.com examples: - foo1234 - bar5678 @@ -98,7 +102,8 @@ def test_rule_reformat_meta_update(): """ rule: meta: - author: user@domain.com + authors: + - user@domain.com examples: - foo1234 - bar5678 @@ -124,7 +129,8 @@ def test_rule_reformat_string_description(): rule: meta: name: test rule - author: user@domain.com + authors: + - user@domain.com scope: function features: - and: diff --git a/tests/test_rules.py b/tests/test_rules.py index 68afea7d..1f221e45 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -42,7 +42,8 @@ def test_rule_yaml(): rule: meta: name: test rule - author: user@domain.com + authors: + - user@domain.com scope: function examples: - foo1234 @@ -724,7 +725,8 @@ def test_filter_rules(): rule: meta: name: rule 1 - author: joe + authors: + - joe features: - api: CreateFile """ @@ -803,7 +805,8 @@ def test_filter_rules_missing_dependency(): rule: meta: name: rule 1 - author: joe + authors: + - joe features: - match: rule 2 """ From 781ec743106ecc6c4f368d31c46578ec1ef2923a Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 1 Jun 2022 12:12:01 +0000 Subject: [PATCH 19/22] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index e7ffa167..66200b7a 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit e7ffa1671778309dfbaed1c9d2389b69addc4e5c +Subproject commit 66200b7ac474f4abf307f05ea10c3885c322080e From bc8c4a032393ef9b62f36e05d91a0fe97c24ca19 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 1 Jun 2022 12:13:22 +0000 Subject: [PATCH 20/22] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 1fee68e7..c90bfeab 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 1fee68e72e5da3cf05cbf349d8b4df13dcf47a85 +Subproject commit c90bfeaba25c60257a4416dc32a350871ebae331 From 43dca13f26514f92b1c23ed1df03aed105c266d2 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Fri, 3 Jun 2022 17:16:28 +0000 Subject: [PATCH 21/22] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index c90bfeab..20c46c64 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit c90bfeaba25c60257a4416dc32a350871ebae331 +Subproject commit 20c46c64c867c38699e8db4826558db97d31f753 From 6cb4493b8e3c2f81e76b755e878d8b47a80a8143 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 6 Jun 2022 19:18:31 +0000 Subject: [PATCH 22/22] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 20c46c64..b96b279d 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 20c46c64c867c38699e8db4826558db97d31f753 +Subproject commit b96b279d81091a439a760ffb7495e0413723ac4f