Merge branch 'master' into feature-981

This commit is contained in:
Willi Ballenthin
2022-06-06 14:07:51 -06:00
25 changed files with 479 additions and 187 deletions

21
.devcontainer/Dockerfile Normal file
View File

@@ -0,0 +1,21 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
# VARIANT picks the tag suffix of the base image. It is declared before FROM,
# so it is only visible to the FROM instruction below.
ARG VARIANT="3.10-bullseye"
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
# [Choice] Node.js version: none, lts/*, 16, 14, 12, 10
ARG NODE_VERSION="none"
# Install the requested Node.js version through nvm as the vscode user;
# nothing is installed when NODE_VERSION is left at "none".
RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi
# [Optional] If your pip requirements rarely change, uncomment this section to add them to the image.
# COPY requirements.txt /tmp/pip-tmp/
# RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \
# && rm -rf /tmp/pip-tmp
# [Optional] Uncomment this section to install additional OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
# && apt-get -y install --no-install-recommends <your-package-list-here>
# [Optional] Uncomment this line to install global node packages.
# RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g <your-package-here>" 2>&1

View File

@@ -0,0 +1,51 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3
{
"name": "Python 3",
"build": {
"dockerfile": "Dockerfile",
"context": "..",
"args": {
// Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6
// Append -bullseye or -buster to pin to an OS version.
// Use -bullseye variants on local arm64/Apple Silicon.
"VARIANT": "3.10",
// Options
"NODE_VERSION": "none"
}
},
// Set *default* container specific settings.json values on container create.
"settings": {
"python.defaultInterpreterPath": "/usr/local/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
],
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev]",
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode",
"features": {
"git": "latest"
}
}

View File

@@ -95,7 +95,7 @@ exe = EXE(pyz,
a.datas,
exclude_binaries=False,
name='capa',
# TODO not working anymore for unknown reason icon='logo.ico',
icon='logo.ico',
debug=False,
strip=None,
upx=True,

View File

@@ -38,7 +38,7 @@ jobs:
- if: matrix.os == 'ubuntu-18.04'
run: sudo apt-get install -y libyaml-dev
- name: Upgrade pip, setuptools
run: pip install --upgrade pip setuptools
run: python -m pip install --upgrade pip setuptools
- name: Install capa with build requirements
run: pip install -e .[build]
- name: Build standalone executable

View File

@@ -13,6 +13,7 @@
- add file function-name extraction for dotnet files #1015 @mike-hunhoff
- add unmanaged call characteristic for dotnet files #1023 @mike-hunhoff
- add mixed mode characteristic feature extraction for dotnet files #1024 @mike-hunhoff
- emit class and namespace features for dotnet files #1030 @mike-hunhoff
- render: support Addresses that aren't simple integers, like .NET token+offset #981 @williballenthin
### Breaking Changes
@@ -24,7 +25,7 @@
- extractors must use handles to identify functions/basic blocks/instructions #981 @williballenthin
- the freeze file format schema was updated, including format version bump to v2 #986 @williballenthin
### New Rules (6)
### New Rules (7)
- data-manipulation/encryption/aes/manually-build-aes-constants huynh.t.nhan@gmail.com
- nursery/get-process-image-filename michael.hunhoff@mandiant.com
@@ -32,10 +33,12 @@
- compiler/zig/compiled-with-zig jakub.jozwiak@mandiant.com
- anti-analysis/packer/huan/packed-with-huan jakub.jozwiak@mandiant.com
- internal/limitation/file/internal-dotnet-file-limitation william.ballenthin@mandiant.com
- nursery/get-os-information-via-kuser_shared_data @mr-tz
-
### Bug Fixes
- improve handling _ prefix compile/link artifact #924 @mike-hunhoff
- better detect OS in ELF samples #988 @williballenthin
### capa explorer IDA Pro plugin
- improve file format extraction #918 @mike-hunhoff

View File

@@ -2,7 +2,7 @@
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-665-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-666-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

View File

@@ -162,6 +162,16 @@ class String(Feature):
super(String, self).__init__(value, description=description)
class Class(Feature):
    """Feature whose value is the name of a class."""

    def __init__(self, value: str, description=None):
        super().__init__(value, description=description)
class Namespace(Feature):
    """Feature whose value is the name of a namespace."""

    def __init__(self, value: str, description=None):
        super().__init__(value, description=description)
class Substring(String):
def __init__(self, value: str, description=None):
super(Substring, self).__init__(value, description=description)

View File

@@ -8,12 +8,7 @@
from __future__ import annotations
from typing import TYPE_CHECKING, List, Tuple, Iterator
from capa.features.address import Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
if TYPE_CHECKING:
from capa.features.common import Feature
from typing import List, Tuple, Iterator
import dnfile
from dncil.clr.token import Token
@@ -21,6 +16,8 @@ from dncil.clr.token import Token
import capa.features.extractors
import capa.features.extractors.dnfile.file
import capa.features.extractors.dnfile.insn
from capa.features.common import Feature
from capa.features.address import Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies

View File

@@ -8,15 +8,14 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Tuple, Iterator
from typing import Tuple, Iterator
if TYPE_CHECKING:
import dnfile
from capa.features.common import Feature, Format, String, Characteristic
from capa.features.file import Import, FunctionName
from capa.features.address import Address
import dnfile
import capa.features.extractors
import capa.features.extractors.dotnetfile
from capa.features.file import Import, FunctionName
from capa.features.common import Class, Format, String, Feature, Namespace, Characteristic
from capa.features.address import Address
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]:
@@ -35,8 +34,16 @@ def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]:
yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)
def extract_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
yield from capa.features.extractors.dotnetfile.extract_mixed_mode_characteristic_features(pe=pe)
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]:
    """yield the namespace features that the dotnetfile extractor produces for `pe`"""
    features = capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)
    yield from features
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]:
    """yield the class features that the dotnetfile extractor produces for `pe`"""
    features = capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)
    yield from features
def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
@@ -50,5 +57,7 @@ FILE_HANDLERS = (
extract_file_function_names,
extract_file_strings,
extract_file_format,
extract_mixed_mode_characteristic_features,
extract_file_mixed_mode_characteristic_features,
extract_file_namespace_features,
extract_file_class_features,
)

View File

@@ -41,8 +41,72 @@ class DnfileMethodBodyReader(CilMethodBodyReaderBase):
return self.offset
def calculate_dotnet_token_value(table: int, rid: int) -> int:
return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK)
class DnClass(object):
    """A .NET class described by its metadata token, namespace, and class name.

    Instances hash and compare by `token` only; `namespace` and `classname`
    are used to render the display name.
    """

    def __init__(self, token: int, namespace: str, classname: str):
        self.token: int = token
        self.namespace: str = namespace
        self.classname: str = classname

    def __hash__(self):
        return hash((self.token,))

    def __eq__(self, other):
        # anything without a token (None, int, str, ...) cannot be compared by token;
        # NotImplemented lets Python fall back to its default instead of raising AttributeError
        if not hasattr(other, "token"):
            return NotImplemented
        return self.token == other.token

    def __str__(self):
        # call DnClass.format_name explicitly: subclasses may define format_name with another signature
        return DnClass.format_name(self.namespace, self.classname)

    def __repr__(self):
        return str(self)

    @staticmethod
    def format_name(namespace: str, classname: str):
        """return `namespace.classname`, or only `classname` when the namespace is empty"""
        # like File
        name: str = classname
        if namespace:
            # like System.IO.File
            name = f"{namespace}.{name}"
        return name
class DnMethod(DnClass):
    """A .NET method: the DnClass fields (token, namespace, class name) plus a method name."""

    def __init__(self, token: int, namespace: str, classname: str, methodname: str):
        super().__init__(token, namespace, classname)
        self.methodname: str = methodname

    def __str__(self):
        return DnMethod.format_name(self.namespace, self.classname, self.methodname)

    @staticmethod
    def format_name(namespace: str, classname: str, methodname: str):  # type: ignore
        """return `namespace.classname::methodname`, dropping the namespace part when it is empty"""
        # like File::OpenRead
        qualified: str = f"{classname}::{methodname}"
        if not namespace:
            return qualified
        # like System.IO.File::OpenRead
        return f"{namespace}.{qualified}"
class DnUnmanagedMethod:
    """An unmanaged method described by its metadata token, module name, and method name.

    Instances hash and compare by `token` only; `modulename` and `methodname`
    are used to render the display name.
    """

    def __init__(self, token: int, modulename: str, methodname: str):
        self.token: int = token
        self.modulename: str = modulename
        self.methodname: str = methodname

    def __hash__(self):
        return hash((self.token,))

    def __eq__(self, other):
        # anything without a token (None, int, str, ...) cannot be compared by token;
        # NotImplemented lets Python fall back to its default instead of raising AttributeError
        if not hasattr(other, "token"):
            return NotImplemented
        return self.token == other.token

    def __str__(self):
        return DnUnmanagedMethod.format_name(self.modulename, self.methodname)

    def __repr__(self):
        return str(self)

    @staticmethod
    def format_name(modulename, methodname):
        """return `modulename.methodname`, like kernel32.CreateFileA"""
        return f"{modulename}.{methodname}"
def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any:
@@ -86,12 +150,14 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str
except UnicodeDecodeError as e:
logger.warn("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e))
return None
if user_string is None:
return None
return user_string.value
def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnMethod]:
"""get managed imports from MemberRef table
see https://www.ntcore.com/files/dotnetformat.htm
@@ -105,62 +171,34 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
TypeName (index into String heap)
TypeNamespace (index into String heap)
"""
if not is_dotnet_table_valid(pe, "MemberRef"):
return
for (rid, row) in enumerate(pe.net.mdtables.MemberRef):
if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)):
for (rid, row) in enumerate(iter_dotnet_table(pe, "MemberRef")):
if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow):
continue
# like File::OpenRead
name = f"{row.Class.row.TypeName}::{row.Name}"
# ECMA II.22.38: TypeNamespace can be null or non-null
if row.Class.row.TypeNamespace:
# like System.IO.File::OpenRead
name = f"{row.Class.row.TypeNamespace}.{name}"
token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1)
yield token, name
yield DnMethod(token, row.Class.row.TypeNamespace, row.Class.row.TypeName, row.Name)
def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
"""get unmanaged imports from ImplMap table
def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnMethod]:
"""get managed method names from TypeDef table
see https://www.ntcore.com/files/dotnetformat.htm
28 - ImplMap Table
ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch
MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index)
ImportName (index into the String heap)
ImportScope (index into the ModuleRef table)
02 - TypeDef Table
Each row represents a class in the current assembly.
TypeName (index into String heap)
TypeNamespace (index into String heap)
MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)
"""
if not is_dotnet_table_valid(pe, "ImplMap"):
return
for row in pe.net.mdtables.ImplMap:
dll: str = row.ImportScope.row.Name
symbol: str = row.ImportName
# ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the
# name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded
# MethodDef table token to help us later record native import method calls made from CIL
token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index)
# like Kernel32.dll
if dll and "." in dll:
dll = dll.split(".")[0]
# like kernel32.CreateFileA
name: str = f"{dll}.{symbol}"
yield token, name
for row in iter_dotnet_table(pe, "TypeDef"):
for index in row.MethodList:
token = calculate_dotnet_token_value(index.table.number, index.row_index)
yield DnMethod(token, row.TypeNamespace, row.TypeName, index.row.Name)
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
"""get managed methods from MethodDef table"""
if not is_dotnet_table_valid(pe, "MethodDef"):
if not hasattr(pe.net.mdtables, "MethodDef"):
return
for (rid, row) in enumerate(pe.net.mdtables.MethodDef):
@@ -176,38 +214,48 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, Cil
yield token, body
def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]:
    """yield one DnUnmanagedMethod per row of the ImplMap table

    see https://www.ntcore.com/files/dotnetformat.htm

    28 - ImplMap Table
        ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch
            MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index)
            ImportName (index into the String heap)
            ImportScope (index into the ModuleRef table)
    """
    for entry in iter_dotnet_table(pe, "ImplMap"):
        module: str = entry.ImportScope.row.Name
        symbol: str = entry.ImportName

        # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the
        # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; the token is therefore built from the
        # MemberForwarded MethodDef entry so native import calls made from CIL can be matched to it later
        forwarded = entry.MemberForwarded
        token: int = calculate_dotnet_token_value(forwarded.table.number, forwarded.row_index)

        if module and "." in module:
            # like Kernel32.dll: keep only the text before the first dot
            module = module.partition(".")[0]

        # like kernel32.CreateFileA
        yield DnUnmanagedMethod(token, module, symbol)
def calculate_dotnet_token_value(table: int, rid: int) -> int:
return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK)
def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool:
return bool(getattr(pe.net.mdtables, table_name, None))
def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
"""get managed method names from TypeDef table
see https://www.ntcore.com/files/dotnetformat.htm
02 - TypeDef Table
Each row represents a class in the current assembly.
TypeName (index into String heap)
TypeNamespace (index into String heap)
MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type)
"""
if not is_dotnet_table_valid(pe, "TypeDef"):
return
for row in pe.net.mdtables.TypeDef:
for index in row.MethodList:
# like File::OpenRead
name = f"{row.TypeName}::{index.row.Name}"
# ECMA II.22.37: TypeNamespace can be null or non-null
if row.TypeNamespace:
# like System.IO.File::OpenRead
name = f"{row.TypeNamespace}.{name}"
token = calculate_dotnet_token_value(index.table.number, index.row_index)
yield token, name
def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
return not bool(pe.net.Flags.CLR_ILONLY)
def iter_dotnet_table(pe: dnfile.dnPE, name: str) -> Iterator[Any]:
    """yield each row of the metadata table `name`; yield nothing when is_dotnet_table_valid() rejects it"""
    if is_dotnet_table_valid(pe, name):
        yield from getattr(pe.net.mdtables, name)

View File

@@ -8,66 +8,65 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, Tuple, Iterator, Optional
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
if TYPE_CHECKING:
from dncil.cil.instruction import Instruction
from dncil.cil.body import CilMethodBody
from capa.features.common import Feature
from capa.features.address import Address
from typing import Any, Dict, Tuple, Union, Iterator, Optional
import dnfile
from dncil.clr.token import StringToken, InvalidToken
from dncil.cil.body import CilMethodBody
from dncil.clr.token import Token, StringToken, InvalidToken
from dncil.cil.opcode import OpCodes
from dncil.cil.instruction import Instruction
import capa.features.extractors.helpers
from capa.features.insn import API, Number
from capa.features.common import String, Characteristic
from capa.features.common import Class, String, Feature, Namespace, Characteristic
from capa.features.address import Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
from capa.features.extractors.dnfile.helpers import (
DnClass,
DnMethod,
DnUnmanagedMethod,
resolve_dotnet_token,
read_dotnet_user_string,
get_dotnet_managed_imports,
get_dotnet_managed_methods,
get_dotnet_unmanaged_imports,
get_dotnet_managed_method_names,
)
def get_managed_imports(ctx: Dict) -> Dict:
if "managed_imports_cache" not in ctx:
ctx["managed_imports_cache"] = {}
for (token, name) in get_dotnet_managed_imports(ctx["pe"]):
ctx["managed_imports_cache"][token] = name
for method in get_dotnet_managed_imports(ctx["pe"]):
ctx["managed_imports_cache"][method.token] = method
return ctx["managed_imports_cache"]
def get_unmanaged_imports(ctx: Dict) -> Dict:
if "unmanaged_imports_cache" not in ctx:
ctx["unmanaged_imports_cache"] = {}
for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]):
ctx["unmanaged_imports_cache"][token] = name
for imp in get_dotnet_unmanaged_imports(ctx["pe"]):
ctx["unmanaged_imports_cache"][imp.token] = imp
return ctx["unmanaged_imports_cache"]
def get_methods(ctx: Dict) -> Dict:
if "methods_cache" not in ctx:
ctx["methods_cache"] = {}
for (token, name) in get_dotnet_managed_method_names(ctx["pe"]):
ctx["methods_cache"][token] = name
for method in get_dotnet_managed_methods(ctx["pe"]):
ctx["methods_cache"][method.token] = method
return ctx["methods_cache"]
def get_callee_name(ctx: Dict, token: int) -> str:
"""map dotnet token to method name"""
name: str = get_managed_imports(ctx).get(token, "")
if not name:
def get_callee(ctx: Dict, token: int) -> Union[DnMethod, DnUnmanagedMethod, None]:
"""map dotnet token to un/managed method"""
callee: Union[DnMethod, DnUnmanagedMethod, None] = get_managed_imports(ctx).get(token, None)
if not callee:
# we must check unmanaged imports before managed methods because we map forwarded managed methods
# to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis
name = get_unmanaged_imports(ctx).get(token, "")
if not name:
name = get_methods(ctx).get(token, "")
return name
callee = get_unmanaged_imports(ctx).get(token, None)
if not callee:
callee = get_methods(ctx).get(token, None)
return callee
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
@@ -77,18 +76,49 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
return
name: str = get_callee_name(fh.ctx, insn.operand.value)
if not name:
callee: Union[DnMethod, DnUnmanagedMethod, None] = get_callee(fh.ctx, insn.operand.value)
if callee is None:
return
if "::" in name:
# like System.IO.File::OpenRead
if isinstance(callee, DnUnmanagedMethod):
# like kernel32.CreateFileA
for name in capa.features.extractors.helpers.generate_symbols(callee.modulename, callee.methodname):
yield API(name), ih.address
else:
# like kernel32.CreateFileA
dll, _, symbol = name.rpartition(".")
for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name_variant), ih.address
# like System.IO.File::Delete
yield API(str(callee)), ih.address
def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Class, Address]]:
    """emit the class referenced by a call-like instruction

    only Call, Callvirt, Jmp, and Calli instructions are considered, and only when the
    operand token resolves to a MemberRef row whose Class is a TypeRef or TypeDef row.
    """
    insn = ih.inner
    if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
        return

    member: Any = resolve_dotnet_token(fh.ctx["pe"], Token(insn.operand.value))
    if not isinstance(member, dnfile.mdtable.MemberRefRow):
        return

    owner = member.Class.row
    if not isinstance(owner, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)):
        return

    classname = DnClass.format_name(owner.TypeNamespace, owner.TypeName)
    yield Class(classname), ih.address
def extract_insn_namespace_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Namespace, Address]]:
    """emit the namespace referenced by a call-like instruction

    only Call, Callvirt, Jmp, and Calli instructions are considered, and only when the
    operand token resolves to a MemberRef row whose Class is a TypeRef or TypeDef row
    with a non-empty TypeNamespace.
    """
    insn = ih.inner
    if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
        return

    member: Any = resolve_dotnet_token(fh.ctx["pe"], Token(insn.operand.value))
    if not isinstance(member, dnfile.mdtable.MemberRefRow):
        return

    owner = member.Class.row
    if not isinstance(owner, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)):
        return

    namespace = owner.TypeNamespace
    if namespace:
        yield Namespace(namespace), ih.address
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
@@ -138,6 +168,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Itera
"""extract instruction features"""
for inst_handler in INSTRUCTION_HANDLERS:
for (feature, addr) in inst_handler(fh, bbh, ih):
assert isinstance(addr, Address)
yield feature, addr
@@ -145,5 +176,7 @@ INSTRUCTION_HANDLERS = (
extract_insn_api_features,
extract_insn_number_features,
extract_insn_string_features,
extract_insn_namespace_features,
extract_insn_class_features,
extract_unmanaged_call_characteristic_features,
)

View File

@@ -1,4 +1,5 @@
import logging
import itertools
from typing import Tuple, Iterator
import dnfile
@@ -15,19 +16,24 @@ from capa.features.common import (
ARCH_AMD64,
FORMAT_DOTNET,
Arch,
Class,
Format,
String,
Feature,
Namespace,
Characteristic,
)
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.dnfile.helpers import (
DnClass,
DnMethod,
iter_dotnet_table,
is_dotnet_mixed_mode,
get_dotnet_managed_imports,
get_dotnet_managed_methods,
calculate_dotnet_token_value,
get_dotnet_unmanaged_imports,
get_dotnet_managed_method_names,
)
logger = logging.getLogger(__name__)
@@ -38,20 +44,50 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
for (token, name) in get_dotnet_managed_imports(pe):
for method in get_dotnet_managed_imports(pe):
# like System.IO.File::OpenRead
yield Import(name), DNTokenAddress(Token(token))
yield Import(str(method)), DNTokenAddress(Token(method.token))
for (token, name) in get_dotnet_unmanaged_imports(pe):
for imp in get_dotnet_unmanaged_imports(pe):
# like kernel32.CreateFileA
dll, _, symbol = name.rpartition(".")
for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield Import(name_variant), DNTokenAddress(Token(token))
for name in capa.features.extractors.helpers.generate_symbols(imp.modulename, imp.methodname):
yield Import(name), DNTokenAddress(Token(imp.token))
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]:
for (token, name) in get_dotnet_managed_method_names(pe):
yield FunctionName(name), DNTokenAddress(Token(token))
for method in get_dotnet_managed_methods(pe):
yield FunctionName(str(method)), DNTokenAddress(Token(method.token))
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]:
    """emit namespace features from TypeRef and TypeDef tables

    each distinct, non-empty namespace is emitted exactly once, in sorted order,
    so the output does not change from one run to the next.
    """
    # namespaces may be referenced multiple times, so we need to filter
    namespaces = set()
    for table_name in ("TypeDef", "TypeRef"):
        for row in iter_dotnet_table(pe, table_name):
            # namespaces may be empty, discard
            if row.TypeNamespace:
                namespaces.add(row.TypeNamespace)

    # a set of str iterates in hash order, which varies between interpreter runs;
    # sort so the features are emitted deterministically
    for namespace in sorted(namespaces):
        # namespaces do not have an associated token, so we yield NO_ADDRESS
        yield Namespace(namespace), NO_ADDRESS
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
    """emit class features from TypeRef and TypeDef tables"""
    # TypeDef rows are emitted first, then TypeRef rows
    for table_name in ("TypeDef", "TypeRef"):
        # rows are numbered from 1 when building their tokens
        for rid, row in enumerate(iter_dotnet_table(pe, table_name), start=1):
            table_number = getattr(pe.net.mdtables, table_name).number
            token = calculate_dotnet_token_value(table_number, rid)
            classname = DnClass.format_name(row.TypeNamespace, row.TypeName)
            yield Class(classname), DNTokenAddress(Token(token))
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
@@ -73,7 +109,9 @@ def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Ad
yield from capa.features.extractors.common.extract_file_strings(pe.__data__)
def extract_mixed_mode_characteristic_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Characteristic, Address]]:
def extract_file_mixed_mode_characteristic_features(
pe: dnfile.dnPE, **kwargs
) -> Iterator[Tuple[Characteristic, Address]]:
if is_dotnet_mixed_mode(pe):
yield Characteristic("mixed mode"), NO_ADDRESS
@@ -89,7 +127,9 @@ FILE_HANDLERS = (
extract_file_function_names,
extract_file_strings,
extract_file_format,
extract_mixed_mode_characteristic_features,
extract_file_mixed_mode_characteristic_features,
extract_file_namespace_features,
extract_file_class_features,
)

View File

@@ -47,6 +47,19 @@ class OS(str, Enum):
NACL = "nacl"
# via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
# and here: https://github.com/bminor/binutils-gdb/blob/34c54daa337da9fadf87d2706d6a590ae1f88f4d/include/elf/common.h#L933-L939
# maps the abi_tag value unpacked from a "GNU" note descriptor to the OS it identifies;
# kept at module level so every note-parsing strategy in detect_elf_os can share it
GNU_ABI_TAG = {
0: OS.LINUX,
1: OS.HURD,
2: OS.SOLARIS,
3: OS.FREEBSD,
4: OS.NETBSD,
5: OS.SYLLABLE,
6: OS.NACL,
}
def detect_elf_os(f) -> str:
"""
f: type Union[BinaryIO, IDAIO]
@@ -144,7 +157,7 @@ def detect_elf_os(f) -> str:
PT_NOTE = 0x4
(p_type,) = struct.unpack_from(endian + "I", phent, 0x0)
logger.debug("p_type: 0x%04x", p_type)
logger.debug("ph:p_type: 0x%04x", p_type)
if p_type != PT_NOTE:
continue
@@ -155,7 +168,7 @@ def detect_elf_os(f) -> str:
else:
raise NotImplementedError()
logger.debug("p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
f.seek(p_offset)
note = f.read(p_filesz)
@@ -167,7 +180,7 @@ def detect_elf_os(f) -> str:
name_offset = 0xC
desc_offset = name_offset + align(namesz, 0x4)
logger.debug("namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_)
logger.debug("ph:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_)
name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii")
logger.debug("name: %s", name)
@@ -181,17 +194,6 @@ def detect_elf_os(f) -> str:
desc = note[desc_offset : desc_offset + descsz]
abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0)
# via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
# and here: https://github.com/bminor/binutils-gdb/blob/34c54daa337da9fadf87d2706d6a590ae1f88f4d/include/elf/common.h#L933-L939
GNU_ABI_TAG = {
0: OS.LINUX,
1: OS.HURD,
2: OS.SOLARIS,
3: OS.FREEBSD,
4: OS.NETBSD,
5: OS.SYLLABLE,
6: OS.NACL,
}
logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)
if abi_tag in GNU_ABI_TAG:
@@ -265,7 +267,7 @@ def detect_elf_os(f) -> str:
if sh_type != SHT_NOTE:
continue
logger.debug("sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
f.seek(sh_offset)
note = f.read(sh_size)
@@ -277,7 +279,7 @@ def detect_elf_os(f) -> str:
name_offset = 0xC
desc_offset = name_offset + align(namesz, 0x4)
logger.debug("namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_)
logger.debug("sh:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_)
name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii")
logger.debug("name: %s", name)
@@ -285,6 +287,28 @@ def detect_elf_os(f) -> str:
if name == "Linux":
logger.debug("note owner: %s", "LINUX")
ret = OS.LINUX if not ret else ret
elif name == "OpenBSD":
logger.debug("note owner: %s", "OPENBSD")
ret = OS.OPENBSD if not ret else ret
elif name == "NetBSD":
logger.debug("note owner: %s", "NETBSD")
ret = OS.NETBSD if not ret else ret
elif name == "FreeBSD":
logger.debug("note owner: %s", "FREEBSD")
ret = OS.FREEBSD if not ret else ret
elif name == "GNU":
if descsz < 16:
continue
desc = note[desc_offset : desc_offset + descsz]
abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0)
logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)
if abi_tag in GNU_ABI_TAG:
# update only if not set
# so we can get the debugging output of subsequent strategies
ret = GNU_ABI_TAG[abi_tag] if not ret else ret
logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", ret, kmajor, kminor, kpatch)
return ret.value if ret is not None else "unknown"

View File

@@ -7,7 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import collections
from typing import TYPE_CHECKING, Set, List, Deque, Tuple, Union, Optional
from typing import Set, List, Deque, Tuple, Union, Optional
import envi
import vivisect.const
@@ -15,8 +15,7 @@ import envi.archs.i386.disasm
import envi.archs.amd64.disasm
from vivisect import VivWorkspace
if TYPE_CHECKING:
from capa.features.extractors.viv.extractor import VivInstructionHandle
from capa.features.extractors.viv.extractor import VivInstructionHandle
# pull out consts for lookup performance
i386RegOper = envi.archs.i386.disasm.i386RegOper

View File

@@ -655,10 +655,12 @@ class CapaExplorerForm(idaapi.PluginForm):
rule_paths.append(rule_path)
elif os.path.isdir(rule_path):
for root, dirs, files in os.walk(rule_path):
if ".github" in root:
if ".git" in root:
# the .github directory contains CI config in capa-rules
# this includes some .yml files
# these are not rules
# additionally, .git has files that are not .yml and generate the warning
# skip those too
continue
for file in files:
if not file.endswith(".yml"):

View File

@@ -197,7 +197,8 @@ class CapaExplorerRulgenPreview(QtWidgets.QTextEdit):
" meta:",
" name: <insert_name>",
" namespace: <insert_namespace>",
" author: %s" % author,
" authors:",
" - %s" % author,
" scope: %s" % scope,
" references: <insert_references>",
" examples:",

View File

@@ -583,12 +583,13 @@ def get_rules(rule_paths: List[str], disable_progress=False) -> List[Rule]:
elif os.path.isdir(rule_path):
logger.debug("reading rules from directory %s", rule_path)
for root, dirs, files in os.walk(rule_path):
if ".github" in root:
if ".git" in root:
# the .github directory contains CI config in capa-rules
# this includes some .yml files
# these are not rules
# additionally, .git has files that are not .yml and generate the warning
# skip those too
continue
for file in files:
if not file.endswith(".yml"):
if not (file.startswith(".git") or file.endswith((".git", ".md", ".txt"))):
@@ -596,7 +597,6 @@ def get_rules(rule_paths: List[str], disable_progress=False) -> List[Rule]:
# other things maybe are rules, but are mis-named.
logger.warning("skipping non-.yml file: %s", file)
continue
rule_path = os.path.join(root, file)
rule_file_paths.append(rule_path)

View File

@@ -100,6 +100,8 @@ SUPPORTED_FEATURES: Dict[str, Set] = {
capa.features.common.Characteristic("embedded pe"),
capa.features.common.String,
capa.features.common.Format,
capa.features.common.Class,
capa.features.common.Namespace,
capa.features.common.Characteristic("mixed mode"),
},
FUNCTION_SCOPE: {
@@ -135,6 +137,8 @@ SUPPORTED_FEATURES: Dict[str, Set] = {
capa.features.common.Characteristic("call $+5"),
capa.features.common.Characteristic("cross section flow"),
capa.features.common.Characteristic("unmanaged call"),
capa.features.common.Class,
capa.features.common.Namespace,
},
}
@@ -288,8 +292,11 @@ def parse_feature(key: str):
elif key == "format":
return capa.features.common.Format
elif key == "arch":
return capa.features.common.Arch
elif key == "class":
return capa.features.common.Class
elif key == "namespace":
return capa.features.common.Namespace
else:
raise InvalidRule("unexpected statement: %s" % key)
@@ -1293,6 +1300,12 @@ class RuleSet:
logger.debug('using rule "%s" and dependencies, found tag in meta.%s: %s', rule.name, k, v)
rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name)))
break
if isinstance(v, list):
for vv in v:
if tag in vv:
logger.debug('using rule "%s" and dependencies, found tag in meta.%s: %s', rule.name, k, vv)
rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name)))
break
return RuleSet(list(rules_filtered))
def match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[FeatureSet, ceng.MatchResults]:

2
rules

Submodule rules updated: 1fee68e72e...88c9c786ca

View File

@@ -43,9 +43,8 @@ import capa.rules
import capa.engine
import capa.helpers
import capa.features.insn
import capa.features.common
from capa.rules import Rule, RuleSet
from capa.features.common import Feature
from capa.features.common import String, Feature, Substring
logger = logging.getLogger("lint")
@@ -168,8 +167,8 @@ class InvalidScope(Lint):
return rule.meta.get("scope") not in ("file", "function", "basic block", "instruction")
class MissingAuthor(Lint):
name = "missing author"
class MissingAuthors(Lint):
name = "missing authors"
recommendation = "Add meta.authors so that users know who to contact with questions"
def check_rule(self, ctx: Context, rule: Rule):
@@ -490,7 +489,7 @@ class FeatureStringTooShort(Lint):
def check_features(self, ctx: Context, features: List[Feature]):
for feature in features:
if isinstance(feature, (capa.features.common.String, capa.features.common.Substring)):
if isinstance(feature, (String, Substring)):
assert isinstance(feature.value, str)
if len(feature.value) < 4:
self.recommendation = self.recommendation.format(feature.value)
@@ -697,7 +696,7 @@ def lint_scope(ctx: Context, rule: Rule):
META_LINTS = (
MissingNamespace(),
NamespaceDoesntMatchRulePath(),
MissingAuthor(),
MissingAuthors(),
MissingExamples(),
MissingExampleOffset(),
ExampleFileDNE(),

View File

@@ -76,8 +76,8 @@ setuptools.setup(
"pycodestyle==2.8.0",
"black==22.3.0",
"isort==5.10.1",
"mypy==0.950",
"psutil==5.9.0",
"mypy==0.960",
"psutil==5.9.1",
"stix2==3.0.1",
"requests==2.27.1",
# type stubs for mypy
@@ -87,10 +87,10 @@ setuptools.setup(
"types-tabulate==0.8.9",
"types-termcolor==1.1.4",
"types-psutil==5.8.22",
"types_requests==2.27.25",
"types_requests==2.27.29",
],
"build": [
"pyinstaller==5.0.1",
"pyinstaller==5.1",
],
},
zip_safe=False,

View File

@@ -252,6 +252,8 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "79abd17391adc6251ecdc58d13d76baf.dll_")
elif name.startswith("946a9"):
return os.path.join(CD, "data", "946a99f36a46d335dec080d9a4371940.dll_")
elif name.startswith("2f7f5f"):
return os.path.join(CD, "data", "2f7f5fb5de175e770d7eae87666f9831.elf_")
elif name.startswith("b9f5b"):
return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_")
elif name.startswith("mixed-mode-64"):
@@ -704,8 +706,13 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.ctor"), True),
("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.cctor"), False),
("hello-world", "file", capa.features.common.String("Hello World!"), True),
("hello-world", "file", capa.features.common.Class("HelloWorld"), True),
("hello-world", "file", capa.features.common.Class("System.Console"), True),
("hello-world", "file", capa.features.common.Namespace("System.Diagnostics"), True),
("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True),
("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True),
("hello-world", "function=0x250, bb=0x250, insn=0x257", capa.features.common.Class("System.Console"), True),
("hello-world", "function=0x250, bb=0x250, insn=0x257", capa.features.common.Namespace("System"), True),
("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True),
("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True),
("_1c444", "file", capa.features.common.String(r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall"), True),

View File

@@ -15,7 +15,8 @@ EXPECTED = textwrap.dedent(
rule:
meta:
name: test rule
author: user@domain.com
authors:
- user@domain.com
scope: function
examples:
- foo1234
@@ -38,7 +39,8 @@ def test_rule_reformat_top_level_elements():
- number: 2
meta:
name: test rule
author: user@domain.com
authors:
- user@domain.com
scope: function
examples:
- foo1234
@@ -55,7 +57,8 @@ def test_rule_reformat_indentation():
rule:
meta:
name: test rule
author: user@domain.com
authors:
- user@domain.com
scope: function
examples:
- foo1234
@@ -75,7 +78,8 @@ def test_rule_reformat_order():
"""
rule:
meta:
author: user@domain.com
authors:
- user@domain.com
examples:
- foo1234
- bar5678
@@ -98,7 +102,8 @@ def test_rule_reformat_meta_update():
"""
rule:
meta:
author: user@domain.com
authors:
- user@domain.com
examples:
- foo1234
- bar5678
@@ -124,7 +129,8 @@ def test_rule_reformat_string_description():
rule:
meta:
name: test rule
author: user@domain.com
authors:
- user@domain.com
scope: function
features:
- and:

View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2022 FireEye, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import pytest
from fixtures import *
import capa.features.extractors.elf
def test_elf_section_gnu_abi_tag():
    """
    Verify that detect_elf_os() reports "linux" for the "2f7f5f" ELF sample.

    NOTE(review): per the test name, this sample is expected to exercise the
    section-header GNU ABI tag note - confirm against the sample itself.
    """
    sample_path = get_data_path_by_name("2f7f5f")
    # detect_elf_os() takes a file-like object, so open the sample in binary mode.
    with open(sample_path, "rb") as sample:
        detected_os = capa.features.extractors.elf.detect_elf_os(sample)
    assert detected_os == "linux"
def test_elf_program_header_gnu_abi_tag():
    """
    Verify that detect_elf_os() reports "linux" for the "7351f.elf" sample.

    NOTE(review): per the test name, this sample is expected to carry its GNU
    ABI tag note in a program header rather than a section - confirm against
    the sample itself.
    """
    sample_path = get_data_path_by_name("7351f.elf")
    # detect_elf_os() takes a file-like object, so open the sample in binary mode.
    with open(sample_path, "rb") as sample:
        detected_os = capa.features.extractors.elf.detect_elf_os(sample)
    assert detected_os == "linux"

View File

@@ -42,7 +42,8 @@ def test_rule_yaml():
rule:
meta:
name: test rule
author: user@domain.com
authors:
- user@domain.com
scope: function
examples:
- foo1234
@@ -724,7 +725,8 @@ def test_filter_rules():
rule:
meta:
name: rule 1
author: joe
authors:
- joe
features:
- api: CreateFile
"""
@@ -803,7 +805,8 @@ def test_filter_rules_missing_dependency():
rule:
meta:
name: rule 1
author: joe
authors:
- joe
features:
- match: rule 2
"""