dotnet: address unhandled exceptions through improved type checking (#1230)

* dotnet: bump dncil version

* dotnet: check #US stream valid before access

* dotnet: use assert statements to guard types
This commit is contained in:
Mike Hunhoff
2022-12-15 12:55:57 -07:00
committed by GitHub
parent d7548c0b20
commit 3af7fe0b08
4 changed files with 134 additions and 77 deletions

View File

@@ -53,6 +53,7 @@
- do not overwrite version in version.py during PyInstaller build #1169 @mr-tz
- render: fix vverbose rendering of offsets #1215 @williballenthin
- elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin
- dotnet: address unhandled exceptions with improved type checking #1230 @mike-hunhoff
### capa explorer IDA Pro plugin
- fix: display instruction items #1154 @mr-tz

View File

@@ -10,7 +10,7 @@ from __future__ import annotations
import logging
from enum import Enum
from typing import Any, Tuple, Iterator, Optional
from typing import Any, Tuple, Union, Iterator, Optional
import dnfile
from dncil.cil.body import CilMethodBody
@@ -140,19 +140,23 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -
try:
return CilMethodBody(DnfileMethodBodyReader(pe, row))
except MethodBodyFormatError as e:
logger.warning("failed to parse managed method body @ 0x%08x (%s)" % (row.Rva, e))
logger.debug("failed to parse managed method body @ 0x%08x (%s)", row.Rva, e)
return None
def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]:
"""read user string from #US stream"""
assert pe.net is not None
assert pe.net.user_strings is not None
if pe.net.user_strings is None:
# stream may not exist (seen in obfuscated .NET)
logger.debug("#US stream does not exist for stream index 0x%08x", token.rid)
return None
try:
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
except UnicodeDecodeError as e:
logger.warning("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e))
logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
return None
if user_string is None:
@@ -175,15 +179,17 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
TypeName (index into String heap)
TypeNamespace (index into String heap)
"""
assert pe.net is not None
assert pe.net.mdtables is not None
assert pe.net.mdtables.MemberRef is not None
for (rid, member_ref) in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number):
assert isinstance(member_ref, dnfile.mdtable.MemberRefRow)
for (rid, row) in enumerate(iter_dotnet_table(pe, "MemberRef")):
if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow):
if not isinstance(member_ref.Class.row, dnfile.mdtable.TypeRefRow):
# only process class imports from TypeRef table
continue
token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1)
yield DnType(token, row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace, member=row.Name)
token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid)
yield DnType(
token, member_ref.Class.row.TypeName, namespace=member_ref.Class.row.TypeNamespace, member=member_ref.Name
)
def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
@@ -197,22 +203,47 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
TypeNamespace (index into String heap)
MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)
"""
for row in iter_dotnet_table(pe, "TypeDef"):
for index in row.MethodList:
token = calculate_dotnet_token_value(index.table.number, index.row_index)
yield DnType(token, row.TypeName, namespace=row.TypeNamespace, member=index.row.Name)
for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
for (idx, method) in enumerate(typedef.MethodList):
if method.table is None:
logger.debug("TypeDef[0x%X] MethodList[0x%X] table is None", rid, idx)
continue
if method.row is None:
logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx)
continue
token = calculate_dotnet_token_value(method.table.number, method.row_index)
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method.row.Name)
def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
"""get fields from TypeDef table"""
for row in iter_dotnet_table(pe, "TypeDef"):
for index in row.FieldList:
token = calculate_dotnet_token_value(index.table.number, index.row_index)
yield DnType(token, row.TypeName, namespace=row.TypeNamespace, member=index.row.Name)
"""get fields from TypeDef table
see https://www.ntcore.com/files/dotnetformat.htm
02 - TypeDef Table
Each row represents a class in the current assembly.
TypeName (index into String heap)
TypeNamespace (index into String heap)
FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type)
"""
for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
for (idx, field) in enumerate(typedef.FieldList):
if field.table is None:
logger.debug("TypeDef[0x%X] FieldList[0x%X] table is None", rid, idx)
continue
if field.row is None:
logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx)
continue
token: int = calculate_dotnet_token_value(field.table.number, field.row_index)
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name)
def get_dotnet_property_map(
pe: dnfile.dnPE, property_row: dnfile.mdtable.PropertyRow
pe: dnfile.dnPE, mapped_property: Union[dnfile.mdtable.PropertyRow, dnfile.mdtable.EventRow]
) -> Optional[dnfile.mdtable.TypeDefRow]:
"""get property map from PropertyMap table
@@ -225,10 +256,15 @@ def get_dotnet_property_map(
the last row of the Property table
the next run of Properties, found by inspecting the PropertyList of the next row in this PropertyMap table
"""
for row in iter_dotnet_table(pe, "PropertyMap"):
for index in row.PropertyList:
if index.row.Name == property_row.Name:
return row.Parent.row
for (rid, property_map) in iter_dotnet_table(pe, dnfile.mdtable.PropertyMap.number):
assert isinstance(property_map, dnfile.mdtable.PropertyMapRow)
for (idx, property_) in enumerate(property_map.PropertyList):
if property_.row is None:
logger.debug("PropertyMap[0x%X] PropertyList[0x%x] row is None", rid, idx)
continue
if property_.row.Name == mapped_property.Name:
return property_map.Parent.row
return None
@@ -243,48 +279,57 @@ def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]:
Method (index into the MethodDef table)
Association (index into the Event or Property table; more precisely, a HasSemantics coded index)
"""
for row in iter_dotnet_table(pe, "MethodSemantics"):
typedef_row = get_dotnet_property_map(pe, row.Association.row)
if typedef_row is None:
for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number):
assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow)
if method_semantics.Association.row is None:
logger.debug("MethodSemantics[0x%X] Association row is None", rid)
continue
if method_semantics.Method.table is None:
logger.debug("MethodSemantics[0x%X] Method table is None", rid)
continue
token = calculate_dotnet_token_value(row.Method.table.number, row.Method.row_index)
typedef: Optional[dnfile.mdtable.TypeDefRow] = get_dotnet_property_map(pe, method_semantics.Association.row)
if typedef is None:
logger.debug("MethodSemantics[0x%X] TypeDef is None", rid)
continue
if row.Semantics.msSetter:
token: int = calculate_dotnet_token_value(
method_semantics.Method.table.number, method_semantics.Method.row_index
)
access: Optional[str]
if method_semantics.Semantics.msSetter:
access = FeatureAccess.WRITE
elif row.Semantics.msGetter:
elif method_semantics.Semantics.msGetter:
access = FeatureAccess.READ
else:
access = None
yield DnType(
token,
typedef_row.TypeName,
typedef.TypeName,
access=access,
namespace=typedef_row.TypeNamespace,
member=row.Association.row.Name,
namespace=typedef.TypeNamespace,
member=method_semantics.Association.row.Name,
)
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
"""get managed methods from MethodDef table"""
assert pe.net is not None
assert pe.net.mdtables is not None
assert pe.net.mdtables.MethodDef is not None
for (rid, method_def) in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
if not hasattr(pe.net.mdtables, "MethodDef"):
return
for (rid, row) in enumerate(pe.net.mdtables.MethodDef):
if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)):
if not method_def.ImplFlags.miIL or any((method_def.Flags.mdAbstract, method_def.Flags.mdPinvokeImpl)):
# skip methods that do not have a method body
continue
body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row)
body: Optional[CilMethodBody] = read_dotnet_method_body(pe, method_def)
if body is None:
logger.debug("MethodDef[0x%X] method body is None", rid)
continue
token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MethodDef.value, rid + 1)
token: int = calculate_dotnet_token_value(dnfile.mdtable.MethodDef.number, rid)
yield token, body
@@ -299,14 +344,29 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
ImportName (index into the String heap)
ImportScope (index into the ModuleRef table)
"""
for row in iter_dotnet_table(pe, "ImplMap"):
module: str = row.ImportScope.row.Name
method: str = row.ImportName
for (rid, impl_map) in iter_dotnet_table(pe, dnfile.mdtable.ImplMap.number):
assert isinstance(impl_map, dnfile.mdtable.ImplMapRow)
module: str
if impl_map.ImportScope.row is None:
logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
module = ""
else:
module = impl_map.ImportScope.row.Name
method: str = impl_map.ImportName
member_forward_table: int
if impl_map.MemberForwarded.table is None:
logger.debug("ImplMap[0x%X] MemberForwarded table is None", rid)
continue
else:
member_forward_table = impl_map.MemberForwarded.table.number
member_forward_row: int = impl_map.MemberForwarded.row_index
# ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the
# name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded
# MethodDef table token to help us later record native import method calls made from CIL
token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index)
token: int = calculate_dotnet_token_value(member_forward_table, member_forward_row)
# like Kernel32.dll
if module and "." in module:
@@ -320,13 +380,6 @@ def calculate_dotnet_token_value(table: int, rid: int) -> int:
return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK)
def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool:
assert pe.net is not None
assert pe.net.mdtables is not None
return bool(getattr(pe.net.mdtables, table_name, None))
def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
assert pe.net is not None
assert pe.net.Flags is not None
@@ -334,12 +387,10 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
return not bool(pe.net.Flags.CLR_ILONLY)
def iter_dotnet_table(pe: dnfile.dnPE, name: str) -> Iterator[Any]:
def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
assert pe.net is not None
assert pe.net.mdtables is not None
if not is_dotnet_table_valid(pe, name):
return
for row in getattr(pe.net.mdtables, name):
yield row
for (rid, row) in enumerate(pe.net.mdtables.tables.get(table_index, [])):
# .NET tables are 1-indexed
yield rid + 1, row

View File

@@ -1,5 +1,5 @@
import logging
from typing import Tuple, Iterator
from typing import Tuple, Iterator, cast
import dnfile
import pefile
@@ -62,11 +62,15 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
# namespaces may be referenced multiple times, so we need to filter
namespaces = set()
for row in iter_dotnet_table(pe, "TypeDef"):
namespaces.add(row.TypeNamespace)
for (_, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
# emit internal .NET namespaces
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
namespaces.add(typedef.TypeNamespace)
for row in iter_dotnet_table(pe, "TypeRef"):
namespaces.add(row.TypeNamespace)
for (_, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
# emit external .NET namespaces
assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
namespaces.add(typeref.TypeNamespace)
# namespaces may be empty, discard
namespaces.discard("")
@@ -78,18 +82,19 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
"""emit class features from TypeRef and TypeDef tables"""
assert pe.net is not None
assert pe.net.mdtables is not None
assert pe.net.mdtables.TypeDef is not None
assert pe.net.mdtables.TypeRef is not None
for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
# emit internal .NET classes
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeDef")):
token = calculate_dotnet_token_value(pe.net.mdtables.TypeDef.number, rid + 1)
yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token)
token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid)
yield Class(DnType.format_name(typedef.TypeName, namespace=typedef.TypeNamespace)), DNTokenAddress(token)
for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeRef")):
token = calculate_dotnet_token_value(pe.net.mdtables.TypeRef.number, rid + 1)
yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token)
for (rid, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
# emit external .NET classes
assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid)
yield Class(DnType.format_name(typeref.TypeName, namespace=typeref.TypeNamespace)), DNTokenAddress(token)
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:

View File

@@ -27,7 +27,7 @@ requirements = [
"pefile==2022.5.30",
"pyelftools==0.29",
"dnfile==0.12.0",
"dncil==1.0.1",
"dncil==1.0.2",
"pydantic==1.10.2",
]