dotnet: address unhandled exceptions through improved type checking (#1230)

* dotnet: bump dncil version

* dotnet: check #US stream valid before access

* dotnet: use assert statements to guard types
This commit is contained in:
Mike Hunhoff
2022-12-15 12:55:57 -07:00
committed by GitHub
parent d7548c0b20
commit 3af7fe0b08
4 changed files with 134 additions and 77 deletions

View File

@@ -53,6 +53,7 @@
- do not overwrite version in version.py during PyInstaller build #1169 @mr-tz - do not overwrite version in version.py during PyInstaller build #1169 @mr-tz
- render: fix vverbose rendering of offsets #1215 @williballenthin - render: fix vverbose rendering of offsets #1215 @williballenthin
- elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin - elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin
- dotnet: address unhandled exceptions with improved type checking #1230 @mike-hunhoff
### capa explorer IDA Pro plugin ### capa explorer IDA Pro plugin
- fix: display instruction items #1154 @mr-tz - fix: display instruction items #1154 @mr-tz

View File

@@ -10,7 +10,7 @@ from __future__ import annotations
import logging import logging
from enum import Enum from enum import Enum
from typing import Any, Tuple, Iterator, Optional from typing import Any, Tuple, Union, Iterator, Optional
import dnfile import dnfile
from dncil.cil.body import CilMethodBody from dncil.cil.body import CilMethodBody
@@ -140,19 +140,23 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -
try: try:
return CilMethodBody(DnfileMethodBodyReader(pe, row)) return CilMethodBody(DnfileMethodBodyReader(pe, row))
except MethodBodyFormatError as e: except MethodBodyFormatError as e:
logger.warning("failed to parse managed method body @ 0x%08x (%s)" % (row.Rva, e)) logger.debug("failed to parse managed method body @ 0x%08x (%s)", row.Rva, e)
return None return None
def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]:
"""read user string from #US stream""" """read user string from #US stream"""
assert pe.net is not None assert pe.net is not None
assert pe.net.user_strings is not None
if pe.net.user_strings is None:
# stream may not exist (seen in obfuscated .NET)
logger.debug("#US stream does not exist for stream index 0x%08x", token.rid)
return None
try: try:
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
except UnicodeDecodeError as e: except UnicodeDecodeError as e:
logger.warning("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e)) logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
return None return None
if user_string is None: if user_string is None:
@@ -175,15 +179,17 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
TypeName (index into String heap) TypeName (index into String heap)
TypeNamespace (index into String heap) TypeNamespace (index into String heap)
""" """
assert pe.net is not None for (rid, member_ref) in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number):
assert pe.net.mdtables is not None assert isinstance(member_ref, dnfile.mdtable.MemberRefRow)
assert pe.net.mdtables.MemberRef is not None
for (rid, row) in enumerate(iter_dotnet_table(pe, "MemberRef")): if not isinstance(member_ref.Class.row, dnfile.mdtable.TypeRefRow):
if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow): # only process class imports from TypeRef table
continue continue
token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1)
yield DnType(token, row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace, member=row.Name) token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid)
yield DnType(
token, member_ref.Class.row.TypeName, namespace=member_ref.Class.row.TypeNamespace, member=member_ref.Name
)
def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
@@ -197,22 +203,47 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
TypeNamespace (index into String heap) TypeNamespace (index into String heap)
MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)
""" """
for row in iter_dotnet_table(pe, "TypeDef"): for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
for index in row.MethodList: assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
token = calculate_dotnet_token_value(index.table.number, index.row_index)
yield DnType(token, row.TypeName, namespace=row.TypeNamespace, member=index.row.Name) for (idx, method) in enumerate(typedef.MethodList):
if method.table is None:
logger.debug("TypeDef[0x%X] MethodList[0x%X] table is None", rid, idx)
continue
if method.row is None:
logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx)
continue
token = calculate_dotnet_token_value(method.table.number, method.row_index)
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method.row.Name)
def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
"""get fields from TypeDef table""" """get fields from TypeDef table
for row in iter_dotnet_table(pe, "TypeDef"):
for index in row.FieldList: see https://www.ntcore.com/files/dotnetformat.htm
token = calculate_dotnet_token_value(index.table.number, index.row_index)
yield DnType(token, row.TypeName, namespace=row.TypeNamespace, member=index.row.Name) 02 - TypeDef Table
Each row represents a class in the current assembly.
TypeName (index into String heap)
TypeNamespace (index into String heap)
FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type)
"""
for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
for (idx, field) in enumerate(typedef.FieldList):
if field.table is None:
logger.debug("TypeDef[0x%X] FieldList[0x%X] table is None", rid, idx)
continue
if field.row is None:
logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx)
continue
token: int = calculate_dotnet_token_value(field.table.number, field.row_index)
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name)
def get_dotnet_property_map( def get_dotnet_property_map(
pe: dnfile.dnPE, property_row: dnfile.mdtable.PropertyRow pe: dnfile.dnPE, mapped_property: Union[dnfile.mdtable.PropertyRow, dnfile.mdtable.EventRow]
) -> Optional[dnfile.mdtable.TypeDefRow]: ) -> Optional[dnfile.mdtable.TypeDefRow]:
"""get property map from PropertyMap table """get property map from PropertyMap table
@@ -225,10 +256,15 @@ def get_dotnet_property_map(
the last row of the Property table the last row of the Property table
the next run of Properties, found by inspecting the PropertyList of the next row in this PropertyMap table the next run of Properties, found by inspecting the PropertyList of the next row in this PropertyMap table
""" """
for row in iter_dotnet_table(pe, "PropertyMap"): for (rid, property_map) in iter_dotnet_table(pe, dnfile.mdtable.PropertyMap.number):
for index in row.PropertyList: assert isinstance(property_map, dnfile.mdtable.PropertyMapRow)
if index.row.Name == property_row.Name:
return row.Parent.row for (idx, property_) in enumerate(property_map.PropertyList):
if property_.row is None:
logger.debug("PropertyMap[0x%X] PropertyList[0x%x] row is None", rid, idx)
continue
if property_.row.Name == mapped_property.Name:
return property_map.Parent.row
return None return None
@@ -243,48 +279,57 @@ def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]:
Method (index into the MethodDef table) Method (index into the MethodDef table)
Association (index into the Event or Property table; more precisely, a HasSemantics coded index) Association (index into the Event or Property table; more precisely, a HasSemantics coded index)
""" """
for row in iter_dotnet_table(pe, "MethodSemantics"): for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number):
typedef_row = get_dotnet_property_map(pe, row.Association.row) assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow)
if typedef_row is None:
if method_semantics.Association.row is None:
logger.debug("MethodSemantics[0x%X] Association row is None", rid)
continue
if method_semantics.Method.table is None:
logger.debug("MethodSemantics[0x%X] Method table is None", rid)
continue continue
token = calculate_dotnet_token_value(row.Method.table.number, row.Method.row_index) typedef: Optional[dnfile.mdtable.TypeDefRow] = get_dotnet_property_map(pe, method_semantics.Association.row)
if typedef is None:
logger.debug("MethodSemantics[0x%X] TypeDef is None", rid)
continue
if row.Semantics.msSetter: token: int = calculate_dotnet_token_value(
method_semantics.Method.table.number, method_semantics.Method.row_index
)
access: Optional[str]
if method_semantics.Semantics.msSetter:
access = FeatureAccess.WRITE access = FeatureAccess.WRITE
elif row.Semantics.msGetter: elif method_semantics.Semantics.msGetter:
access = FeatureAccess.READ access = FeatureAccess.READ
else: else:
access = None access = None
yield DnType( yield DnType(
token, token,
typedef_row.TypeName, typedef.TypeName,
access=access, access=access,
namespace=typedef_row.TypeNamespace, namespace=typedef.TypeNamespace,
member=row.Association.row.Name, member=method_semantics.Association.row.Name,
) )
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
"""get managed methods from MethodDef table""" """get managed methods from MethodDef table"""
assert pe.net is not None for (rid, method_def) in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
assert pe.net.mdtables is not None assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
assert pe.net.mdtables.MethodDef is not None
if not hasattr(pe.net.mdtables, "MethodDef"): if not method_def.ImplFlags.miIL or any((method_def.Flags.mdAbstract, method_def.Flags.mdPinvokeImpl)):
return
for (rid, row) in enumerate(pe.net.mdtables.MethodDef):
if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)):
# skip methods that do not have a method body # skip methods that do not have a method body
continue continue
body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row) body: Optional[CilMethodBody] = read_dotnet_method_body(pe, method_def)
if body is None: if body is None:
logger.debug("MethodDef[0x%X] method body is None", rid)
continue continue
token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MethodDef.value, rid + 1) token: int = calculate_dotnet_token_value(dnfile.mdtable.MethodDef.number, rid)
yield token, body yield token, body
@@ -299,14 +344,29 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
ImportName (index into the String heap) ImportName (index into the String heap)
ImportScope (index into the ModuleRef table) ImportScope (index into the ModuleRef table)
""" """
for row in iter_dotnet_table(pe, "ImplMap"): for (rid, impl_map) in iter_dotnet_table(pe, dnfile.mdtable.ImplMap.number):
module: str = row.ImportScope.row.Name assert isinstance(impl_map, dnfile.mdtable.ImplMapRow)
method: str = row.ImportName
module: str
if impl_map.ImportScope.row is None:
logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
module = ""
else:
module = impl_map.ImportScope.row.Name
method: str = impl_map.ImportName
member_forward_table: int
if impl_map.MemberForwarded.table is None:
logger.debug("ImplMap[0x%X] MemberForwarded table is None", rid)
continue
else:
member_forward_table = impl_map.MemberForwarded.table.number
member_forward_row: int = impl_map.MemberForwarded.row_index
# ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the
# name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded
# MethodDef table token to help us later record native import method calls made from CIL # MethodDef table token to help us later record native import method calls made from CIL
token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index) token: int = calculate_dotnet_token_value(member_forward_table, member_forward_row)
# like Kernel32.dll # like Kernel32.dll
if module and "." in module: if module and "." in module:
@@ -320,13 +380,6 @@ def calculate_dotnet_token_value(table: int, rid: int) -> int:
return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK)
def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool:
assert pe.net is not None
assert pe.net.mdtables is not None
return bool(getattr(pe.net.mdtables, table_name, None))
def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
assert pe.net is not None assert pe.net is not None
assert pe.net.Flags is not None assert pe.net.Flags is not None
@@ -334,12 +387,10 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
return not bool(pe.net.Flags.CLR_ILONLY) return not bool(pe.net.Flags.CLR_ILONLY)
def iter_dotnet_table(pe: dnfile.dnPE, name: str) -> Iterator[Any]: def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
assert pe.net is not None assert pe.net is not None
assert pe.net.mdtables is not None assert pe.net.mdtables is not None
if not is_dotnet_table_valid(pe, name): for (rid, row) in enumerate(pe.net.mdtables.tables.get(table_index, [])):
return # .NET tables are 1-indexed
yield rid + 1, row
for row in getattr(pe.net.mdtables, name):
yield row

View File

@@ -1,5 +1,5 @@
import logging import logging
from typing import Tuple, Iterator from typing import Tuple, Iterator, cast
import dnfile import dnfile
import pefile import pefile
@@ -62,11 +62,15 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
# namespaces may be referenced multiple times, so we need to filter # namespaces may be referenced multiple times, so we need to filter
namespaces = set() namespaces = set()
for row in iter_dotnet_table(pe, "TypeDef"): for (_, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
namespaces.add(row.TypeNamespace) # emit internal .NET namespaces
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
namespaces.add(typedef.TypeNamespace)
for row in iter_dotnet_table(pe, "TypeRef"): for (_, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
namespaces.add(row.TypeNamespace) # emit external .NET namespaces
assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
namespaces.add(typeref.TypeNamespace)
# namespaces may be empty, discard # namespaces may be empty, discard
namespaces.discard("") namespaces.discard("")
@@ -78,18 +82,19 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
"""emit class features from TypeRef and TypeDef tables""" """emit class features from TypeRef and TypeDef tables"""
assert pe.net is not None for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
assert pe.net.mdtables is not None # emit internal .NET classes
assert pe.net.mdtables.TypeDef is not None assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
assert pe.net.mdtables.TypeRef is not None
for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeDef")): token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid)
token = calculate_dotnet_token_value(pe.net.mdtables.TypeDef.number, rid + 1) yield Class(DnType.format_name(typedef.TypeName, namespace=typedef.TypeNamespace)), DNTokenAddress(token)
yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token)
for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeRef")): for (rid, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
token = calculate_dotnet_token_value(pe.net.mdtables.TypeRef.number, rid + 1) # emit external .NET classes
yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token) assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid)
yield Class(DnType.format_name(typeref.TypeName, namespace=typeref.TypeNamespace)), DNTokenAddress(token)
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]: def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:

View File

@@ -27,7 +27,7 @@ requirements = [
"pefile==2022.5.30", "pefile==2022.5.30",
"pyelftools==0.29", "pyelftools==0.29",
"dnfile==0.12.0", "dnfile==0.12.0",
"dncil==1.0.1", "dncil==1.0.2",
"pydantic==1.10.2", "pydantic==1.10.2",
] ]