mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 07:10:29 -08:00
dotnet: address unhandled exceptions through improved type checking (#1230)
* dotnet: bump dncil version
* dotnet: check #US stream valid before access
* dotnet: use assert statements to guard types
This commit is contained in:
@@ -53,6 +53,7 @@
|
||||
- do not overwrite version in version.py during PyInstaller build #1169 @mr-tz
|
||||
- render: fix vverbose rendering of offsets #1215 @williballenthin
|
||||
- elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin
|
||||
- dotnet: address unhandled exceptions with improved type checking #1230 @mike-hunhoff
|
||||
|
||||
### capa explorer IDA Pro plugin
|
||||
- fix: display instruction items #1154 @mr-tz
|
||||
|
||||
@@ -10,7 +10,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import Any, Tuple, Iterator, Optional
|
||||
from typing import Any, Tuple, Union, Iterator, Optional
|
||||
|
||||
import dnfile
|
||||
from dncil.cil.body import CilMethodBody
|
||||
@@ -140,19 +140,23 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -
|
||||
try:
|
||||
return CilMethodBody(DnfileMethodBodyReader(pe, row))
|
||||
except MethodBodyFormatError as e:
|
||||
logger.warning("failed to parse managed method body @ 0x%08x (%s)" % (row.Rva, e))
|
||||
logger.debug("failed to parse managed method body @ 0x%08x (%s)", row.Rva, e)
|
||||
return None
|
||||
|
||||
|
||||
def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]:
|
||||
"""read user string from #US stream"""
|
||||
assert pe.net is not None
|
||||
assert pe.net.user_strings is not None
|
||||
|
||||
if pe.net.user_strings is None:
|
||||
# stream may not exist (seen in obfuscated .NET)
|
||||
logger.debug("#US stream does not exist for stream index 0x%08x", token.rid)
|
||||
return None
|
||||
|
||||
try:
|
||||
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
|
||||
except UnicodeDecodeError as e:
|
||||
logger.warning("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e))
|
||||
logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
|
||||
return None
|
||||
|
||||
if user_string is None:
|
||||
@@ -175,15 +179,17 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
TypeName (index into String heap)
|
||||
TypeNamespace (index into String heap)
|
||||
"""
|
||||
assert pe.net is not None
|
||||
assert pe.net.mdtables is not None
|
||||
assert pe.net.mdtables.MemberRef is not None
|
||||
for (rid, member_ref) in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number):
|
||||
assert isinstance(member_ref, dnfile.mdtable.MemberRefRow)
|
||||
|
||||
for (rid, row) in enumerate(iter_dotnet_table(pe, "MemberRef")):
|
||||
if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow):
|
||||
if not isinstance(member_ref.Class.row, dnfile.mdtable.TypeRefRow):
|
||||
# only process class imports from TypeRef table
|
||||
continue
|
||||
token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1)
|
||||
yield DnType(token, row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace, member=row.Name)
|
||||
|
||||
token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid)
|
||||
yield DnType(
|
||||
token, member_ref.Class.row.TypeName, namespace=member_ref.Class.row.TypeNamespace, member=member_ref.Name
|
||||
)
|
||||
|
||||
|
||||
def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
@@ -197,22 +203,47 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
TypeNamespace (index into String heap)
|
||||
MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)
|
||||
"""
|
||||
for row in iter_dotnet_table(pe, "TypeDef"):
|
||||
for index in row.MethodList:
|
||||
token = calculate_dotnet_token_value(index.table.number, index.row_index)
|
||||
yield DnType(token, row.TypeName, namespace=row.TypeNamespace, member=index.row.Name)
|
||||
for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
|
||||
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
|
||||
|
||||
for (idx, method) in enumerate(typedef.MethodList):
|
||||
if method.table is None:
|
||||
logger.debug("TypeDef[0x%X] MethodList[0x%X] table is None", rid, idx)
|
||||
continue
|
||||
if method.row is None:
|
||||
logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx)
|
||||
continue
|
||||
token = calculate_dotnet_token_value(method.table.number, method.row_index)
|
||||
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method.row.Name)
|
||||
|
||||
|
||||
def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
"""get fields from TypeDef table"""
|
||||
for row in iter_dotnet_table(pe, "TypeDef"):
|
||||
for index in row.FieldList:
|
||||
token = calculate_dotnet_token_value(index.table.number, index.row_index)
|
||||
yield DnType(token, row.TypeName, namespace=row.TypeNamespace, member=index.row.Name)
|
||||
"""get fields from TypeDef table
|
||||
|
||||
see https://www.ntcore.com/files/dotnetformat.htm
|
||||
|
||||
02 - TypeDef Table
|
||||
Each row represents a class in the current assembly.
|
||||
TypeName (index into String heap)
|
||||
TypeNamespace (index into String heap)
|
||||
FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type)
|
||||
"""
|
||||
for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
|
||||
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
|
||||
|
||||
for (idx, field) in enumerate(typedef.FieldList):
|
||||
if field.table is None:
|
||||
logger.debug("TypeDef[0x%X] FieldList[0x%X] table is None", rid, idx)
|
||||
continue
|
||||
if field.row is None:
|
||||
logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx)
|
||||
continue
|
||||
token: int = calculate_dotnet_token_value(field.table.number, field.row_index)
|
||||
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name)
|
||||
|
||||
|
||||
def get_dotnet_property_map(
|
||||
pe: dnfile.dnPE, property_row: dnfile.mdtable.PropertyRow
|
||||
pe: dnfile.dnPE, mapped_property: Union[dnfile.mdtable.PropertyRow, dnfile.mdtable.EventRow]
|
||||
) -> Optional[dnfile.mdtable.TypeDefRow]:
|
||||
"""get property map from PropertyMap table
|
||||
|
||||
@@ -225,10 +256,15 @@ def get_dotnet_property_map(
|
||||
the last row of the Property table
|
||||
the next run of Properties, found by inspecting the PropertyList of the next row in this PropertyMap table
|
||||
"""
|
||||
for row in iter_dotnet_table(pe, "PropertyMap"):
|
||||
for index in row.PropertyList:
|
||||
if index.row.Name == property_row.Name:
|
||||
return row.Parent.row
|
||||
for (rid, property_map) in iter_dotnet_table(pe, dnfile.mdtable.PropertyMap.number):
|
||||
assert isinstance(property_map, dnfile.mdtable.PropertyMapRow)
|
||||
|
||||
for (idx, property_) in enumerate(property_map.PropertyList):
|
||||
if property_.row is None:
|
||||
logger.debug("PropertyMap[0x%X] PropertyList[0x%x] row is None", rid, idx)
|
||||
continue
|
||||
if property_.row.Name == mapped_property.Name:
|
||||
return property_map.Parent.row
|
||||
return None
|
||||
|
||||
|
||||
@@ -243,48 +279,57 @@ def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
Method (index into the MethodDef table)
|
||||
Association (index into the Event or Property table; more precisely, a HasSemantics coded index)
|
||||
"""
|
||||
for row in iter_dotnet_table(pe, "MethodSemantics"):
|
||||
typedef_row = get_dotnet_property_map(pe, row.Association.row)
|
||||
if typedef_row is None:
|
||||
for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number):
|
||||
assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow)
|
||||
|
||||
if method_semantics.Association.row is None:
|
||||
logger.debug("MethodSemantics[0x%X] Association row is None", rid)
|
||||
continue
|
||||
if method_semantics.Method.table is None:
|
||||
logger.debug("MethodSemantics[0x%X] Method table is None", rid)
|
||||
continue
|
||||
|
||||
token = calculate_dotnet_token_value(row.Method.table.number, row.Method.row_index)
|
||||
typedef: Optional[dnfile.mdtable.TypeDefRow] = get_dotnet_property_map(pe, method_semantics.Association.row)
|
||||
if typedef is None:
|
||||
logger.debug("MethodSemantics[0x%X] TypeDef is None", rid)
|
||||
continue
|
||||
|
||||
if row.Semantics.msSetter:
|
||||
token: int = calculate_dotnet_token_value(
|
||||
method_semantics.Method.table.number, method_semantics.Method.row_index
|
||||
)
|
||||
|
||||
access: Optional[str]
|
||||
if method_semantics.Semantics.msSetter:
|
||||
access = FeatureAccess.WRITE
|
||||
elif row.Semantics.msGetter:
|
||||
elif method_semantics.Semantics.msGetter:
|
||||
access = FeatureAccess.READ
|
||||
else:
|
||||
access = None
|
||||
|
||||
yield DnType(
|
||||
token,
|
||||
typedef_row.TypeName,
|
||||
typedef.TypeName,
|
||||
access=access,
|
||||
namespace=typedef_row.TypeNamespace,
|
||||
member=row.Association.row.Name,
|
||||
namespace=typedef.TypeNamespace,
|
||||
member=method_semantics.Association.row.Name,
|
||||
)
|
||||
|
||||
|
||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
|
||||
"""get managed methods from MethodDef table"""
|
||||
assert pe.net is not None
|
||||
assert pe.net.mdtables is not None
|
||||
assert pe.net.mdtables.MethodDef is not None
|
||||
for (rid, method_def) in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
|
||||
assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
|
||||
|
||||
if not hasattr(pe.net.mdtables, "MethodDef"):
|
||||
return
|
||||
|
||||
for (rid, row) in enumerate(pe.net.mdtables.MethodDef):
|
||||
if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)):
|
||||
if not method_def.ImplFlags.miIL or any((method_def.Flags.mdAbstract, method_def.Flags.mdPinvokeImpl)):
|
||||
# skip methods that do not have a method body
|
||||
continue
|
||||
|
||||
body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row)
|
||||
body: Optional[CilMethodBody] = read_dotnet_method_body(pe, method_def)
|
||||
if body is None:
|
||||
logger.debug("MethodDef[0x%X] method body is None", rid)
|
||||
continue
|
||||
|
||||
token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MethodDef.value, rid + 1)
|
||||
token: int = calculate_dotnet_token_value(dnfile.mdtable.MethodDef.number, rid)
|
||||
yield token, body
|
||||
|
||||
|
||||
@@ -299,14 +344,29 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
|
||||
ImportName (index into the String heap)
|
||||
ImportScope (index into the ModuleRef table)
|
||||
"""
|
||||
for row in iter_dotnet_table(pe, "ImplMap"):
|
||||
module: str = row.ImportScope.row.Name
|
||||
method: str = row.ImportName
|
||||
for (rid, impl_map) in iter_dotnet_table(pe, dnfile.mdtable.ImplMap.number):
|
||||
assert isinstance(impl_map, dnfile.mdtable.ImplMapRow)
|
||||
|
||||
module: str
|
||||
if impl_map.ImportScope.row is None:
|
||||
logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
|
||||
module = ""
|
||||
else:
|
||||
module = impl_map.ImportScope.row.Name
|
||||
method: str = impl_map.ImportName
|
||||
|
||||
member_forward_table: int
|
||||
if impl_map.MemberForwarded.table is None:
|
||||
logger.debug("ImplMap[0x%X] MemberForwarded table is None", rid)
|
||||
continue
|
||||
else:
|
||||
member_forward_table = impl_map.MemberForwarded.table.number
|
||||
member_forward_row: int = impl_map.MemberForwarded.row_index
|
||||
|
||||
# ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the
|
||||
# name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded
|
||||
# MethodDef table token to help us later record native import method calls made from CIL
|
||||
token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index)
|
||||
token: int = calculate_dotnet_token_value(member_forward_table, member_forward_row)
|
||||
|
||||
# like Kernel32.dll
|
||||
if module and "." in module:
|
||||
@@ -320,13 +380,6 @@ def calculate_dotnet_token_value(table: int, rid: int) -> int:
|
||||
return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK)
|
||||
|
||||
|
||||
def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool:
|
||||
assert pe.net is not None
|
||||
assert pe.net.mdtables is not None
|
||||
|
||||
return bool(getattr(pe.net.mdtables, table_name, None))
|
||||
|
||||
|
||||
def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
|
||||
assert pe.net is not None
|
||||
assert pe.net.Flags is not None
|
||||
@@ -334,12 +387,10 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
|
||||
return not bool(pe.net.Flags.CLR_ILONLY)
|
||||
|
||||
|
||||
def iter_dotnet_table(pe: dnfile.dnPE, name: str) -> Iterator[Any]:
|
||||
def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
|
||||
assert pe.net is not None
|
||||
assert pe.net.mdtables is not None
|
||||
|
||||
if not is_dotnet_table_valid(pe, name):
|
||||
return
|
||||
|
||||
for row in getattr(pe.net.mdtables, name):
|
||||
yield row
|
||||
for (rid, row) in enumerate(pe.net.mdtables.tables.get(table_index, [])):
|
||||
# .NET tables are 1-indexed
|
||||
yield rid + 1, row
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Tuple, Iterator, cast
|
||||
|
||||
import dnfile
|
||||
import pefile
|
||||
@@ -62,11 +62,15 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
|
||||
# namespaces may be referenced multiple times, so we need to filter
|
||||
namespaces = set()
|
||||
|
||||
for row in iter_dotnet_table(pe, "TypeDef"):
|
||||
namespaces.add(row.TypeNamespace)
|
||||
for (_, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
|
||||
# emit internal .NET namespaces
|
||||
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
|
||||
namespaces.add(typedef.TypeNamespace)
|
||||
|
||||
for row in iter_dotnet_table(pe, "TypeRef"):
|
||||
namespaces.add(row.TypeNamespace)
|
||||
for (_, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
|
||||
# emit external .NET namespaces
|
||||
assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
|
||||
namespaces.add(typeref.TypeNamespace)
|
||||
|
||||
# namespaces may be empty, discard
|
||||
namespaces.discard("")
|
||||
@@ -78,18 +82,19 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
|
||||
|
||||
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
|
||||
"""emit class features from TypeRef and TypeDef tables"""
|
||||
assert pe.net is not None
|
||||
assert pe.net.mdtables is not None
|
||||
assert pe.net.mdtables.TypeDef is not None
|
||||
assert pe.net.mdtables.TypeRef is not None
|
||||
for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
|
||||
# emit internal .NET classes
|
||||
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
|
||||
|
||||
for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeDef")):
|
||||
token = calculate_dotnet_token_value(pe.net.mdtables.TypeDef.number, rid + 1)
|
||||
yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token)
|
||||
token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid)
|
||||
yield Class(DnType.format_name(typedef.TypeName, namespace=typedef.TypeNamespace)), DNTokenAddress(token)
|
||||
|
||||
for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeRef")):
|
||||
token = calculate_dotnet_token_value(pe.net.mdtables.TypeRef.number, rid + 1)
|
||||
yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token)
|
||||
for (rid, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
|
||||
# emit external .NET classes
|
||||
assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
|
||||
|
||||
token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid)
|
||||
yield Class(DnType.format_name(typeref.TypeName, namespace=typeref.TypeNamespace)), DNTokenAddress(token)
|
||||
|
||||
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
|
||||
|
||||
Reference in New Issue
Block a user