dotnet: emit API features for generic methods (#1231)

* dotnet: emit API features for generic methods

* dotnet: improve type checking

* dotnet: emit namespace/class features for generic methods

* dotnet: update for dnfile 0.13.0

* dotnet: refactor property extraction
This commit is contained in:
Mike Hunhoff
2022-12-19 14:45:21 -07:00
committed by GitHub
parent fa3d658f33
commit e0491097b0
4 changed files with 241 additions and 243 deletions

View File

@@ -4,9 +4,9 @@
### New Features ### New Features
- verify rule metadata format on load #1160 @mr-tz - verify rule metadata format on load #1160 @mr-tz
- extract property features from .NET PE files #1168 @anushkavirgaonkar - dotnet: emit property features #1168 @anushkavirgaonkar
- emit features for .NET newobj instruction #1186 @mike-hunhoff - dotnet: emit API features for objects created via the newobj instruction #1186 @mike-hunhoff
- fix import-to-ida script formatting #1208 @williballenthin - dotnet: emit API features for generic methods #1231 @mike-hunhoff
- Python 3.11 support #1192 @williballenthin - Python 3.11 support #1192 @williballenthin
### Breaking Changes ### Breaking Changes
@@ -54,6 +54,7 @@
- render: fix vverbose rendering of offsets #1215 @williballenthin - render: fix vverbose rendering of offsets #1215 @williballenthin
- elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin - elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin
- dotnet: address unhandled exceptions with improved type checking #1230 @mike-hunhoff - dotnet: address unhandled exceptions with improved type checking #1230 @mike-hunhoff
- fix import-to-ida script formatting #1208 @williballenthin
### capa explorer IDA Pro plugin ### capa explorer IDA Pro plugin
- fix: display instruction items #1154 @mr-tz - fix: display instruction items #1154 @mr-tz

View File

@@ -9,8 +9,7 @@
from __future__ import annotations from __future__ import annotations
import logging import logging
from enum import Enum from typing import Dict, Tuple, Union, Iterator, Optional
from typing import Any, Tuple, Union, Iterator, Optional
import dnfile import dnfile
from dncil.cil.body import CilMethodBody from dncil.cil.body import CilMethodBody
@@ -22,9 +21,6 @@ from capa.features.common import FeatureAccess
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# key indexes to dotnet metadata tables
DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables}
class DnfileMethodBodyReader(CilMethodBodyReaderBase): class DnfileMethodBodyReader(CilMethodBodyReaderBase):
def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow): def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow):
@@ -47,6 +43,7 @@ class DnfileMethodBodyReader(CilMethodBodyReaderBase):
class DnType(object): class DnType(object):
def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None): def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None):
self.token = token self.token = token
# property access
self.access = access self.access = access
self.namespace = namespace self.namespace = namespace
self.class_ = class_ self.class_ = class_
@@ -107,7 +104,7 @@ class DnUnmanagedMethod:
return f"{module}.{method}" return f"{module}.{method}"
def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Union[dnfile.base.MDTableRow, InvalidToken, str]:
"""map generic token to string or table row""" """map generic token to string or table row"""
assert pe.net is not None assert pe.net is not None
assert pe.net.mdtables is not None assert pe.net.mdtables is not None
@@ -118,14 +115,9 @@ def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any:
return InvalidToken(token.value) return InvalidToken(token.value)
return user_string return user_string
table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table, None)
if not table_name:
# table_index is not valid
return InvalidToken(token.value)
table: Any = getattr(pe.net.mdtables, table_name, None)
if table is None: if table is None:
# table index is valid but table is not present # table index is not valid
return InvalidToken(token.value) return InvalidToken(token.value)
try: try:
@@ -187,11 +179,67 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
continue continue
token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid) token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid)
access: Optional[str]
# assume .NET imports starting with get_/set_ are used to access a property
if member_ref.Name.startswith("get_"):
access = FeatureAccess.READ
elif member_ref.Name.startswith("set_"):
access = FeatureAccess.WRITE
else:
access = None
member_ref_name: str = member_ref.Name
if member_ref_name.startswith(("get_", "set_")):
# remove get_/set_ from MemberRef name
member_ref_name = member_ref_name[4:]
yield DnType( yield DnType(
token, member_ref.Class.row.TypeName, namespace=member_ref.Class.row.TypeNamespace, member=member_ref.Name token,
member_ref.Class.row.TypeName,
namespace=member_ref.Class.row.TypeNamespace,
member=member_ref_name,
access=access,
) )
def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
    """yield (method token, access) pairs for methods that get/set a property

    walks the MethodSemantics table; rows associated with an Event are ignored, as are rows
    whose Association row or Method table did not resolve. a row flagged as setter yields
    FeatureAccess.WRITE, otherwise a row flagged as getter yields FeatureAccess.READ; rows
    flagged as neither yield nothing.

    see https://www.ntcore.com/files/dotnetformat.htm

    24 - MethodSemantics Table
        Links Events and Properties to specific methods. For example one Event can be associated to more methods. A property uses this table to associate get/set methods.
            Semantics (a 2-byte bitmask of type MethodSemanticsAttributes)
            Method (index into the MethodDef table)
            Association (index into the Event or Property table; more precisely, a HasSemantics coded index)
    """
    for rid, semantics_row in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number):
        assert isinstance(semantics_row, dnfile.mdtable.MethodSemanticsRow)

        association = semantics_row.Association.row
        if association is None:
            logger.debug("MethodSemantics[0x%X] Association row is None", rid)
            continue

        if isinstance(association, dnfile.mdtable.EventRow):
            # only properties are of interest here
            logger.debug("MethodSemantics[0x%X] ignoring Event", rid)
            continue

        method_table = semantics_row.Method.table
        if method_table is None:
            logger.debug("MethodSemantics[0x%X] Method table is None", rid)
            continue

        accessor_token: int = calculate_dotnet_token_value(method_table.number, semantics_row.Method.row_index)

        # setter is tested first, mirroring the precedence callers have always seen
        if semantics_row.Semantics.msSetter:
            yield accessor_token, FeatureAccess.WRITE
        elif semantics_row.Semantics.msGetter:
            yield accessor_token, FeatureAccess.READ
def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
"""get managed method names from TypeDef table """get managed method names from TypeDef table
@@ -203,6 +251,10 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
TypeNamespace (index into String heap) TypeNamespace (index into String heap)
MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)
""" """
accessor_map: Dict[int, str] = {}
for (methoddef, methoddef_access) in get_dotnet_methoddef_property_accessors(pe):
accessor_map[methoddef] = methoddef_access
for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
assert isinstance(typedef, dnfile.mdtable.TypeDefRow) assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
@@ -213,8 +265,16 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
if method.row is None: if method.row is None:
logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx) logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx)
continue continue
token = calculate_dotnet_token_value(method.table.number, method.row_index)
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method.row.Name) token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
access: Optional[str] = accessor_map.get(token, None)
method_name: str = method.row.Name
if method_name.startswith(("get_", "set_")):
# remove get_/set_
method_name = method_name[4:]
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access)
def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
@@ -242,79 +302,6 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name) yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name)
def get_dotnet_property_map(
pe: dnfile.dnPE, mapped_property: Union[dnfile.mdtable.PropertyRow, dnfile.mdtable.EventRow]
) -> Optional[dnfile.mdtable.TypeDefRow]:
"""get property map from PropertyMap table
see https://www.ntcore.com/files/dotnetformat.htm
21 - PropertyMap Table
List of Properties owned by a specific class.
Parent (index into the TypeDef table)
PropertyList (index into Property table). It marks the first of a contiguous run of Properties owned by Parent. The run continues to the smaller of:
the last row of the Property table
the next run of Properties, found by inspecting the PropertyList of the next row in this PropertyMap table
"""
for (rid, property_map) in iter_dotnet_table(pe, dnfile.mdtable.PropertyMap.number):
assert isinstance(property_map, dnfile.mdtable.PropertyMapRow)
for (idx, property_) in enumerate(property_map.PropertyList):
if property_.row is None:
logger.debug("PropertyMap[0x%X] PropertyList[0x%x] row is None", rid, idx)
continue
if property_.row.Name == mapped_property.Name:
return property_map.Parent.row
return None
def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]:
"""get property from MethodSemantics table
see https://www.ntcore.com/files/dotnetformat.htm
24 - MethodSemantics Table
Links Events and Properties to specific methods. For example one Event can be associated to more methods. A property uses this table to associate get/set methods.
Semantics (a 2-byte bitmask of type MethodSemanticsAttributes)
Method (index into the MethodDef table)
Association (index into the Event or Property table; more precisely, a HasSemantics coded index)
"""
for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number):
assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow)
if method_semantics.Association.row is None:
logger.debug("MethodSemantics[0x%X] Association row is None", rid)
continue
if method_semantics.Method.table is None:
logger.debug("MethodSemantics[0x%X] Method table is None", rid)
continue
typedef: Optional[dnfile.mdtable.TypeDefRow] = get_dotnet_property_map(pe, method_semantics.Association.row)
if typedef is None:
logger.debug("MethodSemantics[0x%X] TypeDef is None", rid)
continue
token: int = calculate_dotnet_token_value(
method_semantics.Method.table.number, method_semantics.Method.row_index
)
access: Optional[str]
if method_semantics.Semantics.msSetter:
access = FeatureAccess.WRITE
elif method_semantics.Semantics.msGetter:
access = FeatureAccess.READ
else:
access = None
yield DnType(
token,
typedef.TypeName,
access=access,
namespace=typedef.TypeNamespace,
member=method_semantics.Association.row.Name,
)
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
"""get managed methods from MethodDef table""" """get managed methods from MethodDef table"""
for (rid, method_def) in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number): for (rid, method_def) in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):

View File

@@ -8,13 +8,12 @@
from __future__ import annotations from __future__ import annotations
from typing import Any, Dict, Tuple, Union, Iterator, Optional import logging
from typing import Dict, Tuple, Union, Iterator, Optional
import dnfile import dnfile
from dncil.cil.body import CilMethodBody
from dncil.clr.token import Token, StringToken, InvalidToken from dncil.clr.token import Token, StringToken, InvalidToken
from dncil.cil.opcode import OpCodes from dncil.cil.opcode import OpCodes
from dncil.cil.instruction import Instruction
import capa.features.extractors.helpers import capa.features.extractors.helpers
from capa.features.insn import API, Number, Property from capa.features.insn import API, Number, Property
@@ -26,16 +25,14 @@ from capa.features.extractors.dnfile.helpers import (
DnUnmanagedMethod, DnUnmanagedMethod,
get_dotnet_fields, get_dotnet_fields,
resolve_dotnet_token, resolve_dotnet_token,
get_dotnet_properties,
read_dotnet_user_string, read_dotnet_user_string,
get_dotnet_managed_imports, get_dotnet_managed_imports,
get_dotnet_managed_methods, get_dotnet_managed_methods,
calculate_dotnet_token_value,
get_dotnet_unmanaged_imports, get_dotnet_unmanaged_imports,
) )
METHODDEF_TABLE = dnfile.mdtable.MethodDef.number logger = logging.getLogger(__name__)
MEMBERREF_TABLE = dnfile.mdtable.MemberRef.number
FIELD_TABLE = dnfile.mdtable.Field.number
def get_managed_imports(ctx: Dict) -> Dict: def get_managed_imports(ctx: Dict) -> Dict:
@@ -62,26 +59,6 @@ def get_methods(ctx: Dict) -> Dict:
return ctx["methods_cache"] return ctx["methods_cache"]
def get_callee(ctx: Dict, token: int) -> Union[DnType, DnUnmanagedMethod, None]:
"""map dotnet token to un/managed method"""
callee: Union[DnType, DnUnmanagedMethod, None] = get_managed_imports(ctx).get(token, None)
if callee is None:
# we must check unmanaged imports before managed methods because we map forwarded managed methods
# to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis
callee = get_unmanaged_imports(ctx).get(token, None)
if callee is None:
callee = get_methods(ctx).get(token, None)
return callee
def get_properties(ctx: Dict) -> Dict:
if "properties_cache" not in ctx:
ctx["properties_cache"] = {}
for prop in get_dotnet_properties(ctx["pe"]):
ctx["properties_cache"][prop.token] = prop
return ctx["properties_cache"]
def get_fields(ctx: Dict) -> Dict: def get_fields(ctx: Dict) -> Dict:
if "fields_cache" not in ctx: if "fields_cache" not in ctx:
ctx["fields_cache"] = {} ctx["fields_cache"] = {}
@@ -90,31 +67,45 @@ def get_fields(ctx: Dict) -> Dict:
return ctx["fields_cache"] return ctx["fields_cache"]
def get_callee(ctx: Dict, token: Token) -> Union[DnType, DnUnmanagedMethod, None]:
    """map .NET token to un/managed (generic) method

    the token is resolved to a metadata row first; only MethodDef (internal), MemberRef (external),
    and MethodSpec (generic) rows are handled. a MethodSpec is translated to the token of the
    MethodDef/MemberRef it references before the lookup. returns None when nothing matches.
    """
    row: Union[dnfile.base.MDTableRow, InvalidToken, str] = resolve_dotnet_token(ctx["pe"], token)

    lookup_token: int
    if isinstance(row, dnfile.mdtable.MethodSpecRow):
        # generic method: follow the MethodSpec to the underlying MethodDef or MemberRef
        if row.Method.table is None:
            logger.debug("MethodSpec[0x%X] Method table is None", token.rid)
            return None
        lookup_token = calculate_dotnet_token_value(row.Method.table.number, row.Method.row_index)
    elif isinstance(row, (dnfile.mdtable.MethodDefRow, dnfile.mdtable.MemberRefRow)):
        lookup_token = token.value
    else:
        # strings, invalid tokens, and rows from any other table are not callees
        return None

    # order matters: unmanaged imports are consulted before managed methods because forwarded
    # managed methods are mapped to their unmanaged imports, and that mapping is preferred for analysis.
    # each cache getter is only invoked if the previous lookup missed.
    for get_cache in (get_managed_imports, get_unmanaged_imports, get_methods):
        callee: Union[DnType, DnUnmanagedMethod, None] = get_cache(ctx).get(lookup_token, None)
        if callee is not None:
            return callee

    return None
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
"""parse instruction API features""" """parse instruction API features"""
insn: Instruction = ih.inner if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj):
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj):
return
callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, insn.operand.value)
if callee is None:
return return
callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand)
if isinstance(callee, DnType): if isinstance(callee, DnType):
if callee.member.startswith(("get_", "set_")): # ignore methods used to access properties
if insn.operand.table == METHODDEF_TABLE: if callee.access is None:
# check if the method belongs to the MethodDef table and whether it is used to access a property # like System.IO.File::Delete
if get_properties(fh.ctx).get(insn.operand.value, None) is not None: yield API(str(callee)), ih.address
return elif isinstance(callee, DnUnmanagedMethod):
elif insn.operand.table == MEMBERREF_TABLE:
# if the method belongs to the MemberRef table, we assume it is used to access a property
return
# like System.IO.File::Delete
yield API(str(callee)), ih.address
else:
# like kernel32.CreateFileA # like kernel32.CreateFileA
for name in capa.features.extractors.helpers.generate_symbols(callee.module, callee.method): for name in capa.features.extractors.helpers.generate_symbols(callee.module, callee.method):
yield API(name), ih.address yield API(name), ih.address
@@ -122,52 +113,30 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato
def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
"""parse instruction property features""" """parse instruction property features"""
insn: Instruction = ih.inner
name: Optional[str] = None name: Optional[str] = None
access: Optional[str] = None access: Optional[str] = None
if insn.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): if ih.inner.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
if insn.operand.table == METHODDEF_TABLE: # property access via MethodDef or MemberRef
# check if the method belongs to the MethodDef table and whether it is used to access a property callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand)
prop = get_properties(fh.ctx).get(insn.operand.value, None) if isinstance(callee, DnType):
if prop is not None: if callee.access is not None:
name = str(prop) name = str(callee)
access = prop.access access = callee.access
elif insn.operand.table == MEMBERREF_TABLE: elif ih.inner.opcode in (OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda):
# if the method belongs to the MemberRef table, we assume it is used to access a property # property read via Field
row: Any = resolve_dotnet_token(fh.ctx["pe"], insn.operand) read_field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None)
if row is None: if read_field is not None:
return name = str(read_field)
if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): access = FeatureAccess.READ
return
if not row.Name.startswith(("get_", "set_")):
return
name = DnType.format_name( elif ih.inner.opcode in (OpCodes.Stfld, OpCodes.Stsfld):
row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace, member=row.Name[4:] # property write via Field
) write_field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None)
if row.Name.startswith("get_"): if write_field is not None:
access = FeatureAccess.READ name = str(write_field)
elif row.Name.startswith("set_"): access = FeatureAccess.WRITE
access = FeatureAccess.WRITE
elif insn.opcode in (OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda):
if insn.operand.table == FIELD_TABLE:
# determine whether the operand is a field by checking if it belongs to the Field table
read_field: Optional[DnType] = get_fields(fh.ctx).get(insn.operand.value, None)
if read_field:
name = str(read_field)
access = FeatureAccess.READ
elif insn.opcode in (OpCodes.Stfld, OpCodes.Stsfld):
if insn.operand.table == FIELD_TABLE:
# determine whether the operand is a field by checking if it belongs to the Field table
write_field: Optional[DnType] = get_fields(fh.ctx).get(insn.operand.value, None)
if write_field:
name = str(write_field)
access = FeatureAccess.WRITE
if name is not None: if name is not None:
if access is not None: if access is not None:
@@ -177,92 +146,74 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It
def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Class, Address]]: def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Class, Address]]:
"""parse instruction class features""" """parse instruction class features"""
if ih.inner.opcode not in ( if ih.inner.opcode in (
OpCodes.Call, OpCodes.Call,
OpCodes.Callvirt, OpCodes.Callvirt,
OpCodes.Jmp, OpCodes.Jmp,
OpCodes.Calli, OpCodes.Calli,
OpCodes.Newobj,
):
# method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef)
callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand)
if isinstance(callee, DnType):
yield Class(DnType.format_name(callee.class_, namespace=callee.namespace)), ih.address
elif ih.inner.opcode in (
OpCodes.Ldfld, OpCodes.Ldfld,
OpCodes.Ldflda, OpCodes.Ldflda,
OpCodes.Ldsfld, OpCodes.Ldsfld,
OpCodes.Ldsflda, OpCodes.Ldsflda,
OpCodes.Stfld, OpCodes.Stfld,
OpCodes.Stsfld, OpCodes.Stsfld,
OpCodes.Newobj,
): ):
return # field access
row: Any = resolve_dotnet_token(fh.ctx["pe"], ih.inner.operand)
if isinstance(row, dnfile.mdtable.MemberRefRow):
if isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)):
yield Class(DnType.format_name(row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace)), ih.address
elif isinstance(row, dnfile.mdtable.MethodDefRow):
callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand.value)
if isinstance(callee, DnType):
yield Class(DnType.format_name(callee.class_, namespace=callee.namespace)), ih.address
elif isinstance(row, dnfile.mdtable.FieldRow):
field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None)
if field is not None: if isinstance(field, DnType):
yield Class(DnType.format_name(field.class_, namespace=field.namespace)), ih.address yield Class(DnType.format_name(field.class_, namespace=field.namespace)), ih.address
def extract_insn_namespace_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Namespace, Address]]: def extract_insn_namespace_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Namespace, Address]]:
"""parse instruction namespace features""" """parse instruction namespace features"""
if ih.inner.opcode not in ( if ih.inner.opcode in (
OpCodes.Call, OpCodes.Call,
OpCodes.Callvirt, OpCodes.Callvirt,
OpCodes.Jmp, OpCodes.Jmp,
OpCodes.Calli, OpCodes.Calli,
OpCodes.Newobj,
):
# method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef)
callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand)
if isinstance(callee, DnType) and callee.namespace is not None:
yield Namespace(callee.namespace), ih.address
elif ih.inner.opcode in (
OpCodes.Ldfld, OpCodes.Ldfld,
OpCodes.Ldflda, OpCodes.Ldflda,
OpCodes.Ldsfld, OpCodes.Ldsfld,
OpCodes.Ldsflda, OpCodes.Ldsflda,
OpCodes.Stfld, OpCodes.Stfld,
OpCodes.Stsfld, OpCodes.Stsfld,
OpCodes.Newobj,
): ):
return
row: Any = resolve_dotnet_token(fh.ctx["pe"], Token(ih.inner.operand.value))
if isinstance(row, dnfile.mdtable.MemberRefRow):
if isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)):
if row.Class.row.TypeNamespace:
yield Namespace(row.Class.row.TypeNamespace), ih.address
elif isinstance(row, dnfile.mdtable.MethodDefRow):
callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand.value)
if isinstance(callee, DnType) and callee.namespace is not None:
yield Namespace(callee.namespace), ih.address
elif isinstance(row, dnfile.mdtable.FieldRow):
field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None)
if field is not None: if isinstance(field, DnType) and field.namespace is not None:
yield Namespace(field.namespace), ih.address yield Namespace(field.namespace), ih.address
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
"""parse instruction number features""" """parse instruction number features"""
insn: Instruction = ih.inner if ih.inner.is_ldc():
yield Number(ih.inner.get_ldc()), ih.address
if insn.is_ldc():
yield Number(insn.get_ldc()), ih.address
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
"""parse instruction string features""" """parse instruction string features"""
f: CilMethodBody = fh.inner if not ih.inner.is_ldstr():
insn: Instruction = ih.inner
if not insn.is_ldstr():
return return
if not isinstance(insn.operand, StringToken): if not isinstance(ih.inner.operand, StringToken):
return return
user_string: Optional[str] = read_dotnet_user_string(fh.ctx["pe"], insn.operand) user_string: Optional[str] = read_dotnet_user_string(fh.ctx["pe"], ih.inner.operand)
if user_string is None: if user_string is None:
return return
@@ -272,17 +223,14 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter
def extract_unmanaged_call_characteristic_features( def extract_unmanaged_call_characteristic_features(
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Characteristic, Address]]: ) -> Iterator[Tuple[Characteristic, Address]]:
insn: Instruction = ih.inner if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
return return
token: Any = resolve_dotnet_token(fh.ctx["pe"], insn.operand) row: Union[str, InvalidToken, dnfile.base.MDTableRow] = resolve_dotnet_token(fh.ctx["pe"], ih.inner.operand)
if isinstance(token, InvalidToken): if not isinstance(row, dnfile.mdtable.MethodDefRow):
return
if not isinstance(token, dnfile.mdtable.MethodDefRow):
return return
if any((token.Flags.mdPinvokeImpl, token.ImplFlags.miUnmanaged, token.ImplFlags.miNative)): if any((row.Flags.mdPinvokeImpl, row.ImplFlags.miUnmanaged, row.ImplFlags.miNative)):
yield Characteristic("unmanaged call"), ih.address yield Characteristic("unmanaged call"), ih.address

View File

@@ -278,6 +278,10 @@ def get_data_path_by_name(name):
return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe") return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe")
elif name.startswith("_1c444"): elif name.startswith("_1c444"):
return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_") return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_")
elif name.startswith("_387f15"):
return os.path.join(
CD, "data", "dotnet", "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_"
)
elif name.startswith("_692f"): elif name.startswith("_692f"):
return os.path.join(CD, "data", "dotnet", "692f7fd6d198e804d6af98eb9e390d61.exe_") return os.path.join(CD, "data", "dotnet", "692f7fd6d198e804d6af98eb9e390d61.exe_")
elif name.startswith("_0953c"): elif name.startswith("_0953c"):
@@ -751,6 +755,9 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True),
("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True),
("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False),
("_692f", "token=0x6000004", capa.features.insn.API("System.Linq.Enumerable::First"), True), # generic method
("_692f", "token=0x6000004", capa.features.common.Namespace("System.Linq"), True), # generic method
("_692f", "token=0x6000004", capa.features.common.Class("System.Linq.Enumerable"), True), # generic method
( (
"_1c444", "_1c444",
"function=0x1F59, bb=0x1F59, insn=0x1F5B", "function=0x1F59, bb=0x1F59, insn=0x1F5B",
@@ -772,25 +779,25 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
"token=0x600002B", "token=0x600002B",
capa.features.insn.Property("System.IO.FileInfo::Length", access=FeatureAccess.READ), capa.features.insn.Property("System.IO.FileInfo::Length", access=FeatureAccess.READ),
True, True,
), # MemberRef method ), # MemberRef property access
( (
"_1c444", "_1c444",
"token=0x600002B", "token=0x600002B",
capa.features.insn.Property("System.IO.FileInfo::Length"), capa.features.insn.Property("System.IO.FileInfo::Length"),
True, True,
), # MemberRef method ), # MemberRef property access
( (
"_1c444", "_1c444",
"token=0x6000081", "token=0x6000081",
capa.features.insn.API("System.Diagnostics.Process::Start"), capa.features.insn.API("System.Diagnostics.Process::Start"),
True, True,
), # MemberRef method ), # MemberRef method
( (
"_1c444", "_1c444",
"token=0x6000081", "token=0x6000081",
capa.features.insn.Property( capa.features.insn.Property(
"System.Diagnostics.ProcessStartInfo::UseShellExecute", access=FeatureAccess.WRITE "System.Diagnostics.ProcessStartInfo::UseShellExecute", access=FeatureAccess.WRITE
), # MemberRef method ), # MemberRef property access
True, True,
), ),
( (
@@ -798,7 +805,7 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
"token=0x6000081", "token=0x6000081",
capa.features.insn.Property( capa.features.insn.Property(
"System.Diagnostics.ProcessStartInfo::WorkingDirectory", access=FeatureAccess.WRITE "System.Diagnostics.ProcessStartInfo::WorkingDirectory", access=FeatureAccess.WRITE
), # MemberRef method ), # MemberRef property access
True, True,
), ),
( (
@@ -806,41 +813,96 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
"token=0x6000081", "token=0x6000081",
capa.features.insn.Property( capa.features.insn.Property(
"System.Diagnostics.ProcessStartInfo::FileName", access=FeatureAccess.WRITE "System.Diagnostics.ProcessStartInfo::FileName", access=FeatureAccess.WRITE
), # MemberRef method ), # MemberRef property access
True, True,
), ),
( (
"_1c444", "_1c444",
"token=0x6000087", "token=0x6000087",
capa.features.insn.Property("Sockets.MySocket::reConnectionDelay", access=FeatureAccess.WRITE), # Field capa.features.insn.Property(
"Sockets.MySocket::reConnectionDelay", access=FeatureAccess.WRITE
), # Field property access
True, True,
), ),
( (
"_1c444", "_1c444",
"token=0x600008A", "token=0x600008A",
capa.features.insn.Property("Sockets.MySocket::isConnected", access=FeatureAccess.WRITE), # Field capa.features.insn.Property(
"Sockets.MySocket::isConnected", access=FeatureAccess.WRITE
), # Field property access
True, True,
), ),
( (
"_1c444", "_1c444",
"token=0x600008A", "token=0x600008A",
capa.features.insn.Property("Sockets.MySocket::onConnected", access=FeatureAccess.READ), # Field capa.features.common.Class("Sockets.MySocket"), # Field property access
True,
),
(
"_1c444",
"token=0x600008A",
capa.features.common.Namespace("Sockets"), # Field property access
True,
),
(
"_1c444",
"token=0x600008A",
capa.features.insn.Property(
"Sockets.MySocket::onConnected", access=FeatureAccess.READ
), # Field property access
True, True,
), ),
( (
"_0953c", "_0953c",
"token=0x6000004", "token=0x6000004",
capa.features.insn.Property("System.Diagnostics.Debugger::IsAttached", access=FeatureAccess.READ), capa.features.insn.Property(
"System.Diagnostics.Debugger::IsAttached", access=FeatureAccess.READ
), # MemberRef property access
True, True,
), # MemberRef method ),
(
"_0953c",
"token=0x6000004",
capa.features.common.Class("System.Diagnostics.Debugger"), # MemberRef property access
True,
),
(
"_0953c",
"token=0x6000004",
capa.features.common.Namespace("System.Diagnostics"), # MemberRef property access
True,
),
( (
"_692f", "_692f",
"token=0x6000006", "token=0x6000006",
capa.features.insn.Property( capa.features.insn.Property(
"System.Management.Automation.PowerShell::Streams", access=FeatureAccess.READ "System.Management.Automation.PowerShell::Streams", access=FeatureAccess.READ
), # MemberRef method ), # MemberRef property access
False, False,
), ),
(
"_387f15",
"token=0x600009E",
capa.features.insn.Property(
"Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE::geoplugin_countryCode",
access=FeatureAccess.READ,
), # MethodDef property access
True,
),
(
"_387f15",
"token=0x600009E",
capa.features.common.Class(
"Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE"
), # MethodDef property access
True,
),
(
"_387f15",
"token=0x600009E",
capa.features.common.Namespace("Modulo"), # MethodDef property access
True,
),
( (
"_039a6", "_039a6",
"token=0x6000007", "token=0x6000007",