From e0491097b029289a2add7fe577bb6b8aac166f5b Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 19 Dec 2022 14:45:21 -0700 Subject: [PATCH] dotnet: emit API features for generic methods (#1231) * dotnet: emit API features for generic methods * dotnet: improve type checking * dotnet: emit namespace/class features for generic methods * dotnet: update for dnfile 0.13.0 * dotnet: refactor property extraction --- CHANGELOG.md | 7 +- capa/features/extractors/dnfile/helpers.py | 165 +++++++-------- capa/features/extractors/dnfile/insn.py | 226 ++++++++------------- tests/fixtures.py | 86 ++++++-- 4 files changed, 241 insertions(+), 243 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27ce7c67..11f2a995 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,9 @@ ### New Features - verify rule metadata format on load #1160 @mr-tz -- extract property features from .NET PE files #1168 @anushkavirgaonkar -- emit features for .NET newobj instruction #1186 @mike-hunhoff -- fix import-to-ida script formatting #1208 @williballenthin +- dotnet: emit property features #1168 @anushkavirgaonkar +- dotnet: emit API features for objects created via the newobj instruction #1186 @mike-hunhoff +- dotnet: emit API features for generic methods #1231 @mike-hunhoff - Python 3.11 support #1192 @williballenthin ### Breaking Changes @@ -54,6 +54,7 @@ - render: fix vverbose rendering of offsets #1215 @williballenthin - elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin - dotnet: address unhandled exceptions with improved type checking #1230 @mike-hunhoff +- fix import-to-ida script formatting #1208 @williballenthin ### capa explorer IDA Pro plugin - fix: display instruction items #1154 @mr-tz diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 27b0c91a..086ad3f8 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -9,8 +9,7 @@ from __future__ import annotations import logging -from enum import Enum -from typing import Any, Tuple, Union, Iterator, Optional +from typing import Dict, Tuple, Union, Iterator, Optional import dnfile from dncil.cil.body import CilMethodBody @@ -22,9 +21,6 @@ from capa.features.common import FeatureAccess logger = logging.getLogger(__name__) -# key indexes to dotnet metadata tables -DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables} - class DnfileMethodBodyReader(CilMethodBodyReaderBase): def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow): @@ -47,6 +43,7 @@ class DnfileMethodBodyReader(CilMethodBodyReaderBase): class DnType(object): def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None): self.token = token + # property access self.access = access self.namespace = namespace self.class_ = class_ @@ -107,7 +104,7 @@ class DnUnmanagedMethod: return f"{module}.{method}" -def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: +def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Union[dnfile.base.MDTableRow, InvalidToken, str]: """map generic token to string or table row""" assert pe.net is not None assert pe.net.mdtables is not None @@ -118,14 +115,9 @@ def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: return InvalidToken(token.value) return user_string - table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") - if not table_name: - # table_index is not valid - return InvalidToken(token.value) - - table: Any = getattr(pe.net.mdtables, table_name, None) + table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table, None) if table is None: - # table index is valid but table is not present + # table index is not valid return InvalidToken(token.value) try: @@ -187,11 +179,67 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: continue token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid) + access: Optional[str] + + # assume .NET imports starting with get_/set_ are used to access a property + if member_ref.Name.startswith("get_"): + access = FeatureAccess.READ + elif member_ref.Name.startswith("set_"): + access = FeatureAccess.WRITE + else: + access = None + + member_ref_name: str = member_ref.Name + if member_ref_name.startswith(("get_", "set_")): + # remove get_/set_ from MemberRef name + member_ref_name = member_ref_name[4:] + yield DnType( - token, member_ref.Class.row.TypeName, namespace=member_ref.Class.row.TypeNamespace, member=member_ref.Name + token, + member_ref.Class.row.TypeName, + namespace=member_ref.Class.row.TypeNamespace, + member=member_ref_name, + access=access, ) +def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get MethodDef methods used to access properties + + see https://www.ntcore.com/files/dotnetformat.htm + + 24 - MethodSemantics Table + Links Events and Properties to specific methods. For example one Event can be associated to more methods. A property uses this table to associate get/set methods. + Semantics (a 2-byte bitmask of type MethodSemanticsAttributes) + Method (index into the MethodDef table) + Association (index into the Event or Property table; more precisely, a HasSemantics coded index) + """ + for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number): + assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow) + + if method_semantics.Association.row is None: + logger.debug("MethodSemantics[0x%X] Association row is None", rid) + continue + + if isinstance(method_semantics.Association.row, dnfile.mdtable.EventRow): + # ignore events + logger.debug("MethodSemantics[0x%X] ignoring Event", rid) + continue + + if method_semantics.Method.table is None: + logger.debug("MethodSemantics[0x%X] Method table is None", rid) + continue + + token: int = calculate_dotnet_token_value( + method_semantics.Method.table.number, method_semantics.Method.row_index + ) + + if method_semantics.Semantics.msSetter: + yield token, FeatureAccess.WRITE + elif method_semantics.Semantics.msGetter: + yield token, FeatureAccess.READ + + def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: """get managed method names from TypeDef table @@ -203,6 +251,10 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type) """ + accessor_map: Dict[int, str] = {} + for (methoddef, methoddef_access) in get_dotnet_methoddef_property_accessors(pe): + accessor_map[methoddef] = methoddef_access + for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -213,8 +265,16 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: if method.row is None: logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx) continue - token = calculate_dotnet_token_value(method.table.number, method.row_index) - yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method.row.Name) + + token: int = calculate_dotnet_token_value(method.table.number, method.row_index) + access: Optional[str] = accessor_map.get(token, None) + + method_name: str = method.row.Name + if method_name.startswith(("get_", "set_")): + # remove get_/set_ + method_name = method_name[4:] + + yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access) def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: @@ -242,79 +302,6 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name) -def get_dotnet_property_map( - pe: dnfile.dnPE, mapped_property: Union[dnfile.mdtable.PropertyRow, dnfile.mdtable.EventRow] -) -> Optional[dnfile.mdtable.TypeDefRow]: - """get property map from PropertyMap table - - see https://www.ntcore.com/files/dotnetformat.htm - - 21 - PropertyMap Table - List of Properties owned by a specific class. - Parent (index into the TypeDef table) - PropertyList (index into Property table). It marks the first of a contiguous run of Properties owned by Parent. The run continues to the smaller of: - the last row of the Property table - the next run of Properties, found by inspecting the PropertyList of the next row in this PropertyMap table - """ - for (rid, property_map) in iter_dotnet_table(pe, dnfile.mdtable.PropertyMap.number): - assert isinstance(property_map, dnfile.mdtable.PropertyMapRow) - - for (idx, property_) in enumerate(property_map.PropertyList): - if property_.row is None: - logger.debug("PropertyMap[0x%X] PropertyList[0x%x] row is None", rid, idx) - continue - if property_.row.Name == mapped_property.Name: - return property_map.Parent.row - return None - - -def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]: - """get property from MethodSemantics table - - see https://www.ntcore.com/files/dotnetformat.htm - - 24 - MethodSemantics Table - Links Events and Properties to specific methods. For example one Event can be associated to more methods. A property uses this table to associate get/set methods. - Semantics (a 2-byte bitmask of type MethodSemanticsAttributes) - Method (index into the MethodDef table) - Association (index into the Event or Property table; more precisely, a HasSemantics coded index) - """ - for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number): - assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow) - - if method_semantics.Association.row is None: - logger.debug("MethodSemantics[0x%X] Association row is None", rid) - continue - if method_semantics.Method.table is None: - logger.debug("MethodSemantics[0x%X] Method table is None", rid) - continue - - typedef: Optional[dnfile.mdtable.TypeDefRow] = get_dotnet_property_map(pe, method_semantics.Association.row) - if typedef is None: - logger.debug("MethodSemantics[0x%X] TypeDef is None", rid) - continue - - token: int = calculate_dotnet_token_value( - method_semantics.Method.table.number, method_semantics.Method.row_index - ) - - access: Optional[str] - if method_semantics.Semantics.msSetter: - access = FeatureAccess.WRITE - elif method_semantics.Semantics.msGetter: - access = FeatureAccess.READ - else: - access = None - - yield DnType( - token, - typedef.TypeName, - access=access, - namespace=typedef.TypeNamespace, - member=method_semantics.Association.row.Name, - ) - - def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: """get managed methods from MethodDef table""" for (rid, method_def) in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number): diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index da88464f..341a6505 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -8,13 +8,12 @@ from __future__ import annotations -from typing import Any, Dict, Tuple, Union, Iterator, Optional +import logging +from typing import Dict, Tuple, Union, Iterator, Optional import dnfile -from dncil.cil.body import CilMethodBody from dncil.clr.token import Token, StringToken, InvalidToken from dncil.cil.opcode import OpCodes -from dncil.cil.instruction import Instruction import capa.features.extractors.helpers from capa.features.insn import API, Number, Property @@ -26,16 +25,14 @@ from capa.features.extractors.dnfile.helpers import ( DnUnmanagedMethod, get_dotnet_fields, resolve_dotnet_token, - get_dotnet_properties, read_dotnet_user_string, get_dotnet_managed_imports, get_dotnet_managed_methods, + calculate_dotnet_token_value, get_dotnet_unmanaged_imports, ) -METHODDEF_TABLE = dnfile.mdtable.MethodDef.number -MEMBERREF_TABLE = dnfile.mdtable.MemberRef.number -FIELD_TABLE = dnfile.mdtable.Field.number +logger = logging.getLogger(__name__) def get_managed_imports(ctx: Dict) -> Dict: @@ -62,26 +59,6 @@ def get_methods(ctx: Dict) -> Dict: return ctx["methods_cache"] -def get_callee(ctx: Dict, token: int) -> Union[DnType, DnUnmanagedMethod, None]: - """map dotnet token to un/managed method""" - callee: Union[DnType, DnUnmanagedMethod, None] = get_managed_imports(ctx).get(token, None) - if callee is None: - # we must check unmanaged imports before managed methods because we map forwarded managed methods - # to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis - callee = get_unmanaged_imports(ctx).get(token, None) - if callee is None: - callee = get_methods(ctx).get(token, None) - return callee - - -def get_properties(ctx: Dict) -> Dict: - if "properties_cache" not in ctx: - ctx["properties_cache"] = {} - for prop in get_dotnet_properties(ctx["pe"]): - ctx["properties_cache"][prop.token] = prop - return ctx["properties_cache"] - - def get_fields(ctx: Dict) -> Dict: if "fields_cache" not in ctx: ctx["fields_cache"] = {} @@ -90,31 +67,45 @@ def get_fields(ctx: Dict) -> Dict: return ctx["fields_cache"] +def get_callee(ctx: Dict, token: Token) -> Union[DnType, DnUnmanagedMethod, None]: + """map .NET token to un/managed (generic) method""" + row: Union[dnfile.base.MDTableRow, InvalidToken, str] = resolve_dotnet_token(ctx["pe"], token) + if not isinstance(row, (dnfile.mdtable.MethodDefRow, dnfile.mdtable.MemberRefRow, dnfile.mdtable.MethodSpecRow)): + # we only handle MethodDef (internal), MemberRef (external), and MethodSpec (generic) + return None + + token_: int + if isinstance(row, dnfile.mdtable.MethodSpecRow): + # map MethodSpec to MethodDef or MemberRef + if row.Method.table is None: + logger.debug("MethodSpec[0x%X] Method table is None", token.rid) + return None + token_ = calculate_dotnet_token_value(row.Method.table.number, row.Method.row_index) + else: + token_ = token.value + + callee: Union[DnType, DnUnmanagedMethod, None] = get_managed_imports(ctx).get(token_, None) + if callee is None: + # we must check unmanaged imports before managed methods because we map forwarded managed methods + # to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis + callee = get_unmanaged_imports(ctx).get(token_, None) + if callee is None: + callee = get_methods(ctx).get(token_, None) + return callee + + def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction API features""" - insn: Instruction = ih.inner - - if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj): - return - - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, insn.operand.value) - if callee is None: + if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj): return + callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) if isinstance(callee, DnType): - if callee.member.startswith(("get_", "set_")): - if insn.operand.table == METHODDEF_TABLE: - # check if the method belongs to the MethodDef table and whether it is used to access a property - if get_properties(fh.ctx).get(insn.operand.value, None) is not None: - return - elif insn.operand.table == MEMBERREF_TABLE: - # if the method belongs to the MemberRef table, we assume it is used to access a property - return - - # like System.IO.File::Delete - yield API(str(callee)), ih.address - - else: + # ignore methods used to access properties + if callee.access is None: + # like System.IO.File::Delete + yield API(str(callee)), ih.address + elif isinstance(callee, DnUnmanagedMethod): # like kernel32.CreateFileA for name in capa.features.extractors.helpers.generate_symbols(callee.module, callee.method): yield API(name), ih.address @@ -122,52 +113,30 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction property features""" - insn: Instruction = ih.inner - name: Optional[str] = None access: Optional[str] = None - if insn.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): - if insn.operand.table == METHODDEF_TABLE: - # check if the method belongs to the MethodDef table and whether it is used to access a property - prop = get_properties(fh.ctx).get(insn.operand.value, None) - if prop is not None: - name = str(prop) - access = prop.access + if ih.inner.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + # property access via MethodDef or MemberRef + callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) + if isinstance(callee, DnType): + if callee.access is not None: + name = str(callee) + access = callee.access - elif insn.operand.table == MEMBERREF_TABLE: - # if the method belongs to the MemberRef table, we assume it is used to access a property - row: Any = resolve_dotnet_token(fh.ctx["pe"], insn.operand) - if row is None: - return - if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): - return - if not row.Name.startswith(("get_", "set_")): - return + elif ih.inner.opcode in (OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda): + # property read via Field + read_field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) + if read_field is not None: + name = str(read_field) + access = FeatureAccess.READ - name = DnType.format_name( - row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace, member=row.Name[4:] - ) - if row.Name.startswith("get_"): - access = FeatureAccess.READ - elif row.Name.startswith("set_"): - access = FeatureAccess.WRITE - - elif insn.opcode in (OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda): - if insn.operand.table == FIELD_TABLE: - # determine whether the operand is a field by checking if it belongs to the Field table - read_field: Optional[DnType] = get_fields(fh.ctx).get(insn.operand.value, None) - if read_field: - name = str(read_field) - access = FeatureAccess.READ - - elif insn.opcode in (OpCodes.Stfld, OpCodes.Stsfld): - if insn.operand.table == FIELD_TABLE: - # determine whether the operand is a field by checking if it belongs to the Field table - write_field: Optional[DnType] = get_fields(fh.ctx).get(insn.operand.value, None) - if write_field: - name = str(write_field) - access = FeatureAccess.WRITE + elif ih.inner.opcode in (OpCodes.Stfld, OpCodes.Stsfld): + # property write via Field + write_field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) + if write_field is not None: + name = str(write_field) + access = FeatureAccess.WRITE if name is not None: if access is not None: @@ -177,92 +146,74 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Class, Address]]: """parse instruction class features""" - if ih.inner.opcode not in ( + if ih.inner.opcode in ( OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, + OpCodes.Newobj, + ): + # method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef) + callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) + if isinstance(callee, DnType): + yield Class(DnType.format_name(callee.class_, namespace=callee.namespace)), ih.address + + elif ih.inner.opcode in ( OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda, OpCodes.Stfld, OpCodes.Stsfld, - OpCodes.Newobj, ): - return - - row: Any = resolve_dotnet_token(fh.ctx["pe"], ih.inner.operand) - if isinstance(row, dnfile.mdtable.MemberRefRow): - if isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): - yield Class(DnType.format_name(row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace)), ih.address - - elif isinstance(row, dnfile.mdtable.MethodDefRow): - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand.value) - if isinstance(callee, DnType): - yield Class(DnType.format_name(callee.class_, namespace=callee.namespace)), ih.address - - elif isinstance(row, dnfile.mdtable.FieldRow): + # field access field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) - if field is not None: + if isinstance(field, DnType): yield Class(DnType.format_name(field.class_, namespace=field.namespace)), ih.address def extract_insn_namespace_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Namespace, Address]]: """parse instruction namespace features""" - if ih.inner.opcode not in ( + if ih.inner.opcode in ( OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, + OpCodes.Newobj, + ): + # method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef) + callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) + if isinstance(callee, DnType) and callee.namespace is not None: + yield Namespace(callee.namespace), ih.address + + elif ih.inner.opcode in ( OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda, OpCodes.Stfld, OpCodes.Stsfld, - OpCodes.Newobj, ): - return - - row: Any = resolve_dotnet_token(fh.ctx["pe"], Token(ih.inner.operand.value)) - - if isinstance(row, dnfile.mdtable.MemberRefRow): - if isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): - if row.Class.row.TypeNamespace: - yield Namespace(row.Class.row.TypeNamespace), ih.address - - elif isinstance(row, dnfile.mdtable.MethodDefRow): - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand.value) - if isinstance(callee, DnType) and callee.namespace is not None: - yield Namespace(callee.namespace), ih.address - - elif isinstance(row, dnfile.mdtable.FieldRow): field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) - if field is not None: + if isinstance(field, DnType) and field.namespace is not None: yield Namespace(field.namespace), ih.address def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction number features""" - insn: Instruction = ih.inner - - if insn.is_ldc(): - yield Number(insn.get_ldc()), ih.address + if ih.inner.is_ldc(): + yield Number(ih.inner.get_ldc()), ih.address def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction string features""" - f: CilMethodBody = fh.inner - insn: Instruction = ih.inner - - if not insn.is_ldstr(): + if not ih.inner.is_ldstr(): return - if not isinstance(insn.operand, StringToken): + if not isinstance(ih.inner.operand, StringToken): return - user_string: Optional[str] = read_dotnet_user_string(fh.ctx["pe"], insn.operand) + user_string: Optional[str] = read_dotnet_user_string(fh.ctx["pe"], ih.inner.operand) if user_string is None: return @@ -272,17 +223,14 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter def extract_unmanaged_call_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle ) -> Iterator[Tuple[Characteristic, Address]]: - insn: Instruction = ih.inner - if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return - token: Any = resolve_dotnet_token(fh.ctx["pe"], insn.operand) - if isinstance(token, InvalidToken): - return - if not isinstance(token, dnfile.mdtable.MethodDefRow): + row: Union[str, InvalidToken, dnfile.base.MDTableRow] = resolve_dotnet_token(fh.ctx["pe"], ih.inner.operand) + if not isinstance(row, dnfile.mdtable.MethodDefRow): return - if any((token.Flags.mdPinvokeImpl, token.ImplFlags.miUnmanaged, token.ImplFlags.miNative)): + if any((row.Flags.mdPinvokeImpl, row.ImplFlags.miUnmanaged, row.ImplFlags.miNative)): yield Characteristic("unmanaged call"), ih.address diff --git a/tests/fixtures.py b/tests/fixtures.py index 1d0ba0fa..6deb0e24 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -278,6 +278,10 @@ def get_data_path_by_name(name): return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe") elif name.startswith("_1c444"): return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_") + elif name.startswith("_387f15"): + return os.path.join( + CD, "data", "dotnet", "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_" + ) elif name.startswith("_692f"): return os.path.join(CD, "data", "dotnet", "692f7fd6d198e804d6af98eb9e390d61.exe_") elif name.startswith("_0953c"): @@ -751,6 +755,9 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), + ("_692f", "token=0x6000004", capa.features.insn.API("System.Linq.Enumerable::First"), True), # generic method + ("_692f", "token=0x6000004", capa.features.common.Namespace("System.Linq"), True), # generic method + ("_692f", "token=0x6000004", capa.features.common.Class("System.Linq.Enumerable"), True), # generic method ( "_1c444", "function=0x1F59, bb=0x1F59, insn=0x1F5B", @@ -772,25 +779,25 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( "token=0x600002B", capa.features.insn.Property("System.IO.FileInfo::Length", access=FeatureAccess.READ), True, - ), # MemberRef method + ), # MemberRef property access ( "_1c444", "token=0x600002B", capa.features.insn.Property("System.IO.FileInfo::Length"), True, - ), # MemberRef method + ), # MemberRef property access ( "_1c444", "token=0x6000081", capa.features.insn.API("System.Diagnostics.Process::Start"), True, - ), # MemberRef method + ), # MemberRef property access ( "_1c444", "token=0x6000081", capa.features.insn.Property( "System.Diagnostics.ProcessStartInfo::UseShellExecute", access=FeatureAccess.WRITE - ), # MemberRef method + ), # MemberRef property access True, ), ( @@ -798,7 +805,7 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( "token=0x6000081", capa.features.insn.Property( "System.Diagnostics.ProcessStartInfo::WorkingDirectory", access=FeatureAccess.WRITE - ), # MemberRef method + ), # MemberRef property access True, ), ( @@ -806,41 +813,96 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( "token=0x6000081", capa.features.insn.Property( "System.Diagnostics.ProcessStartInfo::FileName", access=FeatureAccess.WRITE - ), # MemberRef method + ), # MemberRef property access True, ), ( "_1c444", "token=0x6000087", - capa.features.insn.Property("Sockets.MySocket::reConnectionDelay", access=FeatureAccess.WRITE), # Field + capa.features.insn.Property( + "Sockets.MySocket::reConnectionDelay", access=FeatureAccess.WRITE + ), # Field property access True, ), ( "_1c444", "token=0x600008A", - capa.features.insn.Property("Sockets.MySocket::isConnected", access=FeatureAccess.WRITE), # Field + capa.features.insn.Property( + "Sockets.MySocket::isConnected", access=FeatureAccess.WRITE + ), # Field property access True, ), ( "_1c444", "token=0x600008A", - capa.features.insn.Property("Sockets.MySocket::onConnected", access=FeatureAccess.READ), # Field + capa.features.common.Class("Sockets.MySocket"), # Field property access + True, + ), + ( + "_1c444", + "token=0x600008A", + capa.features.common.Namespace("Sockets"), # Field property access + True, + ), + ( + "_1c444", + "token=0x600008A", + capa.features.insn.Property( + "Sockets.MySocket::onConnected", access=FeatureAccess.READ + ), # Field property access True, ), ( "_0953c", "token=0x6000004", - capa.features.insn.Property("System.Diagnostics.Debugger::IsAttached", access=FeatureAccess.READ), + capa.features.insn.Property( + "System.Diagnostics.Debugger::IsAttached", access=FeatureAccess.READ + ), # MemberRef property access True, - ), # MemberRef method + ), + ( + "_0953c", + "token=0x6000004", + capa.features.common.Class("System.Diagnostics.Debugger"), # MemberRef property access + True, + ), + ( + "_0953c", + "token=0x6000004", + capa.features.common.Namespace("System.Diagnostics"), # MemberRef property access + True, + ), ( "_692f", "token=0x6000006", capa.features.insn.Property( "System.Management.Automation.PowerShell::Streams", access=FeatureAccess.READ - ), # MemberRef method + ), # MemberRef property access False, ), + ( + "_387f15", + "token=0x600009E", + capa.features.insn.Property( + "Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE::geoplugin_countryCode", + access=FeatureAccess.READ, + ), # MethodDef property access + True, + ), + ( + "_387f15", + "token=0x600009E", + capa.features.common.Class( + "Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE" + ), # MethodDef property access + True, + ), + ( + "_387f15", + "token=0x600009E", + capa.features.common.Namespace("Modulo"), # MethodDef property access + True, + ), ( "_039a6", "token=0x6000007",