dnfile: use Address

This commit is contained in:
Willi Ballenthin
2022-04-08 18:33:39 -06:00
parent 70c3487bc7
commit d9ede95cf7
3 changed files with 55 additions and 45 deletions

View File

@@ -8,17 +8,20 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, List, Tuple
from typing import TYPE_CHECKING, List, Tuple
from capa.features.address import Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
if TYPE_CHECKING:
from capa.features.common import Feature
import dnfile
from dncil.clr.token import Token
import capa.features.extractors
import capa.features.extractors.dnfile.file
import capa.features.extractors.dnfile.insn
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies
@@ -28,12 +31,12 @@ class DnfileFeatureExtractor(FeatureExtractor):
self.pe: dnfile.dnPE = dnfile.dnPE(path)
# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, int]] = []
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
def get_base_address(self):
return 0x0
return AbsoluteVirtualAddress(0x0)
def extract_global_features(self):
yield from self.global_features
@@ -42,14 +45,8 @@ class DnfileFeatureExtractor(FeatureExtractor):
yield from capa.features.extractors.dnfile.file.extract_features(self.pe)
def get_functions(self):
# data structure shared across functions yielded here.
# useful for caching analysis relevant across a single workspace.
ctx = {}
ctx["pe"] = self.pe
for f in get_dotnet_managed_method_bodies(self.pe):
setattr(f, "ctx", ctx)
yield f
for token, f in get_dotnet_managed_method_bodies(self.pe):
yield FunctionHandle(address=DNTokenAddress(Token(token)), inner=f, ctx={"pe": self.pe})
def extract_function_features(self, f):
# TODO
@@ -57,14 +54,18 @@ class DnfileFeatureExtractor(FeatureExtractor):
def get_basic_blocks(self, f):
# each dotnet method is considered 1 basic block
yield f
yield BBHandle(
address=f.address,
inner=f.inner,
)
def extract_basic_block_features(self, f, bb):
# we don't support basic block features
yield from []
def get_instructions(self, f, bb):
yield from f.instructions
for insn in bb.inner.instructions:
yield InsnHandle(address=DNTokenOffsetAddress(bb.address.token, insn.offset), inner=insn)
def extract_insn_features(self, f, bb, insn):
yield from capa.features.extractors.dnfile.insn.extract_features(f, bb, insn)

View File

@@ -11,10 +11,13 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional
from itertools import chain
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
if TYPE_CHECKING:
from dncil.cil.instruction import Instruction
from dncil.cil.body import CilMethodBody
from capa.features.common import Feature
from capa.features.address import Address
from dncil.clr.token import StringToken
from dncil.cil.opcode import OpCodes
@@ -38,8 +41,11 @@ def get_imports(ctx: Dict) -> Dict:
return ctx["imports_cache"]
def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]:
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
"""parse instruction API features"""
f: CilMethodBody = fh.inner
insn: Instruction = ih.inner
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
return
@@ -49,26 +55,27 @@ def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruc
if "::" in name:
# like System.IO.File::OpenRead
yield API(name), insn.offset
yield API(name), ih.address
else:
# like kernel32.CreateFileA
dll, _, symbol = name.rpartition(".")
for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name_variant), insn.offset
yield API(name_variant), ih.address
def extract_insn_number_features(
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
) -> Iterator[Tuple[Number, int]]:
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
"""parse instruction number features"""
insn: Instruction = ih.inner
if insn.is_ldc():
yield Number(insn.get_ldc()), insn.offset
yield Number(insn.get_ldc()), ih.address
def extract_insn_string_features(
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
) -> Iterator[Tuple[String, int]]:
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
"""parse instruction string features"""
f: CilMethodBody = fh.inner
insn: Instruction = ih.inner
if not insn.is_ldstr():
return
@@ -79,14 +86,14 @@ def extract_insn_string_features(
if user_string is None:
return
yield String(user_string), insn.offset
yield String(user_string), ih.address
def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]:
def extract_features(f: FunctionHandle, bb: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
"""extract instruction features"""
for inst_handler in INSTRUCTION_HANDLERS:
for (feature, offset) in inst_handler(f, bb, insn):
yield feature, offset
for (feature, addr) in inst_handler(f, bb, insn):
yield feature, addr
INSTRUCTION_HANDLERS = (

View File

@@ -4,51 +4,53 @@ from itertools import chain
import dnfile
import pefile
from dncil.clr.token import Token
import capa.features.extractors.helpers
from capa.features.file import Import
from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports
logger = logging.getLogger(__name__)
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]:
yield Format(FORMAT_DOTNET), 0x0
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
yield Format(FORMAT_DOTNET), NO_ADDRESS
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]:
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)):
if "::" in imp:
# like System.IO.File::OpenRead
yield Import(imp), token
yield Import(imp), DNTokenAddress(Token(token))
else:
# like kernel32.CreateFileA
dll, _, symbol = imp.rpartition(".")
for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield Import(symbol_variant), token
yield Import(symbol_variant), DNTokenAddress(Token(token))
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]:
yield OS(OS_ANY), 0x0
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
yield OS(OS_ANY), NO_ADDRESS
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]:
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
# .NET 4.5 added option: any CPU, 32-bit preferred
if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE:
yield Arch(ARCH_I386), 0x0
yield Arch(ARCH_I386), NO_ADDRESS
elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS:
yield Arch(ARCH_AMD64), 0x0
yield Arch(ARCH_AMD64), NO_ADDRESS
else:
yield Arch(ARCH_ANY), 0x0
yield Arch(ARCH_ANY), NO_ADDRESS
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
for file_handler in FILE_HANDLERS:
for feature, va in file_handler(pe=pe): # type: ignore
yield feature, va
for feature, addr in file_handler(pe=pe): # type: ignore
yield feature, addr
FILE_HANDLERS = (
@@ -59,7 +61,7 @@ FILE_HANDLERS = (
)
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
for handler in GLOBAL_HANDLERS:
for feature, va in handler(pe=pe): # type: ignore
yield feature, va
@@ -77,8 +79,8 @@ class DotnetFileFeatureExtractor(FeatureExtractor):
self.path: str = path
self.pe: dnfile.dnPE = dnfile.dnPE(path)
def get_base_address(self) -> int:
return 0x0
def get_base_address(self):
return AbsoluteVirtualAddress(0x0)
def get_entry_point(self) -> int:
# self.pe.net.Flags.CLR_NATIVE_ENTRYPOINT