mirror of
https://github.com/mandiant/capa.git
synced 2026-02-04 11:07:53 -08:00
dnfile: use Address
This commit is contained in:
@@ -8,17 +8,20 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, List, Tuple
|
||||
from typing import TYPE_CHECKING, List, Tuple
|
||||
|
||||
from capa.features.address import Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from capa.features.common import Feature
|
||||
|
||||
import dnfile
|
||||
from dncil.clr.token import Token
|
||||
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.dnfile.file
|
||||
import capa.features.extractors.dnfile.insn
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies
|
||||
|
||||
|
||||
@@ -28,12 +31,12 @@ class DnfileFeatureExtractor(FeatureExtractor):
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features: List[Tuple[Feature, int]] = []
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
|
||||
|
||||
def get_base_address(self):
|
||||
return 0x0
|
||||
return AbsoluteVirtualAddress(0x0)
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
@@ -42,14 +45,8 @@ class DnfileFeatureExtractor(FeatureExtractor):
|
||||
yield from capa.features.extractors.dnfile.file.extract_features(self.pe)
|
||||
|
||||
def get_functions(self):
|
||||
# data structure shared across functions yielded here.
|
||||
# useful for caching analysis relevant across a single workspace.
|
||||
ctx = {}
|
||||
ctx["pe"] = self.pe
|
||||
|
||||
for f in get_dotnet_managed_method_bodies(self.pe):
|
||||
setattr(f, "ctx", ctx)
|
||||
yield f
|
||||
for token, f in get_dotnet_managed_method_bodies(self.pe):
|
||||
yield FunctionHandle(address=DNTokenAddress(Token(token)), inner=f, ctx={"pe": self.pe})
|
||||
|
||||
def extract_function_features(self, f):
|
||||
# TODO
|
||||
@@ -57,14 +54,18 @@ class DnfileFeatureExtractor(FeatureExtractor):
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
# each dotnet method is considered 1 basic block
|
||||
yield f
|
||||
yield BBHandle(
|
||||
address=f.address,
|
||||
inner=f.inner,
|
||||
)
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
# we don't support basic block features
|
||||
yield from []
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
yield from f.instructions
|
||||
for insn in bb.inner.instructions:
|
||||
yield InsnHandle(address=DNTokenOffsetAddress(bb.address.token, insn.offset), inner=insn)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
yield from capa.features.extractors.dnfile.insn.extract_features(f, bb, insn)
|
||||
|
||||
@@ -11,10 +11,13 @@ from __future__ import annotations
|
||||
from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional
|
||||
from itertools import chain
|
||||
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from dncil.cil.instruction import Instruction
|
||||
from dncil.cil.body import CilMethodBody
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address
|
||||
|
||||
from dncil.clr.token import StringToken
|
||||
from dncil.cil.opcode import OpCodes
|
||||
@@ -38,8 +41,11 @@ def get_imports(ctx: Dict) -> Dict:
|
||||
return ctx["imports_cache"]
|
||||
|
||||
|
||||
def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]:
|
||||
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction API features"""
|
||||
f: CilMethodBody = fh.inner
|
||||
insn: Instruction = ih.inner
|
||||
|
||||
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
|
||||
return
|
||||
|
||||
@@ -49,26 +55,27 @@ def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruc
|
||||
|
||||
if "::" in name:
|
||||
# like System.IO.File::OpenRead
|
||||
yield API(name), insn.offset
|
||||
yield API(name), ih.address
|
||||
else:
|
||||
# like kernel32.CreateFileA
|
||||
dll, _, symbol = name.rpartition(".")
|
||||
for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield API(name_variant), insn.offset
|
||||
yield API(name_variant), ih.address
|
||||
|
||||
|
||||
def extract_insn_number_features(
|
||||
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
|
||||
) -> Iterator[Tuple[Number, int]]:
|
||||
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction number features"""
|
||||
insn: Instruction = ih.inner
|
||||
|
||||
if insn.is_ldc():
|
||||
yield Number(insn.get_ldc()), insn.offset
|
||||
yield Number(insn.get_ldc()), ih.address
|
||||
|
||||
|
||||
def extract_insn_string_features(
|
||||
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
|
||||
) -> Iterator[Tuple[String, int]]:
|
||||
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction string features"""
|
||||
f: CilMethodBody = fh.inner
|
||||
insn: Instruction = ih.inner
|
||||
|
||||
if not insn.is_ldstr():
|
||||
return
|
||||
|
||||
@@ -79,14 +86,14 @@ def extract_insn_string_features(
|
||||
if user_string is None:
|
||||
return
|
||||
|
||||
yield String(user_string), insn.offset
|
||||
yield String(user_string), ih.address
|
||||
|
||||
|
||||
def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_features(f: FunctionHandle, bb: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract instruction features"""
|
||||
for inst_handler in INSTRUCTION_HANDLERS:
|
||||
for (feature, offset) in inst_handler(f, bb, insn):
|
||||
yield feature, offset
|
||||
for (feature, addr) in inst_handler(f, bb, insn):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
INSTRUCTION_HANDLERS = (
|
||||
|
||||
@@ -4,51 +4,53 @@ from itertools import chain
|
||||
|
||||
import dnfile
|
||||
import pefile
|
||||
from dncil.clr.token import Token
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.file import Import
|
||||
from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]:
|
||||
yield Format(FORMAT_DOTNET), 0x0
|
||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
|
||||
yield Format(FORMAT_DOTNET), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]:
|
||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
|
||||
for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)):
|
||||
if "::" in imp:
|
||||
# like System.IO.File::OpenRead
|
||||
yield Import(imp), token
|
||||
yield Import(imp), DNTokenAddress(Token(token))
|
||||
else:
|
||||
# like kernel32.CreateFileA
|
||||
dll, _, symbol = imp.rpartition(".")
|
||||
for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield Import(symbol_variant), token
|
||||
yield Import(symbol_variant), DNTokenAddress(Token(token))
|
||||
|
||||
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]:
|
||||
yield OS(OS_ANY), 0x0
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
|
||||
yield OS(OS_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]:
|
||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
|
||||
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
|
||||
# .NET 4.5 added option: any CPU, 32-bit preferred
|
||||
if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE:
|
||||
yield Arch(ARCH_I386), 0x0
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS:
|
||||
yield Arch(ARCH_AMD64), 0x0
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
else:
|
||||
yield Arch(ARCH_ANY), 0x0
|
||||
yield Arch(ARCH_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(pe=pe): # type: ignore
|
||||
yield feature, va
|
||||
for feature, addr in file_handler(pe=pe): # type: ignore
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
@@ -59,7 +61,7 @@ FILE_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
for handler in GLOBAL_HANDLERS:
|
||||
for feature, va in handler(pe=pe): # type: ignore
|
||||
yield feature, va
|
||||
@@ -77,8 +79,8 @@ class DotnetFileFeatureExtractor(FeatureExtractor):
|
||||
self.path: str = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
|
||||
def get_base_address(self) -> int:
|
||||
return 0x0
|
||||
def get_base_address(self):
|
||||
return AbsoluteVirtualAddress(0x0)
|
||||
|
||||
def get_entry_point(self) -> int:
|
||||
# self.pe.net.Flags.CLR_NATIVE_ENTRYPOINT
|
||||
|
||||
Reference in New Issue
Block a user