Initial plumbing to support DEX files

This commit is contained in:
Duncan Ogilvie
2023-12-06 15:29:10 +01:00
parent d6f7d2180f
commit e90be5a9bb
7 changed files with 212 additions and 3 deletions

View File

@@ -409,7 +409,9 @@ ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"
# dotnet
ARCH_ANY = "any"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
# dex
ARCH_DALVIK = "dalvik"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY, ARCH_DALVIK)
class Arch(Feature):
@@ -421,10 +423,11 @@ class Arch(Feature):
OS_WINDOWS = "windows"
OS_LINUX = "linux"
OS_MACOS = "macos"
OS_ANDROID = "android"
# dotnet
OS_ANY = "any"
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY, OS_ANDROID})
# internal only, not to be used in rules
OS_AUTO = "auto"
@@ -452,7 +455,8 @@ class OS(Feature):
FORMAT_PE = "pe"
FORMAT_ELF = "elf"
FORMAT_DOTNET = "dotnet"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
FORMAT_DEX = "dex"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET, FORMAT_DEX)
# internal only, not to be used in rules
FORMAT_AUTO = "auto"
FORMAT_SC32 = "sc32"
@@ -464,6 +468,7 @@ STATIC_FORMATS = {
FORMAT_PE,
FORMAT_ELF,
FORMAT_DOTNET,
FORMAT_DEX,
}
DYNAMIC_FORMATS = {
FORMAT_CAPE,

View File

@@ -24,8 +24,11 @@ from capa.features.common import (
OS_AUTO,
ARCH_ANY,
FORMAT_PE,
FORMAT_DEX,
FORMAT_ELF,
OS_ANDROID,
OS_WINDOWS,
ARCH_DALVIK,
FORMAT_FREEZE,
FORMAT_RESULT,
Arch,
@@ -41,6 +44,7 @@ logger = logging.getLogger(__name__)
# match strings for formats
MATCH_PE = b"MZ"
MATCH_ELF = b"\x7fELF"
MATCH_DEX = b"dex\n"
MATCH_RESULT = b'{"meta":'
MATCH_JSON_OBJECT = b'{"'
@@ -61,6 +65,8 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
yield Format(FORMAT_PE), NO_ADDRESS
elif buf.startswith(MATCH_ELF):
yield Format(FORMAT_ELF), NO_ADDRESS
elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
yield Format(FORMAT_DEX), NO_ADDRESS
elif is_freeze(buf):
yield Format(FORMAT_FREEZE), NO_ADDRESS
elif buf.startswith(MATCH_RESULT):
@@ -96,6 +102,9 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
yield Arch(arch), NO_ADDRESS
elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
yield Arch(ARCH_DALVIK), NO_ADDRESS
else:
# we likely end up here:
# 1. handling shellcode, or
@@ -129,6 +138,9 @@ def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
yield OS(os), NO_ADDRESS
elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
yield OS(OS_ANDROID), NO_ADDRESS
else:
# we likely end up here:
# 1. handling shellcode, or

View File

@@ -0,0 +1,91 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Tuple, Iterator
from pathlib import Path
from dexparser import DEXParser
from capa.features.common import OS, FORMAT_DEX, OS_ANDROID, ARCH_DALVIK, Arch, Format, Feature
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
logger = logging.getLogger(__name__)
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
    """
    yield the file format feature for a DEX sample.

    the emitted format is always `dex`. keyword arguments are accepted and
    ignored, so the function can be called with or without `dex=...`.
    """
    format_feature = Format(FORMAT_DEX)
    yield format_feature, NO_ADDRESS
# handlers invoked, in order, to produce the file-scope features.
FILE_HANDLERS = (extract_file_format,)


def extract_file_features(dex: DEXParser) -> Iterator[Tuple[Feature, Address]]:
    """
    yield every (feature, address) pair produced by the entries of FILE_HANDLERS.

    each handler is called with the parsed DEX passed as the `dex` keyword argument.
    """
    for emit in FILE_HANDLERS:
        yield from emit(dex=dex)  # type: ignore
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
    """
    yield the operating system feature for a DEX sample.

    the emitted OS is always `android`; keyword arguments are accepted and ignored.
    """
    os_feature = OS(OS_ANDROID)
    yield os_feature, NO_ADDRESS
def extract_file_arch(**kwargs) -> Iterator[Tuple[Arch, Address]]:
    """
    yield the architecture feature for a DEX sample.

    the emitted arch is always `dalvik`; keyword arguments are accepted and ignored.
    """
    arch_feature = Arch(ARCH_DALVIK)
    yield arch_feature, NO_ADDRESS
# handlers invoked, in order, to produce the global features (OS, then arch).
GLOBAL_HANDLERS = (extract_file_os, extract_file_arch)


def extract_global_features(dex: DEXParser) -> Iterator[Tuple[Feature, Address]]:
    """
    yield every (feature, address) pair produced by the entries of GLOBAL_HANDLERS.

    each handler is called with the parsed DEX passed as the `dex` keyword argument.
    """
    for emit in GLOBAL_HANDLERS:
        yield from emit(dex=dex)  # type: ignore
class DexFileFeatureExtractor(StaticFeatureExtractor):
    """
    file-scope feature extractor for DEX samples.

    only global and file features are available. every method that deals with
    functions, basic blocks, or instructions raises NotImplementedError.
    """

    def __init__(self, path: Path):
        # the base class is initialized with hashes computed from the raw sample bytes.
        sample_bytes = path.read_bytes()
        sample_hashes = SampleHashes.from_bytes(sample_bytes)
        super().__init__(hashes=sample_hashes)

        self.path: Path = path
        # the parser is given the sample location as a plain string.
        sample_location = str(path)
        self.dex = DEXParser(filedir=sample_location)

    def get_base_address(self):
        return NO_ADDRESS

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        # delegates to the module-level function of the same name.
        yield from extract_global_features(self.dex)

    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        # delegates to the module-level function of the same name.
        yield from extract_file_features(self.dex)

    # everything below is unsupported by this extractor and always raises.

    def get_functions(self):
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

    def extract_function_features(self, f):
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

    def get_basic_blocks(self, f):
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

    def extract_basic_block_features(self, f, bb):
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

    def get_instructions(self, f, bb):
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

    def extract_insn_features(self, f, bb, insn):
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

    def is_library_function(self, va):
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

    def get_function_name(self, va):
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

View File

@@ -0,0 +1,89 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from __future__ import annotations
import logging
from typing import List, Tuple, Iterator
from pathlib import Path
import dexparser
import capa.features.extractors
import capa.features.extractors.dexfile
from capa.features.common import Feature
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.base_extractor import (
BBHandle,
InsnHandle,
SampleHashes,
FunctionHandle,
StaticFeatureExtractor,
)
logger = logging.getLogger(__name__)
class DexparserFeatureExtractorCache:
    """container that holds a reference to the parsed DEX object."""

    def __init__(self, dex: dexparser.DEXParser):
        # stored as-is; nothing is derived from the parser here.
        self.dex = dex
class DexparserFeatureExtractor(StaticFeatureExtractor):
    """
    static feature extractor for DEX samples, backed by the dexparser library.

    only the global features (format, OS, arch) are produced so far. the file,
    function, basic block, and instruction scopes are placeholders: each one
    reports itself via `todo()` and yields nothing.
    """

    def __init__(self, path: Path):
        self.dex = dexparser.DEXParser(filedir=str(path))
        super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
        self.cache = DexparserFeatureExtractorCache(self.dex)

        # pre-compute these because we'll yield them at *every* scope.
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.dexfile.extract_file_format())
        self.global_features.extend(capa.features.extractors.dexfile.extract_file_os(dex=self.dex))
        self.global_features.extend(capa.features.extractors.dexfile.extract_file_arch(dex=self.dex))

    def todo(self) -> None:
        """
        log, at debug level, the name of the calling method as not yet implemented.

        returns None, so placeholder generators can simply `return self.todo()`.
        """
        # skip the frame inspection entirely unless the message would be emitted.
        if not logger.isEnabledFor(logging.DEBUG):
            return

        import inspect

        # only the immediate caller's name is needed, so look at that one frame
        # rather than materializing the whole stack with inspect.stack().
        frame = inspect.currentframe()
        caller = frame.f_back if frame is not None else None
        # currentframe() may return None on interpreters without frame support.
        name = caller.f_code.co_name if caller is not None else "<unknown>"
        logger.debug("[DexparserFeatureExtractor:TODO] %s", name)

    def get_base_address(self):
        return NO_ADDRESS

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        yield from self.global_features

    # in each placeholder below, the unreachable `yield` makes the method a
    # generator function, so callers receive an empty iterator rather than None.

    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        return self.todo()
        yield

    def get_functions(self) -> Iterator[FunctionHandle]:
        return self.todo()
        yield

    def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
        return self.todo()
        yield

    def get_basic_blocks(self, f: FunctionHandle) -> Iterator[BBHandle]:
        return self.todo()
        yield

    def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
        return self.todo()
        yield

    def get_instructions(self, f: FunctionHandle, bb: BBHandle) -> Iterator[InsnHandle]:
        return self.todo()
        yield

    def extract_insn_features(
        self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
    ) -> Iterator[Tuple[Feature, Address]]:
        return self.todo()
        yield

View File

@@ -44,6 +44,7 @@ import capa.render.result_document
import capa.render.result_document as rdoc
import capa.features.extractors.common
import capa.features.extractors.pefile
import capa.features.extractors.dexfile
import capa.features.extractors.elffile
import capa.features.extractors.dotnetfile
import capa.features.extractors.base_extractor
@@ -71,6 +72,7 @@ from capa.features.common import (
OS_LINUX,
OS_MACOS,
FORMAT_PE,
FORMAT_DEX,
FORMAT_ELF,
OS_WINDOWS,
FORMAT_AUTO,
@@ -306,6 +308,11 @@ def get_extractor(
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
elif format_ == FORMAT_DEX:
import capa.features.extractors.dexparser.extractor
return capa.features.extractors.dexparser.extractor.DexparserFeatureExtractor(path)
elif backend == BACKEND_BINJA:
from capa.features.extractors.binja.find_binja_api import find_binja_path
@@ -374,6 +381,9 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:
elif format_ == capa.features.common.FORMAT_ELF:
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
elif format_ == capa.features.common.FORMAT_DEX:
file_extractors.append(capa.features.extractors.dexfile.DexFileFeatureExtractor(sample))
elif format_ == FORMAT_CAPE:
report = json.load(Path(sample).open(encoding="utf-8"))
file_extractors.append(capa.features.extractors.cape.extractor.CapeExtractor.from_report(report))
@@ -796,6 +806,7 @@ def install_common_args(parser, wanted=None):
(FORMAT_PE, "Windows PE file"),
(FORMAT_DOTNET, ".NET PE file"),
(FORMAT_ELF, "Executable and Linkable Format"),
(FORMAT_DEX, "Android DEX file"),
(FORMAT_SC32, "32-bit shellcode"),
(FORMAT_SC64, "64-bit shellcode"),
(FORMAT_CAPE, "CAPE sandbox report"),

View File

@@ -50,6 +50,7 @@ dependencies = [
"dncil==1.0.2",
"pydantic==2.4.0",
"protobuf==4.23.4",
"dexparser==1.2.0",
]
dynamic = ["version"]