mirror of
https://github.com/mandiant/capa.git
synced 2026-02-04 11:07:53 -08:00
Initial plumbing to support DEX files
This commit is contained in:
@@ -409,7 +409,9 @@ ARCH_I386 = "i386"
|
||||
ARCH_AMD64 = "amd64"
|
||||
# dotnet
|
||||
ARCH_ANY = "any"
|
||||
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
|
||||
# dex
|
||||
ARCH_DALVIK = "dalvik"
|
||||
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY, ARCH_DALVIK)
|
||||
|
||||
|
||||
class Arch(Feature):
|
||||
@@ -421,10 +423,11 @@ class Arch(Feature):
|
||||
OS_WINDOWS = "windows"
|
||||
OS_LINUX = "linux"
|
||||
OS_MACOS = "macos"
|
||||
OS_ANDROID = "android"
|
||||
# dotnet
|
||||
OS_ANY = "any"
|
||||
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
|
||||
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})
|
||||
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY, OS_ANDROID})
|
||||
# internal only, not to be used in rules
|
||||
OS_AUTO = "auto"
|
||||
|
||||
@@ -452,7 +455,8 @@ class OS(Feature):
|
||||
FORMAT_PE = "pe"
|
||||
FORMAT_ELF = "elf"
|
||||
FORMAT_DOTNET = "dotnet"
|
||||
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
|
||||
FORMAT_DEX = "dex"
|
||||
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET, FORMAT_DEX)
|
||||
# internal only, not to be used in rules
|
||||
FORMAT_AUTO = "auto"
|
||||
FORMAT_SC32 = "sc32"
|
||||
@@ -464,6 +468,7 @@ STATIC_FORMATS = {
|
||||
FORMAT_PE,
|
||||
FORMAT_ELF,
|
||||
FORMAT_DOTNET,
|
||||
FORMAT_DEX,
|
||||
}
|
||||
DYNAMIC_FORMATS = {
|
||||
FORMAT_CAPE,
|
||||
|
||||
@@ -24,8 +24,11 @@ from capa.features.common import (
|
||||
OS_AUTO,
|
||||
ARCH_ANY,
|
||||
FORMAT_PE,
|
||||
FORMAT_DEX,
|
||||
FORMAT_ELF,
|
||||
OS_ANDROID,
|
||||
OS_WINDOWS,
|
||||
ARCH_DALVIK,
|
||||
FORMAT_FREEZE,
|
||||
FORMAT_RESULT,
|
||||
Arch,
|
||||
@@ -41,6 +44,7 @@ logger = logging.getLogger(__name__)
|
||||
# match strings for formats
|
||||
MATCH_PE = b"MZ"
|
||||
MATCH_ELF = b"\x7fELF"
|
||||
MATCH_DEX = b"dex\n"
|
||||
MATCH_RESULT = b'{"meta":'
|
||||
MATCH_JSON_OBJECT = b'{"'
|
||||
|
||||
@@ -61,6 +65,8 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
elif buf.startswith(MATCH_ELF):
|
||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||
elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
|
||||
yield Format(FORMAT_DEX), NO_ADDRESS
|
||||
elif is_freeze(buf):
|
||||
yield Format(FORMAT_FREEZE), NO_ADDRESS
|
||||
elif buf.startswith(MATCH_RESULT):
|
||||
@@ -96,6 +102,9 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
|
||||
|
||||
yield Arch(arch), NO_ADDRESS
|
||||
|
||||
elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
|
||||
yield Arch(ARCH_DALVIK), NO_ADDRESS
|
||||
|
||||
else:
|
||||
# we likely end up here:
|
||||
# 1. handling shellcode, or
|
||||
@@ -129,6 +138,9 @@ def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
|
||||
|
||||
yield OS(os), NO_ADDRESS
|
||||
|
||||
elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
|
||||
yield OS(OS_ANDROID), NO_ADDRESS
|
||||
|
||||
else:
|
||||
# we likely end up here:
|
||||
# 1. handling shellcode, or
|
||||
|
||||
91
capa/features/extractors/dexfile.py
Normal file
91
capa/features/extractors/dexfile.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from pathlib import Path
|
||||
|
||||
from dexparser import DEXParser
|
||||
|
||||
from capa.features.common import OS, FORMAT_DEX, OS_ANDROID, ARCH_DALVIK, Arch, Format, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
    """Yield the single file-format feature for a DEX sample.

    Keyword arguments are accepted so this can be invoked like the other
    handlers in this module, but none of them are read.
    """
    dex_format = Format(FORMAT_DEX)
    yield dex_format, NO_ADDRESS
|
||||
|
||||
|
||||
# file-scope handlers; extract_file_features() calls each one with `dex=<parser>`.
FILE_HANDLERS = (extract_file_format,)
|
||||
|
||||
|
||||
def extract_file_features(dex: DEXParser) -> Iterator[Tuple[Feature, Address]]:
    """Run every handler in FILE_HANDLERS and yield their (feature, address) pairs.

    Each handler is invoked with the parser passed as the `dex` keyword;
    results are emitted in handler order.
    """
    for handler in FILE_HANDLERS:
        yield from handler(dex=dex)  # type: ignore
|
||||
|
||||
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
    """Yield the OS feature for a DEX sample, which is always Android here.

    Keyword arguments are accepted for call-compatibility with the other
    handlers, but none of them are read.
    """
    android = OS(OS_ANDROID)
    yield android, NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_arch(**kwargs) -> Iterator[Tuple[Arch, Address]]:
    """Yield the architecture feature for a DEX sample, which is always Dalvik here.

    Keyword arguments are accepted for call-compatibility with the other
    handlers, but none of them are read.
    """
    dalvik = Arch(ARCH_DALVIK)
    yield dalvik, NO_ADDRESS
|
||||
|
||||
|
||||
# global-scope handlers, run in this order by extract_global_features():
# OS first, then architecture.
GLOBAL_HANDLERS = (extract_file_os, extract_file_arch)
|
||||
|
||||
|
||||
def extract_global_features(dex: DEXParser) -> Iterator[Tuple[Feature, Address]]:
    """Run every handler in GLOBAL_HANDLERS and yield their (feature, address) pairs.

    Each handler is invoked with the parser passed as the `dex` keyword;
    results are emitted in handler order.
    """
    for global_handler in GLOBAL_HANDLERS:
        yield from global_handler(dex=dex)  # type: ignore
|
||||
|
||||
|
||||
class DexFileFeatureExtractor(StaticFeatureExtractor):
    """File-scope feature extractor for DEX samples.

    Only global and file features are available. Every function-, basic
    block- and instruction-level method raises NotImplementedError.
    """

    def __init__(self, path: Path):
        """Hash the sample at `path` and open it with DEXParser."""
        sample_bytes = path.read_bytes()
        super().__init__(hashes=SampleHashes.from_bytes(sample_bytes))
        self.path: Path = path
        self.dex = DEXParser(filedir=str(path))

    def _file_scope_only(self):
        """Raise the shared error used by every unsupported scope method."""
        raise NotImplementedError("DexFileFeatureExtractor can only be used to extract file features")

    def get_base_address(self):
        """Return NO_ADDRESS; no base address is reported for DEX samples."""
        return NO_ADDRESS

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        """Yield the global features by delegating to the module-level function."""
        for pair in extract_global_features(self.dex):
            yield pair

    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        """Yield the file features by delegating to the module-level function."""
        for pair in extract_file_features(self.dex):
            yield pair

    # --- scopes below are not supported by this extractor ---

    def get_functions(self):
        self._file_scope_only()

    def extract_function_features(self, f):
        self._file_scope_only()

    def get_basic_blocks(self, f):
        self._file_scope_only()

    def extract_basic_block_features(self, f, bb):
        self._file_scope_only()

    def get_instructions(self, f, bb):
        self._file_scope_only()

    def extract_insn_features(self, f, bb, insn):
        self._file_scope_only()

    def is_library_function(self, va):
        self._file_scope_only()

    def get_function_name(self, va):
        self._file_scope_only()
|
||||
0
capa/features/extractors/dexparser/__init__.py
Normal file
0
capa/features/extractors/dexparser/__init__.py
Normal file
89
capa/features/extractors/dexparser/extractor.py
Normal file
89
capa/features/extractors/dexparser/extractor.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import List, Tuple, Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import dexparser
|
||||
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.dexfile
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
from capa.features.extractors.base_extractor import (
|
||||
BBHandle,
|
||||
InsnHandle,
|
||||
SampleHashes,
|
||||
FunctionHandle,
|
||||
StaticFeatureExtractor,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DexparserFeatureExtractorCache:
    """Holder for the parsed DEX object handed to it by DexparserFeatureExtractor."""

    def __init__(self, dex: dexparser.DEXParser):
        # keep a reference to the parser; nothing else is stored yet.
        self.dex = dex
|
||||
|
||||
|
||||
class DexparserFeatureExtractor(StaticFeatureExtractor):
    """Static feature extractor for DEX files backed by the `dexparser` library.

    Only global features (format, OS, arch) are produced so far. Every other
    scope is a placeholder that logs a TODO message at debug level and
    yields nothing.
    """

    def __init__(self, path: Path):
        """Parse the DEX file at `path`, hash it, and pre-compute the global features."""
        self.dex = dexparser.DEXParser(filedir=str(path))
        super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))

        self.cache = DexparserFeatureExtractorCache(self.dex)

        # pre-compute these because we'll yield them at *every* scope.
        # all three handlers are called the same way; they accept and ignore `dex`.
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.dexfile.extract_file_format(dex=self.dex))
        self.global_features.extend(capa.features.extractors.dexfile.extract_file_os(dex=self.dex))
        self.global_features.extend(capa.features.extractors.dexfile.extract_file_arch(dex=self.dex))

    def todo(self):
        """Log, at debug level, the name of the calling method that is not implemented yet."""
        # inspect.stack() collects frame records (with source context) for the
        # whole call stack; skip that work unless the message will be emitted.
        if not logger.isEnabledFor(logging.DEBUG):
            return

        import inspect

        # frame 0 is todo() itself, frame 1 is the placeholder that called it.
        logger.debug("[DexparserFeatureExtractor:TODO] %s", inspect.stack()[1].function)

    def get_base_address(self):
        """Return NO_ADDRESS; no base address is reported for DEX samples."""
        return NO_ADDRESS

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        """Yield the global features pre-computed in __init__."""
        yield from self.global_features

    # The methods below are placeholders: each one is a generator that logs
    # a TODO when first iterated and then finishes without yielding anything.

    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        self.todo()
        yield from ()

    def get_functions(self) -> Iterator[FunctionHandle]:
        self.todo()
        yield from ()

    def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
        self.todo()
        yield from ()

    def get_basic_blocks(self, f: FunctionHandle) -> Iterator[BBHandle]:
        self.todo()
        yield from ()

    def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
        self.todo()
        yield from ()

    def get_instructions(self, f: FunctionHandle, bb: BBHandle) -> Iterator[InsnHandle]:
        self.todo()
        yield from ()

    def extract_insn_features(
        self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
    ) -> Iterator[Tuple[Feature, Address]]:
        self.todo()
        yield from ()
|
||||
11
capa/main.py
11
capa/main.py
@@ -44,6 +44,7 @@ import capa.render.result_document
|
||||
import capa.render.result_document as rdoc
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.pefile
|
||||
import capa.features.extractors.dexfile
|
||||
import capa.features.extractors.elffile
|
||||
import capa.features.extractors.dotnetfile
|
||||
import capa.features.extractors.base_extractor
|
||||
@@ -71,6 +72,7 @@ from capa.features.common import (
|
||||
OS_LINUX,
|
||||
OS_MACOS,
|
||||
FORMAT_PE,
|
||||
FORMAT_DEX,
|
||||
FORMAT_ELF,
|
||||
OS_WINDOWS,
|
||||
FORMAT_AUTO,
|
||||
@@ -306,6 +308,11 @@ def get_extractor(
|
||||
|
||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||
|
||||
elif format_ == FORMAT_DEX:
|
||||
import capa.features.extractors.dexparser.extractor
|
||||
|
||||
return capa.features.extractors.dexparser.extractor.DexparserFeatureExtractor(path)
|
||||
|
||||
elif backend == BACKEND_BINJA:
|
||||
from capa.features.extractors.binja.find_binja_api import find_binja_path
|
||||
|
||||
@@ -374,6 +381,9 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:
|
||||
elif format_ == capa.features.common.FORMAT_ELF:
|
||||
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
|
||||
|
||||
elif format_ == capa.features.common.FORMAT_DEX:
|
||||
file_extractors.append(capa.features.extractors.dexfile.DexFileFeatureExtractor(sample))
|
||||
|
||||
elif format_ == FORMAT_CAPE:
|
||||
report = json.load(Path(sample).open(encoding="utf-8"))
|
||||
file_extractors.append(capa.features.extractors.cape.extractor.CapeExtractor.from_report(report))
|
||||
@@ -796,6 +806,7 @@ def install_common_args(parser, wanted=None):
|
||||
(FORMAT_PE, "Windows PE file"),
|
||||
(FORMAT_DOTNET, ".NET PE file"),
|
||||
(FORMAT_ELF, "Executable and Linkable Format"),
|
||||
(FORMAT_DEX, "Android DEX file"),
|
||||
(FORMAT_SC32, "32-bit shellcode"),
|
||||
(FORMAT_SC64, "64-bit shellcode"),
|
||||
(FORMAT_CAPE, "CAPE sandbox report"),
|
||||
|
||||
@@ -50,6 +50,7 @@ dependencies = [
|
||||
"dncil==1.0.2",
|
||||
"pydantic==2.4.0",
|
||||
"protobuf==4.23.4",
|
||||
"dexparser==1.2.0",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user