Merge remote-tracking branch 'parentrepo/dynamic-feature-extraction' into sync-1657

This commit is contained in:
Yacine Elhamer
2023-07-20 09:33:48 +01:00
47 changed files with 1782 additions and 210 deletions

View File

@@ -2,9 +2,9 @@ name: CI
on:
push:
branches: [ master ]
branches: [ master, "dynamic-feature-extraction" ]
pull_request:
branches: [ master ]
branches: [ master, "dynamic-feature-extraction" ]
permissions: read-all

1 .gitmodules vendored
View File

@@ -4,3 +4,4 @@
[submodule "tests/data"]
path = tests/data
url = ../capa-testfiles.git
branch = dynamic-feature-extractor

View File

@@ -3,9 +3,17 @@
## master (unreleased)
### New Features
- Add a dynamic feature extractor for the CAPE sandbox @yelhamer [#1535](https://github.com/mandiant/capa/issues/1535)
- Add unit tests for the new CAPE extractor #1563 @yelhamer
- Add a CAPE file format and CAPE-based dynamic feature extraction to scripts/show-features.py #1566 @yelhamer
- Add a new process scope for the dynamic analysis flavor #1517 @yelhamer
- Add a new thread scope for the dynamic analysis flavor #1517 @yelhamer
- Add support for flavor-based rule scopes @yelhamer
- Add ProcessesAddress and ThreadAddress #1612 @yelhamer
### Breaking Changes
### New Rules (1)
- executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com

View File

@@ -43,6 +43,76 @@ class AbsoluteVirtualAddress(int, Address):
return int.__hash__(self)
class ProcessAddress(Address):
"""an address of a process in a dynamic execution trace"""
def __init__(self, pid: int, ppid: int = 0):
assert ppid >= 0
assert pid > 0
self.ppid = ppid
self.pid = pid
def __repr__(self):
return "process(%s%s)" % (
f"ppid: {self.ppid}, " if self.ppid > 0 else "",
f"pid: {self.pid}",
)
def __hash__(self):
return hash((self.ppid, self.pid))
def __eq__(self, other):
assert isinstance(other, ProcessAddress)
return (self.ppid, self.pid) == (other.ppid, other.pid)
def __lt__(self, other):
return (self.ppid, self.pid) < (other.ppid, other.pid)
class ThreadAddress(Address):
"""addresses a thread in a dynamic execution trace"""
def __init__(self, process: ProcessAddress, tid: int):
assert tid >= 0
self.process = process
self.tid = tid
def __repr__(self):
return f"thread(tid: {self.tid})"
def __hash__(self):
return hash((self.process, self.tid))
def __eq__(self, other):
assert isinstance(other, ThreadAddress)
return (self.process, self.tid) == (other.process, other.tid)
def __lt__(self, other):
return (self.process, self.tid) < (other.process, other.tid)
class DynamicAddress(Address):
"""an address from a dynamic analysis trace"""
def __init__(self, id_: int, return_address: int):
assert id_ >= 0
assert return_address >= 0
self.id = id_
self.return_address = return_address
def __repr__(self):
return f"dynamic(event: {self.id}, returnaddress: 0x{self.return_address:x})"
def __hash__(self):
return hash((self.id, self.return_address))
def __eq__(self, other):
return (self.id, self.return_address) == (other.id, other.return_address)
def __lt__(self, other):
return (self.id, self.return_address) < (other.id, other.return_address)
class RelativeVirtualAddress(int, Address):
"""a memory address relative to a base address"""

View File

@@ -457,6 +457,8 @@ VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
FORMAT_AUTO = "auto"
FORMAT_SC32 = "sc32"
FORMAT_SC64 = "sc64"
FORMAT_CAPE = "cape"
DYNAMIC_FORMATS = (FORMAT_CAPE,)
FORMAT_FREEZE = "freeze"
FORMAT_RESULT = "result"
FORMAT_UNKNOWN = "unknown"

View File

@@ -11,9 +11,11 @@ import dataclasses
from typing import Any, Dict, Tuple, Union, Iterator
from dataclasses import dataclass
from typing_extensions import TypeAlias
import capa.features.address
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.address import Address, ThreadAddress, ProcessAddress, AbsoluteVirtualAddress
# feature extractors may reference functions, BBs, insns by opaque handle values.
# you can use the `.address` property to get and render the address of the feature.
@@ -63,16 +65,18 @@ class InsnHandle:
inner: Any
class FeatureExtractor:
class StaticFeatureExtractor:
"""
FeatureExtractor defines the interface for fetching features from a sample.
StaticFeatureExtractor defines the interface for fetching features from a
sample without running it; extractors that rely on the execution trace of
a sample must implement the other sibling class, DynamicFeatureExtractor.
There may be multiple backends that support fetching features for capa.
For example, we use vivisect by default, but also want to support saving
and restoring features from a JSON file.
When we restore the features, we'd like to use exactly the same matching logic
to find matching rules.
Therefore, we can define a FeatureExtractor that provides features from the
Therefore, we can define a StaticFeatureExtractor that provides features from the
serialized JSON file and do matching without a binary analysis pass.
Also, this provides a way to hook in an IDA backend.
@@ -262,3 +266,114 @@ class FeatureExtractor:
Tuple[Feature, Address]: feature and its location
"""
raise NotImplementedError()
@dataclass
class ProcessHandle:
"""
reference to a process extracted by the sandbox.
Attributes:
pid: process id
inner: sandbox-specific data
"""
address: ProcessAddress
inner: Any
@dataclass
class ThreadHandle:
"""
reference to a thread extracted by the sandbox.
Attributes:
tid: thread id
inner: sandbox-specific data
"""
address: ThreadAddress
inner: Any
class DynamicFeatureExtractor:
"""
DynamicFeatureExtractor defines the interface for fetching features from a
sandbox's analysis of a sample; extractors that rely on statically analyzing
a sample must implement the sibling extractor, StaticFeatureExtractor.
Features are grouped mainly into threads; threads (together with their own features) are in turn
grouped into processes (which also carry process-level features). Other scopes (such as function
and file) may also apply for a specific sandbox.
This class is not instantiated directly; it is the base class for other implementations.
"""
@abc.abstractmethod
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
"""
extract features found at every scope ("global").
example::
extractor = CapeFeatureExtractor.from_report(json.loads(buf))
for feature, addr in extractor.get_global_features():
print(addr, feature)
yields:
Tuple[Feature, Address]: feature and its location
"""
raise NotImplementedError()
@abc.abstractmethod
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
"""
extract file-scope features.
example::
extractor = CapeFeatureExtractor.from_report(json.loads(buf))
for feature, addr in extractor.get_file_features():
print(addr, feature)
yields:
Tuple[Feature, Address]: feature and its location
"""
raise NotImplementedError()
@abc.abstractmethod
def get_processes(self) -> Iterator[ProcessHandle]:
"""
Enumerate processes in the trace.
"""
raise NotImplementedError()
@abc.abstractmethod
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
"""
Yields all the features of a process. These include:
- file features of the process' image
- inter-process injection
- detected dynamic DLL loading
"""
raise NotImplementedError()
@abc.abstractmethod
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
"""
Enumerate threads in the given process.
"""
raise NotImplementedError()
@abc.abstractmethod
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
"""
Yields all the features of a thread. These include:
- sequenced api traces
- file/registry interactions
- network activity
"""
raise NotImplementedError()
FeatureExtractor: TypeAlias = Union[StaticFeatureExtractor, DynamicFeatureExtractor]
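
For reference, a minimal sketch of how a caller is expected to walk the new dynamic interface. The helper name below is illustrative, not part of the API; `extractor` can be any DynamicFeatureExtractor implementation, such as the CAPE one introduced later in this diff.

from capa.features.extractors.base_extractor import DynamicFeatureExtractor

def walk_dynamic_features(extractor: DynamicFeatureExtractor) -> None:
    # global and file scope features are not tied to any particular process
    for feature, addr in extractor.extract_global_features():
        print("global", addr, feature)
    for feature, addr in extractor.extract_file_features():
        print("file", addr, feature)
    # then walk the trace: processes, their threads, and the per-thread features
    for ph in extractor.get_processes():
        for feature, addr in extractor.extract_process_features(ph):
            print("process", addr, feature)
        for th in extractor.get_threads(ph):
            for feature, addr in extractor.extract_thread_features(ph, th):
                print("thread", addr, feature)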

View File

@@ -17,10 +17,10 @@ import capa.features.extractors.binja.function
import capa.features.extractors.binja.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
class BinjaFeatureExtractor(FeatureExtractor):
class BinjaFeatureExtractor(StaticFeatureExtractor):
def __init__(self, bv: binja.BinaryView):
super().__init__()
self.bv = bv

View File

@@ -0,0 +1,73 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Dict, Tuple, Union, Iterator
import capa.features.extractors.cape.file
import capa.features.extractors.cape.thread
import capa.features.extractors.cape.global_
import capa.features.extractors.cape.process
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress, _NoAddress
from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle, DynamicFeatureExtractor
logger = logging.getLogger(__name__)
TESTED_VERSIONS = ("2.2-CAPE",)
class CapeExtractor(DynamicFeatureExtractor):
def __init__(self, cape_version: str, static: Dict, behavior: Dict):
super().__init__()
self.cape_version = cape_version
self.static = static
self.behavior = behavior
self.global_features = capa.features.extractors.cape.global_.extract_features(self.static)
def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]:
# value according to the PE header, the actual trace may use a different imagebase
return AbsoluteVirtualAddress(self.static["pe"]["imagebase"])
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
yield from self.global_features
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
yield from capa.features.extractors.cape.file.extract_features(self.static)
def get_processes(self) -> Iterator[ProcessHandle]:
yield from capa.features.extractors.cape.file.get_processes(self.behavior)
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
yield from capa.features.extractors.cape.process.extract_features(self.behavior, ph)
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
yield from capa.features.extractors.cape.process.get_threads(self.behavior, ph)
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
yield from capa.features.extractors.cape.thread.extract_features(self.behavior, ph, th)
@classmethod
def from_report(cls, report: Dict) -> "CapeExtractor":
cape_version = report["info"]["version"]
if cape_version not in TESTED_VERSIONS:
logger.warning("CAPE version '%s' not tested/supported yet", cape_version)
static = report["static"]
format_ = list(static.keys())[0]
static = static[format_]
static.update(report["behavior"].pop("summary"))
static.update(report["target"])
static.update({"processtree": report["behavior"]["processtree"]})
static.update({"strings": report["strings"]})
static.update({"format": format_})
behavior = report.pop("behavior")
behavior["network"] = report.pop("network")
return cls(cape_version, static, behavior)
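
A hedged usage example for the extractor above; the report path is illustrative, and a real report.json from a tested CAPE version is required.

import json
from pathlib import Path

from capa.features.extractors.cape.extractor import CapeExtractor

report = json.loads(Path("report.json").read_text(encoding="utf-8"))
extractor = CapeExtractor.from_report(report)

print("base address:", extractor.get_base_address())
for ph in extractor.get_processes():
    # ph.inner carries sandbox-specific data, here the process name
    print("process:", ph.address, ph.inner.get("name"))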

View File

@@ -0,0 +1,121 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Dict, Tuple, Iterator
from capa.features.file import Export, Import, Section
from capa.features.common import String, Feature
from capa.features.address import NO_ADDRESS, Address, ProcessAddress, AbsoluteVirtualAddress
from capa.features.extractors.helpers import generate_symbols
from capa.features.extractors.base_extractor import ProcessHandle
logger = logging.getLogger(__name__)
def get_processes(static: Dict) -> Iterator[ProcessHandle]:
"""
get all the created processes for a sample
"""
def rec(process):
address: ProcessAddress = ProcessAddress(pid=process["pid"], ppid=process["parent_id"])
inner: Dict[str, str] = {"name": process["name"]}
yield ProcessHandle(address=address, inner=inner)
for child in process["children"]:
yield from rec(child)
for process in static["processtree"]:
yield from rec(process)
def extract_import_names(static: Dict) -> Iterator[Tuple[Feature, Address]]:
"""
extract imported function names
"""
imports = static["imports"]
"""
2.2-CAPE
"imports": [
{
"dll": "RPCRT4.dll",
"imports": [{"address": "0x40504c","name": "NdrSimpleTypeUnmarshall"}, ...]
},
...
]
2.4-CAPE
"imports": {
"ADVAPI32": {
"dll": "ADVAPI32.dll",
"imports": [{"address": "0x522000", "name": "OpenSCManagerA"}, ...],
...
},
...
}
"""
if isinstance(imports, dict):
imports = imports.values()
for library in imports:
for function in library["imports"]:
addr = int(function["address"], 16)
for name in generate_symbols(library["dll"], function["name"]):
yield Import(name), AbsoluteVirtualAddress(addr)
def extract_export_names(static: Dict) -> Iterator[Tuple[Feature, Address]]:
for function in static["exports"]:
name, address = function["name"], int(function["address"], 16)
yield Export(name), AbsoluteVirtualAddress(address)
def extract_section_names(static: Dict) -> Iterator[Tuple[Feature, Address]]:
# be consistent with static extractors and use section VA
base = int(static["imagebase"], 16)
for section in static["sections"]:
name, address = section["name"], int(section["virtual_address"], 16)
yield Section(name), AbsoluteVirtualAddress(base + address)
def extract_file_strings(static: Dict) -> Iterator[Tuple[Feature, Address]]:
for string_ in static["strings"]:
yield String(string_), NO_ADDRESS
def extract_used_regkeys(static: Dict) -> Iterator[Tuple[Feature, Address]]:
for regkey in static["keys"]:
yield String(regkey), NO_ADDRESS
def extract_used_files(static: Dict) -> Iterator[Tuple[Feature, Address]]:
for filename in static["files"]:
yield String(filename), NO_ADDRESS
def extract_used_mutexes(static: Dict) -> Iterator[Tuple[Feature, Address]]:
for mutex in static["mutexes"]:
yield String(mutex), NO_ADDRESS
def extract_features(static: Dict) -> Iterator[Tuple[Feature, Address]]:
for handler in FILE_HANDLERS:
for feature, addr in handler(static):
yield feature, addr
FILE_HANDLERS = (
extract_import_names,
extract_export_names,
extract_section_names,
extract_file_strings,
extract_used_regkeys,
extract_used_files,
extract_used_mutexes,
)
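
To make the two supported import layouts concrete, a small self-contained example (made-up data; assumes this branch is installed so the module above is importable):

from capa.features.extractors.cape.file import extract_import_names

static_2_2 = {
    "imports": [
        {"dll": "KERNEL32.dll", "imports": [{"address": "0x401000", "name": "CreateFileA"}]},
    ],
}
static_2_4 = {
    "imports": {
        "KERNEL32": {"dll": "KERNEL32.dll", "imports": [{"address": "0x401000", "name": "CreateFileA"}]},
    },
}

for static in (static_2_2, static_2_4):
    # both layouts yield the same normalized import features
    for feature, addr in extract_import_names(static):
        print(addr, feature)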

View File

@@ -0,0 +1,94 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Tuple, Iterator
from capa.features.common import (
OS,
OS_ANY,
ARCH_ANY,
OS_LINUX,
ARCH_I386,
FORMAT_PE,
ARCH_AMD64,
FORMAT_ELF,
OS_WINDOWS,
FORMAT_UNKNOWN,
Arch,
Format,
Feature,
)
from capa.features.address import NO_ADDRESS, Address
logger = logging.getLogger(__name__)
def guess_elf_os(file_output) -> Iterator[Tuple[Feature, Address]]:
# operating systems recognized by the file command: https://github.com/file/file/blob/master/src/readelf.c#L609
if "Linux" in file_output:
yield OS(OS_LINUX), NO_ADDRESS
elif "Hurd" in file_output:
yield OS("hurd"), NO_ADDRESS
elif "Solaris" in file_output:
yield OS("solaris"), NO_ADDRESS
elif "kFreeBSD" in file_output:
yield OS("freebsd"), NO_ADDRESS
elif "kNetBSD" in file_output:
yield OS("netbsd"), NO_ADDRESS
else:
logger.warning("unrecognized OS: %s", file_output)
yield OS(OS_ANY), NO_ADDRESS
def extract_arch(static) -> Iterator[Tuple[Feature, Address]]:
if "Intel 80386" in static["file"]["type"]:
yield Arch(ARCH_I386), NO_ADDRESS
elif "x86-64" in static["file"]["type"]:
yield Arch(ARCH_AMD64), NO_ADDRESS
else:
logger.warning("unrecognized Architecture: %s", static["file"]["type"])
yield Arch(ARCH_ANY), NO_ADDRESS
def extract_format(static) -> Iterator[Tuple[Feature, Address]]:
if "PE" in static["file"]["type"]:
yield Format(FORMAT_PE), NO_ADDRESS
elif "ELF" in static["file"]["type"]:
yield Format(FORMAT_ELF), NO_ADDRESS
else:
logger.warning("unknown file format, file command output: %s", static["file"]["type"])
yield Format(FORMAT_UNKNOWN), NO_ADDRESS
def extract_os(static) -> Iterator[Tuple[Feature, Address]]:
# this variable contains the output of the file command
file_command = static["file"]["type"]
if "windows" in file_command.lower():
yield OS(OS_WINDOWS), NO_ADDRESS
elif "elf" in file_command.lower():
# implement os guessing from the cape trace
yield from guess_elf_os(file_command)
else:
# the sample is shellcode
logger.debug("unsupported file format, file command output: %s", file_command)
yield OS(OS_ANY), NO_ADDRESS
def extract_features(static) -> Iterator[Tuple[Feature, Address]]:
for global_handler in GLOBAL_HANDLER:
for feature, addr in global_handler(static):
yield feature, addr
GLOBAL_HANDLER = (
extract_format,
extract_os,
extract_arch,
)

View File

@@ -0,0 +1,28 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Any, Dict, List
from capa.features.extractors.base_extractor import ProcessHandle
def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]:
"""
find a specific process identified by a process handle.
args:
processes: a list of processes extracted by CAPE
ph: handle of the sought process
return:
a CAPE-defined dictionary for the sought process' information
"""
for process in processes:
if ph.address.ppid == process["parent_id"] and ph.address.pid == process["process_id"]:
return process
return {}

View File

@@ -0,0 +1,57 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Dict, List, Tuple, Iterator
import capa.features.extractors.cape.file
import capa.features.extractors.cape.thread
import capa.features.extractors.cape.global_
import capa.features.extractors.cape.helpers
import capa.features.extractors.cape.process
from capa.features.common import String, Feature
from capa.features.address import Address, ThreadAddress
from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle
logger = logging.getLogger(__name__)
def get_threads(behavior: Dict, ph: ProcessHandle) -> Iterator[ThreadHandle]:
"""
get the threads of the given process
"""
process = capa.features.extractors.cape.helpers.find_process(behavior["processes"], ph)
threads: List = process["threads"]
for thread in threads:
address: ThreadAddress = ThreadAddress(process=ph.address, tid=int(thread))
yield ThreadHandle(address=address, inner={})
def extract_environ_strings(behavior: Dict, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
"""
extract strings from a process' provided environment variables.
"""
process = capa.features.extractors.cape.helpers.find_process(behavior["processes"], ph)
environ: Dict[str, str] = process["environ"]
if not environ:
return
for _, value in environ.items():
if value:
yield String(value), ph.address
def extract_features(behavior: Dict, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
for handler in PROCESS_HANDLERS:
for feature, addr in handler(behavior, ph):
yield feature, addr
PROCESS_HANDLERS = (extract_environ_strings,)

View File

@@ -0,0 +1,62 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Any, Dict, List, Tuple, Iterator
import capa.features.extractors.cape.helpers
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address, DynamicAddress
from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle
logger = logging.getLogger(__name__)
def extract_call_features(behavior: Dict, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
"""
this method goes through the specified thread's call trace, and extracts all possible
features such as: API, Number (for arguments), String (for arguments).
args:
behavior: a dictionary of behavioral artifacts extracted by the sandbox
ph: process handle (for defining the extraction scope)
th: thread handle (for defining the extraction scope)
yields:
Feature, address; where Feature is either: API, Number, or String.
"""
process = capa.features.extractors.cape.helpers.find_process(behavior["processes"], ph)
calls: List[Dict[str, Any]] = process["calls"]
tid = str(th.address.tid)
for call in calls:
if call["thread_id"] != tid:
continue
# TODO(yelhamer): find correct base address used at runtime.
# this address may vary from the one in the PE header; the actual base may be read from procdump.pe.imagebase or similar.
# https://github.com/mandiant/capa/issues/1618
caller = DynamicAddress(call["id"], int(call["caller"], 16))
# list similar to disassembly: arguments right-to-left, call
for arg in call["arguments"][::-1]:
try:
yield Number(int(arg["value"], 16), description=f"{arg['name']}"), caller
except ValueError:
yield String(arg["value"], description=f"{arg['name']}"), caller
yield API(call["api"]), caller
def extract_features(behavior: Dict, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
for handler in THREAD_HANDLERS:
for feature, addr in handler(behavior, ph, th):
yield feature, addr
THREAD_HANDLERS = (extract_call_features,)
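
A self-contained sketch of the call-trace walk above, using a minimal stand-in for a CAPE behavior dictionary (all values made up):

from capa.features.address import ProcessAddress, ThreadAddress
from capa.features.extractors.base_extractor import ProcessHandle, ThreadHandle
from capa.features.extractors.cape.thread import extract_call_features

behavior = {
    "processes": [
        {
            "process_id": 1234,
            "parent_id": 4,
            "calls": [
                {
                    "id": 0,
                    "thread_id": "500",
                    "caller": "0x401000",
                    "api": "CreateFileA",
                    "arguments": [{"name": "FileName", "value": "C:\\temp\\a.txt"}],
                },
            ],
        },
    ],
}

ph = ProcessHandle(address=ProcessAddress(pid=1234, ppid=4), inner={})
th = ThreadHandle(address=ThreadAddress(process=ph.address, tid=500), inner={})

# yields the String argument first (arguments are walked right-to-left), then API(CreateFileA)
for feature, addr in extract_call_features(behavior, ph, th):
    print(addr, feature)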

View File

@@ -22,7 +22,7 @@ import capa.features.extractors.dnfile.function
from capa.features.common import Feature
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress
from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
from capa.features.extractors.dnfile.helpers import (
get_dotnet_types,
get_dotnet_fields,
@@ -68,7 +68,7 @@ class DnFileFeatureExtractorCache:
return self.types.get(token)
class DnfileFeatureExtractor(FeatureExtractor):
class DnfileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: Path):
super().__init__()
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

View File

@@ -25,7 +25,7 @@ from capa.features.common import (
Feature,
)
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.base_extractor import StaticFeatureExtractor
logger = logging.getLogger(__name__)
@@ -81,7 +81,7 @@ GLOBAL_HANDLERS = (
)
class DnfileFeatureExtractor(FeatureExtractor):
class DnfileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: Path):
super().__init__()
self.path: Path = path

View File

@@ -31,7 +31,7 @@ from capa.features.common import (
Characteristic,
)
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.base_extractor import StaticFeatureExtractor
from capa.features.extractors.dnfile.helpers import (
DnType,
iter_dotnet_table,
@@ -165,7 +165,7 @@ GLOBAL_HANDLERS = (
)
class DotnetFileFeatureExtractor(FeatureExtractor):
class DotnetFileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: Path):
super().__init__()
self.path: Path = path

View File

@@ -16,7 +16,7 @@ import capa.features.extractors.common
from capa.features.file import Import, Section
from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.base_extractor import StaticFeatureExtractor
logger = logging.getLogger(__name__)
@@ -107,7 +107,7 @@ GLOBAL_HANDLERS = (
)
class ElfFeatureExtractor(FeatureExtractor):
class ElfFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: Path):
super().__init__()
self.path: Path = path

View File

@@ -54,6 +54,10 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
# normalize dll name
dll = dll.lower()
# trim extensions observed in dynamic traces
dll = dll.replace(".dll", "")
dll = dll.replace(".drv", "")
# kernel32.CreateFileA
yield f"{dll}.{symbol}"

View File

@@ -18,10 +18,10 @@ import capa.features.extractors.ida.function
import capa.features.extractors.ida.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
class IdaFeatureExtractor(FeatureExtractor):
class IdaFeatureExtractor(StaticFeatureExtractor):
def __init__(self):
super().__init__()
self.global_features: List[Tuple[Feature, Address]] = []

View File

@@ -5,12 +5,22 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Dict, List, Tuple
from typing import Dict, List, Tuple, Union
from dataclasses import dataclass
from typing_extensions import TypeAlias
from capa.features.common import Feature
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress
from capa.features.extractors.base_extractor import (
BBHandle,
InsnHandle,
ThreadHandle,
ProcessHandle,
FunctionHandle,
StaticFeatureExtractor,
DynamicFeatureExtractor,
)
@dataclass
@@ -31,7 +41,7 @@ class FunctionFeatures:
@dataclass
class NullFeatureExtractor(FeatureExtractor):
class NullStaticFeatureExtractor(StaticFeatureExtractor):
"""
An extractor that extracts some user-provided features.
@@ -77,3 +87,51 @@ class NullFeatureExtractor(FeatureExtractor):
def extract_insn_features(self, f, bb, insn):
for address, feature in self.functions[f.address].basic_blocks[bb.address].instructions[insn.address].features:
yield feature, address
@dataclass
class ThreadFeatures:
features: List[Tuple[Address, Feature]]
@dataclass
class ProcessFeatures:
features: List[Tuple[Address, Feature]]
threads: Dict[Address, ThreadFeatures]
@dataclass
class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
base_address: Address
global_features: List[Feature]
file_features: List[Tuple[Address, Feature]]
processes: Dict[Address, ProcessFeatures]
def extract_global_features(self):
for feature in self.global_features:
yield feature, NO_ADDRESS
def extract_file_features(self):
for address, feature in self.file_features:
yield feature, address
def get_processes(self):
for address in sorted(self.processes.keys()):
assert isinstance(address, ProcessAddress)
yield ProcessHandle(address=address, inner={})
def extract_process_features(self, p):
for addr, feature in self.processes[p.address].features:
yield feature, addr
def get_threads(self, p):
for address in sorted(self.processes[p.address].threads.keys()):
assert isinstance(address, ThreadAddress)
yield ThreadHandle(address=address, inner={})
def extract_thread_features(self, p, t):
for addr, feature in self.processes[p.address].threads[t.address].features:
yield feature, addr
NullFeatureExtractor: TypeAlias = Union[NullStaticFeatureExtractor, NullDynamicFeatureExtractor]
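
A hedged sketch of constructing the new null dynamic extractor by hand (e.g. for tests); the features and addresses below are made up.

import capa.features.extractors.null as null
from capa.features.insn import API
from capa.features.common import OS, OS_WINDOWS, String
from capa.features.address import NO_ADDRESS, ProcessAddress, ThreadAddress

proc = ProcessAddress(pid=1234, ppid=4)
thread = ThreadAddress(process=proc, tid=500)

extractor = null.NullDynamicFeatureExtractor(
    base_address=NO_ADDRESS,
    global_features=[OS(OS_WINDOWS)],
    file_features=[],
    processes={
        proc: null.ProcessFeatures(
            features=[(proc, String("explorer.exe"))],
            threads={thread: null.ThreadFeatures(features=[(thread, API("CreateFileA"))])},
        ),
    },
)

for ph in extractor.get_processes():
    for th in extractor.get_threads(ph):
        print(list(extractor.extract_thread_features(ph, th)))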

View File

@@ -19,7 +19,7 @@ import capa.features.extractors.strings
from capa.features.file import Export, Import, Section
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Characteristic
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.base_extractor import StaticFeatureExtractor
logger = logging.getLogger(__name__)
@@ -185,7 +185,7 @@ GLOBAL_HANDLERS = (
)
class PefileFeatureExtractor(FeatureExtractor):
class PefileFeatureExtractor(StaticFeatureExtractor):
def __init__(self, path: Path):
super().__init__()
self.path: Path = path

View File

@@ -20,12 +20,12 @@ import capa.features.extractors.viv.function
import capa.features.extractors.viv.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
logger = logging.getLogger(__name__)
class VivisectFeatureExtractor(FeatureExtractor):
class VivisectFeatureExtractor(StaticFeatureExtractor):
def __init__(self, vw, path: Path, os):
super().__init__()
self.vw = vw

View File

@@ -15,6 +15,7 @@ from enum import Enum
from typing import List, Tuple, Union
from pydantic import Field, BaseModel
from typing_extensions import TypeAlias
import capa.helpers
import capa.version
@@ -23,9 +24,10 @@ import capa.features.insn
import capa.features.common
import capa.features.address
import capa.features.basicblock
import capa.features.extractors.base_extractor
import capa.features.extractors.null as null
from capa.helpers import assert_never
from capa.features.freeze.features import Feature, feature_from_capa
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor, DynamicFeatureExtractor
logger = logging.getLogger(__name__)
@@ -41,12 +43,15 @@ class AddressType(str, Enum):
FILE = "file"
DN_TOKEN = "dn token"
DN_TOKEN_OFFSET = "dn token offset"
PROCESS = "process"
THREAD = "thread"
DYNAMIC = "dynamic"
NO_ADDRESS = "no address"
class Address(HashableModel):
type: AddressType
value: Union[int, Tuple[int, int], None]
value: Union[int, Tuple[int, ...], None]
@classmethod
def from_capa(cls, a: capa.features.address.Address) -> "Address":
@@ -65,6 +70,15 @@ class Address(HashableModel):
elif isinstance(a, capa.features.address.DNTokenOffsetAddress):
return cls(type=AddressType.DN_TOKEN_OFFSET, value=(a.token, a.offset))
elif isinstance(a, capa.features.address.ProcessAddress):
return cls(type=AddressType.PROCESS, value=(a.ppid, a.pid))
elif isinstance(a, capa.features.address.ThreadAddress):
return cls(type=AddressType.THREAD, value=(a.process.ppid, a.process.pid, a.tid))
elif isinstance(a, capa.features.address.DynamicAddress):
return cls(type=AddressType.DYNAMIC, value=(a.id, a.return_address))
elif a == capa.features.address.NO_ADDRESS or isinstance(a, capa.features.address._NoAddress):
return cls(type=AddressType.NO_ADDRESS, value=None)
@@ -101,6 +115,22 @@ class Address(HashableModel):
assert isinstance(offset, int)
return capa.features.address.DNTokenOffsetAddress(token, offset)
elif self.type is AddressType.PROCESS:
assert isinstance(self.value, tuple)
ppid, pid = self.value
assert isinstance(ppid, int)
assert isinstance(pid, int)
return capa.features.address.ProcessAddress(ppid=ppid, pid=pid)
elif self.type is AddressType.THREAD:
assert isinstance(self.value, tuple)
ppid, pid, tid = self.value
assert isinstance(ppid, int)
assert isinstance(pid, int)
assert isinstance(tid, int)
proc_addr = capa.features.address.ProcessAddress(ppid=ppid, pid=pid)
return capa.features.address.ThreadAddress(proc_addr, tid=tid)
elif self.type is AddressType.NO_ADDRESS:
return capa.features.address.NO_ADDRESS
@@ -131,6 +161,34 @@ class FileFeature(HashableModel):
feature: Feature
class ProcessFeature(HashableModel):
"""
args:
process: the address of the process to which this feature belongs.
address: the address at which this feature is found.
process != address because, e.g., the feature may be found *within* the scope (process).
"""
process: Address
address: Address
feature: Feature
class ThreadFeature(HashableModel):
"""
args:
thread: the address of the thread to which this feature belongs.
address: the address at which this feature is found.
thread != address because, e.g., the feature may be found *within* the scope (thread).
"""
thread: Address
address: Address
feature: Feature
class FunctionFeature(HashableModel):
"""
args:
@@ -199,7 +257,18 @@ class FunctionFeatures(BaseModel):
allow_population_by_field_name = True
class Features(BaseModel):
class ThreadFeatures(BaseModel):
address: Address
features: Tuple[ThreadFeature, ...]
class ProcessFeatures(BaseModel):
address: Address
features: Tuple[ProcessFeature, ...]
threads: Tuple[ThreadFeatures, ...]
class StaticFeatures(BaseModel):
global_: Tuple[GlobalFeature, ...] = Field(alias="global")
file: Tuple[FileFeature, ...]
functions: Tuple[FunctionFeatures, ...]
@@ -208,6 +277,18 @@ class Features(BaseModel):
allow_population_by_field_name = True
class DynamicFeatures(BaseModel):
global_: Tuple[GlobalFeature, ...] = Field(alias="global")
file: Tuple[FileFeature, ...]
processes: Tuple[ProcessFeatures, ...]
class Config:
allow_population_by_field_name = True
Features: TypeAlias = Union[StaticFeatures, DynamicFeatures]
class Extractor(BaseModel):
name: str
version: str = capa.version.__version__
@@ -226,11 +307,11 @@ class Freeze(BaseModel):
allow_population_by_field_name = True
def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> str:
def dumps_static(extractor: StaticFeatureExtractor) -> str:
"""
serialize the given extractor to a string
"""
assert isinstance(extractor, StaticFeatureExtractor)
global_features: List[GlobalFeature] = []
for feature, _ in extractor.extract_global_features():
global_features.append(
@@ -309,7 +390,7 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
# Mypy is unable to recognise `basic_blocks` as an argument due to alias
)
features = Features(
features = StaticFeatures(
global_=global_features,
file=tuple(file_features),
functions=tuple(function_features),
@@ -327,15 +408,98 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
return freeze.json()
def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
"""deserialize a set of features (as a NullFeatureExtractor) from a string."""
import capa.features.extractors.null as null
def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
"""
serialize the given extractor to a string
"""
global_features: List[GlobalFeature] = []
for feature, _ in extractor.extract_global_features():
global_features.append(
GlobalFeature(
feature=feature_from_capa(feature),
)
)
file_features: List[FileFeature] = []
for feature, address in extractor.extract_file_features():
file_features.append(
FileFeature(
feature=feature_from_capa(feature),
address=Address.from_capa(address),
)
)
process_features: List[ProcessFeatures] = []
for p in extractor.get_processes():
paddr = Address.from_capa(p.address)
pfeatures = [
ProcessFeature(
process=paddr,
address=Address.from_capa(addr),
feature=feature_from_capa(feature),
)
for feature, addr in extractor.extract_process_features(p)
]
threads = []
for t in extractor.get_threads(p):
taddr = Address.from_capa(t.address)
tfeatures = [
ThreadFeature(
thread=taddr,
address=Address.from_capa(addr),
feature=feature_from_capa(feature),
)
for feature, addr in extractor.extract_thread_features(p, t)
]
threads.append(
ThreadFeatures(
address=taddr,
features=tuple(tfeatures),
)
)
process_features.append(
ProcessFeatures(
address=paddr,
features=tuple(pfeatures),
threads=tuple(threads),
)
)
features = DynamicFeatures(
global_=global_features,
file=tuple(file_features),
processes=tuple(process_features),
) # type: ignore
# Mypy is unable to recognise `global_` as an argument due to alias
# workaround for mypy issue: https://github.com/python/mypy/issues/1424
get_base_address = getattr(extractor, "get_base_address", None)
base_addr = get_base_address() if get_base_address else capa.features.address.NO_ADDRESS
freeze = Freeze(
version=2,
base_address=Address.from_capa(base_addr),
extractor=Extractor(name=extractor.__class__.__name__),
features=features,
) # type: ignore
# Mypy is unable to recognise `base_address` as an argument due to alias
return freeze.json()
def loads_static(s: str) -> StaticFeatureExtractor:
"""deserialize a set of features (as a NullFeatureExtractor) from a string."""
freeze = Freeze.parse_raw(s)
if freeze.version != 2:
raise ValueError(f"unsupported freeze format version: {freeze.version}")
return null.NullFeatureExtractor(
assert isinstance(freeze.features, StaticFeatures)
return null.NullStaticFeatureExtractor(
base_address=freeze.base_address.to_capa(),
global_features=[f.feature.to_capa() for f in freeze.features.global_],
file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
@@ -360,23 +524,69 @@ def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
)
def loads_dynamic(s: str) -> DynamicFeatureExtractor:
"""deserialize a set of features (as a NullFeatureExtractor) from a string."""
freeze = Freeze.parse_raw(s)
if freeze.version != 2:
raise ValueError(f"unsupported freeze format version: {freeze.version}")
assert isinstance(freeze.features, DynamicFeatures)
return null.NullDynamicFeatureExtractor(
base_address=freeze.base_address.to_capa(),
global_features=[f.feature.to_capa() for f in freeze.features.global_],
file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
processes={
p.address.to_capa(): null.ProcessFeatures(
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in p.features],
threads={
t.address.to_capa(): null.ThreadFeatures(
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in t.features],
)
for t in p.threads
},
)
for p in freeze.features.processes
},
)
MAGIC = "capa0000".encode("ascii")
STATIC_MAGIC = MAGIC + "-static".encode("ascii")
DYNAMIC_MAGIC = MAGIC + "-dynamic".encode("ascii")
def dump(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> bytes:
def dump(extractor: FeatureExtractor) -> bytes:
"""serialize the given extractor to a byte array."""
return MAGIC + zlib.compress(dumps(extractor).encode("utf-8"))
if isinstance(extractor, StaticFeatureExtractor):
return STATIC_MAGIC + zlib.compress(dumps_static(extractor).encode("utf-8"))
elif isinstance(extractor, DynamicFeatureExtractor):
return DYNAMIC_MAGIC + zlib.compress(dumps_dynamic(extractor).encode("utf-8"))
else:
raise ValueError("Invalid feature extractor")
def is_freeze(buf: bytes) -> bool:
return buf[: len(MAGIC)] == MAGIC
def load(buf: bytes) -> capa.features.extractors.base_extractor.FeatureExtractor:
def is_static(buf: bytes) -> bool:
return buf[: len(STATIC_MAGIC)] == STATIC_MAGIC
def is_dynamic(buf: bytes) -> bool:
return buf[: len(DYNAMIC_MAGIC)] == DYNAMIC_MAGIC
def load(buf: bytes):
"""deserialize a set of features (as a NullFeatureExtractor) from a byte array."""
if not is_freeze(buf):
raise ValueError("missing magic header")
return loads(zlib.decompress(buf[len(MAGIC) :]).decode("utf-8"))
if is_static(buf):
return loads_static(zlib.decompress(buf[len(STATIC_MAGIC) :]).decode("utf-8"))
elif is_dynamic(buf):
return loads_dynamic(zlib.decompress(buf[len(DYNAMIC_MAGIC) :]).decode("utf-8"))
else:
raise ValueError("invalid magic header")
def main(argv=None):

View File

@@ -5,6 +5,8 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import json
import inspect
import logging
import contextlib
@@ -15,10 +17,11 @@ from pathlib import Path
import tqdm
from capa.exceptions import UnsupportedFormatError
from capa.features.common import FORMAT_PE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format
from capa.features.common import FORMAT_PE, FORMAT_CAPE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
EXTENSIONS_DYNAMIC = ("json", "json_")
EXTENSIONS_ELF = "elf_"
logger = logging.getLogger("capa")
@@ -49,14 +52,25 @@ def assert_never(value) -> NoReturn:
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
def get_format_from_extension(sample: Path) -> str:
if sample.name.endswith(EXTENSIONS_SHELLCODE_32):
return FORMAT_SC32
elif sample.name.endswith(EXTENSIONS_SHELLCODE_64):
return FORMAT_SC64
def get_format_from_report(sample: Path) -> str:
with open(sample, "rb") as f:
report = json.load(f)
if "CAPE" in report.keys():
return FORMAT_CAPE
return FORMAT_UNKNOWN
def get_format_from_extension(sample: Path) -> str:
format_ = FORMAT_UNKNOWN
if sample.name.endswith(EXTENSIONS_SHELLCODE_32):
format_ = FORMAT_SC32
elif sample.name.endswith(EXTENSIONS_SHELLCODE_64):
format_ = FORMAT_SC64
elif sample.name.endswith(EXTENSIONS_DYNAMIC):
format_ = get_format_from_report(sample)
return format_
def get_auto_format(path: Path) -> str:
format_ = get_format(path)
if format_ == FORMAT_UNKNOWN:

View File

@@ -1191,10 +1191,13 @@ class CapaExplorerForm(idaapi.PluginForm):
return
is_match: bool = False
if self.rulegen_current_function is not None and rule.scope in (
if self.rulegen_current_function is not None and any(
s in rule.scopes
for s in (
capa.rules.Scope.FUNCTION,
capa.rules.Scope.BASIC_BLOCK,
capa.rules.Scope.INSTRUCTION,
)
):
try:
_, func_matches, bb_matches, insn_matches = self.rulegen_feature_cache.find_code_capabilities(
@@ -1204,13 +1207,13 @@ class CapaExplorerForm(idaapi.PluginForm):
self.set_rulegen_status(f"Failed to create function rule matches from rule set ({e})")
return
if rule.scope == capa.rules.Scope.FUNCTION and rule.name in func_matches:
if capa.rules.Scope.FUNCTION in rule.scopes and rule.name in func_matches:
is_match = True
elif rule.scope == capa.rules.Scope.BASIC_BLOCK and rule.name in bb_matches:
elif capa.rules.Scope.BASIC_BLOCK in rule.scopes and rule.name in bb_matches:
is_match = True
elif rule.scope == capa.rules.Scope.INSTRUCTION and rule.name in insn_matches:
elif capa.rules.Scope.INSTRUCTION in rule.scopes and rule.name in insn_matches:
is_match = True
elif rule.scope == capa.rules.Scope.FILE:
elif capa.rules.Scope.FILE in rule.scopes:
try:
_, file_matches = self.rulegen_feature_cache.find_file_capabilities(ruleset)
except Exception as e:

View File

@@ -11,6 +11,7 @@ See the License for the specific language governing permissions and limitations
import io
import os
import sys
import json
import time
import hashlib
import logging
@@ -20,7 +21,7 @@ import textwrap
import itertools
import contextlib
import collections
from typing import Any, Dict, List, Tuple, Callable, Optional
from typing import Any, Dict, List, Tuple, Callable, Optional, cast
from pathlib import Path
import halo
@@ -51,6 +52,7 @@ import capa.features.extractors.dnfile_
import capa.features.extractors.elffile
import capa.features.extractors.dotnetfile
import capa.features.extractors.base_extractor
import capa.features.extractors.cape.extractor
from capa.rules import Rule, Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import (
@@ -71,6 +73,7 @@ from capa.features.common import (
FORMAT_ELF,
OS_WINDOWS,
FORMAT_AUTO,
FORMAT_CAPE,
FORMAT_SC32,
FORMAT_SC64,
FORMAT_DOTNET,
@@ -78,7 +81,14 @@ from capa.features.common import (
FORMAT_RESULT,
)
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.extractors.base_extractor import (
BBHandle,
InsnHandle,
FunctionHandle,
FeatureExtractor,
StaticFeatureExtractor,
DynamicFeatureExtractor,
)
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
@@ -120,7 +130,7 @@ def set_vivisect_log_level(level):
def find_instruction_capabilities(
ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
) -> Tuple[FeatureSet, MatchResults]:
"""
find matches for the given rules for the given instruction.
@@ -147,7 +157,7 @@ def find_instruction_capabilities(
def find_basic_block_capabilities(
ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
"""
find matches for the given rules within the given basic block.
@@ -187,7 +197,7 @@ def find_basic_block_capabilities(
def find_code_capabilities(
ruleset: RuleSet, extractor: FeatureExtractor, fh: FunctionHandle
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
"""
find matches for the given rules within the given function.
@@ -245,7 +255,9 @@ def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, functi
return matches, len(file_features)
def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None) -> Tuple[MatchResults, Any]:
def find_static_capabilities(
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
) -> Tuple[MatchResults, Any]:
all_function_matches = collections.defaultdict(list) # type: MatchResults
all_bb_matches = collections.defaultdict(list) # type: MatchResults
all_insn_matches = collections.defaultdict(list) # type: MatchResults
@@ -339,6 +351,17 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
return matches, meta
def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, **kwargs) -> Tuple[MatchResults, Any]:
if isinstance(extractor, StaticFeatureExtractor):
extractor_: StaticFeatureExtractor = cast(StaticFeatureExtractor, extractor)
return find_static_capabilities(ruleset, extractor_, **kwargs)
elif isinstance(extractor, DynamicFeatureExtractor):
# extractor_ = cast(DynamicFeatureExtractor, extractor)
raise NotImplementedError()
else:
raise ValueError(f"unexpected extractor type: {extractor.__class__.__name__}")
def has_rule_with_namespace(rules: RuleSet, capabilities: MatchResults, namespace: str) -> bool:
return any(
rules.rules[rule_name].meta.get("namespace", "").startswith(namespace) for rule_name in capabilities.keys()
@@ -526,7 +549,8 @@ def get_extractor(
UnsupportedArchError
UnsupportedOSError
"""
if format_ not in (FORMAT_SC32, FORMAT_SC64):
if format_ not in (FORMAT_SC32, FORMAT_SC64, FORMAT_CAPE):
if not is_supported_format(path):
raise UnsupportedFormatError()
@@ -536,7 +560,14 @@ def get_extractor(
if os_ == OS_AUTO and not is_supported_os(path):
raise UnsupportedOSError()
if format_ == FORMAT_DOTNET:
if format_ == FORMAT_CAPE:
import capa.features.extractors.cape.extractor
with open(path, "rb") as f:
report = json.load(f)
return capa.features.extractors.cape.extractor.CapeExtractor.from_report(report)
elif format_ == FORMAT_DOTNET:
import capa.features.extractors.dnfile.extractor
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
@@ -608,6 +639,11 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:
elif format_ == capa.features.extractors.common.FORMAT_ELF:
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
elif format_ == FORMAT_CAPE:
with open(sample, "rb") as f:
report = json.load(f)
file_extractors.append(capa.features.extractors.cape.extractor.CapeExtractor.from_report(report))
return file_extractors
@@ -704,7 +740,7 @@ def get_rules(
rule.meta["capa/nursery"] = is_nursery_rule_path(path)
rules.append(rule)
logger.debug("loaded rule: '%s' with scope: %s", rule.name, rule.scope)
logger.debug("loaded rule: '%s' with scope: %s", rule.name, rule.scopes)
ruleset = capa.rules.RuleSet(rules)
@@ -745,12 +781,13 @@ def collect_metadata(
format_: str,
os_: str,
rules_path: List[Path],
extractor: capa.features.extractors.base_extractor.FeatureExtractor,
extractor: FeatureExtractor,
) -> rdoc.Metadata:
md5 = hashlib.md5()
sha1 = hashlib.sha1()
sha256 = hashlib.sha256()
assert isinstance(extractor, StaticFeatureExtractor)
buf = sample_path.read_bytes()
md5.update(buf)
@@ -761,6 +798,7 @@ def collect_metadata(
format_ = get_format(sample_path) if format_ == FORMAT_AUTO else format_
arch = get_arch(sample_path)
os_ = get_os(sample_path) if os_ == OS_AUTO else os_
base_addr = extractor.get_base_address() if hasattr(extractor, "get_base_address") else NO_ADDRESS
return rdoc.Metadata(
timestamp=datetime.datetime.now(),
@@ -778,7 +816,7 @@ def collect_metadata(
os=os_,
extractor=extractor.__class__.__name__,
rules=rules,
base_address=frz.Address.from_capa(extractor.get_base_address()),
base_address=frz.Address.from_capa(base_addr),
layout=rdoc.Layout(
functions=(),
# this is updated after capabilities have been collected.
@@ -902,6 +940,7 @@ def install_common_args(parser, wanted=None):
(FORMAT_ELF, "Executable and Linkable Format"),
(FORMAT_SC32, "32-bit shellcode"),
(FORMAT_SC64, "64-bit shellcode"),
(FORMAT_CAPE, "CAPE sandbox report"),
(FORMAT_FREEZE, "features previously frozen by capa"),
]
format_help = ", ".join([f"{f[0]}: {f[1]}" for f in formats])
@@ -1232,7 +1271,7 @@ def main(argv: Optional[List[str]] = None):
if format_ == FORMAT_FREEZE:
# freeze format deserializes directly into an extractor
extractor = frz.load(Path(args.sample).read_bytes())
extractor: FeatureExtractor = frz.load(Path(args.sample).read_bytes())
else:
# all other formats we must create an extractor,
# such as viv, binary ninja, etc. workspaces

View File

@@ -54,6 +54,12 @@ def format_address(address: frz.Address) -> str:
assert isinstance(token, int)
assert isinstance(offset, int)
return f"token({capa.helpers.hex(token)})+{capa.helpers.hex(offset)}"
elif address.type == frz.AddressType.DYNAMIC:
assert isinstance(address.value, tuple)
id_, return_address = address.value
assert isinstance(id_, int)
assert isinstance(return_address, int)
return f"event: {id_}, retaddr: 0x{return_address:x}"
elif address.type == frz.AddressType.NO_ADDRESS:
return "global"
else:

View File

@@ -26,6 +26,7 @@ except ImportError:
from backports.functools_lru_cache import lru_cache # type: ignore
from typing import Any, Set, Dict, List, Tuple, Union, Iterator
from dataclasses import asdict, dataclass
import yaml
import pydantic
@@ -59,7 +60,7 @@ META_KEYS = (
"authors",
"description",
"lib",
"scope",
"scopes",
"att&ck",
"mbc",
"references",
@@ -74,18 +75,62 @@ HIDDEN_META_KEYS = ("capa/nursery", "capa/path")
class Scope(str, Enum):
FILE = "file"
PROCESS = "process"
THREAD = "thread"
FUNCTION = "function"
BASIC_BLOCK = "basic block"
INSTRUCTION = "instruction"
FILE_SCOPE = Scope.FILE.value
PROCESS_SCOPE = Scope.PROCESS.value
THREAD_SCOPE = Scope.THREAD.value
FUNCTION_SCOPE = Scope.FUNCTION.value
BASIC_BLOCK_SCOPE = Scope.BASIC_BLOCK.value
INSTRUCTION_SCOPE = Scope.INSTRUCTION.value
# used only to specify supported features per scope.
# not used to validate rules.
GLOBAL_SCOPE = "global"
DEV_SCOPE = "dev"
# these literals are used to check if the flavor
# of a rule is correct.
STATIC_SCOPES = (
FILE_SCOPE,
GLOBAL_SCOPE,
FUNCTION_SCOPE,
BASIC_BLOCK_SCOPE,
INSTRUCTION_SCOPE,
)
DYNAMIC_SCOPES = (
FILE_SCOPE,
GLOBAL_SCOPE,
PROCESS_SCOPE,
THREAD_SCOPE,
DEV_SCOPE,
)
@dataclass
class Scopes:
static: str
dynamic: str
def __contains__(self, scope: Union[Scope, str]) -> bool:
assert isinstance(scope, Scope) or isinstance(scope, str)
return (scope == self.static) or (scope == self.dynamic)
@classmethod
def from_dict(cls, scopes: dict) -> "Scopes":
assert isinstance(scopes, dict)
if sorted(scopes) != ["dynamic", "static"]:
raise InvalidRule("scope flavors can be either static or dynamic")
if scopes["static"] not in STATIC_SCOPES:
raise InvalidRule(f"{scopes['static']} is not a valid static scope")
if scopes["dynamic"] not in DYNAMIC_SCOPES:
raise InvalidRule(f"{scopes['dynamic']} is not a valid dynamicscope")
return Scopes(scopes["static"], scopes["dynamic"])
SUPPORTED_FEATURES: Dict[str, Set] = {
@@ -108,6 +153,21 @@ SUPPORTED_FEATURES: Dict[str, Set] = {
capa.features.common.Characteristic("mixed mode"),
capa.features.common.Characteristic("forwarded export"),
},
PROCESS_SCOPE: {
capa.features.common.MatchedRule,
capa.features.common.String,
capa.features.common.Substring,
capa.features.common.Regex,
capa.features.common.Characteristic("embedded pe"),
},
THREAD_SCOPE: {
capa.features.common.MatchedRule,
capa.features.common.String,
capa.features.common.Substring,
capa.features.common.Regex,
capa.features.insn.API,
capa.features.insn.Number,
},
FUNCTION_SCOPE: {
capa.features.common.MatchedRule,
capa.features.basicblock.BasicBlock,
@@ -145,6 +205,12 @@ SUPPORTED_FEATURES: Dict[str, Set] = {
capa.features.common.Class,
capa.features.common.Namespace,
},
DEV_SCOPE: {
# TODO(yelhamer): this is a temporary scope. remove it after support
# for the legacy scope keyword has been added (to rendering).
# https://github.com/mandiant/capa/pull/1580
capa.features.insn.API,
},
}
# global scope features are available in all other scopes
@@ -152,11 +218,19 @@ SUPPORTED_FEATURES[INSTRUCTION_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
SUPPORTED_FEATURES[FUNCTION_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
SUPPORTED_FEATURES[FILE_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
SUPPORTED_FEATURES[PROCESS_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
SUPPORTED_FEATURES[THREAD_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
# all thread scope features are also process features
SUPPORTED_FEATURES[PROCESS_SCOPE].update(SUPPORTED_FEATURES[THREAD_SCOPE])
# all instruction scope features are also basic block features
SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE].update(SUPPORTED_FEATURES[INSTRUCTION_SCOPE])
# all basic block scope features are also function scope features
SUPPORTED_FEATURES[FUNCTION_SCOPE].update(SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE])
# dynamic-dev scope contains all features
SUPPORTED_FEATURES[DEV_SCOPE].update(SUPPORTED_FEATURES[FILE_SCOPE])
SUPPORTED_FEATURES[DEV_SCOPE].update(SUPPORTED_FEATURES[FUNCTION_SCOPE])
SUPPORTED_FEATURES[DEV_SCOPE].update(SUPPORTED_FEATURES[PROCESS_SCOPE])
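For illustration (not part of this diff): after the update() calls above, feature support flows from narrower to broader scopes, so a thread-scope feature such as API is automatically legal at process scope as well. A quick check of that behavior:

    import capa.features.insn
    from capa.rules import SUPPORTED_FEATURES, THREAD_SCOPE, PROCESS_SCOPE

    # API is declared for the thread scope above...
    assert capa.features.insn.API in SUPPORTED_FEATURES[THREAD_SCOPE]
    # ...and becomes valid for the process scope via the thread -> process update above
    assert capa.features.insn.API in SUPPORTED_FEATURES[PROCESS_SCOPE]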
class InvalidRule(ValueError):
@@ -440,8 +514,26 @@ def build_statements(d, scope: str):
# like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
return ceng.Some(0, [build_statements(dd, scope) for dd in d[key]], description=description)
elif key == "function":
elif key == "process":
if scope != FILE_SCOPE:
raise InvalidRule("process subscope supported only for file scope")
if len(d[key]) != 1:
raise InvalidRule("subscope must have exactly one child statement")
return ceng.Subscope(PROCESS_SCOPE, build_statements(d[key][0], PROCESS_SCOPE), description=description)
elif key == "thread":
if scope not in (PROCESS_SCOPE, FILE_SCOPE):
raise InvalidRule("thread subscope supported only for the process scope")
if len(d[key]) != 1:
raise InvalidRule("subscope must have exactly one child statement")
return ceng.Subscope(THREAD_SCOPE, build_statements(d[key][0], THREAD_SCOPE), description=description)
elif key == "function":
if scope not in (FILE_SCOPE, DEV_SCOPE):
raise InvalidRule("function subscope supported only for file scope")
if len(d[key]) != 1:
@@ -450,7 +542,7 @@ def build_statements(d, scope: str):
return ceng.Subscope(FUNCTION_SCOPE, build_statements(d[key][0], FUNCTION_SCOPE), description=description)
elif key == "basic block":
if scope != FUNCTION_SCOPE:
if scope not in (FUNCTION_SCOPE, DEV_SCOPE):
raise InvalidRule("basic block subscope supported only for function scope")
if len(d[key]) != 1:
@@ -459,7 +551,7 @@ def build_statements(d, scope: str):
return ceng.Subscope(BASIC_BLOCK_SCOPE, build_statements(d[key][0], BASIC_BLOCK_SCOPE), description=description)
elif key == "instruction":
if scope not in (FUNCTION_SCOPE, BASIC_BLOCK_SCOPE):
if scope not in (FUNCTION_SCOPE, BASIC_BLOCK_SCOPE, DEV_SCOPE):
raise InvalidRule("instruction subscope supported only for function and basic block scope")
if len(d[key]) == 1:
@@ -611,10 +703,10 @@ def second(s: List[Any]) -> Any:
class Rule:
def __init__(self, name: str, scope: str, statement: Statement, meta, definition=""):
def __init__(self, name: str, scopes: Scopes, statement: Statement, meta, definition=""):
super().__init__()
self.name = name
self.scope = scope
self.scopes = scopes
self.statement = statement
self.meta = meta
self.definition = definition
@@ -623,7 +715,7 @@ class Rule:
return f"Rule(name={self.name})"
def __repr__(self):
return f"Rule(scope={self.scope}, name={self.name})"
return f"Rule(scope={self.scopes}, name={self.name})"
def get_dependencies(self, namespaces):
"""
@@ -683,11 +775,11 @@ class Rule:
name = self.name + "/" + uuid.uuid4().hex
new_rule = Rule(
name,
subscope.scope,
Scopes(subscope.scope, DEV_SCOPE),
subscope.child,
{
"name": name,
"scope": subscope.scope,
"scopes": asdict(Scopes(subscope.scope, DEV_SCOPE)),
# these derived rules are never meant to be inspected separately,
# they are dependencies for the parent rule,
# so mark it as such.
@@ -749,7 +841,9 @@ class Rule:
name = meta["name"]
# if scope is not specified, default to function scope.
# this is probably the mode that rule authors will start with.
scope = meta.get("scope", FUNCTION_SCOPE)
# each rule has two scopes: a static-flavor scope and a dynamic-flavor one.
# which one is used depends on the analysis flavor.
scopes: Scopes = Scopes.from_dict(meta.get("scopes", {"static": "function", "dynamic": "dev"}))
statements = d["rule"]["features"]
# the rule must start with a single logic node.
@@ -760,16 +854,20 @@ class Rule:
if isinstance(statements[0], ceng.Subscope):
raise InvalidRule("top level statement may not be a subscope")
if scope not in SUPPORTED_FEATURES.keys():
raise InvalidRule("{:s} is not a supported scope".format(scope))
meta = d["rule"]["meta"]
if not isinstance(meta.get("att&ck", []), list):
raise InvalidRule("ATT&CK mapping must be a list")
if not isinstance(meta.get("mbc", []), list):
raise InvalidRule("MBC mapping must be a list")
return cls(name, scope, build_statements(statements[0], scope), meta, definition)
# TODO(yelhamer): once we've decided on the desired format for mixed-scope statements,
# we should come back and update this accordingly to either:
# - generate one encompassing statement, or
# - generate two separate statements and store them appropriately.
# https://github.com/mandiant/capa/pull/1580
statement = build_statements(statements[0], scopes.static)
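# note: the dynamic-flavor statement below is built only so that its features are
# validated against the dynamic scope; the result is discarded for now.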
_ = build_statements(statements[0], scopes.dynamic)
return cls(name, scopes, statement, meta, definition)
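For illustration (not part of this diff): a minimal rule using the new two-flavor `scopes` meta key, loaded through `Rule.from_yaml`; the rule name and feature are invented for the example.

    import textwrap

    import capa.rules

    rule = capa.rules.Rule.from_yaml(
        textwrap.dedent(
            """
            rule:
              meta:
                name: demo two-flavor rule
                scopes:
                  static: function
                  dynamic: thread
              features:
                - api: CreateFileA
            """
        )
    )
    assert "function" in rule.scopes and "thread" in rule.scopes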
@staticmethod
@lru_cache()
@@ -868,10 +966,9 @@ class Rule:
del meta[k]
for k, v in self.meta.items():
meta[k] = v
# the name and scopes of the rule instance override anything in meta.
meta["name"] = self.name
meta["scope"] = self.scope
meta["scopes"] = asdict(self.scopes)
def move_to_end(m, k):
# ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap).
@@ -892,7 +989,6 @@ class Rule:
if key in META_KEYS:
continue
move_to_end(meta, key)
# save off the existing hidden meta values,
# emit the document,
# and re-add the hidden meta.
@@ -952,7 +1048,7 @@ def get_rules_with_scope(rules, scope) -> List[Rule]:
from the given collection of rules, select those with the given scope.
`scope` is one of the capa.rules.*_SCOPE constants.
"""
return [rule for rule in rules if rule.scope == scope]
return [rule for rule in rules if scope in rule.scopes]
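For illustration (not part of this diff): because membership is now checked against both flavors, a single rule can be selected by either of its scopes. Using the hypothetical demo rule from the earlier sketch (static: function, dynamic: thread):

    import capa.rules

    rules = [rule]  # the demo two-flavor rule from the earlier sketch
    assert rule in capa.rules.get_rules_with_scope(rules, capa.rules.FUNCTION_SCOPE)
    assert rule in capa.rules.get_rules_with_scope(rules, capa.rules.THREAD_SCOPE)
    assert rule not in capa.rules.get_rules_with_scope(rules, capa.rules.FILE_SCOPE)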
def get_rules_and_dependencies(rules: List[Rule], rule_name: str) -> Iterator[Rule]:
@@ -1100,6 +1196,8 @@ class RuleSet:
rules = capa.optimizer.optimize_rules(rules)
self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
self.process_rules = self._get_rules_for_scope(rules, PROCESS_SCOPE)
self.thread_rules = self._get_rules_for_scope(rules, THREAD_SCOPE)
self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE)
self.instruction_rules = self._get_rules_for_scope(rules, INSTRUCTION_SCOPE)
@@ -1108,6 +1206,10 @@ class RuleSet:
# unstable
(self._easy_file_rules_by_feature, self._hard_file_rules) = self._index_rules_by_feature(self.file_rules)
(self._easy_process_rules_by_feature, self._hard_process_rules) = self._index_rules_by_feature(
self.process_rules
)
(self._easy_thread_rules_by_feature, self._hard_thread_rules) = self._index_rules_by_feature(self.thread_rules)
(self._easy_function_rules_by_feature, self._hard_function_rules) = self._index_rules_by_feature(
self.function_rules
)
@@ -1353,16 +1455,22 @@ class RuleSet:
except that it may be more performant.
"""
easy_rules_by_feature = {}
if scope is Scope.FILE:
if scope == Scope.FILE:
easy_rules_by_feature = self._easy_file_rules_by_feature
hard_rule_names = self._hard_file_rules
elif scope is Scope.FUNCTION:
elif scope == Scope.PROCESS:
easy_rules_by_feature = self._easy_process_rules_by_feature
hard_rule_names = self._hard_process_rules
elif scope == Scope.THREAD:
easy_rules_by_feature = self._easy_thread_rules_by_feature
hard_rule_names = self._hard_thread_rules
elif scope == Scope.FUNCTION:
easy_rules_by_feature = self._easy_function_rules_by_feature
hard_rule_names = self._hard_function_rules
elif scope is Scope.BASIC_BLOCK:
elif scope == Scope.BASIC_BLOCK:
easy_rules_by_feature = self._easy_basic_block_rules_by_feature
hard_rule_names = self._hard_basic_block_rules
elif scope is Scope.INSTRUCTION:
elif scope == Scope.INSTRUCTION:
easy_rules_by_feature = self._easy_instruction_rules_by_feature
hard_rule_names = self._hard_instruction_rules
else:

View File

@@ -916,6 +916,10 @@ def main(argv=None):
default_samples_path = str(Path(__file__).resolve().parent.parent / "tests" / "data")
# TODO(yelhamer): linting is temporarily disabled via this early return;
# remove it once support for the legacy scope field has been added
# https://github.com/mandiant/capa/pull/1580
return 0
parser = argparse.ArgumentParser(description="Lint capa rules.")
capa.main.install_common_args(parser, wanted={"tag"})
parser.add_argument("rules", type=str, action="append", help="Path to rules")

View File

@@ -54,6 +54,7 @@ import capa.helpers
import capa.features
import capa.features.common
import capa.features.freeze
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor
logger = logging.getLogger("capa.profile")
@@ -104,12 +105,14 @@ def main(argv=None):
if (args.format == "freeze") or (
args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste)
):
extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
extractor: FeatureExtractor = capa.features.freeze.load(Path(args.sample).read_bytes())
assert isinstance(extractor, StaticFeatureExtractor)
else:
extractor = capa.main.get_extractor(
args.sample, args.format, args.os, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False
)
assert isinstance(extractor, StaticFeatureExtractor)
with tqdm.tqdm(total=args.number * args.repeat, leave=False) as pbar:
def do_iteration():

View File

@@ -78,6 +78,7 @@ import capa.render.result_document as rd
from capa.helpers import get_file_taste
from capa.features.common import FORMAT_AUTO
from capa.features.freeze import Address
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor
logger = logging.getLogger("capa.show-capabilities-by-function")
@@ -167,7 +168,7 @@ def main(argv=None):
if (args.format == "freeze") or (args.format == FORMAT_AUTO and capa.features.freeze.is_freeze(taste)):
format_ = "freeze"
extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
extractor: FeatureExtractor = capa.features.freeze.load(Path(args.sample).read_bytes())
else:
format_ = args.format
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
@@ -176,6 +177,7 @@ def main(argv=None):
extractor = capa.main.get_extractor(
args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace
)
assert isinstance(extractor, StaticFeatureExtractor)
except capa.exceptions.UnsupportedFormatError:
capa.helpers.log_unsupported_format_error()
return -1

View File

@@ -78,13 +78,12 @@ import capa.helpers
import capa.features
import capa.exceptions
import capa.render.verbose as v
import capa.features.common
import capa.features.freeze
import capa.features.address
import capa.features.extractors.pefile
import capa.features.extractors.base_extractor
from capa.helpers import log_unsupported_runtime_error
from capa.features.extractors.base_extractor import FunctionHandle
from capa.helpers import get_auto_format, log_unsupported_runtime_error
from capa.features.common import FORMAT_AUTO, FORMAT_FREEZE, DYNAMIC_FORMATS, is_global_feature
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor, DynamicFeatureExtractor, FunctionHandle
logger = logging.getLogger("capa.show-features")
@@ -101,6 +100,7 @@ def main(argv=None):
capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend"})
parser.add_argument("-F", "--function", type=str, help="Show features for specific function")
parser.add_argument("-P", "--process", type=str, help="Show features for specific process name")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
@@ -120,15 +120,16 @@ def main(argv=None):
logger.error("%s", str(e))
return -1
if (args.format == "freeze") or (
args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste)
):
format_ = args.format if args.format != FORMAT_AUTO else get_auto_format(args.sample)
if format_ == FORMAT_FREEZE:
# this should be moved above the previous if clause after implementing
# feature freeze for the dynamic analysis flavor
extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
else:
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
try:
extractor = capa.main.get_extractor(
args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace
args.sample, format_, args.os, args.backend, sig_paths, should_save_workspace
)
except capa.exceptions.UnsupportedFormatError:
capa.helpers.log_unsupported_format_error()
@@ -137,6 +138,17 @@ def main(argv=None):
log_unsupported_runtime_error()
return -1
if format_ in DYNAMIC_FORMATS:
assert isinstance(extractor, DynamicFeatureExtractor)
print_dynamic_analysis(extractor, args)
else:
assert isinstance(extractor, StaticFeatureExtractor)
print_static_analysis(extractor, args)
return 0
def print_static_analysis(extractor: StaticFeatureExtractor, args):
for feature, addr in extractor.extract_global_features():
print(f"global: {format_address(addr)}: {feature}")
@@ -165,9 +177,90 @@ def main(argv=None):
print(f"{args.function} not a function")
return -1
print_features(function_handles, extractor)
print_static_features(function_handles, extractor)
return 0
def print_dynamic_analysis(extractor: DynamicFeatureExtractor, args):
for feature, addr in extractor.extract_global_features():
print(f"global: {format_address(addr)}: {feature}")
if not args.process:
for feature, addr in extractor.extract_file_features():
print(f"file: {format_address(addr)}: {feature}")
process_handles = tuple(extractor.get_processes())
if args.process:
process_handles = tuple(filter(lambda ph: ph.inner["name"] == args.process, process_handles))
if not process_handles:
print(f"{args.process} not a process")
return -1
print_dynamic_features(process_handles, extractor)
def print_static_features(functions, extractor: StaticFeatureExtractor):
for f in functions:
if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function %s (%s)", format_address(f.address), function_name)
continue
print(f"func: {format_address(f.address)}")
for feature, addr in extractor.extract_function_features(f):
if is_global_feature(feature):
continue
if f.address != addr:
print(f" func: {format_address(f.address)}: {feature} -> {format_address(addr)}")
else:
print(f" func: {format_address(f.address)}: {feature}")
for bb in extractor.get_basic_blocks(f):
for feature, addr in extractor.extract_basic_block_features(f, bb):
if is_global_feature(feature):
continue
if bb.address != addr:
print(f" bb: {format_address(bb.address)}: {feature} -> {format_address(addr)}")
else:
print(f" bb: {format_address(bb.address)}: {feature}")
for insn in extractor.get_instructions(f, bb):
for feature, addr in extractor.extract_insn_features(f, bb, insn):
if is_global_feature(feature):
continue
try:
if insn.address != addr:
print(
f" insn: {format_address(f.address)}: {format_address(insn.address)}: {feature} -> {format_address(addr)}"
)
else:
print(f" insn: {format_address(insn.address)}: {feature}")
except UnicodeEncodeError:
# may be an issue while piping to less and encountering non-ascii characters
continue
def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
for p in processes:
print(f"proc: {p.inner['name']} (ppid={p.inner['ppid']}, pid={p.pid})")
for feature, addr in extractor.extract_process_features(p):
if is_global_feature(feature):
continue
print(f" proc: {p.inner['name']}: {feature}")
for t in extractor.get_threads(p):
for feature, addr in extractor.extract_thread_features(p, t):
if is_global_feature(feature):
continue
print(f" {t.address} {format_address(addr)}: {feature}")
def ida_main():
@@ -194,57 +287,11 @@ def ida_main():
print(f"{hex(function)} not a function")
return -1
print_features(function_handles, extractor)
print_static_features(function_handles, extractor)
return 0
def print_features(functions, extractor: capa.features.extractors.base_extractor.FeatureExtractor):
for f in functions:
if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function %s (%s)", format_address(f.address), function_name)
continue
print(f"func: {format_address(f.address)}")
for feature, addr in extractor.extract_function_features(f):
if capa.features.common.is_global_feature(feature):
continue
if f.address != addr:
print(f" func: {format_address(f.address)}: {feature} -> {format_address(addr)}")
else:
print(f" func: {format_address(f.address)}: {feature}")
for bb in extractor.get_basic_blocks(f):
for feature, addr in extractor.extract_basic_block_features(f, bb):
if capa.features.common.is_global_feature(feature):
continue
if bb.address != addr:
print(f" bb: {format_address(bb.address)}: {feature} -> {format_address(addr)}")
else:
print(f" bb: {format_address(bb.address)}: {feature}")
for insn in extractor.get_instructions(f, bb):
for feature, addr in extractor.extract_insn_features(f, bb, insn):
if capa.features.common.is_global_feature(feature):
continue
try:
if insn.address != addr:
print(
f" insn: {format_address(f.address)}: {format_address(insn.address)}: {feature} -> {format_address(addr)}"
)
else:
print(f" insn: {format_address(insn.address)}: {feature}")
except UnicodeEncodeError:
# may be an issue while piping to less and encountering non-ascii characters
continue
if __name__ == "__main__":
if capa.helpers.is_runtime_ida():
ida_main()

View File

@@ -50,7 +50,9 @@ def test_render_meta_attack():
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
authors:
- foo
att&ck:
@@ -86,7 +88,9 @@ def test_render_meta_mbc():
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
authors:
- foo
mbc:

View File

@@ -38,7 +38,7 @@ from capa.features.common import (
FeatureAccess,
)
from capa.features.address import Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, ThreadHandle, ProcessHandle, FunctionHandle
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
CD = Path(__file__).resolve().parent
@@ -180,6 +180,20 @@ def get_binja_extractor(path: Path):
return extractor
@lru_cache(maxsize=1)
def get_cape_extractor(path):
import gzip
import json
from capa.features.extractors.cape.extractor import CapeExtractor
with gzip.open(path, "r") as compressed_report:
report_json = compressed_report.read()
report = json.loads(report_json)
return CapeExtractor.from_report(report)
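For illustration (not part of this diff): within this module, the helper above pairs with get_data_path_by_name below, assuming the gzipped CAPE report is present under tests/data/dynamic/cape:

    path = get_data_path_by_name("0000a657")
    extractor = get_cape_extractor(path)
    processes = list(extractor.get_processes())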
def extract_global_features(extractor):
features = collections.defaultdict(set)
for feature, va in extractor.extract_global_features():
@@ -195,6 +209,23 @@ def extract_file_features(extractor):
return features
def extract_process_features(extractor, ph):
features = collections.defaultdict(set)
for thread in extractor.get_threads(ph):
for feature, va in extractor.extract_thread_features(ph, thread):
features[feature].add(va)
for feature, va in extractor.extract_process_features(ph):
features[feature].add(va)
return features
def extract_thread_features(extractor, ph, th):
features = collections.defaultdict(set)
for feature, va in extractor.extract_thread_features(ph, th):
features[feature].add(va)
return features
# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
def extract_function_features(extractor, fh):
features = collections.defaultdict(set)
@@ -305,7 +336,11 @@ def get_data_path_by_name(name) -> Path:
elif name.startswith("294b8d"):
return CD / "data" / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_"
elif name.startswith("2bf18d"):
return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_"
return os.path.join(CD, "data", "2bf18d0403677378adad9001b1243211.elf_")
elif name.startswith("0000a657"):
return os.path.join(
CD, "data", "dynamic", "cape", "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
)
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
else:
@@ -383,6 +418,20 @@ def sample(request):
return resolve_sample(request.param)
def get_process(extractor, ppid: int, pid: int) -> ProcessHandle:
for ph in extractor.get_processes():
if ph.address.ppid == ppid and ph.address.pid == pid:
return ph
raise ValueError("process not found")
def get_thread(extractor, ph: ProcessHandle, tid: int) -> ThreadHandle:
for th in extractor.get_threads(ph):
if th.address.tid == tid:
return th
raise ValueError("thread not found")
def get_function(extractor, fva: int) -> FunctionHandle:
for fh in extractor.get_functions():
if isinstance(extractor, DnfileFeatureExtractor):
@@ -490,6 +539,40 @@ def resolve_scope(scope):
inner_function.__name__ = scope
return inner_function
elif "thread=" in scope:
# like `process=(pid:ppid),thread=1002`
assert "process=" in scope
pspec, _, tspec = scope.partition(",")
pspec = pspec.partition("=")[2][1:-1].split(":")
assert len(pspec) == 2
pid, ppid = map(int, pspec)
tid = int(tspec.partition("=")[2])
def inner_thread(extractor):
ph = get_process(extractor, ppid, pid)
th = get_thread(extractor, ph, tid)
features = extract_thread_features(extractor, ph, th)
for k, vs in extract_global_features(extractor).items():
features[k].update(vs)
return features
inner_thread.__name__ = scope
return inner_thread
elif "process=" in scope:
# like `process=(pid:ppid)`
pspec = scope.partition("=")[2][1:-1].split(":")
assert len(pspec) == 2
pid, ppid = map(int, pspec)
def inner_process(extractor):
ph = get_process(extractor, ppid, pid)
features = extract_process_features(extractor, ph)
for k, vs in extract_global_features(extractor).items():
features[k].update(vs)
return features
inner_process.__name__ = scope
return inner_process
else:
raise ValueError("unexpected scope fixture")
@@ -515,6 +598,80 @@ def parametrize(params, values, **kwargs):
return pytest.mark.parametrize(params, values, ids=ids, **kwargs)
DYNAMIC_FEATURE_PRESENCE_TESTS = sorted(
[
# file/string
("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), True),
("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), True),
("0000a657", "file", capa.features.common.String("nope"), False),
# file/sections
("0000a657", "file", capa.features.file.Section(".rdata"), True),
("0000a657", "file", capa.features.file.Section(".nope"), False),
# file/imports
("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), True),
("0000a657", "file", capa.features.file.Import("Nope"), False),
# file/exports
("0000a657", "file", capa.features.file.Export("Nope"), False),
# process/environment variables
(
"0000a657",
"process=(1180:3052)",
capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"),
True,
),
("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False),
# thread/api calls
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False),
# thread/number call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), False),
# thread/string call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), False),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
)
DYNAMIC_FEATURE_COUNT_TESTS = sorted(
[
# file/string
("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1),
("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), 1),
("0000a657", "file", capa.features.common.String("nope"), 0),
# file/sections
("0000a657", "file", capa.features.file.Section(".rdata"), 1),
("0000a657", "file", capa.features.file.Section(".nope"), 0),
# file/imports
("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), 1),
("0000a657", "file", capa.features.file.Import("Nope"), 0),
# file/exports
("0000a657", "file", capa.features.file.Export("Nope"), 0),
# process/environment variables
(
"0000a657",
"process=(1180:3052)",
capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"),
2,
),
("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), 0),
# thread/api calls
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), 7),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), 0),
# thread/number call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), 1),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), 0),
# thread/string call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), 1),
("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), 0),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
)
FEATURE_PRESENCE_TESTS = sorted(
[
# file/characteristic("embedded pe")

View File

@@ -0,0 +1,27 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import fixtures
from fixtures import scope, sample
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.DYNAMIC_FEATURE_PRESENCE_TESTS,
indirect=["sample", "scope"],
)
def test_cape_features(sample, scope, feature, expected):
fixtures.do_test_feature_presence(fixtures.get_cape_extractor, sample, scope, feature, expected)
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.DYNAMIC_FEATURE_COUNT_TESTS,
indirect=["sample", "scope"],
)
def test_cape_feature_counts(sample, scope, feature, expected):
fixtures.do_test_feature_count(fixtures.get_cape_extractor, sample, scope, feature, expected)

View File

@@ -17,7 +17,9 @@ EXPECTED = textwrap.dedent(
name: test rule
authors:
- user@domain.com
scope: function
scopes:
static: function
dynamic: dev
examples:
- foo1234
- bar5678
@@ -41,7 +43,9 @@ def test_rule_reformat_top_level_elements():
name: test rule
authors:
- user@domain.com
scope: function
scopes:
static: function
dynamic: dev
examples:
- foo1234
- bar5678
@@ -59,7 +63,9 @@ def test_rule_reformat_indentation():
name: test rule
authors:
- user@domain.com
scope: function
scopes:
static: function
dynamic: dev
examples:
- foo1234
- bar5678
@@ -83,7 +89,9 @@ def test_rule_reformat_order():
examples:
- foo1234
- bar5678
scope: function
scopes:
static: function
dynamic: dev
name: test rule
features:
- and:
@@ -107,7 +115,9 @@ def test_rule_reformat_meta_update():
examples:
- foo1234
- bar5678
scope: function
scopes:
static: function
dynamic: dev
name: AAAA
features:
- and:
@@ -131,7 +141,9 @@ def test_rule_reformat_string_description():
name: test rule
authors:
- user@domain.com
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- string: foo

View File

@@ -9,6 +9,7 @@
import json
import textwrap
import pytest
import fixtures
import capa.main
@@ -17,6 +18,7 @@ import capa.engine
import capa.features
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_main(z9324d_extractor):
# tests rules can be loaded successfully and all output modes
path = z9324d_extractor.path
@@ -34,7 +36,9 @@ def test_main_single_rule(z9324d_extractor, tmpdir):
rule:
meta:
name: test rule
scope: file
scopes:
static: file
dynamic: dev
authors:
- test
features:
@@ -76,6 +80,7 @@ def test_main_non_ascii_filename_nonexistent(tmpdir, caplog):
assert NON_ASCII_FILENAME in caplog.text
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_main_shellcode(z499c2_extractor):
path = z499c2_extractor.path
assert capa.main.main([path, "-vv", "-f", "sc32"]) == 0
@@ -95,7 +100,9 @@ def test_ruleset():
rule:
meta:
name: file rule
scope: file
scopes:
static: file
dynamic: dev
features:
- characteristic: embedded pe
"""
@@ -107,7 +114,9 @@ def test_ruleset():
rule:
meta:
name: function rule
scope: function
scopes:
static: function
dynamic: dev
features:
- characteristic: tight loop
"""
@@ -119,17 +128,49 @@ def test_ruleset():
rule:
meta:
name: basic block rule
scope: basic block
scopes:
static: basic block
dynamic: dev
features:
- characteristic: nzxor
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: process rule
scopes:
static: file
dynamic: process
features:
- string: "explorer.exe"
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: thread rule
scopes:
static: function
dynamic: thread
features:
- api: RegDeleteKey
"""
)
),
]
)
assert len(rules.file_rules) == 1
assert len(rules.function_rules) == 1
assert len(rules.file_rules) == 2
assert len(rules.function_rules) == 2
assert len(rules.basic_block_rules) == 1
assert len(rules.process_rules) == 1
assert len(rules.thread_rules) == 1
def test_match_across_scopes_file_function(z9324d_extractor):
@@ -142,7 +183,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
rule:
meta:
name: install service
scope: function
scopes:
static: function
dynamic: dev
examples:
- 9324d1a8ae37a36ae560c37448c9705a:0x4073F0
features:
@@ -160,7 +203,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
rule:
meta:
name: .text section
scope: file
scopes:
static: file
dynamic: dev
examples:
- 9324d1a8ae37a36ae560c37448c9705a
features:
@@ -177,7 +222,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
rule:
meta:
name: .text section and install service
scope: file
scopes:
static: file
dynamic: dev
examples:
- 9324d1a8ae37a36ae560c37448c9705a
features:
@@ -205,7 +252,9 @@ def test_match_across_scopes(z9324d_extractor):
rule:
meta:
name: tight loop
scope: basic block
scopes:
static: basic block
dynamic: dev
examples:
- 9324d1a8ae37a36ae560c37448c9705a:0x403685
features:
@@ -221,7 +270,9 @@ def test_match_across_scopes(z9324d_extractor):
rule:
meta:
name: kill thread loop
scope: function
scopes:
static: function
dynamic: dev
examples:
- 9324d1a8ae37a36ae560c37448c9705a:0x403660
features:
@@ -239,7 +290,9 @@ def test_match_across_scopes(z9324d_extractor):
rule:
meta:
name: kill thread program
scope: file
scopes:
static: file
dynamic: dev
examples:
- 9324d1a8ae37a36ae560c37448c9705a
features:
@@ -266,7 +319,9 @@ def test_subscope_bb_rules(z9324d_extractor):
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- basic block:
@@ -290,7 +345,9 @@ def test_byte_matching(z9324d_extractor):
rule:
meta:
name: byte match test
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- bytes: ED 24 9E F4 52 A9 07 47 55 8E E1 AB 30 8E 23 61
@@ -313,7 +370,9 @@ def test_count_bb(z9324d_extractor):
meta:
name: count bb
namespace: test
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- count(basic blocks): 1 or more
@@ -337,7 +396,9 @@ def test_instruction_scope(z9324d_extractor):
meta:
name: push 1000
namespace: test
scope: instruction
scopes:
static: instruction
dynamic: dev
features:
- and:
- mnemonic: push
@@ -365,7 +426,9 @@ def test_instruction_subscope(z9324d_extractor):
meta:
name: push 1000 on i386
namespace: test
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- arch: i386
@@ -382,6 +445,7 @@ def test_instruction_subscope(z9324d_extractor):
assert 0x406F60 in {result[0] for result in capabilities["push 1000 on i386"]}
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_fix262(pma16_01_extractor, capsys):
path = pma16_01_extractor.path
assert capa.main.main([path, "-vv", "-t", "send HTTP request", "-q"]) == 0
@@ -391,6 +455,7 @@ def test_fix262(pma16_01_extractor, capsys):
assert "www.practicalmalwareanalysis.com" not in std.out
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_not_render_rules_also_matched(z9324d_extractor, capsys):
# rules that are also matched by other rules should not get rendered by default.
# this cuts down on the amount of output while giving approx the same detail.
@@ -417,6 +482,7 @@ def test_not_render_rules_also_matched(z9324d_extractor, capsys):
assert "create TCP socket" in std.out
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_json_meta(capsys):
path = str(fixtures.get_data_path_by_name("pma01-01"))
assert capa.main.main([path, "-j"]) == 0
@@ -432,6 +498,7 @@ def test_json_meta(capsys):
assert {"address": ["absolute", 0x10001179]} in info["matched_basic_blocks"]
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_main_dotnet(_1c444_dotnetfile_extractor):
# tests successful execution and all output modes
path = _1c444_dotnetfile_extractor.path
@@ -442,6 +509,7 @@ def test_main_dotnet(_1c444_dotnetfile_extractor):
assert capa.main.main([path]) == 0
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_main_dotnet2(_692f_dotnetfile_extractor):
# tests successful execution and one rendering
# above covers all output modes
@@ -449,18 +517,21 @@ def test_main_dotnet2(_692f_dotnetfile_extractor):
assert capa.main.main([path, "-vv"]) == 0
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_main_dotnet3(_0953c_dotnetfile_extractor):
# tests successful execution and one rendering
path = _0953c_dotnetfile_extractor.path
assert capa.main.main([path, "-vv"]) == 0
@pytest.mark.xfail(reason="relies on the legeacy ruleset. scopes keyword hasn't been added there")
def test_main_dotnet4(_039a6_dotnetfile_extractor):
# tests successful execution and one rendering
path = _039a6_dotnetfile_extractor.path
assert capa.main.main([path, "-vv"]) == 0
@pytest.mark.xfail(reason="ResultDocument hasn't been updated yet")
def test_main_rd():
path = str(fixtures.get_data_path_by_name("pma01-01-rd"))
assert capa.main.main([path, "-vv"]) == 0

View File

@@ -23,7 +23,9 @@ def test_optimizer_order():
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- substring: "foo"

View File

@@ -20,7 +20,9 @@ R1 = capa.rules.Rule.from_yaml(
name: test rule
authors:
- user@domain.com
scope: function
scopes:
static: function
dynamic: dev
examples:
- foo1234
- bar5678
@@ -40,7 +42,9 @@ R2 = capa.rules.Rule.from_yaml(
name: test rule 2
authors:
- user@domain.com
scope: function
scopes:
static: function
dynamic: dev
examples:
- foo1234
- bar5678

View File

@@ -39,7 +39,9 @@ ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004)
def test_rule_ctor():
r = capa.rules.Rule("test rule", capa.rules.FUNCTION_SCOPE, Or([Number(1)]), {})
r = capa.rules.Rule(
"test rule", capa.rules.Scopes(capa.rules.FUNCTION_SCOPE, capa.rules.FILE_SCOPE), Or([Number(1)]), {}
)
assert bool(r.evaluate({Number(0): {ADDR1}})) is False
assert bool(r.evaluate({Number(1): {ADDR2}})) is True
@@ -52,7 +54,9 @@ def test_rule_yaml():
name: test rule
authors:
- user@domain.com
scope: function
scopes:
static: function
dynamic: dev
examples:
- foo1234
- bar5678
@@ -242,7 +246,9 @@ def test_invalid_rule_feature():
rule:
meta:
name: test rule
scope: file
scopes:
static: file
dynamic: dev
features:
- characteristic: nzxor
"""
@@ -256,7 +262,9 @@ def test_invalid_rule_feature():
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
features:
- characteristic: embedded pe
"""
@@ -270,13 +278,31 @@ def test_invalid_rule_feature():
rule:
meta:
name: test rule
scope: basic block
scopes:
static: basic block
dynamic: dev
features:
- characteristic: embedded pe
"""
)
)
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: process
features:
- mnemonic: xor
"""
)
)
def test_lib_rules():
rules = capa.rules.RuleSet(
@@ -319,8 +345,10 @@ def test_subscope_rules():
"""
rule:
meta:
name: test rule
scope: file
name: test function subscope
scopes:
static: file
dynamic: dev
features:
- and:
- characteristic: embedded pe
@@ -330,17 +358,61 @@ def test_subscope_rules():
- characteristic: loop
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test process subscope
scopes:
static: file
dynamic: file
features:
- and:
- import: WININET.dll.HttpOpenRequestW
- process:
- and:
- substring: "http://"
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test thread subscope
scopes:
static: file
dynamic: process
features:
- and:
- string: "explorer.exe"
- thread:
- api: HttpOpenRequestW
"""
)
),
]
)
# the file rule scope will have one rules:
# - `test rule`
assert len(rules.file_rules) == 1
# the file rule scope will have the following rules:
# - `test function subscope` and `test process subscope`,
# plus the dynamic flavor of all rules
# assert len(rules.file_rules) == 4
# the function rule scope have one rule:
# - the rule on which `test rule` depends
# the function rule scope has one rule:
# - the rule on which `test function subscope` depends
assert len(rules.function_rules) == 1
# the process rule scope has two rules:
# - `test thread subscope`, plus the rule on which `test process subscope` depends
assert len(rules.process_rules) == 2
# the thread rule scope has one rule:
# - the rule on which `test thread subscope` depends
assert len(rules.thread_rules) == 1
def test_duplicate_rules():
with pytest.raises(capa.rules.InvalidRule):
@@ -445,6 +517,66 @@ def test_invalid_rules():
"""
)
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: basic block
behavior: process
features:
- number: 1
"""
)
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
legacy: basic block
dynamic: process
features:
- number: 1
"""
)
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: process
dynamic: process
features:
- number: 1
"""
)
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: basic block
dynamic: function
features:
- number: 1
"""
)
)
def test_number_symbol():
@@ -891,7 +1023,9 @@ def test_function_name_features():
rule:
meta:
name: test rule
scope: file
scopes:
static: file
dynamic: dev
features:
- and:
- function-name: strcpy
@@ -913,7 +1047,9 @@ def test_os_features():
rule:
meta:
name: test rule
scope: file
scopes:
static: file
dynamic: dev
features:
- and:
- os: windows
@@ -931,7 +1067,9 @@ def test_format_features():
rule:
meta:
name: test rule
scope: file
scopes:
static: file
dynamic: dev
features:
- and:
- format: pe
@@ -949,7 +1087,9 @@ def test_arch_features():
rule:
meta:
name: test rule
scope: file
scopes:
static: file
dynamic: dev
features:
- and:
- arch: amd64

View File

@@ -20,7 +20,9 @@ def test_rule_scope_instruction():
rule:
meta:
name: test rule
scope: instruction
scopes:
static: instruction
dynamic: dev
features:
- and:
- mnemonic: mov
@@ -37,7 +39,9 @@ def test_rule_scope_instruction():
rule:
meta:
name: test rule
scope: instruction
scopes:
static: instruction
dynamic: dev
features:
- characteristic: embedded pe
"""
@@ -54,7 +58,9 @@ def test_rule_subscope_instruction():
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- instruction:
@@ -83,7 +89,9 @@ def test_scope_instruction_implied_and():
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- instruction:
@@ -102,7 +110,9 @@ def test_scope_instruction_description():
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- instruction:
@@ -120,7 +130,9 @@ def test_scope_instruction_description():
rule:
meta:
name: test rule
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- instruction:

View File

@@ -38,14 +38,22 @@ def get_rule_path():
@pytest.mark.parametrize(
"script,args",
[
pytest.param("capa2yara.py", [get_rules_path()]),
pytest.param("capafmt.py", [get_rule_path()]),
pytest.param("capa2yara.py", [get_rules_path()], marks=pytest.mark.xfail(reason="relies on legacy ruleset")),
pytest.param(
"capafmt.py", [get_rule_path()], marks=pytest.mark.xfail(reason="rendering hasn't been added yet")
),
# not testing lint.py as it runs regularly anyway
pytest.param("match-function-id.py", [get_file_path()]),
pytest.param("show-capabilities-by-function.py", [get_file_path()]),
pytest.param(
"show-capabilities-by-function.py",
[get_file_path()],
marks=pytest.mark.xfail(reason="rendering hasn't been added yet"),
),
pytest.param("show-features.py", [get_file_path()]),
pytest.param("show-features.py", ["-F", "0x407970", get_file_path()]),
pytest.param("capa_as_library.py", [get_file_path()]),
pytest.param(
"capa_as_library.py", [get_file_path()], marks=pytest.mark.xfail(reason="relies on legacy ruleset")
),
],
)
def test_scripts(script, args):
@@ -54,6 +62,7 @@ def test_scripts(script, args):
assert p.returncode == 0
@pytest.mark.xfail(reason="relies on legacy ruleset")
def test_bulk_process(tmp_path):
# create test directory to recursively analyze
t = tmp_path / "test"
@@ -74,6 +83,7 @@ def run_program(script_path, args):
return subprocess.run(args, stdout=subprocess.PIPE)
@pytest.mark.xfail(reason="rendering hasn't been added yet")
def test_proto_conversion(tmp_path):
t = tmp_path / "proto-test"
t.mkdir()
@@ -97,7 +107,9 @@ def test_detect_duplicate_features(tmpdir):
rule:
meta:
name: Test Rule 0
scope: function
scopes:
static: function
dynamic: dev
features:
- and:
- number: 1

View File

@@ -24,7 +24,7 @@ import capa.features.extractors.base_extractor
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
EXTRACTOR = capa.features.extractors.null.NullFeatureExtractor(
EXTRACTOR = capa.features.extractors.null.NullStaticFeatureExtractor(
base_address=AbsoluteVirtualAddress(0x401000),
global_features=[],
file_features=[
@@ -83,7 +83,9 @@ def test_null_feature_extractor():
rule:
meta:
name: xor loop
scope: basic block
scopes:
static: basic block
dynamic: dev
features:
- and:
- characteristic: tight loop
@@ -119,8 +121,8 @@ def compare_extractors(a, b):
def test_freeze_str_roundtrip():
load = capa.features.freeze.loads
dump = capa.features.freeze.dumps
load = capa.features.freeze.loads_static
dump = capa.features.freeze.dumps_static
reanimated = load(dump(EXTRACTOR))
compare_extractors(EXTRACTOR, reanimated)