Merge branch 'test-cape-extractor' into cape-extractor

This commit is contained in:
Yacine Elhamer
2023-06-20 10:47:56 +01:00
3 changed files with 188 additions and 1 deletions

View File

@@ -6,6 +6,7 @@
### New Features
- Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- Add a dynamic feature extractor for the CAPE sandbox @yelhamer [#1535](https://github.com/mandiant/capa/issues/1535)
- Add unit tests for the new CAPE extractor [#1563](https://github.com/mandiant/capa/issues/1563) @yelhamer
### Breaking Changes
- Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat

View File

@@ -41,7 +41,7 @@ from capa.features.common import (
FeatureAccess,
)
from capa.features.address import Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, ThreadHandle, ProcessHandle, FunctionHandle
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
CD = os.path.dirname(__file__)
@@ -183,6 +183,20 @@ def get_binja_extractor(path):
return extractor
@lru_cache(maxsize=1)
def get_cape_extractor(path):
    """
    Build a CapeExtractor from the gzip-compressed JSON report stored at `path`.

    The result is memoized with an LRU cache holding a single entry, so
    consecutive calls with the same path reuse the already-built extractor.
    """
    # imported lazily, inside the function, as the original does
    import gzip
    import json

    from capa.features.extractors.cape.extractor import CapeExtractor

    with gzip.open(path, "r") as gz_stream:
        raw_report = gz_stream.read()

    # json.loads accepts the raw bytes read from the gzip stream
    return CapeExtractor.from_report(json.loads(raw_report))
def extract_global_features(extractor):
features = collections.defaultdict(set)
for feature, va in extractor.extract_global_features():
@@ -198,6 +212,23 @@ def extract_file_features(extractor):
return features
def extract_process_features(extractor, ph):
    """
    Gather the features of process `ph`, including those of each of its threads.

    Returns a defaultdict mapping each feature to the set of addresses at which
    the extractor reported it.
    """
    collected = collections.defaultdict(set)

    def absorb(pairs):
        # fold (feature, address) pairs into the shared mapping
        for feat, addr in pairs:
            collected[feat].add(addr)

    # thread-scoped features first, then the process-scoped ones
    for th in extractor.get_threads(ph):
        absorb(extractor.extract_thread_features(ph, th))
    absorb(extractor.extract_process_features(ph))

    return collected
def extract_thread_features(extractor, ph, th):
    """
    Gather the features the extractor reports for thread `th` of process `ph`.

    Returns a defaultdict mapping each feature to the set of addresses at which
    it was reported.
    """
    by_feature = collections.defaultdict(set)
    reported = extractor.extract_thread_features(ph, th)
    for feat, addr in reported:
        by_feature[feat].add(addr)
    return by_feature
# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
def extract_function_features(extractor, fh):
features = collections.defaultdict(set)
@@ -311,6 +342,10 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_")
elif name.startswith("2bf18d"):
return os.path.join(CD, "data", "2bf18d0403677378adad9001b1243211.elf_")
elif name.startswith("0000a657"):
return os.path.join(
CD, "data/dynamic/cape", "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
)
else:
raise ValueError(f"unexpected sample fixture: {name}")
@@ -371,6 +406,8 @@ def get_sample_md5_by_name(name):
return "3db3e55b16a7b1b1afb970d5e77c5d98"
elif name.startswith("2bf18d"):
return "2bf18d0403677378adad9001b1243211"
elif name.startswith("0000a657"):
return "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
else:
raise ValueError(f"unexpected sample fixture: {name}")
@@ -384,6 +421,20 @@ def sample(request):
return resolve_sample(request.param)
def get_process(extractor, ppid: int, pid: int) -> ProcessHandle:
    """
    Find the process with the given parent pid and pid among the extractor's processes.

    Returns the handle yielded by the extractor itself (rather than a rebuilt
    one), so that whatever else the extractor stored on the handle is
    preserved; this mirrors how `get_thread` returns the handle it finds.

    Raises:
      ValueError: when no process matches both `ppid` and `pid`.
    """
    for ph in extractor.get_processes():
        if ph.inner["ppid"] == ppid and ph.pid == pid:
            return ph
    raise ValueError("process not found")
def get_thread(extractor, ph: ProcessHandle, tid: int) -> ThreadHandle:
    """
    Find the thread with id `tid` among the threads the extractor lists for `ph`.

    Raises:
      ValueError: when none of the process' threads has that id.
    """
    for candidate in extractor.get_threads(ph):
        if candidate.tid != tid:
            continue
        return candidate
    raise ValueError("thread not found")
def get_function(extractor, fva: int) -> FunctionHandle:
for fh in extractor.get_functions():
if isinstance(extractor, DnfileFeatureExtractor):
@@ -491,6 +542,40 @@ def resolve_scope(scope):
inner_function.__name__ = scope
return inner_function
elif "thread=" in scope:
# like `process=(pid:ppid),thread=1002`
assert "process=" in scope
pspec, _, tspec = scope.partition(",")
pspec = pspec.partition("=")[2][1:-1].split(":")
assert len(pspec) == 2
pid, ppid = map(int, pspec)
tid = int(tspec.partition("=")[2])
def inner_thread(extractor):
ph = get_process(extractor, ppid, pid)
th = get_thread(extractor, ph, tid)
features = extract_thread_features(extractor, ph, th)
for k, vs in extract_global_features(extractor).items():
features[k].update(vs)
return features
inner_thread.__name__ = scope
return inner_thread
elif "process=" in scope:
# like `process=(pid:ppid)`
pspec = scope.partition("=")[2][1:-1].split(":")
assert len(pspec) == 2
pid, ppid = map(int, pspec)
def inner_process(extractor):
ph = get_process(extractor, ppid, pid)
features = extract_process_features(extractor, ph)
for k, vs in extract_global_features(extractor).items():
features[k].update(vs)
return features
inner_process.__name__ = scope
return inner_process
else:
raise ValueError("unexpected scope fixture")
@@ -516,6 +601,80 @@ def parametrize(params, values, **kwargs):
return pytest.mark.parametrize(params, values, ids=ids, **kwargs)
# Presence expectations for features extracted from a dynamic (CAPE) report.
# Each row is a (sample, scope, feature, expected) tuple, where `expected` is a
# boolean. The scope strings used here are `file`, `process=(pid:ppid)` and
# `process=(pid:ppid),thread=tid`.
DYNAMIC_FEATURE_PRESENCE_TESTS = sorted(
[
# file/string
("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), True),
("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), True),
("0000a657", "file", capa.features.common.String("nope"), False),
# file/sections
("0000a657", "file", capa.features.file.Section(".rdata"), True),
("0000a657", "file", capa.features.file.Section(".nope"), False),
# file/imports
("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), True),
("0000a657", "file", capa.features.file.Import("Nope"), False),
# file/exports
("0000a657", "file", capa.features.file.Export("Nope"), False),
# process/environment variables
(
"0000a657",
"process=(1180:3052)",
capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"),
True,
),
("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False),
# thread/api calls
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False),
# thread/number call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), False),
# thread/string call argument
# NOTE(review): the two rows below are disabled; the reason is not recorded here - confirm before re-enabling.
# ("0000a657", "process=(2852:3052),thread=500", capa.features.common.String("NtQuerySystemInformation"), True),
# ("0000a657", "process=(2852:3052),thread=500", capa.features.common.String("nope"), False),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
)
# Count expectations for features extracted from a dynamic (CAPE) report.
# Each row is a (sample, scope, feature, expected) tuple, where `expected` is an
# integer count (0 for a feature that should not be found). The scope strings
# used here are `file`, `process=(pid:ppid)` and `process=(pid:ppid),thread=tid`.
DYNAMIC_FEATURE_COUNT_TESTS = sorted(
[
# file/string
("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1),
("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), 1),
("0000a657", "file", capa.features.common.String("nope"), 0),
# file/sections
("0000a657", "file", capa.features.file.Section(".rdata"), 1),
("0000a657", "file", capa.features.file.Section(".nope"), 0),
# file/imports
("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), 1),
("0000a657", "file", capa.features.file.Import("Nope"), 0),
# file/exports
("0000a657", "file", capa.features.file.Export("Nope"), 0),
# process/environment variables
(
"0000a657",
"process=(1180:3052)",
capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"),
1,
),
("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), 0),
# thread/api calls
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), 5),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), 0),
# thread/number call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), 1),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), 0),
# thread/string call argument
# NOTE(review): the two disabled rows below still carry boolean expectations (True/False),
# while every enabled row in this table uses an integer count; convert them before re-enabling.
# ("0000a657", "process=(2852:3052),thread=500", capa.features.common.String("NtQuerySystemInformation"), True),
# ("0000a657", "process=(2852:3052),thread=500", capa.features.common.String("nope"), False),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
)
FEATURE_PRESENCE_TESTS = sorted(
[
# file/characteristic("embedded pe")

View File

@@ -0,0 +1,27 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import fixtures
from fixtures import *
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.DYNAMIC_FEATURE_PRESENCE_TESTS,
    indirect=["sample", "scope"],
)
def test_cape_features(sample, scope, feature, expected):
    """
    Run the shared feature-presence check against the CAPE extractor.

    One case is generated per row of DYNAMIC_FEATURE_PRESENCE_TESTS; `sample`
    and `scope` are parametrized indirectly (through fixtures of those names).
    """
    build_extractor = fixtures.get_cape_extractor
    fixtures.do_test_feature_presence(build_extractor, sample, scope, feature, expected)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.DYNAMIC_FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_viv_feature_counts(sample, scope, feature, expected):
    """
    Run the shared feature-count check against the CAPE extractor.

    One case is generated per row of DYNAMIC_FEATURE_COUNT_TESTS; `sample`
    and `scope` are parametrized indirectly (through fixtures of those names).
    """
    # NOTE(review): despite the `viv` in its name, this test drives the CAPE
    # extractor; consider renaming it (e.g. test_cape_feature_counts).
    build_extractor = fixtures.get_cape_extractor
    fixtures.do_test_feature_count(build_extractor, sample, scope, feature, expected)