tests: consolidate feature test fixtures and runners

This commit is contained in:
Willi Ballenthin
2026-04-15 13:51:56 +02:00
committed by Willi Ballenthin
parent 9fd4f8dd74
commit 251a4e285f
19 changed files with 2640 additions and 1144 deletions
+558 -177
View File
@@ -12,128 +12,279 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import contextlib
import json
import logging
import contextlib
import collections
from pathlib import Path
from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
from typing import Callable, Optional, Union
import pytest
import capa.rules
import capa.engine as ceng
import capa.loader
import capa.features.basicblock
import capa.features.common
import capa.features.file
import capa.features.insn
import capa.features.common
import capa.features.basicblock
import capa.loader
import capa.rules
import capa.render.result_document
from capa.features.address import Address
from capa.features.common import (
ARCH_AMD64,
ARCH_I386,
FORMAT_AUTO,
FORMAT_DOTNET,
FORMAT_ELF,
FORMAT_PE,
OS,
OS_ANY,
OS_AUTO,
OS_LINUX,
ARCH_I386,
FORMAT_PE,
ARCH_AMD64,
FORMAT_ELF,
OS_WINDOWS,
FORMAT_AUTO,
FORMAT_DOTNET,
Arch,
Format,
Feature,
FeatureAccess,
Format,
)
from capa.features.address import Address
from capa.features.extractors.base_extractor import (
BBHandle,
CallHandle,
InsnHandle,
ThreadHandle,
ProcessHandle,
FunctionHandle,
InsnHandle,
ProcessHandle,
ThreadHandle,
)
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
logger = logging.getLogger(__name__)
CD = Path(__file__).resolve().parent
FIXTURE_MANIFEST_DIR = CD / "fixtures" / "features"
DOTNET_DIR = CD / "data" / "dotnet"
DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
def parse_feature_string(s: str) -> Feature | ceng.Range | ceng.Statement:
    """
    parse a fixture feature string into a Feature, Range, or Statement.

    the string is split on the first ": " into a key and a value, which are
    handed to `capa.rules.build_feature`.

    count(...) fixtures have a string integer value in the JSON
    (e.g. `count(basic blocks): 7`). translate that to an int so
    `build_feature` returns a Range rather than raising on an
    unrecognized range expression.

    args:
      s: fixture feature string, e.g. `api: CryptSetHashParam`.

    returns:
      whatever `capa.rules.build_feature` builds for the key/value pair.
    """
    key, _, value = s.partition(": ")

    initial_value: str | int = value
    if key.startswith("count(") and key.endswith(")"):
        try:
            initial_value = int(value)
        except ValueError:
            # leave as string so that `build_feature` can handle
            # "N or more"/"N or fewer"/"(N, M)" range expressions.
            pass

    return capa.rules.build_feature(key, initial_value, initial_description=None)
FEATURE_MARKS: dict[tuple[str, str, str], list[dict]] = {}
# scope-kind tags: one tag per kind of location a fixture can point at.
# the tag is derived from the fixture location and added to the fixture's
# tag set, so that backends supporting only a subset of scopes
# (e.g., pefile is file-only) can exclude the others via tags.
SCOPE_KIND_TAGS: frozenset[str] = frozenset(
    (
        # static scopes
        "file",
        "function",
        "basic-block",
        "instruction",
        # dynamic scopes
        "process",
        "thread",
        "call",
    )
)
# feature-type tags: derived from the key of the fixture feature string
# and added to the fixture's tag set. a backend that doesn't support a
# feature type (e.g., pefile has no function-name features) can exclude
# it by tag rather than by Python class.
#
# values come from `capa.rules.parse_feature` keys so the tag names
# align with the textual rule syntax.
FEATURE_TYPE_TAGS: frozenset[str] = frozenset(
    (
        "api",
        "string",
        "substring",
        "bytes",
        "number",
        "offset",
        "mnemonic",
        "basic blocks",
        "characteristic",
        "export",
        "import",
        "section",
        "match",
        "function-name",
        "os",
        "format",
        "arch",
        "class",
        "namespace",
        "property",
        # operand[N].X is collapsed to operand.X (index-independent)
        "operand.number",
        "operand.offset",
    )
)
def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[tuple], list[tuple], list[tuple]]:
with (CD / "fixtures" / "feature-presence.json").open("r") as f:
data = json.load(f)
presence_tests = []
symtab_tests = []
count_tests = []
ghidra_count_tests = []
dotnet_presence_tests = []
dotnet_count_tests = []
for entry in data["features"]:
feature_str = entry["feature"]
tags = entry.get("tags", [])
if "marks" in entry:
FEATURE_MARKS[(entry["file"], entry["location"], feature_str)] = entry["marks"]
if feature_str.startswith("count("):
key, _, value_str = feature_str.partition(": ")
count = int(value_str)
range_obj = capa.rules.build_feature(key, count, initial_description=None)
inner_feature = range_obj.child
test_tuple = (entry["file"], entry["location"], inner_feature, count)
if "ghidra" in tags:
ghidra_count_tests.append(test_tuple)
elif "dotnet" in tags:
dotnet_count_tests.append(test_tuple)
else:
count_tests.append(test_tuple)
else:
feature = parse_feature_string(feature_str)
test_tuple = (entry["file"], entry["location"], feature, entry["expected"])
if "symtab" in tags:
symtab_tests.append(test_tuple)
elif "dotnet" in tags:
dotnet_presence_tests.append(test_tuple)
else:
presence_tests.append(test_tuple)
presence_tests.sort(key=lambda t: (t[0], t[1]))
symtab_tests.sort(key=lambda t: (t[0], t[1]))
count_tests.sort(key=lambda t: (t[0], t[1]))
ghidra_count_tests.sort(key=lambda t: (t[0], t[1]))
dotnet_presence_tests.sort(key=lambda t: (t[0], t[1]))
dotnet_count_tests.sort(key=lambda t: (t[0], t[1]))
return (
presence_tests,
symtab_tests,
count_tests,
ghidra_count_tests,
dotnet_presence_tests,
dotnet_count_tests,
# registry of every tag a fixture may carry: the hand-written tags used for
# backend selection, plus the auto-derived scope-kind and feature-type tags.
# merged tags that are not listed here will fail collection, to catch typos.
KNOWN_FIXTURE_TAGS: frozenset[str] = frozenset(
    (
        "static",
        "dynamic",
        "dotnet",
        "elf",
        "flirt",
        "symtab",
        "ghidra",
        "binja-db",
        "binexport",
        "aarch64",
        "cape",
        "drakvuf",
        "vmray",
    )
).union(SCOPE_KIND_TAGS, FEATURE_TYPE_TAGS)
def get_scope_kind(location: str) -> str:
    """
    classify a fixture location string into a scope kind.

    reuses the same location grammar handled by `resolve_scope()`.

    args:
      location: fixture location, e.g. `file` or `function=0x404356,bb=0x4043B9`.

    returns:
      one of: file, instruction, basic-block, call, thread, process, function.

    raises:
      ValueError: if the location matches none of the known shapes.
    """
    if location == "file":
        return "file"

    # checked in this order: a location that names an enclosing scope also
    # names the inner one (e.g. `function=0x404356,bb=0x4043B9`), and the
    # first marker found decides the kind.
    markers = (
        ("insn=", "instruction"),
        ("bb=", "basic-block"),
        ("call=", "call"),
        ("thread=", "thread"),
        ("process=", "process"),
    )
    for marker, kind in markers:
        if marker in location:
            return kind

    if location.startswith("function") or location.startswith("token"):
        return "function"

    raise ValueError(f"unexpected scope location: {location}")
def get_feature_type_tag(feature_str: str) -> str:
    """
    extract the feature-type tag from a fixture feature string.

    examples:
      `api: CryptSetHashParam`        -> `api`
      `function-name: __aulldiv`      -> `function-name`
      `count(basic blocks): 7`        -> `basic blocks`
      `count(mnemonic(mov)): 3`       -> `mnemonic`
      `count(characteristic(nzxor))`  -> `characteristic`
      `operand[1].number: 0xFF`       -> `operand.number`
      `property/read: Foo.Bar`        -> `property`

    raises:
      ValueError: if a `count(` string never closes its outer parenthesis.
    """
    prefix = "count("
    if not feature_str.startswith(prefix):
        # plain feature: the tag is everything before the first colon.
        head = feature_str.partition(":")[0]
        return _normalize_feature_key(head.strip())

    # walk the string tracking paren nesting to find the close-paren that
    # matches the outer `count(`, so that nested parens and colons inside the
    # argument don't confuse a naive partition.
    nesting = 0
    for pos, char in enumerate(feature_str):
        if char == "(":
            nesting += 1
            continue
        if char != ")":
            continue
        nesting -= 1
        if nesting == 0:
            argument = feature_str[len(prefix) : pos]
            # collapse nested arg: `mnemonic(mov)` -> `mnemonic`
            name = argument.partition("(")[0]
            return _normalize_feature_key(name.strip())

    raise ValueError(f"unbalanced parentheses in feature string: {feature_str!r}")
def _normalize_feature_key(key: str) -> str:
# collapse `operand[N].X` -> `operand.X` so the tag is index-independent
if key.startswith("operand[") and "]." in key:
_, _, suffix = key.partition("].")
return f"operand.{suffix}"
# collapse `property/read` and `property/write` -> `property`
if key.startswith("property/"):
return "property"
return key
@dataclass(frozen=True)
class FixtureMark:
    """a backend-specific pytest mark recorded on a fixture entry."""

    # backend the mark applies to; compared against the policy name
    backend: str
    # kind of mark; the parametrize helper understands "skip" and "xfail"
    mark: str
    # justification handed to pytest as the mark's `reason`
    reason: str
@dataclass(frozen=True)
class FixtureFile:
    """a sample file declared in a manifest's `files` table."""

    # key that feature entries use to reference this file
    key: str
    # location of the sample on disk
    path: Path
    # tags declared on the file entry; features referencing it inherit them
    tags: frozenset[str] = frozenset()
@dataclass(frozen=True)
class FeatureFixture:
    """one feature assertion loaded from a manifest's `features` table."""

    # key of the sample file the assertion refers to
    sample_key: str
    # path of that sample, copied from the referenced file entry
    sample_path: Path
    # location within the sample, e.g. `file` or `function=0x404356,bb=0x4043B9`
    location: str
    # scope kind derived from `location`
    scope_kind: str
    # parsed form of the entry's feature string
    statement: Union[Feature, ceng.Range, ceng.Statement]
    # whether the statement is expected to match at `location`
    expected: bool = True
    # file tags, feature tags, and the auto-derived scope/feature-type tags
    tags: frozenset[str] = frozenset()
    # backend-specific skip/xfail marks
    marks: tuple[FixtureMark, ...] = ()
    # optional free-form fields carried over from the manifest entry
    explanation: Optional[str] = None
    comment: Optional[str] = None
@dataclass(frozen=True)
class BackendFeaturePolicy:
    """
    describes how one backend consumes the shared feature fixtures.

    `include_tags` and `exclude_tags` accept any iterable of tag names;
    both are stored as frozensets.
    """

    # backend name; fixture marks are matched against it
    name: str
    # builds the backend's extractor for a sample path
    get_extractor: Callable[[Path], object]
    # when non-empty, only fixtures sharing at least one of these tags are selected
    include_tags: frozenset[str] = field(default_factory=frozenset)
    # fixtures sharing any of these tags are dropped
    exclude_tags: frozenset[str] = field(default_factory=frozenset)

    def __post_init__(self):
        # the dataclass is frozen, so normalize the tag collections through
        # `object.__setattr__` rather than plain attribute assignment.
        for attr in ("include_tags", "exclude_tags"):
            object.__setattr__(self, attr, frozenset(getattr(self, attr)))
@lru_cache(maxsize=1)
def _load_feature_fixture_manifests() -> tuple[tuple[Path, dict], ...]:
    """
    read every `*.json` manifest found directly in `FIXTURE_MANIFEST_DIR`.

    manifests are visited in sorted path order, and the result is memoized
    by `lru_cache`.

    returns:
      a tuple of (manifest path, decoded JSON document) pairs.

    raises:
      ValueError: if the directory contains no `*.json` files.
    """
    manifests = []
    for path in sorted(FIXTURE_MANIFEST_DIR.glob("*.json")):
        # JSON is UTF-8 by specification; don't depend on the platform's
        # default text encoding.
        with path.open("r", encoding="utf-8") as f:
            manifests.append((path, json.load(f)))

    if not manifests:
        raise ValueError(f"no fixture manifests found in {FIXTURE_MANIFEST_DIR}")

    return tuple(manifests)
@lru_cache(maxsize=1)
def _load_fixture_file_paths() -> dict[str, Path]:
    """
    map each fixture file key to its sample path.

    the mapping is built from `load_feature_fixture_files()` and memoized
    by `lru_cache`.
    """
    return {key: file.path for key, file in load_feature_fixture_files().items()}
def get_fixture_file_path(key: str) -> Path:
@@ -143,27 +294,218 @@ def get_fixture_file_path(key: str) -> Path:
return paths[key]
def apply_backend_marks(backend: str, sample_key: str, feature: Feature):
"""Apply skip/xfail marks from fixtures for a specific backend.
Args:
backend: backend name matching marks in fixtures (e.g. "idalib", "freeze")
sample_key: the file key from fixtures (e.g. "mimikatz", "pma12-04")
feature: the parsed Feature object to match against
@lru_cache(maxsize=1)
def load_feature_fixture_files() -> dict[str, FixtureFile]:
    """
    load the combined `files` tables from `tests/fixtures/features/*.json`.

    file entries may include a `tags` list that will be inherited
    by feature fixtures that reference the file.

    returns:
      a dict from fixture file key to its `FixtureFile`.

    raises:
      ValueError: if a file key is declared more than once, or if a file
        entry carries a tag that is not in `KNOWN_FIXTURE_TAGS`.
    """
    files: dict[str, FixtureFile] = {}
    # remember which manifest declared each key, for the duplicate-key message.
    file_sources: dict[str, Path] = {}

    for manifest_path, data in _load_feature_fixture_manifests():
        for entry in data["files"]:
            key = entry["key"]
            if key in files:
                raise ValueError(
                    f"duplicate fixture file key {key!r} in {file_sources[key]} and {manifest_path}"
                )

            tags = frozenset(entry.get("tags", []))
            unknown = tags - KNOWN_FIXTURE_TAGS
            if unknown:
                raise ValueError(
                    f"unknown fixture tag(s) on file {key!r} in {manifest_path}: {sorted(unknown)}"
                )

            files[key] = FixtureFile(
                key=key,
                # manifest paths are joined onto the directory holding this module
                path=CD / entry["path"],
                tags=tags,
            )
            file_sources[key] = manifest_path

    return files
@lru_cache(maxsize=1)
def load_feature_fixtures() -> tuple[FeatureFixture, ...]:
    """
    load the full list of feature fixtures from `tests/fixtures/features/*.json`.

    for each feature entry: inherit the tags of the referenced file, validate
    the declared tags against the known registry, parse the statement
    (including `count(...)`), and default `expected` to True.

    returns:
      all fixtures, ordered by (sample key, location).

    raises:
      ValueError: if an entry references an unknown file key or carries an
        unknown tag.
    """
    known_files = load_feature_fixture_files()
    loaded: list[FeatureFixture] = []

    for manifest_path, manifest in _load_feature_fixture_manifests():
        for entry in manifest["features"]:
            sample_key = entry["file"]
            sample = known_files.get(sample_key)
            if sample is None:
                raise ValueError(
                    f"unknown fixture file key referenced by feature in {manifest_path}: {sample_key!r}"
                )

            feature_str: str = entry["feature"]
            declared_tags = sample.tags | frozenset(entry.get("tags", []))
            unrecognized = declared_tags - KNOWN_FIXTURE_TAGS
            if unrecognized:
                raise ValueError(
                    f"unknown fixture tag(s) on feature {feature_str!r} for file {sample_key!r} in {manifest_path}: {sorted(unrecognized)}"
                )

            location = entry["location"]
            statement = parse_feature_string(feature_str)
            scope_kind = get_scope_kind(location)
            feature_type_tag = get_feature_type_tag(feature_str)

            loaded.append(
                FeatureFixture(
                    sample_key=sample_key,
                    sample_path=sample.path,
                    location=location,
                    scope_kind=scope_kind,
                    statement=statement,
                    expected=entry.get("expected", True),
                    # scope-kind and feature-type tags are auto-derived so that
                    # backend policies can include/exclude scopes and feature
                    # types purely via `include_tags`/`exclude_tags`. they are
                    # added after the validation above and not re-checked.
                    tags=declared_tags | {scope_kind, feature_type_tag},
                    marks=tuple(
                        FixtureMark(
                            backend=m["backend"], mark=m["mark"], reason=m["reason"]
                        )
                        for m in entry.get("marks", [])
                    ),
                    explanation=entry.get("explanation"),
                    comment=entry.get("comment"),
                )
            )

    return tuple(sorted(loaded, key=lambda fx: (fx.sample_key, fx.location)))
@dataclass(frozen=True)
class FixtureSelectionSummary:
    """counts describing how a backend policy filtered the fixture list."""

    # number of fixtures considered
    total: int
    # number of fixtures the policy selected
    selected: int
    # number of fixtures the policy dropped
    excluded: int
    # for each tag, how many dropped fixtures carried it
    excluded_by_tag: dict[str, int]
def _fixture_is_included(policy: BackendFeaturePolicy, fixture: FeatureFixture) -> bool:
    """
    decide whether a fixture is selected by a policy.

    a fixture is selected when it shares a tag with `include_tags` (or
    `include_tags` is empty) and shares no tag with `exclude_tags`.
    """
    wanted = policy.include_tags
    if wanted and fixture.tags.isdisjoint(wanted):
        return False
    return fixture.tags.isdisjoint(policy.exclude_tags)
def select_feature_fixtures(policy: BackendFeaturePolicy) -> list[FeatureFixture]:
    """
    select fixtures matching a backend policy.

    rules (applied in order):
      1. start from all fixtures
      2. if `include_tags` is non-empty, keep fixtures whose tags intersect it
      3. drop fixtures whose tags intersect `exclude_tags`

    scope kinds and feature types are exposed as auto-derived tags, so
    a policy can restrict scope or feature type via `exclude_tags` too.

    returns:
      the selected fixtures, in the order `load_feature_fixtures()` yields them.
    """
    return list(
        filter(
            lambda candidate: _fixture_is_included(policy, candidate),
            load_feature_fixtures(),
        )
    )
def summarize_feature_selection(
    policy: BackendFeaturePolicy,
) -> FixtureSelectionSummary:
    """
    summarize the effect of a policy's fixture selection.

    useful for debug output and maintenance scripts.

    note that `excluded_by_tag` counts every tag carried by an excluded
    fixture, not only the tag(s) that caused the exclusion.
    """
    all_fixtures = load_feature_fixtures()
    excluded = [
        fixture
        for fixture in all_fixtures
        if not _fixture_is_included(policy, fixture)
    ]

    excluded_by_tag = collections.Counter(
        tag for fixture in excluded for tag in sorted(fixture.tags)
    )

    return FixtureSelectionSummary(
        total=len(all_fixtures),
        selected=len(all_fixtures) - len(excluded),
        excluded=len(excluded),
        excluded_by_tag=dict(excluded_by_tag),
    )
def _fixture_test_id(fixture: FeatureFixture) -> str:
    """
    build a readable pytest parameter id for a fixture.

    mirrors the legacy `make_test_id` shape: sample-location-statement-expected.
    """
    parts = (
        fixture.sample_key,
        fixture.location,
        fixture.statement,
        fixture.expected,
    )
    return "-".join(str(part) for part in parts)
def parametrize_backend_feature_fixtures(policy: BackendFeaturePolicy):
    """
    build a pytest parametrize decorator for a backend's selected fixtures.

    applies JSON marks matching `policy.name` to the parameter set, so
    backend-specific skip/xfail behavior stays in the JSON data file.
    marks recorded for other backends are ignored.

    returns:
      a `pytest.mark.parametrize` decorator binding the `feature_fixture`
      argument to each selected fixture.

    raises:
      ValueError: if a mark for this backend is neither "skip" nor "xfail".
    """
    params = []
    for fixture in select_feature_fixtures(policy):
        marks = []
        for mark in fixture.marks:
            if mark.backend != policy.name:
                continue

            if mark.mark == "skip":
                marks.append(pytest.mark.skip(reason=mark.reason))
            elif mark.mark == "xfail":
                marks.append(pytest.mark.xfail(reason=mark.reason))
            else:
                raise ValueError(
                    f"unknown mark {mark.mark!r} for backend {policy.name!r}"
                )

        params.append(pytest.param(fixture, marks=marks, id=_fixture_test_id(fixture)))

    return pytest.mark.parametrize("feature_fixture", params)
def run_feature_fixture(policy: BackendFeaturePolicy, fixture: FeatureFixture) -> None:
    """
    generic runner that evaluates a feature fixture against a backend.

    builds the backend's extractor for the sample, resolves the fixture
    location to a feature set, and evaluates the statement against it.
    plain features and `count(...)` statements share this one `evaluate`
    path; the boolean result is asserted equal to `fixture.expected`.
    """
    extractor = policy.get_extractor(fixture.sample_path)
    features = resolve_scope(fixture.location)(extractor)
    matched = bool(fixture.statement.evaluate(features))

    msg = (
        f"{fixture.statement} should match in {fixture.location}"
        if fixture.expected
        else f"{fixture.statement} should not match in {fixture.location}"
    )
    assert matched == fixture.expected, msg
@contextlib.contextmanager
@@ -209,6 +551,7 @@ def xfail(condition, reason: str = ""):
def get_viv_extractor(path: Path):
import capa.loader
import capa.features.extractors.viv.extractor
import capa.main
sigpaths = [
CD / "data" / "sigs" / "test_aulldiv.pat",
@@ -225,7 +568,9 @@ def get_viv_extractor(path: Path):
else:
vw = capa.loader.get_workspace(path, FORMAT_AUTO, sigpaths=sigpaths)
vw.saveWorkspace()
extractor = capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, OS_AUTO)
extractor = capa.features.extractors.viv.extractor.VivisectFeatureExtractor(
vw, path, OS_AUTO
)
fixup_viv(path, extractor)
return extractor
@@ -320,8 +665,8 @@ def get_idalib_extractor(path: Path):
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idapro
import ida_auto
import idapro
import capa.features.extractors.ida.extractor
@@ -361,8 +706,8 @@ def fixup_idalib(path: Path, extractor):
"""
IDA fixups to overcome differences between backends
"""
import idaapi
import ida_funcs
import idaapi
def remove_library_id_flag(fva):
f = idaapi.get_func(fva)
@@ -380,8 +725,8 @@ def fixup_idalib(path: Path, extractor):
@lru_cache(maxsize=1)
def get_cape_extractor(path):
from capa.helpers import load_json_from_path
from capa.features.extractors.cape.extractor import CapeExtractor
from capa.helpers import load_json_from_path
report = load_json_from_path(path)
@@ -390,8 +735,8 @@ def get_cape_extractor(path):
@lru_cache(maxsize=1)
def get_drakvuf_extractor(path):
from capa.helpers import load_jsonl_from_path
from capa.features.extractors.drakvuf.extractor import DrakvufExtractor
from capa.helpers import load_jsonl_from_path
report = load_jsonl_from_path(path)
@@ -447,10 +792,14 @@ def get_binexport_extractor(path):
be2 = capa.features.extractors.binexport2.get_binexport2(path)
search_paths = [CD / "data", CD / "data" / "aarch64"]
path = capa.features.extractors.binexport2.get_sample_from_binexport2(path, be2, search_paths)
path = capa.features.extractors.binexport2.get_sample_from_binexport2(
path, be2, search_paths
)
buf = path.read_bytes()
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(
be2, buf
)
def extract_global_features(extractor):
@@ -533,6 +882,13 @@ def extract_instruction_features(extractor, fh, bbh, ih) -> dict[Feature, set[Ad
# note: to reduce the testing time it's recommended to reuse already existing test samples, if possible
def get_data_path_by_name(name) -> Path:
# prefer the fixture manifest registry; fall back to the legacy hard-coded
# branches below for any keys not yet migrated.
lookup_key = name[:-3] if name.endswith("...") else name
json_paths = _load_fixture_file_paths()
if lookup_key in json_paths:
return json_paths[lookup_key]
if name == "mimikatz":
return CD / "data" / "mimikatz.exe_"
elif name == "kernel32":
@@ -556,7 +912,11 @@ def get_data_path_by_name(name) -> Path:
elif name == "al-khaser x64":
return CD / "data" / "al-khaser_x64.exe_"
elif name.startswith("39c05"):
return CD / "data" / "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_"
return (
CD
/ "data"
/ "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_"
)
elif name.startswith("499c2"):
return CD / "data" / "499c2a85f6e8142c3f48d4251c9c7cd6.raw32"
elif name.startswith("9324d"):
@@ -580,7 +940,11 @@ def get_data_path_by_name(name) -> Path:
elif name.startswith("77329"):
return CD / "data" / "773290480d5445f11d3dc1b800728966.exe_"
elif name.startswith("3b13b"):
return CD / "data" / "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_"
return (
CD
/ "data"
/ "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_"
)
elif name == "7351f.elf":
return CD / "data" / "7351f8a40c5450557b24622417fc478d.elf_"
elif name == "055da8e6.elf":
@@ -596,25 +960,54 @@ def get_data_path_by_name(name) -> Path:
elif name.startswith("b9f5b"):
return CD / "data" / "b9f5bd514485fb06da39beff051b9fdc.exe_"
elif name.startswith("mixed-mode-64"):
return DNFILE_TESTFILES / "mixed-mode" / "ModuleCode" / "bin" / "ModuleCode_amd64.exe"
return (
DNFILE_TESTFILES
/ "mixed-mode"
/ "ModuleCode"
/ "bin"
/ "ModuleCode_amd64.exe"
)
elif name.startswith("hello-world"):
return DNFILE_TESTFILES / "hello-world" / "hello-world.exe"
elif name.startswith("_1c444"):
return DOTNET_DIR / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_"
elif name.startswith("_387f15"):
return DOTNET_DIR / "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_"
return (
DOTNET_DIR
/ "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_"
)
elif name.startswith("_692f"):
return DOTNET_DIR / "692f7fd6d198e804d6af98eb9e390d61.exe_"
elif name.startswith("_0953c"):
return CD / "data" / "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_"
return (
CD
/ "data"
/ "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_"
)
elif name.startswith("_039a6"):
return CD / "data" / "039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_"
return (
CD
/ "data"
/ "039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_"
)
elif name.startswith("b5f052"):
return CD / "data" / "b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_"
return (
CD
/ "data"
/ "b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_"
)
elif name.startswith("bf7a9c"):
return CD / "data" / "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_"
return (
CD
/ "data"
/ "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_"
)
elif name.startswith("294b8d"):
return CD / "data" / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_"
return (
CD
/ "data"
/ "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_"
)
elif name.startswith("2bf18d"):
return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_"
elif name.startswith("2d3edc"):
@@ -670,11 +1063,23 @@ def get_data_path_by_name(name) -> Path:
/ "eb12873c0ce3e9ea109c2a447956cbd10ca2c3e86936e526b2c6e28764999f21_min_archive.zip"
)
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
return (
CD
/ "data"
/ "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
)
elif name.startswith("1038a2"):
return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
return (
CD
/ "data"
/ "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
)
elif name.startswith("3da7c"):
return CD / "data" / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_"
return (
CD
/ "data"
/ "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_"
)
elif name.startswith("nested_typedef"):
return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_"
elif name.startswith("nested_typeref"):
@@ -820,7 +1225,9 @@ def get_basic_block(extractor, fh: FunctionHandle, va: int) -> BBHandle:
raise ValueError("basic block not found")
def get_instruction(extractor, fh: FunctionHandle, bbh: BBHandle, va: int) -> InsnHandle:
def get_instruction(
extractor, fh: FunctionHandle, bbh: BBHandle, va: int
) -> InsnHandle:
for ih in extractor.get_instructions(fh, bbh):
if isinstance(extractor, DnfileFeatureExtractor):
addr = ih.inner.offset
@@ -980,65 +1387,28 @@ def parametrize(params, values, **kwargs):
return pytest.mark.parametrize(params, values, ids=ids, **kwargs)
(
FEATURE_PRESENCE_TESTS,
FEATURE_SYMTAB_FUNC_TESTS,
FEATURE_COUNT_TESTS,
FEATURE_COUNT_TESTS_GHIDRA,
FEATURE_PRESENCE_TESTS_DOTNET,
FEATURE_COUNT_TESTS_DOTNET,
) = _load_feature_tests()
# legacy tuple-of-tuples lists still needed by `test_binexport_features.py`,
# which rewrites a mimikatz sample path to its `.ghidra.BinExport` counterpart
# at test time.
#
# built from the new `load_feature_fixtures()` so the JSON manifests remain the
# single source of truth for fixture data.
FEATURE_PRESENCE_TESTS: list[tuple] = sorted(
(
(f.sample_key, f.location, f.statement, f.expected)
for f in load_feature_fixtures()
if not isinstance(f.statement, ceng.Range)
and not (f.tags & frozenset({"dotnet", "symtab"}))
),
key=lambda t: (t[0], t[1]),
)
FEATURE_PRESENCE_TESTS_IDA = [
# file/imports
# IDA can recover more names of APIs imported by ordinal
("mimikatz", "file", capa.features.file.Import("cabinet.FCIAddFile"), True),
]
FEATURE_BINJA_DATABASE_TESTS = sorted(
[
# insn/regex
(
"pma16-01_binja_db",
"function=0x4021B0",
capa.features.common.Regex("HTTP/1.0"),
True,
),
(
"pma16-01_binja_db",
"function=0x402F40",
capa.features.common.Regex("www.practicalmalwareanalysis.com"),
True,
),
(
"pma16-01_binja_db",
"function=0x402F40",
capa.features.common.Substring("practicalmalwareanalysis.com"),
True,
),
(
"pma16-01_binja_db",
"file",
capa.features.file.FunctionName("__aulldiv"),
True,
),
# os & format & arch
("pma16-01_binja_db", "file", OS(OS_WINDOWS), True),
("pma16-01_binja_db", "file", OS(OS_LINUX), False),
("pma16-01_binja_db", "function=0x404356", OS(OS_WINDOWS), True),
("pma16-01_binja_db", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
("pma16-01_binja_db", "file", Arch(ARCH_I386), True),
("pma16-01_binja_db", "file", Arch(ARCH_AMD64), False),
("pma16-01_binja_db", "function=0x404356", Arch(ARCH_I386), True),
("pma16-01_binja_db", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
("pma16-01_binja_db", "file", Format(FORMAT_PE), True),
("pma16-01_binja_db", "file", Format(FORMAT_ELF), False),
# format is also a global feature
("pma16-01_binja_db", "function=0x404356", Format(FORMAT_PE), True),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
FEATURE_COUNT_TESTS_GHIDRA: list[tuple] = sorted(
(
(f.sample_key, f.location, f.statement.child, f.statement.min)
for f in load_feature_fixtures()
if isinstance(f.statement, ceng.Range) and "ghidra" in f.tags
),
key=lambda t: (t[0], t[1]),
)
@@ -1202,20 +1572,26 @@ def get_result_doc(path: Path):
@pytest.fixture
def pma0101_rd():
# python -m capa.main tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ --json > tests/data/rd/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_.json
return get_result_doc(CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json")
return get_result_doc(
CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"
)
@pytest.fixture
def dotnet_1c444e_rd():
# .NET sample
# python -m capa.main tests/data/dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_ --json > tests/data/rd/1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json
return get_result_doc(CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json")
return get_result_doc(
CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json"
)
@pytest.fixture
def a3f3bbc_rd():
# python -m capa.main tests/data/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_ --json > tests/data/rd/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json
return get_result_doc(CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json")
return get_result_doc(
CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json"
)
@pytest.fixture
@@ -1233,7 +1609,9 @@ def al_khaserx64_rd():
@pytest.fixture
def a076114_rd():
# python -m capa.main tests/data/0761142efbda6c4b1e801223de723578.dll_ --json > tests/data/rd/0761142efbda6c4b1e801223de723578.dll_.json
return get_result_doc(CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json")
return get_result_doc(
CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json"
)
@pytest.fixture
@@ -1241,5 +1619,8 @@ def dynamic_a0000a6_rd():
# python -m capa.main tests/data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json --json > tests/data/rd/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json
# gzip tests/data/rd/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json
return get_result_doc(
CD / "data" / "rd" / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
CD
/ "data"
/ "rd"
/ "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
)
+169
View File
@@ -0,0 +1,169 @@
# backend feature fixtures
This spec describes how contributors should add and consume backend feature fixtures.
## Scope
This spec covers feature-fixture tests only. It does not cover extractor helper tests, CLI smoke tests, or other bespoke tests.
## Source of truth
Feature fixtures live in these JSON manifests under `tests/fixtures/features/`:
- `static.json`
- `binja-db.json`
- `binexport.json`
- `cape.json`
- `drakvuf.json`
- `vmray.json`
Each manifest contains:
- a `files` list that maps fixture keys to sample paths
- a `features` list that describes feature assertions
The loader reads all of these manifests and combines them into one fixture set.
A backend feature test should not maintain its own private list of feature fixtures if the same information can be expressed in these JSON manifests.
## Fixture shape
Each feature fixture specifies:
- the sample key
- the location within the sample
- the feature or statement to evaluate
- optional tags
- optional backend marks
- optional `expected: false`
If `expected` is omitted, it means `true`.
This applies to ordinary feature assertions and `count(...)` assertions.
Examples:
```json
{
"file": "pma16-01",
"location": "file",
"feature": "format: pe"
}
```
```json
{
"file": "mimikatz",
"location": "function=0x40E5C2",
"feature": "count(basic blocks): 7"
}
```
```json
{
"file": "mimikatz",
"location": "function=0x401000",
"feature": "characteristic: loop",
"expected": false
}
```
## Tags
Tags are used to describe fixture requirements or sample properties that backends may need for selection.
Examples include:
- `dotnet`
- `elf`
- `dynamic`
- `flirt`
- `symtab`
- `binja-db`
- `binexport`
- `aarch64`
Tags may appear on file entries or feature entries. File tags are inherited by every feature that references that file.
Tags should not duplicate information that can already be derived from:
- the location string
- the parsed feature type
Unknown tags should fail collection.
## Backend selection
Backends consume one shared fixture list and select the fixtures they support.
Large backends should prefer exclusion-based selection. This means new fixtures run by default unless they are explicitly out of scope.
Examples:
- `viv` excludes `.NET`
- `ghidra` excludes `.NET`
- `binja` excludes `.NET`
- `idalib` excludes `.NET`
Small-surface backends may use inclusion-based selection where that is clearer.
Examples:
- `dnfile` includes `.NET`
- `dotnetfile` includes `.NET`
Backends may also restrict supported scopes or feature types.
## Backend test file shape
A backend feature test file should normally have:
- one backend policy object
- one feature-test entry point that consumes shared fixtures
For example:
```python
import fixtures
BACKEND = fixtures.BackendFeaturePolicy(
name="viv",
get_extractor=fixtures.get_viv_extractor,
exclude_tags={"dotnet"},
)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_viv_features(feature_fixture):
fixtures.run_feature_fixture(BACKEND, feature_fixture)
```
Module-level availability checks are still allowed. Runtime-specific hooks are allowed only when they depend on the installed backend or tool version and cannot be represented declaratively in the fixture manifests.
## Known bugs and marks
Known backend bugs should be represented in the fixture manifests through backend-specific marks.
Backends should not usually edit the shared JSON manifests just to avoid a fixture. They should prefer selecting or excluding fixtures through backend policy.
The main reason to keep marks in JSON is to record known exceptions such as:
- a backend-specific `xfail`
- a backend-specific `skip`
## Expected contributor workflow
When adding a new feature test:
1. add the sample path to the appropriate JSON manifest `files` list if it is not already present
2. add the feature fixture to that manifest `features` list
3. add tags only when they express a real requirement or sample property
4. omit `expected` unless the expected result is `false`
5. use JSON marks only for known backend bugs
When adding a new backend:
1. create one backend feature test file
2. define one backend policy describing extractor and exclusions
3. use the shared feature runner
4. add runtime hooks only if the environment or installed tool version requires them
+517
View File
@@ -0,0 +1,517 @@
{
"files": [
{
"key": "687e79.ghidra.be2",
"path": "data/binexport2/687e79cde5b0ced75ac229465835054931f9ec438816f2827a8be5f3bd474929.elf_.ghidra.BinExport",
"tags": [
"binexport",
"elf",
"aarch64"
]
},
{
"key": "d1e650.ghidra.be2",
"path": "data/binexport2/d1e6506964edbfffb08c0dd32e1486b11fbced7a4bd870ffe79f110298f0efb8.elf_.ghidra.BinExport",
"tags": [
"binexport",
"elf",
"aarch64"
]
}
],
"features": [
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "string: AppDataService start",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "string: nope",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "section: .text",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "section: .nope",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "export: android::clearDir",
"expected": true,
"marks": [
{
"backend": "binexport",
"mark": "xfail",
"reason": "name demangling is not implemented"
}
]
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "export: nope",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "import: fopen",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "import: exit",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "import: _ZN7android10IInterfaceD0Ev",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "import: nope",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1056c0",
"feature": "characteristic: loop",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1075c0",
"feature": "characteristic: loop",
"expected": false
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x114af4",
"feature": "characteristic: tight loop",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x118F1C",
"feature": "characteristic: tight loop",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x11464c",
"feature": "characteristic: tight loop",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x0",
"feature": "characteristic: stack string",
"expected": true,
"marks": [
{
"backend": "binexport",
"mark": "xfail",
"reason": "stack string detection not implemented yet for binexport"
}
]
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "mnemonic: stp",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "mnemonic: adrp",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "mnemonic: bl",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "mnemonic: in",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "mnemonic: adrl",
"expected": false
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x11451c",
"feature": "number: 0x10",
"expected": false,
"comment": "00114524 add x29,sp,#0x10"
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105128",
"feature": "number: 0xE0",
"expected": false,
"comment": "00105128 sub sp,sp,#0xE0"
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105128,bb=0x1051e4",
"feature": "operand[1].number: 0xFFFFFFFF",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588,bb=0x107588",
"feature": "operand[1].number: 0x8",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588,bb=0x107588,insn=0x1075a4",
"feature": "operand[1].number: 0x8",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105128,bb=0x105450",
"feature": "operand[2].offset: 0x10",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x124854,bb=0x1248AC,insn=0x1248B4",
"feature": "operand[2].offset: -0x48",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x13347c,bb=0x133548,insn=0x133554",
"feature": "operand[2].offset: 0x20",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105C88",
"feature": "number: 0xF000",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1057f8,bb=0x1057f8",
"feature": "number: 0xFFFFFFFFFFFFFFFF",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1066e0,bb=0x1068c4",
"feature": "number: 0xFFFFFFFF",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105128,bb=0x105450",
"feature": "offset: 0x10",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x13347c,bb=0x133548,insn=0x133554",
"feature": "offset: 0x20",
"expected": false,
"comment": "ldp x29,x30,[sp, #0x20]"
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x1183e0,bb=0x11849c,insn=0x1184b0",
"feature": "offset: 0x8",
"expected": true,
"comment": "stp x20,x0,[x19, #0x8]"
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x138688,bb=0x138994,insn=0x1389a8",
"feature": "offset: 0x8",
"expected": true,
"comment": "str xzr,[x8, #0x8]!"
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x138688,bb=0x138978,insn=0x138984",
"feature": "offset: 0x8",
"expected": true,
"comment": "ldr x9,[x8, #0x8]!"
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x11451c",
"feature": "offset: 0x20",
"expected": false,
"comment": "ldr x19,[sp], #0x20"
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x138a9c,bb=0x138b00,insn=0x138b00",
"feature": "offset: 0x1",
"expected": true,
"comment": "ldrb w9,[x8, #0x1]"
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x124854,bb=0x1248AC,insn=0x1248B4",
"feature": "offset: -0x48",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105128,bb=0x105128,insn=0x10514c",
"feature": "offset: 0x8",
"expected": true,
"comment": "0010514c add x23,param_1,#0x8"
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105c88",
"feature": "api: memset",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105c88",
"feature": "api: Nope",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "string: AppDataService start",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1075c0",
"feature": "string: AppDataService",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "string: nope",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x106d58",
"feature": "string: /data/misc/wifi/wpa_supplicant.conf",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105c88",
"feature": "string: /innerRename/",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x106d58",
"feature": "string: /\\/data\\/misc/",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x106d58",
"feature": "substring: /data/misc",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x1165a4",
"feature": "bytes: E4 05 B8 93 70 BA 6B 41 9C D7 92 52 75 BF 6F CC 1E 83 60 CC",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1057f8",
"feature": "bytes: 2F 00 73 00 79 00 73 00 74 00 65 00 6D 00 2F 00 78 00 62 00 69 00 6E 00 2F 00 62 00 75 00 73 00 79 00 62 00 6F 00 78 00",
"expected": false,
"comment": "don't extract byte features for obvious strings"
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x114af4",
"feature": "characteristic: nzxor",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x117988",
"feature": "characteristic: nzxor",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105b38",
"feature": "characteristic: recursive call",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x106530",
"feature": "characteristic: recursive call",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x118620",
"feature": "characteristic: indirect call",
"expected": true
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x118500",
"feature": "characteristic: indirect call",
"expected": false
},
{
"file": "d1e650.ghidra.be2",
"location": "function=0x11451c",
"feature": "characteristic: indirect call",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x105080",
"feature": "characteristic: calls from",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1070e8",
"feature": "characteristic: calls from",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1075c0",
"feature": "characteristic: calls to",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "function-name: __libc_init",
"expected": true,
"marks": [
{
"backend": "binexport",
"mark": "xfail",
"reason": "TODO should this be a function-name?"
}
]
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "os: android",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "os: linux",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "os: windows",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "os: android",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1075c0,bb=0x1076c0",
"feature": "os: android",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "arch: i386",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "arch: amd64",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "arch: aarch64",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "arch: aarch64",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x1075c0,bb=0x1076c0",
"feature": "arch: aarch64",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "format: elf",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "file",
"feature": "format: pe",
"expected": false
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "format: elf",
"expected": true
},
{
"file": "687e79.ghidra.be2",
"location": "function=0x107588",
"feature": "format: pe",
"expected": false
}
]
}
+91
View File
@@ -0,0 +1,91 @@
{
"files": [
{
"key": "pma16-01_binja_db",
"path": "data/Practical Malware Analysis Lab 16-01.exe_.bndb",
"tags": [
"binja-db"
]
}
],
"features": [
{
"file": "pma16-01_binja_db",
"location": "function=0x4021B0",
"feature": "string: /HTTP/1.0/"
},
{
"file": "pma16-01_binja_db",
"location": "function=0x402F40",
"feature": "string: /www.practicalmalwareanalysis.com/"
},
{
"file": "pma16-01_binja_db",
"location": "function=0x402F40",
"feature": "substring: practicalmalwareanalysis.com"
},
{
"file": "pma16-01_binja_db",
"location": "file",
"feature": "function-name: __aulldiv"
},
{
"file": "pma16-01_binja_db",
"location": "file",
"feature": "os: windows"
},
{
"file": "pma16-01_binja_db",
"location": "file",
"feature": "os: linux",
"expected": false
},
{
"file": "pma16-01_binja_db",
"location": "function=0x404356",
"feature": "os: windows"
},
{
"file": "pma16-01_binja_db",
"location": "function=0x404356,bb=0x4043B9",
"feature": "os: windows"
},
{
"file": "pma16-01_binja_db",
"location": "file",
"feature": "arch: i386"
},
{
"file": "pma16-01_binja_db",
"location": "file",
"feature": "arch: amd64",
"expected": false
},
{
"file": "pma16-01_binja_db",
"location": "function=0x404356",
"feature": "arch: i386"
},
{
"file": "pma16-01_binja_db",
"location": "function=0x404356,bb=0x4043B9",
"feature": "arch: i386"
},
{
"file": "pma16-01_binja_db",
"location": "file",
"feature": "format: pe"
},
{
"file": "pma16-01_binja_db",
"location": "file",
"feature": "format: elf",
"expected": false
},
{
"file": "pma16-01_binja_db",
"location": "function=0x404356",
"feature": "format: pe"
}
]
}
+221
View File
@@ -0,0 +1,221 @@
{
"files": [
{
"key": "0000a657",
"path": "data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz",
"tags": [
"dynamic",
"cape"
]
},
{
"key": "d46900",
"path": "data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz",
"tags": [
"dynamic",
"cape"
]
}
],
"features": [
{
"file": "0000a657",
"location": "file",
"feature": "string: T_Ba?.BcRJa"
},
{
"file": "0000a657",
"location": "file",
"feature": "string: GetNamedPipeClientSessionId"
},
{
"file": "0000a657",
"location": "file",
"feature": "string: nope",
"expected": false
},
{
"file": "0000a657",
"location": "file",
"feature": "section: .rdata"
},
{
"file": "0000a657",
"location": "file",
"feature": "section: .nope",
"expected": false
},
{
"file": "0000a657",
"location": "file",
"feature": "import: NdrSimpleTypeUnmarshall"
},
{
"file": "0000a657",
"location": "file",
"feature": "import: Nope",
"expected": false
},
{
"file": "0000a657",
"location": "file",
"feature": "export: Nope",
"expected": false
},
{
"file": "0000a657",
"location": "process=(1180:3052)",
"feature": "string: C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"
},
{
"file": "0000a657",
"location": "process=(1180:3052)",
"feature": "string: nope",
"expected": false
},
{
"file": "0000a657",
"location": "process=(2900:2852),thread=2904",
"feature": "api: RegQueryValueExA"
},
{
"file": "0000a657",
"location": "process=(2900:2852),thread=2904",
"feature": "api: RegQueryValueEx"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "api: NtQueryValueKey"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "api: GetActiveWindow",
"expected": false
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "number: 0xEC"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "number: 110173",
"expected": false
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "string: SetThreadUILanguage"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "string: nope",
"expected": false
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804,call=56",
"feature": "api: NtQueryValueKey"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804,call=1958",
"feature": "api: nope",
"expected": false
},
{
"file": "0000a657",
"location": "file",
"feature": "count(string(T_Ba?.BcRJa)): 1"
},
{
"file": "0000a657",
"location": "file",
"feature": "count(string(GetNamedPipeClientSessionId)): 1"
},
{
"file": "0000a657",
"location": "file",
"feature": "count(string(nope)): 0"
},
{
"file": "0000a657",
"location": "file",
"feature": "count(section(.rdata)): 1"
},
{
"file": "0000a657",
"location": "file",
"feature": "count(section(.nope)): 0"
},
{
"file": "0000a657",
"location": "file",
"feature": "count(import(NdrSimpleTypeUnmarshall)): 1"
},
{
"file": "0000a657",
"location": "file",
"feature": "count(import(Nope)): 0"
},
{
"file": "0000a657",
"location": "file",
"feature": "count(export(Nope)): 0"
},
{
"file": "0000a657",
"location": "process=(1180:3052)",
"feature": "count(string(C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe)): 2"
},
{
"file": "0000a657",
"location": "process=(1180:3052)",
"feature": "count(string(nope)): 0"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "count(api(NtQueryValueKey)): 7"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "count(api(GetActiveWindow)): 0"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "count(number(0xEC)): 1"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "count(number(110173)): 0"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "count(string(SetThreadUILanguage)): 1"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804",
"feature": "count(string(nope)): 0"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804,call=56",
"feature": "count(api(NtQueryValueKey)): 1"
},
{
"file": "0000a657",
"location": "process=(2852:3052),thread=2804,call=1958",
"feature": "count(api(nope)): 0"
}
]
}
+129
View File
@@ -0,0 +1,129 @@
{
"files": [
{
"key": "93b2d1-drakvuf",
"path": "data/dynamic/drakvuf/93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795.log.gz",
"tags": [
"dynamic",
"drakvuf"
]
}
],
"features": [
{
"file": "93b2d1-drakvuf",
"location": "file",
"feature": "string: \\Program Files\\WindowsApps\\does_not_exist",
"expected": false
},
{
"file": "93b2d1-drakvuf",
"location": "file",
"feature": "import: SetUnhandledExceptionFilter"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592",
"feature": "api: LdrLoadDll"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592",
"feature": "api: DoesNotExist",
"expected": false
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=4716,call=17",
"feature": "api: CreateWindowExW"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=4716,call=17",
"feature": "api: CreateWindowEx"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "api: LdrLoadDll"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "api: DoesNotExist",
"expected": false
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "string: 0x667e2beb40:\"api-ms-win-core-fibers-l1-1-1\""
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "string: non_existant",
"expected": false
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "number: 0x801"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "number: 0x10101010101",
"expected": false
},
{
"file": "93b2d1-drakvuf",
"location": "file",
"feature": "count(string(\\Program Files\\WindowsApps\\does_not_exist)): 0"
},
{
"file": "93b2d1-drakvuf",
"location": "file",
"feature": "count(import(SetUnhandledExceptionFilter)): 1"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592",
"feature": "count(api(LdrLoadDll)): 9"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592",
"feature": "count(api(DoesNotExist)): 0"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "count(api(LdrLoadDll)): 1"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "count(api(DoesNotExist)): 0"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "count(string(0x667e2beb40:\"api-ms-win-core-fibers-l1-1-1\")): 1"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "count(string(non_existant)): 0"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "count(number(0x801)): 1"
},
{
"file": "93b2d1-drakvuf",
"location": "process=(3564:4852),thread=6592,call=1",
"feature": "count(number(0x10101010101)): 0"
}
]
}
File diff suppressed because it is too large Load Diff
+233
View File
@@ -0,0 +1,233 @@
{
"files": [
{
"key": "93b2d1-vmray",
"path": "data/dynamic/vmray/93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795_min_archive.zip",
"tags": [
"dynamic",
"vmray"
]
},
{
"key": "2f8a79-vmray",
"path": "data/dynamic/vmray/2f8a79b12a7a989ac7e5f6ec65050036588a92e65aeb6841e08dc228ff0e21b4_min_archive.zip",
"tags": [
"dynamic",
"vmray"
]
},
{
"key": "eb1287-vmray",
"path": "data/dynamic/vmray/eb12873c0ce3e9ea109c2a447956cbd10ca2c3e86936e526b2c6e28764999f21_min_archive.zip",
"tags": [
"dynamic",
"vmray"
]
}
],
"features": [
{
"file": "93b2d1-vmray",
"location": "file",
"feature": "string: api.%x%x.%s"
},
{
"file": "93b2d1-vmray",
"location": "file",
"feature": "string: \\Program Files\\WindowsApps\\does_not_exist",
"expected": false
},
{
"file": "93b2d1-vmray",
"location": "file",
"feature": "import: GetAddrInfoW"
},
{
"file": "93b2d1-vmray",
"location": "file",
"feature": "import: GetAddrInfo"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2180",
"feature": "api: LoadLibraryExA"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2180",
"feature": "api: LoadLibraryEx"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420",
"feature": "api: GetAddrInfoW"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420",
"feature": "api: GetAddrInfo"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420",
"feature": "api: DoesNotExist",
"expected": false
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=2361",
"feature": "api: GetAddrInfoW"
},
{
"file": "eb1287-vmray",
"location": "process=(4968:0),thread=5992,call=10981",
"feature": "api: CreateMutexW"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=10323",
"feature": "string: raw.githubusercontent.com"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2180,call=267",
"feature": "string: C:\\Users\\WhuOXYsD\\Desktop\\filename.exe",
"comment": "backslashes in paths; see #2428"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2180,call=267",
"feature": "string: C:\\\\Users\\\\WhuOXYsD\\\\Desktop\\\\filename.exe",
"expected": false
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "string: Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\System"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "string: Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Policies\\\\System",
"expected": false
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=2358",
"feature": "number: 0x1000",
"comment": "VirtualAlloc(4096, 4)"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=2358",
"feature": "number: 0x4"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "number: 0x80000001",
"comment": "RegOpenKeyExW(Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\System, 0, 131078); see #2"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "number: 0x0"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "number: 0x20006"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2397",
"feature": "number: 0x80000001",
"comment": "RegOpenKeyExW call 2397 (same parameters)"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2397",
"feature": "number: 0x0"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2397",
"feature": "number: 0x20006"
},
{
"file": "93b2d1-vmray",
"location": "file",
"feature": "count(import(GetAddrInfoW)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420",
"feature": "count(api(free)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420",
"feature": "count(api(GetAddrInfoW)): 5"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=2345",
"feature": "count(api(free)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=2345",
"feature": "count(api(GetAddrInfoW)): 0"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=2361",
"feature": "count(api(GetAddrInfoW)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=10323",
"feature": "count(string(raw.githubusercontent.com)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=10323",
"feature": "count(string(non_existant)): 0"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=10315",
"feature": "count(number(0x1000)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=10315",
"feature": "count(number(0x4)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2420,call=10315",
"feature": "count(number(0x194)): 0"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "count(number(0x80000001)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "count(number(0x0)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "count(number(0x20006)): 1"
},
{
"file": "93b2d1-vmray",
"location": "process=(2176:0),thread=2204,call=2395",
"feature": "count(number(0xf423f)): 0"
}
]
}
+20 -398
View File
@@ -14,407 +14,21 @@
from typing import cast
import pytest
import fixtures
import pytest
import capa.features.file
import capa.features.insn
import capa.features.common
from capa.features.common import (
OS,
OS_LINUX,
ARCH_I386,
FORMAT_PE,
ARCH_AMD64,
FORMAT_ELF,
OS_ANDROID,
OS_WINDOWS,
ARCH_AARCH64,
Arch,
Format,
)
FEATURE_PRESENCE_TESTS_BE2_ELF_AARCH64 = sorted(
[
# file/string
(
"687e79.ghidra.be2",
"file",
capa.features.common.String("AppDataService start"),
True,
),
("687e79.ghidra.be2", "file", capa.features.common.String("nope"), False),
# file/sections
("687e79.ghidra.be2", "file", capa.features.file.Section(".text"), True),
("687e79.ghidra.be2", "file", capa.features.file.Section(".nope"), False),
# file/exports
(
"687e79.ghidra.be2",
"file",
capa.features.file.Export("android::clearDir"),
"xfail: name demangling is not implemented",
),
("687e79.ghidra.be2", "file", capa.features.file.Export("nope"), False),
# file/imports
("687e79.ghidra.be2", "file", capa.features.file.Import("fopen"), True),
("687e79.ghidra.be2", "file", capa.features.file.Import("exit"), True),
(
"687e79.ghidra.be2",
"file",
capa.features.file.Import("_ZN7android10IInterfaceD0Ev"),
True,
),
("687e79.ghidra.be2", "file", capa.features.file.Import("nope"), False),
# function/characteristic(loop)
(
"687e79.ghidra.be2",
"function=0x1056c0",
capa.features.common.Characteristic("loop"),
True,
),
(
"687e79.ghidra.be2",
"function=0x1075c0",
capa.features.common.Characteristic("loop"),
False,
),
# bb/characteristic(tight loop)
(
"d1e650.ghidra.be2",
"function=0x114af4",
capa.features.common.Characteristic("tight loop"),
True,
),
(
"d1e650.ghidra.be2",
"function=0x118F1C",
capa.features.common.Characteristic("tight loop"),
True,
),
(
"d1e650.ghidra.be2",
"function=0x11464c",
capa.features.common.Characteristic("tight loop"),
False,
),
# bb/characteristic(stack string)
(
"687e79.ghidra.be2",
"function=0x0",
capa.features.common.Characteristic("stack string"),
"xfail: not implemented yet",
),
(
"687e79.ghidra.be2",
"function=0x0",
capa.features.common.Characteristic("stack string"),
"xfail: not implemented yet",
),
# insn/mnemonic
("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("stp"), True),
("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("adrp"), True),
("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("bl"), True),
("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("in"), False),
("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("adrl"), False),
# insn/number
# 00114524 add x29,sp,#0x10
(
"d1e650.ghidra.be2",
"function=0x11451c",
capa.features.insn.Number(0x10),
False,
),
# 00105128 sub sp,sp,#0xE0
(
"687e79.ghidra.be2",
"function=0x105128",
capa.features.insn.Number(0xE0),
False,
),
# insn/operand.number
(
"687e79.ghidra.be2",
"function=0x105128,bb=0x1051e4",
capa.features.insn.OperandNumber(1, 0xFFFFFFFF),
True,
),
(
"687e79.ghidra.be2",
"function=0x107588,bb=0x107588",
capa.features.insn.OperandNumber(1, 0x8),
True,
),
(
"687e79.ghidra.be2",
"function=0x107588,bb=0x107588,insn=0x1075a4",
capa.features.insn.OperandNumber(1, 0x8),
True,
),
# insn/operand.offset
(
"687e79.ghidra.be2",
"function=0x105128,bb=0x105450",
capa.features.insn.OperandOffset(2, 0x10),
True,
),
(
"d1e650.ghidra.be2",
"function=0x124854,bb=0x1248AC,insn=0x1248B4",
capa.features.insn.OperandOffset(2, -0x48),
True,
),
(
"d1e650.ghidra.be2",
"function=0x13347c,bb=0x133548,insn=0x133554",
capa.features.insn.OperandOffset(2, 0x20),
False,
),
("687e79.ghidra.be2", "function=0x105C88", capa.features.insn.Number(0xF000), True),
# insn/number: negative
(
"687e79.ghidra.be2",
"function=0x1057f8,bb=0x1057f8",
capa.features.insn.Number(0xFFFFFFFFFFFFFFFF),
True,
),
(
"687e79.ghidra.be2",
"function=0x1057f8,bb=0x1057f8",
capa.features.insn.Number(0xFFFFFFFFFFFFFFFF),
True,
),
(
"687e79.ghidra.be2",
"function=0x1066e0,bb=0x1068c4",
capa.features.insn.Number(0xFFFFFFFF),
True,
),
# insn/offset
(
"687e79.ghidra.be2",
"function=0x105128,bb=0x105450",
capa.features.insn.Offset(0x10),
True,
),
# ldp x29,x30,[sp, #0x20]
(
"d1e650.ghidra.be2",
"function=0x13347c,bb=0x133548,insn=0x133554",
capa.features.insn.Offset(0x20),
False,
),
# stp x20,x0,[x19, #0x8]
(
"d1e650.ghidra.be2",
"function=0x1183e0,bb=0x11849c,insn=0x1184b0",
capa.features.insn.Offset(0x8),
True,
),
# str xzr,[x8, #0x8]!
(
"d1e650.ghidra.be2",
"function=0x138688,bb=0x138994,insn=0x1389a8",
capa.features.insn.Offset(0x8),
True,
),
# ldr x9,[x8, #0x8]!
(
"d1e650.ghidra.be2",
"function=0x138688,bb=0x138978,insn=0x138984",
capa.features.insn.Offset(0x8),
True,
),
# ldr x19,[sp], #0x20
(
"d1e650.ghidra.be2",
"function=0x11451c",
capa.features.insn.Offset(0x20),
False,
),
# ldrb w9,[x8, #0x1]
(
"d1e650.ghidra.be2",
"function=0x138a9c,bb=0x138b00,insn=0x138b00",
capa.features.insn.Offset(0x1),
True,
),
# insn/offset: negative
(
"d1e650.ghidra.be2",
"function=0x124854,bb=0x1248AC,insn=0x1248B4",
capa.features.insn.Offset(-0x48),
True,
),
# insn/offset from mnemonic: add
# 0010514c add x23,param_1,#0x8
(
"687e79.ghidra.be2",
"function=0x105128,bb=0x105128,insn=0x10514c",
capa.features.insn.Offset(0x8),
True,
),
# insn/api
# not extracting dll name
("687e79.ghidra.be2", "function=0x105c88", capa.features.insn.API("memset"), True),
("687e79.ghidra.be2", "function=0x105c88", capa.features.insn.API("Nope"), False),
# insn/string
(
"687e79.ghidra.be2",
"function=0x107588",
capa.features.common.String("AppDataService start"),
True,
),
(
"687e79.ghidra.be2",
"function=0x1075c0",
capa.features.common.String("AppDataService"),
True,
),
("687e79.ghidra.be2", "function=0x107588", capa.features.common.String("nope"), False),
(
"687e79.ghidra.be2",
"function=0x106d58",
capa.features.common.String("/data/misc/wifi/wpa_supplicant.conf"),
True,
),
# insn/regex
(
"687e79.ghidra.be2",
"function=0x105c88",
capa.features.common.Regex("innerRename"),
True,
),
(
"687e79.ghidra.be2",
"function=0x106d58",
capa.features.common.Regex("/data/misc"),
True,
),
(
"687e79.ghidra.be2",
"function=0x106d58",
capa.features.common.Substring("/data/misc"),
True,
),
# insn/bytes
(
"d1e650.ghidra.be2",
"function=0x1165a4",
capa.features.common.Bytes(bytes.fromhex("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
True,
),
# # don't extract byte features for obvious strings
(
"687e79.ghidra.be2",
"function=0x1057f8",
capa.features.common.Bytes("/system/xbin/busybox".encode("utf-16le")),
False,
),
# insn/characteristic(nzxor)
(
"d1e650.ghidra.be2",
"function=0x114af4",
capa.features.common.Characteristic("nzxor"),
True,
),
(
"d1e650.ghidra.be2",
"function=0x117988",
capa.features.common.Characteristic("nzxor"),
True,
),
# # insn/characteristic(cross section flow)
# ("a1982...", "function=0x4014D0", capa.features.common.Characteristic("cross section flow"), True),
# # insn/characteristic(cross section flow): imports don't count
# ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("cross section flow"), False),
# insn/characteristic(recursive call)
(
"687e79.ghidra.be2",
"function=0x105b38",
capa.features.common.Characteristic("recursive call"),
True,
),
(
"687e79.ghidra.be2",
"function=0x106530",
capa.features.common.Characteristic("recursive call"),
True,
),
# insn/characteristic(indirect call)
("d1e650.ghidra.be2", "function=0x118620", capa.features.common.Characteristic("indirect call"), True),
(
"d1e650.ghidra.be2",
"function=0x118500",
capa.features.common.Characteristic("indirect call"),
False,
),
("d1e650.ghidra.be2", "function=0x118620", capa.features.common.Characteristic("indirect call"), True),
(
"d1e650.ghidra.be2",
"function=0x11451c",
capa.features.common.Characteristic("indirect call"),
True,
),
# insn/characteristic(calls from)
(
"687e79.ghidra.be2",
"function=0x105080",
capa.features.common.Characteristic("calls from"),
True,
),
(
"687e79.ghidra.be2",
"function=0x1070e8",
capa.features.common.Characteristic("calls from"),
False,
),
# function/characteristic(calls to)
(
"687e79.ghidra.be2",
"function=0x1075c0",
capa.features.common.Characteristic("calls to"),
True,
),
# file/function-name
(
"687e79.ghidra.be2",
"file",
capa.features.file.FunctionName("__libc_init"),
"xfail: TODO should this be a function-name?",
),
# os & format & arch
("687e79.ghidra.be2", "file", OS(OS_ANDROID), True),
("687e79.ghidra.be2", "file", OS(OS_LINUX), False),
("687e79.ghidra.be2", "file", OS(OS_WINDOWS), False),
# os & format & arch are also global features
("687e79.ghidra.be2", "function=0x107588", OS(OS_ANDROID), True),
("687e79.ghidra.be2", "function=0x1075c0,bb=0x1076c0", OS(OS_ANDROID), True),
("687e79.ghidra.be2", "file", Arch(ARCH_I386), False),
("687e79.ghidra.be2", "file", Arch(ARCH_AMD64), False),
("687e79.ghidra.be2", "file", Arch(ARCH_AARCH64), True),
("687e79.ghidra.be2", "function=0x107588", Arch(ARCH_AARCH64), True),
("687e79.ghidra.be2", "function=0x1075c0,bb=0x1076c0", Arch(ARCH_AARCH64), True),
("687e79.ghidra.be2", "file", Format(FORMAT_ELF), True),
("687e79.ghidra.be2", "file", Format(FORMAT_PE), False),
("687e79.ghidra.be2", "function=0x107588", Format(FORMAT_ELF), True),
("687e79.ghidra.be2", "function=0x107588", Format(FORMAT_PE), False),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
# backend policy handed to both the fixture parametrization and the feature runner below.
# binexport opts in by tag (inclusion-based selection); presumably only fixtures
# carrying the `binexport` tag are selected - confirm against BackendFeaturePolicy.
BACKEND = fixtures.BackendFeaturePolicy(
name="binexport",
get_extractor=fixtures.get_binexport_extractor,
include_tags={"binexport"},
)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    FEATURE_PRESENCE_TESTS_BE2_ELF_AARCH64,
    indirect=["sample", "scope"],
)
def test_binexport_features_elf_aarch64(sample, scope, feature, expected):
    """
    Run one AArch64 ELF presence case against the BinExport2 extractor.

    `expected` is normally a bool. For now, an expected failure is signalled
    by a string instead, such as "xfail: not implemented yet"; that string is
    used as the xfail reason.
    """
    if isinstance(expected, bool):
        fixtures.do_test_feature_presence(fixtures.get_binexport_extractor, sample, scope, feature, expected)
        return

    # non-bool expectation: report as xfail, using the string as the reason.
    pytest.xfail(expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_binexport_features_elf_aarch64(feature_fixture):
    """Run a single feature fixture under the BinExport2 backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
@fixtures.parametrize(
@@ -426,12 +40,16 @@ def test_binexport_features_pe_x86(sample, scope, feature, expected):
if "mimikatz.exe_" not in sample.name:
pytest.skip("for now only testing mimikatz.exe_ Ghidra BinExport file")
if isinstance(feature, capa.features.common.Characteristic) and "stack string" in cast(str, feature.value):
if isinstance(
feature, capa.features.common.Characteristic
) and "stack string" in cast(str, feature.value):
pytest.skip("for now only testing basic features")
sample = sample.parent / "binexport2" / (sample.name + ".ghidra.BinExport")
assert sample.exists()
fixtures.do_test_feature_presence(fixtures.get_binexport_extractor, sample, scope, feature, expected)
fixtures.do_test_feature_presence(
fixtures.get_binexport_extractor, sample, scope, feature, expected
)
@fixtures.parametrize(
@@ -444,7 +62,9 @@ def test_binexport_feature_counts_ghidra(sample, scope, feature, expected):
pytest.skip("for now only testing mimikatz.exe_ Ghidra BinExport file")
sample = sample.parent / "binexport2" / (sample.name + ".ghidra.BinExport")
assert sample.exists()
fixtures.do_test_feature_count(fixtures.get_binexport_extractor, sample, scope, feature, expected)
fixtures.do_test_feature_count(
fixtures.get_binexport_extractor, sample, scope, feature, expected
)
@fixtures.parametrize(
@@ -455,4 +75,6 @@ def test_binexport_feature_counts_ghidra(sample, scope, feature, expected):
def test_binexport_feature_counts_intel(sample, scope, feature, expected):
sample = sample.parent / "binexport2" / (sample.name + ".ghidra.BinExport")
assert sample.exists()
fixtures.do_test_feature_count(fixtures.get_binexport_extractor, sample, scope, feature, expected)
fixtures.do_test_feature_count(
fixtures.get_binexport_extractor, sample, scope, feature, expected
)
+29 -19
View File
@@ -15,9 +15,10 @@
import logging
from pathlib import Path
import pytest
import fixtures
import pytest
import capa.features.common
import capa.main
logger = logging.getLogger(__file__)
@@ -31,41 +32,50 @@ try:
try:
binaryninja.load(source=b"\x90")
except RuntimeError:
logger.warning("Binary Ninja license is not valid, provide via $BN_LICENSE or license.dat")
logger.warning(
"Binary Ninja license is not valid, provide via $BN_LICENSE or license.dat"
)
else:
binja_present = True
except ImportError:
pass
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS + fixtures.FEATURE_BINJA_DATABASE_TESTS,
indirect=["sample", "scope"],
# Feature-fixture policy for the Binary Ninja backend.
BACKEND = fixtures.BackendFeaturePolicy(
    name="binja",
    # binja also loads .bndb database files natively, so include `binja-db`
    # alongside the regular static-binary fixtures.
    get_extractor=fixtures.get_binja_extractor,
    include_tags={"static", "binja-db"},
    exclude_tags={"dotnet", "ghidra"},
)
def test_binja_features(sample, scope, feature, expected):
fixtures.do_test_feature_presence(fixtures.get_binja_extractor, sample, scope, feature, expected)
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_COUNT_TESTS,
indirect=["sample", "scope"],
@pytest.mark.skipif(
binja_present is False,
reason="Skip binja tests if the binaryninja Python API is not installed",
)
def test_binja_feature_counts(sample, scope, feature, expected):
fixtures.do_test_feature_count(fixtures.get_binja_extractor, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_binja_features(feature_fixture):
    """Run a single feature fixture under the Binary Ninja backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
@pytest.mark.skipif(
    binja_present is False,
    reason="Skip binja tests if the binaryninja Python API is not installed",
)
def test_standalone_binja_backend():
    """
    Invoke the capa command-line entry point with the Binary Ninja backend
    on a known sample and require exit status 0.
    """
    # directory containing this test module
    CD = Path(__file__).resolve().parent
    test_path = CD / ".." / "tests" / "data" / "Practical Malware Analysis Lab 01-01.exe_"
    assert capa.main.main([str(test_path), "-b", capa.main.BACKEND_BINJA]) == 0
@pytest.mark.skipif(
    binja_present is False,
    reason="Skip binja tests if the binaryninja Python API is not installed",
)
def test_binja_version():
    """Require the installed Binary Ninja core to be version 5.3 or newer."""
    version = binaryninja.core_version_info()  # type: ignore[possibly-undefined] # guarded by skipif binja_present
    # tuple comparison orders by major first, then minor
    assert (version.major, version.minor) >= (5, 3)
+7 -100
View File
@@ -15,106 +15,13 @@
import fixtures
import capa.features.file
import capa.features.insn
import capa.features.common
DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS = sorted(
[
# file/string
("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), True),
("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), True),
("0000a657", "file", capa.features.common.String("nope"), False),
# file/sections
("0000a657", "file", capa.features.file.Section(".rdata"), True),
("0000a657", "file", capa.features.file.Section(".nope"), False),
# file/imports
("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), True),
("0000a657", "file", capa.features.file.Import("Nope"), False),
# file/exports
("0000a657", "file", capa.features.file.Export("Nope"), False),
# process/environment variables
(
"0000a657",
"process=(1180:3052)",
capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"),
True,
),
("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False),
# thread/api calls
("0000a657", "process=(2900:2852),thread=2904", capa.features.insn.API("RegQueryValueExA"), True),
("0000a657", "process=(2900:2852),thread=2904", capa.features.insn.API("RegQueryValueEx"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False),
# thread/number call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), False),
# thread/string call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), False),
("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), True),
("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), False),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
)
DYNAMIC_CAPE_FEATURE_COUNT_TESTS = sorted(
# TODO(yelhamer): use the same sample for testing CAPE and DRAKVUF extractors
# https://github.com/mandiant/capa/issues/2180
[
# file/string
("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1),
("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), 1),
("0000a657", "file", capa.features.common.String("nope"), 0),
# file/sections
("0000a657", "file", capa.features.file.Section(".rdata"), 1),
("0000a657", "file", capa.features.file.Section(".nope"), 0),
# file/imports
("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), 1),
("0000a657", "file", capa.features.file.Import("Nope"), 0),
# file/exports
("0000a657", "file", capa.features.file.Export("Nope"), 0),
# process/environment variables
(
"0000a657",
"process=(1180:3052)",
capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"),
2,
),
("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), 0),
# thread/api calls
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), 7),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), 0),
# thread/number call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), 1),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), 0),
# thread/string call argument
("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), 1),
("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), 0),
("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), 1),
("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), 0),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
# Feature-fixture policy for the CAPE sandbox backend: names the backend,
# supplies its extractor factory, and opts into `cape`-tagged fixtures.
BACKEND = fixtures.BackendFeaturePolicy(
    name="cape",
    get_extractor=fixtures.get_cape_extractor,
    include_tags={"cape"},
)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS,
    indirect=["sample", "scope"],
)
def test_cape_features(sample, scope, feature, expected):
    """Delegate one CAPE presence case to fixtures.do_test_feature_presence."""
    extractor_factory = fixtures.get_cape_extractor
    fixtures.do_test_feature_presence(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    DYNAMIC_CAPE_FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_cape_feature_counts(sample, scope, feature, expected):
    """Delegate one CAPE count case to fixtures.do_test_feature_count."""
    extractor_factory = fixtures.get_cape_extractor
    fixtures.do_test_feature_count(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_cape_features(feature_fixture):
    """Run a single feature fixture under the CAPE backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
+7 -14
View File
@@ -31,23 +31,16 @@ CD = Path(__file__).resolve().parent
DOTNET_DIR = Path(__file__).resolve().parent / "data" / "dotnet"
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
indirect=["sample", "scope"],
# Feature-fixture policy for the dnfile (.NET) backend: names the backend,
# supplies its extractor factory, and opts into `dotnet`-tagged fixtures.
BACKEND = fixtures.BackendFeaturePolicy(
    name="dnfile",
    get_extractor=fixtures.get_dnfile_extractor,
    include_tags={"dotnet"},
)
def test_dnfile_features(sample, scope, feature, expected):
fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS_DOTNET,
    indirect=["sample", "scope"],
)
def test_dnfile_feature_counts(sample, scope, feature, expected):
    """Delegate one .NET count case to fixtures.do_test_feature_count."""
    extractor_factory = fixtures.get_dnfile_extractor
    fixtures.do_test_feature_count(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_dnfile_features(feature_fixture):
    """Run a single feature fixture under the dnfile backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
def test_get_dotnet_table_row_first_row():
+15 -14
View File
@@ -12,25 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import fixtures
import capa.features.file
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
indirect=["sample", "scope"],
# Feature-fixture policy for the dotnetfile backend.
BACKEND = fixtures.BackendFeaturePolicy(
    name="dotnetfile",
    get_extractor=fixtures.get_dotnetfile_extractor,
    include_tags={"dotnet"},
    exclude_tags={
        # dotnetfile is a file-scope extractor; drop non-file scopes
        "function",
        "basic-block",
        "instruction",
        # and drop feature types dotnetfile doesn't produce
        "function-name",
    },
)
def test_dotnetfile_features(sample, scope, feature, expected):
if scope.__name__ != "file":
pytest.xfail("dotnetfile only extracts file scope features")
if isinstance(feature, capa.features.file.FunctionName):
pytest.xfail("dotnetfile doesn't extract function names")
fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_dotnetfile_features(feature_fixture):
    """Run a single feature fixture under the dotnetfile backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
@fixtures.parametrize(
+7 -81
View File
@@ -15,87 +15,13 @@
import fixtures
import capa.features.file
import capa.features.insn
import capa.features.common
DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS = sorted(
[
("93b2d1-drakvuf", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False),
# file/imports
("93b2d1-drakvuf", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), True),
# thread/api calls
("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False),
# call/api
("93b2d1-drakvuf", "process=(3564:4852),thread=4716,call=17", capa.features.insn.API("CreateWindowExW"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=4716,call=17", capa.features.insn.API("CreateWindowEx"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("LdrLoadDll"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("DoesNotExist"), False),
# call/string argument
(
"93b2d1-drakvuf",
"process=(3564:4852),thread=6592,call=1",
capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'),
True,
),
(
"93b2d1-drakvuf",
"process=(3564:4852),thread=6592,call=1",
capa.features.common.String("non_existant"),
False,
),
# call/number argument
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x801), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x010101010101), False),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
)
DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS = sorted(
[
("93b2d1-drakvuf", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False),
# file/imports
("93b2d1-drakvuf", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), 1),
# thread/api calls
("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), 9),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False),
# call/api
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("LdrLoadDll"), 1),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("DoesNotExist"), 0),
# call/string argument
(
"93b2d1-drakvuf",
"process=(3564:4852),thread=6592,call=1",
capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'),
1,
),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.common.String("non_existant"), 0),
# call/number argument
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x801), 1),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x010101010101), 0),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
# Feature-fixture policy for the DRAKVUF sandbox backend: names the backend,
# supplies its extractor factory, and opts into `drakvuf`-tagged fixtures.
BACKEND = fixtures.BackendFeaturePolicy(
    name="drakvuf",
    get_extractor=fixtures.get_drakvuf_extractor,
    include_tags={"drakvuf"},
)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS,
    indirect=["sample", "scope"],
)
def test_drakvuf_features(sample, scope, feature, expected):
    """Delegate one DRAKVUF presence case to fixtures.do_test_feature_presence."""
    extractor_factory = fixtures.get_drakvuf_extractor
    fixtures.do_test_feature_presence(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_drakvuf_feature_counts(sample, scope, feature, expected):
    """Delegate one DRAKVUF count case to fixtures.do_test_feature_count."""
    extractor_factory = fixtures.get_drakvuf_extractor
    fixtures.do_test_feature_count(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_drakvuf_features(feature_fixture):
    """Run a single feature fixture under the DRAKVUF backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
+16 -31
View File
@@ -11,42 +11,27 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import importlib.util
import os
import pytest
import fixtures
import pytest
import capa.features.common
# Ghidra tests are only runnable when the `pyghidra` package can be found
# and the GHIDRA_INSTALL_DIR environment variable is set; evaluated once at
# import time and consumed by the skipif markers in this module.
ghidra_present = importlib.util.find_spec("pyghidra") is not None and "GHIDRA_INSTALL_DIR" in os.environ
# Feature-fixture policy for the Ghidra backend: opts into `static`-tagged
# fixtures and excludes `dotnet`-tagged ones.
BACKEND = fixtures.BackendFeaturePolicy(
    name="ghidra",
    get_extractor=fixtures.get_ghidra_extractor,
    include_tags={"static"},
    exclude_tags={"dotnet"},
)
@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    [
        # wrap the one Vivisect-specific case in an xfail mark;
        # every other case passes through unchanged.
        (
            pytest.param(
                *case,
                marks=pytest.mark.xfail(
                    reason="specific to Vivisect and basic blocks do not align with Ghidra's analysis"
                ),
            )
            if case[0] == "294b8d..." and case[2] == capa.features.common.String("\r\n\x00:ht")
            else case
        )
        for case in fixtures.FEATURE_PRESENCE_TESTS
    ],
    indirect=["sample", "scope"],
)
def test_ghidra_features(sample, scope, feature, expected):
    """Delegate one presence case to fixtures.do_test_feature_presence using the Ghidra extractor."""
    extractor_factory = fixtures.get_ghidra_extractor
    fixtures.do_test_feature_presence(extractor_factory, sample, scope, feature, expected)
@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS_GHIDRA,
    indirect=["sample", "scope"],
)
def test_ghidra_feature_counts(sample, scope, feature, expected):
    """Delegate one count case to fixtures.do_test_feature_count using the Ghidra extractor."""
    extractor_factory = fixtures.get_ghidra_extractor
    fixtures.do_test_feature_count(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_ghidra_features(feature_fixture):
    """Run a single feature fixture under the Ghidra backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
+34 -32
View File
@@ -12,39 +12,56 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path
import pytest
import fixtures
import pytest
import capa.features.extractors.ida.idalib
from capa.features.common import Characteristic
from capa.features.file import FunctionName
from capa.features.insn import API
from capa.features.common import Characteristic
logger = logging.getLogger(__name__)
idalib_present = capa.features.extractors.ida.idalib.has_idalib()
if idalib_present:
try:
import idapro # noqa: F401 [imported but unused]
import ida_kernwin
import idapro # noqa: F401 [imported but unused]
kernel_version: str = ida_kernwin.get_kernel_version()
except ImportError:
idalib_present = False
kernel_version = "0.0"
else:
kernel_version = "0.0"
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
# Feature-fixture policy for the idalib backend: opts into `static`-tagged
# fixtures and excludes `dotnet`- and `ghidra`-tagged ones.
BACKEND = fixtures.BackendFeaturePolicy(
    name="idalib",
    get_extractor=fixtures.get_idalib_extractor,
    include_tags={"static"},
    exclude_tags={"dotnet", "ghidra"},
)
def test_idalib_features(sample: Path, scope, feature, expected):
if kernel_version in {"9.0", "9.1"} and sample.name.startswith("2bf18d"):
if isinstance(feature, (API, FunctionName)) and feature.value == "__libc_connect":
@pytest.mark.skipif(
idalib_present is False,
reason="Skip idalib tests if the idalib Python API is not installed",
)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_idalib_features(feature_fixture):
# apply runtime-conditional xfails for specific IDA versions.
# version-specific behavior stays in the test body because it
# depends on the installed IDA, not on the fixture data.
sample_name = feature_fixture.sample_path.name
statement = feature_fixture.statement
if kernel_version in {"9.0", "9.1"} and sample_name.startswith("2bf18d"):
if (
isinstance(statement, (API, FunctionName))
and statement.value == "__libc_connect"
):
# see discussion here: https://github.com/mandiant/capa/pull/2742#issuecomment-3674146335
#
# > i confirmed that there were changes in 9.2 related to the ELF loader handling names,
@@ -52,35 +69,20 @@ def test_idalib_features(sample: Path, scope, feature, expected):
# > prevented this name from surfacing.
pytest.xfail(f"IDA {kernel_version} does not extract all ELF symbols")
if kernel_version in {"9.0"} and sample.name.startswith("Practical Malware Analysis Lab 12-04.exe_"):
if isinstance(feature, Characteristic) and feature.value == "embedded pe":
if kernel_version in {"9.0"} and sample_name.startswith(
"Practical Malware Analysis Lab 12-04.exe_"
):
if isinstance(statement, Characteristic) and statement.value == "embedded pe":
# see discussion here: https://github.com/mandiant/capa/pull/2742#issuecomment-3667086165
#
# idalib for IDA 9.0 doesn't support argv arguments, so we can't ask that resources are loaded
pytest.xfail("idalib 9.0 does not support loading resource segments")
try:
fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected)
fixtures.run_feature_fixture(BACKEND, feature_fixture)
finally:
logger.debug("closing database...")
import idapro
idapro.close_database(save=False)
logger.debug("closed database.")
@pytest.mark.skipif(
    idalib_present is False,
    reason="Skip idalib tests if the idalib Python API is not installed",
)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_idalib_feature_counts(sample, scope, feature, expected):
    """
    Delegate one count case to fixtures.do_test_feature_count using the
    idalib extractor, then close the IDA database whether or not the check
    passed.
    """
    extractor_factory = fixtures.get_idalib_extractor
    try:
        fixtures.do_test_feature_count(extractor_factory, sample, scope, feature, expected)
    finally:
        logger.debug("closing database...")
        # imported here rather than at module scope, as in the original
        import idapro

        # close without saving
        idapro.close_database(save=False)
        logger.debug("closed database.")
+17 -16
View File
@@ -12,24 +12,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import fixtures
import capa.features.file
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS,
indirect=["sample", "scope"],
# Feature-fixture policy for the pefile backend.
BACKEND = fixtures.BackendFeaturePolicy(
    name="pefile",
    get_extractor=fixtures.get_pefile_extractor,
    include_tags={"static"},
    exclude_tags={
        "dotnet",
        "elf",
        # pefile is a file-scope extractor; drop non-file scopes
        "function",
        "basic-block",
        "instruction",
        # and drop feature types pefile doesn't produce
        "function-name",
    },
)
def test_pefile_features(sample, scope, feature, expected):
if scope.__name__ != "file":
pytest.xfail("pefile only extracts file scope features")
if isinstance(feature, capa.features.file.FunctionName):
pytest.xfail("pefile doesn't extract function names")
if ".elf" in sample.name:
pytest.xfail("pefile doesn't handle ELF files")
fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_pefile_features(feature_fixture):
    """Run a single feature fixture under the pefile backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
+8 -14
View File
@@ -14,20 +14,14 @@
import fixtures
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
# Feature-fixture policy for the vivisect backend: opts into `static`-tagged
# fixtures and excludes `dotnet`- and `ghidra`-tagged ones.
BACKEND = fixtures.BackendFeaturePolicy(
    name="viv",
    get_extractor=fixtures.get_viv_extractor,
    include_tags={"static"},
    exclude_tags={"dotnet", "ghidra"},
)
def test_viv_features(sample, scope, feature, expected):
fixtures.do_test_feature_presence(fixtures.get_viv_extractor, sample, scope, feature, expected)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_viv_feature_counts(sample, scope, feature, expected):
    """Delegate one count case to fixtures.do_test_feature_count using the vivisect extractor."""
    extractor_factory = fixtures.get_viv_extractor
    fixtures.do_test_feature_count(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_viv_features(feature_fixture):
    """Run a single feature fixture under the vivisect backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
+7 -119
View File
@@ -15,128 +15,16 @@
import fixtures
import capa.features.file
import capa.features.insn
import capa.features.common
DYNAMIC_VMRAY_FEATURE_PRESENCE_TESTS = sorted(
[
("93b2d1-vmray", "file", capa.features.common.String("api.%x%x.%s"), True),
("93b2d1-vmray", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False),
# file/imports
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfoW"), True),
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfo"), True),
# thread/api calls
("93b2d1-vmray", "process=(2176:0),thread=2180", capa.features.insn.API("LoadLibraryExA"), True),
("93b2d1-vmray", "process=(2176:0),thread=2180", capa.features.insn.API("LoadLibraryEx"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfoW"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfo"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("DoesNotExist"), False),
# call/api
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2361", capa.features.insn.API("GetAddrInfoW"), True),
("eb1287-vmray", "process=(4968:0),thread=5992,call=10981", capa.features.insn.API("CreateMutexW"), True),
# call/string argument
(
"93b2d1-vmray",
"process=(2176:0),thread=2420,call=10323",
capa.features.common.String("raw.githubusercontent.com"),
True,
),
# backslashes in paths; see #2428
(
"93b2d1-vmray",
"process=(2176:0),thread=2180,call=267",
capa.features.common.String("C:\\Users\\WhuOXYsD\\Desktop\\filename.exe"),
True,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2180,call=267",
capa.features.common.String("C:\\\\Users\\\\WhuOXYsD\\\\Desktop\\\\filename.exe"),
False,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2204,call=2395",
capa.features.common.String("Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\System"),
True,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2204,call=2395",
capa.features.common.String("Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Policies\\\\System"),
False,
),
# call/number argument
# VirtualAlloc(4096, 4)
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4096), True),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4), True),
# call/number argument - registry API parameters (issue #2)
# RegOpenKeyExW(Software\Microsoft\Windows\CurrentVersion\Policies\System, 0, 131078)
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(2147483649), True),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(0), True),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(131078), True),
# RegOpenKeyExW call 2397 (same parameters)
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(2147483649), True),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(0), True),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(131078), True),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
)
DYNAMIC_VMRAY_FEATURE_COUNT_TESTS = sorted(
[
# file/imports
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfoW"), 1),
# thread/api calls
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfoW"), 5),
# call/api
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2345", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2345", capa.features.insn.API("GetAddrInfoW"), 0),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2361", capa.features.insn.API("GetAddrInfoW"), 1),
# call/string argument
(
"93b2d1-vmray",
"process=(2176:0),thread=2420,call=10323",
capa.features.common.String("raw.githubusercontent.com"),
1,
),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10323", capa.features.common.String("non_existant"), 0),
# call/number argument
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4096), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(404), 0),
# call/number argument - registry API parameters (issue #2)
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(2147483649), 1),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(0), 1),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(131078), 1),
("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(999999), 0),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
key=lambda t: (t[0], t[1]),
# Feature-fixture policy for the VMRay sandbox backend: names the backend,
# supplies its extractor factory, and opts into `vmray`-tagged fixtures.
BACKEND = fixtures.BackendFeaturePolicy(
    name="vmray",
    get_extractor=fixtures.get_vmray_extractor,
    include_tags={"vmray"},
)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    DYNAMIC_VMRAY_FEATURE_PRESENCE_TESTS,
    indirect=["sample", "scope"],
)
def test_vmray_features(sample, scope, feature, expected):
    """Delegate one VMRay presence case to fixtures.do_test_feature_presence."""
    extractor_factory = fixtures.get_vmray_extractor
    fixtures.do_test_feature_presence(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize(
    "sample,scope,feature,expected",
    DYNAMIC_VMRAY_FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_vmray_feature_counts(sample, scope, feature, expected):
    """Delegate one VMRay count case to fixtures.do_test_feature_count."""
    extractor_factory = fixtures.get_vmray_extractor
    fixtures.do_test_feature_count(extractor_factory, sample, scope, feature, expected)
@fixtures.parametrize_backend_feature_fixtures(BACKEND)
def test_vmray_features(feature_fixture):
    """Run a single feature fixture under the VMRay backend policy."""
    fixtures.run_feature_fixture(BACKEND, feature_fixture)
def test_vmray_processes():