tests: cleanup tests and fixtures

2026-07-28 14:47:08 -07:00 · 2026-05-11 11:14:28 +02:00
parent 2604c91668
commit eb258c719f
14 changed files with 102 additions and 270 deletions
@@ -25,7 +25,6 @@ import pytest

 import capa.rules
 import capa.engine as ceng
-import capa.loader
 import capa.render.result_document
 from capa.features.common import OS_AUTO, FORMAT_AUTO, Feature
 from capa.features.address import Address
@@ -44,8 +43,7 @@ from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
 logger = logging.getLogger(__name__)
 CD = Path(__file__).resolve().parent
 FIXTURE_MANIFEST_DIR = CD / "fixtures" / "features"
-DOTNET_DIR = CD / "data" / "dotnet"
-DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
+DNFILE_TESTFILES = CD / "data" / "dotnet" / "dnfile-testfiles"


 def parse_feature_string(s: str) -> Feature | ceng.Range | ceng.Statement:
@@ -102,7 +100,6 @@ KNOWN_FIXTURE_TAGS: set[str] = (
        "dotnet",  # .NET format
        "elf",  # ELF format
        "flirt",  # requires FLIRT signature matching
-        "symtab",  # requires ELF symbol table parsing  TODO: can we remove this?
        "binja-db",  # Binary Ninja database format
        "binexport",  # BinExport2 format
        "aarch64",  # AArch64 architecture
@@ -366,44 +363,6 @@ def run_feature_fixture(
    assert actual == fixture.expected, msg


-@contextlib.contextmanager
-def xfail(condition, reason: str = ""):
-    """
-    context manager that wraps a block that is expected to fail in some cases.
-    when it does fail (and is expected), then mark this as pytest.xfail.
-    if its unexpected, raise an exception, so the test fails.
-
-    example::
-
-        # this test:
-        #  - passes on Linux if foo() works
-        #  - fails  on Linux if foo() fails
-        #  - xfails on Windows if foo() fails
-        #  - fails  on Windows if foo() works
-        with xfail(sys.platform == "win32", reason="doesn't work on Windows"):
-            foo()
-    """
-    try:
-        # do the block
-        yield
-    except Exception:
-        if condition:
-            # we expected the test to fail, so raise and register this via pytest
-            pytest.xfail(reason or "")
-        else:
-            # we don't expect an exception, so the test should fail
-            raise
-    else:
-        if not condition:
-            # here we expect the block to run successfully,
-            # and we've received no exception,
-            # so this is good
-            pass
-        else:
-            # we expected an exception, but didn't find one. that's an error.
-            raise RuntimeError("expected to fail, but didn't")
-
-
 def extract_global_features(extractor):
    features = collections.defaultdict(set)
    for feature, va in extractor.extract_global_features():
@@ -671,11 +630,6 @@ def resolve_scope(scope):
        raise ValueError("unexpected scope fixture")


-@pytest.fixture
-def scope(request):
-    return resolve_scope(request.param)
-
-
 def make_test_id(values):
    return "-".join(map(str, values))

@@ -692,29 +646,6 @@ def parametrize(params, values, **kwargs):
    return pytest.mark.parametrize(params, values, ids=ids, **kwargs)


-FEATURE_COUNT_TESTS_BE2_INTEL = [
-    (
-        "mimikatz",
-        "function=0x40105d,bb=0x401125,insn=0x401125",
-        capa.features.insn.Offset(0),
-        1,
-    ),
-    (
-        "mimikatz",
-        "function=0x40105d,bb=0x401125,insn=0x401125",
-        capa.features.insn.OperandOffset(1, 0),
-        1,
-    ),
-]
-
-
-def do_test_feature_count(get_extractor, sample, scope, feature, expected):
-    extractor = get_extractor(sample)
-    features = scope(extractor)
-    assert features.get(feature, set()) != set(), f"{feature} should be found in {scope.__name__}"
-    assert len(features[feature]) == expected, f"{feature} should be found {expected} times in {scope.__name__}"
-
-
 def get_result_doc(path: Path):
    return capa.render.result_document.ResultDocument.from_file(path)

@@ -766,14 +697,13 @@ def dynamic_a0000a6_rd():


 PMA1601 = CD / "data" / "Practical Malware Analysis Lab 16-01.exe_"
-z9324 = CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_"


 # used by test_viv_features
 # as well as some fixtures below
@functools.lru_cache(maxsize=1)
 def get_viv_extractor(path: Path):
-    import capa.main
+    import capa.loader
    import capa.features.extractors.viv.extractor

    sigpaths = [
@@ -809,7 +739,7 @@ def get_viv_extractor(path: Path):

@pytest.fixture
 def z9324d_extractor():
-    return get_viv_extractor(z9324)
+    return get_viv_extractor(CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_")


@pytest.fixture
@@ -902,6 +832,7 @@ def get_ghidra_extractor(path: Path):
    if not pyghidra.started():
        pyghidra.start()

+    import capa.loader
    import capa.features.extractors.ghidra.context

    if path in GHIDRA_CACHE:
@@ -78,7 +78,6 @@ Examples include:
 - `elf`
 - `dynamic`
 - `flirt`
- `symtab`
 - `binja-db`
 - `binexport`
 - `aarch64`
@@ -145,13 +144,17 @@ Module-level availability checks are still allowed. runtime-specific hooks are a

 Known backend bugs should be represented in the fixture manifests through backend-specific marks.

-Backends should not usually edit the shared JSON manifests just to avoid a fixture. they should prefer selecting or excluding fixtures through backend policy.
+Backends should not usually edit the shared JSON manifests just to avoid a fixture. They should prefer selecting or excluding fixtures through backend policy.

 The main reason to keep marks in JSON is to record known exceptions such as:

 - a backend-specific `xfail`
 - a backend-specific `skip`

+Ideally, this information would live in the backend-specific test code (like test_viv_features.py);
+however, to triage one of these failures you have to look at the JSON file anyway, and it's
+easier to see the mark next to the thing that fails.
+
 ## Expected contributor workflow

 When adding a new feature test:
@@ -1055,6 +1055,18 @@
      "location": "function=0x401000",
      "feature": "count(basic blocks): 3",
      "explanation": "Ghidra: 3 basic blocks in function"
+    },
+    {
+      "file": "mimikatz.ghidra.be2",
+      "location": "function=0x40105d,bb=0x401125,insn=0x401125",
+      "feature": "count(offset(0x0)): 1",
+      "explanation": "MOV [EDI], CX matches OFFSET_ZERO_PATTERNS, must yield Offset(0) exactly once"
+    },
+    {
+      "file": "mimikatz.ghidra.be2",
+      "location": "function=0x40105d,bb=0x401125,insn=0x401125",
+      "feature": "count(operand[1].offset(0x0)): 1",
+      "explanation": "MOV [EDI], CX matches OFFSET_ZERO_PATTERNS, must yield OperandOffset(1, 0) exactly once"
    }
  ]
 }
@@ -3,18 +3,7 @@
    {
      "key": "0000a657",
      "path": "data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz",
-      "tags": [
-        "dynamic",
-        "cape"
-      ]
-    },
-    {
-      "key": "d46900",
-      "path": "data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz",
-      "tags": [
-        "dynamic",
-        "cape"
-      ]
+      "tags": ["dynamic", "cape"]
    }
  ],
  "features": [
@@ -83,7 +83,7 @@
    {
      "key": "2bf18d",
      "path": "data/2bf18d0403677378adad9001b1243211.elf_",
-      "tags": ["elf", "static", "symtab"]
+      "tags": ["elf", "static"]
    },
    {
      "key": "2d3edc",
@@ -95,22 +95,6 @@
      "path": "data/ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_",
      "tags": ["static"]
    },
-    {
-      "key": "pma01-01.frz",
-      "path": "fixtures/freeze/Practical Malware Analysis Lab 01-01.dll_.frz"
-    },
-    {
-      "key": "009c2377.frz",
-      "path": "fixtures/freeze/009c2377b67997b0da1579f4bbc822c1.exe_.frz"
-    },
-    {
-      "key": "055da8e6.frz",
-      "path": "fixtures/freeze/055da8e6ccfe5a9380231ea04b850e18.elf_.frz"
-    },
-    {
-      "key": "034b7231.frz",
-      "path": "fixtures/freeze/034b7231a49387604e81a5a5d2fe7e08f6982c418a28b719d2faace3c312ebb5.exe_.frz"
-    },
    {
      "key": "b9f5b",
      "path": "data/b9f5bd514485fb06da39beff051b9fdc.exe_",
@@ -160,95 +144,6 @@
      "key": "nested_typeref",
      "path": "data/dotnet/2c7d60f77812607dec5085973ff76cea.dll_",
      "tags": ["static"]
-    },
-    {
-      "key": "pma01-01",
-      "path": "data/Practical Malware Analysis Lab 01-01.dll_",
-      "tags": ["static"]
-    },
-    {
-      "key": "pma01-01-rd",
-      "path": "data/rd/Practical Malware Analysis Lab 01-01.dll_.json"
-    },
-    {
-      "key": "pma21-01",
-      "path": "data/Practical Malware Analysis Lab 21-01.exe_",
-      "tags": ["static"]
-    },
-    {
-      "key": "al-khaser x86",
-      "path": "data/al-khaser_x86.exe_",
-      "tags": ["static"]
-    },
-    {
-      "key": "39c05",
-      "path": "data/39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_",
-      "tags": ["static"]
-    },
-    {
-      "key": "499c2",
-      "path": "data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32",
-      "tags": ["static"]
-    },
-    {
-      "key": "9324d",
-      "path": "data/9324d1a8ae37a36ae560c37448c9705a.exe_",
-      "tags": ["static"]
-    },
-    {
-      "key": "395eb",
-      "path": "data/395eb0ddd99d2c9e37b6d0b73485ee9c.exe_",
-      "tags": ["static"]
-    },
-    {
-      "key": "a933a",
-      "path": "data/a933a1a402775cfa94b6bee0963f4b46.dll_",
-      "tags": ["static"]
-    },
-    {
-      "key": "bfb9b",
-      "path": "data/bfb9b5391a13d0afd787e87ab90f14f5.dll_",
-      "tags": ["static"]
-    },
-    {
-      "key": "82bf6",
-      "path": "data/82BF6347ACF15E5D883715DC289D8A2B.exe_",
-      "tags": ["static"]
-    },
-    {
-      "key": "pingtaest",
-      "path": "data/ping_t\u00e4st.exe_",
-      "tags": ["static"]
-    },
-    {
-      "key": "3b13b",
-      "path": "data/3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_",
-      "tags": ["static"]
-    },
-    {
-      "key": "2f7f5f",
-      "path": "data/2f7f5fb5de175e770d7eae87666f9831.elf_",
-      "tags": ["elf", "static"]
-    },
-    {
-      "key": "b5f052",
-      "path": "data/b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_",
-      "tags": ["elf", "static"]
-    },
-    {
-      "key": "bf7a9c",
-      "path": "data/bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_",
-      "tags": ["elf", "static"]
-    },
-    {
-      "key": "1038a2",
-      "path": "data/1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_",
-      "tags": ["elf", "static"]
-    },
-    {
-      "key": "3da7c",
-      "path": "data/3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_",
-      "tags": ["elf", "static"]
    }
  ],
  "features": [
@@ -1146,37 +1041,37 @@
      "file": "2bf18d",
      "location": "function=0x4027b3,bb=0x402861,insn=0x40286d",
      "feature": "api: __GI_connect",
-      "explanation": "API from symbol table alternative name"
+      "explanation": "API from ELF symbol table alternative name"
    },
    {
      "file": "2bf18d",
      "location": "function=0x4027b3,bb=0x402861,insn=0x40286d",
      "feature": "api: connect",
-      "explanation": "API from symbol table alternative name"
+      "explanation": "API from ELF symbol table alternative name"
    },
    {
      "file": "2bf18d",
      "location": "function=0x4027b3,bb=0x402861,insn=0x40286d",
      "feature": "api: __libc_connect",
-      "explanation": "API from symbol table alternative name"
+      "explanation": "API from ELF symbol table alternative name"
    },
    {
      "file": "2bf18d",
      "location": "function=0x4088a4",
      "feature": "function-name: __GI_connect",
-      "explanation": "function name from symbol table alternative name"
+      "explanation": "function name from ELF symbol table alternative name"
    },
    {
      "file": "2bf18d",
      "location": "function=0x4088a4",
      "feature": "function-name: connect",
-      "explanation": "function name from symbol table alternative name"
+      "explanation": "function name from ELF symbol table alternative name"
    },
    {
      "file": "2bf18d",
      "location": "function=0x4088a4",
      "feature": "function-name: __libc_connect",
-      "explanation": "function name from symbol table alternative name"
+      "explanation": "function name from ELF symbol table alternative name"
    },
    {
      "file": "mimikatz",
@@ -3,26 +3,12 @@
    {
      "key": "93b2d1-vmray",
      "path": "data/dynamic/vmray/93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795_min_archive.zip",
-      "tags": [
-        "dynamic",
-        "vmray"
-      ]
-    },
-    {
-      "key": "2f8a79-vmray",
-      "path": "data/dynamic/vmray/2f8a79b12a7a989ac7e5f6ec65050036588a92e65aeb6841e08dc228ff0e21b4_min_archive.zip",
-      "tags": [
-        "dynamic",
-        "vmray"
-      ]
+      "tags": ["dynamic", "vmray"]
    },
    {
      "key": "eb1287-vmray",
      "path": "data/dynamic/vmray/eb12873c0ce3e9ea109c2a447956cbd10ca2c3e86936e526b2c6e28764999f21_min_archive.zip",
-      "tags": [
-        "dynamic",
-        "vmray"
-      ]
+      "tags": ["dynamic", "vmray"]
    }
  ],
  "features": [
@@ -23,16 +23,3 @@ import fixtures
 def test_binexport_features(feature_fixture):
    extractor = fixtures.get_binexport_extractor(feature_fixture.sample_path)
    fixtures.run_feature_fixture(extractor, feature_fixture)
-
-
-@fixtures.parametrize(
-    "sample,scope,feature,expected",
-    fixtures.FEATURE_COUNT_TESTS_BE2_INTEL,
-    indirect=["sample", "scope"],
-)
-def test_binexport_feature_counts_intel(sample, scope, feature, expected):
-    sample = sample.parent / "binexport2" / (sample.name + ".ghidra.BinExport")
-    assert sample.exists()
-    fixtures.do_test_feature_count(
-        fixtures.get_binexport_extractor, sample, scope, feature, expected
-    )
@@ -14,10 +14,10 @@

 import textwrap

-import capa.rules
-import capa.features.common
 import fixtures

+import capa.rules
+import capa.features.common
 import capa.capabilities.common
 import capa.features.extractors.null
 from capa.features.address import AbsoluteVirtualAddress
@@ -0,0 +1,24 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from fixtures import get_fixture_files
+
+
+def test_no_orphaned_file_entries():
+    for manifest_path, data in get_fixture_files():
+        feature_refs = {feat["file"] for feat in data.get("features", [])}
+        for entry in data["files"]:
+            assert entry["key"] in feature_refs, (
+                f"file entry {entry['key']!r} in {manifest_path.name} is not referenced by any feature"
+            )
@@ -27,6 +27,11 @@ idalib_present = idalib.has_idalib()
 if idalib_present:
    try:
        if True:
+            # in order to use idalib, we have to import the idapro package
+            # which manipulates the search path as a side effect.
+            # we have to do this before importing ida_* packages.
+            # but isort wants to put idapro after ida_kernwin, so we use
+            # this dumb branch to keep the ordering correct.
            import idapro  # noqa: F401 [imported but unused]
        import ida_kernwin

@@ -14,6 +14,7 @@

 import copy
 from typing import Any
+from pathlib import Path

 import pytest

@@ -26,21 +27,26 @@ import capa.render.result_document as rd
 import capa.features.freeze.features
 from capa.helpers import assert_never

+CD = Path(__file__).resolve().parent

-@pytest.mark.parametrize(
-    "rd_file",
-    [
-        pytest.param("a3f3bbc_rd"),
-        pytest.param("al_khaserx86_rd"),
-        pytest.param("al_khaserx64_rd"),
-        pytest.param("a076114_rd"),
-        pytest.param("pma0101_rd"),
-        pytest.param("dotnet_1c444e_rd"),
-        pytest.param("dynamic_a0000a6_rd"),
-    ],
+STATIC_RD_FILES = [
+    pytest.param(CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json", id="a3f3bbc"),
+    pytest.param(CD / "data" / "rd" / "al-khaser_x86.exe_.json", id="al_khaserx86"),
+    pytest.param(CD / "data" / "rd" / "al-khaser_x64.exe_.json", id="al_khaserx64"),
+    pytest.param(CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json", id="a076114"),
+    pytest.param(CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json", id="pma0101"),
+    pytest.param(CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json", id="dotnet_1c444e"),
+]
+
+DYNAMIC_RD_FILE = pytest.param(
+    CD / "data" / "rd" / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz",
+    id="dynamic_a0000a6",
 )
-def test_doc_to_pb2(request, rd_file):
-    src: rd.ResultDocument = request.getfixturevalue(rd_file)
+
+
+@pytest.mark.parametrize("rd_path", STATIC_RD_FILES + [DYNAMIC_RD_FILE])
+def test_doc_to_pb2(rd_path):
+    src = rd.ResultDocument.from_file(rd_path)
    dst = capa.render.proto.doc_to_pb2(src)

    assert_meta(src.meta, dst.meta)
@@ -398,18 +404,7 @@ def assert_round_trip(doc: rd.ResultDocument):
    assert one_bytes != three_bytes


-@pytest.mark.parametrize(
-    "rd_file",
-    [
-        pytest.param("a3f3bbc_rd"),
-        pytest.param("al_khaserx86_rd"),
-        pytest.param("al_khaserx64_rd"),
-        pytest.param("a076114_rd"),
-        pytest.param("pma0101_rd"),
-        pytest.param("dotnet_1c444e_rd"),
-        pytest.param("dynamic_a0000a6_rd"),
-    ],
-)
-def test_round_trip(request, rd_file):
-    doc: rd.ResultDocument = request.getfixturevalue(rd_file)
+@pytest.mark.parametrize("rd_path", STATIC_RD_FILES + [DYNAMIC_RD_FILE])
+def test_round_trip(rd_path):
+    doc = rd.ResultDocument.from_file(rd_path)
    assert_round_trip(doc)
@@ -254,8 +254,11 @@ def test_render_vverbose_feature(feature, expected):
    assert output == expected


-def test_render_default_returns_non_empty(pma0101_rd):
-    output = capa.render.default.render_default(pma0101_rd)
+def test_render_default_returns_non_empty():
+    rd = capa.render.result_document.ResultDocument.from_file(
+        fixtures.CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"
+    )
+    output = capa.render.default.render_default(rd)
    assert output != ""
    assert "md5" in output
    assert "290934c61de9176ad682ffdd65f0a669" in output
@@ -268,18 +268,18 @@ def assert_round_trip(rd: rdoc.ResultDocument):


@pytest.mark.parametrize(
-    "rd_file",
+    "rd_path",
    [
-        pytest.param("a3f3bbc_rd"),
-        pytest.param("al_khaserx86_rd"),
-        pytest.param("al_khaserx64_rd"),
-        pytest.param("a076114_rd"),
-        pytest.param("pma0101_rd"),
-        pytest.param("dotnet_1c444e_rd"),
+        pytest.param(fixtures.CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json", id="a3f3bbc"),
+        pytest.param(fixtures.CD / "data" / "rd" / "al-khaser_x86.exe_.json", id="al_khaserx86"),
+        pytest.param(fixtures.CD / "data" / "rd" / "al-khaser_x64.exe_.json", id="al_khaserx64"),
+        pytest.param(fixtures.CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json", id="a076114"),
+        pytest.param(fixtures.CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json", id="pma0101"),
+        pytest.param(fixtures.CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json", id="dotnet_1c444e"),
    ],
 )
-def test_round_trip(request, rd_file):
-    rd: rdoc.ResultDocument = request.getfixturevalue(rd_file)
+def test_round_trip(rd_path):
+    rd = rdoc.ResultDocument.from_file(rd_path)
    assert_round_trip(rd)


@@ -21,12 +21,14 @@ import subprocess
 from pathlib import Path

 import pytest
-
-import capa.rules
 import fixtures

+import capa.rules
+
 logger = logging.getLogger(__name__)

+CD = Path(__file__).resolve().parent
+

 def get_script_path(s: str):
    return str(fixtures.CD / ".." / "scripts" / s)