diff --git a/tests/fixtures.py b/tests/fixtures.py index 060925ef..2edac7be 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -25,7 +25,6 @@ import pytest import capa.rules import capa.engine as ceng -import capa.loader import capa.render.result_document from capa.features.common import OS_AUTO, FORMAT_AUTO, Feature from capa.features.address import Address @@ -44,8 +43,7 @@ from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor logger = logging.getLogger(__name__) CD = Path(__file__).resolve().parent FIXTURE_MANIFEST_DIR = CD / "fixtures" / "features" -DOTNET_DIR = CD / "data" / "dotnet" -DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles" +DNFILE_TESTFILES = CD / "data" / "dotnet" / "dnfile-testfiles" def parse_feature_string(s: str) -> Feature | ceng.Range | ceng.Statement: @@ -102,7 +100,6 @@ KNOWN_FIXTURE_TAGS: set[str] = ( "dotnet", # .NET format "elf", # ELF format "flirt", # requires FLIRT signature matching - "symtab", # requires ELF symbol table parsing TODO: can we remove this? "binja-db", # Binary Ninja database format "binexport", # BinExport2 format "aarch64", # AArch64 architecture @@ -366,44 +363,6 @@ def run_feature_fixture( assert actual == fixture.expected, msg -@contextlib.contextmanager -def xfail(condition, reason: str = ""): - """ - context manager that wraps a block that is expected to fail in some cases. - when it does fail (and is expected), then mark this as pytest.xfail. - if its unexpected, raise an exception, so the test fails. 
- - example:: - - # this test: - # - passes on Linux if foo() works - # - fails on Linux if foo() fails - # - xfails on Windows if foo() fails - # - fails on Windows if foo() works - with xfail(sys.platform == "win32", reason="doesn't work on Windows"): - foo() - """ - try: - # do the block - yield - except Exception: - if condition: - # we expected the test to fail, so raise and register this via pytest - pytest.xfail(reason or "") - else: - # we don't expect an exception, so the test should fail - raise - else: - if not condition: - # here we expect the block to run successfully, - # and we've received no exception, - # so this is good - pass - else: - # we expected an exception, but didn't find one. that's an error. - raise RuntimeError("expected to fail, but didn't") - - def extract_global_features(extractor): features = collections.defaultdict(set) for feature, va in extractor.extract_global_features(): @@ -671,11 +630,6 @@ def resolve_scope(scope): raise ValueError("unexpected scope fixture") -@pytest.fixture -def scope(request): - return resolve_scope(request.param) - - def make_test_id(values): return "-".join(map(str, values)) @@ -692,29 +646,6 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -FEATURE_COUNT_TESTS_BE2_INTEL = [ - ( - "mimikatz", - "function=0x40105d,bb=0x401125,insn=0x401125", - capa.features.insn.Offset(0), - 1, - ), - ( - "mimikatz", - "function=0x40105d,bb=0x401125,insn=0x401125", - capa.features.insn.OperandOffset(1, 0), - 1, - ), -] - - -def do_test_feature_count(get_extractor, sample, scope, feature, expected): - extractor = get_extractor(sample) - features = scope(extractor) - assert features.get(feature, set()) != set(), f"{feature} should be found in {scope.__name__}" - assert len(features[feature]) == expected, f"{feature} should be found {expected} times in {scope.__name__}" - - def get_result_doc(path: Path): return 
capa.render.result_document.ResultDocument.from_file(path) @@ -766,14 +697,13 @@ def dynamic_a0000a6_rd(): PMA1601 = CD / "data" / "Practical Malware Analysis Lab 16-01.exe_" -z9324 = CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_" # used by test_viv_features # as well as some fixtures below @functools.lru_cache(maxsize=1) def get_viv_extractor(path: Path): - import capa.main + import capa.loader import capa.features.extractors.viv.extractor sigpaths = [ @@ -809,7 +739,7 @@ def get_viv_extractor(path: Path): @pytest.fixture def z9324d_extractor(): - return get_viv_extractor(z9324) + return get_viv_extractor(CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_") @pytest.fixture @@ -902,6 +832,7 @@ def get_ghidra_extractor(path: Path): if not pyghidra.started(): pyghidra.start() + import capa.loader import capa.features.extractors.ghidra.context if path in GHIDRA_CACHE: diff --git a/tests/fixtures/features/README.md b/tests/fixtures/features/README.md index a164229a..fbaadc9c 100644 --- a/tests/fixtures/features/README.md +++ b/tests/fixtures/features/README.md @@ -78,7 +78,6 @@ Examples include: - `elf` - `dynamic` - `flirt` -- `symtab` - `binja-db` - `binexport` - `aarch64` @@ -145,13 +144,17 @@ Module-level availability checks are still allowed. runtime-specific hooks are a Known backend bugs should be represented in the fixture manifests through backend-specific marks. -Backends should not usually edit the shared JSON manifests just to avoid a fixture. they should prefer selecting or excluding fixtures through backend policy. +Backends should not usually edit the shared JSON manifests just to avoid a fixture. They should prefer selecting or excluding fixtures through backend policy. 
The main reason to keep marks in JSON is to record known exceptions such as: - a backend-specific `xfail` - a backend-specific `skip` +Ideally, this information would be better placed in the backend-specific test code (like test_viv_features.py); +however, in order to triage one of these failures, you have to go look at the JSON file anyway, and it's +easier to see the mark next to the thing that fails. + ## Expected contributor workflow When adding a new feature test: diff --git a/tests/fixtures/features/binexport.json b/tests/fixtures/features/binexport.json index 1a2a08c6..5e953e62 100644 --- a/tests/fixtures/features/binexport.json +++ b/tests/fixtures/features/binexport.json @@ -1055,6 +1055,18 @@ "location": "function=0x401000", "feature": "count(basic blocks): 3", "explanation": "Ghidra: 3 basic blocks in function" + }, + { + "file": "mimikatz.ghidra.be2", + "location": "function=0x40105d,bb=0x401125,insn=0x401125", + "feature": "count(offset(0x0)): 1", + "explanation": "MOV [EDI], CX matches OFFSET_ZERO_PATTERNS, must yield Offset(0) exactly once" + }, + { + "file": "mimikatz.ghidra.be2", + "location": "function=0x40105d,bb=0x401125,insn=0x401125", + "feature": "count(operand[1].offset(0x0)): 1", + "explanation": "MOV [EDI], CX matches OFFSET_ZERO_PATTERNS, must yield OperandOffset(1, 0) exactly once" } ] } diff --git a/tests/fixtures/features/cape.json b/tests/fixtures/features/cape.json index eff2e038..61bd90d5 100644 --- a/tests/fixtures/features/cape.json +++ b/tests/fixtures/features/cape.json @@ -3,18 +3,7 @@ { "key": "0000a657", "path": "data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz", - "tags": [ - "dynamic", - "cape" - ] - }, - { - "key": "d46900", - "path": "data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz", - "tags": [ - "dynamic", - "cape" - ] + "tags": ["dynamic", "cape"] } ], "features": [ diff --git a/tests/fixtures/features/static.json 
b/tests/fixtures/features/static.json index e274d64e..049c97e0 100644 --- a/tests/fixtures/features/static.json +++ b/tests/fixtures/features/static.json @@ -83,7 +83,7 @@ { "key": "2bf18d", "path": "data/2bf18d0403677378adad9001b1243211.elf_", - "tags": ["elf", "static", "symtab"] + "tags": ["elf", "static"] }, { "key": "2d3edc", @@ -95,22 +95,6 @@ "path": "data/ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_", "tags": ["static"] }, - { - "key": "pma01-01.frz", - "path": "fixtures/freeze/Practical Malware Analysis Lab 01-01.dll_.frz" - }, - { - "key": "009c2377.frz", - "path": "fixtures/freeze/009c2377b67997b0da1579f4bbc822c1.exe_.frz" - }, - { - "key": "055da8e6.frz", - "path": "fixtures/freeze/055da8e6ccfe5a9380231ea04b850e18.elf_.frz" - }, - { - "key": "034b7231.frz", - "path": "fixtures/freeze/034b7231a49387604e81a5a5d2fe7e08f6982c418a28b719d2faace3c312ebb5.exe_.frz" - }, { "key": "b9f5b", "path": "data/b9f5bd514485fb06da39beff051b9fdc.exe_", @@ -160,95 +144,6 @@ "key": "nested_typeref", "path": "data/dotnet/2c7d60f77812607dec5085973ff76cea.dll_", "tags": ["static"] - }, - { - "key": "pma01-01", - "path": "data/Practical Malware Analysis Lab 01-01.dll_", - "tags": ["static"] - }, - { - "key": "pma01-01-rd", - "path": "data/rd/Practical Malware Analysis Lab 01-01.dll_.json" - }, - { - "key": "pma21-01", - "path": "data/Practical Malware Analysis Lab 21-01.exe_", - "tags": ["static"] - }, - { - "key": "al-khaser x86", - "path": "data/al-khaser_x86.exe_", - "tags": ["static"] - }, - { - "key": "39c05", - "path": "data/39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_", - "tags": ["static"] - }, - { - "key": "499c2", - "path": "data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32", - "tags": ["static"] - }, - { - "key": "9324d", - "path": "data/9324d1a8ae37a36ae560c37448c9705a.exe_", - "tags": ["static"] - }, - { - "key": "395eb", - "path": "data/395eb0ddd99d2c9e37b6d0b73485ee9c.exe_", - "tags": ["static"] - }, - { - "key": 
"a933a", - "path": "data/a933a1a402775cfa94b6bee0963f4b46.dll_", - "tags": ["static"] - }, - { - "key": "bfb9b", - "path": "data/bfb9b5391a13d0afd787e87ab90f14f5.dll_", - "tags": ["static"] - }, - { - "key": "82bf6", - "path": "data/82BF6347ACF15E5D883715DC289D8A2B.exe_", - "tags": ["static"] - }, - { - "key": "pingtaest", - "path": "data/ping_t\u00e4st.exe_", - "tags": ["static"] - }, - { - "key": "3b13b", - "path": "data/3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_", - "tags": ["static"] - }, - { - "key": "2f7f5f", - "path": "data/2f7f5fb5de175e770d7eae87666f9831.elf_", - "tags": ["elf", "static"] - }, - { - "key": "b5f052", - "path": "data/b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_", - "tags": ["elf", "static"] - }, - { - "key": "bf7a9c", - "path": "data/bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_", - "tags": ["elf", "static"] - }, - { - "key": "1038a2", - "path": "data/1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_", - "tags": ["elf", "static"] - }, - { - "key": "3da7c", - "path": "data/3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_", - "tags": ["elf", "static"] } ], "features": [ @@ -1146,37 +1041,37 @@ "file": "2bf18d", "location": "function=0x4027b3,bb=0x402861,insn=0x40286d", "feature": "api: __GI_connect", - "explanation": "API from symbol table alternative name" + "explanation": "API from ELF symbol table alternative name" }, { "file": "2bf18d", "location": "function=0x4027b3,bb=0x402861,insn=0x40286d", "feature": "api: connect", - "explanation": "API from symbol table alternative name" + "explanation": "API from ELF symbol table alternative name" }, { "file": "2bf18d", "location": "function=0x4027b3,bb=0x402861,insn=0x40286d", "feature": "api: __libc_connect", - "explanation": "API from symbol table alternative name" + "explanation": "API from ELF symbol table alternative name" }, { "file": "2bf18d", "location": "function=0x4088a4", 
"feature": "function-name: __GI_connect", - "explanation": "function name from symbol table alternative name" + "explanation": "function name from ELF symbol table alternative name" }, { "file": "2bf18d", "location": "function=0x4088a4", "feature": "function-name: connect", - "explanation": "function name from symbol table alternative name" + "explanation": "function name from ELF symbol table alternative name" }, { "file": "2bf18d", "location": "function=0x4088a4", "feature": "function-name: __libc_connect", - "explanation": "function name from symbol table alternative name" + "explanation": "function name from ELF symbol table alternative name" }, { "file": "mimikatz", diff --git a/tests/fixtures/features/vmray.json b/tests/fixtures/features/vmray.json index ae114971..9f6cf5d7 100644 --- a/tests/fixtures/features/vmray.json +++ b/tests/fixtures/features/vmray.json @@ -3,26 +3,12 @@ { "key": "93b2d1-vmray", "path": "data/dynamic/vmray/93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795_min_archive.zip", - "tags": [ - "dynamic", - "vmray" - ] - }, - { - "key": "2f8a79-vmray", - "path": "data/dynamic/vmray/2f8a79b12a7a989ac7e5f6ec65050036588a92e65aeb6841e08dc228ff0e21b4_min_archive.zip", - "tags": [ - "dynamic", - "vmray" - ] + "tags": ["dynamic", "vmray"] }, { "key": "eb1287-vmray", "path": "data/dynamic/vmray/eb12873c0ce3e9ea109c2a447956cbd10ca2c3e86936e526b2c6e28764999f21_min_archive.zip", - "tags": [ - "dynamic", - "vmray" - ] + "tags": ["dynamic", "vmray"] } ], "features": [ diff --git a/tests/test_binexport_features.py b/tests/test_binexport_features.py index 2172954f..dbb05afc 100644 --- a/tests/test_binexport_features.py +++ b/tests/test_binexport_features.py @@ -23,16 +23,3 @@ import fixtures def test_binexport_features(feature_fixture): extractor = fixtures.get_binexport_extractor(feature_fixture.sample_path) fixtures.run_feature_fixture(extractor, feature_fixture) - - -@fixtures.parametrize( - "sample,scope,feature,expected", - 
fixtures.FEATURE_COUNT_TESTS_BE2_INTEL, - indirect=["sample", "scope"], -) -def test_binexport_feature_counts_intel(sample, scope, feature, expected): - sample = sample.parent / "binexport2" / (sample.name + ".ghidra.BinExport") - assert sample.exists() - fixtures.do_test_feature_count( - fixtures.get_binexport_extractor, sample, scope, feature, expected - ) diff --git a/tests/test_capabilities.py b/tests/test_capabilities.py index c9cb6464..3539a524 100644 --- a/tests/test_capabilities.py +++ b/tests/test_capabilities.py @@ -14,10 +14,10 @@ import textwrap -import capa.rules -import capa.features.common import fixtures +import capa.rules +import capa.features.common import capa.capabilities.common import capa.features.extractors.null from capa.features.address import AbsoluteVirtualAddress diff --git a/tests/test_fixture_manifests.py b/tests/test_fixture_manifests.py new file mode 100644 index 00000000..60b88097 --- /dev/null +++ b/tests/test_fixture_manifests.py @@ -0,0 +1,24 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from fixtures import get_fixture_files + + +def test_no_orphaned_file_entries(): + for manifest_path, data in get_fixture_files(): + feature_refs = {feat["file"] for feat in data.get("features", [])} + for entry in data["files"]: + assert entry["key"] in feature_refs, ( + f"file entry {entry['key']!r} in {manifest_path.name} is not referenced by any feature" + ) diff --git a/tests/test_idalib_features.py b/tests/test_idalib_features.py index 61835b9a..855cc396 100644 --- a/tests/test_idalib_features.py +++ b/tests/test_idalib_features.py @@ -27,6 +27,11 @@ idalib_present = idalib.has_idalib() if idalib_present: try: if True: + # in order to use idalib, we have to import the idapro package + # which manipulates the search path as a side effect. + # we have to do this before importing ida_* packages. + # but isort wants to put idapro after ida_kernwin, so we use + # this dumb branch to keep the ordering correct. import idapro # noqa: F401 [imported but unused] import ida_kernwin diff --git a/tests/test_proto.py b/tests/test_proto.py index 85b4f6d0..d4907968 100644 --- a/tests/test_proto.py +++ b/tests/test_proto.py @@ -14,6 +14,7 @@ import copy from typing import Any +from pathlib import Path import pytest @@ -26,21 +27,26 @@ import capa.render.result_document as rd import capa.features.freeze.features from capa.helpers import assert_never +CD = Path(__file__).resolve().parent -@pytest.mark.parametrize( - "rd_file", - [ - pytest.param("a3f3bbc_rd"), - pytest.param("al_khaserx86_rd"), - pytest.param("al_khaserx64_rd"), - pytest.param("a076114_rd"), - pytest.param("pma0101_rd"), - pytest.param("dotnet_1c444e_rd"), - pytest.param("dynamic_a0000a6_rd"), - ], +STATIC_RD_FILES = [ + pytest.param(CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json", id="a3f3bbc"), + pytest.param(CD / "data" / "rd" / "al-khaser_x86.exe_.json", id="al_khaserx86"), + pytest.param(CD / "data" / "rd" / "al-khaser_x64.exe_.json", id="al_khaserx64"), + pytest.param(CD / "data" / 
"rd" / "0761142efbda6c4b1e801223de723578.dll_.json", id="a076114"), + pytest.param(CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json", id="pma0101"), + pytest.param(CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json", id="dotnet_1c444e"), +] + +DYNAMIC_RD_FILE = pytest.param( + CD / "data" / "rd" / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz", + id="dynamic_a0000a6", ) -def test_doc_to_pb2(request, rd_file): - src: rd.ResultDocument = request.getfixturevalue(rd_file) + + +@pytest.mark.parametrize("rd_path", STATIC_RD_FILES + [DYNAMIC_RD_FILE]) +def test_doc_to_pb2(rd_path): + src = rd.ResultDocument.from_file(rd_path) dst = capa.render.proto.doc_to_pb2(src) assert_meta(src.meta, dst.meta) @@ -398,18 +404,7 @@ def assert_round_trip(doc: rd.ResultDocument): assert one_bytes != three_bytes -@pytest.mark.parametrize( - "rd_file", - [ - pytest.param("a3f3bbc_rd"), - pytest.param("al_khaserx86_rd"), - pytest.param("al_khaserx64_rd"), - pytest.param("a076114_rd"), - pytest.param("pma0101_rd"), - pytest.param("dotnet_1c444e_rd"), - pytest.param("dynamic_a0000a6_rd"), - ], -) -def test_round_trip(request, rd_file): - doc: rd.ResultDocument = request.getfixturevalue(rd_file) +@pytest.mark.parametrize("rd_path", STATIC_RD_FILES + [DYNAMIC_RD_FILE]) +def test_round_trip(rd_path): + doc = rd.ResultDocument.from_file(rd_path) assert_round_trip(doc) diff --git a/tests/test_render.py b/tests/test_render.py index bb9e7dac..3ece52ce 100644 --- a/tests/test_render.py +++ b/tests/test_render.py @@ -254,8 +254,11 @@ def test_render_vverbose_feature(feature, expected): assert output == expected -def test_render_default_returns_non_empty(pma0101_rd): - output = capa.render.default.render_default(pma0101_rd) +def test_render_default_returns_non_empty(): + rd = capa.render.result_document.ResultDocument.from_file( + fixtures.CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json" + ) + output = 
capa.render.default.render_default(rd) assert output != "" assert "md5" in output assert "290934c61de9176ad682ffdd65f0a669" in output diff --git a/tests/test_result_document.py b/tests/test_result_document.py index b18763ab..ff058b92 100644 --- a/tests/test_result_document.py +++ b/tests/test_result_document.py @@ -268,18 +268,18 @@ def assert_round_trip(rd: rdoc.ResultDocument): @pytest.mark.parametrize( - "rd_file", + "rd_path", [ - pytest.param("a3f3bbc_rd"), - pytest.param("al_khaserx86_rd"), - pytest.param("al_khaserx64_rd"), - pytest.param("a076114_rd"), - pytest.param("pma0101_rd"), - pytest.param("dotnet_1c444e_rd"), + pytest.param(fixtures.CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json", id="a3f3bbc"), + pytest.param(fixtures.CD / "data" / "rd" / "al-khaser_x86.exe_.json", id="al_khaserx86"), + pytest.param(fixtures.CD / "data" / "rd" / "al-khaser_x64.exe_.json", id="al_khaserx64"), + pytest.param(fixtures.CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json", id="a076114"), + pytest.param(fixtures.CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json", id="pma0101"), + pytest.param(fixtures.CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json", id="dotnet_1c444e"), ], ) -def test_round_trip(request, rd_file): - rd: rdoc.ResultDocument = request.getfixturevalue(rd_file) +def test_round_trip(rd_path): + rd = rdoc.ResultDocument.from_file(rd_path) assert_round_trip(rd) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 3ca9eeb9..82417c64 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -21,12 +21,14 @@ import subprocess from pathlib import Path import pytest - -import capa.rules import fixtures +import capa.rules + logger = logging.getLogger(__name__) +CD = Path(__file__).resolve().parent + def get_script_path(s: str): return str(fixtures.CD / ".." / "scripts" / s)