tests: make fixtures more consistent in prep for other backends

2025-12-23 07:28:34 -08:00 · 2020-08-14 12:04:53 -06:00
parent d161c094a6
commit b784f086b4
4 changed files with 108 additions and 113 deletions
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -12,6 +12,7 @@ import collections

 import pytest

+import capa.main
 import capa.features.file
 import capa.features.insn
 import capa.features.basicblock
@@ -26,6 +27,19 @@ except ImportError:
 CD = os.path.dirname(__file__)


+@lru_cache()
+def get_viv_extractor(path):
+    import capa.features.extractors.viv
+
+    if "raw32" in path:
+        vw = capa.main.get_workspace(path, "sc32", should_save=False)
+    elif "raw64" in path:
+        vw = capa.main.get_workspace(path, "sc64", should_save=False)
+    else:
+        vw = capa.main.get_workspace(path, "auto", should_save=True)
+    return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
+
+
@lru_cache()
 def extract_file_features(extractor):
    features = collections.defaultdict(set)
@@ -59,26 +73,42 @@ def extract_basic_block_features(extractor, f, bb):
    return features


-@pytest.fixture
-def sample(request):
-    if request.param == "mimikatz":
+def get_data_path_by_name(name):
+    if name == "mimikatz":
        return os.path.join(CD, "data", "mimikatz.exe_")
-    elif request.param == "kernel32":
+    elif name == "kernel32":
        return os.path.join(CD, "data", "kernel32.dll_")
-    elif request.param == "kernel32-64":
+    elif name == "kernel32-64":
        return os.path.join(CD, "data", "kernel32-64.dll_")
-    elif request.param == "pma12-04":
+    elif name == "pma12-04":
        return os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
-    elif request.param.startswith("a1982"):
-        return os.path.join(CD, "data", "a198216798ca38f280dc413f8c57f2c2.exe_")
-    elif request.param.startswith("39c05"):
+    elif name == "pma21-01":
+        return os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
+    elif name == "al-khaser x86":
+        return os.path.join(CD, "data", "al-khaser_x86.exe_")
+    elif name.startswith("39c05"):
        return os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_")
-    elif request.param.startswith("c9188"):
+    elif name.startswith("499c2"):
+        return os.path.join(CD, "data", "499c2a85f6e8142c3f48d4251c9c7cd6.raw32")
+    elif name.startswith("9324d"):
+        return os.path.join(CD, "data", "9324d1a8ae37a36ae560c37448c9705a.exe_")
+    elif name.startswith("a1982"):
+        return os.path.join(CD, "data", "a198216798ca38f280dc413f8c57f2c2.exe_")
+    elif name.startswith("a933a"):
+        return os.path.join(CD, "data", "a933a1a402775cfa94b6bee0963f4b46.dll_")
+    elif name.startswith("bfb9b"):
+        return os.path.join(CD, "data", "bfb9b5391a13d0afd787e87ab90f14f5.dll_")
+    elif name.startswith("c9188"):
        return os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
    else:
        raise ValueError("unexpected sample fixture")


+@pytest.fixture
+def sample(request):
+    return get_data_path_by_name(request.param)
+
+
 def get_function(extractor, fva):
    for f in extractor.get_functions():
        if f.__int__() == fva:
@@ -303,88 +333,71 @@ def do_test_feature_count(get_extractor, sample, scope, feature, expected):
    assert len(features[feature]) == expected, msg


-Sample = collections.namedtuple("Sample", ["vw", "path"])
+def get_extractor(path):
+    # decide here which extractor to load for tests.
+    # maybe check which python version we've loaded or if we're in IDA.
+    extractor = get_viv_extractor(path)
+
+    # overload the extractor so that the fixture exposes `extractor.path`
+    setattr(extractor, "path", path)
+    return extractor


@pytest.fixture
-def mimikatz():
-    import viv_utils
-    path = os.path.join(CD, "data", "mimikatz.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def mimikatz_extractor():
+    return get_extractor(get_data_path_by_name("mimikatz"))


@pytest.fixture
-def sample_a933a1a402775cfa94b6bee0963f4b46():
-    import viv_utils
-    path = os.path.join(CD, "data", "a933a1a402775cfa94b6bee0963f4b46.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def a933a_extractor():
+    return get_extractor(get_data_path_by_name("a933a..."))


@pytest.fixture
-def kernel32():
-    import viv_utils
-    path = os.path.join(CD, "data", "kernel32.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def kernel32_extractor():
+    return get_extractor(get_data_path_by_name("kernel32"))


@pytest.fixture
-def sample_a198216798ca38f280dc413f8c57f2c2():
-    import viv_utils
-    path = os.path.join(CD, "data", "a198216798ca38f280dc413f8c57f2c2.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def a1982_extractor():
+    return get_extractor(get_data_path_by_name("a1982..."))


@pytest.fixture
-def sample_9324d1a8ae37a36ae560c37448c9705a():
-    import viv_utils
-    path = os.path.join(CD, "data", "9324d1a8ae37a36ae560c37448c9705a.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def z9324d_extractor():
+    return get_extractor(get_data_path_by_name("9324d..."))


@pytest.fixture
-def pma_lab_12_04():
-    import viv_utils
-    path = os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def pma12_04_extractor():
+    return get_extractor(get_data_path_by_name("pma12-04"))


@pytest.fixture
-def sample_bfb9b5391a13d0afd787e87ab90f14f5():
-    import viv_utils
-    path = os.path.join(CD, "data", "bfb9b5391a13d0afd787e87ab90f14f5.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def bfb9b_extractor():
+    return get_extractor(get_data_path_by_name("bfb9b..."))


@pytest.fixture
-def sample_lab21_01():
-    import viv_utils
-    path = os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def pma21_01_extractor():
+    return get_extractor(get_data_path_by_name("pma21-01"))


@pytest.fixture
-def sample_c91887d861d9bd4a5872249b641bc9f9():
-    import viv_utils
-    path = os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def c9188_extractor():
+    return get_extractor(get_data_path_by_name("c9188..."))


@pytest.fixture
-def sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41():
-    import viv_utils
-    path = os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_",)
-    return Sample(viv_utils.getWorkspace(path), path)
+def z39c05_extractor():
+    return get_extractor(get_data_path_by_name("39c05..."))


@pytest.fixture
-def sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32():
-    import viv_utils
-    path = os.path.join(CD, "data", "499c2a85f6e8142c3f48d4251c9c7cd6.raw32")
-    return Sample(viv_utils.getShellcodeWorkspace(path), path)
+def z499c2_extractor():
+    return get_extractor(get_data_path_by_name("499c2..."))


@pytest.fixture
-def sample_al_khaser_x86():
-    import viv_utils
-    path = os.path.join(CD, "data", "al-khaser_x86.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def al_khaser_x86_extractor():
+    return get_extractor(get_data_path_by_name("al-khaser x86"))
--- a/tests/test_freeze.py
+++ b/tests/test_freeze.py
@@ -169,18 +169,20 @@ def test_serialize_features():
    roundtrip_feature(capa.features.file.Import("#11"))


-def test_freeze_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_freeze_sample(tmpdir, z9324d_extractor):
    # tmpdir fixture handles cleanup
    o = tmpdir.mkdir("capa").join("test.frz").strpath
-    assert capa.features.freeze.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, o, "-v"]) == 0
+    path = z9324d_extractor.path
+    assert capa.features.freeze.main([path, o, "-v"]) == 0


-def test_freeze_load_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_freeze_load_sample(tmpdir, z9324d_extractor):
    o = tmpdir.mkdir("capa").join("test.frz")
-    viv_extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
+
    with open(o.strpath, "wb") as f:
-        f.write(capa.features.freeze.dump(viv_extractor))
-    null_extractor = capa.features.freeze.load(o.open("rb").read())
-    compare_extractors_viv_null(viv_extractor, null_extractor)
+        f.write(capa.features.freeze.dump(z9324d_extractor))
+
+    with open(o.strpath, "rb") as f:
+        null_extractor = capa.features.freeze.load(f.read())
+
+    compare_extractors_viv_null(z9324d_extractor, null_extractor)
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -18,15 +18,16 @@ import capa.features.extractors.viv
 from capa.engine import *


-def test_main(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_main(z9324d_extractor):
    # tests rules can be loaded successfully and all output modes
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-vv"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-j"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path]) == 0
+    path = z9324d_extractor.path
+    assert capa.main.main([path, "-vv"]) == 0
+    assert capa.main.main([path, "-v"]) == 0
+    assert capa.main.main([path, "-j"]) == 0
+    assert capa.main.main([path]) == 0


-def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
+def test_main_single_rule(z9324d_extractor, tmpdir):
    # tests a single rule can be loaded successfully
    RULE_CONTENT = textwrap.dedent(
        """
@@ -38,16 +39,18 @@ def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
              - string: test
        """
    )
+    path = z9324d_extractor.path
    rule_file = tmpdir.mkdir("capa").join("rule.yml")
    rule_file.write(RULE_CONTENT)
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v", "-r", rule_file.strpath,]) == 0
+    assert capa.main.main([path, "-v", "-r", rule_file.strpath,]) == 0


-def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32):
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-vv", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-v", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-j", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-f", "sc32"]) == 0
+def test_main_shellcode(z499c2_extractor):
+    path = z499c2_extractor.path
+    assert capa.main.main([path, "-vv", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-v", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-j", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-f", "sc32"]) == 0


 def test_ruleset():
@@ -96,7 +99,7 @@ def test_ruleset():
    assert len(rules.basic_block_rules) == 1


-def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_match_across_scopes_file_function(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a function (0x4073F0)
@@ -153,16 +156,13 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
            ),
        ]
    )
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "install service" in capabilities
    assert ".text section" in capabilities
    assert ".text section and install service" in capabilities


-def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_match_across_scopes(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a basic block (including at least 0x403685)
@@ -218,16 +218,13 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
            ),
        ]
    )
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "tight loop" in capabilities
    assert "kill thread loop" in capabilities
    assert "kill thread program" in capabilities


-def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_subscope_bb_rules(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -247,14 +244,11 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
        ]
    )
    # tight loop at 0x403685
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "test rule" in capabilities


-def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_byte_matching(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -272,15 +266,11 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
            )
        ]
    )
-
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "byte match test" in capabilities


-def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_count_bb(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -299,9 +289,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
            )
        ]
    )
-
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "count bb" in capabilities
--- a/tests/test_viv_features.py
+++ b/tests/test_viv_features.py
@@ -12,12 +12,6 @@ import capa.main
 import capa.features.extractors.viv


-@lru_cache()
-def get_viv_extractor(path):
-    vw = capa.main.get_workspace(path, "auto", should_save=True)
-    return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
-
-
@parametrize(
    "sample,scope,feature,expected", FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"],
 )