diff --git a/CHANGELOG.md b/CHANGELOG.md index ff8c8861..9baf581f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,8 @@ - fix: rules/__init__.py: duplicate bytes_features line @williballenthin #3027 +- fix: EXTENSIONS_DYNAMIC has inconsistent leading dots @williballenthin #3028 + ### capa Explorer Web ### capa Explorer IDA Pro plugin diff --git a/capa/helpers.py b/capa/helpers.py index 45639da8..17dc2e13 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -58,16 +58,16 @@ from capa.features.common import ( Format, ) -EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") -EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") +EXTENSIONS_SHELLCODE_32 = (".sc32", ".raw32") +EXTENSIONS_SHELLCODE_64 = (".sc64", ".raw64") # CAPE (.json, .json_, .json.gz) # DRAKVUF (.log, .log.gz) # VMRay (.zip) -EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz", "log", ".log.gz", ".zip") -EXTENSIONS_BINEXPORT2 = ("BinExport", "BinExport2") -EXTENSIONS_ELF = "elf_" -EXTENSIONS_FREEZE = "frz" -EXTENSIONS_BINJA_DB = "bndb" +EXTENSIONS_DYNAMIC = (".json", ".json_", ".json.gz", ".log", ".log.gz", ".zip") +EXTENSIONS_BINEXPORT2 = (".BinExport", ".BinExport2") +EXTENSIONS_ELF = ".elf_" +EXTENSIONS_FREEZE = ".frz" +EXTENSIONS_BINJA_DB = ".bndb" logger = logging.getLogger("capa") @@ -197,7 +197,7 @@ def load_one_jsonl_from_path(jsonl_path: Path): def get_format_from_report(sample: Path) -> str: - if sample.name.endswith((".log", "log.gz")): + if sample.name.endswith((".log", ".log.gz")): line = load_one_jsonl_from_path(sample) if "Plugin" in line: return FORMAT_DRAKVUF @@ -207,7 +207,7 @@ def get_format_from_report(sample: Path) -> str: if "logs/summary_v2.json" in namelist and "logs/flog.xml" in namelist: # assume VMRay zipfile at a minimum has these files return FORMAT_VMRAY - elif sample.name.endswith(("json", "json_", "json.gz")): + elif sample.name.endswith((".json", ".json_", ".json.gz")): report = load_json_from_path(sample) if "CAPE" in report: return FORMAT_CAPE diff --git a/tests/test_helpers.py b/tests/test_helpers.py index f40b2b37..26cb9d6c 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -20,6 +20,15 @@ from pathlib import Path import pytest import capa.helpers +from capa.helpers import ( + EXTENSIONS_ELF, + EXTENSIONS_FREEZE, + EXTENSIONS_DYNAMIC, + EXTENSIONS_BINJA_DB, + EXTENSIONS_BINEXPORT2, + EXTENSIONS_SHELLCODE_32, + EXTENSIONS_SHELLCODE_64, +) from capa.features.extractors import helpers CD = Path(__file__).resolve().parent @@ -106,3 +115,32 @@ def test_load_one_jsonl_from_path_empty_raises(tmp_path): p.write_bytes(b"") with pytest.raises(StopIteration): capa.helpers.load_one_jsonl_from_path(p) + + +def test_extensions_dot_prefix(): + for ext_group in ( + EXTENSIONS_SHELLCODE_32, + EXTENSIONS_SHELLCODE_64, + EXTENSIONS_DYNAMIC, + EXTENSIONS_BINEXPORT2, + (EXTENSIONS_ELF,), + (EXTENSIONS_FREEZE,), + (EXTENSIONS_BINJA_DB,), + ): + for ext in ext_group: + assert ext.startswith("."), f"extension {ext!r} must start with a dot" + + assert Path("sample.log").name.endswith(EXTENSIONS_DYNAMIC) + assert not Path("dialog").name.endswith(EXTENSIONS_DYNAMIC) + assert not Path("catalog").name.endswith(EXTENSIONS_DYNAMIC) + assert Path("report.json").name.endswith(EXTENSIONS_DYNAMIC) + assert not Path("notajson").name.endswith(EXTENSIONS_DYNAMIC) + assert Path("sample.sc32").name.endswith(EXTENSIONS_SHELLCODE_32) + assert Path("sample.raw32").name.endswith(EXTENSIONS_SHELLCODE_32) + assert Path("sample.sc64").name.endswith(EXTENSIONS_SHELLCODE_64) + assert Path("sample.raw64").name.endswith(EXTENSIONS_SHELLCODE_64) + assert Path("sample.BinExport").name.endswith(EXTENSIONS_BINEXPORT2) + assert Path("sample.BinExport2").name.endswith(EXTENSIONS_BINEXPORT2) + assert Path("sample.elf_").name.endswith(EXTENSIONS_ELF) + assert Path("sample.frz").name.endswith(EXTENSIONS_FREEZE) + assert Path("sample.bndb").name.endswith(EXTENSIONS_BINJA_DB)