diff --git a/capa/helpers.py b/capa/helpers.py index daa51aa1..b6f947fc 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -52,15 +52,15 @@ def assert_never(value) -> NoReturn: assert False, f"Unhandled value: {value} ({type(value).__name__})" -def get_format_from_extension(sample: str) -> str: - if sample.endswith(EXTENSIONS_SHELLCODE_32): +def get_format_from_extension(sample: Path) -> str: + if sample.name.endswith(EXTENSIONS_SHELLCODE_32): return FORMAT_SC32 - elif sample.endswith(EXTENSIONS_SHELLCODE_64): + elif sample.name.endswith(EXTENSIONS_SHELLCODE_64): return FORMAT_SC64 return FORMAT_UNKNOWN -def get_auto_format(path: str) -> str: +def get_auto_format(path: Path) -> str: format_ = get_format(path) if format_ == FORMAT_UNKNOWN: format_ = get_format_from_extension(path) @@ -69,17 +69,16 @@ def get_auto_format(path: str) -> str: return format_ -def get_format(sample: str) -> str: +def get_format(sample: Path) -> str: # imported locally to avoid import cycle from capa.features.extractors.common import extract_format from capa.features.extractors.dnfile_ import DnfileFeatureExtractor - with open(sample, "rb") as f: - buf = f.read() + buf = sample.read_bytes() for feature, _ in extract_format(buf): if feature == Format(FORMAT_PE): - dnfile_extractor = DnfileFeatureExtractor(sample) + dnfile_extractor = DnfileFeatureExtractor(str(sample)) if dnfile_extractor.is_dotnet_file(): feature = Format(FORMAT_DOTNET) diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index 64dcb84d..b8f1b2d2 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -157,7 +157,7 @@ def collect_metadata(rules: List[Path]): arch=arch, os=os, extractor="ida", - rules=tuple(str(r.resolve().absolute()) for r in rules), + rules=tuple(r.resolve().absolute().as_posix() for r in rules), base_address=capa.features.freeze.Address.from_capa(idaapi.get_imagebase()), layout=rdoc.Layout( functions=tuple() diff --git a/capa/main.py b/capa/main.py index ef665ac5..6024c169 100644 --- a/capa/main.py +++ b/capa/main.py @@ -364,26 +364,23 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon return False -def is_supported_format(sample: str) -> bool: +def is_supported_format(sample: Path) -> bool: """ Return if this is a supported file based on magic header values """ - with open(sample, "rb") as f: - taste = f.read(0x100) + taste = sample.open("rb").read(0x100) return len(list(capa.features.extractors.common.extract_format(taste))) == 1 -def is_supported_arch(sample: str) -> bool: - with open(sample, "rb") as f: - buf = f.read() +def is_supported_arch(sample: Path) -> bool: + buf = sample.read_bytes() return len(list(capa.features.extractors.common.extract_arch(buf))) == 1 -def get_arch(sample: str) -> str: - with open(sample, "rb") as f: - buf = f.read() +def get_arch(sample: Path) -> str: + buf = sample.read_bytes() for feature, _ in capa.features.extractors.common.extract_arch(buf): assert isinstance(feature.value, str) @@ -392,16 +389,14 @@ def get_arch(sample: str) -> str: return "unknown" -def is_supported_os(sample: str) -> bool: - with open(sample, "rb") as f: - buf = f.read() +def is_supported_os(sample: Path) -> bool: + buf = sample.read_bytes() return len(list(capa.features.extractors.common.extract_os(buf))) == 1 -def get_os(sample: str) -> str: - with open(sample, "rb") as f: - buf = f.read() +def get_os(sample: Path) -> str: + buf = sample.read_bytes() for feature, _ in capa.features.extractors.common.extract_os(buf): assert isinstance(feature.value, str) @@ -463,7 +458,7 @@ def get_default_signatures() -> List[Path]: return ret -def get_workspace(path, format_, sigpaths: List[Path]): +def get_workspace(path: Path, format_: str, sigpaths: List[Path]): """ load the program at the given path into a vivisect workspace using the given format. also apply the given FLIRT signatures. @@ -490,18 +485,18 @@ def get_workspace(path, format_, sigpaths: List[Path]): raise UnsupportedFormatError() # don't analyze, so that we can add our Flirt function analyzer first. - vw = viv_utils.getWorkspace(path, analyze=False, should_save=False) + vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False) elif format_ in {FORMAT_PE, FORMAT_ELF}: - vw = viv_utils.getWorkspace(path, analyze=False, should_save=False) + vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False) elif format_ == FORMAT_SC32: # these are not analyzed nor saved. - vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False) + vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False) elif format_ == FORMAT_SC64: - vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False) + vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False) else: raise ValueError("unexpected format: " + format_) - viv_utils.flirt.register_flirt_signature_analyzers(vw, [s.as_posix() for s in sigpaths]) + viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths]) vw.analyze() @@ -511,7 +506,7 @@ def get_workspace(path, format_, sigpaths: List[Path]): # TODO get_extractors -> List[FeatureExtractor]? def get_extractor( - path: str, + path: Path, format_: str, os_: str, backend: str, @@ -538,7 +533,7 @@ def get_extractor( if format_ == FORMAT_DOTNET: import capa.features.extractors.dnfile.extractor - return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) + return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path)) elif backend == BACKEND_BINJA: from capa.features.extractors.binja.find_binja_api import find_binja_path @@ -561,7 +556,7 @@ def get_extractor( import capa.features.extractors.binja.extractor with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): - bv: BinaryView = BinaryViewType.get_view_of_file(path) + bv: BinaryView = BinaryViewType.get_view_of_file(str(path)) if bv is None: raise RuntimeError(f"Binary Ninja cannot open file {path}") @@ -587,18 +582,18 @@ def get_extractor( return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os_) -def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]: +def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]: file_extractors: List[FeatureExtractor] = list() if format_ == FORMAT_PE: - file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) + file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample))) elif format_ == FORMAT_DOTNET: - file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) - file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)) + file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample))) + file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(str(sample))) elif format_ == capa.features.extractors.common.FORMAT_ELF: - file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample)) + file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(str(sample))) return file_extractors @@ -733,7 +728,7 @@ def get_signatures(sigs_path: Path) -> List[Path]: def collect_metadata( argv: List[str], - sample_path: str, + sample_path: Path, format_: str, os_: str, rules_path: List[Path], @@ -743,7 +738,7 @@ def collect_metadata( sha1 = hashlib.sha1() sha256 = hashlib.sha256() - buf = Path(sample_path).read_bytes() + buf = sample_path.read_bytes() md5.update(buf) sha1.update(buf) @@ -762,7 +757,7 @@ def collect_metadata( md5=md5.hexdigest(), sha1=sha1.hexdigest(), sha256=sha256.hexdigest(), - path=Path(sample_path).resolve().as_posix(), + path=sample_path.resolve().absolute().as_posix(), ), analysis=rdoc.Analysis( format=format_, @@ -1008,6 +1003,9 @@ def handle_common_args(args): else: raise RuntimeError("unexpected --color value: " + args.color) + if hasattr(args, "sample"): + args.sample = Path(args.sample) + if hasattr(args, "rules"): rules_paths: List[Path] = [] @@ -1116,7 +1114,7 @@ def main(argv=None): return ret try: - _ = get_file_taste(Path(args.sample)) + _ = get_file_taste(args.sample) except IOError as e: # per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we # handle the IOError separately and reach into the args diff --git a/capa/render/proto/__init__.py b/capa/render/proto/__init__.py index 4bbe66f8..cd2251ec 100644 --- a/capa/render/proto/__init__.py +++ b/capa/render/proto/__init__.py @@ -24,15 +24,10 @@ $ protoc.exe --python_out=. --mypy_out=. (e.g. capa/render/proto Alternatively, --pyi_out=. can be used to generate a Python Interface file that supports development """ -import sys -import json -import argparse import datetime from typing import Any, Dict, Union -from pathlib import Path import google.protobuf.json_format -from google.protobuf.json_format import MessageToJson import capa.rules import capa.features.freeze as frz diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 7f71d067..29e83bf0 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -170,7 +170,7 @@ def main(argv=None): samples = [] for file in Path(args.input).rglob("*"): - samples.append(file.as_posix()) + samples.append(file) def pmap(f, args, parallelism=multiprocessing.cpu_count()): """apply the given function f to the given args using subprocesses""" @@ -205,7 +205,7 @@ def main(argv=None): if result["status"] == "error": logger.warning(result["error"]) elif result["status"] == "ok": - results[result["path"]] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True) + results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True) else: raise ValueError(f"unexpected status: {result['status']}") diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index 57e32b2f..f0a4f5cc 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -161,7 +161,7 @@ def render_dictionary(doc: rd.ResultDocument) -> Dict[str, Any]: # ==== render dictionary helpers -def capa_details(rules_path, file_path, output_format="dictionary"): +def capa_details(rules_path: Path, file_path: Path, output_format="dictionary"): # load rules from disk rules = capa.main.get_rules([rules_path]) @@ -210,5 +210,5 @@ if __name__ == "__main__": args = parser.parse_args() if args.rules != RULES_PATH: args.rules = Path(args.rules) - print(capa_details(args.rules, args.file, args.output)) + print(capa_details(args.rules, Path(args.file), args.output)) sys.exit(0) diff --git a/scripts/import-to-bn.py b/scripts/import-to-bn.py index 8fe8da6e..04662324 100644 --- a/scripts/import-to-bn.py +++ b/scripts/import-to-bn.py @@ -51,13 +51,13 @@ def load_analysis(bv): binaryninja.log_info(f"dirname: {dirname}\nshortname: {shortname}\n") js_path = path = dirname / (shortname + ".js") json_path = dirname / (shortname + ".json") - if os.access(js_path.as_posix(), os.R_OK): + if os.access(str(js_path), os.R_OK): path = js_path - elif os.access(json_path.as_posix(), os.R_OK): + elif os.access(str(json_path), os.R_OK): path = json_path else: path = binaryninja.interaction.get_open_filename_input("capa report:", "JSON (*.js *.json);;All Files (*)") - if not path or not os.access(path.as_posix(), os.R_OK): + if not path or not os.access(str(path), os.R_OK): binaryninja.log_error("Invalid filename.") return 0 binaryninja.log_info(f"Using capa file {path}") diff --git a/scripts/lint.py b/scripts/lint.py index dae18c2b..e289b356 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -296,14 +296,14 @@ DEFAULT_SIGNATURES = capa.main.get_default_signatures() def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]: - nice_path = path.resolve().absolute().as_posix() + nice_path = path.resolve().absolute() if path in ctx.capabilities_by_sample: logger.debug("found cached results: %s: %d capabilities", nice_path, len(ctx.capabilities_by_sample[path])) return ctx.capabilities_by_sample[path] - if nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_32): + if nice_path.name.endswith(capa.helpers.EXTENSIONS_SHELLCODE_32): format_ = "sc32" - elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64): + elif nice_path.name.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64): format_ = "sc64" else: format_ = capa.main.get_auto_format(nice_path) @@ -356,7 +356,7 @@ class DoesntMatchExample(Lint): try: capabilities = get_sample_capabilities(ctx, path) except Exception as e: - logger.error("failed to extract capabilities: %s %s %s", rule.name, str(path), e, exc_info=True) + logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e, exc_info=True) return True if rule.name not in capabilities: @@ -917,12 +917,12 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - samples_path = (Path(__file__).resolve().parent.parent / "tests" / "data").as_posix() + default_samples_path = str(Path(__file__).resolve().parent.parent / "tests" / "data") parser = argparse.ArgumentParser(description="Lint capa rules.") capa.main.install_common_args(parser, wanted={"tag"}) parser.add_argument("rules", type=str, action="append", help="Path to rules") - parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples") + parser.add_argument("--samples", type=str, default=default_samples_path, help="Path to samples") parser.add_argument( "--thorough", action="store_true", @@ -953,12 +953,12 @@ def main(argv=None): return -1 logger.info("collecting potentially referenced samples") - samplePath = Path(args.samples) - if not samplePath.exists(): - logger.error("samples path %s does not exist", samplePath) + samples_path = Path(args.samples) + if not samples_path.exists(): + logger.error("samples path %s does not exist", Path(samples_path)) return -1 - samples = collect_samples(samplePath) + samples = collect_samples(Path(samples_path)) ctx = Context(samples=samples, rules=rules, is_thorough=args.thorough) diff --git a/scripts/setup-linter-dependencies.py b/scripts/setup-linter-dependencies.py index a6af52e9..a59d32f0 100644 --- a/scripts/setup-linter-dependencies.py +++ b/scripts/setup-linter-dependencies.py @@ -187,7 +187,7 @@ if __name__ == "__main__": "--output", "-o", type=str, - default=(Path(__file__).resolve().parent / "linter-data.json").as_posix(), + default=str(Path(__file__).resolve().parent / "linter-data.json"), help="Path to output file (lint.py will be looking for linter-data.json)", ) main(parser.parse_args(args=argv[1:])) diff --git a/tests/fixtures.py b/tests/fixtures.py index b3109005..a8024491 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -89,7 +89,7 @@ def xfail(condition, reason=None): # need to limit cache size so GitHub Actions doesn't run out of memory, see #545 @lru_cache(maxsize=1) -def get_viv_extractor(path): +def get_viv_extractor(path: Path): import capa.main import capa.features.extractors.viv.extractor @@ -101,9 +101,9 @@ def get_viv_extractor(path): CD.parent / "sigs" / "3_flare_common_libs.sig", ] - if "raw32" in path: + if "raw32" in path.name: vw = capa.main.get_workspace(path, "sc32", sigpaths=sigpaths) - elif "raw64" in path: + elif "raw64" in path.name: vw = capa.main.get_workspace(path, "sc64", sigpaths=sigpaths) else: vw = capa.main.get_workspace(path, FORMAT_AUTO, sigpaths=sigpaths) @@ -113,14 +113,14 @@ def get_viv_extractor(path): return extractor -def fixup_viv(path, extractor): +def fixup_viv(path: Path, extractor): """ vivisect fixups to overcome differences between backends """ - if "3b13b" in path: + if "3b13b" in path.name: # vivisect only recognizes calling thunk function at 0x10001573 extractor.vw.makeFunction(0x10006860) - if "294b8d" in path: + if "294b8d" in path.name: # see vivisect/#561 extractor.vw.makeFunction(0x404970) @@ -129,10 +129,10 @@ def fixup_viv(path, extractor): def get_pefile_extractor(path): import capa.features.extractors.pefile - extractor = capa.features.extractors.pefile.PefileFeatureExtractor(path) + extractor = capa.features.extractors.pefile.PefileFeatureExtractor(str(path)) # overload the extractor so that the fixture exposes `extractor.path` - setattr(extractor, "path", path) + setattr(extractor, "path", str(path)) return extractor @@ -140,10 +140,10 @@ def get_pefile_extractor(path): def get_dotnetfile_extractor(path): import capa.features.extractors.dotnetfile - extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path) + extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(str(path)) # overload the extractor so that the fixture exposes `extractor.path` - setattr(extractor, "path", path) + setattr(extractor, "path", str(path)) return extractor @@ -152,10 +152,10 @@ def get_dotnetfile_extractor(path): def get_dnfile_extractor(path): import capa.features.extractors.dnfile.extractor - extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) + extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path)) # overload the extractor so that the fixture exposes `extractor.path` - setattr(extractor, "path", path) + setattr(extractor, "path", str(path)) return extractor @@ -234,81 +234,81 @@ def extract_instruction_features(extractor, fh, bbh, ih) -> Dict[Feature, Set[Ad # note: to reduce the testing time it's recommended to reuse already existing test samples, if possible def get_data_path_by_name(name): if name == "mimikatz": - return (CD / "data" / "mimikatz.exe_").as_posix() + return CD / "data" / "mimikatz.exe_" elif name == "kernel32": - return (CD / "data" / "kernel32.dll_").as_posix() + return CD / "data" / "kernel32.dll_" elif name == "kernel32-64": - return (CD / "data" / "kernel32-64.dll_").as_posix() + return CD / "data" / "kernel32-64.dll_" elif name == "pma01-01": - return (CD / "data" / "Practical Malware Analysis Lab 01-01.dll_").as_posix() + return CD / "data" / "Practical Malware Analysis Lab 01-01.dll_" elif name == "pma01-01-rd": - return (CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json").as_posix() + return CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json" elif name == "pma12-04": - return (CD / "data" / "Practical Malware Analysis Lab 12-04.exe_").as_posix() + return CD / "data" / "Practical Malware Analysis Lab 12-04.exe_" elif name == "pma16-01": - return (CD / "data" / "Practical Malware Analysis Lab 16-01.exe_").as_posix() + return CD / "data" / "Practical Malware Analysis Lab 16-01.exe_" elif name == "pma21-01": - return (CD / "data" / "Practical Malware Analysis Lab 21-01.exe_").as_posix() + return CD / "data" / "Practical Malware Analysis Lab 21-01.exe_" elif name == "al-khaser x86": - return (CD / "data" / "al-khaser_x86.exe_").as_posix() + return CD / "data" / "al-khaser_x86.exe_" elif name == "al-khaser x64": - return (CD / "data" / "al-khaser_x64.exe_").as_posix() + return CD / "data" / "al-khaser_x64.exe_" elif name.startswith("39c05"): - return (CD / "data" / "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_").as_posix() + return CD / "data" / "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_" elif name.startswith("499c2"): - return (CD / "data" / "499c2a85f6e8142c3f48d4251c9c7cd6.raw32").as_posix() + return CD / "data" / "499c2a85f6e8142c3f48d4251c9c7cd6.raw32" elif name.startswith("9324d"): - return (CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_").as_posix() + return CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_" elif name.startswith("a1982"): - return (CD / "data" / "a198216798ca38f280dc413f8c57f2c2.exe_").as_posix() + return CD / "data" / "a198216798ca38f280dc413f8c57f2c2.exe_" elif name.startswith("a933a"): - return (CD / "data" / "a933a1a402775cfa94b6bee0963f4b46.dll_").as_posix() + return CD / "data" / "a933a1a402775cfa94b6bee0963f4b46.dll_" elif name.startswith("bfb9b"): - return (CD / "data" / "bfb9b5391a13d0afd787e87ab90f14f5.dll_").as_posix() + return CD / "data" / "bfb9b5391a13d0afd787e87ab90f14f5.dll_" elif name.startswith("c9188"): - return (CD / "data" / "c91887d861d9bd4a5872249b641bc9f9.exe_").as_posix() + return CD / "data" / "c91887d861d9bd4a5872249b641bc9f9.exe_" elif name.startswith("64d9f"): - return (CD / "data" / "64d9f7d96b99467f36e22fada623c3bb.dll_").as_posix() + return CD / "data" / "64d9f7d96b99467f36e22fada623c3bb.dll_" elif name.startswith("82bf6"): - return (CD / "data" / "82BF6347ACF15E5D883715DC289D8A2B.exe_").as_posix() + return CD / "data" / "82BF6347ACF15E5D883715DC289D8A2B.exe_" elif name.startswith("pingtaest"): - return (CD / "data" / "ping_täst.exe_").as_posix() + return CD / "data" / "ping_täst.exe_" elif name.startswith("77329"): - return (CD / "data" / "773290480d5445f11d3dc1b800728966.exe_").as_posix() + return CD / "data" / "773290480d5445f11d3dc1b800728966.exe_" elif name.startswith("3b13b"): - return (CD / "data" / "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_").as_posix() + return CD / "data" / "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_" elif name == "7351f.elf": - return (CD / "data" / "7351f8a40c5450557b24622417fc478d.elf_").as_posix() + return CD / "data" / "7351f8a40c5450557b24622417fc478d.elf_" elif name.startswith("79abd"): - return (CD / "data" / "79abd17391adc6251ecdc58d13d76baf.dll_").as_posix() + return CD / "data" / "79abd17391adc6251ecdc58d13d76baf.dll_" elif name.startswith("946a9"): - return (CD / "data" / "946a99f36a46d335dec080d9a4371940.dll_").as_posix() + return CD / "data" / "946a99f36a46d335dec080d9a4371940.dll_" elif name.startswith("2f7f5f"): - return (CD / "data" / "2f7f5fb5de175e770d7eae87666f9831.elf_").as_posix() + return CD / "data" / "2f7f5fb5de175e770d7eae87666f9831.elf_" elif name.startswith("b9f5b"): - return (CD / "data" / "b9f5bd514485fb06da39beff051b9fdc.exe_").as_posix() + return CD / "data" / "b9f5bd514485fb06da39beff051b9fdc.exe_" elif name.startswith("mixed-mode-64"): - return (DNFILE_TESTFILES / "mixed-mode" / "ModuleCode" / "bin" / "ModuleCode_amd64.exe").as_posix() + return DNFILE_TESTFILES / "mixed-mode" / "ModuleCode" / "bin" / "ModuleCode_amd64.exe" elif name.startswith("hello-world"): - return (DNFILE_TESTFILES / "hello-world" / "hello-world.exe").as_posix() + return DNFILE_TESTFILES / "hello-world" / "hello-world.exe" elif name.startswith("_1c444"): - return (DOTNET_DIR / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_").as_posix() + return DOTNET_DIR / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_" elif name.startswith("_387f15"): - return (DOTNET_DIR / "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_").as_posix() + return DOTNET_DIR / "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_" elif name.startswith("_692f"): - return (DOTNET_DIR / "692f7fd6d198e804d6af98eb9e390d61.exe_").as_posix() + return DOTNET_DIR / "692f7fd6d198e804d6af98eb9e390d61.exe_" elif name.startswith("_0953c"): - return (CD / "data" / "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_").as_posix() + return CD / "data" / "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_" elif name.startswith("_039a6"): - return (CD / "data" / "039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_").as_posix() + return CD / "data" / "039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_" elif name.startswith("b5f052"): - return (CD / "data" / "b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_").as_posix() + return CD / "data" / "b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_" elif name.startswith("bf7a9c"): - return (CD / "data" / "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_").as_posix() + return CD / "data" / "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_" elif name.startswith("294b8d"): - return (CD / "data" / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_").as_posix() + return CD / "data" / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_" elif name.startswith("2bf18d"): - return (CD / "data" / "2bf18d0403677378adad9001b1243211.elf_").as_posix() + return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_" else: raise ValueError(f"unexpected sample fixture: {name}") @@ -1048,6 +1048,7 @@ FEATURE_COUNT_TESTS_DOTNET = [ def do_test_feature_presence(get_extractor, sample, scope, feature, expected): + print(sample) extractor = get_extractor(sample) features = scope(extractor) if expected: @@ -1064,10 +1065,10 @@ def do_test_feature_count(get_extractor, sample, scope, feature, expected): assert len(features[feature]) == expected, msg -def get_extractor(path): +def get_extractor(path: Path): extractor = get_viv_extractor(path) # overload the extractor so that the fixture exposes `extractor.path` - setattr(extractor, "path", path) + setattr(extractor, "path", str(path)) return extractor @@ -1182,29 +1183,29 @@ def get_result_doc(path): @pytest.fixture def pma0101_rd(): - return get_result_doc((CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json").as_posix()) + return get_result_doc(CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json") @pytest.fixture def dotnet_1c444e_rd(): - return get_result_doc((CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json").as_posix()) + return get_result_doc(CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json") @pytest.fixture def a3f3bbc_rd(): - return get_result_doc((CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json").as_posix()) + return get_result_doc(CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json") @pytest.fixture def al_khaserx86_rd(): - return get_result_doc((CD / "data" / "rd" / "al-khaser_x86.exe_.json").as_posix()) + return get_result_doc(CD / "data" / "rd" / "al-khaser_x86.exe_.json") @pytest.fixture def al_khaserx64_rd(): - return get_result_doc((CD / "data" / "rd" / "al-khaser_x64.exe_.json").as_posix()) + return get_result_doc(CD / "data" / "rd" / "al-khaser_x64.exe_.json") @pytest.fixture def a076114_rd(): - return get_result_doc((CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json").as_posix()) + return get_result_doc(CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json") diff --git a/tests/test_main.py b/tests/test_main.py index d17e6e64..bf711e9a 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -426,7 +426,7 @@ def test_not_render_rules_also_matched(z9324d_extractor, capsys): def test_json_meta(capsys): - path = fixtures.get_data_path_by_name("pma01-01") + path = str(fixtures.get_data_path_by_name("pma01-01")) assert capa.main.main([path, "-j"]) == 0 std = capsys.readouterr() std_json = json.loads(std.out) @@ -470,7 +470,7 @@ def test_main_dotnet4(_039a6_dotnetfile_extractor): def test_main_rd(): - path = fixtures.get_data_path_by_name("pma01-01-rd") + path = str(fixtures.get_data_path_by_name("pma01-01-rd")) assert capa.main.main([path, "-vv"]) == 0 assert capa.main.main([path, "-v"]) == 0 assert capa.main.main([path, "-j"]) == 0 diff --git a/tests/test_pefile_features.py b/tests/test_pefile_features.py index e0b735e8..62c14f96 100644 --- a/tests/test_pefile_features.py +++ b/tests/test_pefile_features.py @@ -25,7 +25,7 @@ def test_pefile_features(sample, scope, feature, expected): if isinstance(feature, capa.features.file.FunctionName): pytest.xfail("pefile doesn't extract function names") - if ".elf" in sample: + if ".elf" in sample.name: pytest.xfail("pefile doesn't handle ELF files") fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 6dfe9be3..496df577 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -19,19 +19,19 @@ CD = Path(__file__).resolve().parent def get_script_path(s: str): - return (CD / ".." / "scripts" / s).as_posix() + return str(CD / ".." / "scripts" / s) def get_file_path(): - return (CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_").as_posix() + return str(CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_") def get_rules_path(): - return (CD / ".." / "rules").as_posix() + return str(CD / ".." / "rules") def get_rule_path(): - return (Path(get_rules_path()) / "lib" / "allocate-memory.yml").as_posix() + return str(Path(get_rules_path()) / "lib" / "allocate-memory.yml") @pytest.mark.parametrize( @@ -63,7 +63,7 @@ def test_bulk_process(tmp_path): dest_file.write_bytes(source_file.read_bytes()) - p = run_program(get_script_path("bulk-process.py"), [t.parent.as_posix()]) + p = run_program(get_script_path("bulk-process.py"), [str(t.parent)]) assert p.returncode == 0