diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index f22c55e0..ef51a4aa 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -81,6 +81,7 @@ def get_capa_results(args): rules (capa.rules.RuleSet): the rules to match signatures (List[str]): list of file system paths to signature files format (str): the name of the sample file format + os (str): the name of the operating system path (str): the file system path to the sample to process args is a tuple because i'm not quite sure how to unpack multiple arguments using `map`. @@ -96,12 +97,12 @@ def get_capa_results(args): meta (dict): the meta analysis results capabilities (dict): the matched capabilities and their result objects """ - rules, sigpaths, format, path = args + rules, sigpaths, format, os_, path = args should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) logger.info("computing capa results for: %s", path) try: extractor = capa.main.get_extractor( - path, format, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True + path, format, os_, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True ) except capa.main.UnsupportedFormatError: # i'm 100% sure if multiprocessing will reliably raise exceptions across process boundaries. 
@@ -127,7 +128,7 @@ def get_capa_results(args): "error": f"unexpected error: {e}", } - meta = capa.main.collect_metadata([], path, [], extractor) + meta = capa.main.collect_metadata([], path, format, os_, [], extractor) capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True) meta["analysis"].update(counts) meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) @@ -142,7 +143,7 @@ def main(argv=None): argv = sys.argv[1:] parser = argparse.ArgumentParser(description="detect capabilities in programs.") - capa.main.install_common_args(parser, wanted={"rules", "signatures"}) + capa.main.install_common_args(parser, wanted={"rules", "signatures", "format", "os"}) parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze") parser.add_argument( "-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor" diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index 884b2e94..5d97d2b6 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -14,6 +14,7 @@ import capa.render.default import capa.render.result_document as rd import capa.features.freeze.features as frzf from capa.engine import * +from capa.features.common import OS_AUTO # == Render dictionary helpers @@ -164,11 +165,13 @@ def capa_details(rules_path, file_path, output_format="dictionary"): rules = capa.main.get_rules([rules_path]) # extract features and find capabilities - extractor = capa.main.get_extractor(file_path, "auto", capa.main.BACKEND_VIV, [], False, disable_progress=True) + extractor = capa.main.get_extractor( + file_path, "auto", OS_AUTO, capa.main.BACKEND_VIV, [], False, disable_progress=True + ) capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True) # collect metadata (used only to make rendering more complete) - meta = capa.main.collect_metadata([], file_path, rules_path, extractor) + meta = 
capa.main.collect_metadata([], file_path, "auto", OS_AUTO, rules_path, extractor) meta["analysis"].update(counts) meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) diff --git a/scripts/lint.py b/scripts/lint.py index cf56f1a8..92c7fbcf 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -45,7 +45,7 @@ import capa.engine import capa.helpers import capa.features.insn from capa.rules import Rule, RuleSet -from capa.features.common import FORMAT_PE, FORMAT_DOTNET, String, Feature, Substring +from capa.features.common import OS_AUTO, FORMAT_PE, FORMAT_DOTNET, String, Feature, Substring from capa.render.result_document import RuleMetadata logger = logging.getLogger("lint") @@ -310,7 +310,9 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]: format_ = capa.main.get_auto_format(nice_path) logger.debug("analyzing sample: %s", nice_path) - extractor = capa.main.get_extractor(nice_path, format_, "", DEFAULT_SIGNATURES, False, disable_progress=True) + extractor = capa.main.get_extractor( + nice_path, format_, OS_AUTO, "", DEFAULT_SIGNATURES, False, disable_progress=True + ) capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True) # mypy doesn't seem to be happy with the MatchResults type alias & set(...keys())? 
diff --git a/scripts/profile-time.py b/scripts/profile-time.py index 0c7f0783..9b97b767 100644 --- a/scripts/profile-time.py +++ b/scripts/profile-time.py @@ -71,7 +71,7 @@ def main(argv=None): label += " (dirty)" parser = argparse.ArgumentParser(description="Profile capa performance") - capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "rules"}) + capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "rules"}) parser.add_argument("--number", type=int, default=3, help="batch size of profile collection") parser.add_argument("--repeat", type=int, default=30, help="batch count of profile collection") @@ -104,7 +104,7 @@ def main(argv=None): extractor = capa.features.freeze.load(f.read()) else: extractor = capa.main.get_extractor( - args.sample, args.format, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False + args.sample, args.format, args.os, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False ) with tqdm.tqdm(total=args.number * args.repeat) as pbar: diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index dbd47f8f..a7dea657 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -130,7 +130,7 @@ def main(argv=None): argv = sys.argv[1:] parser = argparse.ArgumentParser(description="detect capabilities in programs.") - capa.main.install_common_args(parser, wanted={"format", "backend", "sample", "signatures", "rules", "tag"}) + capa.main.install_common_args(parser, wanted={"format", "os", "backend", "sample", "signatures", "rules", "tag"}) args = parser.parse_args(args=argv) capa.main.handle_common_args(args) @@ -166,7 +166,7 @@ def main(argv=None): try: extractor = capa.main.get_extractor( - args.sample, args.format, args.backend, sig_paths, should_save_workspace + args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace ) except capa.exceptions.UnsupportedFormatError: 
capa.helpers.log_unsupported_format_error() @@ -175,7 +175,7 @@ def main(argv=None): capa.helpers.log_unsupported_runtime_error() return -1 - meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor) + meta = capa.main.collect_metadata(argv, args.sample, format_, args.os, args.rules, extractor) capabilities, counts = capa.main.find_capabilities(rules, extractor) meta["analysis"].update(counts) meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) diff --git a/scripts/show-features.py b/scripts/show-features.py index 297977d5..365a6d70 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -95,7 +95,7 @@ def main(argv=None): argv = sys.argv[1:] parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample") - capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "backend"}) + capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend"}) parser.add_argument("-F", "--function", type=str, help="Show features for specific function") args = parser.parse_args(args=argv) @@ -120,7 +120,7 @@ def main(argv=None): should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) try: extractor = capa.main.get_extractor( - args.sample, args.format, args.backend, sig_paths, should_save_workspace + args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace ) except capa.exceptions.UnsupportedFormatError: capa.helpers.log_unsupported_format_error() diff --git a/tests/fixtures.py b/tests/fixtures.py index 5602f096..5fb31539 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -26,6 +26,7 @@ import capa.features.basicblock from capa.features.common import ( OS, OS_ANY, + OS_AUTO, OS_LINUX, ARCH_I386, FORMAT_PE, @@ -106,7 +107,7 @@ def get_viv_extractor(path): else: vw = capa.main.get_workspace(path, "auto", sigpaths=sigpaths) vw.saveWorkspace() - 
extractor = capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path) + extractor = capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, OS_AUTO) fixup_viv(path, extractor) return extractor