diff --git a/capa/main.py b/capa/main.py index d76c6852..19fda0d4 100644 --- a/capa/main.py +++ b/capa/main.py @@ -282,6 +282,8 @@ def get_workspace(path, format, should_save=True): vw = get_shellcode_vw(path, arch="i386", should_save=should_save) elif format == "sc64": vw = get_shellcode_vw(path, arch="amd64", should_save=should_save) + else: + raise ValueError("unexpected format: " + format) logger.debug("%s", get_meta_str(vw)) return vw @@ -444,19 +446,162 @@ def collect_metadata(argv, sample_path, rules_path, format, extractor): } +def install_common_args(parser, wanted=None): + """ + register a common set of command line arguments for re-use by main & scripts. + these are things like logging/coloring/etc. + also enable callers to opt-in to common arguments, like specifying the input sample. + + this routine lets many scripts use the same language for cli arguments. + see `handle_common_args` to do common configuration. + + args: + parser (argparse.ArgumentParser): a parser to update in place, adding common arguments. + wanted (Set[str]): collection of arguments to opt-into, including: + - "sample": required positional argument to input file. + - "format": flag to override file format. + - "backend": flag to override analysis backend under py3. + - "rules": flag to override path to capa rules. + - "tag": flag to override/specify which rules to match. 
+ """ + if wanted is None: + wanted = set() + + # + # common arguments that all scripts will have + # + + parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__)) + parser.add_argument( + "-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)" + ) + parser.add_argument( + "-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)" + ) + parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR") + parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors") + parser.add_argument( + "--color", + type=str, + choices=("auto", "always", "never"), + default="auto", + help="enable ANSI color codes in results, default: only during interactive session", + ) + + # + # arguments that may be opted into: + # + # - sample + # - format + # - rules + # - tag + # + + if "sample" in wanted: + if sys.version_info >= (3, 0): + parser.add_argument( + # Python 3 str handles non-ASCII arguments correctly + "sample", + type=str, + help="path to sample to analyze", + ) + else: + parser.add_argument( + # in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters + # https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works + # in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/) + "sample", + type=lambda s: s.decode(sys.getfilesystemencoding()), + help="path to sample to analyze", + ) + + if "format" in wanted: + formats = [ + ("auto", "(default) detect file type automatically"), + ("pe", "Windows PE file"), + ("sc32", "32-bit shellcode"), + ("sc64", "64-bit shellcode"), + ("freeze", "features previously frozen by capa"), + ] + format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats]) + parser.add_argument( + "-f", + 
"--format", + choices=[f[0] for f in formats], + default="auto", + help="select sample format, %s" % format_help, + ) + + if "backend" in wanted and sys.version_info >= (3, 0): + parser.add_argument( + "-b", + "--backend", + type=str, + help="select the backend to use", + choices=(BACKEND_VIV, BACKEND_SMDA), + default=BACKEND_VIV, + ) + + if "rules" in wanted: + parser.add_argument( + "-r", + "--rules", + type=str, + default=RULES_PATH_DEFAULT_STRING, + help="path to rule file or directory, use embedded rules by default", + ) + + if "tag" in wanted: + parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values") + + +def handle_common_args(args): + """ + handle the global config specified by `install_common_args`, + such as configuring logging/coloring/etc. + + args: + args (argparse.Namespace): parsed arguments that included at least `install_common_args` args. + """ + if args.quiet: + logging.basicConfig(level=logging.WARNING) + logging.getLogger().setLevel(logging.WARNING) + elif args.debug: + logging.basicConfig(level=logging.DEBUG) + logging.getLogger().setLevel(logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + logging.getLogger().setLevel(logging.INFO) + + # disable vivisect-related logging, it's verbose and not relevant for capa users + set_vivisect_log_level(logging.CRITICAL) + + # py2 doesn't know about cp65001, which is a variant of utf-8 on windows + # tqdm bails when trying to render the progress bar in this setup. + # because cp65001 is utf-8, we just map that codepage to the utf-8 codec. 
+ # see #380 and: https://stackoverflow.com/a/3259271/87207 + import codecs + + codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None) + + if args.color == "always": + colorama.init(strip=False) + elif args.color == "auto": + # colorama will detect: + # - when on Windows console, and fixup coloring, and + # - when not an interactive session, and disable coloring + # renderers should use coloring and assume it will be stripped out if necessary. + colorama.init() + elif args.color == "never": + colorama.init(strip=True) + else: + raise RuntimeError("unexpected --color value: " + args.color) + + def main(argv=None): if argv is None: argv = sys.argv[1:] - formats = [ - ("auto", "(default) detect file type automatically"), - ("pe", "Windows PE file"), - ("sc32", "32-bit shellcode"), - ("sc64", "64-bit shellcode"), - ("freeze", "features previously frozen by capa"), - ] - format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats]) - desc = "The FLARE team's open-source tool to identify capabilities in executable files." 
epilog = textwrap.dedent( """ @@ -489,74 +634,10 @@ def main(argv=None): parser = argparse.ArgumentParser( description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter ) - - if sys.version_info >= (3, 0): - parser.add_argument( - # Python 3 str handles non-ASCII arguments correctly - "sample", - type=str, - help="path to sample to analyze", - ) - else: - parser.add_argument( - # in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters - # https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works - # in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/) - "sample", - type=lambda s: s.decode(sys.getfilesystemencoding()), - help="path to sample to analyze", - ) - parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__)) - parser.add_argument( - "-r", - "--rules", - type=str, - default=RULES_PATH_DEFAULT_STRING, - help="path to rule file or directory, use embedded rules by default", - ) - parser.add_argument( - "-f", "--format", choices=[f[0] for f in formats], default="auto", help="select sample format, %s" % format_help - ) - if sys.version_info >= (3, 0): - parser.add_argument( - "-b", - "--backend", - type=str, - help="select the backend to use", - choices=(BACKEND_VIV, BACKEND_SMDA), - default=BACKEND_VIV, - ) - parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values") + install_common_args(parser, {"sample", "format", "backend", "rules", "tag"}) parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text") - parser.add_argument( - "-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)" - ) - parser.add_argument( - "-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)" - ) - 
parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR") - parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors") - parser.add_argument( - "--color", - type=str, - choices=("auto", "always", "never"), - default="auto", - help="enable ANSI color codes in results, default: only during interactive session", - ) args = parser.parse_args(args=argv) - - if args.quiet: - logging.basicConfig(level=logging.WARNING) - logging.getLogger().setLevel(logging.WARNING) - elif args.debug: - logging.basicConfig(level=logging.DEBUG) - logging.getLogger().setLevel(logging.DEBUG) - else: - logging.basicConfig(level=logging.INFO) - logging.getLogger().setLevel(logging.INFO) - - # disable vivisect-related logging, it's verbose and not relevant for capa users - set_vivisect_log_level(logging.CRITICAL) + handle_common_args(args) try: taste = get_file_taste(args.sample) @@ -566,14 +647,6 @@ def main(argv=None): logger.error("%s", e.args[0]) return -1 - # py2 doesn't know about cp65001, which is a variant of utf-8 on windows - # tqdm bails when trying to render the progress bar in this setup. - # because cp65001 is utf-8, we just map that codepage to the utf-8 codec. 
- # see #380 and: https://stackoverflow.com/a/3259271/87207 - import codecs - - codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None) - if args.rules == RULES_PATH_DEFAULT_STRING: logger.debug("-" * 80) logger.debug(" Using default embedded rules.") @@ -630,7 +703,7 @@ def main(argv=None): else: format = args.format try: - backend = args.backend if sys.version_info > (3, 0) else capa.main.BACKEND_VIV + backend = args.backend if sys.version_info > (3, 0) else BACKEND_VIV extractor = get_extractor(args.sample, args.format, backend, disable_progress=args.quiet) except UnsupportedFormatError: logger.error("-" * 80) @@ -664,19 +737,6 @@ def main(argv=None): if not (args.verbose or args.vverbose or args.json): return -1 - if args.color == "always": - colorama.init(strip=False) - elif args.color == "auto": - # colorama will detect: - # - when on Windows console, and fixup coloring, and - # - when not an interactive session, and disable coloring - # renderers should use coloring and assume it will be stripped out if necessary. 
- colorama.init() - elif args.color == "never": - colorama.init(strip=True) - else: - raise RuntimeError("unexpected --color value: " + args.color) - if args.json: print(capa.render.render_json(meta, rules, capabilities)) elif args.vverbose: diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 65f7c66f..ac7b6c8e 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -65,6 +65,7 @@ import multiprocessing.pool import capa import capa.main +import capa.rules import capa.render logger = logging.getLogger("capa") @@ -139,42 +140,14 @@ def main(argv=None): argv = sys.argv[1:] parser = argparse.ArgumentParser(description="detect capabilities in programs.") + capa.main.install_common_args(parser, wanted={"rules"}) parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze") - parser.add_argument( - "-r", - "--rules", - type=str, - default="(embedded rules)", - help="Path to rule file or directory, use embedded rules by default", - ) - parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR") - parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") parser.add_argument( "-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor" ) parser.add_argument("--no-mp", action="store_true", help="disable subprocesses") args = parser.parse_args(args=argv) - - if args.quiet: - logging.basicConfig(level=logging.ERROR) - logging.getLogger().setLevel(logging.ERROR) - elif args.debug: - logging.basicConfig(level=logging.DEBUG) - logging.getLogger().setLevel(logging.DEBUG) - else: - logging.basicConfig(level=logging.INFO) - logging.getLogger().setLevel(logging.INFO) - - # disable vivisect-related logging, it's verbose and not relevant for capa users - capa.main.set_vivisect_log_level(logging.CRITICAL) - - # py2 doesn't know about cp65001, which is a variant of utf-8 on windows - # tqdm bails when trying to 
render the progress bar in this setup. - # because cp65001 is utf-8, we just map that codepage to the utf-8 codec. - # see #380 and: https://stackoverflow.com/a/3259271/87207 - import codecs - - codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None) + capa.main.handle_common_args(args) if args.rules == "(embedded rules)": logger.info("using default embedded rules") diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index 587e1437..87a2378a 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -6,6 +6,7 @@ import collections import capa.main import capa.rules import capa.engine +import capa.render import capa.features import capa.render.utils as rutils from capa.engine import * diff --git a/scripts/import-to-ida.py b/scripts/import-to-ida.py index 5b519d7f..fe96f056 100644 --- a/scripts/import-to-ida.py +++ b/scripts/import-to-ida.py @@ -31,10 +31,8 @@ See the License for the specific language governing permissions and limitations import json import logging -import idc import idautils import ida_funcs -import ida_idaapi import ida_kernwin logger = logging.getLogger("capa") diff --git a/scripts/lint.py b/scripts/lint.py index 1e177df9..80ccf2af 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -320,7 +320,7 @@ class FormatIncorrect(Lint): expected = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml() if actual != expected: - diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(1)) + diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(True)) self.recommendation = self.recommendation_template.format("".join(diff)) return True @@ -555,6 +555,7 @@ def main(argv=None): samples_path = os.path.join(os.path.dirname(__file__), "..", "tests", "data") parser = argparse.ArgumentParser(description="A program.") + capa.main.install_common_args(parser, wanted={"tag"}) parser.add_argument("rules", type=str, help="Path to rules") parser.add_argument("--samples", type=str, 
default=samples_path, help="Path to samples") parser.add_argument( @@ -562,22 +563,9 @@ def main(argv=None): action="store_true", help="Enable thorough linting - takes more time, but does a better job", ) - parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values") - parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging") - parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") args = parser.parse_args(args=argv) + capa.main.handle_common_args(args) - if args.verbose: - level = logging.DEBUG - elif args.quiet: - level = logging.ERROR - else: - level = logging.INFO - - logging.basicConfig(level=level) - logging.getLogger("capa.lint").setLevel(level) - - capa.main.set_vivisect_log_level(logging.CRITICAL) logging.getLogger("capa").setLevel(logging.CRITICAL) logging.getLogger("viv_utils").setLevel(logging.CRITICAL) diff --git a/scripts/migrate-rules.py b/scripts/migrate-rules.py deleted file mode 100644 index 0c52e96f..00000000 --- a/scripts/migrate-rules.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python -""" -migrate rules and their namespaces. - -example: - - $ python scripts/migrate-rules.py migration.csv ./rules ./new-rules - -Copyright (C) 2020 FireEye, Inc. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. -You may obtain a copy of the License at: [package root]/LICENSE.txt -Unless required by applicable law or agreed to in writing, software distributed under the License - is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and limitations under the License. 
-""" -import os -import csv -import sys -import logging -import os.path -import argparse -import collections - -import capa.rules - -logger = logging.getLogger("migrate-rules") - - -def read_plan(plan_path): - with open(plan_path, "rb") as f: - return list( - csv.DictReader( - f, - restkey="other", - fieldnames=( - "existing path", - "existing name", - "existing rule-category", - "proposed name", - "proposed namespace", - "ATT&CK", - "MBC", - "comment1", - ), - ) - ) - - -def read_rules(rule_directory): - rules = {} - for root, dirs, files in os.walk(rule_directory): - for file in files: - path = os.path.join(root, file) - if not path.endswith(".yml"): - logger.info("skipping file: %s", path) - continue - - rule = capa.rules.Rule.from_yaml_file(path) - rules[rule.name] = rule - - if "nursery" in path: - rule.meta["capa/nursery"] = True - return rules - - -def main(argv=None): - if argv is None: - argv = sys.argv[1:] - - parser = argparse.ArgumentParser(description="migrate rules.") - parser.add_argument("plan", type=str, help="Path to CSV describing migration") - parser.add_argument("source", type=str, help="Source directory of rules") - parser.add_argument("destination", type=str, help="Destination directory of rules") - args = parser.parse_args(args=argv) - - logging.basicConfig(level=logging.INFO) - logging.getLogger().setLevel(logging.INFO) - - plan = read_plan(args.plan) - logger.info("read %d plan entries", len(plan)) - - rules = read_rules(args.source) - logger.info("read %d rules", len(rules)) - - planned_rules = set([row["existing name"] for row in plan]) - unplanned_rules = [rule for (name, rule) in rules.items() if name not in planned_rules] - - if unplanned_rules: - logger.error("plan does not account for %d rules:" % (len(unplanned_rules))) - for rule in unplanned_rules: - logger.error(" " + rule.name) - return -1 - - # pairs of strings (needle, replacement) - match_translations = [] - - for row in plan: - if not row["existing name"]: - continue - - 
rule = rules[row["existing name"]] - - if rule.meta["name"] != row["proposed name"]: - logger.info("renaming rule '%s' -> '%s'", rule.meta["name"], row["proposed name"]) - - # assume the yaml is formatted like `- match: $rule-name`. - # but since its been linted, this should be ok. - match_translations.append(("- match: " + rule.meta["name"], "- match: " + row["proposed name"])) - - rule.meta["name"] = row["proposed name"] - rule.name = row["proposed name"] - - if "rule-category" in rule.meta: - logger.info("deleting rule category '%s'", rule.meta["rule-category"]) - del rule.meta["rule-category"] - - rule.meta["namespace"] = row["proposed namespace"] - - if row["ATT&CK"] != "n/a" and row["ATT&CK"] != "": - tag = row["ATT&CK"] - name, _, id = tag.rpartition(" ") - tag = "%s [%s]" % (name, id) - rule.meta["att&ck"] = [tag] - - if row["MBC"] != "n/a" and row["MBC"] != "": - tag = row["MBC"] - rule.meta["mbc"] = [tag] - - for rule in rules.values(): - filename = rule.name - filename = filename.lower() - filename = filename.replace(" ", "-") - filename = filename.replace("(", "") - filename = filename.replace(")", "") - filename = filename.replace("+", "") - filename = filename.replace("/", "") - filename = filename + ".yml" - - try: - if rule.meta.get("capa/nursery"): - directory = os.path.join(args.destination, "nursery") - elif rule.meta.get("lib"): - directory = os.path.join(args.destination, "lib") - else: - directory = os.path.join(args.destination, rule.meta.get("namespace")) - os.makedirs(directory) - except OSError: - pass - else: - logger.info("created namespace: %s", directory) - - path = os.path.join(directory, filename) - logger.info("writing rule %s", path) - - doc = rule.to_yaml().decode("utf-8") - for (needle, replacement) in match_translations: - doc = doc.replace(needle, replacement) - - with open(path, "wb") as f: - f.write(doc.encode("utf-8")) - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git 
a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index a8feb35e..569e7534 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -110,143 +110,93 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - formats = [ - ("auto", "(default) detect file type automatically"), - ("pe", "Windows PE file"), - ("sc32", "32-bit shellcode"), - ("sc64", "64-bit shellcode"), - ("freeze", "features previously frozen by capa"), - ] - format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats]) + parser = argparse.ArgumentParser(description="detect capabilities in programs.") + capa.main.install_common_args(parser, wanted={"format", "sample", "rules", "tag"}) + args = parser.parse_args(args=argv) + capa.main.handle_common_args(args) - parser = argparse.ArgumentParser(description="detect capabilities in programs.") - parser.add_argument("sample", type=str, help="Path to sample to analyze") - parser.add_argument( - "-r", - "--rules", - type=str, - default="(embedded rules)", - help="Path to rule file or directory, use embedded rules by default", - ) - parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values") - parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR") - parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") - parser.add_argument( - "-f", - "--format", - choices=[f[0] for f in formats], - default="auto", - help="Select sample format, %s" % format_help, - ) - args = parser.parse_args(args=argv) + try: + taste = get_file_taste(args.sample) + except IOError as e: + logger.error("%s", str(e)) + return -1 - if args.quiet: - logging.basicConfig(level=logging.ERROR) - logging.getLogger().setLevel(logging.ERROR) - elif args.debug: - logging.basicConfig(level=logging.DEBUG) - logging.getLogger().setLevel(logging.DEBUG) - else: - logging.basicConfig(level=logging.INFO) - 
logging.getLogger().setLevel(logging.INFO) + if args.rules == "(embedded rules)": + logger.info("-" * 80) + logger.info(" Using default embedded rules.") + logger.info(" To provide your own rules, use the form `capa.exe -r ./path/to/rules/ /path/to/mal.exe`.") + logger.info(" You can see the current default rule set here:") + logger.info(" https://github.com/fireeye/capa-rules") + logger.info("-" * 80) - # disable vivisect-related logging, it's verbose and not relevant for capa users - capa.main.set_vivisect_log_level(logging.CRITICAL) + logger.debug("detected running from source") + args.rules = os.path.join(os.path.dirname(__file__), "..", "rules") + logger.debug("default rule path (source method): %s", args.rules) + else: + logger.info("using rules path: %s", args.rules) + try: + rules = capa.main.get_rules(args.rules) + rules = capa.rules.RuleSet(rules) + logger.info("successfully loaded %s rules", len(rules)) + if args.tag: + rules = rules.filter_rules_by_meta(args.tag) + logger.info("selected %s rules", len(rules)) + except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e: + logger.error("%s", str(e)) + return -1 + + if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): + format = "freeze" + with open(args.sample, "rb") as f: + extractor = capa.features.freeze.load(f.read()) + else: + format = args.format try: - taste = get_file_taste(args.sample) - except IOError as e: - logger.error("%s", str(e)) + extractor = capa.main.get_extractor(args.sample, args.format) + except capa.main.UnsupportedFormatError: + logger.error("-" * 80) + logger.error(" Input file does not appear to be a PE file.") + logger.error(" ") + logger.error( + " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)." 
+ ) + logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") + logger.error("-" * 80) + return -1 + except capa.main.UnsupportedRuntimeError: + logger.error("-" * 80) + logger.error(" Unsupported runtime or Python interpreter.") + logger.error(" ") + logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.") + logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") + logger.error(" ") + logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.") + logger.error("-" * 80) return -1 - # py2 doesn't know about cp65001, which is a variant of utf-8 on windows - # tqdm bails when trying to render the progress bar in this setup. - # because cp65001 is utf-8, we just map that codepage to the utf-8 codec. - # see #380 and: https://stackoverflow.com/a/3259271/87207 - import codecs + meta = capa.main.collect_metadata(argv, args.sample, args.rules, format, extractor) + capabilities, counts = capa.main.find_capabilities(rules, extractor) + meta["analysis"].update(counts) - codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None) - - if args.rules == "(embedded rules)": - logger.info("-" * 80) - logger.info(" Using default embedded rules.") - logger.info(" To provide your own rules, use the form `capa.exe -r ./path/to/rules/ /path/to/mal.exe`.") - logger.info(" You can see the current default rule set here:") - logger.info(" https://github.com/fireeye/capa-rules") - logger.info("-" * 80) - - logger.debug("detected running from source") - args.rules = os.path.join(os.path.dirname(__file__), "..", "rules") - logger.debug("default rule path (source method): %s", args.rules) - else: - logger.info("using rules path: %s", args.rules) - - try: - rules = capa.main.get_rules(args.rules) - rules = capa.rules.RuleSet(rules) - logger.info("successfully loaded %s rules", len(rules)) - if args.tag: - rules = 
rules.filter_rules_by_meta(args.tag) - logger.info("selected %s rules", len(rules)) - except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e: - logger.error("%s", str(e)) + if capa.main.has_file_limitation(rules, capabilities): + # bail if capa encountered file limitation e.g. a packed binary + # do show the output in verbose mode, though. + if not (args.verbose or args.vverbose or args.json): return -1 - if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): - format = "freeze" - with open(args.sample, "rb") as f: - extractor = capa.features.freeze.load(f.read()) - else: - format = args.format - try: - extractor = capa.main.get_extractor(args.sample, args.format, capa.main.BACKEND_VIV) - except capa.main.UnsupportedFormatError: - logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE file.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)." - ) - logger.error( - " If you don't know the input file type, you can try using the `file` utility to guess it." - ) - logger.error("-" * 80) - return -1 - except capa.main.UnsupportedRuntimeError: - logger.error("-" * 80) - logger.error(" Unsupported runtime or Python interpreter.") - logger.error(" ") - logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.") - logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") - logger.error(" ") - logger.error( - " If you're seeing this message on the command line, please ensure you're running Python 2.7." - ) - logger.error("-" * 80) - return -1 + # colorama will detect: + # - when on Windows console, and fixup coloring, and + # - when not an interactive session, and disable coloring + # renderers should use coloring and assume it will be stripped out if necessary. 
+ colorama.init() + doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) + print(render_matches_by_function(doc)) + colorama.deinit() - meta = capa.main.collect_metadata(argv, args.sample, args.rules, format, extractor) - capabilities, counts = capa.main.find_capabilities(rules, extractor) - meta["analysis"].update(counts) + logger.info("done.") - if capa.main.has_file_limitation(rules, capabilities): - # bail if capa encountered file limitation e.g. a packed binary - # do show the output in verbose mode, though. - if not (args.verbose or args.vverbose or args.json): - return -1 - - # colorama will detect: - # - when on Windows console, and fixup coloring, and - # - when not an interactive session, and disable coloring - # renderers should use coloring and assume it will be stripped out if necessary. - colorama.init() - doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) - print(render_matches_by_function(doc)) - colorama.deinit() - - logger.info("done.") - - return 0 + return 0 if __name__ == "__main__": diff --git a/scripts/show-features.py b/scripts/show-features.py index c8f74de9..f357ab51 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -82,37 +82,12 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - formats = [ - ("auto", "(default) detect file type automatically"), - ("pe", "Windows PE file"), - ("sc32", "32-bit shellcode"), - ("sc64", "64-bit shellcode"), - ("freeze", "features previously frozen by capa"), - ] - format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats]) - parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample") - parser.add_argument("sample", type=str, help="Path to sample to analyze") - parser.add_argument( - "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help - ) + capa.main.install_common_args(parser, wanted={"format", 
"sample"}) + parser.add_argument("-F", "--function", type=lambda x: int(x, 0x10), help="Show features for specific function") - parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR") - parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") args = parser.parse_args(args=argv) - - if args.quiet: - logging.basicConfig(level=logging.ERROR) - logging.getLogger().setLevel(logging.ERROR) - elif args.debug: - logging.basicConfig(level=logging.DEBUG) - logging.getLogger().setLevel(logging.DEBUG) - else: - logging.basicConfig(level=logging.INFO) - logging.getLogger().setLevel(logging.INFO) - - # disable vivisect-related logging, it's verbose and not relevant for capa users - capa.main.set_vivisect_log_level(logging.CRITICAL) + capa.main.handle_common_args(args) try: taste = capa.helpers.get_file_taste(args.sample)