Merge pull request #450 from fireeye/feature-refactor-args

refactor common cli argument handling
2026-02-04 19:12:01 -08:00 · 2021-03-05 15:07:50 -07:00
parent dfbe1418d4 9f743f1c59
commit 3e55581bf7
8 changed files with 242 additions and 464 deletions
--- a/capa/main.py
+++ b/capa/main.py
@@ -282,6 +282,8 @@ def get_workspace(path, format, should_save=True):
        vw = get_shellcode_vw(path, arch="i386", should_save=should_save)
    elif format == "sc64":
        vw = get_shellcode_vw(path, arch="amd64", should_save=should_save)
+    else:
+        raise ValueError("unexpected format: " + format)
    logger.debug("%s", get_meta_str(vw))
    return vw

@@ -444,19 +446,162 @@ def collect_metadata(argv, sample_path, rules_path, format, extractor):
    }


+def install_common_args(parser, wanted=None):
+    """
+    register a common set of command line arguments for re-use by main & scripts.
+    these are things like logging/coloring/etc.
+    also enable callers to opt-in to common arguments, like specifying the input sample.
+
+    this routine lets many scripts use the same language for cli arguments.
+    see `handle_common_args` to do common configuration.
+
+    args:
+      parser (argparse.ArgumentParser): a parser to update in place, adding common arguments.
+      wanted (Set[str]): collection of arguments to opt into, including:
+        - "sample": required positional argument to input file.
+        - "format": flag to override file format.
+        - "backend": flag to override analysis backend under py3.
+        - "rules": flag to override path to capa rules.
+        - "tag": flag to override/specify which rules to match.
+    """
+    if wanted is None:
+        wanted = set()
+
+    #
+    # common arguments that all scripts will have
+    #
+
+    parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__))
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)"
+    )
+    parser.add_argument(
+        "-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)"
+    )
+    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
+    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
+    parser.add_argument(
+        "--color",
+        type=str,
+        choices=("auto", "always", "never"),
+        default="auto",
+        help="enable ANSI color codes in results, default: only during interactive session",
+    )
+
+    #
+    # arguments that may be opted into:
+    #
+    #   - sample
+    #   - format
+    #   - rules
+    #   - tag
+    #
+
+    if "sample" in wanted:
+        if sys.version_info >= (3, 0):
+            parser.add_argument(
+                # Python 3 str handles non-ASCII arguments correctly
+                "sample",
+                type=str,
+                help="path to sample to analyze",
+            )
+        else:
+            parser.add_argument(
+                # in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters
+                # https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works
+                # in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/)
+                "sample",
+                type=lambda s: s.decode(sys.getfilesystemencoding()),
+                help="path to sample to analyze",
+            )
+
+    if "format" in wanted:
+        formats = [
+            ("auto", "(default) detect file type automatically"),
+            ("pe", "Windows PE file"),
+            ("sc32", "32-bit shellcode"),
+            ("sc64", "64-bit shellcode"),
+            ("freeze", "features previously frozen by capa"),
+        ]
+        format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
+        parser.add_argument(
+            "-f",
+            "--format",
+            choices=[f[0] for f in formats],
+            default="auto",
+            help="select sample format, %s" % format_help,
+        )
+
+    if "backend" in wanted and sys.version_info >= (3, 0):
+        parser.add_argument(
+            "-b",
+            "--backend",
+            type=str,
+            help="select the backend to use",
+            choices=(BACKEND_VIV, BACKEND_SMDA),
+            default=BACKEND_VIV,
+        )
+
+    if "rules" in wanted:
+        parser.add_argument(
+            "-r",
+            "--rules",
+            type=str,
+            default=RULES_PATH_DEFAULT_STRING,
+            help="path to rule file or directory, use embedded rules by default",
+        )
+
+    if "tag" in wanted:
+        parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values")
+
+
+def handle_common_args(args):
+    """
+    handle the global config specified by `install_common_args`,
+    such as configuring logging/coloring/etc.
+
+    args:
+      args (argparse.Namespace): parsed arguments that included at least `install_common_args` args.
+    """
+    if args.quiet:
+        logging.basicConfig(level=logging.WARNING)
+        logging.getLogger().setLevel(logging.WARNING)
+    elif args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+        logging.getLogger().setLevel(logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+        logging.getLogger().setLevel(logging.INFO)
+
+    # disable vivisect-related logging, it's verbose and not relevant for capa users
+    set_vivisect_log_level(logging.CRITICAL)
+
+    # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
+    # tqdm bails when trying to render the progress bar in this setup.
+    # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
+    # see #380 and: https://stackoverflow.com/a/3259271/87207
+    import codecs
+
+    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
+
+    if args.color == "always":
+        colorama.init(strip=False)
+    elif args.color == "auto":
+        # colorama will detect:
+        #  - when on Windows console, and fixup coloring, and
+        #  - when not an interactive session, and disable coloring
+        # renderers should use coloring and assume it will be stripped out if necessary.
+        colorama.init()
+    elif args.color == "never":
+        colorama.init(strip=True)
+    else:
+        raise RuntimeError("unexpected --color value: " + args.color)
+
+
 def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

-    formats = [
-        ("auto", "(default) detect file type automatically"),
-        ("pe", "Windows PE file"),
-        ("sc32", "32-bit shellcode"),
-        ("sc64", "64-bit shellcode"),
-        ("freeze", "features previously frozen by capa"),
-    ]
-    format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
-
    desc = "The FLARE team's open-source tool to identify capabilities in executable files."
    epilog = textwrap.dedent(
        """
@@ -489,74 +634,10 @@ def main(argv=None):
    parser = argparse.ArgumentParser(
        description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
    )
-
-    if sys.version_info >= (3, 0):
-        parser.add_argument(
-            # Python 3 str handles non-ASCII arguments correctly
-            "sample",
-            type=str,
-            help="path to sample to analyze",
-        )
-    else:
-        parser.add_argument(
-            # in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters
-            # https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works
-            # in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/)
-            "sample",
-            type=lambda s: s.decode(sys.getfilesystemencoding()),
-            help="path to sample to analyze",
-        )
-    parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__))
-    parser.add_argument(
-        "-r",
-        "--rules",
-        type=str,
-        default=RULES_PATH_DEFAULT_STRING,
-        help="path to rule file or directory, use embedded rules by default",
-    )
-    parser.add_argument(
-        "-f", "--format", choices=[f[0] for f in formats], default="auto", help="select sample format, %s" % format_help
-    )
-    if sys.version_info >= (3, 0):
-        parser.add_argument(
-            "-b",
-            "--backend",
-            type=str,
-            help="select the backend to use",
-            choices=(BACKEND_VIV, BACKEND_SMDA),
-            default=BACKEND_VIV,
-        )
-    parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values")
+    install_common_args(parser, {"sample", "format", "backend", "rules", "tag"})
    parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
-    parser.add_argument(
-        "-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)"
-    )
-    parser.add_argument(
-        "-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)"
-    )
-    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
-    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
-    parser.add_argument(
-        "--color",
-        type=str,
-        choices=("auto", "always", "never"),
-        default="auto",
-        help="enable ANSI color codes in results, default: only during interactive session",
-    )
    args = parser.parse_args(args=argv)
-
-    if args.quiet:
-        logging.basicConfig(level=logging.WARNING)
-        logging.getLogger().setLevel(logging.WARNING)
-    elif args.debug:
-        logging.basicConfig(level=logging.DEBUG)
-        logging.getLogger().setLevel(logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO)
-        logging.getLogger().setLevel(logging.INFO)
-
-    # disable vivisect-related logging, it's verbose and not relevant for capa users
-    set_vivisect_log_level(logging.CRITICAL)
+    handle_common_args(args)

    try:
        taste = get_file_taste(args.sample)
@@ -566,14 +647,6 @@ def main(argv=None):
        logger.error("%s", e.args[0])
        return -1

-    # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
-    # tqdm bails when trying to render the progress bar in this setup.
-    # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
-    # see #380 and: https://stackoverflow.com/a/3259271/87207
-    import codecs
-
-    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
-
    if args.rules == RULES_PATH_DEFAULT_STRING:
        logger.debug("-" * 80)
        logger.debug(" Using default embedded rules.")
@@ -630,7 +703,7 @@ def main(argv=None):
    else:
        format = args.format
        try:
-            backend = args.backend if sys.version_info > (3, 0) else capa.main.BACKEND_VIV
+            backend = args.backend if sys.version_info > (3, 0) else BACKEND_VIV
            extractor = get_extractor(args.sample, args.format, backend, disable_progress=args.quiet)
        except UnsupportedFormatError:
            logger.error("-" * 80)
@@ -664,19 +737,6 @@ def main(argv=None):
        if not (args.verbose or args.vverbose or args.json):
            return -1

-    if args.color == "always":
-        colorama.init(strip=False)
-    elif args.color == "auto":
-        # colorama will detect:
-        #  - when on Windows console, and fixup coloring, and
-        #  - when not an interactive session, and disable coloring
-        # renderers should use coloring and assume it will be stripped out if necessary.
-        colorama.init()
-    elif args.color == "never":
-        colorama.init(strip=True)
-    else:
-        raise RuntimeError("unexpected --color value: " + args.color)
-
    if args.json:
        print(capa.render.render_json(meta, rules, capabilities))
    elif args.vverbose:
--- a/scripts/bulk-process.py
+++ b/scripts/bulk-process.py
@@ -65,6 +65,7 @@ import multiprocessing.pool

 import capa
 import capa.main
+import capa.rules
 import capa.render

 logger = logging.getLogger("capa")
@@ -139,42 +140,14 @@ def main(argv=None):
        argv = sys.argv[1:]

        parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+        capa.main.install_common_args(parser, wanted={"rules"})
        parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze")
-        parser.add_argument(
-            "-r",
-            "--rules",
-            type=str,
-            default="(embedded rules)",
-            help="Path to rule file or directory, use embedded rules by default",
-        )
-        parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
-        parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
        parser.add_argument(
            "-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor"
        )
        parser.add_argument("--no-mp", action="store_true", help="disable subprocesses")
        args = parser.parse_args(args=argv)
-
-        if args.quiet:
-            logging.basicConfig(level=logging.ERROR)
-            logging.getLogger().setLevel(logging.ERROR)
-        elif args.debug:
-            logging.basicConfig(level=logging.DEBUG)
-            logging.getLogger().setLevel(logging.DEBUG)
-        else:
-            logging.basicConfig(level=logging.INFO)
-            logging.getLogger().setLevel(logging.INFO)
-
-        # disable vivisect-related logging, it's verbose and not relevant for capa users
-        capa.main.set_vivisect_log_level(logging.CRITICAL)
-
-        # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
-        # tqdm bails when trying to render the progress bar in this setup.
-        # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
-        # see #380 and: https://stackoverflow.com/a/3259271/87207
-        import codecs
-
-        codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
+        capa.main.handle_common_args(args)

        if args.rules == "(embedded rules)":
            logger.info("using default embedded rules")
--- a/scripts/capa_as_library.py
+++ b/scripts/capa_as_library.py
@@ -6,6 +6,7 @@ import collections
 import capa.main
 import capa.rules
 import capa.engine
+import capa.render
 import capa.features
 import capa.render.utils as rutils
 from capa.engine import *
--- a/scripts/import-to-ida.py
+++ b/scripts/import-to-ida.py
@@ -31,10 +31,8 @@ See the License for the specific language governing permissions and limitations
 import json
 import logging

-import idc
 import idautils
 import ida_funcs
-import ida_idaapi
 import ida_kernwin

 logger = logging.getLogger("capa")
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -320,7 +320,7 @@ class FormatIncorrect(Lint):
        expected = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml()

        if actual != expected:
-            diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(1))
+            diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(True))
            self.recommendation = self.recommendation_template.format("".join(diff))
            return True

@@ -555,6 +555,7 @@ def main(argv=None):
    samples_path = os.path.join(os.path.dirname(__file__), "..", "tests", "data")

    parser = argparse.ArgumentParser(description="A program.")
+    capa.main.install_common_args(parser, wanted={"tag"})
    parser.add_argument("rules", type=str, help="Path to rules")
    parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples")
    parser.add_argument(
@@ -562,22 +563,9 @@ def main(argv=None):
        action="store_true",
        help="Enable thorough linting - takes more time, but does a better job",
    )
-    parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values")
-    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
-    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
    args = parser.parse_args(args=argv)
+    capa.main.handle_common_args(args)

-    if args.verbose:
-        level = logging.DEBUG
-    elif args.quiet:
-        level = logging.ERROR
-    else:
-        level = logging.INFO
-
-    logging.basicConfig(level=level)
-    logging.getLogger("capa.lint").setLevel(level)
-
-    capa.main.set_vivisect_log_level(logging.CRITICAL)
    logging.getLogger("capa").setLevel(logging.CRITICAL)
    logging.getLogger("viv_utils").setLevel(logging.CRITICAL)

--- a/scripts/migrate-rules.py
+++ b/scripts/migrate-rules.py
@@ -1,167 +0,0 @@
-#!/usr/bin/env python
-"""
-migrate rules and their namespaces.
-
-example:
-
-    $ python scripts/migrate-rules.py migration.csv ./rules ./new-rules
-
-Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
-You may obtain a copy of the License at: [package root]/LICENSE.txt
-Unless required by applicable law or agreed to in writing, software distributed under the License
- is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and limitations under the License.
-"""
-import os
-import csv
-import sys
-import logging
-import os.path
-import argparse
-import collections
-
-import capa.rules
-
-logger = logging.getLogger("migrate-rules")
-
-
-def read_plan(plan_path):
-    with open(plan_path, "rb") as f:
-        return list(
-            csv.DictReader(
-                f,
-                restkey="other",
-                fieldnames=(
-                    "existing path",
-                    "existing name",
-                    "existing rule-category",
-                    "proposed name",
-                    "proposed namespace",
-                    "ATT&CK",
-                    "MBC",
-                    "comment1",
-                ),
-            )
-        )
-
-
-def read_rules(rule_directory):
-    rules = {}
-    for root, dirs, files in os.walk(rule_directory):
-        for file in files:
-            path = os.path.join(root, file)
-            if not path.endswith(".yml"):
-                logger.info("skipping file: %s", path)
-                continue
-
-            rule = capa.rules.Rule.from_yaml_file(path)
-            rules[rule.name] = rule
-
-            if "nursery" in path:
-                rule.meta["capa/nursery"] = True
-    return rules
-
-
-def main(argv=None):
-    if argv is None:
-        argv = sys.argv[1:]
-
-    parser = argparse.ArgumentParser(description="migrate rules.")
-    parser.add_argument("plan", type=str, help="Path to CSV describing migration")
-    parser.add_argument("source", type=str, help="Source directory of rules")
-    parser.add_argument("destination", type=str, help="Destination directory of rules")
-    args = parser.parse_args(args=argv)
-
-    logging.basicConfig(level=logging.INFO)
-    logging.getLogger().setLevel(logging.INFO)
-
-    plan = read_plan(args.plan)
-    logger.info("read %d plan entries", len(plan))
-
-    rules = read_rules(args.source)
-    logger.info("read %d rules", len(rules))
-
-    planned_rules = set([row["existing name"] for row in plan])
-    unplanned_rules = [rule for (name, rule) in rules.items() if name not in planned_rules]
-
-    if unplanned_rules:
-        logger.error("plan does not account for %d rules:" % (len(unplanned_rules)))
-        for rule in unplanned_rules:
-            logger.error("  " + rule.name)
-        return -1
-
-    # pairs of strings (needle, replacement)
-    match_translations = []
-
-    for row in plan:
-        if not row["existing name"]:
-            continue
-
-        rule = rules[row["existing name"]]
-
-        if rule.meta["name"] != row["proposed name"]:
-            logger.info("renaming rule '%s' -> '%s'", rule.meta["name"], row["proposed name"])
-
-            # assume the yaml is formatted like `- match: $rule-name`.
-            # but since its been linted, this should be ok.
-            match_translations.append(("- match: " + rule.meta["name"], "- match: " + row["proposed name"]))
-
-            rule.meta["name"] = row["proposed name"]
-            rule.name = row["proposed name"]
-
-        if "rule-category" in rule.meta:
-            logger.info("deleting rule category '%s'", rule.meta["rule-category"])
-            del rule.meta["rule-category"]
-
-        rule.meta["namespace"] = row["proposed namespace"]
-
-        if row["ATT&CK"] != "n/a" and row["ATT&CK"] != "":
-            tag = row["ATT&CK"]
-            name, _, id = tag.rpartition(" ")
-            tag = "%s [%s]" % (name, id)
-            rule.meta["att&ck"] = [tag]
-
-        if row["MBC"] != "n/a" and row["MBC"] != "":
-            tag = row["MBC"]
-            rule.meta["mbc"] = [tag]
-
-    for rule in rules.values():
-        filename = rule.name
-        filename = filename.lower()
-        filename = filename.replace(" ", "-")
-        filename = filename.replace("(", "")
-        filename = filename.replace(")", "")
-        filename = filename.replace("+", "")
-        filename = filename.replace("/", "")
-        filename = filename + ".yml"
-
-        try:
-            if rule.meta.get("capa/nursery"):
-                directory = os.path.join(args.destination, "nursery")
-            elif rule.meta.get("lib"):
-                directory = os.path.join(args.destination, "lib")
-            else:
-                directory = os.path.join(args.destination, rule.meta.get("namespace"))
-            os.makedirs(directory)
-        except OSError:
-            pass
-        else:
-            logger.info("created namespace: %s", directory)
-
-        path = os.path.join(directory, filename)
-        logger.info("writing rule %s", path)
-
-        doc = rule.to_yaml().decode("utf-8")
-        for (needle, replacement) in match_translations:
-            doc = doc.replace(needle, replacement)
-
-        with open(path, "wb") as f:
-            f.write(doc.encode("utf-8"))
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/scripts/show-capabilities-by-function.py
+++ b/scripts/show-capabilities-by-function.py
@@ -110,143 +110,93 @@ def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

-        formats = [
-            ("auto", "(default) detect file type automatically"),
-            ("pe", "Windows PE file"),
-            ("sc32", "32-bit shellcode"),
-            ("sc64", "64-bit shellcode"),
-            ("freeze", "features previously frozen by capa"),
-        ]
-        format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
+    parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+    capa.main.install_common_args(parser, wanted={"format", "sample", "rules", "tag"})
+    args = parser.parse_args(args=argv)
+    capa.main.handle_common_args(args)

-        parser = argparse.ArgumentParser(description="detect capabilities in programs.")
-        parser.add_argument("sample", type=str, help="Path to sample to analyze")
-        parser.add_argument(
-            "-r",
-            "--rules",
-            type=str,
-            default="(embedded rules)",
-            help="Path to rule file or directory, use embedded rules by default",
-        )
-        parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values")
-        parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
-        parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
-        parser.add_argument(
-            "-f",
-            "--format",
-            choices=[f[0] for f in formats],
-            default="auto",
-            help="Select sample format, %s" % format_help,
-        )
-        args = parser.parse_args(args=argv)
+    try:
+        taste = get_file_taste(args.sample)
+    except IOError as e:
+        logger.error("%s", str(e))
+        return -1

-        if args.quiet:
-            logging.basicConfig(level=logging.ERROR)
-            logging.getLogger().setLevel(logging.ERROR)
-        elif args.debug:
-            logging.basicConfig(level=logging.DEBUG)
-            logging.getLogger().setLevel(logging.DEBUG)
-        else:
-            logging.basicConfig(level=logging.INFO)
-            logging.getLogger().setLevel(logging.INFO)
+    if args.rules == "(embedded rules)":
+        logger.info("-" * 80)
+        logger.info(" Using default embedded rules.")
+        logger.info(" To provide your own rules, use the form `capa.exe -r ./path/to/rules/  /path/to/mal.exe`.")
+        logger.info(" You can see the current default rule set here:")
+        logger.info("     https://github.com/fireeye/capa-rules")
+        logger.info("-" * 80)

-        # disable vivisect-related logging, it's verbose and not relevant for capa users
-        capa.main.set_vivisect_log_level(logging.CRITICAL)
+        logger.debug("detected running from source")
+        args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
+        logger.debug("default rule path (source method): %s", args.rules)
+    else:
+        logger.info("using rules path: %s", args.rules)

+    try:
+        rules = capa.main.get_rules(args.rules)
+        rules = capa.rules.RuleSet(rules)
+        logger.info("successfully loaded %s rules", len(rules))
+        if args.tag:
+            rules = rules.filter_rules_by_meta(args.tag)
+            logger.info("selected %s rules", len(rules))
+    except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
+        logger.error("%s", str(e))
+        return -1
+
+    if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
+        format = "freeze"
+        with open(args.sample, "rb") as f:
+            extractor = capa.features.freeze.load(f.read())
+    else:
+        format = args.format
        try:
-            taste = get_file_taste(args.sample)
-        except IOError as e:
-            logger.error("%s", str(e))
+            extractor = capa.main.get_extractor(args.sample, args.format)
+        except capa.main.UnsupportedFormatError:
+            logger.error("-" * 80)
+            logger.error(" Input file does not appear to be a PE file.")
+            logger.error(" ")
+            logger.error(
+                " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
+            )
+            logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
+            logger.error("-" * 80)
+            return -1
+        except capa.main.UnsupportedRuntimeError:
+            logger.error("-" * 80)
+            logger.error(" Unsupported runtime or Python interpreter.")
+            logger.error(" ")
+            logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
+            logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
+            logger.error(" ")
+            logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
+            logger.error("-" * 80)
            return -1

-        # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
-        # tqdm bails when trying to render the progress bar in this setup.
-        # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
-        # see #380 and: https://stackoverflow.com/a/3259271/87207
-        import codecs
+    meta = capa.main.collect_metadata(argv, args.sample, args.rules, format, extractor)
+    capabilities, counts = capa.main.find_capabilities(rules, extractor)
+    meta["analysis"].update(counts)

-        codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
-
-        if args.rules == "(embedded rules)":
-            logger.info("-" * 80)
-            logger.info(" Using default embedded rules.")
-            logger.info(" To provide your own rules, use the form `capa.exe -r ./path/to/rules/  /path/to/mal.exe`.")
-            logger.info(" You can see the current default rule set here:")
-            logger.info("     https://github.com/fireeye/capa-rules")
-            logger.info("-" * 80)
-
-            logger.debug("detected running from source")
-            args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
-            logger.debug("default rule path (source method): %s", args.rules)
-        else:
-            logger.info("using rules path: %s", args.rules)
-
-        try:
-            rules = capa.main.get_rules(args.rules)
-            rules = capa.rules.RuleSet(rules)
-            logger.info("successfully loaded %s rules", len(rules))
-            if args.tag:
-                rules = rules.filter_rules_by_meta(args.tag)
-                logger.info("selected %s rules", len(rules))
-        except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
-            logger.error("%s", str(e))
+    if capa.main.has_file_limitation(rules, capabilities):
+        # bail if capa encountered file limitation e.g. a packed binary
+        # do show the output in verbose mode, though.
+        if not (args.verbose or args.vverbose or args.json):
            return -1

-        if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
-            format = "freeze"
-            with open(args.sample, "rb") as f:
-                extractor = capa.features.freeze.load(f.read())
-        else:
-            format = args.format
-            try:
-                extractor = capa.main.get_extractor(args.sample, args.format, capa.main.BACKEND_VIV)
-            except capa.main.UnsupportedFormatError:
-                logger.error("-" * 80)
-                logger.error(" Input file does not appear to be a PE file.")
-                logger.error(" ")
-                logger.error(
-                    " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
-                )
-                logger.error(
-                    " If you don't know the input file type, you can try using the `file` utility to guess it."
-                )
-                logger.error("-" * 80)
-                return -1
-            except capa.main.UnsupportedRuntimeError:
-                logger.error("-" * 80)
-                logger.error(" Unsupported runtime or Python interpreter.")
-                logger.error(" ")
-                logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
-                logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
-                logger.error(" ")
-                logger.error(
-                    " If you're seeing this message on the command line, please ensure you're running Python 2.7."
-                )
-                logger.error("-" * 80)
-                return -1
+    # colorama will detect:
+    #  - when on Windows console, and fixup coloring, and
+    #  - when not an interactive session, and disable coloring
+    # renderers should use coloring and assume it will be stripped out if necessary.
+    colorama.init()
+    doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
+    print(render_matches_by_function(doc))
+    colorama.deinit()

-        meta = capa.main.collect_metadata(argv, args.sample, args.rules, format, extractor)
-        capabilities, counts = capa.main.find_capabilities(rules, extractor)
-        meta["analysis"].update(counts)
+    logger.info("done.")

-        if capa.main.has_file_limitation(rules, capabilities):
-            # bail if capa encountered file limitation e.g. a packed binary
-            # do show the output in verbose mode, though.
-            if not (args.verbose or args.vverbose or args.json):
-                return -1
-
-        # colorama will detect:
-        #  - when on Windows console, and fixup coloring, and
-        #  - when not an interactive session, and disable coloring
-        # renderers should use coloring and assume it will be stripped out if necessary.
-        colorama.init()
-        doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
-        print(render_matches_by_function(doc))
-        colorama.deinit()
-
-        logger.info("done.")
-
-        return 0
+    return 0


 if __name__ == "__main__":
--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -82,37 +82,12 @@ def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

-    formats = [
-        ("auto", "(default) detect file type automatically"),
-        ("pe", "Windows PE file"),
-        ("sc32", "32-bit shellcode"),
-        ("sc64", "64-bit shellcode"),
-        ("freeze", "features previously frozen by capa"),
-    ]
-    format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
-
    parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample")
-    parser.add_argument("sample", type=str, help="Path to sample to analyze")
-    parser.add_argument(
-        "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
-    )
+    capa.main.install_common_args(parser, wanted={"format", "sample"})
+
    parser.add_argument("-F", "--function", type=lambda x: int(x, 0x10), help="Show features for specific function")
-    parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
-    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
    args = parser.parse_args(args=argv)
-
-    if args.quiet:
-        logging.basicConfig(level=logging.ERROR)
-        logging.getLogger().setLevel(logging.ERROR)
-    elif args.debug:
-        logging.basicConfig(level=logging.DEBUG)
-        logging.getLogger().setLevel(logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO)
-        logging.getLogger().setLevel(logging.INFO)
-
-    # disable vivisect-related logging, it's verbose and not relevant for capa users
-    capa.main.set_vivisect_log_level(logging.CRITICAL)
+    capa.main.handle_common_args(args)

    try:
        taste = capa.helpers.get_file_taste(args.sample)