scripts: use common argument handler

closes #449
2026-02-04 11:07:53 -08:00 · 2021-03-05 10:58:19 -07:00
parent f2749d884f
commit 3a1d5d068c
6 changed files with 90 additions and 201 deletions
--- a/scripts/bulk-process.py
+++ b/scripts/bulk-process.py
@@ -65,6 +65,7 @@ import multiprocessing.pool

 import capa
 import capa.main
+import capa.rules
 import capa.render

 logger = logging.getLogger("capa")
@@ -139,42 +140,14 @@ def main(argv=None):
        argv = sys.argv[1:]

        parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+        capa.main.install_common_args(parser, wanted={"rules"})
        parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze")
-        parser.add_argument(
-            "-r",
-            "--rules",
-            type=str,
-            default="(embedded rules)",
-            help="Path to rule file or directory, use embedded rules by default",
-        )
-        parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
-        parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
        parser.add_argument(
            "-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor"
        )
        parser.add_argument("--no-mp", action="store_true", help="disable subprocesses")
        args = parser.parse_args(args=argv)
-
-        if args.quiet:
-            logging.basicConfig(level=logging.ERROR)
-            logging.getLogger().setLevel(logging.ERROR)
-        elif args.debug:
-            logging.basicConfig(level=logging.DEBUG)
-            logging.getLogger().setLevel(logging.DEBUG)
-        else:
-            logging.basicConfig(level=logging.INFO)
-            logging.getLogger().setLevel(logging.INFO)
-
-        # disable vivisect-related logging, it's verbose and not relevant for capa users
-        capa.main.set_vivisect_log_level(logging.CRITICAL)
-
-        # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
-        # tqdm bails when trying to render the progress bar in this setup.
-        # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
-        # see #380 and: https://stackoverflow.com/a/3259271/87207
-        import codecs
-
-        codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
+        capa.main.handle_common_args(args)

        if args.rules == "(embedded rules)":
            logger.info("using default embedded rules")
--- a/scripts/capa_as_library.py
+++ b/scripts/capa_as_library.py
@@ -6,6 +6,7 @@ import collections
 import capa.main
 import capa.rules
 import capa.engine
+import capa.render
 import capa.features
 import capa.render.utils as rutils
 from capa.engine import *
--- a/scripts/import-to-ida.py
+++ b/scripts/import-to-ida.py
@@ -31,10 +31,8 @@ See the License for the specific language governing permissions and limitations
 import json
 import logging

-import idc
 import idautils
 import ida_funcs
-import ida_idaapi
 import ida_kernwin

 logger = logging.getLogger("capa")
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -320,7 +320,7 @@ class FormatIncorrect(Lint):
        expected = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml()

        if actual != expected:
-            diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(1))
+            diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(True))
            self.recommendation = self.recommendation_template.format("".join(diff))
            return True

@@ -555,6 +555,7 @@ def main(argv=None):
    samples_path = os.path.join(os.path.dirname(__file__), "..", "tests", "data")

    parser = argparse.ArgumentParser(description="A program.")
+    capa.main.install_common_args(parser, wanted={"tag"})
    parser.add_argument("rules", type=str, help="Path to rules")
    parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples")
    parser.add_argument(
@@ -562,22 +563,9 @@ def main(argv=None):
        action="store_true",
        help="Enable thorough linting - takes more time, but does a better job",
    )
-    parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values")
-    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
-    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
    args = parser.parse_args(args=argv)
+    capa.main.handle_common_args(args)

-    if args.verbose:
-        level = logging.DEBUG
-    elif args.quiet:
-        level = logging.ERROR
-    else:
-        level = logging.INFO
-
-    logging.basicConfig(level=level)
-    logging.getLogger("capa.lint").setLevel(level)
-
-    capa.main.set_vivisect_log_level(logging.CRITICAL)
    logging.getLogger("capa").setLevel(logging.CRITICAL)
    logging.getLogger("viv_utils").setLevel(logging.CRITICAL)

--- a/scripts/show-capabilities-by-function.py
+++ b/scripts/show-capabilities-by-function.py
@@ -110,144 +110,98 @@ def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

-        formats = [
-            ("auto", "(default) detect file type automatically"),
-            ("pe", "Windows PE file"),
-            ("sc32", "32-bit shellcode"),
-            ("sc64", "64-bit shellcode"),
-            ("freeze", "features previously frozen by capa"),
-        ]
-        format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
+    parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+    capa.main.install_common_args(parser, wanted={"format", "sample", "rules", "tag"})
+    args = parser.parse_args(args=argv)
+    capa.main.handle_common_args(args)

-        parser = argparse.ArgumentParser(description="detect capabilities in programs.")
-        parser.add_argument("sample", type=str, help="Path to sample to analyze")
-        parser.add_argument(
-            "-r",
-            "--rules",
-            type=str,
-            default="(embedded rules)",
-            help="Path to rule file or directory, use embedded rules by default",
-        )
-        parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values")
-        parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
-        parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
-        parser.add_argument(
-            "-f",
-            "--format",
-            choices=[f[0] for f in formats],
-            default="auto",
-            help="Select sample format, %s" % format_help,
-        )
-        args = parser.parse_args(args=argv)
+    try:
+        taste = get_file_taste(args.sample)
+    except IOError as e:
+        logger.error("%s", str(e))
+        return -1

-        if args.quiet:
-            logging.basicConfig(level=logging.ERROR)
-            logging.getLogger().setLevel(logging.ERROR)
-        elif args.debug:
-            logging.basicConfig(level=logging.DEBUG)
-            logging.getLogger().setLevel(logging.DEBUG)
-        else:
-            logging.basicConfig(level=logging.INFO)
-            logging.getLogger().setLevel(logging.INFO)
+    if args.rules == "(embedded rules)":
+        logger.info("-" * 80)
+        logger.info(" Using default embedded rules.")
+        logger.info(" To provide your own rules, use the form `capa.exe -r ./path/to/rules/  /path/to/mal.exe`.")
+        logger.info(" You can see the current default rule set here:")
+        logger.info("     https://github.com/fireeye/capa-rules")
+        logger.info("-" * 80)

-        # disable vivisect-related logging, it's verbose and not relevant for capa users
-        capa.main.set_vivisect_log_level(logging.CRITICAL)
+        logger.debug("detected running from source")
+        args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
+        logger.debug("default rule path (source method): %s", args.rules)
+    else:
+        logger.info("using rules path: %s", args.rules)

+    try:
+        rules = capa.main.get_rules(args.rules)
+        rules = capa.rules.RuleSet(rules)
+        logger.info("successfully loaded %s rules", len(rules))
+        if args.tag:
+            rules = rules.filter_rules_by_meta(args.tag)
+            logger.info("selected %s rules", len(rules))
+    except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
+        logger.error("%s", str(e))
+        return -1
+
+    if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
+        format = "freeze"
+        with open(args.sample, "rb") as f:
+            extractor = capa.features.freeze.load(f.read())
+    else:
+        format = args.format
        try:
-            taste = get_file_taste(args.sample)
-        except IOError as e:
-            logger.error("%s", str(e))
+            extractor = capa.main.get_extractor(args.sample, args.format)
+        except capa.main.UnsupportedFormatError:
+            logger.error("-" * 80)
+            logger.error(" Input file does not appear to be a PE file.")
+            logger.error(" ")
+            logger.error(
+                " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
+            )
+            logger.error(
+                " If you don't know the input file type, you can try using the `file` utility to guess it."
+            )
+            logger.error("-" * 80)
+            return -1
+        except capa.main.UnsupportedRuntimeError:
+            logger.error("-" * 80)
+            logger.error(" Unsupported runtime or Python interpreter.")
+            logger.error(" ")
+            logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
+            logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
+            logger.error(" ")
+            logger.error(
+                " If you're seeing this message on the command line, please ensure you're running Python 2.7."
+            )
+            logger.error("-" * 80)
            return -1

-        # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
-        # tqdm bails when trying to render the progress bar in this setup.
-        # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
-        # see #380 and: https://stackoverflow.com/a/3259271/87207
-        import codecs
+    meta = capa.main.collect_metadata(argv, args.sample, args.rules, format, extractor)
+    capabilities, counts = capa.main.find_capabilities(rules, extractor)
+    meta["analysis"].update(counts)

-        codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
-
-        if args.rules == "(embedded rules)":
-            logger.info("-" * 80)
-            logger.info(" Using default embedded rules.")
-            logger.info(" To provide your own rules, use the form `capa.exe -r ./path/to/rules/  /path/to/mal.exe`.")
-            logger.info(" You can see the current default rule set here:")
-            logger.info("     https://github.com/fireeye/capa-rules")
-            logger.info("-" * 80)
-
-            logger.debug("detected running from source")
-            args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
-            logger.debug("default rule path (source method): %s", args.rules)
-        else:
-            logger.info("using rules path: %s", args.rules)
-
-        try:
-            rules = capa.main.get_rules(args.rules)
-            rules = capa.rules.RuleSet(rules)
-            logger.info("successfully loaded %s rules", len(rules))
-            if args.tag:
-                rules = rules.filter_rules_by_meta(args.tag)
-                logger.info("selected %s rules", len(rules))
-        except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
-            logger.error("%s", str(e))
+    if capa.main.has_file_limitation(rules, capabilities):
+        # bail if capa encountered file limitation e.g. a packed binary
+        # do show the output in verbose mode, though.
+        if not (args.verbose or args.vverbose or args.json):
            return -1

-        if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
-            format = "freeze"
-            with open(args.sample, "rb") as f:
-                extractor = capa.features.freeze.load(f.read())
-        else:
-            format = args.format
-            try:
-                extractor = capa.main.get_extractor(args.sample, args.format)
-            except capa.main.UnsupportedFormatError:
-                logger.error("-" * 80)
-                logger.error(" Input file does not appear to be a PE file.")
-                logger.error(" ")
-                logger.error(
-                    " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
-                )
-                logger.error(
-                    " If you don't know the input file type, you can try using the `file` utility to guess it."
-                )
-                logger.error("-" * 80)
-                return -1
-            except capa.main.UnsupportedRuntimeError:
-                logger.error("-" * 80)
-                logger.error(" Unsupported runtime or Python interpreter.")
-                logger.error(" ")
-                logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
-                logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
-                logger.error(" ")
-                logger.error(
-                    " If you're seeing this message on the command line, please ensure you're running Python 2.7."
-                )
-                logger.error("-" * 80)
-                return -1
+    # colorama will detect:
+    #  - when on Windows console, and fixup coloring, and
+    #  - when not an interactive session, and disable coloring
+    # renderers should use coloring and assume it will be stripped out if necessary.
+    colorama.init()
+    doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
+    print(render_matches_by_function(doc))
+    colorama.deinit()

-        meta = capa.main.collect_metadata(argv, args.sample, args.rules, format, extractor)
-        capabilities, counts = capa.main.find_capabilities(rules, extractor)
-        meta["analysis"].update(counts)
+    logger.info("done.")

-        if capa.main.has_file_limitation(rules, capabilities):
-            # bail if capa encountered file limitation e.g. a packed binary
-            # do show the output in verbose mode, though.
-            if not (args.verbose or args.vverbose or args.json):
-                return -1
-
-        # colorama will detect:
-        #  - when on Windows console, and fixup coloring, and
-        #  - when not an interactive session, and disable coloring
-        # renderers should use coloring and assume it will be stripped out if necessary.
-        colorama.init()
-        doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
-        print(render_matches_by_function(doc))
-        colorama.deinit()
-
-        logger.info("done.")
-
-        return 0
+    return 0


 if __name__ == "__main__":
-    sys.exit(main())
+    sys.exit(main())
--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -82,37 +82,12 @@ def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

-    formats = [
-        ("auto", "(default) detect file type automatically"),
-        ("pe", "Windows PE file"),
-        ("sc32", "32-bit shellcode"),
-        ("sc64", "64-bit shellcode"),
-        ("freeze", "features previously frozen by capa"),
-    ]
-    format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
-
    parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample")
-    parser.add_argument("sample", type=str, help="Path to sample to analyze")
-    parser.add_argument(
-        "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
-    )
+    capa.main.install_common_args(parser, wanted={"format", "sample"})
+
    parser.add_argument("-F", "--function", type=lambda x: int(x, 0x10), help="Show features for specific function")
-    parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
-    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
    args = parser.parse_args(args=argv)
-
-    if args.quiet:
-        logging.basicConfig(level=logging.ERROR)
-        logging.getLogger().setLevel(logging.ERROR)
-    elif args.debug:
-        logging.basicConfig(level=logging.DEBUG)
-        logging.getLogger().setLevel(logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO)
-        logging.getLogger().setLevel(logging.INFO)
-
-    # disable vivisect-related logging, it's verbose and not relevant for capa users
-    capa.main.set_vivisect_log_level(logging.CRITICAL)
+    capa.main.handle_common_args(args)

    try:
        taste = capa.helpers.get_file_taste(args.sample)