import source files, forgetting about 938 prior commits

2025-12-22 23:26:21 -08:00 · 2020-06-18 09:13:01 -06:00
parent f2d795090c
commit add3537447
65 changed files with 10322 additions and 0 deletions
--- a/scripts/testbed/run_rule_on_testbed.py
+++ b/scripts/testbed/run_rule_on_testbed.py
@@ -0,0 +1,297 @@
+'''
+Run a capa rule file against the testbed (frozen features in a directory).
+
+Example usage:
+  run_rule_on_testbed.py <path to rules> <rule name> <testbed dir>
+  run_rule_on_testbed.py ..\\rules "create pipe" samples
+'''
+
+import os
+import sys
+import json
+import time
+import logging
+
+from collections import defaultdict
+
+import argparse
+
+import capa.main
+import capa.rules
+import capa.features.freeze
+
+from scripts.testbed import FNAMES_EXTENSION, FREEZE_EXTENSION
+from start_ida_export_fimages import export_fimages
+
+
+# module-wide logger, named after this module
+logger = logging.getLogger(__name__)
+
+# running totals shared by check_rule() and print_summary()
+# (module-level state; sorry globals...)
+file_count = 0  # files whose features were loaded and matched
+file_hits = 0  # files in which the rule matched at least one function
+mal_hits = 0  # matching files categorized as 'MAL'
+other_hits = 0  # matching files in any other category
+function_hits = 0  # total number of matched functions
+errors = 0  # files that failed to load
+function_names = set()  # unique function names collected in verbose mode
+
+
+# path substring -> short category label used in the output
+CATEGORY = dict(
+    malicious='MAL',
+    benign='BEN',
+)
+
+
+def check_rule(path, rules, rule_name, only_matching, save_image, verbose):
+    '''
+    Match `rules` against one frozen feature file and report hits of `rule_name`.
+
+    Updates the module-level counters as a side effect.
+
+    args:
+      path: path to the frozen feature file.
+      rules: rule set handed to get_capabilities().
+      rule_name: name of the rule whose matches are counted.
+      only_matching: when truthy, do not print a line for files without a match.
+      save_image: output directory passed to export_fimages(); falsy disables export.
+      verbose: verbosity count; truthy lists the matched functions,
+        values above 1 additionally render each match result.
+    '''
+    global file_count, file_hits, mal_hits, other_hits, function_hits, errors
+
+    try:
+        capabilities = get_capabilities(path, rules)
+    except (ValueError, KeyError) as e:
+        logger.error('cannot load %s due to %s: %s', path, type(e).__name__, str(e))
+        errors += 1
+        return
+
+    # only files that loaded successfully count towards the total
+    file_count += 1
+    hits = get_function_hits(capabilities, rule_name)
+    if hits == 0:
+        if not only_matching:
+            render_no_hit(path)
+        return
+
+    category = get_category(path)
+    print('[x] rule matches %d function(s) in %s (%s)' % (hits, path, category))
+
+    file_hits += 1
+    function_hits += hits
+
+    if category == 'MAL':
+        mal_hits += 1
+    else:
+        other_hits += 1
+
+    if verbose:
+        render_hit_verbose(capabilities, path, verbose > 1)
+
+    if not save_image:
+        return
+
+    # export graph images for the matched functions, addressed as hex strings
+    fvas = ['0x%x' % fva for fva in get_hit_fvas(capabilities)]
+    file_path = get_idb_or_sample_path(path)
+    if not file_path:
+        logger.warning('could not get IDB or sample path')
+    elif not export_fimages(file_path, save_image, fvas):
+        logger.warning('exporting images failed')
+
+
+def get_idb_or_sample_path(path):
+    '''
+    Find an existing IDB or sample file that belongs to `path`.
+
+    Each known extension is tried, in order, first on `path` with its own
+    extension stripped and then on `path` unchanged.
+
+    returns: the first candidate that exists on disk, or None.
+    '''
+    stem = os.path.splitext(path)[0]
+    candidates = []
+    for ext in ('.idb', '.i64', '.exe_', '.dll_', '.mal_'):
+        for base in (stem, path):
+            candidates.append('%s%s' % (base, ext))
+
+    for candidate in candidates:
+        if os.path.exists(candidate):
+            return candidate
+    return None
+
+
+def get_capabilities(path, rules):
+    '''
+    Load the frozen features stored at `path` and match `rules` against them.
+
+    returns: the result of capa.main.find_capabilities(); the callers in this
+      file use it as a mapping from rule name to (function address, result) pairs.
+    '''
+    logger.debug('matching rules in %s', path)
+    with open(path, 'rb') as frozen:
+        buf = frozen.read()
+    extractor = capa.features.freeze.load(buf)
+    return capa.main.find_capabilities(rules, extractor, disable_progress=True)
+
+
+def get_function_hits(capabilities, rule_name):
+    '''
+    Return the number of matches recorded for `rule_name` (0 when the rule is absent).
+    '''
+    matches = capabilities.get(rule_name, [])
+    return len(matches)
+
+
+def get_category(path):
+    '''
+    Return the label of the first CATEGORY key that occurs as a substring of
+    `path`, or 'UNK' when none does.
+    '''
+    return next((label for keyword, label in CATEGORY.items() if keyword in path), 'UNK')
+
+
+def render_no_hit(path):
+    '''
+    Print the "no match" line for `path`, including its category label.
+    '''
+    category = get_category(path)
+    print('[ ] no match in %s (%s)' % (path, category))
+
+
+def render_hit_verbose(capabilities, path, vverbose):
+    '''
+    Print every matched function in `capabilities`, ordered by address per rule.
+
+    Function names are looked up via load_fnames(); when the names file is
+    missing the error is logged and all functions are shown as unknown.
+    Resolved names are also added to the module-level `function_names` set.
+
+    args:
+      capabilities: mapping of rule name to (function address, result) pairs.
+      path: path of the frozen feature file the matches belong to.
+      vverbose: when truthy, additionally render each match result.
+    '''
+    fnames = None
+    try:
+        fnames = load_fnames(path)
+    except IOError as e:
+        logger.error('%s', str(e))
+
+    for _rule, matches in capabilities.items():
+        for fva, res in sorted(matches, key=lambda p: p[0]):
+            if not fnames or fva not in fnames:
+                fname = '<name unknown>'
+            else:
+                fname = fnames[fva]
+                function_names.add(fname)
+            print('  - function 0x%x (%s)' % (fva, fname))
+
+            if vverbose:
+                capa.main.render_result(res, indent='      ')
+
+
+def get_hit_fvas(capabilities):
+    '''
+    Collect the function addresses of all matches in `capabilities`.
+
+    Addresses are sorted within each rule; rules are visited in the mapping's
+    iteration order.
+    '''
+    return [
+        fva
+        for _rule, matches in capabilities.items()
+        for (fva, _res) in sorted(matches, key=lambda p: p[0])
+    ]
+
+
+def load_fnames(path):
+    '''
+    Load the function names that accompany the frozen feature file `path`.
+
+    The names file path is derived by replacing FREEZE_EXTENSION with
+    FNAMES_EXTENSION in `path`. It is parsed as JSON first; if load_json()
+    rejects it, it is parsed with load_csv() instead.
+
+    returns: dict mapping function address (int) to function name.
+    raises: IOError if the names file does not exist.
+    '''
+    fnames_path = path.replace(FREEZE_EXTENSION, FNAMES_EXTENSION)
+    if not os.path.exists(fnames_path):
+        raise IOError('%s does not exist' % fnames_path)
+
+    logger.debug('fnames path: %s', fnames_path)
+    try:
+        # JSON object keyed by function address
+        raw = load_json(fnames_path)
+    except TypeError:
+        # load_json() signals "not JSON" with TypeError; fall back to the text format
+        raw = load_csv(fnames_path)
+        logger.debug('loaded CSV file')
+    else:
+        logger.debug('loaded JSON file')
+
+    fnames = convert_keys_to_int(raw)
+    logger.debug('read %d function names', len(fnames))
+    return fnames
+
+
+def load_json(path):
+    '''
+    Parse the file at `path` as JSON and return the decoded value.
+
+    raises: TypeError if the content is not valid JSON (the decoder's
+      ValueError is logged at debug level and translated).
+    '''
+    with open(path, 'r') as f:
+        try:
+            return json.load(f)
+        except ValueError as e:
+            logger.debug('not a JSON file, %s', str(e))
+            raise TypeError
+
+
+def load_csv(path):
+    '''
+    Parse a delimited text file of function names.
+
+    Each line is split on ':' into four fields; the third is used as the
+    function address key and the fourth as the function name. The first two
+    fields are ignored. Lines with fewer than four fields are logged as a
+    warning and skipped.
+
+    NOTE(review): the separator used here is ':'; confirm it matches the
+    files actually produced for the testbed.
+
+    returns: defaultdict(str) mapping address (str, as read) to function name.
+    '''
+    funcs = defaultdict(str)
+    with open(path, 'r') as f:
+        data = f.read().splitlines()
+    for line in data:
+        try:
+            # maxsplit=3 keeps any further ':' characters inside the name
+            _idbmd5, _md5, fva, name = line.split(':', 3)
+        except ValueError as e:
+            logger.warning('%s: "%s"', str(e), line)
+            # skip the malformed line; fva/name are unset or stale here
+            continue
+        funcs[fva] = name
+    return funcs
+
+
+def convert_keys_to_int(funcs_in):
+    '''
+    Return a copy of `funcs_in` whose keys are converted to integers.
+
+    Each key is parsed as a decimal number first; if that fails it is parsed
+    as hexadecimal (with or without a '0x' prefix). A key made only of
+    decimal digits is therefore always read as decimal.
+
+    raises: ValueError if a key is neither decimal nor hexadecimal.
+    '''
+    funcs = {}
+    # .items() works on Python 2 and 3, unlike the Python-2-only .iteritems()
+    for k, v in funcs_in.items():
+        try:
+            key = int(k)
+        except ValueError:
+            key = int(k, 0x10)
+        funcs[key] = v
+    return funcs
+
+
+def print_summary(verbose, start_time):
+    '''
+    Print the totals accumulated in the module-level counters.
+
+    args:
+      verbose: when truthy, also list the unique matched function names.
+      start_time: time.time() value taken when the run started; used to
+        log the elapsed time.
+    '''
+    global file_count, file_hits, function_hits, errors
+
+    print('\n[SUMMARY]')
+    m, s = divmod(time.time() - start_time, 60)
+    logger.info('ran for %d:%02d minutes', m, s)
+    # leave the percentage out when no file was processed (avoids dividing by zero)
+    ratio = ' (%d%%)' % ((float(file_hits) / file_count) * 100) if file_count else ''
+    print('matched %d function(s) in %d/%d%s sample(s), encountered %d error(s)' % (
+        function_hits, file_hits, file_count, ratio, errors))
+    print('%d hits on (MAL) files; %d hits on other files' % (mal_hits, other_hits))
+
+    if verbose and function_names:
+        print('matched function names (unique):')
+        # sorted so the listing is stable between runs (set order is arbitrary)
+        for fname in sorted(function_names):
+            print('  - %s' % fname)
+
+
+def main(argv=None):
+    '''
+    Command line entry point: run one rule against a frozen feature file or
+    every frozen feature file below a directory, then print a summary.
+
+    args:
+      argv: argument list without the program name; defaults to sys.argv[1:].
+
+    returns: -1 when the rules path is not a directory or the rules cannot be
+      loaded; otherwise None.
+    '''
+    if argv is None:
+        argv = sys.argv[1:]
+
+    parser = argparse.ArgumentParser(description="Run capa rule file against frozen features in a directory")
+    parser.add_argument("rules", type=str,
+                        help="Path to directory containing rules")
+    parser.add_argument("rule_name", type=str,
+                        help="Name of rule to test")
+    parser.add_argument("frozen_path", type=str,
+                        help="Path to frozen feature file or directory")
+    parser.add_argument("-f", "--fast", action="store_true",
+                        help="Don't test slow files")
+    parser.add_argument("-o", "--only_matching", action="store_true",
+                        help="Print only if rule matches")
+    parser.add_argument("-s", "--save_image", action="store",
+                        help="Directory to save exported images of function graphs")
+    parser.add_argument("-v", "--verbose", action="count", default=0,
+                        help="Increase output verbosity")
+    parser.add_argument("-q", "--quiet", action="store_true",
+                        help="Disable all output but errors")
+    args = parser.parse_args(args=argv)
+
+    # --quiet takes precedence over --verbose; any -v enables debug logging
+    if args.quiet:
+        level = logging.ERROR
+    elif args.verbose:
+        level = logging.DEBUG
+    else:
+        level = logging.INFO
+    logging.basicConfig(level=level)
+    logging.getLogger().setLevel(level)
+
+    if not os.path.isdir(args.rules):
+        logger.error('%s is not a directory', args.rules)
+        return -1
+
+    # load the requested rule together with the rules it depends on
+    try:
+        all_rules = capa.main.get_rules(args.rules)
+        selected = list(capa.rules.get_rules_and_dependencies(all_rules, args.rule_name))
+        rules = capa.rules.RuleSet(selected)
+    except (IOError, capa.rules.InvalidRule) as e:
+        logger.error('%s', str(e))
+        return -1
+
+    time0 = time.time()
+
+    print('[RULE %s]' % args.rule_name)
+    if os.path.isfile(args.frozen_path):
+        check_rule(args.frozen_path, rules, args.rule_name, args.only_matching, args.save_image, args.verbose)
+
+    try:
+        # collect the freeze files below the directory
+        # (os.walk yields nothing when frozen_path is a plain file)
+        freeze_files = []
+        for root, _dirs, files in os.walk(args.frozen_path):
+            for name in files:
+                if not name.endswith(FREEZE_EXTENSION):
+                    continue
+
+                path = os.path.join(root, name)
+                if args.fast and 'slow' in path:
+                    logger.debug('fast mode skipping %s', path)
+                    continue
+
+                freeze_files.append(path)
+
+        for path in sorted(freeze_files):
+            sample_time0 = time.time()
+            check_rule(path, rules, args.rule_name, args.only_matching, args.save_image, args.verbose)
+            logger.debug('rule check took %d seconds', time.time() - sample_time0)
+    except KeyboardInterrupt:
+        # still fall through to the summary for the files processed so far
+        logger.info('Received keyboard interrupt, terminating')
+
+    print_summary(args.verbose, time0)
+
+
+# when run as a script, use main()'s return value as the process exit status
+if __name__ == "__main__":
+    sys.exit(main())