import source files, forgetting about 938 prior commits

This commit is contained in:
William Ballenthin
2020-06-18 09:13:01 -06:00
parent f2d795090c
commit add3537447
65 changed files with 10322 additions and 0 deletions

71
scripts/testbed/README.md Normal file
View File

@@ -0,0 +1,71 @@
# Testbed
Goal of the testbed is to support the development of new `capa` rules. Scripts allow to test rules against a large sample set and to batch process samples, e.g. to freeze features or to generate other meta data used for testing.
The testbed contains malicious and benign files. Data sources are:
- Microsoft EXE and DLL files from `C:\Windows\System32`, `C:\Windows\SysWOW64`, etc.
- samples analyzed and annotated by FLARE analysts during malware analysis
Samples containing the keyword `slow` in their path indicate a longer test run time (>20 seconds) and can be ignored via the `-f` argument.
Running a rule against a large set of executable programs helps to quickly determine on which functions/samples a rule hits. This helps to identify:
- true positives: hits on expected functions
- false positives: hits on unexpected functions, for example
- if a rule is too generic or
- if a rule hits on a capability present in many (benign) samples
To provide additional context the testbed contains function names from the following data sources:
- benign files: function names from Microsoft's PDB information
- malicious files: function names provided by FLARE analysts and obtained from
the LabelMaker 2000 (LM2k) annotations repository
For each test sample the testbed contains the following files:
- a `.frz` file storing the extracted `capa` features
- `capa`'s serialized features, via `capa.features.freeze`
- a `.fnames` file mapping function addresses to function names
- JSON file that maps fvas to function names or
- CSV file with entries `idbmd5;md5;fva;fname`
- (optional) the binary file with extension `.exe_`, `.dll_`, or `.mal_`
## Scripts
### `run_rule_on_testbed.py`
Run a `capa` rule file against the testbed (frozen features in a directory).
Meant to be run on directories that contain `.frz` and `.fnames` files.
Example usage:
run_rule_on_testbed.py <testbed dir>
run_rule_on_testbed.py samples
With the `-s <image_path>` argument, the script exports images of function graphs to the provided path.
Converting the images requires `graphviz`. See https://graphviz.gitlab.io/about/; get Python interface via `pip install graphviz`.
## Helper Scripts
### `freeze_features.py`
Use `freeze_features.py` to freeze `capa` features of a file or of files in a directory.
Example usage:
freeze_features.py <testbed dir>
freeze_features.py samples
### `start_ida_dump_fnames.py`
Start IDA Pro in autonomous mode to dump JSON file of function names `{fva: fname}`. Processes a single file or a directory.
This script uses `_dump_fnames.py` to dump the JSON file of function names and is meant to be run on benign files with PDB information. IDA should apply function names from the PDB information automatically.
Example usage:
start_ida_dump_fnames.py <candidate files dir>
start_ida_dump_fnames.py samples\benign
### `start_ida_export_fimages.py`
Start IDA Pro in autonomous mode to export images of function graphs.
`run_rule_on_testbed.py` integrates the export mechanism (`-s` option)
This script uses `_export_fimages.py` to export DOT files of function graphs and then converts them to PNG images using `graphviz`.
Example usage:
start_ida_export_fimages.py <target file> <output dir> -f <function list>
start_ida_export_fimages.py test.exe imgs -f 0x401000,0x402F90

View File

@@ -0,0 +1,2 @@
# file extension for the per-sample function name mapping (JSON or CSV), see README
FNAMES_EXTENSION = '.fnames'
# file extension for serialized capa features (capa.features.freeze)
FREEZE_EXTENSION = '.frz'

View File

@@ -0,0 +1,46 @@
'''
IDAPython script to dump JSON file of functions names { fva: fname }.
Meant to be run on benign files with PDB information. IDA should apply function names from the PDB files automatically.
Can also be run on annotated IDA database files.
Example usage (via IDA autonomous mode):
ida.exe -A -S_dump_fnames.py "<output path>" <sample_path>
'''
import json
import idc
import idautils
def main():
    '''
    Dump a JSON mapping { fva: fname } of named functions to the path in idc.ARGV[1].

    Skips IDA's auto-generated "sub_" names and prefers demangled names when
    available. Exits IDA with -1 when the output argument is missing, 0 on success.
    '''
    if len(idc.ARGV) != 2:
        # requires output file path argument
        idc.qexit(-1)
    # wait for auto-analysis to finish
    idc.auto_wait()
    INF_SHORT_DN_ATTR = idc.get_inf_attr(idc.INF_SHORT_DN)  # short form of demangled names
    fnames = {}
    for f in idautils.Functions():
        fname = idc.get_name(f)
        if fname.startswith("sub_"):
            # skip IDA's default auto-generated names; they carry no information
            continue
        name_demangled = idc.demangle_name(fname, INF_SHORT_DN_ATTR)
        if name_demangled:
            fname = name_demangled
        fnames[f] = fname
    # NOTE: `f` (last function ea) is shadowed here by the file handle; the loop is done
    with open(idc.ARGV[1], "w") as f:
        json.dump(fnames, f)
    # exit IDA
    idc.qexit(0)
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,44 @@
'''
IDAPython script to export DOT files of function graphs.
Example usage (via IDA autonomous mode):
ida.exe -A -S_export_fimages.py "<output dir>" <fva1> [<fva2> ...] <sample_path>
'''
import os
import idc
import idaapi
import ida_gdl
def main():
    '''
    Export DOT flow graph files for the requested functions.

    idc.ARGV[1] is the output directory; idc.ARGV[2:] are hex-formatted
    function VAs. Exits IDA with -1 on missing arguments, 0 otherwise.
    '''
    if len(idc.ARGV) < 3:
        # requires output directory and function VAs argument(s)
        idc.qexit(-1)
    # wait for auto-analysis to finish
    idc.auto_wait()
    out_dir = idc.ARGV[1]
    fvas = [int(fva, 0x10) for fva in idc.ARGV[2:]]
    idb_name = os.path.split(idc.get_idb_path())[-1]
    for fva in fvas:
        fstart = idc.get_func_attr(fva, idc.FUNCATTR_START)
        # e.g. "sample_exe_0x401000"; dots replaced so the name is filesystem friendly
        name = '%s_0x%x' % (idb_name.replace('.', '_'), fstart)
        out_path = os.path.join(out_dir, name)
        fname = idc.get_name(fstart)
        if not ida_gdl.gen_flow_graph(out_path, '%s (0x%x)' % (fname, fstart), idaapi.get_func(fstart), 0, 0,
                                      ida_gdl.CHART_GEN_DOT | ida_gdl.CHART_PRINT_NAMES):
            # print() call instead of the python 2-only print statement, for
            # consistency with the other testbed scripts and python 3 IDAPython
            print('IDA error generating flow graph')
    # TODO add label to DOT file, see https://stackoverflow.com/a/6452088/10548020
    # TODO highlight where rule matched
    # exit IDA
    idc.qexit(0)
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,102 @@
'''
Freeze capa features.
Example usage:
freeze_features.py <test files dir>
freeze_features.py samples\benign
'''
import os
import sys
import time
import logging
import argparse
from scripts.testbed import FREEZE_EXTENSION
from capa.features.freeze import main as freeze_features
# only process files with these extensions
# only process files with these extensions
TARGET_EXTENSIONS = [
    '.mal_',
    '.exe_',
    '.dll_',
    '.sys_'
]
# use the module name for log records; the previous name 'check_rule' was a
# copy/paste from run_rule_on_testbed.py and mislabeled this script's output
logger = logging.getLogger(__name__)
def freeze(input_path, reprocess):
    '''
    Freeze capa features for a file, or recursively for all target files in a directory.

    :param input_path: file or directory to process
    :param reprocess: overwrite existing freeze files when True
    :raises IOError: if input_path does not exist
    '''
    if not os.path.exists(input_path):
        raise IOError('%s does not exist or cannot be accessed' % input_path)
    if os.path.isdir(input_path):
        logger.info('freezing features of %s files in %s', '|'.join(TARGET_EXTENSIONS), input_path)
        for root, _dirs, filenames in os.walk(input_path):
            for filename in filenames:
                ext = os.path.splitext(filename)[1]
                if ext not in TARGET_EXTENSIONS:
                    logger.debug('skipping non-target file: %s', filename)
                    continue
                target = os.path.join(root, filename)
                freeze_file(target, target + FREEZE_EXTENSION, reprocess)
    elif os.path.isfile(input_path):
        freeze_file(input_path, input_path + FREEZE_EXTENSION, reprocess)
def freeze_file(path, output, reprocess=False):
    '''
    Freeze capa features of a single file to `output`.

    An existing output file is left untouched unless `reprocess` is True.
    Extraction errors are logged, not raised (best effort over a large corpus).
    '''
    logger.info('freezing features of %s', path)
    if not reprocess and os.path.exists(output):
        logger.info('%s already exists, provide -r argument to reprocess', output)
        return
    try:
        # capa.features.freeze.main takes argv-style arguments: [sample, output]
        freeze_features([path, output])
    except Exception as e:
        logger.error('could not freeze features for %s: %s', path, str(e))
def main(argv=None):
    '''
    Command line entry point: freeze capa features of a file or directory.

    :return: 0 on success, -1 when the input path is inaccessible
    '''
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Freeze capa features of a file or of files in a directory")
    parser.add_argument("file_path", type=str,
                        help="Path to file or directory to analyze")
    parser.add_argument("-r", "--reprocess", action="store_true", default=False,
                        help="Overwrite existing analysis")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable verbose output")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    args = parser.parse_args(args=argv)
    # -q wins over -v; default verbosity is INFO
    if args.quiet:
        level = logging.ERROR
    elif args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level)
    logging.getLogger().setLevel(level)
    start = time.time()
    try:
        freeze(args.file_path, args.reprocess)
    except IOError as e:
        logger.error('%s', str(e))
        return -1
    logger.info('freezing features took %d seconds', time.time() - start)
    return 0
if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,297 @@
'''
Run a capa rule file against the testbed (frozen features in a directory).
Example usage:
run_rule_on_testbed.py <path to rules> <rule name> <testbed dir>
run_rule_on_testbed.py ..\\rules "create pipe" samples
'''
import os
import sys
import json
import time
import logging
from collections import defaultdict
import argparse
import capa.main
import capa.rules
import capa.features.freeze
from scripts.testbed import FNAMES_EXTENSION, FREEZE_EXTENSION
from start_ida_export_fimages import export_fimages
logger = logging.getLogger(__name__)
# sorry globals...
# counters aggregated across all checked samples, reported by print_summary()
file_count = 0  # samples successfully loaded and checked
file_hits = 0  # samples with at least one matching function
mal_hits = 0  # matching samples categorized as malicious ('MAL')
other_hits = 0  # matching samples categorized as benign/unknown
function_hits = 0  # total number of matching functions across all samples
errors = 0  # samples that failed to load
function_names = set([])  # unique names of matched functions (verbose summary)
# path keyword -> category label; paths matching neither are 'UNK'
CATEGORY = {
    'malicious': 'MAL',
    'benign': 'BEN',
}
def check_rule(path, rules, rule_name, only_matching, save_image, verbose):
    '''
    Check `rule_name` against the frozen features at `path`, print the result,
    and update the module-level counters.

    :param path: path to a .frz freeze file
    :param rules: capa RuleSet containing the rule and its dependencies
    :param rule_name: name of the rule to report on
    :param only_matching: suppress output for non-matching samples
    :param save_image: directory for exported function graph images, or falsy to skip
    :param verbose: 0 = summary only, 1 = list matched functions, >1 = full match details
    '''
    global file_count, file_hits, mal_hits, other_hits, function_hits, errors
    try:
        capabilities = get_capabilities(path, rules)
    except (ValueError, KeyError) as e:
        # malformed/incompatible freeze file; count it and move on
        logger.error('cannot load %s due to %s: %s', path, type(e).__name__, str(e))
        errors += 1
        return
    file_count += 1
    hits = get_function_hits(capabilities, rule_name)
    if hits == 0:
        if not only_matching:
            render_no_hit(path)
    else:
        print('[x] rule matches %d function(s) in %s (%s)' % (hits, path, get_category(path)))
        file_hits += 1
        function_hits += hits
        if get_category(path) == 'MAL':
            mal_hits += 1
        else:
            other_hits += 1
        if verbose:
            # verbose > 1 additionally renders the full match tree per function
            render_hit_verbose(capabilities, path, verbose > 1)
        if save_image:
            fvas = ['0x%x' % fva for fva in get_hit_fvas(capabilities)]
            # image export needs the IDA database or the original binary next to the .frz
            file_path = get_idb_or_sample_path(path)
            if file_path:
                if not export_fimages(file_path, save_image, fvas):
                    logger.warning('exporting images failed')
            else:
                logger.warning('could not get IDB or sample path')
def get_idb_or_sample_path(path):
    '''
    Return the first existing IDA database or sample file next to a freeze file.

    Candidates are built from the path without its extension and from the full
    path, each combined with the known database/sample extensions.
    :return: the first existing candidate path, or None
    '''
    base = os.path.splitext(path)[0]
    candidates = []
    for ext in ['.idb', '.i64', '.exe_', '.dll_', '.mal_']:
        candidates.append(base + ext)
        candidates.append(path + ext)
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None
def get_capabilities(path, rules):
    '''
    Load frozen features from the .frz file at `path` and match `rules` against them.

    :return: capa capabilities mapping { rule name: [(fva, result), ...] }
    :raises ValueError, KeyError: on malformed freeze data (handled by caller)
    '''
    logger.debug('matching rules in %s', path)
    with open(path, 'rb') as f:
        extractor = capa.features.freeze.load(f.read())
    return capa.main.find_capabilities(rules, extractor, disable_progress=True)
def get_function_hits(capabilities, rule_name):
    ''' Return the number of functions in which the rule `rule_name` matched. '''
    matches = capabilities.get(rule_name)
    return len(matches) if matches else 0
def get_category(path):
    '''
    Classify a sample path by keyword: 'MAL' (malicious), 'BEN' (benign),
    or 'UNK' when neither keyword occurs in the path.
    '''
    for keyword, label in CATEGORY.items():
        if keyword in path:
            return label
    return 'UNK'
def render_no_hit(path):
    ''' Print a no-match line for `path`, including its category. '''
    category = get_category(path)
    print('[ ] no match in %s (%s)' % (path, category))
def render_hit_verbose(capabilities, path, vverbose):
    '''
    Print every matched function, with its name when a .fnames file is
    available next to the freeze file; with `vverbose`, also render the full
    match details. Records matched names in the module-level `function_names`.
    '''
    try:
        fnames = load_fnames(path)
    except IOError as e:
        # missing .fnames file is not fatal; fall back to '<name unknown>'
        logger.error('%s', str(e))
        fnames = None
    for rule, ress in capabilities.items():
        # sort matches by function VA for stable output
        for (fva, res) in sorted(ress, key=lambda p: p[0]):
            if fnames and fva in fnames:
                fname = fnames[fva]
                function_names.add(fname)
            else:
                fname = '<name unknown>'
            print(' - function 0x%x (%s)' % (fva, fname))
            if vverbose:
                capa.main.render_result(res, indent='    ')
def get_hit_fvas(capabilities):
    ''' Return the function VAs of all matches, sorted by VA within each rule. '''
    return [fva
            for ress in capabilities.values()
            for (fva, _res) in sorted(ress, key=lambda p: p[0])]
def load_fnames(path):
    '''
    Load function names for the sample whose freeze file is `path`.

    The sibling .fnames file may be JSON ({ fva: fname }) or CSV
    ("idbmd5;md5;fva;fname" entries, see README).
    :return: dict mapping int fva -> fname
    :raises IOError: if the .fnames file does not exist
    '''
    # replace only the trailing extension; str.replace would also rewrite an
    # occurrence of the extension elsewhere in the path (e.g. a directory name)
    if path.endswith(FREEZE_EXTENSION):
        fnames_path = path[:-len(FREEZE_EXTENSION)] + FNAMES_EXTENSION
    else:
        fnames_path = path + FNAMES_EXTENSION
    if not os.path.exists(fnames_path):
        raise IOError('%s does not exist' % fnames_path)
    logger.debug('fnames path: %s', fnames_path)
    try:
        # json file with format { fva: fname }
        fnames = load_json(fnames_path)
        logger.debug('loaded JSON file')
    except TypeError:
        # csv file with format idbmd5;md5;fva;fname
        fnames = load_csv(fnames_path)
        logger.debug('loaded CSV file')
    fnames = convert_keys_to_int(fnames)
    # lazy %-args so the message is only built when DEBUG logging is enabled
    logger.debug('read %d function names', len(fnames))
    return fnames
def load_json(path):
    '''
    Load a { fva: fname } mapping from a JSON file.

    :raises TypeError: if the file is not valid JSON (signals the CSV fallback)
    '''
    with open(path, 'r') as f:
        try:
            return json.load(f)
        except ValueError as e:
            logger.debug('not a JSON file, %s', str(e))
            raise TypeError
def load_csv(path):
    '''
    Load function names from a CSV file with entries "idbmd5;md5;fva;fname".

    Malformed lines are logged and skipped.
    :return: dict mapping fva string -> fname
    '''
    funcs = defaultdict(str)
    with open(path, 'r') as f:
        lines = f.read().splitlines()
    for line in lines:
        try:
            # the documented record format uses ';' separators (see README),
            # not ':'; split at most 3 times so the name may itself contain ';'
            idbmd5, md5, fva, name = line.split(';', 3)
        except ValueError as e:
            logger.warning('%s: "%s"', str(e), line)
            # was missing: without this, a malformed line recorded the previous
            # line's fva/name (or raised NameError on the first line)
            continue
        funcs[fva] = name
    return funcs
def convert_keys_to_int(funcs_in):
    '''
    Convert dict keys from strings (decimal or "0x"-prefixed hex, as found in
    JSON/CSV fnames files) to ints.
    '''
    funcs = {}
    # .items() instead of the python 2-only .iteritems(), so this also runs
    # under python 3 (the rest of this script already uses print() calls)
    for k, v in funcs_in.items():
        try:
            k = int(k)
        except ValueError:
            # not decimal: parse as hex, e.g. "0x401000"
            k = int(k, 0x10)
        funcs[k] = v
    return funcs
def print_summary(verbose, start_time):
    '''
    Print aggregate match statistics collected in the module-level counters.

    :param verbose: also list the unique matched function names when truthy
    :param start_time: epoch seconds when the run started
    '''
    global file_count, file_hits, function_hits, errors
    print('\n[SUMMARY]')
    m, s = divmod(time.time() - start_time, 60)
    logger.info('ran for %d:%02d minutes', m, s)
    # guard against division by zero when no sample was processed
    ratio = ' (%d%%)' % ((float(file_hits) / file_count) * 100) if file_count else ''
    print('matched %d function(s) in %d/%d%s sample(s), encountered %d error(s)' % (
        function_hits, file_hits, file_count, ratio, errors))
    print('%d hits on (MAL) files; %d hits on other files' % (mal_hits, other_hits))
    if verbose:
        if len(function_names) > 0:
            print('matched function names (unique):')
            for fname in function_names:
                # print() call instead of the python 2-only print statement,
                # matching the rest of this script
                print(' - %s' % fname)
def main(argv=None):
    '''
    Command line entry point: run one capa rule against frozen feature files.

    :return: 0 on success, -1 on bad arguments or rule loading errors
    '''
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Run capa rule file against frozen features in a directory")
    parser.add_argument("rules", type=str,
                        help="Path to directory containing rules")
    parser.add_argument("rule_name", type=str,
                        help="Name of rule to test")
    parser.add_argument("frozen_path", type=str,
                        help="Path to frozen feature file or directory")
    parser.add_argument("-f", "--fast", action="store_true",
                        help="Don't test slow files")
    parser.add_argument("-o", "--only_matching", action="store_true",
                        help="Print only if rule matches")
    parser.add_argument("-s", "--save_image", action="store",
                        help="Directory to save exported images of function graphs")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="Increase output verbosity")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    args = parser.parse_args(args=argv)
    if args.quiet:
        logging.basicConfig(level=logging.ERROR)
        logging.getLogger().setLevel(logging.ERROR)
    elif args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)
    if not os.path.isdir(args.rules):
        logger.error('%s is not a directory', args.rules)
        return -1
    # load the rule and all rules it depends on
    try:
        rules = capa.main.get_rules(args.rules)
        rules = list(capa.rules.get_rules_and_dependencies(rules, args.rule_name))
        rules = capa.rules.RuleSet(rules)
    except IOError as e:
        logger.error('%s', str(e))
        return -1
    except capa.rules.InvalidRule as e:
        logger.error('%s', str(e))
        return -1
    time0 = time.time()
    print('[RULE %s]' % args.rule_name)
    if os.path.isfile(args.frozen_path):
        # single freeze file; previously this case fell through into the
        # directory walk below (the walk of a file yields nothing, but the
        # fall-through was confusing)
        check_rule(args.frozen_path, rules, args.rule_name, args.only_matching, args.save_image, args.verbose)
    elif os.path.isdir(args.frozen_path):
        try:
            # get only freeze files from directory
            freeze_files = []
            for root, dirs, files in os.walk(args.frozen_path):
                for file in files:
                    if not file.endswith(FREEZE_EXTENSION):
                        continue
                    path = os.path.join(root, file)
                    if args.fast and 'slow' in path:
                        logger.debug('fast mode skipping %s', path)
                        continue
                    freeze_files.append(path)
            for path in sorted(freeze_files):
                sample_time0 = time.time()
                check_rule(path, rules, args.rule_name, args.only_matching, args.save_image, args.verbose)
                logger.debug('rule check took %d seconds', time.time() - sample_time0)
        except KeyboardInterrupt:
            logger.info('Received keyboard interrupt, terminating')
    else:
        # previously a nonexistent path silently printed an empty summary
        logger.error('%s does not exist', args.frozen_path)
        return -1
    print_summary(args.verbose, time0)
    return 0
if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,131 @@
'''
Start IDA Pro in autonomous mode to dump JSON file of function names { fva: fname }.
Processes a single file or a directory.
Only runs on files with supported file extensions.
Example usage:
start_ida_dump_fnames.py <candidate files dir>
start_ida_dump_fnames.py samples\benign
'''
import os
import sys
import json
import hashlib
import logging
import subprocess
import argparse
from scripts.testbed import FNAMES_EXTENSION
IDA32_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida.exe'
IDA64_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida64.exe'
# expected in same directory as this file
DUMP_SCRIPT_PATH = os.path.abspath('_dump_fnames.py')
SUPPORTED_EXTENSIONS = [
'.exe_',
'.dll_',
'.sys_',
'.idb',
'.i64',
]
logger = logging.getLogger(__name__)
def call_ida_dump_script(sample_path, reprocess):
    ''' call IDA in autonomous mode and return True if success, False on failure '''
    logger.info('processing %s (MD5: %s)', sample_path, get_md5_hexdigest(sample_path))
    # TODO detect 64-bit binaries
    if os.path.splitext(sample_path)[-1] == '.i64':
        IDA_PATH = IDA64_PATH
    else:
        IDA_PATH = IDA32_PATH
    # for .idb/.i64 inputs, strip the database extension to get the path stem
    # used for the .fnames output next to the sample
    if sample_path.endswith('.idb') or sample_path.endswith('.i64'):
        sample_path = sample_path[:-4]
    fnames = '%s%s' % (sample_path, FNAMES_EXTENSION)
    if os.path.exists(fnames) and not reprocess:
        logger.info('%s already exists and contains %d function names, provide -r argument to reprocess',
                    fnames, len(get_function_names(fnames)))
        return True
    out_path = os.path.split(fnames)[-1]  # relative to IDA database file
    args = [IDA_PATH, '-A', '-S%s "%s"' % (DUMP_SCRIPT_PATH, out_path), sample_path]
    logger.debug('calling "%s"' % ' '.join(args))
    subprocess.call(args)
    # the dump script writes the .fnames file; its absence indicates failure
    if not os.path.exists(fnames):
        logger.warning('%s was not created', fnames)
        return False
    logger.debug('extracted %d function names to %s', len(get_function_names(fnames)), fnames)
    return True
def get_md5_hexdigest(sample_path):
    ''' Return the MD5 of the file at `sample_path` as a hex string. '''
    with open(sample_path, 'rb') as f:
        digest = hashlib.md5(f.read())
    return digest.hexdigest()
def get_function_names(fnames_file):
    ''' Load the { fva: fname } mapping from a JSON .fnames file, or None if missing. '''
    if os.path.exists(fnames_file):
        with open(fnames_file, 'r') as f:
            return json.load(f)
    return None
def main():
    '''
    Command line entry point: dump function names for a file, or for all
    supported files under a directory.

    :return: 0 on success, -1 when the input path does not exist
    '''
    parser = argparse.ArgumentParser(
        description="Launch IDA Pro in autonomous mode to dump function names of a file or of files in a directory")
    parser.add_argument("file_path", type=str,
                        help="File or directory path to analyze")
    parser.add_argument("-r", "--reprocess", action="store_true", default=False,
                        help="Overwrite existing analysis")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable verbose output")
    args = parser.parse_args(args=sys.argv[1:])
    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=level)
    logging.getLogger().setLevel(level)
    if not os.path.exists(args.file_path):
        logger.warning('%s does not exist', args.file_path)
        return -1
    if os.path.isfile(args.file_path):
        call_ida_dump_script(args.file_path, args.reprocess)
        return 0
    # directory input: walk recursively and process supported files
    logger.info('processing files in %s with file extension %s', args.file_path, '|'.join(SUPPORTED_EXTENSIONS))
    error_count = 0
    for root, _dirs, filenames in os.walk(args.file_path):
        for filename in filenames:
            if os.path.splitext(filename)[1] not in SUPPORTED_EXTENSIONS:
                logger.debug('%s does not have supported file extension', filename)
                continue
            if not call_ida_dump_script(os.path.join(root, filename), args.reprocess):
                error_count += 1
    if error_count:
        logger.warning('encountered %d errors', error_count)
    return 0
if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,135 @@
'''
Start IDA Pro in autonomous mode to export images of function graphs.
Example usage:
start_ida_export_fimages.py <target file> <output dir> -f <function list>
start_ida_export_fimages.py test.exe imgs -f 0x401000,0x402F90
'''
import os
import imp
import sys
import hashlib
import logging
import subprocess
import argparse
# graphviz is optional: it is only needed to convert exported DOT files to
# images. The import itself raises ImportError when the package is absent,
# so the previous imp.find_module() probe was redundant (and `imp` is
# deprecated under python 3)
try:
    from graphviz import Source
    graphviz_found = True
except ImportError:
    graphviz_found = False
IDA32_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida.exe'
IDA64_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida64.exe'
# expected in same directory as this file
EXPORT_SCRIPT_PATH = os.path.abspath('_export_fimages.py')
logger = logging.getLogger(__name__)
def export_fimages(file_path, out_dir, functions, manual=False):
    '''
    Export images of function graphs.
    :param file_path: file to analyze
    :param out_dir: output directory
    :param functions: list of strings of hex formatted fvas
    :param manual: non-autonomous mode
    :return: True on success, False otherwise
    '''
    if not graphviz_found:
        logger.warning('please install graphviz to export images')
        return False
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    script_args = [os.path.abspath(out_dir)] + functions
    # the IDA script writes one .dot file per requested function into out_dir
    call_ida_script(EXPORT_SCRIPT_PATH, script_args, file_path, manual)
    img_count = 0
    for root, dirs, files in os.walk(out_dir):
        for file in files:
            if not file.endswith('.dot'):
                continue
            try:
                s = Source.from_file(file, directory=out_dir)
                s.render(file, directory=out_dir, format='png', cleanup=True)
                img_count += 1
            except Exception as e:
                # was `except BaseException`, which also swallowed
                # KeyboardInterrupt/SystemExit; include the cause in the log
                logger.warning('graphviz error rendering %s: %s', file, str(e))
    if img_count > 0:
        logger.info('exported %d function graph images to "%s"', img_count, os.path.abspath(out_dir))
        return True
    else:
        logger.warning('failed to export function graph images')
        return False
def call_ida_script(script_path, script_args, sample_path, manual):
    '''
    Run an IDAPython script against `sample_path` via the IDA command line.

    :param manual: when True, omit -A so IDA shows its dialog boxes
    :return: True when IDA exits with status 0, False otherwise
    '''
    logger.info('processing %s (MD5: %s)', sample_path, get_md5_hexdigest(sample_path))
    # TODO detect 64-bit binaries
    ida_path = IDA64_PATH if os.path.splitext(sample_path)[-1] == '.i64' else IDA32_PATH
    cmd = [ida_path, '-S%s %s' % (script_path, ' '.join(script_args)), sample_path]
    if not manual:
        # -A: autonomous mode, suppress dialog boxes
        cmd.insert(1, '-A')
    logger.debug('calling "%s"' % ' '.join(cmd))
    return subprocess.call(cmd) == 0
def get_md5_hexdigest(sample_path):
    ''' Return the MD5 of the file at `sample_path` as a hex string. '''
    hasher = hashlib.md5()
    with open(sample_path, 'rb') as f:
        hasher.update(f.read())
    digest = hasher.hexdigest()
    return digest
def main():
    '''
    Command line entry point: export images of function graphs via IDA.

    :return: 0 on success, -1 on bad arguments
    '''
    parser = argparse.ArgumentParser(
        description="Launch IDA Pro in autonomous mode to export images of function graphs")
    parser.add_argument("file_path", type=str,
                        help="File to export from")
    parser.add_argument("out_dir", type=str,
                        help="Export target directory")
    parser.add_argument("-f", "--functions", action="store",
                        help="Comma separated list of functions to export")
    parser.add_argument("-m", "--manual", action="store_true",
                        help="Manual mode: show IDA dialog boxes")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable verbose output")
    args = parser.parse_args(args=sys.argv[1:])
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)
    if not os.path.isfile(args.file_path):
        logger.warning('%s is not a file', args.file_path)
        return -1
    if not args.functions:
        # previously crashed with AttributeError (None.split) when -f was omitted
        logger.warning('no functions specified, provide the -f argument')
        return -1
    functions = args.functions.split(',')
    export_fimages(args.file_path, args.out_dir, functions, args.manual)
    return 0
if __name__ == "__main__":
    sys.exit(main())