""" Start IDA Pro in autonomous mode to dump JSON file of function names { fva: fname }. Processes a single file or a directory. Only runs on files with supported file extensions. Example usage: start_ida_dump_fnames.py start_ida_dump_fnames.py samples\benign """ import os import sys import json import hashlib import logging import subprocess import argparse from scripts.testbed import FNAMES_EXTENSION IDA32_PATH = "C:\\Program Files\\IDA Pro 7.3\\ida.exe" IDA64_PATH = "C:\\Program Files\\IDA Pro 7.3\\ida64.exe" # expected in same directory as this file DUMP_SCRIPT_PATH = os.path.abspath("_dump_fnames.py") SUPPORTED_EXTENSIONS = [ ".exe_", ".dll_", ".sys_", ".idb", ".i64", ] logger = logging.getLogger(__name__) def call_ida_dump_script(sample_path, reprocess): """ call IDA in autonomous mode and return True if success, False on failure """ logger.info("processing %s (MD5: %s)", sample_path, get_md5_hexdigest(sample_path)) # TODO detect 64-bit binaries if os.path.splitext(sample_path)[-1] == ".i64": IDA_PATH = IDA64_PATH else: IDA_PATH = IDA32_PATH if sample_path.endswith(".idb") or sample_path.endswith(".i64"): sample_path = sample_path[:-4] fnames = "%s%s" % (sample_path, FNAMES_EXTENSION) if os.path.exists(fnames) and not reprocess: logger.info( "%s already exists and contains %d function names, provide -r argument to reprocess", fnames, len(get_function_names(fnames)), ) return True out_path = os.path.split(fnames)[-1] # relative to IDA database file args = [IDA_PATH, "-A", '-S%s "%s"' % (DUMP_SCRIPT_PATH, out_path), sample_path] logger.debug('calling "%s"' % " ".join(args)) subprocess.call(args) if not os.path.exists(fnames): logger.warning("%s was not created", fnames) return False logger.debug("extracted %d function names to %s", len(get_function_names(fnames)), fnames) return True def get_md5_hexdigest(sample_path): m = hashlib.md5() with open(sample_path, "rb") as f: m.update(f.read()) return m.hexdigest() def get_function_names(fnames_file): if not os.path.exists(fnames_file): return None with open(fnames_file, "r") as f: return json.load(f) def main(): parser = argparse.ArgumentParser( description="Launch IDA Pro in autonomous mode to dump function names of a file or of files in a directory" ) parser.add_argument("file_path", type=str, help="File or directory path to analyze") parser.add_argument("-r", "--reprocess", action="store_true", default=False, help="Overwrite existing analysis") parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output") args = parser.parse_args(args=sys.argv[1:]) if args.verbose: logging.basicConfig(level=logging.DEBUG) logging.getLogger().setLevel(logging.DEBUG) else: logging.basicConfig(level=logging.INFO) logging.getLogger().setLevel(logging.INFO) if not os.path.exists(args.file_path): logger.warning("%s does not exist", args.file_path) return -1 if os.path.isfile(args.file_path): call_ida_dump_script(args.file_path, args.reprocess) return 0 errors = 0 logger.info("processing files in %s with file extension %s", args.file_path, "|".join(SUPPORTED_EXTENSIONS)) for root, dirs, files in os.walk(args.file_path): for file in files: if not os.path.splitext(file)[1] in SUPPORTED_EXTENSIONS: logger.debug("%s does not have supported file extension", file) continue path = os.path.join(root, file) if not call_ida_dump_script(path, args.reprocess): errors += 1 if errors: logger.warning("encountered %d errors", errors) return 0 if __name__ == "__main__": sys.exit(main())