mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 23:26:21 -08:00
import source files, forgetting about 938 prior commits
This commit is contained in:
71
scripts/testbed/README.md
Normal file
71
scripts/testbed/README.md
Normal file
@@ -0,0 +1,71 @@
|
||||
# Testbed
|
||||
The goal of the testbed is to support the development of new `capa` rules. Scripts allow testing rules against a large sample set and batch processing samples, e.g. to freeze features or to generate other metadata used for testing.
|
||||
|
||||
The testbed contains malicious and benign files. Data sources are:
|
||||
- Microsoft EXE and DLL files from `C:\Windows\System32`, `C:\Windows\SysWOW64`, etc.
|
||||
- samples analyzed and annotated by FLARE analysts during malware analysis
|
||||
|
||||
Samples containing the keyword `slow` in their path indicate a longer test run time (>20 seconds) and can be ignored via the `-f` argument.
|
||||
|
||||
Running a rule against a large set of executable programs helps to quickly determine on which functions/samples a rule hits. This helps to identify:
|
||||
- true positives: hits on expected functions
|
||||
- false positives: hits on unexpected functions, for example
|
||||
  - if a rule is too generic or
|
||||
- if a rule hits on a capability present in many (benign) samples
|
||||
|
||||
To provide additional context the testbed contains function names from the following data sources:
|
||||
- benign files: function names from Microsoft's PDB information
|
||||
- malicious files: function names provided by FLARE analysts and obtained from
|
||||
the LabelMaker 2000 (LM2k) annotations repository
|
||||
|
||||
For each test sample the testbed contains the following files:
|
||||
- a `.frz` file storing the extracted `capa` features
|
||||
- `capa`'s serialized features, via `capa.features.freeze`
|
||||
- a `.fnames` file mapping function addresses to function names
|
||||
- JSON file that maps fvas to function names or
|
||||
- CSV file with entries `idbmd5;md5;fva;fname`
|
||||
- (optional) the binary file with extension `.exe_`, `.dll_`, or `.mal_`
|
||||
|
||||
## Scripts
|
||||
### `run_rule_on_testbed.py`
|
||||
Run a `capa` rule file against the testbed (frozen features in a directory).
|
||||
|
||||
Meant to be run on directories that contain `.frz` and `.fnames` files.
|
||||
|
||||
Example usage:
|
||||
|
||||
run_rule_on_testbed.py <testbed dir>
|
||||
run_rule_on_testbed.py samples
|
||||
|
||||
With the `-s <image_path>` argument, the script exports images of function graphs to the provided path.
|
||||
Converting the images requires `graphviz`. See https://graphviz.gitlab.io/about/; get Python interface via `pip install graphviz`.
|
||||
|
||||
## Helper Scripts
|
||||
### `freeze_features.py`
|
||||
Use `freeze_features.py` to freeze `capa` features of a file or of files in a directory.
|
||||
|
||||
Example usage:
|
||||
|
||||
freeze_features.py <testbed dir>
|
||||
freeze_features.py samples
|
||||
|
||||
### `start_ida_dump_fnames.py`
|
||||
Start IDA Pro in autonomous mode to dump JSON file of function names `{fva: fname}`. Processes a single file or a directory.
|
||||
|
||||
This script uses `_dump_fnames.py` to dump the JSON file of functions names and is meant to be run on benign files with PDB information. IDA should apply function names from the PDB information automatically.
|
||||
|
||||
Example usage:
|
||||
|
||||
start_ida_dump_fnames.py <candidate files dir>
|
||||
start_ida_dump_fnames.py samples\benign
|
||||
|
||||
### `start_ida_export_fimages.py`
|
||||
Start IDA Pro in autonomous mode to export images of function graphs.
|
||||
`run_rule_on_testbed.py` integrates the export mechanism (`-s` option)
|
||||
|
||||
This script uses `_export_fimages.py` to export DOT files of function graphs and then converts them to PNG images using `graphviz`.
|
||||
|
||||
Example usage:
|
||||
|
||||
start_ida_export_fimages.py <target file> <output dir> -f <function list>
|
||||
start_ida_export_fimages.py test.exe imgs -f 0x401000,0x402F90
|
||||
2
scripts/testbed/__init__.py
Normal file
2
scripts/testbed/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
# File extension of function-name mapping files (JSON {fva: fname} or CSV).
FNAMES_EXTENSION = '.fnames'
# File extension of frozen capa feature files (output of capa.features.freeze).
FREEZE_EXTENSION = '.frz'
|
||||
46
scripts/testbed/_dump_fnames.py
Normal file
46
scripts/testbed/_dump_fnames.py
Normal file
@@ -0,0 +1,46 @@
|
||||
'''
|
||||
IDAPython script to dump JSON file of functions names { fva: fname }.
|
||||
Meant to be run on benign files with PDB information. IDA should apply function names from the PDB files automatically.
|
||||
Can also be run on annotated IDA database files.
|
||||
|
||||
Example usage (via IDA autonomous mode):
|
||||
ida.exe -A -S_dump_fnames.py "<output path>" <sample_path>
|
||||
'''
|
||||
|
||||
import json
|
||||
|
||||
import idc
|
||||
import idautils
|
||||
|
||||
|
||||
def main():
    """Dump a JSON mapping {fva: fname} of named functions to the path in idc.ARGV[1]."""
    # exactly one script argument (the output file path) is required
    if len(idc.ARGV) != 2:
        idc.qexit(-1)

    # let IDA finish auto-analysis before enumerating functions
    idc.auto_wait()

    short_dn = idc.get_inf_attr(idc.INF_SHORT_DN)  # short form of demangled names

    names = {}
    for fva in idautils.Functions():
        raw_name = idc.get_name(fva)
        # skip IDA's auto-generated placeholder names
        if raw_name.startswith("sub_"):
            continue

        demangled = idc.demangle_name(raw_name, short_dn)
        names[fva] = demangled if demangled else raw_name

    with open(idc.ARGV[1], "w") as out:
        json.dump(names, out)

    # terminate the autonomous IDA session
    idc.qexit(0)


if __name__ == "__main__":
    main()
|
||||
44
scripts/testbed/_export_fimages.py
Normal file
44
scripts/testbed/_export_fimages.py
Normal file
@@ -0,0 +1,44 @@
|
||||
'''
|
||||
IDAPython script to export DOT files of function graphs.
|
||||
|
||||
Example usage (via IDA autonomous mode):
|
||||
ida.exe -A -S_export_fimages.py "<output dir>" <fva1> [<fva2> ...] <sample_path>
|
||||
'''
|
||||
|
||||
import os
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
import ida_gdl
|
||||
|
||||
|
||||
def main():
    """Export DOT flow graphs to idc.ARGV[1] for the hex fvas given in idc.ARGV[2:]."""
    if len(idc.ARGV) < 3:
        # requires output directory and function VAs argument(s)
        idc.qexit(-1)

    # wait for auto-analysis to finish
    idc.auto_wait()

    out_dir = idc.ARGV[1]
    # function addresses are passed as hex strings
    fvas = [int(fva, 0x10) for fva in idc.ARGV[2:]]
    idb_name = os.path.split(idc.get_idb_path())[-1]

    for fva in fvas:
        fstart = idc.get_func_attr(fva, idc.FUNCATTR_START)
        name = '%s_0x%x' % (idb_name.replace('.', '_'), fstart)
        out_path = os.path.join(out_dir, name)
        fname = idc.get_name(fstart)

        if not ida_gdl.gen_flow_graph(out_path, '%s (0x%x)' % (fname, fstart), idaapi.get_func(fstart), 0, 0,
                                      ida_gdl.CHART_GEN_DOT | ida_gdl.CHART_PRINT_NAMES):
            # print() call for Python 3 IDAPython compatibility (was a Python 2 print statement)
            print('IDA error generating flow graph')
        # TODO add label to DOT file, see https://stackoverflow.com/a/6452088/10548020
        # TODO highlight where rule matched

    # exit IDA
    idc.qexit(0)


if __name__ == "__main__":
    main()
|
||||
102
scripts/testbed/freeze_features.py
Normal file
102
scripts/testbed/freeze_features.py
Normal file
@@ -0,0 +1,102 @@
|
||||
'''
|
||||
Freeze capa features.
|
||||
|
||||
Example usage:
|
||||
freeze_features.py <test files dir>
|
||||
freeze_features.py samples\benign
|
||||
'''
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import logging
|
||||
|
||||
import argparse
|
||||
|
||||
from scripts.testbed import FREEZE_EXTENSION
|
||||
from capa.features.freeze import main as freeze_features
|
||||
|
||||
|
||||
# only process files with these extensions
# (testbed binaries carry a trailing underscore to prevent accidental execution)
TARGET_EXTENSIONS = [
    '.mal_',
    '.exe_',
    '.dll_',
    '.sys_'
]


# NOTE(review): logger channel 'check_rule' looks copied from another script;
# sibling scripts use logging.getLogger(__name__) — confirm intended name.
logger = logging.getLogger('check_rule')
|
||||
|
||||
|
||||
def freeze(input_path, reprocess):
    """Freeze capa features for a file, or for all target files under a directory.

    :param input_path: path to a file or a directory
    :param reprocess: overwrite existing .frz output when True
    :raises IOError: if input_path does not exist
    """
    if not os.path.exists(input_path):
        raise IOError('%s does not exist or cannot be accessed' % input_path)

    if os.path.isfile(input_path):
        outfile = '%s%s' % (input_path, FREEZE_EXTENSION)
        freeze_file(input_path, outfile, reprocess)

    elif os.path.isdir(input_path):
        logger.info('freezing features of %s files in %s', '|'.join(TARGET_EXTENSIONS), input_path)
        # `dirs` is unused; walk recursively and pick only target extensions
        for root, _, files in os.walk(input_path):
            for file in files:
                # idiomatic membership test (was: `not ... in ...`)
                if os.path.splitext(file)[1] not in TARGET_EXTENSIONS:
                    logger.debug('skipping non-target file: %s', file)
                    continue
                path = os.path.join(root, file)
                outfile = '%s%s' % (path, FREEZE_EXTENSION)
                freeze_file(path, outfile, reprocess)
|
||||
|
||||
|
||||
def freeze_file(path, output, reprocess=False):
    """Freeze capa features of one file to `output`, skipping existing results unless reprocess."""
    logger.info('freezing features of %s', path)

    if os.path.exists(output) and not reprocess:
        logger.info('%s already exists, provide -r argument to reprocess', output)
        return

    try:
        # freeze_features expects argv-style arguments: [sample, output]
        freeze_features([path, output])
    except Exception as e:
        # best-effort batch processing: log the failure and continue with the next file
        logger.error('could not freeze features for %s: %s', path, str(e))
|
||||
|
||||
|
||||
def main(argv=None):
    """CLI entry point: freeze capa features of a file or of files in a directory."""
    argv = sys.argv[1:] if argv is None else argv

    parser = argparse.ArgumentParser(description="Freeze capa features of a file or of files in a directory")
    parser.add_argument("file_path", type=str,
                        help="Path to file or directory to analyze")
    parser.add_argument("-r", "--reprocess", action="store_true", default=False,
                        help="Overwrite existing analysis")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable verbose output")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    args = parser.parse_args(args=argv)

    # quiet wins over verbose; default is INFO
    if args.quiet:
        level = logging.ERROR
    elif args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level)
    logging.getLogger().setLevel(level)

    started = time.time()
    try:
        freeze(args.file_path, args.reprocess)
    except IOError as e:
        logger.error('%s', str(e))
        return -1

    logger.info('freezing features took %d seconds', time.time() - started)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
297
scripts/testbed/run_rule_on_testbed.py
Normal file
297
scripts/testbed/run_rule_on_testbed.py
Normal file
@@ -0,0 +1,297 @@
|
||||
'''
|
||||
Run a capa rule file against the testbed (frozen features in a directory).
|
||||
|
||||
Example usage:
|
||||
run_rule_on_testbed.py <path to rules> <rule name> <testbed dir>
|
||||
run_rule_on_testbed.py ..\\rules "create pipe" samples
|
||||
'''
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import argparse
|
||||
|
||||
import capa.main
|
||||
import capa.rules
|
||||
import capa.features.freeze
|
||||
|
||||
from scripts.testbed import FNAMES_EXTENSION, FREEZE_EXTENSION
|
||||
from start_ida_export_fimages import export_fimages
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)

# sorry globals...
# running totals accumulated by check_rule() and reported by print_summary()
file_count = 0      # frozen files successfully processed
file_hits = 0       # files with at least one matching function
mal_hits = 0        # matching files categorized as malicious
other_hits = 0      # matching files categorized as benign/unknown
function_hits = 0   # total number of matching functions
errors = 0          # files that failed to load or match
function_names = set([])  # unique names of matched functions (verbose mode)


# path substring -> category label shown in result output
CATEGORY = {
    'malicious': 'MAL',
    'benign': 'BEN',
}
|
||||
|
||||
|
||||
def check_rule(path, rules, rule_name, only_matching, save_image, verbose):
    """Match `rules` against one frozen-feature file and update the global counters.

    :param path: path to a .frz frozen-feature file
    :param rules: capa RuleSet containing rule_name and its dependencies
    :param rule_name: name of the rule whose function hits are counted
    :param only_matching: suppress output for files without a match
    :param save_image: output directory for function-graph images, or falsy to skip
    :param verbose: verbosity level; a value > 1 also renders match details
    """
    global file_count, file_hits, mal_hits, other_hits, function_hits, errors

    try:
        capabilities = get_capabilities(path, rules)
    except (ValueError, KeyError) as e:
        # freeze-format or matching errors: count the failure and continue
        logger.error('cannot load %s due to %s: %s', path, type(e).__name__, str(e))
        errors += 1
        return

    file_count += 1
    hits = get_function_hits(capabilities, rule_name)
    if hits == 0:
        if not only_matching:
            render_no_hit(path)
    else:
        print('[x] rule matches %d function(s) in %s (%s)' % (hits, path, get_category(path)))

        file_hits += 1
        function_hits += hits

        if get_category(path) == 'MAL':
            mal_hits += 1
        else:
            # benign and unknown files are counted together
            other_hits += 1

        if verbose:
            render_hit_verbose(capabilities, path, verbose > 1)

        if save_image:
            # export graph images of all hit functions via IDA
            fvas = ['0x%x' % fva for fva in get_hit_fvas(capabilities)]
            file_path = get_idb_or_sample_path(path)
            if file_path:
                if not export_fimages(file_path, save_image, fvas):
                    logger.warning('exporting images failed')
            else:
                logger.warning('could not get IDB or sample path')
|
||||
|
||||
|
||||
def get_idb_or_sample_path(path):
    """Return the first existing IDA database or sample file related to `path`, else None."""
    candidate_exts = ['.idb', '.i64', '.exe_', '.dll_', '.mal_']
    # try both the path with its extension stripped and the path as-is
    bases = [os.path.splitext(path)[0], path]
    for ext in candidate_exts:
        for base in bases:
            candidate = '%s%s' % (base, ext)
            if os.path.exists(candidate):
                return candidate
    return None
|
||||
|
||||
|
||||
def get_capabilities(path, rules):
    """Load frozen features from `path` and return capa's rule match results."""
    logger.debug('matching rules in %s', path)
    with open(path, 'rb') as frozen:
        extractor = capa.features.freeze.load(frozen.read())
    return capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
||||
|
||||
|
||||
def get_function_hits(capabilities, rule_name):
    """Return the number of functions on which `rule_name` matched."""
    matches = capabilities.get(rule_name, [])
    return len(matches)
|
||||
|
||||
|
||||
def get_category(path):
    """Classify a testbed path as 'MAL', 'BEN', or 'UNK' based on its path components."""
    for keyword, label in CATEGORY.items():
        if keyword in path:
            return label
    return 'UNK'
|
||||
|
||||
|
||||
def render_no_hit(path):
    """Print a no-match line for `path`, including its category label."""
    print('[ ] no match in %s (%s)' % (path, get_category(path)))
|
||||
|
||||
|
||||
def render_hit_verbose(capabilities, path, vverbose):
    # Print per-function match details; with vverbose also render the match tree.
    # NOTE(review): this iterates ALL rules in `capabilities`, not only the
    # tested rule — with rule dependencies loaded, dependency hits may also be
    # printed; confirm this is intended.
    try:
        fnames = load_fnames(path)
    except IOError as e:
        logger.error('%s', str(e))
        fnames = None

    for rule, ress in capabilities.items():
        # ress is a list of (fva, result) pairs; sort by address for stable output
        for (fva, res) in sorted(ress, key=lambda p: p[0]):
            if fnames and fva in fnames:
                fname = fnames[fva]
                # collect unique matched names for the summary output
                function_names.add(fname)
            else:
                fname = '<name unknown>'
            print(' - function 0x%x (%s)' % (fva, fname))

            if vverbose:
                capa.main.render_result(res, indent=' ')
|
||||
|
||||
|
||||
def get_hit_fvas(capabilities):
    """Return all function virtual addresses with a match, sorted per rule."""
    return [fva
            for ress in capabilities.values()
            for (fva, _) in sorted(ress, key=lambda p: p[0])]
|
||||
|
||||
|
||||
def load_fnames(path):
    """Load the {fva: fname} mapping stored next to a frozen-feature file.

    Supports the JSON format {fva: fname} and the CSV format
    idbmd5;md5;fva;fname. Keys are converted to integer fvas.

    :param path: path to a .frz file; the companion .fnames path is derived from it
    :raises IOError: if the companion .fnames file does not exist
    """
    fnames_path = path.replace(FREEZE_EXTENSION, FNAMES_EXTENSION)
    if not os.path.exists(fnames_path):
        raise IOError('%s does not exist' % fnames_path)

    logger.debug('fnames path: %s', fnames_path)
    try:
        # json file with format { fva: fname }
        fnames = load_json(fnames_path)
        logger.debug('loaded JSON file')
    except TypeError:
        # csv file with format idbmd5;md5;fva;fname
        fnames = load_csv(fnames_path)
        logger.debug('loaded CSV file')
    fnames = convert_keys_to_int(fnames)
    # lazy %-style logging args (was eager string formatting in the call)
    logger.debug('read %d function names', len(fnames))
    return fnames
|
||||
|
||||
|
||||
def load_json(path):
    """Parse `path` as JSON; raise TypeError when the content is not valid JSON."""
    with open(path, 'r') as fh:
        try:
            parsed = json.load(fh)
        except ValueError as e:
            # signal "try the CSV parser instead" to the caller
            logger.debug('not a JSON file, %s', str(e))
            raise TypeError
    return parsed
|
||||
|
||||
|
||||
def load_csv(path):
    """Parse a fnames CSV file with entries `idbmd5;md5;fva;fname`.

    :param path: path to the CSV file
    :return: dict mapping fva strings to function names
    """
    funcs = defaultdict(str)
    with open(path, 'r') as fh:
        lines = fh.read().splitlines()
    for line in lines:
        try:
            # the documented record format is semicolon-separated (was split on ':')
            idbmd5, md5, fva, name = line.split(';', 3)
        except ValueError as e:
            logger.warning('%s: "%s"', str(e), line)
            # skip malformed lines instead of storing stale/undefined values
            continue
        funcs[fva] = name
    return funcs
|
||||
|
||||
|
||||
def convert_keys_to_int(funcs_in):
    """Return a copy of `funcs_in` with keys parsed as int (decimal first, then hex).

    JSON serialization stores fva keys as decimal strings; CSV files may use
    hex strings such as "0x401000".
    """
    funcs = {}
    # .items() instead of Python-2-only .iteritems() (works on both versions)
    for key, value in funcs_in.items():
        try:
            fva = int(key)
        except ValueError:
            # hex string, e.g. "0x401000"
            fva = int(key, 0x10)
        funcs[fva] = value
    return funcs
|
||||
|
||||
|
||||
def print_summary(verbose, start_time):
    """Print aggregate match statistics accumulated in the module globals."""
    global file_count, file_hits, function_hits, errors

    print('\n[SUMMARY]')
    m, s = divmod(time.time() - start_time, 60)
    logger.info('ran for %d:%02d minutes', m, s)
    ratio = ' (%d%%)' % ((float(file_hits) / file_count) * 100) if file_count else ''
    print('matched %d function(s) in %d/%d%s sample(s), encountered %d error(s)' % (
        function_hits, file_hits, file_count, ratio, errors))
    print('%d hits on (MAL) files; %d hits on other files' % (mal_hits, other_hits))

    if verbose:
        if len(function_names) > 0:
            print('matched function names (unique):')
            for fname in function_names:
                # print() call for Python 3 compatibility (was a Python 2 print statement)
                print(' - %s' % fname)
|
||||
|
||||
|
||||
def main(argv=None):
    """CLI entry point: run one capa rule against frozen features and print a summary."""
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Run capa rule file against frozen features in a directory")
    parser.add_argument("rules", type=str,
                        help="Path to directory containing rules")
    parser.add_argument("rule_name", type=str,
                        help="Name of rule to test")
    parser.add_argument("frozen_path", type=str,
                        help="Path to frozen feature file or directory")
    parser.add_argument("-f", "--fast", action="store_true",
                        help="Don't test slow files")
    parser.add_argument("-o", "--only_matching", action="store_true",
                        help="Print only if rule matches")
    parser.add_argument("-s", "--save_image", action="store",
                        help="Directory to save exported images of function graphs")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="Increase output verbosity")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    args = parser.parse_args(args=argv)

    # quiet wins over verbose; default is INFO
    if args.quiet:
        logging.basicConfig(level=logging.ERROR)
        logging.getLogger().setLevel(logging.ERROR)
    elif args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

    if not os.path.isdir(args.rules):
        logger.error('%s is not a directory', args.rules)
        return -1

    # load rule
    try:
        rules = capa.main.get_rules(args.rules)
        # include rules the target rule depends on so matching can resolve them
        rules = list(capa.rules.get_rules_and_dependencies(rules, args.rule_name))
        rules = capa.rules.RuleSet(rules)
    except IOError as e:
        logger.error('%s', str(e))
        return -1
    except capa.rules.InvalidRule as e:
        logger.error('%s', str(e))
        return -1

    time0 = time.time()

    print('[RULE %s]' % args.rule_name)
    if os.path.isfile(args.frozen_path):
        check_rule(args.frozen_path, rules, args.rule_name, args.only_matching, args.save_image, args.verbose)
        # NOTE(review): execution falls through to the directory walk below even
        # for a single file; os.walk on a file path yields nothing, so this is
        # harmless, but an early summary/return would be clearer — confirm.

    try:
        # get only freeze files from directory
        freeze_files = []
        for root, dirs, files in os.walk(args.frozen_path):
            for file in files:
                if not file.endswith(FREEZE_EXTENSION):
                    continue

                path = os.path.join(root, file)
                # 'slow' in the path marks samples with long runtimes (see README)
                if args.fast and 'slow' in path:
                    logger.debug('fast mode skipping %s', path)
                    continue

                freeze_files.append(path)

        for path in sorted(freeze_files):
            sample_time0 = time.time()
            check_rule(path, rules, args.rule_name, args.only_matching, args.save_image, args.verbose)
            logger.debug('rule check took %d seconds', time.time() - sample_time0)
    except KeyboardInterrupt:
        # allow aborting a long testbed run while still printing the summary
        logger.info('Received keyboard interrupt, terminating')

    print_summary(args.verbose, time0)


if __name__ == "__main__":
    sys.exit(main())
|
||||
131
scripts/testbed/start_ida_dump_fnames.py
Normal file
131
scripts/testbed/start_ida_dump_fnames.py
Normal file
@@ -0,0 +1,131 @@
|
||||
'''
|
||||
Start IDA Pro in autonomous mode to dump JSON file of function names { fva: fname }.
|
||||
Processes a single file or a directory.
|
||||
Only runs on files with supported file extensions.
|
||||
|
||||
Example usage:
|
||||
start_ida_dump_fnames.py <candidate files dir>
|
||||
start_ida_dump_fnames.py samples\benign
|
||||
'''
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import hashlib
|
||||
import logging
|
||||
import subprocess
|
||||
|
||||
import argparse
|
||||
|
||||
from scripts.testbed import FNAMES_EXTENSION
|
||||
|
||||
IDA32_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida.exe'
IDA64_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida64.exe'

# expected in same directory as this file; resolve relative to this file rather
# than the current working directory (bare os.path.abspath depends on the CWD)
DUMP_SCRIPT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '_dump_fnames.py')

# file extensions eligible for processing (samples and IDA databases)
SUPPORTED_EXTENSIONS = [
    '.exe_',
    '.dll_',
    '.sys_',
    '.idb',
    '.i64',
]


logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def call_ida_dump_script(sample_path, reprocess):
    ''' call IDA in autonomous mode and return True if success, False on failure '''
    logger.info('processing %s (MD5: %s)', sample_path, get_md5_hexdigest(sample_path))

    # TODO detect 64-bit binaries
    # IDA binary is chosen by database extension only; plain 64-bit samples
    # currently go through the 32-bit IDA
    if os.path.splitext(sample_path)[-1] == '.i64':
        IDA_PATH = IDA64_PATH
    else:
        IDA_PATH = IDA32_PATH

    # for database files, point IDA at the database's base path
    if sample_path.endswith('.idb') or sample_path.endswith('.i64'):
        sample_path = sample_path[:-4]

    fnames = '%s%s' % (sample_path, FNAMES_EXTENSION)
    if os.path.exists(fnames) and not reprocess:
        logger.info('%s already exists and contains %d function names, provide -r argument to reprocess',
                    fnames, len(get_function_names(fnames)))
        return True

    out_path = os.path.split(fnames)[-1]  # relative to IDA database file
    # -A: autonomous (no dialogs); -S: run the dump script with the output path argument
    args = [IDA_PATH, '-A', '-S%s "%s"' % (DUMP_SCRIPT_PATH, out_path), sample_path]
    logger.debug('calling "%s"' % ' '.join(args))
    subprocess.call(args)

    # success is detected via the presence of the output file, not IDA's exit code
    if not os.path.exists(fnames):
        logger.warning('%s was not created', fnames)
        return False

    logger.debug('extracted %d function names to %s', len(get_function_names(fnames)), fnames)
    return True
|
||||
|
||||
|
||||
def get_md5_hexdigest(sample_path):
    """Return the hex MD5 digest of the file at `sample_path`."""
    with open(sample_path, 'rb') as fh:
        digest = hashlib.md5(fh.read())
    return digest.hexdigest()
|
||||
|
||||
|
||||
def get_function_names(fnames_file):
    """Load the {fva: fname} JSON mapping, or return None when the file is missing."""
    if not os.path.exists(fnames_file):
        return None
    with open(fnames_file, 'r') as fh:
        return json.load(fh)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: dump function names for a file or all supported files in a directory."""
    parser = argparse.ArgumentParser(
        description="Launch IDA Pro in autonomous mode to dump function names of a file or of files in a directory")
    parser.add_argument("file_path", type=str,
                        help="File or directory path to analyze")
    parser.add_argument("-r", "--reprocess", action="store_true", default=False,
                        help="Overwrite existing analysis")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable verbose output")
    args = parser.parse_args(args=sys.argv[1:])

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

    if not os.path.exists(args.file_path):
        logger.warning('%s does not exist', args.file_path)
        return -1

    if os.path.isfile(args.file_path):
        call_ida_dump_script(args.file_path, args.reprocess)
        return 0

    errors = 0

    logger.info('processing files in %s with file extension %s', args.file_path, '|'.join(SUPPORTED_EXTENSIONS))
    for root, dirs, files in os.walk(args.file_path):
        for file in files:
            # idiomatic membership test (was: `not ... in ...`)
            if os.path.splitext(file)[1] not in SUPPORTED_EXTENSIONS:
                logger.debug('%s does not have supported file extension', file)
                continue
            path = os.path.join(root, file)
            if not call_ida_dump_script(path, args.reprocess):
                errors += 1

    if errors:
        logger.warning('encountered %d errors', errors)

    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
135
scripts/testbed/start_ida_export_fimages.py
Normal file
135
scripts/testbed/start_ida_export_fimages.py
Normal file
@@ -0,0 +1,135 @@
|
||||
'''
|
||||
Start IDA Pro in autonomous mode to export images of function graphs.
|
||||
|
||||
Example usage:
|
||||
start_ida_export_fimages.py <target file> <output dir> -f <function list>
|
||||
start_ida_export_fimages.py test.exe imgs -f 0x401000,0x402F90
|
||||
'''
|
||||
|
||||
import os
|
||||
import imp
|
||||
import sys
|
||||
import hashlib
|
||||
import logging
|
||||
import subprocess
|
||||
|
||||
import argparse
|
||||
|
||||
# graphviz is optional; image export is disabled when it is not installed.
try:
    # the import itself raises ImportError when graphviz is absent, so the
    # previous deprecated imp.find_module() probe was redundant
    from graphviz import Source
    graphviz_found = True
except ImportError:
    graphviz_found = False


IDA32_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida.exe'
IDA64_PATH = 'C:\\Program Files\\IDA Pro 7.3\\ida64.exe'

# expected in same directory as this file; resolve relative to this file rather
# than the current working directory (bare os.path.abspath depends on the CWD)
EXPORT_SCRIPT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '_export_fimages.py')


logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def export_fimages(file_path, out_dir, functions, manual=False):
    '''
    Export images of function graphs.
    :param file_path: file to analyze
    :param out_dir: output directory
    :param functions: list of strings of hex formatted fvas
    :param manual: non-autonomous mode
    :return: True on success, False otherwise
    '''
    if not graphviz_found:
        logger.warning('please install graphviz to export images')
        return False

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # IDA writes one .dot file per requested function into out_dir
    script_args = [os.path.abspath(out_dir)] + functions
    call_ida_script(EXPORT_SCRIPT_PATH, script_args, file_path, manual)

    img_count = 0
    for root, dirs, files in os.walk(out_dir):
        for file in files:
            if not file.endswith('.dot'):
                continue
            try:
                s = Source.from_file(file, directory=out_dir)
                s.render(file, directory=out_dir, format='png', cleanup=True)
                img_count += 1
            except Exception as e:
                # Exception, not BaseException: do not swallow KeyboardInterrupt/SystemExit
                logger.warning('graphviz error rendering file %s: %s', file, str(e))
    if img_count > 0:
        logger.info('exported %d function graph images to "%s"', img_count, os.path.abspath(out_dir))
        return True
    else:
        logger.warning('failed to export function graph images')
        return False
|
||||
|
||||
|
||||
def call_ida_script(script_path, script_args, sample_path, manual):
    """Run an IDAPython script against `sample_path`; return True when IDA exits with 0."""
    logger.info('processing %s (MD5: %s)', sample_path, get_md5_hexdigest(sample_path))

    # TODO detect 64-bit binaries
    is_i64 = os.path.splitext(sample_path)[-1] == '.i64'
    ida_binary = IDA64_PATH if is_i64 else IDA32_PATH

    args = [ida_binary, '-A', '-S%s %s' % (script_path, ' '.join(script_args)), sample_path]

    if manual:
        # keep IDA's dialog boxes visible
        args.remove('-A')

    logger.debug('calling "%s"' % ' '.join(args))
    return subprocess.call(args) == 0
|
||||
|
||||
|
||||
def get_md5_hexdigest(sample_path):
    """Compute the MD5 of the file contents and return it as a hex string."""
    hasher = hashlib.md5()
    with open(sample_path, 'rb') as fh:
        contents = fh.read()
    hasher.update(contents)
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: export function-graph images for the given file."""
    parser = argparse.ArgumentParser(
        description="Launch IDA Pro in autonomous mode to export images of function graphs")
    parser.add_argument("file_path", type=str,
                        help="File to export from")
    parser.add_argument("out_dir", type=str,
                        help="Export target directory")
    parser.add_argument("-f", "--functions", action="store",
                        help="Comma separated list of functions to export")
    parser.add_argument("-m", "--manual", action="store_true",
                        help="Manual mode: show IDA dialog boxes")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable verbose output")
    args = parser.parse_args(args=sys.argv[1:])

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

    if not os.path.isfile(args.file_path):
        logger.warning('%s is not a file', args.file_path)
        return -1

    # -f is optional: args.functions is None when omitted, and None.split(',')
    # would raise AttributeError; pass an empty function list in that case
    functions = args.functions.split(',') if args.functions else []
    export_fimages(args.file_path, args.out_dir, functions, args.manual)

    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user