mirror of
https://github.com/mandiant/capa.git
synced 2025-12-23 07:28:34 -08:00
Get rid of `True` in characteristic (rules, output and json) as it is implicit. This way, the same syntax is used for characteristic as for the rest of the features. Co-authored-by: William Ballenthin <william.ballenthin@fireeye.com>
103 lines
2.7 KiB
Python
103 lines
2.7 KiB
Python
import PE.carve as pe_carve # vivisect PE
|
|
|
|
from capa.features import Characteristic
|
|
from capa.features.file import Export
|
|
from capa.features.file import Import
|
|
from capa.features.file import Section
|
|
from capa.features import String
|
|
import capa.features.extractors.strings
|
|
|
|
|
|
def extract_file_embedded_pe(vw, file_path):
    '''
    yield an `embedded pe` characteristic for every hit reported by the PE carver.

    the file at `file_path` is read fully into memory and handed to `pe_carve.carve`.
    `vw` is not used here; it is accepted so the function can be called like the
    other file handlers.

    yields:
      Tuple[Characteristic, offset]: the feature and the offset reported by the carver.
    '''
    with open(file_path, 'rb') as fh:
        contents = fh.read()

    # NOTE(review): the second argument (1) is presumably the start offset, so the
    # file's own header at offset 0 is not reported - confirm against vivisect.
    for hit in pe_carve.carve(contents, 1):
        # only the offset is needed; the second element of each hit is ignored.
        offset, _ = hit
        yield Characteristic('embedded pe'), offset
|
|
|
|
|
|
def extract_file_export_names(vw, file_path):
    '''
    yield an `Export` feature for each entry returned by `vw.getExports()`.

    `file_path` is not used here; it is accepted so the function can be called
    like the other file handlers.

    yields:
      Tuple[Export, va]: the export name feature and the address of the export.
    '''
    for export in vw.getExports():
        # each entry is a 4-tuple; only the address and the name are needed.
        va, _, name, _ = export
        yield Export(name), va
|
|
|
|
|
|
def extract_file_import_names(vw, file_path):
    '''
    extract imported function names

    1. imports by ordinal:
     - modulename.#ordinal
    2. imports by name, results in two features to support importname-only matching:
     - modulename.importname
     - importname

    `file_path` is not used here; it is accepted so the function can be called
    like the other file handlers.

    yields:
      Tuple[Import, va]: the import feature and the address of the import.

    raises:
      ValueError: if an import description contains no `.` separator at all.
    '''
    for va, _, _, tinfo in vw.getImports():
        # vivisect source: tinfo = "%s.%s" % (libname, impname)
        #
        # split on the first dot only: an unbounded split raises
        # "too many values to unpack" as soon as the string holds a second dot.
        # NOTE(review): a module name that itself contains a dot would be cut
        # at its first dot - confirm whether vivisect can produce such names.
        modname, impname = tinfo.split('.', 1)

        if is_viv_ord_impname(impname):
            # replace ord prefix with #
            impname = '#%s' % impname[len('ord'):]
            yield Import('%s.%s' % (modname, impname)), va
        else:
            yield Import(tinfo), va
            yield Import(impname), va
|
|
|
|
|
|
def is_viv_ord_impname(impname):
    '''
    tell whether an import name follows vivisect's ordinal naming scheme `'ord%d' % ord`.

    args:
      impname (str): the import name to check.

    returns:
      bool: True when the name starts with `ord` and the remainder parses with `int()`.
    '''
    prefix = 'ord'
    if impname.startswith(prefix):
        try:
            # an empty or non-numeric remainder makes int() raise ValueError.
            int(impname[len(prefix):])
            return True
        except ValueError:
            pass
    return False
|
|
|
|
|
|
def extract_file_section_names(vw, file_path):
    '''
    yield a `Section` feature for each entry returned by `vw.getSegments()`.

    `file_path` is not used here; it is accepted so the function can be called
    like the other file handlers.

    yields:
      Tuple[Section, va]: the section name feature and the address of the segment.
    '''
    for segment in vw.getSegments():
        # each entry is a 4-tuple; only the address and the name are needed.
        va, _, segname, _ = segment
        yield Section(segname), va
|
|
|
|
|
|
def extract_file_strings(vw, file_path):
    '''
    extract ASCII and UTF-16 LE strings from file

    the file at `file_path` is read fully into memory; all ASCII strings are
    yielded first, followed by all unicode strings. `vw` is not used here; it is
    accepted so the function can be called like the other file handlers.

    yields:
      Tuple[String, offset]: the string feature and the offset reported by the extractor.
    '''
    with open(file_path, 'rb') as fh:
        data = fh.read()

    strings = capa.features.extractors.strings
    # same order as before: ASCII pass first, then the unicode pass.
    for extract in (strings.extract_ascii_strings, strings.extract_unicode_strings):
        for found in extract(data):
            yield String(found.s), found.offset
|
|
|
|
|
|
def extract_features(vw, file_path):
    '''
    run every registered file handler and yield the features they produce.

    handlers are taken from `FILE_HANDLERS` and run in that order; each one is
    called with the same `(vw, file_path)` pair.

    args:
      vw (vivisect.VivWorkspace): the vivisect workspace
      file_path: path to the input file

    yields:
      Tuple[Feature, VA]: a feature and its location.
    '''
    for handler in FILE_HANDLERS:
        for result in handler(vw, file_path):
            # unpack and re-pack so callers always receive a plain 2-tuple.
            feature, va = result
            yield feature, va
|
|
|
|
|
|
# file-scope extractors run by `extract_features`, in this order.
# each entry is a generator function called as `handler(vw, file_path)`.
FILE_HANDLERS = (
    extract_file_embedded_pe,
    extract_file_export_names,
    extract_file_import_names,
    extract_file_section_names,
    extract_file_strings,
)
|