mirror of
https://github.com/mandiant/capa.git
synced 2026-02-04 11:07:53 -08:00
extractors: add required loc_db
Since the following PR, miasm requires a LocationDB to be passed to an object's constructor instead of creating a new LocationDB itself: https://github.com/cea-sec/miasm/pull/1274 This was not the case when I started the miasm backend implementation. Adapt the code to work with this change, which also means interacting with miasm in a better way.
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
|
||||
import miasm.analysis.binary
|
||||
import miasm.analysis.machine
|
||||
from miasm.core.locationdb import LocationDB
|
||||
|
||||
import capa.features.extractors.miasm.file
|
||||
import capa.features.extractors.miasm.insn
|
||||
@@ -20,7 +21,8 @@ class MiasmFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, buf):
|
||||
super(MiasmFeatureExtractor, self).__init__()
|
||||
self.buf = buf
|
||||
self.container = miasm.analysis.binary.Container.from_string(buf)
|
||||
self.loc_db = LocationDB()
|
||||
self.container = miasm.analysis.binary.Container.from_string(buf, self.loc_db)
|
||||
self.pe = self.container.executable
|
||||
self.machine = miasm.analysis.machine.Machine(self.container.arch)
|
||||
self.cfg = self._build_cfg()
|
||||
@@ -29,7 +31,7 @@ class MiasmFeatureExtractor(FeatureExtractor):
|
||||
return self.container.entry_point
|
||||
|
||||
def extract_file_features(self):
|
||||
for feature, va in capa.features.extractors.miasm.file.extract_file_features(self.buf, self.pe):
|
||||
for feature, va in capa.features.extractors.miasm.file.extract_file_features(self):
|
||||
yield feature, va
|
||||
|
||||
# TODO: Improve this function (it just treats every loc_key that is the target of a call as a function), port to miasm
|
||||
@@ -62,7 +64,7 @@ class MiasmFeatureExtractor(FeatureExtractor):
|
||||
get the basic blocks of the function represented by loc_key
|
||||
"""
|
||||
block = self.cfg.loc_key_to_block(loc_key)
|
||||
disassembler = self.machine.dis_engine(self.container.bin_stream, follow_call=False)
|
||||
disassembler = self.machine.dis_engine(self.container.bin_stream, loc_db=self.loc_db, follow_call=False)
|
||||
cfg = disassembler.dis_multiblock(self.block_offset(block))
|
||||
return cfg.blocks
|
||||
|
||||
|
||||
@@ -15,28 +15,29 @@ from capa.features import String, Characteristic
|
||||
from capa.features.file import Export, Import, Section
|
||||
|
||||
|
||||
def extract_file_embedded_pe(buf, _):
|
||||
def extract_file_embedded_pe(extractor):
|
||||
"""
|
||||
extract embedded PE features
|
||||
"""
|
||||
buf = extractor.buf
|
||||
for match in re.finditer(b"MZ", buf):
|
||||
offset = match.start()
|
||||
subcontainer = miasm.analysis.binary.ContainerPE.from_string(buf[offset:])
|
||||
subcontainer = miasm.analysis.binary.ContainerPE.from_string(buf[offset:], loc_db=extractor.loc_db)
|
||||
if isinstance(subcontainer, miasm.analysis.binary.ContainerPE):
|
||||
yield Characteristic("embedded pe"), offset
|
||||
|
||||
|
||||
def extract_file_export_names(_, pe):
|
||||
def extract_file_export_names(extractor):
|
||||
"""
|
||||
extract file exports and their addresses
|
||||
"""
|
||||
for symbol, va in miasm.jitter.loader.pe.get_export_name_addr_list(pe):
|
||||
for symbol, va in miasm.jitter.loader.pe.get_export_name_addr_list(extractor.pe):
|
||||
# Only use func names and not ordinals
|
||||
if isinstance(symbol, str):
|
||||
yield Export(symbol), va
|
||||
|
||||
|
||||
def extract_file_import_names(_, pe):
|
||||
def extract_file_import_names(extractor):
|
||||
"""
|
||||
extract imported function names and their addresses
|
||||
1. imports by ordinal:
|
||||
@@ -45,7 +46,7 @@ def extract_file_import_names(_, pe):
|
||||
- modulename.importname
|
||||
- importname
|
||||
"""
|
||||
for ((dll, symbol), va_set) in miasm.jitter.loader.pe.get_import_address_pe(pe).items():
|
||||
for ((dll, symbol), va_set) in miasm.jitter.loader.pe.get_import_address_pe(extractor.pe).items():
|
||||
dll_name = dll[:-4] # Remove .dll
|
||||
for va in va_set:
|
||||
if isinstance(symbol, int):
|
||||
@@ -55,28 +56,28 @@ def extract_file_import_names(_, pe):
|
||||
yield Import(symbol), va
|
||||
|
||||
|
||||
def extract_file_section_names(_, pe):
|
||||
def extract_file_section_names(extractor):
|
||||
"""
|
||||
extract file sections and their addresses
|
||||
"""
|
||||
for section in pe.SHList.shlist:
|
||||
for section in extractor.pe.SHList.shlist:
|
||||
name = section.name.partition(b"\x00")[0].decode("ascii")
|
||||
va = section.addr
|
||||
yield Section(name), va
|
||||
|
||||
|
||||
def extract_file_strings(buf, _):
|
||||
def extract_file_strings(extractor):
|
||||
"""
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(extractor.buf):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(extractor.buf):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
|
||||
def extract_file_features(buf, pe):
|
||||
def extract_file_features(extractor):
|
||||
"""
|
||||
extract file features from given buffer and parsed binary
|
||||
|
||||
@@ -88,7 +89,7 @@ def extract_file_features(buf, pe):
|
||||
Tuple[Feature, VA]: a feature and its location.
|
||||
"""
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(buf, pe):
|
||||
for feature, va in file_handler(extractor):
|
||||
yield feature, va
|
||||
|
||||
|
||||
|
||||
@@ -24,7 +24,9 @@ def extract_function_loop(extractor, loc_key):
|
||||
returns if the function has a loop
|
||||
"""
|
||||
block = extractor.cfg.loc_key_to_block(loc_key)
|
||||
disassembler = extractor.machine.dis_engine(extractor.container.bin_stream, follow_call=False)
|
||||
disassembler = extractor.machine.dis_engine(
|
||||
extractor.container.bin_stream, loc_db=extractor.loc_db, follow_call=False
|
||||
)
|
||||
offset = extractor.block_offset(block)
|
||||
cfg = disassembler.dis_multiblock(offset)
|
||||
if cfg.has_loop():
|
||||
|
||||
Reference in New Issue
Block a user