extractors: add required loc_db

Since the following PR, miasm requires a LocationDB to be passed to the
object's constructor instead of creating a new LocationDB internally:
https://github.com/cea-sec/miasm/pull/1274

This was not the case when I started the miasm backend implementation.
Adapt the code to this change, which also means interacting with miasm
in a better way.
This commit is contained in:
Ana Maria Martinez Gomez
2021-02-05 15:26:17 +01:00
parent 2e98ba990c
commit dc828e82b3
3 changed files with 22 additions and 17 deletions

View File

@@ -8,6 +8,7 @@
import miasm.analysis.binary
import miasm.analysis.machine
from miasm.core.locationdb import LocationDB
import capa.features.extractors.miasm.file
import capa.features.extractors.miasm.insn
@@ -20,7 +21,8 @@ class MiasmFeatureExtractor(FeatureExtractor):
def __init__(self, buf):
super(MiasmFeatureExtractor, self).__init__()
self.buf = buf
self.container = miasm.analysis.binary.Container.from_string(buf)
self.loc_db = LocationDB()
self.container = miasm.analysis.binary.Container.from_string(buf, self.loc_db)
self.pe = self.container.executable
self.machine = miasm.analysis.machine.Machine(self.container.arch)
self.cfg = self._build_cfg()
@@ -29,7 +31,7 @@ class MiasmFeatureExtractor(FeatureExtractor):
return self.container.entry_point
def extract_file_features(self):
for feature, va in capa.features.extractors.miasm.file.extract_file_features(self.buf, self.pe):
for feature, va in capa.features.extractors.miasm.file.extract_file_features(self):
yield feature, va
# TODO: Improve this function (it just treats every loc_key that is the target of a call as a function), port to miasm
@@ -62,7 +64,7 @@ class MiasmFeatureExtractor(FeatureExtractor):
get the basic blocks of the function represented by loc_key
"""
block = self.cfg.loc_key_to_block(loc_key)
disassembler = self.machine.dis_engine(self.container.bin_stream, follow_call=False)
disassembler = self.machine.dis_engine(self.container.bin_stream, loc_db=self.loc_db, follow_call=False)
cfg = disassembler.dis_multiblock(self.block_offset(block))
return cfg.blocks

View File

@@ -15,28 +15,29 @@ from capa.features import String, Characteristic
from capa.features.file import Export, Import, Section
def extract_file_embedded_pe(buf, _):
def extract_file_embedded_pe(extractor):
"""
extract embedded PE features
"""
buf = extractor.buf
for match in re.finditer(b"MZ", buf):
offset = match.start()
subcontainer = miasm.analysis.binary.ContainerPE.from_string(buf[offset:])
subcontainer = miasm.analysis.binary.ContainerPE.from_string(buf[offset:], loc_db=extractor.loc_db)
if isinstance(subcontainer, miasm.analysis.binary.ContainerPE):
yield Characteristic("embedded pe"), offset
def extract_file_export_names(_, pe):
def extract_file_export_names(extractor):
"""
extract file exports and their addresses
"""
for symbol, va in miasm.jitter.loader.pe.get_export_name_addr_list(pe):
for symbol, va in miasm.jitter.loader.pe.get_export_name_addr_list(extractor.pe):
# Only use func names and not ordinals
if isinstance(symbol, str):
yield Export(symbol), va
def extract_file_import_names(_, pe):
def extract_file_import_names(extractor):
"""
extract imported function names and their addresses
1. imports by ordinal:
@@ -45,7 +46,7 @@ def extract_file_import_names(_, pe):
- modulename.importname
- importname
"""
for ((dll, symbol), va_set) in miasm.jitter.loader.pe.get_import_address_pe(pe).items():
for ((dll, symbol), va_set) in miasm.jitter.loader.pe.get_import_address_pe(extractor.pe).items():
dll_name = dll[:-4] # Remove .dll
for va in va_set:
if isinstance(symbol, int):
@@ -55,28 +56,28 @@ def extract_file_import_names(_, pe):
yield Import(symbol), va
def extract_file_section_names(_, pe):
def extract_file_section_names(extractor):
"""
extract file sections and their addresses
"""
for section in pe.SHList.shlist:
for section in extractor.pe.SHList.shlist:
name = section.name.partition(b"\x00")[0].decode("ascii")
va = section.addr
yield Section(name), va
def extract_file_strings(buf, _):
def extract_file_strings(extractor):
"""
extract ASCII and UTF-16 LE strings from file
"""
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
for s in capa.features.extractors.strings.extract_ascii_strings(extractor.buf):
yield String(s.s), s.offset
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
for s in capa.features.extractors.strings.extract_unicode_strings(extractor.buf):
yield String(s.s), s.offset
def extract_file_features(buf, pe):
def extract_file_features(extractor):
"""
extract file features from the given extractor (its buffer and parsed binary)
@@ -88,7 +89,7 @@ def extract_file_features(buf, pe):
Tuple[Feature, VA]: a feature and its location.
"""
for file_handler in FILE_HANDLERS:
for feature, va in file_handler(buf, pe):
for feature, va in file_handler(extractor):
yield feature, va

View File

@@ -24,7 +24,9 @@ def extract_function_loop(extractor, loc_key):
returns if the function has a loop
"""
block = extractor.cfg.loc_key_to_block(loc_key)
disassembler = extractor.machine.dis_engine(extractor.container.bin_stream, follow_call=False)
disassembler = extractor.machine.dis_engine(
extractor.container.bin_stream, loc_db=extractor.loc_db, follow_call=False
)
offset = extractor.block_offset(block)
cfg = disassembler.dis_multiblock(offset)
if cfg.has_loop():