diff --git a/capa/features/extractors/miasm/__init__.py b/capa/features/extractors/miasm/__init__.py index 54235afa..c589a143 100644 --- a/capa/features/extractors/miasm/__init__.py +++ b/capa/features/extractors/miasm/__init__.py @@ -8,6 +8,7 @@ import miasm.analysis.binary import miasm.analysis.machine +from miasm.core.locationdb import LocationDB import capa.features.extractors.miasm.file import capa.features.extractors.miasm.insn @@ -20,7 +21,8 @@ class MiasmFeatureExtractor(FeatureExtractor): def __init__(self, buf): super(MiasmFeatureExtractor, self).__init__() self.buf = buf - self.container = miasm.analysis.binary.Container.from_string(buf) + self.loc_db = LocationDB() + self.container = miasm.analysis.binary.Container.from_string(buf, self.loc_db) self.pe = self.container.executable self.machine = miasm.analysis.machine.Machine(self.container.arch) self.cfg = self._build_cfg() @@ -29,7 +31,7 @@ class MiasmFeatureExtractor(FeatureExtractor): return self.container.entry_point def extract_file_features(self): - for feature, va in capa.features.extractors.miasm.file.extract_file_features(self.buf, self.pe): + for feature, va in capa.features.extractors.miasm.file.extract_file_features(self): yield feature, va # TODO: Improve this function (it just considers all loc_keys target of calls a function), port to miasm @@ -62,7 +64,7 @@ class MiasmFeatureExtractor(FeatureExtractor): get the basic blocks of the function represented by lock_key """ block = self.cfg.loc_key_to_block(loc_key) - disassembler = self.machine.dis_engine(self.container.bin_stream, follow_call=False) + disassembler = self.machine.dis_engine(self.container.bin_stream, loc_db=self.loc_db, follow_call=False) cfg = disassembler.dis_multiblock(self.block_offset(block)) return cfg.blocks diff --git a/capa/features/extractors/miasm/file.py b/capa/features/extractors/miasm/file.py index aeaec76d..a2066cf9 100644 --- a/capa/features/extractors/miasm/file.py +++ b/capa/features/extractors/miasm/file.py @@ -15,28 +15,29 @@ from capa.features import String, Characteristic from capa.features.file import Export, Import, Section -def extract_file_embedded_pe(buf, _): +def extract_file_embedded_pe(extractor): """ extract embedded PE features """ + buf = extractor.buf for match in re.finditer(b"MZ", buf): offset = match.start() - subcontainer = miasm.analysis.binary.ContainerPE.from_string(buf[offset:]) + subcontainer = miasm.analysis.binary.ContainerPE.from_string(buf[offset:], loc_db=extractor.loc_db) if isinstance(subcontainer, miasm.analysis.binary.ContainerPE): yield Characteristic("embedded pe"), offset -def extract_file_export_names(_, pe): +def extract_file_export_names(extractor): """ extract file exports and their addresses """ - for symbol, va in miasm.jitter.loader.pe.get_export_name_addr_list(pe): + for symbol, va in miasm.jitter.loader.pe.get_export_name_addr_list(extractor.pe): # Only use func names and not ordinals if isinstance(symbol, str): yield Export(symbol), va -def extract_file_import_names(_, pe): +def extract_file_import_names(extractor): """ extract imported function names and their addresses 1. imports by ordinal: @@ -45,7 +46,7 @@ def extract_file_import_names(_, pe): - modulename.importname - importname """ - for ((dll, symbol), va_set) in miasm.jitter.loader.pe.get_import_address_pe(pe).items(): + for ((dll, symbol), va_set) in miasm.jitter.loader.pe.get_import_address_pe(extractor.pe).items(): dll_name = dll[:-4] # Remove .dll for va in va_set: if isinstance(symbol, int): @@ -55,28 +56,28 @@ def extract_file_import_names(_, pe): yield Import(symbol), va -def extract_file_section_names(_, pe): +def extract_file_section_names(extractor): """ extract file sections and their addresses """ - for section in pe.SHList.shlist: + for section in extractor.pe.SHList.shlist: name = section.name.partition(b"\x00")[0].decode("ascii") va = section.addr yield Section(name), va -def extract_file_strings(buf, _): +def extract_file_strings(extractor): """ extract ASCII and UTF-16 LE strings from file """ - for s in capa.features.extractors.strings.extract_ascii_strings(buf): + for s in capa.features.extractors.strings.extract_ascii_strings(extractor.buf): yield String(s.s), s.offset - for s in capa.features.extractors.strings.extract_unicode_strings(buf): + for s in capa.features.extractors.strings.extract_unicode_strings(extractor.buf): yield String(s.s), s.offset -def extract_file_features(buf, pe): +def extract_file_features(extractor): """ extract file features from given buffer and parsed binary @@ -88,7 +89,7 @@ def extract_file_features(buf, pe): Tuple[Feature, VA]: a feature and its location. """ for file_handler in FILE_HANDLERS: - for feature, va in file_handler(buf, pe): + for feature, va in file_handler(extractor): yield feature, va diff --git a/capa/features/extractors/miasm/function.py b/capa/features/extractors/miasm/function.py index 922deb7e..4ee56ccc 100644 --- a/capa/features/extractors/miasm/function.py +++ b/capa/features/extractors/miasm/function.py @@ -24,7 +24,9 @@ def extract_function_loop(extractor, loc_key): returns if the function has a loop """ block = extractor.cfg.loc_key_to_block(loc_key) - disassembler = extractor.machine.dis_engine(extractor.container.bin_stream, follow_call=False) + disassembler = extractor.machine.dis_engine( + extractor.container.bin_stream, loc_db=extractor.loc_db, follow_call=False + ) offset = extractor.block_offset(block) cfg = disassembler.dis_multiblock(offset) if cfg.has_loop():