Emit the `format` feature at file scope instead of as a global feature.

Each backend's file module (ida, smda, viv) gains an `extract_file_format`
handler that is registered in FILE_HANDLERS; pefile's `extract_format` is
renamed to `extract_file_format` to match. The smda and viv extractors stop
adding the format feature to `global_features`, and the IDA extractor's
module-level `extract_format` is removed in favor of the copy in ida/file.py.

The IDA handler formats its NotImplementedError message with `%` so the
unsupported file type name actually appears in the message (exceptions do not
interpolate printf-style arguments on their own).

NOTE(review): the line structure of this patch was reconstructed from the hunk
line counts. Indentation of context lines inside function bodies is inferred;
verify against the target tree, or apply with `git apply --ignore-whitespace`.
NOTE(review): blob ids on the `index` lines are kept from the original patch;
the post-image id of ida/file.py no longer reflects the edited `raise` line.

diff --git a/capa/features/extractors/ida/extractor.py b/capa/features/extractors/ida/extractor.py
index e91d803f..05e45f57 100644
--- a/capa/features/extractors/ida/extractor.py
+++ b/capa/features/extractors/ida/extractor.py
@@ -16,23 +16,10 @@ import capa.features.extractors.ida.file
 import capa.features.extractors.ida.insn
 import capa.features.extractors.ida.function
 import capa.features.extractors.ida.basicblock
-from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format
+from capa.features.common import OS, OS_WINDOWS
 from capa.features.extractors.base_extractor import FeatureExtractor
 
 
-def extract_format():
-    format_name = ida_loader.get_file_type_name()
-
-    if "PE" in format_name:
-        yield Format(FORMAT_PE), 0x0
-    elif "ELF64" in format_name:
-        yield Format(FORMAT_ELF), 0x0
-    elif "ELF32" in format_name:
-        yield Format(FORMAT_ELF), 0x0
-    else:
-        raise NotImplementedError("file format: %s", format_name)
-
-
 def extract_os():
     format_name = ida_loader.get_file_type_name()
 
diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py
index d3f6a101..6531c6ad 100644
--- a/capa/features/extractors/ida/file.py
+++ b/capa/features/extractors/ida/file.py
@@ -11,12 +11,14 @@ import struct
 import idc
 import idaapi
 import idautils
+import ida_loader
 
 import capa.features.extractors.helpers
 import capa.features.extractors.strings
 import capa.features.extractors.ida.helpers
 from capa.features.file import Export, Import, Section, FunctionName
 from capa.features.common import String, Characteristic
+from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format
 
 
 def check_segment_for_pe(seg):
@@ -153,6 +155,19 @@ def extract_file_function_names():
             yield FunctionName(name), ea
 
 
+def extract_file_format():
+    format_name = ida_loader.get_file_type_name()
+
+    if "PE" in format_name:
+        yield Format(FORMAT_PE), 0x0
+    elif "ELF64" in format_name:
+        yield Format(FORMAT_ELF), 0x0
+    elif "ELF32" in format_name:
+        yield Format(FORMAT_ELF), 0x0
+    else:
+        raise NotImplementedError("file format: %s" % format_name)
+
+
 def extract_features():
     """extract file features"""
     for file_handler in FILE_HANDLERS:
@@ -167,6 +182,7 @@ FILE_HANDLERS = (
     extract_file_section_names,
     extract_file_embedded_pe,
     extract_file_function_names,
+    extract_file_format,
 )
 
 
diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py
index 28ee3797..a8bcd8bc 100644
--- a/capa/features/extractors/pefile.py
+++ b/capa/features/extractors/pefile.py
@@ -117,7 +117,7 @@ def extract_os(pe, file_path):
     yield OS(OS_WINDOWS), 0x0
 
 
-def extract_format(pe, file_path):
+def extract_file_format(pe, file_path):
     yield Format(FORMAT_PE), 0x0
 
 
@@ -146,7 +146,7 @@ FILE_HANDLERS = (
     extract_file_strings,
     extract_file_function_names,
     extract_os,
-    extract_format,
+    extract_file_format,
 )
 
 
diff --git a/capa/features/extractors/smda/extractor.py b/capa/features/extractors/smda/extractor.py
index 2f7443aa..59f64a06 100644
--- a/capa/features/extractors/smda/extractor.py
+++ b/capa/features/extractors/smda/extractor.py
@@ -18,7 +18,6 @@ class SmdaFeatureExtractor(FeatureExtractor):
 
         self.global_features = []
         self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
-        self.global_features.extend(capa.features.extractors.common.extract_format(self.buf))
 
     def get_base_address(self):
         return self.smda_report.base_addr
diff --git a/capa/features/extractors/smda/file.py b/capa/features/extractors/smda/file.py
index 5250e26b..0be6aa0a 100644
--- a/capa/features/extractors/smda/file.py
+++ b/capa/features/extractors/smda/file.py
@@ -1,6 +1,7 @@
 # if we have SMDA we definitely have lief
 import lief
 
+import capa.features.extractors.common
 import capa.features.extractors.helpers
 import capa.features.extractors.strings
 from capa.features.file import Export, Import, Section
@@ -74,6 +75,11 @@ def extract_file_function_names(smda_report, file_path):
     return
 
 
+def extract_file_format(smda_report, file_path):
+    with open(file_path, "rb") as f:
+        yield from capa.features.extractors.common.extract_format(f.read())
+
+
 def extract_features(smda_report, file_path):
     """
     extract file features from given workspace
@@ -98,4 +104,5 @@ FILE_HANDLERS = (
     extract_file_section_names,
     extract_file_strings,
     extract_file_function_names,
+    extract_file_format,
 )
diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py
index f6960390..b922d5dc 100644
--- a/capa/features/extractors/viv/extractor.py
+++ b/capa/features/extractors/viv/extractor.py
@@ -43,7 +43,6 @@ class VivisectFeatureExtractor(FeatureExtractor):
 
         self.global_features = []
         self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
-        self.global_features.extend(capa.features.extractors.common.extract_format(self.buf))
 
     def get_base_address(self):
         # assume there is only one file loaded into the vw
diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py
index 9ec9d672..c92e124e 100644
--- a/capa/features/extractors/viv/file.py
+++ b/capa/features/extractors/viv/file.py
@@ -11,6 +11,7 @@ import viv_utils
 import viv_utils.flirt
 
 import capa.features.insn
+import capa.features.extractors.common
 import capa.features.extractors.helpers
 import capa.features.extractors.strings
 from capa.features.file import Export, Import, Section, FunctionName
@@ -87,6 +88,10 @@ def extract_file_function_names(vw, buf):
             yield FunctionName(name), va
 
 
+def extract_file_format(vw, buf):
+    yield from capa.features.extractors.common.extract_format(buf)
+
+
 def extract_features(vw, buf: bytes):
     """
     extract file features from given workspace
@@ -111,4 +116,5 @@ FILE_HANDLERS = (
     extract_file_section_names,
     extract_file_strings,
     extract_file_function_names,
+    extract_file_format,
 )