features: move Format features to file scope

This commit is contained in:
William Ballenthin
2021-08-16 16:37:04 -06:00
parent ab1326f858
commit 5405e182c3
7 changed files with 32 additions and 18 deletions

View File

@@ -16,23 +16,10 @@ import capa.features.extractors.ida.file
import capa.features.extractors.ida.insn
import capa.features.extractors.ida.function
import capa.features.extractors.ida.basicblock
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format
from capa.features.common import OS, OS_WINDOWS
from capa.features.extractors.base_extractor import FeatureExtractor
def extract_format():
    """
    yield the Format feature for the file currently loaded in IDA.

    the format is chosen from the file type name reported by
    `ida_loader.get_file_type_name()`.

    yields:
      Tuple[Format, int]: the format feature, located at offset 0x0.

    raises:
      NotImplementedError: when the file type name mentions neither PE nor ELF32/ELF64.
    """
    format_name = ida_loader.get_file_type_name()

    if "PE" in format_name:
        yield Format(FORMAT_PE), 0x0
    elif "ELF64" in format_name or "ELF32" in format_name:
        # both ELF flavors map to the same Format feature.
        yield Format(FORMAT_ELF), 0x0
    else:
        # interpolate explicitly: exception constructors do not apply
        # %-style arguments the way logging calls do.
        raise NotImplementedError("file format: %s" % format_name)
def extract_os():
format_name = ida_loader.get_file_type_name()

View File

@@ -11,12 +11,14 @@ import struct
import idc
import idaapi
import idautils
import ida_loader
import capa.features.extractors.helpers
import capa.features.extractors.strings
import capa.features.extractors.ida.helpers
from capa.features.file import Export, Import, Section, FunctionName
from capa.features.common import String, Characteristic
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format
def check_segment_for_pe(seg):
@@ -153,6 +155,19 @@ def extract_file_function_names():
yield FunctionName(name), ea
def extract_file_format():
    """
    yield the Format feature for the file currently loaded in IDA.

    the format is chosen from the file type name reported by
    `ida_loader.get_file_type_name()`.

    yields:
      Tuple[Format, int]: the format feature, located at offset 0x0.

    raises:
      NotImplementedError: when the file type name mentions neither PE nor ELF32/ELF64.
    """
    format_name = ida_loader.get_file_type_name()

    if "PE" in format_name:
        yield Format(FORMAT_PE), 0x0
    elif "ELF64" in format_name or "ELF32" in format_name:
        # both ELF flavors map to the same Format feature.
        yield Format(FORMAT_ELF), 0x0
    else:
        # interpolate explicitly: exception constructors do not apply
        # %-style arguments the way logging calls do.
        raise NotImplementedError("file format: %s" % format_name)
def extract_features():
"""extract file features"""
for file_handler in FILE_HANDLERS:
@@ -167,6 +182,7 @@ FILE_HANDLERS = (
extract_file_section_names,
extract_file_embedded_pe,
extract_file_function_names,
extract_file_format,
)

View File

@@ -117,7 +117,7 @@ def extract_os(pe, file_path):
yield OS(OS_WINDOWS), 0x0
def extract_format(pe, file_path):
def extract_file_format(pe, file_path):
    """
    yield the PE Format feature at offset 0x0.

    neither `pe` nor `file_path` is inspected here; the feature is always FORMAT_PE.
    """
    pe_format = Format(FORMAT_PE)
    yield pe_format, 0x0
@@ -146,7 +146,7 @@ FILE_HANDLERS = (
extract_file_strings,
extract_file_function_names,
extract_os,
extract_format,
extract_file_format,
)

View File

@@ -18,7 +18,6 @@ class SmdaFeatureExtractor(FeatureExtractor):
self.global_features = []
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
self.global_features.extend(capa.features.extractors.common.extract_format(self.buf))
def get_base_address(self):
    """return the `base_addr` recorded on this extractor's SMDA report."""
    report = self.smda_report
    return report.base_addr

View File

@@ -1,6 +1,7 @@
# if we have SMDA we definitely have lief
import lief
import capa.features.extractors.common
import capa.features.extractors.helpers
import capa.features.extractors.strings
from capa.features.file import Export, Import, Section
@@ -74,6 +75,11 @@ def extract_file_function_names(smda_report, file_path):
return
def extract_file_format(smda_report, file_path):
    """
    yield the Format feature(s) for the file at `file_path`.

    the raw file bytes are handed to `capa.features.extractors.common.extract_format`;
    `smda_report` is not used.
    """
    # read everything up front so the handle is closed before this generator
    # first suspends; otherwise a partially consumed generator keeps it open.
    with open(file_path, "rb") as f:
        buf = f.read()

    yield from capa.features.extractors.common.extract_format(buf)
def extract_features(smda_report, file_path):
"""
extract file features from given workspace
@@ -98,4 +104,5 @@ FILE_HANDLERS = (
extract_file_section_names,
extract_file_strings,
extract_file_function_names,
extract_file_format,
)

View File

@@ -43,7 +43,6 @@ class VivisectFeatureExtractor(FeatureExtractor):
self.global_features = []
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
self.global_features.extend(capa.features.extractors.common.extract_format(self.buf))
def get_base_address(self):
# assume there is only one file loaded into the vw

View File

@@ -11,6 +11,7 @@ import viv_utils
import viv_utils.flirt
import capa.features.insn
import capa.features.extractors.common
import capa.features.extractors.helpers
import capa.features.extractors.strings
from capa.features.file import Export, Import, Section, FunctionName
@@ -87,6 +88,10 @@ def extract_file_function_names(vw, buf):
yield FunctionName(name), va
def extract_file_format(vw, buf):
    """
    yield the Format feature(s) detected in the raw file bytes `buf`.

    detection is delegated to `capa.features.extractors.common.extract_format`;
    the workspace `vw` is not used.
    """
    format_features = capa.features.extractors.common.extract_format(buf)
    yield from format_features
def extract_features(vw, buf: bytes):
"""
extract file features from given workspace
@@ -111,4 +116,5 @@ FILE_HANDLERS = (
extract_file_section_names,
extract_file_strings,
extract_file_function_names,
extract_file_format,
)