extractors: viv: extract format and OS at all scopes

This commit is contained in:
William Ballenthin
2021-08-11 14:44:41 -06:00
parent fa8b4a4203
commit 294f74b209
3 changed files with 59 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
import io
import logging
import binascii
import contextlib
import capa.features.extractors.elf
from capa.features.common import CHARACTERISTIC_PE, CHARACTERISTIC_ELF, CHARACTERISTIC_WINDOWS, Characteristic
# module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def extract_format(buf):
    """
    Yield the file format characteristic for the given file contents.

    The format is recognized from the leading magic bytes of the buffer:
    "MZ" is treated as PE and "\\x7fELF" as ELF.

    Args:
      buf (bytes): the raw contents of the file.

    Yields:
      Tuple[Characteristic, int]: the format characteristic and the offset 0x0.

    Raises:
      NotImplementedError: when the buffer starts with neither magic value.
    """
    if buf.startswith(b"MZ"):
        yield CHARACTERISTIC_PE, 0x0
    elif buf.startswith(b"\x7fELF"):
        yield CHARACTERISTIC_ELF, 0x0
    else:
        # exceptions do not interpolate their arguments the way logging calls do,
        # so the message must be formatted before it is handed to the constructor.
        magic = binascii.hexlify(buf[:4]).decode("ascii")
        raise NotImplementedError("file format: %s" % magic)
def extract_os(buf):
    """
    Yield the operating system characteristic for the given file contents.

    Buffers starting with "MZ" yield the Windows characteristic. Buffers starting
    with "\\x7fELF" are handed to `capa.features.extractors.elf.detect_elf_os`,
    and the lowercased result is yielded as an "os/<name>" characteristic.

    Args:
      buf (bytes): the raw contents of the file.

    Yields:
      Tuple[Characteristic, int]: the OS characteristic and the offset 0x0.

    Raises:
      NotImplementedError: when the buffer starts with neither magic value.
    """
    if buf.startswith(b"MZ"):
        yield CHARACTERISTIC_WINDOWS, 0x0
    elif buf.startswith(b"\x7fELF"):
        # detect_elf_os is given a file-like object, so wrap the buffer in one.
        with contextlib.closing(io.BytesIO(buf)) as f:
            # named elf_os so the local does not read like the stdlib `os` module.
            elf_os = capa.features.extractors.elf.detect_elf_os(f)

        yield Characteristic("os/%s" % (elf_os.lower())), 0x0
    else:
        # exceptions do not interpolate their arguments the way logging calls do,
        # so the message must be formatted before it is handed to the constructor.
        magic = binascii.hexlify(buf[:4]).decode("ascii")
        raise NotImplementedError("file format: %s" % magic)

View File

@@ -12,6 +12,7 @@ import viv_utils.flirt
import capa.features.extractors.viv.file
import capa.features.extractors.viv.insn
import capa.features.extractors.viv.common
import capa.features.extractors.viv.function
import capa.features.extractors.viv.basicblock
from capa.features.extractors.base_extractor import FeatureExtractor
@@ -40,6 +41,10 @@ class VivisectFeatureExtractor(FeatureExtractor):
with open(self.path, "rb") as f:
self.buf = f.read()
self.global_features = []
self.global_features.extend(capa.features.extractors.viv.common.extract_os(self.buf))
self.global_features.extend(capa.features.extractors.viv.common.extract_format(self.buf))
def get_base_address(self):
    """
    Return the "imagebase" recorded for the first file in the workspace metadata.
    """
    # assume there is only one file loaded into the vw
    loaded_files = list(self.vw.filemeta.values())
    first_file = loaded_files[0]
    return first_file["imagebase"]
@@ -47,6 +52,7 @@ class VivisectFeatureExtractor(FeatureExtractor):
def extract_file_features(self):
    """
    Yield (feature, va) pairs found at file scope, followed by the global features.
    """
    file_features = capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
    for pair in file_features:
        feature, va = pair
        yield feature, va

    # the features collected into self.global_features are reported at this scope, too.
    yield from self.global_features
def get_functions(self):
for va in sorted(self.vw.getFunctions()):
@@ -55,6 +61,7 @@ class VivisectFeatureExtractor(FeatureExtractor):
def extract_function_features(self, f):
    """
    Yield (feature, va) pairs found in the function `f`, followed by the global features.
    """
    function_features = capa.features.extractors.viv.function.extract_features(f)
    for pair in function_features:
        feature, va = pair
        yield feature, va

    # the features collected into self.global_features are reported at this scope, too.
    yield from self.global_features
def get_basic_blocks(self, f):
    """
    Return the basic blocks of the function `f`, as exposed by its `basic_blocks` attribute.
    """
    blocks = f.basic_blocks
    return blocks
@@ -62,6 +69,7 @@ class VivisectFeatureExtractor(FeatureExtractor):
def extract_basic_block_features(self, f, bb):
    """
    Yield (feature, va) pairs found in basic block `bb` of function `f`,
    followed by the global features.
    """
    block_features = capa.features.extractors.viv.basicblock.extract_features(f, bb)
    for pair in block_features:
        feature, va = pair
        yield feature, va

    # the features collected into self.global_features are reported at this scope, too.
    yield from self.global_features
def get_instructions(self, f, bb):
for insn in bb.instructions:
@@ -70,6 +78,7 @@ class VivisectFeatureExtractor(FeatureExtractor):
def extract_insn_features(self, f, bb, insn):
    """
    Yield (feature, va) pairs found in instruction `insn` (within basic block `bb`
    of function `f`), followed by the global features.
    """
    insn_features = capa.features.extractors.viv.insn.extract_features(f, bb, insn)
    for pair in insn_features:
        feature, va = pair
        yield feature, va

    # the features collected into self.global_features are reported at this scope, too.
    yield from self.global_features
def is_library_function(self, va):
    """
    Return the result of viv_utils' FLIRT library-function check for the address `va`
    in this extractor's workspace.
    """
    flirt_check = viv_utils.flirt.is_library_function
    return flirt_check(self.vw, va)

View File

@@ -76,6 +76,7 @@ import capa.engine
import capa.helpers
import capa.features
import capa.features.freeze
import capa.features.common
# logger for this script, registered under the fixed name "capa.show-features".
logger = logging.getLogger("capa.show-features")
@@ -193,6 +194,15 @@ def ida_main():
return 0
def is_global_feature(feature):
    """
    Return True when `feature` is a Characteristic whose string value begins
    with "os/" or "format/", and False otherwise.
    """
    if not isinstance(feature, capa.features.common.Characteristic):
        return False

    value = feature.value
    if not isinstance(value, str):
        return False

    # str.startswith accepts a tuple of prefixes and matches any of them.
    return value.startswith(("os/", "format/"))
def print_features(functions, extractor):
for f in functions:
function_address = int(f)
@@ -203,14 +213,23 @@ def print_features(functions, extractor):
continue
for feature, va in extractor.extract_function_features(f):
if is_global_feature(feature):
continue
print("func: 0x%08x: %s" % (va, feature))
for bb in extractor.get_basic_blocks(f):
for feature, va in extractor.extract_basic_block_features(f, bb):
if is_global_feature(feature):
continue
print("bb : 0x%08x: %s" % (va, feature))
for insn in extractor.get_instructions(f, bb):
for feature, va in extractor.extract_insn_features(f, bb, insn):
if is_global_feature(feature):
continue
try:
print("insn: 0x%08x: %s" % (va, feature))
except UnicodeEncodeError: