From 294f74b209bb83b5b9eac4fcc8003ddbfde87ece Mon Sep 17 00:00:00 2001
From: William Ballenthin
Date: Wed, 11 Aug 2021 14:44:41 -0600
Subject: [PATCH] extractors: viv: extract format and OS at all scopes

---
 capa/features/extractors/viv/common.py    | 42 +++++++++++++++++++++++
 capa/features/extractors/viv/extractor.py |  9 +++++
 scripts/show-features.py                  | 19 ++++++++++
 3 files changed, 70 insertions(+)
 create mode 100644 capa/features/extractors/viv/common.py

diff --git a/capa/features/extractors/viv/common.py b/capa/features/extractors/viv/common.py
new file mode 100644
index 00000000..fb47f303
--- /dev/null
+++ b/capa/features/extractors/viv/common.py
@@ -0,0 +1,42 @@
+import io
+import logging
+import binascii
+import contextlib
+
+import capa.features.extractors.elf
+from capa.features.common import CHARACTERISTIC_PE, CHARACTERISTIC_ELF, CHARACTERISTIC_WINDOWS, Characteristic
+
+
+logger = logging.getLogger(__name__)
+
+
+def extract_format(buf):
+    """yield the file-format characteristic (PE or ELF) for the given file contents.
+
+    raises NotImplementedError when the leading magic bytes are not recognized.
+    """
+    if buf.startswith(b"MZ"):
+        yield CHARACTERISTIC_PE, 0x0
+    elif buf.startswith(b"\x7fELF"):
+        yield CHARACTERISTIC_ELF, 0x0
+    else:
+        # interpolate eagerly: exception constructors do not apply %-style args
+        raise NotImplementedError("file format: %s" % binascii.hexlify(buf[:4]).decode("ascii"))
+
+
+def extract_os(buf):
+    """yield the OS characteristic for the given file contents.
+
+    PE files yield the Windows characteristic; for ELF files the OS name comes from detect_elf_os.
+    raises NotImplementedError when the leading magic bytes are not recognized.
+    """
+    if buf.startswith(b"MZ"):
+        yield CHARACTERISTIC_WINDOWS, 0x0
+    elif buf.startswith(b"\x7fELF"):
+        with contextlib.closing(io.BytesIO(buf)) as f:
+            os = capa.features.extractors.elf.detect_elf_os(f)
+
+        yield Characteristic("os/%s" % (os.lower())), 0x0
+    else:
+        # interpolate eagerly: exception constructors do not apply %-style args
+        raise NotImplementedError("file format: %s" % binascii.hexlify(buf[:4]).decode("ascii"))
diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py
index 05fec5a3..ae8aa4fb 100644
--- a/capa/features/extractors/viv/extractor.py
+++ b/capa/features/extractors/viv/extractor.py
@@ -12,6 +12,7 @@ import viv_utils.flirt
 
 import capa.features.extractors.viv.file
 import capa.features.extractors.viv.insn
+import capa.features.extractors.viv.common
 import
capa.features.extractors.viv.function import capa.features.extractors.viv.basicblock from capa.features.extractors.base_extractor import FeatureExtractor @@ -40,6 +41,10 @@ class VivisectFeatureExtractor(FeatureExtractor): with open(self.path, "rb") as f: self.buf = f.read() + self.global_features = [] + self.global_features.extend(capa.features.extractors.viv.common.extract_os(self.buf)) + self.global_features.extend(capa.features.extractors.viv.common.extract_format(self.buf)) + def get_base_address(self): # assume there is only one file loaded into the vw return list(self.vw.filemeta.values())[0]["imagebase"] @@ -47,6 +52,7 @@ class VivisectFeatureExtractor(FeatureExtractor): def extract_file_features(self): for feature, va in capa.features.extractors.viv.file.extract_features(self.vw, self.buf): yield feature, va + yield from self.global_features def get_functions(self): for va in sorted(self.vw.getFunctions()): @@ -55,6 +61,7 @@ class VivisectFeatureExtractor(FeatureExtractor): def extract_function_features(self, f): for feature, va in capa.features.extractors.viv.function.extract_features(f): yield feature, va + yield from self.global_features def get_basic_blocks(self, f): return f.basic_blocks @@ -62,6 +69,7 @@ class VivisectFeatureExtractor(FeatureExtractor): def extract_basic_block_features(self, f, bb): for feature, va in capa.features.extractors.viv.basicblock.extract_features(f, bb): yield feature, va + yield from self.global_features def get_instructions(self, f, bb): for insn in bb.instructions: @@ -70,6 +78,7 @@ class VivisectFeatureExtractor(FeatureExtractor): def extract_insn_features(self, f, bb, insn): for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn): yield feature, va + yield from self.global_features def is_library_function(self, va): return viv_utils.flirt.is_library_function(self.vw, va) diff --git a/scripts/show-features.py b/scripts/show-features.py index 3090a471..1d37588c 100644 --- 
a/scripts/show-features.py +++ b/scripts/show-features.py @@ -76,6 +76,7 @@ import capa.engine import capa.helpers import capa.features import capa.features.freeze +import capa.features.common logger = logging.getLogger("capa.show-features") @@ -193,6 +194,15 @@ def ida_main(): return 0 +def is_global_feature(feature): + if (isinstance(feature, capa.features.common.Characteristic) + and isinstance(feature.value, str) + and (feature.value.startswith("os/") + or feature.value.startswith("format/"))): + return True + return False + + def print_features(functions, extractor): for f in functions: function_address = int(f) @@ -203,14 +213,23 @@ def print_features(functions, extractor): continue for feature, va in extractor.extract_function_features(f): + if is_global_feature(feature): + continue + print("func: 0x%08x: %s" % (va, feature)) for bb in extractor.get_basic_blocks(f): for feature, va in extractor.extract_basic_block_features(f, bb): + if is_global_feature(feature): + continue + print("bb : 0x%08x: %s" % (va, feature)) for insn in extractor.get_instructions(f, bb): for feature, va in extractor.extract_insn_features(f, bb, insn): + if is_global_feature(feature): + continue + try: print("insn: 0x%08x: %s" % (va, feature)) except UnicodeEncodeError: