From 8bb1a1cb5a2b5b2ceee34fa5a3b8653faaffe3df Mon Sep 17 00:00:00 2001
From: Moritz Raabe <moritz.raabe@fireeye.com>
Date: Thu, 23 Sep 2021 19:35:14 +0200
Subject: [PATCH] use viv-utils functions

---
 capa/main.py                 | 108 ++---------------------------------
 scripts/match-function-id.py |   2 +-
 setup.py                     |   2 +-
 3 files changed, 8 insertions(+), 104 deletions(-)

diff --git a/capa/main.py b/capa/main.py
index f67a9c6f..b584be60 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -10,8 +10,6 @@ See the License for the specific language governing permissions and limitations
 """
 import os
 import sys
-import gzip
-import time
 import hashlib
 import logging
 import os.path
@@ -19,7 +17,6 @@ import argparse
 import datetime
 import textwrap
 import itertools
-import contextlib
 import collections
 from typing import Any, Dict, List, Tuple
 
@@ -58,14 +55,6 @@ EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
 logger = logging.getLogger("capa")
 
 
-@contextlib.contextmanager
-def timing(msg: str):
-    t0 = time.time()
-    yield
-    t1 = time.time()
-    logger.debug("perf: %s: %0.2fs", msg, t1 - t0)
-
-
 def set_vivisect_log_level(level):
     logging.getLogger("vivisect").setLevel(level)
     logging.getLogger("vivisect.base").setLevel(level)
@@ -301,40 +290,6 @@ def get_os(sample: str) -> str:
     return "unknown"
 
 
-SHELLCODE_BASE = 0x690000
-
-
-def get_shellcode_vw(sample, arch="auto"):
-    """
-    Return shellcode workspace using explicit arch or via auto detect.
-    The workspace is *not* analyzed nor saved. Its up to the caller to do this.
-    Then, they can register FLIRT analyzers or decide not to write to disk.
-    """
-    import viv_utils
-
-    with open(sample, "rb") as f:
-        sample_bytes = f.read()
-
-    if arch == "auto":
-        # choose arch with most functions, idea by Jay G.
-        vw_cands = []
-        for arch in ["i386", "amd64"]:
-            vw_cands.append(
-                viv_utils.getShellcodeWorkspace(
-                    sample_bytes, arch, base=SHELLCODE_BASE, analyze=False, should_save=False
-                )
-            )
-        if not vw_cands:
-            raise ValueError("could not generate vivisect workspace")
-        vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
-    else:
-        vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch, base=SHELLCODE_BASE, analyze=False, should_save=False)
-
-    vw.setMeta("StorageName", "%s.viv" % sample)
-
-    return vw
-
-
 def get_meta_str(vw):
     """
     Return workspace meta information string
@@ -346,58 +301,6 @@ def get_meta_str(vw):
     return "%s, number of functions: %d" % (", ".join(meta), len(vw.getFunctions()))
 
 
-def load_flirt_signature(path):
-    # lazy import enables us to only require flirt here and not in IDA, for example
-    import flirt
-
-    if path.endswith(".sig"):
-        with open(path, "rb") as f:
-            with timing("flirt: parsing .sig: " + path):
-                sigs = flirt.parse_sig(f.read())
-
-    elif path.endswith(".pat"):
-        with open(path, "rb") as f:
-            with timing("flirt: parsing .pat: " + path):
-                sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))
-
-    elif path.endswith(".pat.gz"):
-        with gzip.open(path, "rb") as f:
-            with timing("flirt: parsing .pat.gz: " + path):
-                sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))
-
-    else:
-        raise ValueError("unexpect signature file extension: " + path)
-
-    return sigs
-
-
-def register_flirt_signature_analyzers(vw, sigpaths):
-    """
-    args:
-      vw (vivisect.VivWorkspace):
-      sigpaths (List[str]): file system paths of .sig/.pat files
-    """
-    # lazy import enables us to only require flirt here and not in IDA, for example
-    import flirt
-    import viv_utils.flirt
-
-    for sigpath in sigpaths:
-        try:
-            sigs = load_flirt_signature(sigpath)
-        except ValueError as e:
-            logger.warning("could not load %s: %s", sigpath, str(e))
-            continue
-
-        logger.debug("flirt: sig count: %d", len(sigs))
-
-        with timing("flirt: compiling sigs"):
-            matcher = flirt.compile(sigs)
-
-        analyzer = viv_utils.flirt.FlirtFunctionAnalyzer(matcher, sigpath)
-        logger.debug("registering viv function analyzer: %s", repr(analyzer))
-        viv_utils.flirt.addFlirtFunctionAnalyzer(vw, analyzer)
-
-
 def is_running_standalone() -> bool:
     """
     are we running from a PyInstaller'd executable?
@@ -458,8 +361,9 @@ def get_workspace(path, format, sigpaths):
 
     supported formats:
       - pe
-      - sc32
-      - sc64
+      - elf
+      - sc32 (32-bit shellcode)
+      - sc64 (64-bit shellcode)
       - auto
 
     this creates and analyzes the workspace; however, it does *not* save the workspace.
@@ -480,13 +384,13 @@ def get_workspace(path, format, sigpaths):
         vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
     elif format == "sc32":
         # these are not analyzed nor saved.
-        vw = get_shellcode_vw(path, arch="i386")
+        vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False)
     elif format == "sc64":
-        vw = get_shellcode_vw(path, arch="amd64")
+        vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False)
     else:
         raise ValueError("unexpected format: " + format)
 
-    register_flirt_signature_analyzers(vw, sigpaths)
+    viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
 
     vw.analyze()
 
diff --git a/scripts/match-function-id.py b/scripts/match-function-id.py
index c37702ba..a1ea125f 100644
--- a/scripts/match-function-id.py
+++ b/scripts/match-function-id.py
@@ -105,7 +105,7 @@ def main(argv=None):
 
     analyzers = []
     for sigpath in args.signatures:
-        sigs = capa.main.load_flirt_signature(sigpath)
+        sigs = viv_utils.flirt.load_flirt_signature(sigpath)
 
         with capa.main.timing("flirt: compiling sigs"):
             matcher = flirt.compile(sigs)
diff --git a/setup.py b/setup.py
index c76897d9..15bdea2d 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ requirements = [
     "termcolor==1.1.0",
     "wcwidth==0.2.5",
     "ida-settings==2.1.0",
-    "viv-utils[flirt]==0.6.5",
+    "viv-utils[flirt]==0.6.6",
     "halo==0.0.31",
     "networkx==2.5.1",
     "ruamel.yaml==0.17.16",