unpack: add can_unpack routine

unpack: aspack: add doc
unpack: aspack: add documentation
2025-12-06 21:00:57 -08:00 · 2020-08-29 21:38:26 -06:00 · 2020-08-26 15:15:06 -06:00 · 2020-08-26 14:26:36 -06:00 · 2020-08-26 14:07:25 -06:00 · 2020-08-26 13:49:08 -06:00
39 changed files with 2706 additions and 1112 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -14,38 +14,64 @@ jobs:
          - os: ubuntu-16.04
            # use old linux so that the shared library versioning is more portable
            artifact_name: capa
-            asset_name: capa-linux
+            asset_name: linux
          - os: windows-latest
            artifact_name: capa.exe
-            asset_name: capa-windows.exe
+            asset_name: windows
          - os: macos-latest
            artifact_name: capa
-            asset_name: capa-macos
+            asset_name: macos
    steps:
-    - name: Checkout capa
-      uses: actions/checkout@v2
-      with:
-        submodules: true
-    - name: Set up Python 2.7
-      uses: actions/setup-python@v2
-      with:
-        python-version: 2.7
-    - name: Install PyInstaller
-      run: pip install pyinstaller
-    - name: Install capa
-      run: pip install -e .
-    - name: Build standalone executable
-      run: pyinstaller .github/pyinstaller/pyinstaller.spec
-    - name: Does it run?
-      run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_"
-    - uses: actions/upload-artifact@v2
-      with:
-        name: ${{ matrix.asset_name }}
-        path: dist/${{ matrix.artifact_name }}
-    - name: Upload binaries to GH Release
-      uses: svenstaro/upload-release-action@v2
-      with:
-        repo_token: ${{ secrets.CAPA_TOKEN }}
-        file: dist/${{ matrix.artifact_name }}
-        asset_name: ${{ matrix.asset_name }}
-        tag: ${{ github.ref }}
+      - name: Checkout capa
+        uses: actions/checkout@v2
+        with:
+          submodules: true
+      - name: Set up Python 2.7
+        uses: actions/setup-python@v2
+        with:
+          python-version: 2.7
+      - name: Install PyInstaller
+        # pyinstaller 4 doesn't support Python 2.7
+        run: pip install 'pyinstaller==3.*'
+      - name: Install capa
+        run: pip install -e .
+      - name: Build standalone executable
+        run: pyinstaller .github/pyinstaller/pyinstaller.spec
+      - name: Does it run?
+        run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_"
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ matrix.asset_name }}
+          path: dist/${{ matrix.artifact_name }}
+
+  zip:
+    name: zip ${{ matrix.asset_name }}
+    runs-on: ubuntu-latest
+    needs: build
+    strategy:
+      matrix:
+        include:
+          - asset_name: linux
+            artifact_name: capa
+          - asset_name: windows
+            artifact_name: capa.exe
+          - asset_name: macos
+            artifact_name: capa
+    steps:
+      - name: Download ${{ matrix.asset_name }}
+        uses: actions/download-artifact@v2
+        with:
+          name: ${{ matrix.asset_name }}
+      - name: Set executable flag
+        run: chmod +x ${{ matrix.artifact_name }}
+      - name: Set zip name
+        # the `::set-env` workflow command is disabled by GitHub; append to the $GITHUB_ENV file instead
+        run: echo "zip_name=capa-${GITHUB_REF#refs/tags/}-${{ matrix.asset_name }}.zip" >> "$GITHUB_ENV"
+      - name: Zip ${{ matrix.artifact_name }} into ${{ env.zip_name }}
+        run: zip ${{ env.zip_name }} ${{ matrix.artifact_name }}
+      - name: Upload ${{ env.zip_name }} to GH Release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: ${{ env.zip_name }}
+          tag: ${{ github.ref }}
+
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -41,19 +41,29 @@ jobs:
      run: python scripts/lint.py rules/

  tests:
+    name: Tests in ${{ matrix.python }}
    runs-on: ubuntu-latest
    needs: [code_style, rule_linter]
+    strategy:
+      matrix:
+        include:
+          # quote versions so YAML keeps them as strings (an unquoted 3.10 would load as the float 3.1)
+          - python: '2.7'
+          - python: '3.6'
+          - python: '3.7'
+          - python: '3.8'
+          - python: '3.9.0-alpha - 3.9.x'  # Python latest
    steps:
    - name: Checkout capa with submodules
      uses: actions/checkout@v2
      with:
        submodules: true
-    - name: Set up Python 2.7
+    - name: Set up Python ${{ matrix.python }}
      uses: actions/setup-python@v2
      with:
-        python-version: 2.7
+        python-version: ${{ matrix.python }}
    - name: Install capa
-      run: pip install -e .[dev]
+      # TODO: remove `pefile` when we bump lancelot >= 0.3.7
+      run: pip install -e .[dev] pefile
    - name: Run tests
      run: pytest tests/

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,15 +38,15 @@ Download a standalone binary below and checkout the readme [here on GitHub](http
  - hash data using sha1 via x86 extensions @re-fox
  - hash data using sha256 via x86 extensions @re-fox
  - capture network configuration via ipconfig @re-fox
-  - hash data via WinCrypt @michael-hunhoff
-  - get file attributes @michael-hunhoff
-  - allocate thread local storage @michael-hunhoff
-  - get thread local storage value @michael-hunhoff
-  - set thread local storage @michael-hunhoff
-  - get session integrity level @michael-hunhoff
-  - add file to cabinet file @michael-hunhoff
-  - flush cabinet file @michael-hunhoff
-  - open cabinet file @michael-hunhoff
+  - hash data via WinCrypt @mike-hunhoff
+  - get file attributes @mike-hunhoff
+  - allocate thread local storage @mike-hunhoff
+  - get thread local storage value @mike-hunhoff
+  - set thread local storage @mike-hunhoff
+  - get session integrity level @mike-hunhoff
+  - add file to cabinet file @mike-hunhoff
+  - flush cabinet file @mike-hunhoff
+  - open cabinet file @mike-hunhoff
  - gather firefox profile information @re-fox
  - encrypt data using skipjack @re-fox
  - encrypt data using camellia @re-fox
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 ![capa](.github/logo.png)

 [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
-[![Number of rules](https://img.shields.io/badge/rules-293-blue.svg)](https://github.com/fireeye/capa-rules)
+[![Number of rules](https://img.shields.io/badge/rules-303-blue.svg)](https://github.com/fireeye/capa-rules)
 [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

 capa detects capabilities in executable files.
--- a/capa/features/init.py
+++ b/capa/features/init.py
@@ -161,7 +161,7 @@ class Regex(String):


 class StringFactory(object):
-    def __new__(self, value, description):
+    def __new__(self, value, description=None):
        if value.startswith("/") and (value.endswith("/") or value.endswith("/i")):
            return Regex(value, description=description)
        return String(value, description=description)
--- a/capa/features/extractors/init.py
+++ b/capa/features/extractors/init.py
@@ -196,7 +196,7 @@ class NullFeatureExtractor(FeatureExtractor):
            'functions': {
                0x401000: {
                    'features': [
-                        (0x401000, capa.features.Characteristic('switch')),
+                        (0x401000, capa.features.Characteristic('nzxor')),
                    ],
                    'basic blocks': {
                        0x401000: {
--- a/capa/features/extractors/ida/function.py
+++ b/capa/features/extractors/ida/function.py
@@ -14,16 +14,6 @@ from capa.features import Characteristic
 from capa.features.extractors import loops


-def extract_function_switch(f):
-    """ extract switch indicators from a function
-
-        arg:
-            f (IDA func_t)
-    """
-    if capa.features.extractors.ida.helpers.is_function_switch_statement(f):
-        yield Characteristic("switch"), f.start_ea
-
-
 def extract_function_calls_to(f):
    """ extract callers to a function

@@ -72,7 +62,7 @@ def extract_features(f):
            yield feature, ea


-FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_switch, extract_function_loop, extract_recursive_call)
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)


 def main():
--- a/capa/features/extractors/ida/helpers.py
+++ b/capa/features/extractors/ida/helpers.py
@@ -300,22 +300,6 @@ def is_function_recursive(f):
    return False


-def is_function_switch_statement(f):
-    """ check a function for switch statement indicators
-
-        adapted from:
-        https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1
-
-        arg:
-            f (IDA func_t)
-    """
-    for (start, end) in idautils.Chunks(f.start_ea):
-        for head in idautils.Heads(start, end):
-            if idaapi.get_switch_info(head):
-                return True
-    return False
-
-
 def is_basic_block_tight_loop(bb):
    """ check basic block loops to self

@@ -331,3 +315,29 @@ def is_basic_block_tight_loop(bb):
            if ref == bb.start_ea:
                return True
    return False
+
+
+def find_data_reference_from_insn(insn, max_depth=10):
+    """ follow a chain of single data references starting at an instruction
+
+        returns the last address reached; this is the instruction's own address when it has no
+        (or more than one) data reference.
+
+        args:
+            insn: instruction whose `ea` is the starting address
+            max_depth (int): maximum number of references to follow
+    """
+    ea = insn.ea
+    remaining = max_depth
+
+    while True:
+        refs = list(idautils.DataRefsFrom(ea))
+
+        # stop when there is not exactly one reference (nested pointers are assumed to have
+        # a single data reference), or when the reference points back at itself
+        if len(refs) != 1 or refs[0] == ea:
+            return ea
+
+        # stop once the hop budget is spent
+        if remaining < 1:
+            return ea
+        remaining -= 1
+
+        ea = refs[0]
--- a/capa/features/extractors/ida/insn.py
+++ b/capa/features/extractors/ida/insn.py
@@ -98,8 +98,11 @@ def extract_insn_number_features(f, bb, insn):
        #   .text:00401145 add esp, 0Ch
        return

-    for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm,)):
-        const = capa.features.extractors.ida.helpers.mask_op_val(op)
+    for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm, idaapi.o_mem)):
+        if op.type == idaapi.o_imm:
+            const = capa.features.extractors.ida.helpers.mask_op_val(op)
+        else:
+            const = op.addr
        if not idaapi.is_mapped(const):
            yield Number(const), insn.ea
            yield Number(const, arch=get_arch(f.ctx)), insn.ea
@@ -116,11 +119,8 @@ def extract_insn_bytes_features(f, bb, insn):
        example:
            push    offset iid_004118d4_IShellLinkA ; riid
    """
-    if idaapi.is_call_insn(insn):
-        # ignore call instructions
-        return
-
-    for ref in idautils.DataRefsFrom(insn.ea):
+    ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
+    if ref != insn.ea:
        extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
        if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
            yield Bytes(extracted_bytes), insn.ea
@@ -137,7 +137,8 @@ def extract_insn_string_features(f, bb, insn):
        example:
            push offset aAcr     ; "ACR  > "
    """
-    for ref in idautils.DataRefsFrom(insn.ea):
+    ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
+    if ref != insn.ea:
        found = capa.features.extractors.ida.helpers.find_string_at(ref)
        if found:
            yield String(found), insn.ea
--- a/capa/features/extractors/lancelot/init.py
+++ b/capa/features/extractors/lancelot/init.py
@@ -0,0 +1,92 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import logging
+
+import lancelot
+
+import capa.features.extractors
+import capa.features.extractors.lancelot.file
+import capa.features.extractors.lancelot.insn
+import capa.features.extractors.lancelot.function
+import capa.features.extractors.lancelot.basicblock
+
+__all__ = ["file", "function", "basicblock", "insn"]
+logger = logging.getLogger(__name__)
+
+
+class BB(object):
+    """
+    wrapper around a lancelot basic block that copies its address, length, and flow edges,
+    converts to its address via `int()`, and can enumerate its instructions.
+    """
+
+    def __init__(self, ws, bb):
+        super(BB, self).__init__()
+        # workspace used to decode instructions on demand
+        self.ws = ws
+        self.address, self.length = bb.address, bb.length
+        self.predecessors, self.successors = bb.predecessors, bb.successors
+
+    def __int__(self):
+        return self.address
+
+    @property
+    def instructions(self):
+        """ generate the block's instructions in address order; stops early if decoding fails """
+        cursor = self.address
+        while cursor < self.address + self.length:
+            try:
+                decoded = self.ws.read_insn(cursor)
+            except ValueError:
+                logger.warning("failed to read instruction at 0x%x", cursor)
+                break
+
+            yield decoded
+            cursor += decoded.length
+
+
+class LancelotFeatureExtractor(capa.features.extractors.FeatureExtractor):
+    """
+    feature extractor that analyzes a raw file buffer with lancelot.
+
+    functions are represented by their start address (int), basic blocks by `BB` instances,
+    and instructions by whatever `ws.read_insn` returns.
+    """
+
+    def __init__(self, buf):
+        super(LancelotFeatureExtractor, self).__init__()
+        # raw bytes of the file under analysis; also handed to the file-level extractors
+        self.buf = buf
+        # lancelot analysis workspace built from the raw bytes
+        self.ws = lancelot.from_bytes(buf)
+        self.ctx = {}
+
+    def get_base_address(self):
+        """ return the base address reported by the lancelot workspace """
+        return self.ws.base_address
+
+    def extract_file_features(self):
+        """ yield (feature, va) pairs extracted from the whole file buffer """
+        for feature, va in capa.features.extractors.lancelot.file.extract_file_features(self.buf):
+            yield feature, va
+
+    def get_functions(self):
+        """ yield the address of each function known to the workspace """
+        for va in self.ws.get_functions():
+            # this is just the address of the function
+            yield va
+
+    def extract_function_features(self, f):
+        """ yield (feature, va) pairs for the function at address `f` """
+        for feature, va in capa.features.extractors.lancelot.function.extract_function_features(self.ws, f):
+            yield feature, va
+
+    def get_basic_blocks(self, f):
+        """ yield a `BB` for each basic block of the function at address `f`; yields nothing if the CFG cannot be built """
+        try:
+            cfg = self.ws.build_cfg(f)
+        # NOTE(review): the bare except also traps BaseException subclasses; presumably intentional
+        # so that any failure inside the native library is survivable - confirm before narrowing.
+        except:
+            logger.warning("failed to build CFG for 0x%x", f)
+            return
+        else:
+            for bb in cfg.basic_blocks.values():
+                yield BB(self.ws, bb)
+
+    def extract_basic_block_features(self, f, bb):
+        """ yield (feature, va) pairs for the basic block `bb` """
+        for feature, va in capa.features.extractors.lancelot.basicblock.extract_basic_block_features(self.ws, bb):
+            yield feature, va
+
+    def get_instructions(self, f, bb):
+        """ return the instruction generator of the basic block `bb` """
+        return bb.instructions
+
+    def extract_insn_features(self, f, bb, insn):
+        """ yield (feature, va) pairs for the instruction `insn`; the extractor itself is passed along as context """
+        for feature, va in capa.features.extractors.lancelot.insn.extract_insn_features(self, f, bb, insn):
+            yield feature, va
--- a/capa/features/extractors/lancelot/basicblock.py
+++ b/capa/features/extractors/lancelot/basicblock.py
@@ -0,0 +1,120 @@
+import string
+import struct
+import logging
+
+from lancelot import (
+    FLOW_VA,
+    OPERAND_SIZE,
+    OPERAND_TYPE,
+    MEMORY_OPERAND_BASE,
+    OPERAND_TYPE_MEMORY,
+    OPERAND_TYPE_IMMEDIATE,
+    IMMEDIATE_OPERAND_VALUE,
+)
+
+from capa.features import Characteristic
+from capa.features.basicblock import BasicBlock
+from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
+
+logger = logging.getLogger(__name__)
+
+
+def extract_bb_tight_loop(ws, bb):
+    """ yield a "tight loop" characteristic when one of the block's successor flows targets the block itself """
+    loops_to_self = any(flow[FLOW_VA] == bb.address for flow in bb.successors)
+    if loops_to_self:
+        yield Characteristic("tight loop"), bb.address
+
+
+def is_mov_imm_to_stack(insn):
+    """ return True if the instruction is a mov-family write of a non-negative immediate to memory based at ebp/rbp/esp/rsp """
+    if not insn.mnemonic.startswith("mov"):
+        return False
+
+    try:
+        dst, src = insn.operands
+    except ValueError:
+        # not two operands
+        return False
+
+    # source must be a non-negative immediate, destination must be stack/frame relative memory
+    return (
+        src[OPERAND_TYPE] == OPERAND_TYPE_IMMEDIATE
+        and not src[IMMEDIATE_OPERAND_VALUE] < 0
+        and dst[OPERAND_TYPE] == OPERAND_TYPE_MEMORY
+        and dst[MEMORY_OPERAND_BASE] in ("ebp", "rbp", "esp", "rsp")
+    )
+
+
+def is_printable_ascii(chars):
+    """ return True if every byte value in `chars` is below 127 and is in `string.printable` (True for empty input) """
+    for c in chars:
+        if not (c < 127 and chr(c) in string.printable):
+            return False
+    return True
+
+
+def is_printable_utf16le(chars):
+    """
+    return True if `chars` looks like printable UTF-16LE text:
+    every odd-indexed byte is zero and every even-indexed byte is printable ASCII.
+    """
+    # iterate as integers: comparing an int (which Python 3 bytes iteration yields) against b"\x00"
+    # is never true, which previously made this check fail for every multi-byte input.
+    chars = bytearray(chars)
+    if all(c == 0 for c in chars[1::2]):
+        return is_printable_ascii(chars[::2])
+    return False
+
+
+def get_printable_len(operand):
+    """
+    Return string length if all operand bytes are ascii or utf16-le printable
+
+    args:
+      operand: immediate operand; its size in bits and its value are read via OPERAND_SIZE / IMMEDIATE_OPERAND_VALUE
+    returns:
+      int: number of characters encoded by the operand, or 0 if it is not printable
+    raises:
+      ValueError: if the operand size is not 8, 16, 32, or 64 bits
+    """
+    operand_size = operand[OPERAND_SIZE]
+    if operand_size == 8:
+        chars = struct.pack("<B", operand[IMMEDIATE_OPERAND_VALUE])
+    elif operand_size == 16:
+        chars = struct.pack("<H", operand[IMMEDIATE_OPERAND_VALUE])
+    elif operand_size == 32:
+        chars = struct.pack("<I", operand[IMMEDIATE_OPERAND_VALUE])
+    elif operand_size == 64:
+        chars = struct.pack("<Q", operand[IMMEDIATE_OPERAND_VALUE])
+    else:
+        raise ValueError("unexpected operand size: " + str(operand_size))
+
+    # floor division keeps the length an int on Python 3, where `/` would produce a float
+    if is_printable_ascii(chars):
+        return operand_size // 8
+    if is_printable_utf16le(chars):
+        return operand_size // 16
+
+    return 0
+
+
+def _bb_has_stackstring(ws, bb):
+    """
+    heuristic stackstring detection:
+      - sum the printable lengths of immediates moved to the stack within the basic block
+      - report True as soon as the running total exceeds MIN_STACKSTRING_LEN
+    """
+    printable_total = 0
+    for insn in bb.instructions:
+        if is_mov_imm_to_stack(insn):
+            # the source immediate is the second operand
+            printable_total += get_printable_len(insn.operands[1])
+
+        if printable_total > MIN_STACKSTRING_LEN:
+            return True
+
+    return False
+
+
+def extract_stackstring(ws, bb):
+    """ yield a "stack string" characteristic at the block address when the stackstring heuristic matches """
+    if not _bb_has_stackstring(ws, bb):
+        return
+    yield Characteristic("stack string"), bb.address
+
+
+def extract_basic_block_features(ws, bb):
+    """ yield the BasicBlock feature for `bb`, followed by every (feature, va) pair produced by BASIC_BLOCK_HANDLERS """
+    yield BasicBlock(), bb.address
+    for handler in BASIC_BLOCK_HANDLERS:
+        for found in handler(ws, bb):
+            feature, va = found
+            yield feature, va
+
+
+BASIC_BLOCK_HANDLERS = (
+    extract_bb_tight_loop,
+    extract_stackstring,
+)
--- a/capa/features/extractors/lancelot/file.py
+++ b/capa/features/extractors/lancelot/file.py
@@ -0,0 +1,81 @@
+import pefile
+
+import capa.features.extractors.strings
+from capa.features import String, Characteristic
+from capa.features.file import Export, Import, Section
+
+
+def extract_file_embedded_pe(buf, pe):
+    """
+    yield an "embedded pe" characteristic for each offset, past the first two bytes of the file,
+    at which the remaining data parses as a PE file.
+
+    args:
+      buf (bytes): the raw file contents
+      pe: unused; accepted so that all file handlers share one signature
+    """
+    # skip the first two bytes so the file's own leading "MZ" is not reported
+    offset = 2
+    while True:
+        # search in place instead of re-slicing the remainder of the buffer on every candidate
+        offset = buf.find(b"MZ", offset)
+        if offset == -1:
+            return
+
+        try:
+            pefile.PE(data=buf[offset:])
+        except Exception:
+            # best effort: most "MZ" hits are not PE files, so parse failures are expected.
+            # unlike a bare `except`, this lets KeyboardInterrupt/SystemExit propagate.
+            pass
+        else:
+            yield Characteristic("embedded pe"), offset
+
+        # continue searching just past this "MZ"
+        offset += 2
+
+
+def extract_file_export_names(buf, pe):
+    """
+    yield an Export feature for each named export, at image base + export address.
+
+    args:
+      buf (bytes): unused; accepted so that all file handlers share one signature
+      pe (pefile.PE): the parsed file
+    """
+    if not hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
+        return
+
+    base_address = pe.OPTIONAL_HEADER.ImageBase
+    for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
+        if exp.name is None:
+            # exported by ordinal only: there is no name to report
+            continue
+        yield Export(exp.name.decode("ascii")), base_address + exp.address
+
+
+def extract_file_import_names(buf, pe):
+    """
+    yield Import features for each imported symbol, at the address of its import entry.
+
+    imports by ordinal are reported as `lib.#ordinal`; imports by name are reported
+    both as `lib.name` and as the bare `name`.
+
+    args:
+      buf (bytes): unused; accepted so that all file handlers share one signature
+      pe (pefile.PE): the parsed file
+    """
+    # mirror the export handler: the attribute is absent when the file has no import directory
+    if not hasattr(pe, "DIRECTORY_ENTRY_IMPORT"):
+        return
+
+    for entry in pe.DIRECTORY_ENTRY_IMPORT:
+        # normalize "KERNEL32.dll" to "kernel32"
+        libname = entry.dll.decode("ascii").lower().partition(".")[0]
+        for imp in entry.imports:
+            if imp.ordinal:
+                yield Import("%s.#%s" % (libname, imp.ordinal)), imp.address
+            else:
+                impname = imp.name.decode("ascii")
+                yield Import("%s.%s" % (libname, impname)), imp.address
+                yield Import("%s" % (impname)), imp.address
+
+
+def extract_file_section_names(buf, pe):
+    """ yield a Section feature per section, named by the bytes before the first NUL, at image base + section virtual address """
+    image_base = pe.OPTIONAL_HEADER.ImageBase
+    for sec in pe.sections:
+        raw_name, _, _ = sec.Name.partition(b"\x00")
+        name = raw_name.decode("ascii")
+        yield Section(name), image_base + sec.VirtualAddress
+
+
+def extract_file_strings(buf, pe):
+    """ yield a String feature, at its buffer offset, for every ASCII string and then every unicode string found in `buf` """
+    extractors = (
+        capa.features.extractors.strings.extract_ascii_strings,
+        capa.features.extractors.strings.extract_unicode_strings,
+    )
+    for extract in extractors:
+        for found in extract(buf):
+            yield String(found.s), found.offset
+
+
+def extract_file_features(buf):
+    """ parse `buf` with pefile and yield every (feature, va) pair produced by FILE_HANDLERS """
+    parsed = pefile.PE(data=buf)
+    for handler in FILE_HANDLERS:
+        for found in handler(buf, parsed):
+            feature, va = found
+            yield feature, va
+
+
+FILE_HANDLERS = (
+    extract_file_embedded_pe,
+    extract_file_export_names,
+    extract_file_import_names,
+    extract_file_section_names,
+    extract_file_strings,
+)
--- a/capa/features/extractors/lancelot/function.py
+++ b/capa/features/extractors/lancelot/function.py
@@ -0,0 +1,64 @@
+import logging
+
+try:
+    from functools import lru_cache
+except ImportError:
+    from backports.functools_lru_cache import lru_cache
+
+from lancelot import (
+    FLOW_VA,
+    FLOW_TYPE,
+    FLOW_TYPE_CONDITIONAL_JUMP,
+    FLOW_TYPE_CONDITIONAL_MOVE,
+    FLOW_TYPE_UNCONDITIONAL_JUMP,
+)
+
+from capa.features import Characteristic
+from capa.features.extractors import loops
+
+logger = logging.getLogger(__name__)
+
+
+# call the decorator factory: the bare `@lru_cache` form is only accepted by Python 3.8+
+@lru_cache()
+def get_call_graph(ws):
+    """ build the call graph of the workspace `ws`, memoized per workspace (`ws` must be hashable) """
+    return ws.build_call_graph()
+
+
+def extract_function_calls_to(ws, f):
+    """ yield a "calls to" characteristic at each caller address the call graph lists for function `f` """
+    callers = get_call_graph(ws).calls_to.get(f, [])
+    for caller_va in callers:
+        yield Characteristic("calls to"), caller_va
+
+
+def extract_function_loop(ws, f):
+    """ yield a "loop" characteristic at `f` when the jump/conditional-move edges between its basic blocks contain a loop """
+    loop_flow_types = (
+        FLOW_TYPE_UNCONDITIONAL_JUMP,
+        FLOW_TYPE_CONDITIONAL_JUMP,
+        FLOW_TYPE_CONDITIONAL_MOVE,
+    )
+
+    # collect (source block, target) edges for the flow types of interest
+    edges = [
+        (bb.address, flow[FLOW_VA])
+        for bb in ws.build_cfg(f).basic_blocks.values()
+        for flow in bb.successors
+        if flow[FLOW_TYPE] in loop_flow_types
+    ]
+
+    if edges and loops.has_loop(edges):
+        yield Characteristic("loop"), f
+
+
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
+
+
+_not_implemented = set([])
+
+
+def extract_function_features(ws, f):
+    """
+    yield every (feature, va) pair produced by FUNCTION_HANDLERS for the function at `f`.
+    a handler that raises NotImplementedError is skipped, with one warning logged per handler name.
+    """
+    for handler in FUNCTION_HANDLERS:
+        try:
+            for found in handler(ws, f):
+                feature, va = found
+                yield feature, va
+        except NotImplementedError:
+            handler_name = handler.__name__
+            if handler_name in _not_implemented:
+                continue
+            logger.warning("not implemented: %s", handler_name)
+            _not_implemented.add(handler_name)
--- a/capa/features/extractors/lancelot/helpers.py
+++ b/capa/features/extractors/lancelot/helpers.py
@@ -0,0 +1,33 @@
+from lancelot import (
+    OPERAND_TYPE,
+    MEMORY_OPERAND_BASE,
+    MEMORY_OPERAND_DISP,
+    OPERAND_TYPE_MEMORY,
+    OPERAND_TYPE_IMMEDIATE,
+    IMMEDIATE_OPERAND_VALUE,
+    IMMEDIATE_OPERAND_IS_RELATIVE,
+)
+
+
+def get_operand_target(insn, op):
+    """
+    compute the address referenced by an instruction operand.
+
+    args:
+      insn: the instruction owning the operand; `address` and `length` are used for relative operands
+      op: the operand, indexed with the lancelot operand constants
+    returns:
+      int: the target address
+    raises:
+      ValueError: when no target can be computed (for example, memory operands with a base register other than rip)
+    """
+    if op[OPERAND_TYPE] == OPERAND_TYPE_MEMORY:
+        # call direct, x64
+        # rip relative
+        # kernel32-64:180001041    call    cs:__imp_RtlVirtualUnwind_0
+        if op[MEMORY_OPERAND_BASE] == "rip":
+            return op[MEMORY_OPERAND_DISP] + insn.address + insn.length
+
+        # call direct, x32
+        # mimikatz:0x403BD3  call    ds:CryptAcquireContextW
+        # identity test: `== None` would defer to the operand value's own __eq__
+        elif op[MEMORY_OPERAND_BASE] is None:
+            return op[MEMORY_OPERAND_DISP]
+
+    # call via thunk
+    # mimikatz:0x455A41  call    LsaQueryInformationPolicy
+    elif op[OPERAND_TYPE] == OPERAND_TYPE_IMMEDIATE and op[IMMEDIATE_OPERAND_IS_RELATIVE]:
+        return op[IMMEDIATE_OPERAND_VALUE] + insn.address + insn.length
+
+    # absolute immediate: the value is the target
+    elif op[OPERAND_TYPE] == OPERAND_TYPE_IMMEDIATE:
+        return op[IMMEDIATE_OPERAND_VALUE]
+
+    raise ValueError("memory operand has no target")
--- a/capa/features/extractors/lancelot/indirect_calls.py
+++ b/capa/features/extractors/lancelot/indirect_calls.py
@@ -0,0 +1,149 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import collections
+
+from lancelot import (
+    FLOW_VA,
+    OPERAND_TYPE,
+    PERMISSION_READ,
+    MEMORY_OPERAND_BASE,
+    MEMORY_OPERAND_DISP,
+    OPERAND_TYPE_MEMORY,
+    MEMORY_OPERAND_INDEX,
+    OPERAND_TYPE_REGISTER,
+    MEMORY_OPERAND_SEGMENT,
+    OPERAND_TYPE_IMMEDIATE,
+    IMMEDIATE_OPERAND_VALUE,
+    REGISTER_OPERAND_REGISTER,
+    IMMEDIATE_OPERAND_IS_RELATIVE,
+)
+
+from capa.features.extractors.lancelot.helpers import get_operand_target
+
+DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor")
+
+
+class NotFoundError(Exception):
+    """ raised when the definition of a call target register cannot be found """
+
+
+def read_instructions(ws, bb):
+    """ yield the instructions of basic block `bb` in address order, stopping silently at the first address that fails to decode """
+    cursor = bb.address
+    while cursor < bb.address + bb.length:
+        try:
+            decoded = ws.read_insn(cursor)
+        except ValueError:
+            break
+
+        yield decoded
+        cursor += decoded.length
+
+
+def build_instruction_predecessors(ws, cfg):
+    """
+    map each instruction address to the set of instruction addresses that can execute immediately before it.
+
+    within a basic block, the predecessor is the prior instruction.
+    for the first instruction of a block, the predecessors are the last instructions of the predecessor blocks.
+
+    args:
+      ws: the analysis workspace, used to decode instructions
+      cfg: the control flow graph; `basic_blocks` maps addresses to basic blocks
+    returns:
+      collections.defaultdict(set): instruction address -> set of predecessor instruction addresses
+    """
+    preds = collections.defaultdict(set)
+
+    # decode each basic block once, rather than re-decoding a predecessor block for every incoming edge
+    insns_by_bb = {va: list(read_instructions(ws, bb)) for va, bb in cfg.basic_blocks.items()}
+
+    for va, bb in cfg.basic_blocks.items():
+        insns = insns_by_bb[va]
+        if not insns:
+            continue
+
+        for pred in bb.predecessors:
+            pred_insns = insns_by_bb[pred[FLOW_VA]]
+            if not pred_insns:
+                # nothing could be decoded in the predecessor block, so there is no last instruction to link
+                continue
+            preds[insns[0].address].add(pred_insns[-1].address)
+
+        for prev, cur in zip(insns, insns[1:]):
+            preds[cur.address].add(prev.address)
+
+    return preds
+
+
+def find_definition(ws, f, insn):
+    """
+    scan backwards from the given call instruction looking for an assignment to the register it calls through.
+    if the assigned value is a constant, return that value.
+    args:
+      ws (lancelot.PE)
+      f (int): the function start address
+      insn (lancelot.Instruction): call instruction to resolve; its first operand must be a register
+    returns:
+      (va: int, value?: int|None): the address of the assignment and the value, if a constant.
+    raises:
+      NotFoundError: when the definition cannot be found.
+    """
+    assert insn.mnemonic == "call"
+    op0 = insn.operands[0]
+    assert op0[OPERAND_TYPE] == OPERAND_TYPE_REGISTER
+    reg = op0[REGISTER_OPERAND_REGISTER]
+
+    cfg = ws.build_cfg(f)
+    preds = build_instruction_predecessors(ws, cfg)
+
+    # breadth-first walk backwards over instruction predecessors, starting at the call
+    q = collections.deque()
+    seen = set([])
+    q.extend(preds[insn.address])
+    while q:
+        cur = q.popleft()
+
+        # skip if we've already processed this location
+        if cur in seen:
+            continue
+        seen.add(cur)
+
+        # note: from here on, `insn` is the instruction being inspected, not the original call
+        insn = ws.read_insn(cur)
+        operands = insn.operands
+
+        # no operands, so it cannot write the target register: keep walking backwards
+        if len(operands) == 0:
+            q.extend(preds[cur])
+            continue
+
+        # only instructions whose first operand is the target register, and whose mnemonic
+        # is in DESTRUCTIVE_MNEMONICS, are treated as definitions; otherwise keep walking
+        op0 = operands[0]
+        if not (
+            op0[OPERAND_TYPE] == OPERAND_TYPE_REGISTER
+            and op0[REGISTER_OPERAND_REGISTER] == reg
+            and insn.mnemonic in DESTRUCTIVE_MNEMONICS
+        ):
+            q.extend(preds[cur])
+            continue
+
+        # if we reach here, the instruction is destructive to our target register.
+
+        # we currently only support extracting the constant from something like: `mov $reg, IAT`
+        # so, any other pattern results in an unknown value, represented by None.
+        # this is a good place to extend in the future, if we need more robust support.
+        if insn.mnemonic != "mov":
+            return (cur, None)
+        else:
+            op1 = operands[1]
+            try:
+                target = get_operand_target(insn, op1)
+            except ValueError:
+                # the source operand has no statically computable target
+                return (cur, None)
+            else:
+                return (cur, target)
+
+    # every reachable predecessor was visited without finding a write to the register
+    raise NotFoundError()
+
+
+def is_indirect_call(insn):
+    """ return True if `insn` is a call whose first operand is a register """
+    if insn.mnemonic != "call":
+        return False
+    target = insn.operands[0]
+    return target[OPERAND_TYPE] == OPERAND_TYPE_REGISTER
+
+
+def resolve_indirect_call(ws, f, insn):
+    """
+    attempt to resolve the target of an indirect (register) call by locating the definition of the register.
+    args:
+      ws (lancelot.PE): the analysis workspace
+      f (int): the address of the function that contains the call
+      insn (lancelot.Instruction): the indirect call instruction to resolve
+    returns:
+      (va: int, value?: int|None): the address of the assignment and the value, if a constant.
+    raises:
+      NotFoundError: when the definition cannot be found.
+    """
+    assert is_indirect_call(insn)
+    definition = find_definition(ws, f, insn)
+    return definition
--- a/capa/features/extractors/lancelot/insn.py
+++ b/capa/features/extractors/lancelot/insn.py
@@ -0,0 +1,487 @@
+import logging
+import itertools
+
+import pefile
+
+try:
+    from functools import lru_cache
+except ImportError:
+    from backports.functools_lru_cache import lru_cache
+
+from lancelot import (
+    OPERAND_TYPE,
+    PERMISSION_READ,
+    MEMORY_OPERAND_BASE,
+    MEMORY_OPERAND_DISP,
+    OPERAND_TYPE_MEMORY,
+    MEMORY_OPERAND_INDEX,
+    OPERAND_TYPE_REGISTER,
+    MEMORY_OPERAND_SEGMENT,
+    OPERAND_TYPE_IMMEDIATE,
+    IMMEDIATE_OPERAND_VALUE,
+    REGISTER_OPERAND_REGISTER,
+    IMMEDIATE_OPERAND_IS_RELATIVE,
+)
+
+import capa.features.extractors.helpers
+from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
+from capa.features.insn import Number, Offset, Mnemonic
+from capa.features.extractors.lancelot.helpers import get_operand_target
+from capa.features.extractors.lancelot.function import get_call_graph
+from capa.features.extractors.lancelot.indirect_calls import NotFoundError, resolve_indirect_call
+
+logger = logging.getLogger(__name__)
+
+
+# security cookie checks may perform non-zeroing XORs.
+# these are expected within this many bytes of the start of a function's first basic block,
+# or of the end of a basic block that returns.
+# ignoring XORs in these ranges helps to reduce false-positive features.
+SECURITY_COOKIE_BYTES_DELTA = 0x40
+
+
+def get_arch(ws):
+    """
+    map the workspace's architecture name onto the matching capa architecture constant.
+
+    raises:
+      ValueError: when the workspace architecture is neither "x32" nor "x64".
+    """
+    arch = ws.arch
+    if arch == "x32":
+        return ARCH_X32
+    if arch == "x64":
+        return ARCH_X64
+    raise ValueError("unexpected architecture")
+
+
+# note: `lru_cache` must be *called* here.
+# the bare `@lru_cache` form is only accepted by functools on python 3.8+,
+# and is not supported by older interpreters or the `backports` fallback imported above.
+@lru_cache()
+def get_pefile(xtor):
+    """
+    parse the extractor's raw file buffer (`xtor.buf`) with pefile.
+    the result is cached per extractor instance, so the extractor must be hashable.
+    """
+    return pefile.PE(data=xtor.buf)
+
+
+# note: `lru_cache` must be *called* here.
+# the bare `@lru_cache` form is only accepted by functools on python 3.8+,
+# and is not supported by older interpreters or the `backports` fallback imported above.
+@lru_cache()
+def get_imports(xtor):
+    """
+    collect the imports of the extractor's PE file.
+    the result is cached per extractor instance.
+
+    returns:
+      Dict[int, str]: mapping from import address (`imp.address`) to the import name,
+        formatted like `<library>.<name>`, or `<library>.#<ordinal>` for imports by ordinal.
+        the library name is lowercased and has its extension removed.
+    """
+    pe = get_pefile(xtor)
+
+    imports = {}
+    # pefile only defines `DIRECTORY_ENTRY_IMPORT` when the file has an import directory,
+    # so fall back to an empty list rather than failing with AttributeError.
+    for entry in getattr(pe, "DIRECTORY_ENTRY_IMPORT", []):
+        libname = entry.dll.decode("ascii").lower().partition(".")[0]
+        for imp in entry.imports:
+            if imp.ordinal:
+                imports[imp.address] = "%s.#%s" % (libname, imp.ordinal)
+            else:
+                impname = imp.name.decode("ascii")
+                imports[imp.address] = "%s.%s" % (libname, impname)
+    return imports
+
+
+# note: `lru_cache` must be *called* here.
+# the bare `@lru_cache` form is only accepted by functools on python 3.8+,
+# and is not supported by older interpreters or the `backports` fallback imported above.
+@lru_cache()
+def get_thunks(xtor):
+    """
+    find functions that are thunks to imports:
+    functions whose first instruction is a `jmp` to an imported address.
+    the result is cached per extractor instance.
+
+    returns:
+      Dict[int, str]: mapping from thunk function address to the name of the import it jumps to.
+    """
+    thunks = {}
+    for va in xtor.ws.get_functions():
+        try:
+            insn = xtor.ws.read_insn(va)
+        except ValueError:
+            # the first instruction cannot be read, so this cannot be recognized as a thunk.
+            continue
+
+        if insn.mnemonic != "jmp":
+            continue
+
+        op0 = insn.operands[0]
+
+        try:
+            target = get_operand_target(insn, op0)
+        except ValueError:
+            # the jump target is not something that can be resolved.
+            continue
+
+        # `get_imports` is itself decorated with the cache, so calling it per function is fine.
+        imports = get_imports(xtor)
+        if target not in imports:
+            continue
+
+        thunks[va] = imports[target]
+
+    return thunks
+
+
+def extract_insn_api_features(xtor, f, bb, insn):
+    """
+    parse API features from the given instruction.
+
+    only `call` instructions are considered.
+    the call target is matched first against the imports, and then against thunks to imports.
+    """
+    if insn.mnemonic != "call":
+        return
+
+    op0 = insn.operands[0]
+
+    if op0[OPERAND_TYPE] != OPERAND_TYPE_REGISTER:
+        try:
+            target = get_operand_target(insn, op0)
+        except ValueError:
+            return
+    else:
+        # like `call eax`: try to recover the value assigned to the register.
+        try:
+            (_, target) = resolve_indirect_call(xtor.ws, f, insn)
+        except NotFoundError:
+            return
+        if target is None:
+            # the definition was found, but it is not a constant.
+            return
+
+    # the names stored in these tables are always strings, so `None` means "no match".
+    api_name = get_imports(xtor).get(target)
+    if api_name is None:
+        api_name = get_thunks(xtor).get(target)
+    if api_name is None:
+        return
+
+    for feature, va in capa.features.extractors.helpers.generate_api_features(api_name, insn.address):
+        yield feature, va
+
+
+def extract_insn_mnemonic_features(xtor, f, bb, insn):
+    """yield the mnemonic of the given instruction as a feature, located at the instruction."""
+    feature = Mnemonic(insn.mnemonic)
+    yield feature, insn.address
+
+
+def extract_insn_number_features(xtor, f, bb, insn):
+    """
+    parse number features from the given instruction.
+
+    each immediate operand value is yielded twice:
+    once as a plain `Number`, and once tagged with the architecture of the workspace.
+    immediates that point to readable memory are assumed to be addresses and are skipped.
+    """
+    operands = insn.operands
+
+    for operand in operands:
+        if operand[OPERAND_TYPE] != OPERAND_TYPE_IMMEDIATE:
+            continue
+
+        v = operand[IMMEDIATE_OPERAND_VALUE]
+
+        if xtor.ws.probe(v) & PERMISSION_READ:
+            # v is a valid address
+            # therefore, assume it's not also a constant.
+            continue
+
+        if (
+            insn.mnemonic == "add"
+            and operands[0][OPERAND_TYPE] == OPERAND_TYPE_REGISTER
+            # the stack pointer is `esp` on x32 and `rsp` on x64.
+            and operands[0][REGISTER_OPERAND_REGISTER] in ("esp", "rsp")
+        ):
+            # skip stack adjustments, like:
+            #
+            #    .text:00401140                 call    sub_407E2B
+            #    .text:00401145                 add     esp, 0Ch
+            return
+
+        yield Number(v), insn.address
+        yield Number(v, arch=get_arch(xtor.ws)), insn.address
+
+
+def extract_insn_offset_features(xtor, f, bb, insn):
+    """
+    parse structure offset features from the given instruction.
+
+    the displacement of each memory operand is yielded twice:
+    once as a plain `Offset`, and once tagged with the architecture of the workspace.
+    memory operands based on the stack or frame pointer are skipped.
+    """
+    operands = insn.operands
+
+    for operand in operands:
+        if operand[OPERAND_TYPE] != OPERAND_TYPE_MEMORY:
+            continue
+
+        # skip accesses relative to the stack/frame pointers, on both x32 and x64.
+        if operand[MEMORY_OPERAND_BASE] in ("esp", "ebp", "rsp", "rbp"):
+            continue
+
+        # lancelot provides `None` when the displacement is not present.
+        v = operand[MEMORY_OPERAND_DISP] or 0
+
+        yield Offset(v), insn.address
+        yield Offset(v, arch=get_arch(xtor.ws)), insn.address
+
+
+def derefs(xtor, p):
+    """
+    recursively follow the given pointer, yielding the valid memory addresses along the way.
+    useful when you may have a pointer to string, or pointer to pointer to string, etc.
+    this is a "do what i mean" type of helper function.
+
+    the walk stops when an address is not readable, when a pointer points to itself,
+    when the pointer cannot be read, or after following more than 10 pointers.
+    """
+
+    depth = 0
+    while True:
+        if not xtor.ws.probe(p) & PERMISSION_READ:
+            return
+        yield p
+
+        try:
+            ptr = xtor.ws.read_pointer(p)
+        except ValueError:
+            # if not enough bytes can be read, such as at the end of a section.
+            # NOTE(review): assumes lancelot reports a failed read via ValueError,
+            # like the `read_insn` handling elsewhere in this module - confirm.
+            return
+
+        # sanity: pointer points to self
+        if ptr == p:
+            return
+
+        # sanity: avoid chains of pointers that are unreasonably deep
+        depth += 1
+        if depth > 10:
+            return
+
+        p = ptr
+
+
+def read_bytes(xtor, va):
+    """
+    read up to MAX_BYTES_FEATURE_SIZE bytes from the given address,
+    without reading past the end of the PE section that contains it.
+
+    raises:
+      ValueError: if the given address is not found within any section.
+    """
+    pe = get_pefile(xtor)
+
+    for section in pe.sections:
+        section_start = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress
+        section_end = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress + section.Misc_VirtualSize
+
+        if not (section_start <= va < section_end):
+            continue
+
+        # clamp the read to the end of the containing section.
+        size = min(va + MAX_BYTES_FEATURE_SIZE, section_end) - va
+        return xtor.ws.read_bytes(va, size)
+
+    raise ValueError("invalid address")
+
+
+# mnemonics of instructions that may transfer control flow elsewhere (calls and jumps).
+FLOW_MNEMONICS = {
+    "call",
+    "jb",
+    "jbe",
+    "jcxz",
+    "jecxz",
+    "jknzd",
+    "jkzd",
+    "jl",
+    "jle",
+    "jmp",
+    "jnb",
+    "jnbe",
+    "jnl",
+    "jnle",
+    "jno",
+    "jnp",
+    "jns",
+    "jnz",
+    "jo",
+    "jp",
+    "jrcxz",
+    "js",
+    "jz",
+}
+
+
+def extract_insn_bytes_features(xtor, f, bb, insn):
+    """
+    parse byte sequence features from the given instruction.
+
+    for each operand that resolves to a target, follow the chain of pointers from that target
+    and yield the bytes found at each address, ignoring buffers that are all zeros.
+    control flow instructions (calls and jumps) are not inspected.
+    """
+    if insn.mnemonic in FLOW_MNEMONICS:
+        return
+
+    for operand in insn.operands:
+        try:
+            target = get_operand_target(insn, operand)
+        except ValueError:
+            # this operand does not reference an address.
+            continue
+
+        for ptr in derefs(xtor, target):
+            try:
+                buf = read_bytes(xtor, ptr)
+            except ValueError:
+                # the address is not within any section.
+                continue
+
+            if not capa.features.extractors.helpers.all_zeros(buf):
+                yield Bytes(buf), insn.address
+
+
+def first(s):
+    """yield only the first element of the given sequence, or nothing when it is empty."""
+    for item in itertools.islice(s, 1):
+        yield item
+
+
+def extract_insn_string_features(xtor, f, bb, insn):
+    """
+    parse string features from the given instruction.
+
+    this reuses the byte sequences found by `extract_insn_bytes_features`,
+    and yields the first ASCII and the first unicode string extracted from each buffer,
+    but only when that string starts at the very beginning of the buffer.
+    """
+    # import explicitly: this module otherwise only imports `capa.features.extractors.helpers`,
+    # so the `strings` submodule would only be reachable if another module imported it first.
+    import capa.features.extractors.strings
+
+    for bytez, va in extract_insn_bytes_features(xtor, f, bb, insn):
+        buf = bytez.value
+
+        for s in itertools.chain(
+            first(capa.features.extractors.strings.extract_ascii_strings(buf)),
+            first(capa.features.extractors.strings.extract_unicode_strings(buf)),
+        ):
+            # only accept a string that the operand points directly at.
+            if s.offset == 0:
+                yield String(s.s), va
+
+
+def is_security_cookie(xtor, f, bb, insn):
+    """
+    check if an instruction is related to security cookie checks.
+
+    the second operand must not be a register other than a stack/frame pointer,
+    and the instruction must be found either near the start of the function's first basic block
+    or near the end of a basic block that ends with a return.
+    """
+    op1 = insn.operands[1]
+    if op1[OPERAND_TYPE] == OPERAND_TYPE_REGISTER:
+        if op1[REGISTER_OPERAND_REGISTER] not in ("esp", "ebp", "rbp", "rsp"):
+            return False
+
+    # expect security cookie init in first basic block within first bytes (instructions)
+    in_first_bb = f == bb.address
+    if in_first_bb and insn.address < (bb.address + SECURITY_COOKIE_BYTES_DELTA):
+        return True
+
+    # ... or within last bytes (instructions) before a return
+    last_insn = list(xtor.get_instructions(f, bb))[-1]
+    if last_insn.mnemonic not in ("ret", "retn"):
+        return False
+
+    return insn.address > (bb.address + bb.length - SECURITY_COOKIE_BYTES_DELTA)
+
+
+def extract_insn_nzxor_characteristic_features(xtor, f, bb, insn):
+    """
+    parse non-zeroing XOR instruction from the given instruction.
+    ignore expected non-zeroing XORs, e.g. security cookies.
+    """
+    if insn.mnemonic != "xor":
+        return
+
+    operands = insn.operands
+    # an XOR of an operand with itself simply zeros it.
+    is_zeroing = operands[0] == operands[1]
+
+    if is_zeroing or is_security_cookie(xtor, f, bb, insn):
+        return
+
+    yield Characteristic("nzxor"), insn.address
+
+
+def extract_insn_peb_access_characteristic_features(xtor, f, bb, insn):
+    """
+    parse peb access from the given instruction. fs:[0x30] on x86, gs:[0x60] on x64
+    """
+    for operand in insn.operands:
+        if operand[OPERAND_TYPE] != OPERAND_TYPE_MEMORY:
+            continue
+
+        segment = operand[MEMORY_OPERAND_SEGMENT]
+
+        via_gs = segment == "gs" and operand[MEMORY_OPERAND_DISP] == 0x60
+        via_fs = segment == "fs" and operand[MEMORY_OPERAND_DISP] == 0x30
+
+        if via_gs or via_fs:
+            yield Characteristic("peb access"), insn.address
+
+
+def extract_insn_segment_access_features(xtor, f, bb, insn):
+    """parse the instruction for memory operands that access the fs or gs segment."""
+    for operand in insn.operands:
+        if operand[OPERAND_TYPE] != OPERAND_TYPE_MEMORY:
+            continue
+
+        segment = operand[MEMORY_OPERAND_SEGMENT]
+        if segment == "gs":
+            yield Characteristic("gs access"), insn.address
+        elif segment == "fs":
+            yield Characteristic("fs access"), insn.address
+
+
+def get_section(xtor, va):
+    """
+    find the index of the PE section that contains the given address.
+
+    raises:
+      ValueError: if the given address is not found within any section.
+    """
+    pe = get_pefile(xtor)
+
+    for index, section in enumerate(pe.sections):
+        start = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress
+        end = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress + section.Misc_VirtualSize
+
+        if va < start or va >= end:
+            continue
+
+        return index
+
+    raise ValueError("invalid address")
+
+
+def extract_insn_cross_section_cflow(xtor, f, bb, insn):
+    """
+    inspect the instruction for a CALL or JMP that crosses section boundaries.
+    flow to an import is not reported.
+    """
+    if insn.mnemonic not in FLOW_MNEMONICS:
+        return
+
+    try:
+        target = get_operand_target(insn, insn.operands[0])
+    except ValueError:
+        return
+
+    if target in get_imports(xtor):
+        return
+
+    try:
+        src_section = get_section(xtor, insn.address)
+        dst_section = get_section(xtor, target)
+    except ValueError:
+        # either address is not within any section.
+        return
+
+    if src_section != dst_section:
+        yield Characteristic("cross section flow"), insn.address
+
+
+def extract_function_calls_from(xtor, f, bb, insn):
+    """
+    extract the callees of the given instruction, located at the address of each callee.
+    a call back to the containing function is additionally reported as a recursive call.
+    """
+    call_graph = get_call_graph(xtor.ws)
+    callees = call_graph.calls_from.get(insn.address, [])
+
+    for callee in callees:
+        yield Characteristic("calls from"), callee
+
+        if callee == f:
+            yield Characteristic("recursive call"), insn.address
+
+    # lancelot doesn't count API calls when constructing the call graph
+    # so we still have to scan for calls to an import
+    if insn.mnemonic != "call":
+        return
+
+    try:
+        target = get_operand_target(insn, insn.operands[0])
+    except ValueError:
+        return
+
+    if target in get_imports(xtor):
+        yield Characteristic("calls from"), target
+
+
+# this feature is most relevant at the function or basic block scope;
+# however, it's most efficient to extract it at the instruction scope.
+def extract_function_indirect_call_characteristic_features(xtor, f, bb, insn):
+    """
+    extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
+    does not include calls like => call ds:dword_ABD4974
+    """
+    if insn.mnemonic != "call":
+        return
+
+    op0 = insn.operands[0]
+    op_type = op0[OPERAND_TYPE]
+
+    if op_type == OPERAND_TYPE_REGISTER:
+        is_indirect = True
+    elif op_type == OPERAND_TYPE_MEMORY:
+        # a memory operand is indirect only when it involves a base or index register.
+        is_indirect = op0[MEMORY_OPERAND_BASE] is not None or op0[MEMORY_OPERAND_INDEX] is not None
+    else:
+        is_indirect = False
+
+    if is_indirect:
+        yield Characteristic("indirect call"), insn.address
+
+
+# names of the handlers that have already been reported as not implemented,
+# so that the warning is logged only once per handler.
+_not_implemented = set()
+
+
+def extract_insn_features(xtor, f, bb, insn):
+    """
+    extract the features from the given instruction by running each of the instruction handlers.
+    a handler that raises NotImplementedError is skipped, with a one-time warning.
+    """
+    for handler in INSTRUCTION_HANDLERS:
+        try:
+            for feature, va in handler(xtor, f, bb, insn):
+                yield feature, va
+        except NotImplementedError:
+            handler_name = handler.__name__
+            if handler_name in _not_implemented:
+                continue
+
+            logger.warning("not implemented: %s", handler_name)
+            _not_implemented.add(handler_name)
+
+
+# the routines invoked, in this order, by `extract_insn_features` for each instruction.
+# each accepts `(xtor, f, bb, insn)` and yields `(feature, va)` pairs.
+INSTRUCTION_HANDLERS = (
+    extract_insn_api_features,
+    extract_insn_number_features,
+    extract_insn_string_features,
+    extract_insn_bytes_features,
+    extract_insn_offset_features,
+    extract_insn_nzxor_characteristic_features,
+    extract_insn_mnemonic_features,
+    extract_insn_peb_access_characteristic_features,
+    extract_insn_cross_section_cflow,
+    extract_insn_segment_access_features,
+    extract_function_calls_from,
+    extract_function_indirect_call_characteristic_features,
+)
--- a/capa/features/extractors/viv/init.py
+++ b/capa/features/extractors/viv/init.py
@@ -8,11 +8,7 @@

 import types

-import file
-import insn
-import function
 import viv_utils
-import basicblock

 import capa.features.extractors
 import capa.features.extractors.viv.file
--- a/capa/features/extractors/viv/function.py
+++ b/capa/features/extractors/viv/function.py
@@ -25,45 +25,6 @@ def interface_extract_function_XXX(f):
    yield NotImplementedError("feature"), NotImplementedError("virtual address")


-def get_switches(vw):
-    """
-    caching accessor to vivisect workspace switch constructs.
-    """
-    if "switches" in vw.metadata:
-        return vw.metadata["switches"]
-    else:
-        # addresses of switches in the program
-        switches = set()
-
-        for case_va, _ in filter(lambda t: "case" in t[1], vw.getNames()):
-            # assume that the xref to a case location is a switch construct
-            for switch_va, _, _, _ in vw.getXrefsTo(case_va):
-                switches.add(switch_va)
-
-        vw.metadata["switches"] = switches
-        return switches
-
-
-def get_functions_with_switch(vw):
-    if "functions_with_switch" in vw.metadata:
-        return vw.metadata["functions_with_switch"]
-    else:
-        functions = set()
-        for switch in get_switches(vw):
-            functions.add(vw.getFunction(switch))
-        vw.metadata["functions_with_switch"] = functions
-        return functions
-
-
-def extract_function_switch(f):
-    """
-    parse if a function contains a switch statement based on location names
-    method can be optimized
-    """
-    if f.va in get_functions_with_switch(f.vw):
-        yield Characteristic("switch"), f.va
-
-
 def extract_function_calls_to(f):
    for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
        yield Characteristic("calls to"), src
@@ -106,4 +67,4 @@ def extract_features(f):
            yield feature, va


-FUNCTION_HANDLERS = (extract_function_switch, extract_function_calls_to, extract_function_loop)
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -128,10 +128,13 @@ def extract_insn_number_features(f, bb, insn):
    #     push    3136B0h         ; dwControlCode
    for oper in insn.opers:
        # this is for both x32 and x64
-        if not isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
+        if not isinstance(oper, (envi.archs.i386.disasm.i386ImmOper, envi.archs.i386.disasm.i386ImmMemOper)):
            continue

-        v = oper.getOperValue(oper)
+        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
+            v = oper.getOperValue(oper)
+        else:
+            v = oper.getOperAddr(oper)

        if f.vw.probeMemory(v, 1, envi.memory.MM_READ):
            # this is a valid address
@@ -162,7 +165,12 @@ def derefs(vw, p):
            return
        yield p

-        next = vw.readMemoryPtr(p)
+        try:
+            next = vw.readMemoryPtr(p)
+        except Exception:
+            # if not enough bytes can be read, such as end of the section.
+            # unfortunately, viv returns a plain old generic `Exception` for this.
+            return

        # sanity: pointer points to self
        if next == p:
@@ -390,7 +398,9 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
    if insn.mnem not in ["push", "mov"]:
        return

-    if "fs" in insn.getPrefixName():
+    prefix = insn.getPrefixName()
+
+    if "fs" in prefix:
        for oper in insn.opers:
            # examples
            #
@@ -403,10 +413,12 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
                isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
            ):
                yield Characteristic("peb access"), insn.va
-    elif "gs" in insn.getPrefixName():
+    elif "gs" in prefix:
        for oper in insn.opers:
-            if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
-                isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
+            if (
+                (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60)
+                or (isinstance(oper, envi.archs.amd64.disasm.i386SibOper) and oper.imm == 0x60)
+                or (isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60)
            ):
                yield Characteristic("peb access"), insn.va
    else:
--- a/capa/features/freeze.py
+++ b/capa/features/freeze.py
@@ -101,7 +101,9 @@ def dumps(extractor):
            for feature, va in extractor.extract_basic_block_features(f, bb):
                ret["scopes"]["basic block"].append(serialize_feature(feature) + (hex(va), (hex(f), hex(bb),)))

-            for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]):
+            for insnva, insn in sorted(
+                [(insn.__int__(), insn) for insn in extractor.get_instructions(f, bb)], key=lambda p: p[0]
+            ):
                ret["functions"][hex(f)][hex(bb)].append(hex(insnva))

                for feature, va in extractor.extract_insn_features(f, bb, insn):
@@ -245,12 +247,7 @@ def main(argv=None):
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

-    vw = capa.main.get_workspace(args.sample, args.format)
-
-    # don't import this at top level to support ida/py3 backend
-    import capa.features.extractors.viv
-
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
+    extractor = capa.main.get_extractor(args.sample, args.format)
    with open(args.output, "wb") as f:
        f.write(dump(extractor))

--- a/capa/features/insn.py
+++ b/capa/features/insn.py
@@ -24,7 +24,10 @@ class Number(Feature):
        super(Number, self).__init__(value, arch=arch, description=description)

    def get_value_str(self):
-        return "0x%X" % self.value
+        if self.value < 0:
+            return "-0x%X" % (-self.value)
+        else:
+            return "0x%X" % self.value


 class Offset(Feature):
--- a/capa/ida/explorer/model.py
+++ b/capa/ida/explorer/model.py
@@ -353,7 +353,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            # TODO: do we display 'not'
            pass
        elif statement["type"] == "some":
-            return CapaExplorerDefaultItem(parent, statement["count"] + " or more")
+            return CapaExplorerDefaultItem(parent, str(statement["count"]) + " or more")
        elif statement["type"] == "range":
            # `range` is a weird node, its almost a hybrid of statement + feature.
            # it is a specific feature repeated multiple times.
@@ -528,7 +528,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            if feature[feature["type"]] in ("embedded pe",):
                return CapaExplorerByteViewItem(parent, display, location)

-            if feature[feature["type"]] in ("loop", "recursive call", "tight loop", "switch"):
+            if feature[feature["type"]] in ("loop", "recursive call", "tight loop"):
                return CapaExplorerFeatureItem(parent, display=display)

            # default to instruction view for all other characteristics
@@ -546,7 +546,17 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        if feature["type"] == "basicblock":
            return CapaExplorerBlockItem(parent, location)

-        if feature["type"] in ("bytes", "api", "mnemonic", "number", "offset"):
+        if feature["type"] in (
+            "bytes",
+            "api",
+            "mnemonic",
+            "number",
+            "offset",
+            "number/x32",
+            "number/x64",
+            "offset/x32",
+            "offset/x64",
+        ):
            # display instruction preview
            return CapaExplorerInstructionViewItem(parent, display, location)

--- a/capa/main.py
+++ b/capa/main.py
@@ -18,6 +18,7 @@ import datetime
 import textwrap
 import collections

+import halo
 import tqdm
 import colorama

@@ -31,7 +32,7 @@ import capa.features.extractors
 from capa.helpers import oint, get_file_taste

 RULES_PATH_DEFAULT_STRING = "(embedded rules)"
-SUPPORTED_FILE_MAGIC = set(["MZ"])
+SUPPORTED_FILE_MAGIC = set([b"MZ"])


 logger = logging.getLogger("capa")
@@ -106,7 +107,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):

    meta = {"feature_counts": {"file": 0, "functions": {},}}

-    for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
+    for f in tqdm.tqdm(list(extractor.get_functions()), disable=disable_progress, desc="matching", unit=" functions"):
        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
        meta["feature_counts"]["functions"][f.__int__()] = feature_count
        logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
@@ -269,16 +270,17 @@ def get_workspace(path, format, should_save=True):
    return vw


-def get_extractor_py2(path, format):
+def get_extractor_py2(path, format, disable_progress=False):
    import capa.features.extractors.viv

-    vw = get_workspace(path, format, should_save=False)
+    with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
+        vw = get_workspace(path, format, should_save=False)

-    try:
-        vw.saveWorkspace()
-    except IOError:
-        # see #168 for discussion around how to handle non-writable directories
-        logger.info("source directory is not writable, won't save intermediate workspace")
+        try:
+            vw.saveWorkspace()
+        except IOError:
+            # see #168 for discussion around how to handle non-writable directories
+            logger.info("source directory is not writable, won't save intermediate workspace")

    return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)

@@ -287,19 +289,36 @@ class UnsupportedRuntimeError(RuntimeError):
    pass


-def get_extractor_py3(path, format):
-    raise UnsupportedRuntimeError()
+def get_extractor_py3(path, format, disable_progress=False):
+    try:
+        import lancelot
+
+        import capa.features.extractors.lancelot
+    except ImportError:
+        logger.warning("lancelot not installed")
+        raise UnsupportedRuntimeError()
+
+    if format not in ("pe", "auto"):
+        raise UnsupportedFormatError(format)
+
+    if not is_supported_file_type(path):
+        raise UnsupportedFormatError()
+
+    with open(path, "rb") as f:
+        buf = f.read()
+
+    return capa.features.extractors.lancelot.LancelotFeatureExtractor(buf)


-def get_extractor(path, format):
+def get_extractor(path, format, disable_progress=False):
    """
    raises:
      UnsupportedFormatError:
    """
    if sys.version_info >= (3, 0):
-        return get_extractor_py3(path, format)
+        return get_extractor_py3(path, format, disable_progress=disable_progress)
    else:
-        return get_extractor_py2(path, format)
+        return get_extractor_py2(path, format, disable_progress=disable_progress)


 def is_nursery_rule_path(path):
@@ -315,7 +334,7 @@ def is_nursery_rule_path(path):
    return "nursery" in path


-def get_rules(rule_path):
+def get_rules(rule_path, disable_progress=False):
    if not os.path.exists(rule_path):
        raise IOError("rule path %s does not exist or cannot be accessed" % rule_path)

@@ -343,7 +362,8 @@ def get_rules(rule_path):
                rule_paths.append(rule_path)

    rules = []
-    for rule_path in rule_paths:
+
+    for rule_path in tqdm.tqdm(list(rule_paths), disable=disable_progress, desc="loading ", unit="     rules"):
        try:
            rule = capa.rules.Rule.from_yaml_file(rule_path)
        except capa.rules.InvalidRule:
@@ -526,7 +546,7 @@ def main(argv=None):
        logger.debug("using rules path: %s", rules_path)

    try:
-        rules = get_rules(rules_path)
+        rules = get_rules(rules_path, disable_progress=args.quiet)
        rules = capa.rules.RuleSet(rules)
        logger.debug("successfully loaded %s rules", len(rules))
        if args.tag:
@@ -546,7 +566,7 @@ def main(argv=None):
    else:
        format = args.format
        try:
-            extractor = get_extractor(args.sample, args.format)
+            extractor = get_extractor(args.sample, args.format, disable_progress=args.quiet)
        except UnsupportedFormatError:
            logger.error("-" * 80)
            logger.error(" Input file does not appear to be a PE file.")
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -69,7 +69,6 @@ SUPPORTED_FEATURES = {
    FUNCTION_SCOPE: {
        # plus basic block scope features, see below
        capa.features.basicblock.BasicBlock,
-        capa.features.Characteristic("switch"),
        capa.features.Characteristic("calls from"),
        capa.features.Characteristic("calls to"),
        capa.features.Characteristic("loop"),
--- a/capa/unpack/init.py
+++ b/capa/unpack/init.py
@@ -0,0 +1,47 @@
+import sys
+import logging
+
+try:
+    from functools import lru_cache
+except ImportError:
+    from backports.functools_lru_cache import lru_cache
+
+logger = logging.getLogger(__name__)
+
+
+class NotPackedError(ValueError):
+    """raised when none of the known unpackers recognizes the given file as packed."""
+
+    def __init__(self):
+        ValueError.__init__(self, "not packed")
+
+
+def can_unpack():
+    """return True when the running interpreter is able to use the unpacking backend."""
+    # the unpacking backend is based on Speakeasy, which supports python 3.6+
+    major_minor = sys.version_info[:2]
+    return major_minor >= (3, 6)
+
+
+# note: `lru_cache` must be *called* here.
+# the bare `@lru_cache` form is only accepted by functools on python 3.8+,
+# while `can_unpack` in this module advertises support for python 3.6+.
+@lru_cache()
+def get_unpackers():
+    """
+    fetch the supported unpackers. the result is cached after the first call.
+
+    returns:
+      Dict[str, type]: mapping from unpacker name (its `name` attribute) to the unpacker class.
+    """
+    # break import loop
+    import capa.unpack.aspack
+
+    return {p.name: p for p in [capa.unpack.aspack.AspackUnpacker]}
+
+
+def detect_packer(buf):
+    """
+    find the name of the first known unpacker that recognizes the given buffer as packed.
+
+    raises:
+      NotPackedError: when no unpacker recognizes the buffer.
+    """
+    matches = (unpacker.name for unpacker in get_unpackers().values() if unpacker.is_packed(buf))
+    for name in matches:
+        # the generator is lazy, so unpackers after the first match are not consulted.
+        return name
+
+    raise NotPackedError()
+
+
+def is_packed(buf):
+    """return True when any known unpacker recognizes the given buffer as packed."""
+    try:
+        detect_packer(buf)
+    except NotPackedError:
+        return False
+    return True
+
+
+def unpack_pe(packer, buf):
+    """
+    unpack the given buffer using the unpacker registered under the given packer name.
+
+    raises:
+      KeyError: when there is no unpacker with the given name.
+    """
+    unpacker = get_unpackers()[packer]
+    return unpacker.unpack_pe(buf)
--- a/capa/unpack/aspack.py
+++ b/capa/unpack/aspack.py
@@ -0,0 +1,459 @@
+import io
+import struct
+import logging
+import contextlib
+import collections
+
+import pefile
+import speakeasy
+import speakeasy.common as se_common
+import speakeasy.profiler
+import speakeasy.windows.objman
+
+logger = logging.getLogger(__name__)
+
+
+def pefile_get_section_by_name(pe, section_name):
+    """
+    find the section with the given name.
+
+    args:
+      pe (pefile.PE): the parsed PE file.
+      section_name (str): the section name, without NULL padding, like ".aspack".
+
+    returns: the first entry of `pe.sections` whose name matches.
+
+    raises:
+      ValueError: when no section has the given name.
+    """
+    for section in pe.sections:
+        try:
+            # section names are NULL-padded byte strings;
+            # compare only the part before the first NULL.
+            name = section.Name.partition(b"\x00")[0].decode("ascii")
+        except Exception:
+            # best effort: skip sections whose names we can't interpret, such as non-ASCII names.
+            # catch Exception, rather than using a bare except,
+            # so that KeyboardInterrupt and SystemExit still propagate.
+            continue
+
+        if name == section_name:
+            return section
+
+    raise ValueError("section not found")
+
+
+def prepare_emu_context(se, module):
+    """
+    prepare a Speakeasy instance for emulating the given module, without running it.
+
+    this is useful when planning to manually control the emulator,
+    such as via `Speakeasy.emu.emu_eng.start(...)`.
+    typically, Speakeasy expects to do "Run based" analysis,
+    which doesn't give us too much control.
+
+    much of this was derived from win32::Win32Emulator::run_module.
+    hopefully this can eventually be merged into Speakeasy.
+
+    this mutates the given instance in place:
+    it sets up the current process and thread, and allocates the PEB and TEB.
+
+    args:
+      se (speakeasy.Speakeasy): the instance to prepare
+      module (speakeasy.Module): the module that will be emulated
+    """
+    se._init_hooks()
+
+    # a module that is not an exe (such as a DLL) is hosted in a container process;
+    # an exe is its own main module.
+    main_exe = None
+    if not module.is_exe():
+        container = se.emu.init_container_process()
+        if container:
+            se.emu.processes.append(container)
+            se.emu.curr_process = container
+    else:
+        main_exe = module
+
+    if main_exe:
+        # place the main exe at the front of the user module list
+        se.emu.user_modules = [main_exe] + se.emu.user_modules
+
+    # Create an empty process object for the module if none is supplied
+    if len(se.emu.processes) == 0:
+        p = speakeasy.windows.objman.Process(se.emu, path=module.get_emu_path(), base=module.base, pe=module)
+        se.emu.curr_process = p
+
+    # create the thread that will execute the module, and make it the current thread
+    t = speakeasy.windows.objman.Thread(se.emu, stack_base=se.emu.stack_base, stack_commit=module.stack_commit)
+
+    se.emu.om.objects.update({t.address: t})
+    se.emu.curr_process.threads.append(t)
+    se.emu.curr_thread = t
+
+    # allocate the PEB for the current process, and initialize the thread's TEB with it
+    peb = se.emu.alloc_peb(se.emu.curr_process)
+    se.emu.init_teb(t, peb)
+
+
+INSN_PUSHA = 0x60
+INSN_POPA = 0x61
+
+
+class AspackUnpacker(speakeasy.Speakeasy):
+    name = "aspack"
+
+    def __init__(self, buf, debug=False):
+        """
+        load the given PE file into a fresh emulator and prepare it for manual emulation.
+
+        args:
+          buf (bytes): the contents of an ASPack'd PE file.
+          debug (bool): passed through to the Speakeasy constructor.
+        """
+        super(AspackUnpacker, self).__init__(debug=debug)
+        module = self.load_module(data=buf)
+        self.module = module
+        prepare_emu_context(self, module)
+
+    @staticmethod
+    def detect_aspack(buf):
+        """
+        return True if the given buffer contains an ASPack'd PE file.
+        we detect aspack by looking at the section names for .aspack.
+        the unpacking routine contains further validation and will raise an exception if necessary.
+
+        args:
+          buf (bytes): the contents of a PE file.
+
+        returns: bool
+        """
+        try:
+            pe = pefile.PE(data=buf, fast_load=True)
+        except Exception:
+            # pefile can't parse this as a PE file, so it can't be an ASPack'd PE file.
+            # catch Exception, rather than using a bare except,
+            # so that KeyboardInterrupt and SystemExit still propagate.
+            return False
+
+        try:
+            pefile_get_section_by_name(pe, ".aspack")
+        except ValueError:
+            # no section named .aspack
+            return False
+
+        return True
+
+    # `capa.unpack.detect_packer` probes each registered unpacker class via `is_packed(buf)`,
+    # so expose the detection routine under that name, too.
+    is_packed = detect_aspack
+
+    @classmethod
+    def unpack_pe(cls, buf):
+        """
+        unpack an ASPack'd PE file and return the reconstructed PE file.
+
+        this creates a new unpacker instance for the buffer and runs it to completion.
+
+        args:
+          buf (bytes): the contents of an ASPack'd PE file.
+
+        returns: bytes
+        """
+        return cls(buf).unpack()
+
+    def stepi(self):
+        """single step: start the emulation engine at the current program counter with `count=1`."""
+        pc = self.emu.get_pc()
+        self.emu.emu_eng.start(pc, count=1)
+
+    def remove_hook(self, hook_type, hook_handle):
+        """
+        uninstall a hook: forget it in the emulator's bookkeeping,
+        then remove it from the emulation engine.
+
+        args:
+          hook_type: the hook category, like `speakeasy.common.HOOK_CODE`.
+          hook_handle: the hook object to remove.
+        """
+        # TODO: this should be part of speakeasy
+        registered = self.emu.hooks[hook_type]
+        registered.remove(hook_handle)
+
+        engine = self.emu.emu_eng
+        engine.hook_remove(hook_handle.handle)
+
+    def remove_mem_read_hook(self, hook_handle):
+        """uninstall the given memory read hook."""
+        # TODO: this should be part of speakeasy
+        hook_type = se_common.HOOK_MEM_READ
+        self.remove_hook(hook_type, hook_handle)
+
+    @contextlib.contextmanager
+    def mem_read_hook(self, hook):
+        """
+        context manager that installs a memory read hook for the duration of the block.
+        the hook is removed when the block exits, even if it raises.
+
+        example (the hook signature matches the one installed by `dump`):
+
+            with self.mem_read_hook(lambda emu, access, addr, size, value, ctx: emu.stop()):
+                self.emu.emu_eng.start(0x401000)
+
+        args:
+          hook (speakeasy.common.MemReadHook): the hook to install
+        """
+        installed = self.add_mem_read_hook(hook)
+        # if this fails, then there's still an unfixed bug in Speakeasy
+        assert installed.handle != 0
+        try:
+            yield
+        finally:
+            self.remove_mem_read_hook(installed)
+
+    def remove_code_hook(self, hook_handle):
+        """uninstall the given code hook."""
+        # TODO: this should be part of speakeasy
+        hook_type = se_common.HOOK_CODE
+        self.remove_hook(hook_type, hook_handle)
+
+    @contextlib.contextmanager
+    def code_hook(self, hook):
+        """
+        context manager that installs a code hook for the duration of the block.
+        the hook is removed when the block exits, even if it raises.
+
+        example:
+
+            with self.code_hook(lambda emu, addr, size, ctx: emu.stop()):
+                self.emu.emu_eng.start(0x401000)
+
+        args:
+          hook (speakeasy.common.CodeHook): the hook to install
+        """
+        installed = self.add_code_hook(hook)
+        assert installed.handle != 0
+        try:
+            yield
+        finally:
+            self.remove_code_hook(installed)
+
+    def read_ptr(self, va):
+        """
+        read a pointer-sized, little-endian integer from emulator memory.
+
+        args:
+          va (int): the virtual address to read from.
+
+        returns: int
+        """
+        raw = self.mem_read(va, self.emu.ptr_size)
+        return int.from_bytes(raw, "little")
+
+    def dump(self):
+        """
+        emulate the loaded module, pausing after an appropriate section hop.
+        then, dump and return the module's memory and OEP.
+
+        this routine is specific to aspack. it makes the following assumptions:
+          - aspack starts with a PUSHA to save off the CPU context
+          - aspack then runs its unpacking stub
+          - aspack executes POPA to restore the CPU context
+          - aspack section hops to the OEP
+
+        we'll emulate in a few phases:
+          1. single step over PUSHA at the entrypoint
+          2. extract the address of the saved CPU context
+          3. emulate until the saved CPU context is read
+          4. assert this is a POPA instruction
+          5. emulate until a section hop
+          6. profit!
+
+        return the module's memory segment and the OEP.
+
+        returns: Tuple[bytes, int]
+
+        raises:
+          ValueError: when the entrypoint is not a PUSHA instruction,
+            or when the instruction that reads the saved CPU context is not a POPA instruction.
+        """
+
+        # prime the emulator.
+        # this is derived from winemu::WindowsEmulator::start()
+        self.emu.curr_run = speakeasy.profiler.Run()
+        self.emu.curr_mod = self.module
+        self.emu.set_hooks()
+        self.emu._set_emu_hooks()
+
+        # 0. sanity checking: assert entrypoint is a PUSHA instruction
+        entrypoint = self.module.base + self.module.ep
+        opcode = self.emu.mem_read(entrypoint, 1)[0]
+        if opcode != INSN_PUSHA:
+            raise ValueError("not packed with supported ASPack")
+
+        # 1. single step over PUSHA
+        self.emu.set_pc(entrypoint)
+        self.stepi()
+
+        # 2. extract address of saved CPU context
+        # the PUSHA just executed, so the stack pointer now refers to the saved context.
+        saved_cpu_context = self.emu.get_stack_ptr()
+
+        # 3. emulate until saved CPU context is accessed
+        def until_read(target):
+            """return a mem_read hook that stops the emulator when an address is read."""
+
+            def inner(emu, _access, addr, _size, _value, _ctx):
+                if addr == target:
+                    emu.stop()
+                return True
+
+            return inner
+
+        with self.mem_read_hook(until_read(saved_cpu_context)):
+            self.emu.emu_eng.start(self.emu.get_pc())
+
+        # 4. assert this is a POPA instruction
+        opcode = self.emu.mem_read(self.emu.get_pc(), 1)[0]
+        if opcode != INSN_POPA:
+            raise ValueError("not packed with supported ASPack")
+        logger.debug("POPA: 0x%x", self.emu.get_pc())
+
+        # 5. emulate until a section hop
+        # compute the address range [start, end) of the .aspack section, which holds the unpacking stub.
+        aspack_section = self.module.get_section_by_name(".aspack")
+        start = self.module.base + aspack_section.VirtualAddress
+        end = start + aspack_section.Misc_VirtualSize
+
+        def until_section_hop(start, end):
+            """return a code hook that stops the emulator when execution leaves the range [start, end)."""
+
+            def inner(emu, addr, _size, _ctx):
+                if addr < start or addr >= end:
+                    emu.stop()
+                return True
+
+            return inner
+
+        with self.code_hook(until_section_hop(start, end)):
+            self.emu.emu_eng.start(self.emu.get_pc())
+
+        # 6. dump and return
+        # the emulator stopped at the first instruction outside of .aspack, which we treat as the OEP.
+        oep = self.emu.get_pc()
+        logger.debug("OEP: 0x%x", oep)
+
+        # read the entire memory region that is mapped at the module's base address
+        mm = self.get_address_map(self.module.base)
+        buf = self.mem_read(mm.base, mm.size)
+
+        return buf, oep
+
+    def fixup(self, buf, oep):
+        """
+        fixup a PE image that's been dumped from memory after unpacking aspack.
+
+        there are two big fixes that need to happen:
+          1. update the section pointers and sizes
+          2. rebuild the import table
+
+        for (1) updating the section pointers, we'll just update the
+        physical pointers to match the virtual pointers, since this is a loaded image.
+
+        for (2) rebuilding the import table, we'll:
+          (a) inspect the emulation results for resolved imports, which tells us dll/symbol names
+          (b) scan the dumped image for the unpacked import thunks (Import Address Table/Thunk Table)
+          (c) match the import thunks with resolved imports
+          (d) build the import table structures
+          (e) write the reconstructed table into the .aspack section
+
+        since the .aspack section contains the unpacking stub, which is no longer used,
+        then we'll write the reconstructed IAT there. hopefully it's big enough.
+
+        the entry point of the image is also updated to point at the given OEP.
+
+        args:
+          buf (bytes): the module's memory, as dumped by `dump`.
+          oep (int): the virtual address of the original entry point, as found by `dump`.
+
+        returns: the result of `pefile.PE.write()` for the fixed up image.
+
+        raises:
+          ValueError: when the image has no .aspack section.
+          AssertionError: when the reconstructed import structures don't fit into the .aspack section.
+        """
+        pe = pefile.PE(data=buf)
+
+        # the header stores the entry point as an RVA, so rebase the OEP.
+        pe.OPTIONAL_HEADER.AddressOfEntryPoint = oep - self.module.base
+
+        # 1. update section pointers and sizes.
+        for section in pe.sections:
+            section.PointerToRawData = section.VirtualAddress
+            section.SizeOfRawData = section.Misc_VirtualSize
+
+        # 2. rebuild the import table
+
+        # place the reconstructed import table in the .aspack section (unpacking stub)
+        reconstruction_target = pefile_get_section_by_name(pe, ".aspack").VirtualAddress
+
+        # mapping from import pointer to (dll name, symbol name).
+        # the import pointer is generated by speakeasy and is not mapped.
+        # it often looks something like 0xfeedf008.
+        # as we encounter pointers with values like this, we can resolve the symbol.
+        imports = {}
+
+        # 2a. find resolved imports
+        for addr, (dll, sym) in self.module.import_table.items():
+            # these are items in the original import table.
+            logger.debug(f"found static import  {dll}.{sym}")
+            imports[addr] = (dll, sym)
+        for (addr, dll, sym) in self.emu.dyn_imps:
+            # these are imports that have been resolved at runtime by the unpacking stub.
+            logger.debug(f"found dynamic import {dll}.{sym}")
+            imports[addr] = (dll, sym)
+
+        # 2b. find the existing thunk tables
+        # these are pointer-aligned tables of import pointers.
+        # in my test sample, it's found at the start of the first section.
+
+        # ordered list of tuples (VA, import pointer)
+        # look up the symbol using the import pointer and the `imports` mapping.
+        thunks = []
+
+        # scan from the start of the first section
+        # until we reach values that don't look like thunk tables.
+        # the upper bound of the range is just a sentinel; the loop exits via `break`.
+        for va in range(pe.sections[0].VirtualAddress + self.module.base, 0xFFFFFFFFFFFFFFFF, self.emu.ptr_size):
+            ptr = self.read_ptr(va)
+            if ptr == 0:
+                # probably padding/terminating entry
+                continue
+
+            if ptr in imports:
+                thunks.append((va, ptr,))
+                logger.debug(f"found import thunk at {va:08x} to {ptr:08x} for {imports[ptr][0]}\t{imports[ptr][1]}")
+                continue
+
+            # otherwise, at the end of the thunk tables
+            break
+
+        # 2c. match the import thunks with resolved imports:
+        # collect the thunk entries into contiguous tables, grouped by dll name.
+        #
+        # list of thunk tuples that are contiguous and have the same dll name:
+        #   (VA, import pointer, dll name, symbol name)
+        curr_idt_table = []
+        # list of list of thunk tuples, like above
+        idt_tables = []
+        for thunk in thunks:
+            va, imp = thunk
+            dll, sym = imports[imp]
+
+            if not curr_idt_table:
+                curr_idt_table.append((va, imp, dll, sym))
+            elif curr_idt_table[0][2] == dll:
+                curr_idt_table.append((va, imp, dll, sym))
+            else:
+                idt_tables.append(curr_idt_table)
+                curr_idt_table = [(va, imp, dll, sym)]
+        # NOTE(review): when no thunks were found, this appends an empty table,
+        # and `idt_entry[0]` below raises IndexError. confirm whether this case needs explicit handling.
+        idt_tables.append(curr_idt_table)
+
+        # 2d. build the import table structures
+
+        # mapping from the data identifier to its RVA (which will be found within the reconstruction blob)
+        locations = {}
+        # the raw bytes of the reconstructed import structures.
+        # it will have the following layout:
+        #   1. DLL name strings and Hint/Name table entries
+        #   2. Import Lookup Tables (points into (1))
+        #   3. Import Directory Tables (points into (1), (2), and original Thunk Tables)
+        reconstruction = io.BytesIO()
+
+        # list of dll names
+        dlls = list(sorted(set(map(lambda pair: pair[0], imports.values()))))
+        # mapping from dll name to list of symbols
+        symbols = collections.defaultdict(set)
+        for dll, sym in imports.values():
+            symbols[dll].add(sym)
+
+        # emit strings into the reconstruction blob
+        for dll in dlls:
+            locations[("dll", dll)] = reconstruction_target + reconstruction.tell()
+            reconstruction.write(dll.encode("ascii") + b"\x00")
+            if reconstruction.tell() % 2 == 1:
+                # padding
+                reconstruction.write(b"\x00")
+
+            for sym in sorted(symbols[dll]):
+                locations[("hint", dll, sym)] = reconstruction_target + reconstruction.tell()
+                # export name pointer table hint == 0
+                reconstruction.write(b"\x00\x00")
+                # name
+                reconstruction.write(sym.encode("ascii") + b"\x00")
+                if reconstruction.tell() % 2 == 1:
+                    # padding
+                    reconstruction.write(b"\x00")
+
+        # emit Import Lookup Tables for each recovered thunk table
+        # entries are pointer-sized: 32-bit or 64-bit, depending on the emulated architecture.
+        ptr_format = "<I" if self.emu.ptr_size == 4 else "<Q"
+        for i, idt_entry in enumerate(idt_tables):
+            locations[("import lookup table", i)] = reconstruction_target + reconstruction.tell()
+            for (va, imp, dll, sym) in idt_entry:
+                reconstruction.write(struct.pack(ptr_format, locations[("hint", dll, sym)]))
+            # terminating NULL entry
+            reconstruction.write(b"\x00" * 8)
+
+        # emit Import Descriptor Tables for each recovered thunk table
+        # NOTE(review): each descriptor written below is five 32-bit fields (20 bytes),
+        # yet IDT_ENTRY_SIZE is 0x20 (32 bytes), so the terminating entry and the
+        # directory Size computed from it don't match the emitted entry size.
+        # confirm whether 0x14 was intended.
+        IDT_ENTRY_SIZE = 0x20
+        for i, idt_entry in enumerate(idt_tables):
+            va, _, dll, _ = idt_entry[0]
+            # the thunk table stays where the unpacking stub left it; reference it by RVA.
+            rva = va - self.module.base
+            locations[("import descriptor table", i)] = reconstruction_target + reconstruction.tell()
+
+            # import lookup table rva
+            reconstruction.write(struct.pack("<I", locations[("import lookup table", i)]))
+            # date stamp
+            reconstruction.write(struct.pack("<I", 0x0))
+            # forwarder chain
+            reconstruction.write(struct.pack("<I", 0x0))
+            # name rva
+            reconstruction.write(struct.pack("<I", locations[("dll", dll)]))
+            # import address table rva
+            reconstruction.write(struct.pack("<I", rva))
+        # empty last entry
+        reconstruction.write(b"\x00" * IDT_ENTRY_SIZE)
+
+        # if the reconstructed import structures are larger than the unpacking stub...
+        # i'm not sure what we'll do. probably need to add a section.
+        assert len(reconstruction.getvalue()) <= pefile_get_section_by_name(pe, ".aspack").Misc_VirtualSize
+
+        # 2e. write the reconstructed table into the .aspack section,
+        # and point the import data directory (index 1) at the first import descriptor.
+        pe.set_bytes_at_rva(reconstruction_target, reconstruction.getvalue())
+        pe.OPTIONAL_HEADER.DATA_DIRECTORY[1].VirtualAddress = locations[("import descriptor table", 0)]
+        pe.OPTIONAL_HEADER.DATA_DIRECTORY[1].Size = IDT_ENTRY_SIZE * len(idt_tables)
+
+        return pe.write()
+
+    def unpack(self):
+        """
+        unpack the loaded module: dump it from the emulator, then fix up the dumped image.
+
+        returns: the fixed up PE file, as returned by `fixup`.
+        """
+        return self.fixup(*self.dump())
+
+
+if __name__ == "__main__":
+    import sys
+
+    # usage: aspack.py <packed input path> <unpacked output path>
+    input_path = sys.argv[1]
+    output_path = sys.argv[2]
+
+    with open(input_path, "rb") as f:
+        buf = f.read()
+
+    with open(output_path, "wb") as f:
+        f.write(AspackUnpacker.unpack_pe(buf))
--- a/2
+++ b/2
--- a/scripts/import-to-bn.py
+++ b/scripts/import-to-bn.py
@@ -1,112 +1,112 @@
-"""
-Binary Ninja plugin that imports a capa report,
-produced via `capa --json /path/to/sample`,
-into the current database.
-
-It will mark up functions with their capa matches, like:
-
-    ; capa: print debug messages (host-interaction/log/debug/write-event)
-    ; capa: delete service (host-interaction/service/delete)
-    ; Attributes: bp-based frame
-
-    public UninstallService
-    UninstallService proc near
-    ...
-
-To use, invoke from the Binary Ninja Tools menu, or from the 
-command-palette.
-
-Adapted for Binary Ninja by @psifertex
-
-This script will verify that the report matches the workspace.
-Check the log window for any errors, and/or the summary of changes.
-
-Derived from: https://github.com/fireeye/capa/blob/master/scripts/import-to-ida.py
-"""
-import os
-import json
-
-from binaryninja import *
-
-
-def append_func_cmt(bv, va, cmt):
-    """
-    add the given comment to the given function, 
-    if it doesn't already exist.
-    """
-    func = bv.get_function_at(va)
-    if not func:
-        raise ValueError("not a function")
-
-    if cmt in func.comment:
-        return
-
-    func.comment = func.comment + "\n" + cmt
-
-
-def load_analysis(bv):
-    shortname = os.path.splitext(os.path.basename(bv.file.filename))[0]
-    dirname = os.path.dirname(bv.file.filename)
-    log_info(f"dirname: {dirname}\nshortname: {shortname}\n")
-    if os.access(os.path.join(dirname, shortname + ".js"), os.R_OK):
-        path = os.path.join(dirname, shortname + ".js")
-    elif os.access(os.path.join(dirname, shortname + ".json"), os.R_OK):
-        path = os.path.join(dirname, shortname + ".json")
-    else:
-        path = interaction.get_open_filename_input("capa report:", "JSON (*.js *.json);;All Files (*)")
-    if not path or not os.access(path, os.R_OK):
-        log_error("Invalid filename.")
-        return 0
-    log_info("Using capa file %s" % path)
-
-    with open(path, "rb") as f:
-        doc = json.loads(f.read().decode("utf-8"))
-
-    if "meta" not in doc or "rules" not in doc:
-        log_error("doesn't appear to be a capa report")
-        return -1
-
-    a = doc["meta"]["sample"]["md5"].lower()
-    md5 = Transform["MD5"]
-    rawhex = Transform["RawHex"]
-    b = rawhex.encode(md5.encode(bv.parent_view.read(bv.parent_view.start, bv.parent_view.end))).decode("utf-8")
-    if not a == b:
-        log_error("sample mismatch")
-        return -2
-
-    rows = []
-    for rule in doc["rules"].values():
-        if rule["meta"].get("lib"):
-            continue
-        if rule["meta"].get("capa/subscope"):
-            continue
-        if rule["meta"]["scope"] != "function":
-            continue
-
-        name = rule["meta"]["name"]
-        ns = rule["meta"].get("namespace", "")
-        for va in rule["matches"].keys():
-            va = int(va)
-            rows.append((ns, name, va))
-
-    # order by (namespace, name) so that like things show up together
-    rows = sorted(rows)
-    for ns, name, va in rows:
-        if ns:
-            cmt = "%s (%s)" % (name, ns)
-        else:
-            cmt = "%s" % (name,)
-
-        log_info("0x%x: %s" % (va, cmt))
-        try:
-            # message will look something like:
-            #
-            #     capa: delete service (host-interaction/service/delete)
-            append_func_cmt(bv, va, "capa: " + cmt)
-        except ValueError:
-            continue
-
-    log_info("ok")
-
-
-PluginCommand.register("Load capa file", "Loads an analysis file from capa", load_analysis)
+"""
+Binary Ninja plugin that imports a capa report,
+produced via `capa --json /path/to/sample`,
+into the current database.
+
+It will mark up functions with their capa matches, like:
+
+    ; capa: print debug messages (host-interaction/log/debug/write-event)
+    ; capa: delete service (host-interaction/service/delete)
+    ; Attributes: bp-based frame
+
+    public UninstallService
+    UninstallService proc near
+    ...
+
+To use, invoke from the Binary Ninja Tools menu, or from the 
+command-palette.
+
+Adapted for Binary Ninja by @psifertex
+
+This script will verify that the report matches the workspace.
+Check the log window for any errors, and/or the summary of changes.
+
+Derived from: https://github.com/fireeye/capa/blob/master/scripts/import-to-ida.py
+"""
+import os
+import json
+
+from binaryninja import *
+
+
+def append_func_cmt(bv, va, cmt):
+    """
+    add the given comment to the function at the given address,
+    if it doesn't already exist.
+
+    args:
+      bv: the Binary Ninja view that contains the function.
+      va (int): the address of the function.
+      cmt (str): the comment text to add.
+
+    raises:
+      ValueError: when there is no function at the given address.
+    """
+    func = bv.get_function_at(va)
+    if not func:
+        raise ValueError("not a function")
+
+    existing = func.comment
+    if cmt in existing:
+        return
+
+    # only insert the separator when there's already a comment,
+    # otherwise the new comment would start with an empty line.
+    func.comment = existing + "\n" + cmt if existing else cmt
+
+
+def load_analysis(bv):
+    """
+    import a capa report into the given view, annotating functions with their capa matches.
+
+    the report is located next to the sample (same base name, with extension .js or .json);
+    otherwise, the user is prompted to select one.
+
+    returns: 0 when no readable report is selected, -1 when the file is not a capa report,
+      -2 when the report's MD5 doesn't match the view's data, otherwise None.
+    """
+    filename = bv.file.filename
+    dirname = os.path.dirname(filename)
+    shortname = os.path.splitext(os.path.basename(filename))[0]
+    log_info(f"dirname: {dirname}\nshortname: {shortname}\n")
+
+    # prefer a report stored next to the sample: first .js, then .json.
+    path = None
+    for extension in (".js", ".json"):
+        candidate = os.path.join(dirname, shortname + extension)
+        if os.access(candidate, os.R_OK):
+            path = candidate
+            break
+    if path is None:
+        path = interaction.get_open_filename_input("capa report:", "JSON (*.js *.json);;All Files (*)")
+
+    if not path or not os.access(path, os.R_OK):
+        log_error("Invalid filename.")
+        return 0
+    log_info("Using capa file %s" % path)
+
+    with open(path, "rb") as f:
+        doc = json.loads(f.read().decode("utf-8"))
+
+    if not ("meta" in doc and "rules" in doc):
+        log_error("doesn't appear to be a capa report")
+        return -1
+
+    # verify that the report was generated from the data behind this view,
+    # by comparing the MD5 recorded in the report against the MD5 of the parent view's data.
+    expected_md5 = doc["meta"]["sample"]["md5"].lower()
+    md5 = Transform["MD5"]
+    rawhex = Transform["RawHex"]
+    raw_view = bv.parent_view
+    digest = md5.encode(raw_view.read(raw_view.start, raw_view.end))
+    actual_md5 = rawhex.encode(digest).decode("utf-8")
+    if expected_md5 != actual_md5:
+        log_error("sample mismatch")
+        return -2
+
+    # collect (namespace, name, address) for each function-scope match,
+    # skipping library rules and subscope rules.
+    rows = []
+    for rule in doc["rules"].values():
+        meta = rule["meta"]
+        if meta.get("lib") or meta.get("capa/subscope") or meta["scope"] != "function":
+            continue
+
+        name = meta["name"]
+        ns = meta.get("namespace", "")
+        rows.extend((ns, name, int(va)) for va in rule["matches"].keys())
+
+    # order by (namespace, name) so that like things show up together
+    for ns, name, va in sorted(rows):
+        cmt = "%s (%s)" % (name, ns) if ns else "%s" % (name,)
+
+        log_info("0x%x: %s" % (va, cmt))
+        try:
+            # message will look something like:
+            #
+            #     capa: delete service (host-interaction/service/delete)
+            append_func_cmt(bv, va, "capa: " + cmt)
+        except ValueError:
+            # there's no function at this address in the view; skip it.
+            continue
+
+    log_info("ok")
+
+
+PluginCommand.register("Load capa file", "Loads an analysis file from capa", load_analysis)
--- a/scripts/import-to-ida.py
+++ b/scripts/import-to-ida.py
@@ -1,117 +1,117 @@
-"""
-IDA Pro script that imports a capa report,
-produced via `capa --json /path/to/sample`,
-into the current database.
-
-It will mark up functions with their capa matches, like:
-
-    ; capa: print debug messages (host-interaction/log/debug/write-event)
-    ; capa: delete service (host-interaction/service/delete)
-    ; Attributes: bp-based frame
-
-    public UninstallService
-    UninstallService proc near
-    ...
-
-To use, invoke from the IDA Pro scripting dialog,
-such as via Alt-F9,
-and then select the existing capa report from the file system.
-
-This script will verify that the report matches the workspace.
-Check the output window for any errors, and/or the summary of changes.
-
-Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
-You may obtain a copy of the License at: [package root]/LICENSE.txt
-Unless required by applicable law or agreed to in writing, software distributed under the License
- is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and limitations under the License.
-"""
-import json
-import logging
-
-import idc
-import idautils
-import ida_idaapi
-import ida_kernwin
-
-logger = logging.getLogger("capa")
-
-
-def append_func_cmt(va, cmt, repeatable=False):
-    """
-    add the given comment to the given function, 
-    if it doesn't already exist.
-    """
-    func = ida_funcs.get_func(va)
-    if not func:
-        raise ValueError("not a function")
-
-    existing = ida_funcs.get_func_cmt(func, repeatable) or ""
-    if cmt in existing:
-        return
-
-    new = existing + "\n" + cmt
-    ida_funcs.set_func_cmt(func, new, repeatable)
-
-
-def main():
-    path = ida_kernwin.ask_file(False, "*", "capa report")
-    if not path:
-        return 0
-
-    with open(path, "rb") as f:
-        doc = json.loads(f.read().decode("utf-8"))
-
-    if "meta" not in doc or "rules" not in doc:
-        logger.error("doesn't appear to be a capa report")
-        return -1
-
-    # in IDA 7.4, the MD5 hash may be truncated, for example:
-    # wanted: 84882c9d43e23d63b82004fae74ebb61
-    # found: b'84882C9D43E23D63B82004FAE74EBB6\x00'
-    #
-    # see: https://github.com/idapython/bin/issues/11
-    a = doc["meta"]["sample"]["md5"].lower()
-    b = idautils.GetInputFileMD5().decode("ascii").lower().rstrip("\x00")
-    if not a.startswith(b):
-        logger.error("sample mismatch")
-        return -2
-
-    rows = []
-    for rule in doc["rules"].values():
-        if rule["meta"].get("lib"):
-            continue
-        if rule["meta"].get("capa/subscope"):
-            continue
-        if rule["meta"]["scope"] != "function":
-            continue
-
-        name = rule["meta"]["name"]
-        ns = rule["meta"].get("namespace", "")
-        for va in rule["matches"].keys():
-            va = int(va)
-            rows.append((ns, name, va))
-
-    # order by (namespace, name) so that like things show up together
-    rows = sorted(rows)
-    for ns, name, va in rows:
-        if ns:
-            cmt = "%s (%s)" % (name, ns)
-        else:
-            cmt = "%s" % (name,)
-
-        logger.info("0x%x: %s", va, cmt)
-        try:
-            # message will look something like:
-            #
-            #     capa: delete service (host-interaction/service/delete)
-            append_func_cmt(va, "capa: " + cmt, repeatable=False)
-        except ValueError:
-            continue
-
-    logger.info("ok")
-
-
-main()
+"""
+IDA Pro script that imports a capa report,
+produced via `capa --json /path/to/sample`,
+into the current database.
+
+It will mark up functions with their capa matches, like:
+
+    ; capa: print debug messages (host-interaction/log/debug/write-event)
+    ; capa: delete service (host-interaction/service/delete)
+    ; Attributes: bp-based frame
+
+    public UninstallService
+    UninstallService proc near
+    ...
+
+To use, invoke from the IDA Pro scripting dialog,
+such as via Alt-F9,
+and then select the existing capa report from the file system.
+
+This script will verify that the report matches the workspace.
+Check the output window for any errors, and/or the summary of changes.
+
+Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+You may obtain a copy of the License at: [package root]/LICENSE.txt
+Unless required by applicable law or agreed to in writing, software distributed under the License
+ is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License.
+"""
+import json
+import logging
+
+import idc
+import idautils
+import ida_funcs
+import ida_idaapi
+import ida_kernwin
+
+logger = logging.getLogger("capa")
+
+
+def append_func_cmt(va, cmt, repeatable=False):
+    """
+    add the given comment to the function at the given address,
+    if it doesn't already exist.
+
+    args:
+      va (int): an address within the function.
+      cmt (str): the comment text to add.
+      repeatable (bool): whether to update the repeatable or the regular function comment.
+
+    raises:
+      ValueError: when there is no function at the given address.
+    """
+    func = ida_funcs.get_func(va)
+    if not func:
+        raise ValueError("not a function")
+
+    existing = ida_funcs.get_func_cmt(func, repeatable) or ""
+    if cmt in existing:
+        return
+
+    # only insert the separator when there's already a comment,
+    # otherwise the new comment would start with an empty line.
+    new = existing + "\n" + cmt if existing else cmt
+    ida_funcs.set_func_cmt(func, new, repeatable)
+
+
+def main():
+    """
+    prompt for a capa report and import it into the current database,
+    annotating functions with their capa matches.
+
+    returns: 0 when no file is selected, -1 when the file is not a capa report,
+      -2 when the report's MD5 doesn't match the input file, otherwise None.
+    """
+    path = ida_kernwin.ask_file(False, "*", "capa report")
+    if not path:
+        return 0
+
+    with open(path, "rb") as f:
+        doc = json.loads(f.read().decode("utf-8"))
+
+    if not ("meta" in doc and "rules" in doc):
+        logger.error("doesn't appear to be a capa report")
+        return -1
+
+    # in IDA 7.4, the MD5 hash may be truncated, for example:
+    # wanted: 84882c9d43e23d63b82004fae74ebb61
+    # found: b'84882C9D43E23D63B82004FAE74EBB6\x00'
+    #
+    # see: https://github.com/idapython/bin/issues/11
+    #
+    # so, only require that the report's hash starts with the hash provided by IDA.
+    expected_md5 = doc["meta"]["sample"]["md5"].lower()
+    actual_md5 = idautils.GetInputFileMD5().decode("ascii").lower().rstrip("\x00")
+    if not expected_md5.startswith(actual_md5):
+        logger.error("sample mismatch")
+        return -2
+
+    # collect (namespace, name, address) for each function-scope match,
+    # skipping library rules and subscope rules.
+    rows = []
+    for rule in doc["rules"].values():
+        meta = rule["meta"]
+        if meta.get("lib") or meta.get("capa/subscope") or meta["scope"] != "function":
+            continue
+
+        name = meta["name"]
+        ns = meta.get("namespace", "")
+        rows.extend((ns, name, int(va)) for va in rule["matches"].keys())
+
+    # order by (namespace, name) so that like things show up together
+    for ns, name, va in sorted(rows):
+        cmt = "%s (%s)" % (name, ns) if ns else "%s" % (name,)
+
+        logger.info("0x%x: %s", va, cmt)
+        try:
+            # message will look something like:
+            #
+            #     capa: delete service (host-interaction/service/delete)
+            append_func_cmt(va, "capa: " + cmt, repeatable=False)
+        except ValueError:
+            # there's no function at this address in the database; skip it.
+            continue
+
+    logger.info("ok")
+
+
+main()
--- a/setup.py
+++ b/setup.py
@@ -11,17 +11,20 @@ import sys

 import setuptools

-requirements = ["six", "tqdm", "pyyaml", "tabulate", "colorama", "termcolor", "ruamel.yaml", "wcwidth"]
+# halo==0.0.30 is the last version to support py2.7
+requirements = ["six", "tqdm", "pyyaml", "tabulate", "colorama", "termcolor", "ruamel.yaml", "wcwidth", "halo==0.0.30"]

 if sys.version_info >= (3, 0):
    # py3
    requirements.append("networkx")
+    requirements.append("pylancelot~=0.3.6")
 else:
    # py2
    requirements.append("enum34")
    requirements.append("vivisect @ https://github.com/williballenthin/vivisect/tarball/v0.0.20200804#egg=vivisect")
    requirements.append("viv-utils")
    requirements.append("networkx==2.2")  # v2.2 is last version supported by Python 2.7
+    requirements.append("backports.functools-lru-cache")

 # this sets __version__
 # via: http://stackoverflow.com/a/7071358/87207
@@ -52,7 +55,7 @@ setuptools.setup(
            "pycodestyle",
            "black ; python_version>'3.0'",
            "isort",
-        ]
+        ],
    },
    zip_safe=False,
    keywords="capa",
--- a/tests/data
+++ b/tests/data
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -7,79 +7,507 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import os
+import sys
 import os.path
+import contextlib
 import collections

 import pytest
-import viv_utils
+
+import capa.main
+import capa.features.file
+import capa.features.insn
+import capa.features.basicblock
+from capa.features import ARCH_X32, ARCH_X64
+
+try:
+    from functools import lru_cache
+except ImportError:
+    from backports.functools_lru_cache import lru_cache
+

 CD = os.path.dirname(__file__)


-Sample = collections.namedtuple("Sample", ["vw", "path"])
+@contextlib.contextmanager
+def xfail(condition, reason=None):
+    """
+    context manager that wraps a block that is expected to fail in some cases.
+    when it does fail (and is expected), then mark this as pytest.xfail.
+    if it's unexpected, raise an exception, so the test fails.
+
+    example::
+
+        # this test:
+        #  - passes on py3 if foo() works
+        #  - fails  on py3 if foo() fails
+        #  - xfails on py2 if foo() fails
+        #  - fails  on py2 if foo() works
+        with xfail(sys.version_info < (3, 0), reason="py2 doesn't foo"):
+            foo()
+    """
+    try:
+        # do the block
+        yield
+    except Exception:  # not bare: KeyboardInterrupt/SystemExit must propagate, not become an xfail
+        if condition:
+            # we expected the test to fail, so raise and register this via pytest
+            pytest.xfail(reason)
+        else:
+            # we don't expect an exception, so the test should fail
+            raise
+    else:
+        if not condition:
+            # here we expect the block to run successfully,
+            # and we've received no exception,
+            # so this is good
+            pass
+        else:
+            # we expected an exception, but didn't find one. that's an error.
+            raise RuntimeError("expected to fail, but didn't")
+
+
+@lru_cache()
+def get_viv_extractor(path):
+    import capa.features.extractors.viv
+
+    if "raw32" in path:
+        vw = capa.main.get_workspace(path, "sc32", should_save=False)
+    elif "raw64" in path:
+        vw = capa.main.get_workspace(path, "sc64", should_save=False)
+    else:
+        vw = capa.main.get_workspace(path, "auto", should_save=True)
+    return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
+
+
+@lru_cache()  # must be called: the bare `@lru_cache` form is only accepted on Python 3.8+
+def get_lancelot_extractor(path):
+    import capa.features.extractors.lancelot  # inline import, same pattern as get_viv_extractor
+
+    with open(path, "rb") as f:
+        buf = f.read()
+
+    return capa.features.extractors.lancelot.LancelotFeatureExtractor(buf)  # built from the raw file bytes
+
+
+@lru_cache()
+def extract_file_features(extractor):
+    features = collections.defaultdict(set)
+    for feature, va in extractor.extract_file_features():
+        features[feature].add(va)
+    return features
+
+
+# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
+def extract_function_features(extractor, f):
+    features = collections.defaultdict(set)
+    for bb in extractor.get_basic_blocks(f):
+        for insn in extractor.get_instructions(f, bb):
+            for feature, va in extractor.extract_insn_features(f, bb, insn):
+                features[feature].add(va)
+        for feature, va in extractor.extract_basic_block_features(f, bb):
+            features[feature].add(va)
+    for feature, va in extractor.extract_function_features(f):
+        features[feature].add(va)
+    return features
+
+
+# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
+def extract_basic_block_features(extractor, f, bb):
+    features = collections.defaultdict(set)
+    for insn in extractor.get_instructions(f, bb):
+        for feature, va in extractor.extract_insn_features(f, bb, insn):
+            features[feature].add(va)
+    for feature, va in extractor.extract_basic_block_features(f, bb):
+        features[feature].add(va)
+    return features
+
+
+def get_data_path_by_name(name):
+    if name == "mimikatz":
+        return os.path.join(CD, "data", "mimikatz.exe_")
+    elif name == "kernel32":
+        return os.path.join(CD, "data", "kernel32.dll_")
+    elif name == "kernel32-64":
+        return os.path.join(CD, "data", "kernel32-64.dll_")
+    elif name == "pma12-04":
+        return os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
+    elif name == "pma21-01":
+        return os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
+    elif name == "al-khaser x86":
+        return os.path.join(CD, "data", "al-khaser_x86.exe_")
+    elif name.startswith("39c05"):
+        return os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_")
+    elif name.startswith("499c2"):
+        return os.path.join(CD, "data", "499c2a85f6e8142c3f48d4251c9c7cd6.raw32")
+    elif name.startswith("9324d"):
+        return os.path.join(CD, "data", "9324d1a8ae37a36ae560c37448c9705a.exe_")
+    elif name.startswith("a1982"):
+        return os.path.join(CD, "data", "a198216798ca38f280dc413f8c57f2c2.exe_")
+    elif name.startswith("a933a"):
+        return os.path.join(CD, "data", "a933a1a402775cfa94b6bee0963f4b46.dll_")
+    elif name.startswith("bfb9b"):
+        return os.path.join(CD, "data", "bfb9b5391a13d0afd787e87ab90f14f5.dll_")
+    elif name.startswith("c9188"):
+        return os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
+    elif name == "aspack":
+        return os.path.join(CD, "data", "2055994ff75b4309eee3a49c5749d306")
+    else:
+        raise ValueError("unexpected sample fixture")
+
+
+def get_sample_md5_by_name(name):
+    """used by IDA tests to ensure the correct IDB is loaded"""
+    if name == "mimikatz":
+        return "5f66b82558ca92e54e77f216ef4c066c"
+    elif name == "kernel32":
+        return "e80758cf485db142fca1ee03a34ead05"
+    elif name == "kernel32-64":
+        return "a8565440629ac87f6fef7d588fe3ff0f"
+    elif name == "pma12-04":
+        return "56bed8249e7c2982a90e54e1e55391a2"
+    elif name == "pma21-01":
+        return "c8403fb05244e23a7931c766409b5e22"
+    elif name == "al-khaser x86":
+        return "db648cd247281954344f1d810c6fd590"
+    elif name.startswith("39c05"):
+        return "b7841b9d5dc1f511a93cc7576672ec0c"
+    elif name.startswith("499c2"):
+        return "499c2a85f6e8142c3f48d4251c9c7cd6"
+    elif name.startswith("9324d"):
+        return "9324d1a8ae37a36ae560c37448c9705a"
+    elif name.startswith("a1982"):
+        return "a198216798ca38f280dc413f8c57f2c2"
+    elif name.startswith("a933a"):
+        return "a933a1a402775cfa94b6bee0963f4b46"
+    elif name.startswith("bfb9b"):
+        return "bfb9b5391a13d0afd787e87ab90f14f5"
+    elif name.startswith("c9188"):
+        return "c91887d861d9bd4a5872249b641bc9f9"
+    else:
+        raise ValueError("unexpected sample fixture")
+
+
+def resolve_sample(sample):
+    return get_data_path_by_name(sample)


@pytest.fixture
-def mimikatz():
-    path = os.path.join(CD, "data", "mimikatz.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def sample(request):
+    return resolve_sample(request.param)
+
+
+def get_function(extractor, fva):
+    for f in extractor.get_functions():
+        if f.__int__() == fva:
+            return f
+    raise ValueError("function not found")
+
+
+def get_basic_block(extractor, f, va):
+    for bb in extractor.get_basic_blocks(f):
+        if bb.__int__() == va:
+            return bb
+    raise ValueError("basic block not found")
+
+
+def resolve_scope(scope):
+    if scope == "file":
+
+        def inner(extractor):
+            return extract_file_features(extractor)
+
+        inner.__name__ = scope
+        return inner
+    elif "bb=" in scope:
+        # like `function=0x401000,bb=0x40100A`
+        fspec, _, bbspec = scope.partition(",")
+        fva = int(fspec.partition("=")[2], 0x10)
+        bbva = int(bbspec.partition("=")[2], 0x10)
+
+        def inner(extractor):
+            f = get_function(extractor, fva)
+            bb = get_basic_block(extractor, f, bbva)
+            return extract_basic_block_features(extractor, f, bb)
+
+        inner.__name__ = scope
+        return inner
+    elif scope.startswith("function"):
+        # like `function=0x401000`
+        va = int(scope.partition("=")[2], 0x10)
+
+        def inner(extractor):
+            f = get_function(extractor, va)
+            return extract_function_features(extractor, f)
+
+        inner.__name__ = scope
+        return inner
+    else:
+        raise ValueError("unexpected scope fixture")


@pytest.fixture
-def sample_a933a1a402775cfa94b6bee0963f4b46():
-    path = os.path.join(CD, "data", "a933a1a402775cfa94b6bee0963f4b46.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def scope(request):
+    return resolve_scope(request.param)
+
+
+def make_test_id(values):
+    return "-".join(map(str, values))
+
+
+def parametrize(params, values, **kwargs):
+    """
+    extend `pytest.mark.parametrize` to pretty-print features.
+    by default, it renders objects as an opaque value.
+    ref: https://docs.pytest.org/en/2.9.0/example/parametrize.html#different-options-for-test-ids
+    rendered ID might look something like:
+        mimikatz-function=0x403BAC-api(CryptDestroyKey)-True
+    """
+    ids = list(map(make_test_id, values))
+    return pytest.mark.parametrize(params, values, ids=ids, **kwargs)
+
+
+FEATURE_PRESENCE_TESTS = [
+    # file/characteristic("embedded pe")
+    ("pma12-04", "file", capa.features.Characteristic("embedded pe"), True),
+    # file/string
+    ("mimikatz", "file", capa.features.String("SCardControl"), True),
+    ("mimikatz", "file", capa.features.String("SCardTransmit"), True),
+    ("mimikatz", "file", capa.features.String("ACR  > "), True),
+    ("mimikatz", "file", capa.features.String("nope"), False),
+    # file/sections
+    ("mimikatz", "file", capa.features.file.Section(".text"), True),
+    ("mimikatz", "file", capa.features.file.Section(".nope"), False),
+    # IDA doesn't extract unmapped sections by default
+    # ("mimikatz", "file", capa.features.file.Section(".rsrc"), True),
+    # file/exports
+    ("kernel32", "file", capa.features.file.Export("BaseThreadInitThunk"), True),
+    ("kernel32", "file", capa.features.file.Export("lstrlenW"), True),
+    ("kernel32", "file", capa.features.file.Export("nope"), False),
+    # file/imports
+    ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True),
+    ("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True),
+    ("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True),
+    ("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True),
+    ("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True),
+    ("mimikatz", "file", capa.features.file.Import("#11"), False),
+    ("mimikatz", "file", capa.features.file.Import("#nope"), False),
+    ("mimikatz", "file", capa.features.file.Import("nope"), False),
+    # function/characteristic(loop)
+    ("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True),
+    ("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False),
+    # bb/characteristic(tight loop)
+    ("mimikatz", "function=0x402EC4", capa.features.Characteristic("tight loop"), True),
+    ("mimikatz", "function=0x401000", capa.features.Characteristic("tight loop"), False),
+    # bb/characteristic(stack string)
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("stack string"), True),
+    ("mimikatz", "function=0x401000", capa.features.Characteristic("stack string"), False),
+    # bb/characteristic(tight loop)
+    ("mimikatz", "function=0x402EC4,bb=0x402F8E", capa.features.Characteristic("tight loop"), True),
+    ("mimikatz", "function=0x401000,bb=0x401000", capa.features.Characteristic("tight loop"), False),
+    # insn/mnemonic
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("push"), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("movzx"), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("xor"), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("in"), False),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("out"), False),
+    # insn/number
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x3136B0), True),
+    # insn/number: stack adjustments
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False),
+    # insn/number: arch flavors
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False),
+    # insn/offset
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0xC), True),
+    # insn/offset: stack references
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x8), False),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x10), False),
+    # insn/offset: negative
+    ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
+    ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
+    # insn/offset: arch flavors
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False),
+    # insn/api
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptImportKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptDestroyKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False),
+    # insn/api: thunk
+    ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
+    # insn/api: x64
+    ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True,),
+    ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
+    # insn/api: x64 thunk
+    ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True,),
+    ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
+    # insn/api: resolve indirect calls
+    ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
+    ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),
+    ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True),
+    ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True),
+    # insn/string
+    ("mimikatz", "function=0x40105D", capa.features.String("SCardControl"), True),
+    ("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True),
+    ("mimikatz", "function=0x40105D", capa.features.String("ACR  > "), True),
+    ("mimikatz", "function=0x40105D", capa.features.String("nope"), False),
+    # insn/string, pointer to string
+    ("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True),
+    # insn/bytes
+    ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardControl".encode("utf-16le")), True),
+    ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True),
+    ("mimikatz", "function=0x40105D", capa.features.Bytes("ACR  > ".encode("utf-16le")), True),
+    ("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False),
+    # insn/bytes, pointer to bytes
+    ("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True),
+    # insn/characteristic(nzxor)
+    ("mimikatz", "function=0x410DFC", capa.features.Characteristic("nzxor"), True),
+    ("mimikatz", "function=0x40105D", capa.features.Characteristic("nzxor"), False),
+    # insn/characteristic(nzxor): no security cookies
+    ("mimikatz", "function=0x46D534", capa.features.Characteristic("nzxor"), False),
+    # insn/characteristic(peb access)
+    ("kernel32-64", "function=0x1800017D0", capa.features.Characteristic("peb access"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("peb access"), False),
+    # insn/characteristic(gs access)
+    ("kernel32-64", "function=0x180001068", capa.features.Characteristic("gs access"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("gs access"), False),
+    # insn/characteristic(cross section flow)
+    ("a1982...", "function=0x4014D0", capa.features.Characteristic("cross section flow"), True),
+    # insn/characteristic(cross section flow): imports don't count
+    ("kernel32-64", "function=0x180001068", capa.features.Characteristic("cross section flow"), False),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("cross section flow"), False),
+    # insn/characteristic(recursive call)
+    ("39c05...", "function=0x10003100", capa.features.Characteristic("recursive call"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("recursive call"), False),
+    # insn/characteristic(indirect call)
+    ("mimikatz", "function=0x4175FF", capa.features.Characteristic("indirect call"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("indirect call"), False),
+    # insn/characteristic(calls from)
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls from"), True),
+    ("mimikatz", "function=0x4702FD", capa.features.Characteristic("calls from"), False),
+    # function/characteristic(calls to)
+    ("mimikatz", "function=0x40105D", capa.features.Characteristic("calls to"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls to"), False),
+]
+
+FEATURE_COUNT_TESTS = [
+    ("mimikatz", "function=0x40E5C2", capa.features.basicblock.BasicBlock(), 7),
+    ("mimikatz", "function=0x4702FD", capa.features.Characteristic("calls from"), 0),
+    ("mimikatz", "function=0x40E5C2", capa.features.Characteristic("calls from"), 3),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls to"), 0),
+    ("mimikatz", "function=0x40B1F1", capa.features.Characteristic("calls to"), 3),
+]
+
+
+def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
+    extractor = get_extractor(sample)
+    features = scope(extractor)
+    if expected:
+        msg = "%s should be found in %s" % (str(feature), scope.__name__)
+    else:
+        msg = "%s should not be found in %s" % (str(feature), scope.__name__)
+    assert feature.evaluate(features) == expected, msg
+
+
+def do_test_feature_count(get_extractor, sample, scope, feature, expected):
+    extractor = get_extractor(sample)
+    features = scope(extractor)
+    msg = "%s should be found %d times in %s, found: %d" % (
+        str(feature),
+        expected,
+        scope.__name__,
+        len(features[feature]),
+    )
+    assert len(features[feature]) == expected, msg
+
+
+def get_extractor(path):
+    if sys.version_info >= (3, 0):
+        extractor = get_lancelot_extractor(path)
+    else:
+        extractor = get_viv_extractor(path)
+
+    # overload the extractor so that the fixture exposes `extractor.path`
+    setattr(extractor, "path", path)
+    return extractor


@pytest.fixture
-def kernel32():
-    path = os.path.join(CD, "data", "kernel32.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def mimikatz_extractor():
+    return get_extractor(get_data_path_by_name("mimikatz"))


@pytest.fixture
-def sample_a198216798ca38f280dc413f8c57f2c2():
-    path = os.path.join(CD, "data", "a198216798ca38f280dc413f8c57f2c2.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def a933a_extractor():
+    return get_extractor(get_data_path_by_name("a933a..."))


@pytest.fixture
-def sample_9324d1a8ae37a36ae560c37448c9705a():
-    path = os.path.join(CD, "data", "9324d1a8ae37a36ae560c37448c9705a.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def kernel32_extractor():
+    return get_extractor(get_data_path_by_name("kernel32"))


@pytest.fixture
-def pma_lab_12_04():
-    path = os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def a1982_extractor():
+    return get_extractor(get_data_path_by_name("a1982..."))


@pytest.fixture
-def sample_bfb9b5391a13d0afd787e87ab90f14f5():
-    path = os.path.join(CD, "data", "bfb9b5391a13d0afd787e87ab90f14f5.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def z9324d_extractor():
+    return get_extractor(get_data_path_by_name("9324d..."))


@pytest.fixture
-def sample_lab21_01():
-    path = os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def pma12_04_extractor():
+    return get_extractor(get_data_path_by_name("pma12-04"))


@pytest.fixture
-def sample_c91887d861d9bd4a5872249b641bc9f9():
-    path = os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def bfb9b_extractor():
+    return get_extractor(get_data_path_by_name("bfb9b..."))


@pytest.fixture
-def sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41():
-    path = os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_",)
-    return Sample(viv_utils.getWorkspace(path), path)
+def pma21_01_extractor():
+    return get_extractor(get_data_path_by_name("pma21-01"))


@pytest.fixture
-def sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32():
-    path = os.path.join(CD, "data", "499c2a85f6e8142c3f48d4251c9c7cd6.raw32")
-    return Sample(viv_utils.getShellcodeWorkspace(path), path)
+def c9188_extractor():
+    return get_extractor(get_data_path_by_name("c9188..."))
+
+
+@pytest.fixture
+def z39c05_extractor():
+    return get_extractor(get_data_path_by_name("39c05..."))
+
+
+@pytest.fixture
+def z499c2_extractor():
+    return get_extractor(get_data_path_by_name("499c2..."))
+
+
+@pytest.fixture
+def al_khaser_x86_extractor():
+    return get_extractor(get_data_path_by_name("al-khaser x86"))
+
+
+@pytest.fixture
+def aspack_extractor():
+    return get_extractor(get_data_path_by_name("aspack"))
--- a/tests/test_freeze.py
+++ b/tests/test_freeze.py
@@ -5,9 +5,10 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
+import sys
 import textwrap

+import pytest
 from fixtures import *

 import capa.main
@@ -23,7 +24,7 @@ EXTRACTOR = capa.features.extractors.NullFeatureExtractor(
        "file features": [(0x402345, capa.features.Characteristic("embedded pe")),],
        "functions": {
            0x401000: {
-                "features": [(0x401000, capa.features.Characteristic("switch")),],
+                "features": [(0x401000, capa.features.Characteristic("indirect call")),],
                "basic blocks": {
                    0x401000: {
                        "features": [(0x401000, capa.features.Characteristic("tight loop")),],
@@ -104,17 +105,14 @@ def compare_extractors_viv_null(viv_ext, null_ext):
      viv_ext (capa.features.extractors.viv.VivisectFeatureExtractor)
      null_ext (capa.features.extractors.NullFeatureExtractor)
    """
-
-    # TODO: ordering of these things probably doesn't work yet
-
    assert list(viv_ext.extract_file_features()) == list(null_ext.extract_file_features())
-    assert to_int(list(viv_ext.get_functions())) == list(null_ext.get_functions())
+    assert list(map(to_int, viv_ext.get_functions())) == list(null_ext.get_functions())
    for f in viv_ext.get_functions():
-        assert to_int(list(viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
+        assert list(map(to_int, viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
        assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(to_int(f)))

        for bb in viv_ext.get_basic_blocks(f):
-            assert to_int(list(viv_ext.get_instructions(f, bb))) == list(
+            assert list(map(to_int, viv_ext.get_instructions(f, bb))) == list(
                null_ext.get_instructions(to_int(f), to_int(bb))
            )
            assert list(viv_ext.extract_basic_block_features(f, bb)) == list(
@@ -129,10 +127,7 @@ def compare_extractors_viv_null(viv_ext, null_ext):

 def to_int(o):
    """helper to get int value of extractor items"""
-    if isinstance(o, list):
-        return map(lambda x: capa.helpers.oint(x), o)
-    else:
-        return capa.helpers.oint(o)
+    return capa.helpers.oint(o)


 def test_freeze_s_roundtrip():
@@ -169,18 +164,22 @@ def test_serialize_features():
    roundtrip_feature(capa.features.file.Import("#11"))


-def test_freeze_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
+def test_freeze_sample(tmpdir, z9324d_extractor):
     # tmpdir fixture handles cleanup
     o = tmpdir.mkdir("capa").join("test.frz").strpath
-    assert capa.features.freeze.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, o, "-v"]) == 0
+    path = z9324d_extractor.path
+    assert capa.features.freeze.main([path, o, "-v"]) == 0


-def test_freeze_load_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
+def test_freeze_load_sample(tmpdir, z9324d_extractor):
     o = tmpdir.mkdir("capa").join("test.frz")
-    viv_extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
+
     with open(o.strpath, "wb") as f:
-        f.write(capa.features.freeze.dump(viv_extractor))
-    null_extractor = capa.features.freeze.load(o.open("rb").read())
-    compare_extractors_viv_null(viv_extractor, null_extractor)
+        f.write(capa.features.freeze.dump(z9324d_extractor))
+
+    with open(o.strpath, "rb") as f:
+        null_extractor = capa.features.freeze.load(f.read())
+
+    compare_extractors_viv_null(z9324d_extractor, null_extractor)
--- a/tests/test_ida_features.py
+++ b/tests/test_ida_features.py
@@ -1,288 +1,104 @@
-# run this script from within IDA with ./tests/data/mimikatz.exe open
-import logging
-import binascii
-import traceback
-import collections
-
-import pytest
-
-import capa.features
-import capa.features.file
-import capa.features.insn
-import capa.features.basicblock
-from capa.features import ARCH_X32, ARCH_X64
-
-logger = logging.getLogger("test_ida_features")
-
-
-def check_input_file():
-    import idautils
-
-    wanted = "5f66b82558ca92e54e77f216ef4c066c"
-    # some versions (7.4) of IDA return a truncated version of the MD5.
-    # https://github.com/idapython/bin/issues/11
-    try:
-        found = idautils.GetInputFileMD5()[:31].decode("ascii").lower()
-    except UnicodeDecodeError:
-        # in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
-        # rather than the hex digest
-        found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower()
-    if not wanted.startswith(found):
-        raise RuntimeError("please run the tests against `mimikatz.exe`")
-
-
-def get_extractor():
-    check_input_file()
-
-    # have to import import this inline so pytest doesn't bail outside of IDA
-    import capa.features.extractors.ida
-
-    return capa.features.extractors.ida.IdaFeatureExtractor()
-
-
-def extract_file_features():
-    extractor = get_extractor()
-    features = set([])
-    for feature, va in extractor.extract_file_features():
-        features.add(feature)
-    return features
-
-
-def extract_function_features(f):
-    extractor = get_extractor()
-    features = collections.defaultdict(set)
-    for bb in extractor.get_basic_blocks(f):
-        for insn in extractor.get_instructions(f, bb):
-            for feature, va in extractor.extract_insn_features(f, bb, insn):
-                features[feature].add(va)
-        for feature, va in extractor.extract_basic_block_features(f, bb):
-            features[feature].add(va)
-    for feature, va in extractor.extract_function_features(f):
-        features[feature].add(va)
-    return features
-
-
-def extract_basic_block_features(f, bb):
-    extractor = get_extractor()
-    features = collections.defaultdict(set)
-    for insn in extractor.get_instructions(f, bb):
-        for feature, va in extractor.extract_insn_features(f, bb, insn):
-            features[feature].add(va)
-    for feature, va in extractor.extract_basic_block_features(f, bb):
-        features[feature].add(va)
-    return features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_api_features():
-    f = get_extractor().get_function(0x403BAC)
-    features = extract_function_features(f)
-    assert capa.features.insn.API("advapi32.CryptAcquireContextW") in features
-    assert capa.features.insn.API("advapi32.CryptAcquireContext") in features
-    assert capa.features.insn.API("advapi32.CryptGenKey") in features
-    assert capa.features.insn.API("advapi32.CryptImportKey") in features
-    assert capa.features.insn.API("advapi32.CryptDestroyKey") in features
-    assert capa.features.insn.API("CryptAcquireContextW") in features
-    assert capa.features.insn.API("CryptAcquireContext") in features
-    assert capa.features.insn.API("CryptGenKey") in features
-    assert capa.features.insn.API("CryptImportKey") in features
-    assert capa.features.insn.API("CryptDestroyKey") in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_string_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.String("SCardControl") in features
-    assert capa.features.String("SCardTransmit") in features
-    assert capa.features.String("ACR  > ") in features
-    # other strings not in this function
-    assert capa.features.String("bcrypt.dll") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_byte_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    wanted = capa.features.Bytes("SCardControl".encode("utf-16le"))
-    # use `==` rather than `is` because the result is not `True` but a truthy value.
-    assert wanted.evaluate(features) == True
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_number_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Number(0xFF) in features
-    assert capa.features.insn.Number(0x3136B0) in features
-    # the following are stack adjustments
-    assert capa.features.insn.Number(0xC) not in features
-    assert capa.features.insn.Number(0x10) not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_number_arch_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Number(0xFF) in features
-    assert capa.features.insn.Number(0xFF, arch=ARCH_X32) in features
-    assert capa.features.insn.Number(0xFF, arch=ARCH_X64) not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_offset_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Offset(0x0) in features
-    assert capa.features.insn.Offset(0x4) in features
-    assert capa.features.insn.Offset(0xC) in features
-    # the following are stack references
-    assert capa.features.insn.Offset(0x8) not in features
-    assert capa.features.insn.Offset(0x10) not in features
-
-    # this function has the following negative offsets
-    # movzx   ecx, byte ptr [eax-1]
-    # movzx   eax, byte ptr [eax-2]
-    f = get_extractor().get_function(0x4011FB)
-    features = extract_function_features(f)
-    assert capa.features.insn.Offset(-0x1) in features
-    assert capa.features.insn.Offset(-0x2) in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_offset_arch_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Offset(0x0) in features
-    assert capa.features.insn.Offset(0x0, arch=ARCH_X32) in features
-    assert capa.features.insn.Offset(0x0, arch=ARCH_X64) not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_nzxor_features():
-    f = get_extractor().get_function(0x410DFC)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("nzxor") in features  # 0x0410F0B
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_mnemonic_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Mnemonic("push") in features
-    assert capa.features.insn.Mnemonic("movzx") in features
-    assert capa.features.insn.Mnemonic("xor") in features
-
-    assert capa.features.insn.Mnemonic("in") not in features
-    assert capa.features.insn.Mnemonic("out") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_file_section_name_features():
-    features = extract_file_features()
-    assert capa.features.file.Section(".idata") in features
-    assert capa.features.file.Section(".text") in features
-    assert capa.features.file.Section(".nope") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_tight_loop_features():
-    extractor = get_extractor()
-
-    f = extractor.get_function(0x402EC4)
-    for bb in extractor.get_basic_blocks(f):
-        if bb.__int__() != 0x402F8E:
-            continue
-        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop") in features
-        assert capa.features.basicblock.BasicBlock() in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_tight_loop_bb_features():
-    extractor = get_extractor()
-
-    f = extractor.get_function(0x402EC4)
-    for bb in extractor.get_basic_blocks(f):
-        if bb.__int__() != 0x402F8E:
-            continue
-        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop") in features
-        assert capa.features.basicblock.BasicBlock() in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_file_import_name_features():
-    features = extract_file_features()
-    assert capa.features.file.Import("advapi32.CryptSetHashParam") in features
-    assert capa.features.file.Import("CryptSetHashParam") in features
-    assert capa.features.file.Import("kernel32.IsWow64Process") in features
-    assert capa.features.file.Import("msvcrt.exit") in features
-    assert capa.features.file.Import("cabinet.#11") in features
-    assert capa.features.file.Import("#11") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_stackstring_features():
-    f = get_extractor().get_function(0x4556E5)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("stack string") in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_switch_features():
-    f = get_extractor().get_function(0x409411)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("switch") in features
-
-    f = get_extractor().get_function(0x409393)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("switch") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_function_calls_to():
-    # this function is used in a function pointer
-    f = get_extractor().get_function(0x4011FB)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("calls to") not in features
-
-    # __FindPESection is called once
-    f = get_extractor().get_function(0x470360)
-    features = extract_function_features(f)
-    assert len(features[capa.features.Characteristic("calls to")]) == 1
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_function_calls_from():
-    f = get_extractor().get_function(0x4011FB)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("calls from") in features
-    assert len(features[capa.features.Characteristic("calls from")]) == 3
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_basic_block_count():
-    f = get_extractor().get_function(0x4011FB)
-    features = extract_function_features(f)
-    assert len(features[capa.features.basicblock.BasicBlock()]) == 15
-
-
-if __name__ == "__main__":
-    print("-" * 80)
-
-    # invoke all functions in this module that start with `test_`
-    for name in dir(sys.modules[__name__]):
-        if not name.startswith("test_"):
-            continue
-
-        test = getattr(sys.modules[__name__], name)
-        logger.debug("invoking test: %s", name)
-        sys.stderr.flush()
-        try:
-            test()
-        except AssertionError as e:
-            print("FAIL %s" % (name))
-            traceback.print_exc()
-        else:
-            print("OK   %s" % (name))
+# run this script from within IDA with ./tests/data/mimikatz.exe open
+import sys
+import logging
+import os.path
+import binascii
+import traceback
+
+import pytest
+
+try:
+    sys.path.append(os.path.dirname(__file__))
+    from fixtures import *
+finally:
+    sys.path.pop()
+
+
+logger = logging.getLogger("test_ida_features")
+
+
+def check_input_file(wanted):
+    """
+    ensure the file currently open in IDA is the sample the caller expects.
+
+    args:
+      wanted (str): hex MD5 digest of the expected sample.
+
+    raises:
+      RuntimeError: when the digest reported by IDA is not a prefix of `wanted`.
+    """
+    import idautils
+
+    try:
+        # some versions (7.4) of IDA return a truncated version of the MD5.
+        # https://github.com/idapython/bin/issues/11
+        digest = idautils.GetInputFileMD5()[:31].decode("ascii")
+    except UnicodeDecodeError:
+        # in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
+        # rather than the hex digest
+        digest = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii")
+
+    # prefix match, not equality, because the digest may have been truncated above.
+    found = digest.lower()
+    if wanted.startswith(found):
+        return
+
+    raise RuntimeError("please run the tests against sample with MD5: `%s`" % (wanted))
+
+
+def get_ida_extractor(_path):
+    """
+    build a feature extractor backed by the database currently open in IDA.
+
+    `_path` is ignored: `IdaFeatureExtractor` is constructed without arguments.
+
+    this routine intentionally does not validate which sample is open.
+    the test drivers in this module already call `check_input_file` with the MD5 of
+    the sample under test before requesting an extractor; re-checking here against a
+    single hard-coded MD5 made every test case for any other sample report FAIL.
+    """
+    # have to import this inline so pytest doesn't bail outside of IDA
+    import capa.features.extractors.ida
+
+    return capa.features.extractors.ida.IdaFeatureExtractor()
+
+
+@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
+def test_ida_features():
+    """
+    run each entry of FEATURE_PRESENCE_TESTS against the database open in IDA,
+    printing one SKIP/FAIL/OK line per entry rather than raising on failure.
+    """
+    for (sample, scope, feature, expected) in FEATURE_PRESENCE_TESTS:
+        label = make_test_id((sample, scope, feature, expected))
+
+        # entries whose sample MD5 doesn't match the open file are skipped.
+        try:
+            check_input_file(get_sample_md5_by_name(sample))
+        except RuntimeError:
+            print("SKIP %s" % (label))
+            continue
+
+        # resolution happens outside the try below, so errors here still propagate.
+        resolved_scope = resolve_scope(scope)
+        resolved_sample = resolve_sample(sample)
+
+        try:
+            do_test_feature_presence(get_ida_extractor, resolved_sample, resolved_scope, feature, expected)
+        except Exception:
+            print("FAIL %s" % (label))
+            traceback.print_exc()
+        else:
+            print("OK   %s" % (label))
+
+
+@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
+def test_ida_feature_counts():
+    """
+    run each entry of FEATURE_COUNT_TESTS against the database open in IDA,
+    printing one SKIP/FAIL/OK line per entry rather than raising on failure.
+    """
+    for (sample, scope, feature, expected) in FEATURE_COUNT_TESTS:
+        label = make_test_id((sample, scope, feature, expected))
+
+        # entries whose sample MD5 doesn't match the open file are skipped.
+        try:
+            check_input_file(get_sample_md5_by_name(sample))
+        except RuntimeError:
+            print("SKIP %s" % (label))
+            continue
+
+        # resolution happens outside the try below, so errors here still propagate.
+        resolved_scope = resolve_scope(scope)
+        resolved_sample = resolve_sample(sample)
+
+        try:
+            do_test_feature_count(get_ida_extractor, resolved_sample, resolved_scope, feature, expected)
+        except Exception:
+            print("FAIL %s" % (label))
+            traceback.print_exc()
+        else:
+            print("OK   %s" % (label))
+
+
+if __name__ == "__main__":
+    # entry point when the script is executed directly (e.g. from within IDA),
+    # where pytest is not driving test collection.
+    print("-" * 80)
+
+    this_module = sys.modules[__name__]
+
+    # invoke all functions in this module that start with `test_`
+    for name in dir(this_module):
+        if name.startswith("test_"):
+            test = getattr(this_module, name)
+            logger.debug("invoking test: %s", name)
+            sys.stderr.flush()
+            test()
+
+    print("DONE")
--- a/tests/test_lancelot_features.py
+++ b/tests/test_lancelot_features.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+
+from fixtures import *
+
+
+@parametrize(
+    "sample,scope,feature,expected",
+    FEATURE_PRESENCE_TESTS,
+    indirect=["sample", "scope"],
+)
+def test_lancelot_features(sample, scope, feature, expected):
+    """run one FEATURE_PRESENCE_TESTS case through the lancelot extractor (wrapped in `xfail` on py2)."""
+    running_py2 = sys.version_info < (3, 0)
+    with xfail(running_py2, reason="lancelot only works on py3"):
+        do_test_feature_presence(get_lancelot_extractor, sample, scope, feature, expected)
+
+
+@parametrize(
+    "sample,scope,feature,expected",
+    FEATURE_COUNT_TESTS,
+    indirect=["sample", "scope"],
+)
+def test_lancelot_feature_counts(sample, scope, feature, expected):
+    """run one FEATURE_COUNT_TESTS case through the lancelot extractor (wrapped in `xfail` on py2)."""
+    running_py2 = sys.version_info < (3, 0)
+    with xfail(running_py2, reason="lancelot only works on py3"):
+        do_test_feature_count(get_lancelot_extractor, sample, scope, feature, expected)
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -5,28 +5,31 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
+import sys
 import textwrap

+import pytest
 from fixtures import *

 import capa.main
 import capa.rules
 import capa.engine
 import capa.features
-import capa.features.extractors.viv
 from capa.engine import *


-def test_main(sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
+def test_main(z9324d_extractor):
    # tests rules can be loaded successfully and all output modes
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-vv"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-j"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path]) == 0
+    path = z9324d_extractor.path
+    assert capa.main.main([path, "-vv"]) == 0
+    assert capa.main.main([path, "-v"]) == 0
+    assert capa.main.main([path, "-j"]) == 0
+    assert capa.main.main([path]) == 0


-def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
+def test_main_single_rule(z9324d_extractor, tmpdir):
    # tests a single rule can be loaded successfully
    RULE_CONTENT = textwrap.dedent(
        """
@@ -38,16 +41,19 @@ def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
              - string: test
        """
    )
+    path = z9324d_extractor.path
    rule_file = tmpdir.mkdir("capa").join("rule.yml")
    rule_file.write(RULE_CONTENT)
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v", "-r", rule_file.strpath,]) == 0
+    assert capa.main.main([path, "-v", "-r", rule_file.strpath,]) == 0


-def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32):
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-vv", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-v", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-j", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-f", "sc32"]) == 0
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="lancelot doesn't support shellcode workspaces")
+def test_main_shellcode(z499c2_extractor):
+    path = z499c2_extractor.path
+    assert capa.main.main([path, "-vv", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-v", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-j", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-f", "sc32"]) == 0


 def test_ruleset():
@@ -73,7 +79,7 @@ def test_ruleset():
                            name: function rule
                            scope: function
                        features:
-                          - characteristic: switch
+                          - characteristic: tight loop
                    """
                )
            ),
@@ -96,7 +102,8 @@ def test_ruleset():
    assert len(rules.basic_block_rules) == 1


-def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
+def test_match_across_scopes_file_function(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a function (0x4073F0)
@@ -153,16 +160,14 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
            ),
        ]
    )
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "install service" in capabilities
    assert ".text section" in capabilities
    assert ".text section and install service" in capabilities


-def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
+def test_match_across_scopes(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a basic block (including at least 0x403685)
@@ -218,16 +223,14 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
            ),
        ]
    )
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "tight loop" in capabilities
    assert "kill thread loop" in capabilities
    assert "kill thread program" in capabilities


-def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
+def test_subscope_bb_rules(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -247,14 +250,12 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
        ]
    )
    # tight loop at 0x403685
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "test rule" in capabilities


-def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
+def test_byte_matching(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -272,15 +273,12 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
            )
        ]
    )
-
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "byte match test" in capabilities


-def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
+def test_count_bb(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -299,9 +297,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
            )
        ]
    )
-
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "count bb" in capabilities
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -162,6 +162,23 @@ def test_rule_yaml_count_range():
    assert r.evaluate({Number(100): {1, 2, 3}}) == False


+def test_rule_yaml_count_string():
+    """`count(string(foo)): 2` should evaluate true only when `foo` has exactly two locations."""
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+            features:
+                - count(string(foo)): 2
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    # (locations recorded for the string, expected evaluation result)
+    cases = (
+        ({}, False),
+        ({1}, False),
+        ({1, 2}, True),
+        ({1, 2, 3}, False),
+    )
+    for locations, expected in cases:
+        assert r.evaluate({String("foo"): locations}) == expected
+
+
 def test_invalid_rule_feature():
    with pytest.raises(capa.rules.InvalidRule):
        capa.rules.Rule.from_yaml(
@@ -267,7 +284,7 @@ def test_subscope_rules():
                                - function:
                                    - and:
                                        - characteristic: nzxor
-                                        - characteristic: switch
+                                        - characteristic: loop
                    """
                )
            )
--- a/tests/test_unpack.py
+++ b/tests/test_unpack.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import sys
+
+import pefile
+import pytest
+from fixtures import *
+
+import capa.unpack
+
+
+# compare with `< (3, 6)`: `sys.version_info` is a five-element tuple, so on e.g. 3.5.2
+# it sorts *after* `(3, 5)` and `<= (3, 5)` would fail to mark py3.5 as an expected failure.
+@pytest.mark.xfail(sys.version_info < (3, 6), reason="auto-unpack only works on py3.6+")
+def test_aspack_is_packed(aspack_extractor):
+    """the raw bytes of the aspack sample should be reported as packed by `capa.unpack.is_packed`."""
+    with open(aspack_extractor.path, "rb") as f:
+        buf = f.read()
+
+    assert capa.unpack.is_packed(buf) is True
+
+
+# compare with `< (3, 6)`: `sys.version_info` is a five-element tuple, so on e.g. 3.5.2
+# it sorts *after* `(3, 5)` and `<= (3, 5)` would fail to mark py3.5 as an expected failure.
+@pytest.mark.xfail(sys.version_info < (3, 6), reason="auto-unpack only works on py3.6+")
+def test_aspack_detect(aspack_extractor):
+    """`capa.unpack.detect_packer` should name the packer of the aspack sample as "aspack"."""
+    with open(aspack_extractor.path, "rb") as f:
+        buf = f.read()
+
+    assert capa.unpack.detect_packer(buf) == "aspack"
+
+
+# compare with `< (3, 6)`: `sys.version_info` is a five-element tuple, so on e.g. 3.5.2
+# it sorts *after* `(3, 5)` and `<= (3, 5)` would fail to mark py3.5 as an expected failure.
+@pytest.mark.xfail(sys.version_info < (3, 6), reason="auto-unpack only works on py3.6+")
+def test_aspack_unpack(aspack_extractor):
+    """
+    unpack the aspack sample and check that the result parses as a PE with the
+    expected headers, embedded strings, and import table.
+    """
+    with open(aspack_extractor.path, "rb") as f:
+        buf = f.read()
+
+    unpacked = capa.unpack.unpack_pe("aspack", buf)
+
+    # the unpacked buffer must itself be a parseable PE file.
+    pe = pefile.PE(data=unpacked)
+    assert pe.OPTIONAL_HEADER.ImageBase == 0x4AD00000
+    assert pe.OPTIONAL_HEADER.AddressOfEntryPoint == 0x1A610
+    assert b"This program cannot be run in DOS mode" in unpacked
+    assert "(C) Copyright 1985-2000 Microsoft Corp.".encode("utf-16le") in unpacked
+    assert "CMD.EXE has halted. %0".encode("utf-16le") in unpacked
+
+    # collect the import table: DLL names lowercased and cut at the first ".",
+    # symbol names as-is.
+    dlls = set()
+    syms = set()
+    for entry in pe.DIRECTORY_ENTRY_IMPORT:
+        dlls.add(entry.dll.decode("ascii").lower().partition(".")[0])
+        for imp in entry.imports:
+            syms.add(imp.name.decode("ascii"))
+
+    assert dlls == {"advapi32", "kernel32", "msvcrt", "user32"}
+    assert "RegQueryValueExW" in syms
+    assert "WriteConsoleW" in syms
+    assert "realloc" in syms
+    assert "GetProcessWindowStation" in syms
--- a/tests/test_viv_features.py
+++ b/tests/test_viv_features.py
@@ -5,340 +5,22 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import sys

-import viv_utils
 from fixtures import *

-import capa.features
-import capa.features.file
-import capa.features.insn
-import capa.features.basicblock
-import capa.features.extractors.viv.file
-import capa.features.extractors.viv.insn
-import capa.features.extractors.viv.function
-import capa.features.extractors.viv.basicblock
-from capa.features import ARCH_X32, ARCH_X64

+@parametrize(
+    "sample,scope,feature,expected",
+    FEATURE_PRESENCE_TESTS,
+    indirect=["sample", "scope"],
+)
+def test_viv_features(sample, scope, feature, expected):
+    """
+    Run one FEATURE_PRESENCE_TESTS case through `do_test_feature_presence`
+    using `get_viv_extractor`; the call is wrapped in `xfail` on Python 3.
+    """
+    running_py3 = sys.version_info >= (3, 0)
+    with xfail(running_py3, reason="vivsect only works on py2"):
+        do_test_feature_presence(get_viv_extractor, sample, scope, feature, expected)

-def extract_file_features(vw, path):
-    features = set([])
-    for feature, va in capa.features.extractors.viv.file.extract_features(vw, path):
-        features.add(feature)
-    return features

-
-def extract_function_features(f):
-    features = collections.defaultdict(set)
-    for bb in f.basic_blocks:
-        for insn in bb.instructions:
-            for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
-                features[feature].add(va)
-        for feature, va in capa.features.extractors.viv.basicblock.extract_features(f, bb):
-            features[feature].add(va)
-    for feature, va in capa.features.extractors.viv.function.extract_features(f):
-        features[feature].add(va)
-    return features
-
-
-def extract_basic_block_features(f, bb):
-    features = set({})
-    for insn in bb.instructions:
-        for feature, _ in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
-            features.add(feature)
-    for feature, _ in capa.features.extractors.viv.basicblock.extract_features(f, bb):
-        features.add(feature)
-    return features
-
-
-def test_api_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x403BAC))
-    assert capa.features.insn.API("advapi32.CryptAcquireContextW") in features
-    assert capa.features.insn.API("advapi32.CryptAcquireContext") in features
-    assert capa.features.insn.API("advapi32.CryptGenKey") in features
-    assert capa.features.insn.API("advapi32.CryptImportKey") in features
-    assert capa.features.insn.API("advapi32.CryptDestroyKey") in features
-    assert capa.features.insn.API("CryptAcquireContextW") in features
-    assert capa.features.insn.API("CryptAcquireContext") in features
-    assert capa.features.insn.API("CryptGenKey") in features
-    assert capa.features.insn.API("CryptImportKey") in features
-    assert capa.features.insn.API("CryptDestroyKey") in features
-
-
-def test_api_features_64_bit(sample_a198216798ca38f280dc413f8c57f2c2):
-    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4011B0))
-    assert capa.features.insn.API("kernel32.GetStringTypeA") in features
-    assert capa.features.insn.API("kernel32.GetStringTypeW") not in features
-    assert capa.features.insn.API("kernel32.GetStringType") in features
-    assert capa.features.insn.API("GetStringTypeA") in features
-    assert capa.features.insn.API("GetStringType") in features
-    # call via thunk in IDA Pro
-    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401CB0))
-    assert capa.features.insn.API("msvcrt.vfprintf") in features
-    assert capa.features.insn.API("vfprintf") in features
-
-
-def test_string_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.String("SCardControl") in features
-    assert capa.features.String("SCardTransmit") in features
-    assert capa.features.String("ACR  > ") in features
-    # other strings not in this function
-    assert capa.features.String("bcrypt.dll") not in features
-
-
-def test_string_pointer_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF))
-    assert capa.features.String("INPUTEVENT") in features
-
-
-def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-    wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61")
-    # use `==` rather than `is` because the result is not `True` but a truthy value.
-    assert wanted.evaluate(features) == True
-
-
-def test_byte_features64(sample_lab21_01):
-    features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400010C0))
-    wanted = capa.features.Bytes(b"\x32\xA2\xDF\x2D\x99\x2B\x00\x00")
-    # use `==` rather than `is` because the result is not `True` but a truthy value.
-    assert wanted.evaluate(features) == True
-
-
-def test_bytes_pointer_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF))
-    assert capa.features.Bytes("INPUTEVENT".encode("utf-16le")).evaluate(features) == True
-
-
-def test_number_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Number(0xFF) in features
-    assert capa.features.insn.Number(0x3136B0) in features
-    # the following are stack adjustments
-    assert capa.features.insn.Number(0xC) not in features
-    assert capa.features.insn.Number(0x10) not in features
-
-
-def test_number_arch_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Number(0xFF) in features
-    assert capa.features.insn.Number(0xFF, arch=ARCH_X32) in features
-    assert capa.features.insn.Number(0xFF, arch=ARCH_X64) not in features
-
-
-def test_offset_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Offset(0x0) in features
-    assert capa.features.insn.Offset(0x4) in features
-    assert capa.features.insn.Offset(0xC) in features
-    # the following are stack references
-    assert capa.features.insn.Offset(0x8) not in features
-    assert capa.features.insn.Offset(0x10) not in features
-
-    # this function has the following negative offsets
-    # movzx   ecx, byte ptr [eax-1]
-    # movzx   eax, byte ptr [eax-2]
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4011FB))
-    assert capa.features.insn.Offset(-0x1) in features
-    assert capa.features.insn.Offset(-0x2) in features
-
-
-def test_offset_arch_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Offset(0x0) in features
-    assert capa.features.insn.Offset(0x0, arch=ARCH_X32) in features
-    assert capa.features.insn.Offset(0x0, arch=ARCH_X64) not in features
-
-
-def test_nzxor_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x410DFC))
-    assert capa.features.Characteristic("nzxor") in features  # 0x0410F0B
-
-
-def get_bb_insn(f, va):
-    """fetch the BasicBlock and Instruction instances for the given VA in the given function."""
-    for bb in f.basic_blocks:
-        for insn in bb.instructions:
-            if insn.va == va:
-                return (bb, insn)
-    raise KeyError(va)
-
-
-def test_is_security_cookie(mimikatz):
-    # not a security cookie check
-    f = viv_utils.Function(mimikatz.vw, 0x410DFC)
-    for va in [0x0410F0B]:
-        bb, insn = get_bb_insn(f, va)
-        assert capa.features.extractors.viv.insn.is_security_cookie(f, bb, insn) == False
-
-    # security cookie initial set and final check
-    f = viv_utils.Function(mimikatz.vw, 0x46C54A)
-    for va in [0x46C557, 0x46C63A]:
-        bb, insn = get_bb_insn(f, va)
-        assert capa.features.extractors.viv.insn.is_security_cookie(f, bb, insn) == True
-
-
-def test_mnemonic_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Mnemonic("push") in features
-    assert capa.features.insn.Mnemonic("movzx") in features
-    assert capa.features.insn.Mnemonic("xor") in features
-
-    assert capa.features.insn.Mnemonic("in") not in features
-    assert capa.features.insn.Mnemonic("out") not in features
-
-
-def test_peb_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
-    assert capa.features.Characteristic("peb access") in features
-
-
-def test_file_section_name_features(mimikatz):
-    features = extract_file_features(mimikatz.vw, mimikatz.path)
-    assert capa.features.file.Section(".rsrc") in features
-    assert capa.features.file.Section(".text") in features
-    assert capa.features.file.Section(".nope") not in features
-
-
-def test_tight_loop_features(mimikatz):
-    f = viv_utils.Function(mimikatz.vw, 0x402EC4)
-    for bb in f.basic_blocks:
-        if bb.va != 0x402F8E:
-            continue
-        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop") in features
-        assert capa.features.basicblock.BasicBlock() in features
-
-
-def test_tight_loop_bb_features(mimikatz):
-    f = viv_utils.Function(mimikatz.vw, 0x402EC4)
-    for bb in f.basic_blocks:
-        if bb.va != 0x402F8E:
-            continue
-        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop") in features
-        assert capa.features.basicblock.BasicBlock() in features
-
-
-def test_file_export_name_features(kernel32):
-    features = extract_file_features(kernel32.vw, kernel32.path)
-    assert capa.features.file.Export("BaseThreadInitThunk") in features
-    assert capa.features.file.Export("lstrlenW") in features
-
-
-def test_file_import_name_features(mimikatz):
-    features = extract_file_features(mimikatz.vw, mimikatz.path)
-    assert capa.features.file.Import("advapi32.CryptSetHashParam") in features
-    assert capa.features.file.Import("CryptSetHashParam") in features
-    assert capa.features.file.Import("kernel32.IsWow64Process") in features
-    assert capa.features.file.Import("msvcrt.exit") in features
-    assert capa.features.file.Import("cabinet.#11") in features
-    assert capa.features.file.Import("#11") not in features
-
-
-def test_cross_section_flow_features(sample_a198216798ca38f280dc413f8c57f2c2):
-    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0))
-    assert capa.features.Characteristic("cross section flow") in features
-
-    # this function has calls to some imports,
-    # which should not trigger cross-section flow characteristic
-    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563))
-    assert capa.features.Characteristic("cross section flow") not in features
-
-
-def test_segment_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
-    assert capa.features.Characteristic("fs access") in features
-
-
-def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x407970))
-    assert capa.features.insn.API("kernel32.CreateToolhelp32Snapshot") in features
-    assert capa.features.insn.API("CreateToolhelp32Snapshot") in features
-
-
-def test_file_embedded_pe(pma_lab_12_04):
-    features = extract_file_features(pma_lab_12_04.vw, pma_lab_12_04.path)
-    assert capa.features.Characteristic("embedded pe") in features
-
-
-def test_stackstring_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4556E5))
-    assert capa.features.Characteristic("stack string") in features
-
-
-def test_switch_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409411))
-    assert capa.features.Characteristic("switch") in features
-
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409393))
-    assert capa.features.Characteristic("switch") not in features
-
-
-def test_recursive_call_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
-    features = extract_function_features(
-        viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003100)
-    )
-    assert capa.features.Characteristic("recursive call") in features
-
-    features = extract_function_features(
-        viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007B00)
-    )
-    assert capa.features.Characteristic("recursive call") not in features
-
-
-def test_loop_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
-    features = extract_function_features(
-        viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003D30)
-    )
-    assert capa.features.Characteristic("loop") in features
-
-    features = extract_function_features(
-        viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007250)
-    )
-    assert capa.features.Characteristic("loop") not in features
-
-
-def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
-    features = extract_file_features(
-        sample_bfb9b5391a13d0afd787e87ab90f14f5.vw, sample_bfb9b5391a13d0afd787e87ab90f14f5.path,
-    )
-    assert capa.features.String("WarStop") in features  # ASCII, offset 0x40EC
-    assert capa.features.String("cimage/png") in features  # UTF-16 LE, offset 0x350E
-
-
-def test_function_calls_to(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-    assert capa.features.Characteristic("calls to") in features
-    assert len(features[capa.features.Characteristic("calls to")]) == 1
-
-
-def test_function_calls_to64(sample_lab21_01):
-    features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400052D0))  # memcpy
-    assert capa.features.Characteristic("calls to") in features
-    assert len(features[capa.features.Characteristic("calls to")]) == 8
-
-
-def test_function_calls_from(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-    assert capa.features.Characteristic("calls from") in features
-    assert len(features[capa.features.Characteristic("calls from")]) == 23
-
-
-def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-    assert len(features[capa.features.basicblock.BasicBlock()]) == 26
-
-
-def test_indirect_call_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA68A0))
-    assert capa.features.Characteristic("indirect call") in features
-    assert len(features[capa.features.Characteristic("indirect call")]) == 3
-
-
-def test_indirect_calls_resolved(sample_c91887d861d9bd4a5872249b641bc9f9):
-    features = extract_function_features(viv_utils.Function(sample_c91887d861d9bd4a5872249b641bc9f9.vw, 0x401A77))
-    assert capa.features.insn.API("kernel32.CreatePipe") in features
-    assert capa.features.insn.API("kernel32.SetHandleInformation") in features
-    assert capa.features.insn.API("kernel32.CloseHandle") in features
-    assert capa.features.insn.API("kernel32.WriteFile") in features
+@parametrize(
+    "sample,scope,feature,expected",
+    FEATURE_COUNT_TESTS,
+    indirect=["sample", "scope"],
+)
+def test_viv_feature_counts(sample, scope, feature, expected):
+    """
+    Run one FEATURE_COUNT_TESTS case through `do_test_feature_count`
+    using `get_viv_extractor`; the call is wrapped in `xfail` on Python 3.
+    """
+    running_py3 = sys.version_info >= (3, 0)
+    with xfail(running_py3, reason="vivsect only works on py2"):
+        do_test_feature_count(get_viv_extractor, sample, scope, feature, expected)
Author	SHA1	Message	Date
William Ballenthin	216e288ade	unpack: add can_unpack routine	2020-08-29 21:38:26 -06:00
William Ballenthin	ec55a9c482	unpack: aspack: add doc	2020-08-26 15:15:06 -06:00
William Ballenthin	e5136683e6	unpack: aspack: add documentation	2020-08-26 14:26:36 -06:00
William Ballenthin	63561b73c1	unpack: aspack: split into its own module	2020-08-26 14:07:25 -06:00
William Ballenthin	b700ad1655	unpack: aspack: fixup PE imports after dumping	2020-08-26 13:49:08 -06:00
William Ballenthin	52eef09c8b	unpack: aspack: split dump from fixup	2020-08-25 19:08:00 -06:00
William Ballenthin	de3f223bb5	unpack: aspack: fixup the PE header for OEP	2020-08-25 18:57:11 -06:00
William Ballenthin	6a2e5ad530	unpack: use speakeasy to find aspack OEP	2020-08-25 18:50:15 -06:00
William Ballenthin	5575166b7a	unpack: flesh out tests	2020-08-25 12:53:43 -06:00
William Ballenthin	8a2ba9cd71	Merge branch 'backend-lancelot' into fix-46	2020-08-25 12:43:45 -06:00
William Ballenthin	e523c6cb50	init unpack module	2020-08-25 12:43:40 -06:00
Capa Bot	8be1c84fd2	Sync capa rules submodule	2020-08-25 16:35:30 +00:00
Capa Bot	739100d481	Sync capa-testfiles submodule	2020-08-25 16:34:26 +00:00
Willi Ballenthin	fd7d9aafe9	Merge pull request #247 from Ana06/test-pythons Test all supported Python versions	2020-08-21 07:55:08 -06:00
Ana María Martínez Gómez	a39e3cca79	ci: test all supported Python versions I assume once we migrate to Python3, we want to support Python 3.6-9. Python 3.5 will stop receiving security fixes next month, so I don't think we need to support it. As running the test as many times as we want is free, run them for all supported versions to ensure capa work in all of them.	2020-08-21 15:39:13 +02:00
Ana María Martínez Gómez	ad011b08f6	ci: use matrix in tests workflow to avoid duplication Use a matrix with the Python version to avoid duplication when testing different Python versions.	2020-08-21 15:00:06 +02:00
William Ballenthin	854e3d7774	submodule: rules update	2020-08-20 15:15:14 -06:00
Capa Bot	b4fa6fc954	Sync capa rules submodule	2020-08-20 10:06:04 +00:00
Willi Ballenthin	585a9c167f	Merge pull request #243 from fireeye/fix-241 fix 241: string counting exception	2020-08-18 12:09:52 -06:00
Willi Ballenthin	5f731f72ed	Merge pull request #239 from fireeye/backport-py3-fixes backport py3 testing enhancements	2020-08-18 12:09:22 -06:00
Willi Ballenthin	385c956184	fixtures: fix doc	2020-08-17 20:53:34 -06:00
Willi Ballenthin	d8f2b7b4df	Merge pull request #236 from fireeye/fix-225 fix 225: declarative tests	2020-08-17 10:06:22 -06:00
Willi Ballenthin	b49ed276a9	Merge pull request #238 from Ana06/zip-binaries Fix build workflow & zip binaries	2020-08-17 07:47:08 -06:00
Ana María Martínez Gómez	a2da55fb6f	Add version number to zip in build workflow Relay in `github.ref` (the release tag).	2020-08-17 11:59:04 +02:00
William Ballenthin	1b6ac29053	Merge branch 'master' into backend-lancelot	2020-08-17 00:04:57 -06:00
William Ballenthin	d3dad3a66a	rules: fix bug in string counting closes #241	2020-08-16 21:38:13 -06:00
William Ballenthin	b084f7cb9b	pep8	2020-08-16 05:18:39 -06:00
William Ballenthin	89edaf4c5c	tests: xfail things that won't work on py3	2020-08-16 05:17:17 -06:00
William Ballenthin	6cd2931645	ci: test on both py2 and py3	2020-08-16 05:04:19 -06:00
William Ballenthin	295d3fee5d	tests: limit tests to py2/py3	2020-08-16 05:03:57 -06:00
William Ballenthin	0af6386693	tests: fixtures: add ctxmgr for catching xfail	2020-08-16 05:03:23 -06:00
William Ballenthin	1873d0b7c5	*: py3 compat	2020-08-16 05:03:08 -06:00
William Ballenthin	c032d556fb	tests: freeze: make py3 compatible	2020-08-16 05:02:35 -06:00
William Ballenthin	d7f1c23f4d	tests: show found number of features when unexpected	2020-08-16 05:01:20 -06:00
William Ballenthin	df4c75882d	dos2unix	2020-08-16 04:58:35 -06:00
William Ballenthin	0cfbed05b4	ci: install pefile on py3	2020-08-16 01:41:25 -06:00
William Ballenthin	ca95512811	ci: test on both py2 and py3	2020-08-16 01:36:25 -06:00
William Ballenthin	bb9803fcc0	tests: limit tests to py2/py3	2020-08-16 01:10:10 -06:00
William Ballenthin	1fe945e3ed	tests: main: xfail sc analysis on py3	2020-08-16 01:09:40 -06:00
William Ballenthin	6ba4798822	tests: fixtures: add ctxmgr for catching xfail	2020-08-16 01:09:06 -06:00
William Ballenthin	f424dd126f	*: py3 compat	2020-08-16 00:05:26 -06:00
William Ballenthin	9fa128b27d	tests: freeze: make py3 compatible	2020-08-16 00:05:04 -06:00
William Ballenthin	09bca1e5f7	setup: bump lancelot dep version	2020-08-16 00:04:39 -06:00
William Ballenthin	c623791a84	tests: lancelot: use common harness from fixtures	2020-08-15 23:32:50 -06:00
William Ballenthin	980a34adca	setup: bump lancelot dep version	2020-08-15 23:32:35 -06:00
William Ballenthin	8721eb05eb	tests: show found number of features when unexpected	2020-08-15 23:32:13 -06:00
William Ballenthin	512ea89662	Merge branch 'backend-lancelot' of github.com:fireeye/capa into backend-lancelot	2020-08-15 21:52:53 -06:00
William Ballenthin	15259d455c	Merge branch 'master' into backend-lancelot	2020-08-15 21:52:28 -06:00
Willi Ballenthin	1e097ef759	setup: add pylancelot dep on py3	2020-08-15 11:13:06 -06:00
Ana María Martínez Gómez	f7925c2990	Fix pypinstaller to version 3 in build workflow pyinstaller 4 doesn't support Python 2.7. Without a version, it takes the last version making the workflow fail.	2020-08-15 12:28:51 +02:00
Ana María Martínez Gómez	b94f665d4b	Zip release binaries Update `build` workflow to zip the binaries before upload them. Use linux to zip all the binaries.	2020-08-15 12:28:48 +02:00
Ana María Martínez Gómez	68f27dfea4	Fix indentation of build workflow Correct indentation to make it easier to read.	2020-08-15 09:11:18 +02:00
Ana María Martínez Gómez	35226e1e4e	Use GitHub default repo token in build action As we this workflow modifies only the same repo, no extra token (`CAPA_TOKEN`) is needed and we can use the default `GITHUB TOKEN` instead.	2020-08-15 09:11:16 +02:00
Capa Bot	9c40befdd3	Sync capa-testfiles submodule	2020-08-14 19:35:00 +00:00
William Ballenthin	c1b7176e36	submodule: testfiles update	2020-08-14 13:34:43 -06:00
William Ballenthin	259a0a2007	tests: ida: remove old print	2020-08-14 13:15:22 -06:00
William Ballenthin	eee565b596	tests: ida: tweak tests to fit IDA behavior	2020-08-14 13:10:38 -06:00
William Ballenthin	26061c25a5	tests: fixtures: add mapping from test data to md5	2020-08-14 12:58:08 -06:00
William Ballenthin	897da4237d	tests: fixtures: remove lru_cache on some accessors	2020-08-14 12:48:19 -06:00
William Ballenthin	1923d479d8	tests: fixtures: fix name error	2020-08-14 12:35:30 -06:00
William Ballenthin	6b8bce4f42	tests: fixtures: factor out resolution of scope/sample	2020-08-14 12:34:00 -06:00
William Ballenthin	107a68628b	tests: ida: attempt to use new framework (wip)	2020-08-14 12:22:59 -06:00
William Ballenthin	26c9811ba1	tests: viv: fix typo preventing some tests from running	2020-08-14 12:22:39 -06:00
William Ballenthin	b784f086b4	tests: make fixtures more consistent in prep for other backends	2020-08-14 12:04:53 -06:00
William Ballenthin	d161c094a6	setup: add backports.lru_cache for py2.7	2020-08-14 11:28:44 -06:00
William Ballenthin	8cbe3f8546	tests: move expected features into fixtures for reuse closes #225	2020-08-14 11:25:00 -06:00
William Ballenthin	0e049ef56d	viv: insn: fix gs extraction	2020-08-14 11:18:19 -06:00
William Ballenthin	f193ceb91a	merge	2020-08-14 10:26:52 -06:00
Willi Ballenthin	ac7f079af8	Merge pull request #235 from fireeye/progressbar-tweaks main: progress bar updates (+rules, and realize iterators)	2020-08-14 10:23:43 -06:00
William Ballenthin	5f47280e0d	main: disable spinner when in quiet mode	2020-08-14 10:19:39 -06:00
Capa Bot	b7d39cf4c9	Sync capa rules submodule	2020-08-14 16:02:13 +00:00
William Ballenthin	de2c3c9800	main: display spinner while generating viv workspace	2020-08-14 09:38:08 -06:00
William Ballenthin	6e525a93d7	viv: insn: derefs: fix exception	2020-08-14 09:37:51 -06:00
William Ballenthin	90cdef5232	main: progress bar updates (+rules, and realize iterators)	2020-08-13 17:25:07 -06:00
William Ballenthin	fb19841997	Merge branch 'master' into backend-lancelot	2020-08-13 17:24:28 -06:00
William Ballenthin	40d16c925f	main: progress bar updates (+rules, and realize iterators)	2020-08-13 17:23:36 -06:00
William Ballenthin	d5f73b47a4	main: use lancelot on py3	2020-08-13 13:35:09 -06:00
William Ballenthin	caf738ee4e	lancelot: implement indirect call resolution	2020-08-13 13:22:50 -06:00
William Ballenthin	c6f27200fe	lancelot: pull get_operand_target into helpers	2020-08-13 13:22:29 -06:00
Capa Bot	e3e13cdb11	Sync capa rules submodule	2020-08-13 18:51:28 +00:00
William Ballenthin	5c967cd6ef	lancelot: insn: calls to/from	2020-08-13 12:39:32 -06:00
Willi Ballenthin	db3369fd09	Merge pull request #232 from Ana06/remove-switch extractor: remove characteristic(switch)	2020-08-13 10:07:07 -06:00
Capa Bot	35086d4a69	Sync capa rules submodule	2020-08-13 16:06:21 +00:00
Ana María Martínez Gómez	adaac03d1d	extractor: remove characteristic(switch) Get rid of the `characteristic(switch)` feature as any of our rules use it and its analysis is not very easy. Analysis results most likely differ across backends, leading to inconsistency.	2020-08-13 16:47:01 +02:00
Capa Bot	199cccaef9	Sync capa rules submodule	2020-08-12 23:27:17 +00:00
Capa Bot	e64277ed41	Sync capa-testfiles submodule	2020-08-12 23:26:45 +00:00
Willi Ballenthin	744b4915c9	Merge pull request #226 from fireeye/enhancement-223 IDA: resolve nested data references to strings/bytes	2020-08-12 09:05:11 -06:00
Capa Bot	5d9ccf1f76	Sync capa rules submodule	2020-08-11 21:04:09 +00:00
Capa Bot	15607d63ab	Sync capa-testfiles submodule	2020-08-11 21:03:00 +00:00
Willi Ballenthin	362db6898a	Merge pull request #230 from fireeye/enhancement-immediate-memory-reference-as-number adding support to emit number features for unmapped immediate memory references	2020-08-11 14:59:26 -06:00
Michael Hunhoff	70b4546c33	adding test for unmapped immediate data reference	2020-08-11 14:13:43 -06:00
Michael Hunhoff	791afd7ac8	adding code to emit number feature for unmapped immediate data reference	2020-08-11 14:12:41 -06:00
Capa Bot	6f352283e6	Sync capa-testfiles submodule	2020-08-11 19:36:17 +00:00
Capa Bot	db85fbab4f	Sync capa rules submodule	2020-08-11 14:54:42 +00:00
mike-hunhoff	20cc23adc5	Merge pull request #228 from fireeye/bugfix-explorer-display-arch-decorator explorer: adding support to display arch decorator on numbers/offsets	2020-08-11 07:50:08 -07:00
Michael Hunhoff	828819e13f	switching to iterative solution for data reference search	2020-08-11 08:45:20 -06:00
Michael Hunhoff	79d94144c6	adding IDA extractor code to resolve nested data references for string and bytes features	2020-08-11 08:44:44 -06:00
Michael Hunhoff	c46a1d2b44	black format changes	2020-08-11 08:26:48 -06:00
Capa Bot	7a18fbf9d4	Sync capa rules submodule	2020-08-11 07:19:00 +00:00
Capa Bot	7d62156a29	Sync capa-testfiles submodule	2020-08-11 07:12:56 +00:00
William Ballenthin	a59e1054fe	tests: lancelot: feature counts	2020-08-10 19:04:29 -06:00
William Ballenthin	db45a06ba7	lancelot: insn: simplify operand target fetching	2020-08-10 19:04:09 -06:00
William Ballenthin	c739caee40	tests: lancelot: add tests for counts of features	2020-08-10 18:53:40 -06:00
William Ballenthin	bdf6b69be6	lancelot: insn: indirect call	2020-08-10 18:45:58 -06:00
William Ballenthin	fdd6f7434b	lancelot: insn: xsection flow and recursive calls	2020-08-10 18:40:45 -06:00
Michael Hunhoff	def8130a24	adding support to display arch decorator on numbers/offsets	2020-08-10 18:27:37 -06:00
William Ballenthin	21adb2b9d1	tests: lancelot: formatting	2020-08-10 18:16:14 -06:00
William Ballenthin	5929c0652c	lancelot: insn: fs/gs	2020-08-10 18:15:10 -06:00
William Ballenthin	e7bf5bfceb	lancelot: insn: nzxor	2020-08-10 18:05:26 -06:00
William Ballenthin	c2f55fad12	tests: lancelot: construct assert message	2020-08-10 18:05:08 -06:00
William Ballenthin	7ac4cf47f7	lancelot: insn: pass f, bb, insn throughout	2020-08-10 18:04:37 -06:00
William Ballenthin	3f49a224f5	lancelot: off-by-one instruction enumerator	2020-08-10 18:03:40 -06:00
William Ballenthin	695f1bf55a	lancelot: insn: strings	2020-08-10 17:23:19 -06:00
William Ballenthin	10f5a54e1d	lancelot: insn: bytes	2020-08-10 17:08:28 -06:00
William Ballenthin	042654ee97	lancelot: insn: mnemonic	2020-08-10 13:50:46 -06:00
William Ballenthin	1da450001c	lancelot: insn: offset	2020-08-10 13:47:43 -06:00
William Ballenthin	7996e2efe7	tests: lancelot: remove old tests	2020-08-10 11:51:48 -06:00
William Ballenthin	5eded3c5cc	lancelot: insn: implement API features	2020-08-10 11:49:37 -06:00
William Ballenthin	cdae840519	lancelot: file: fix import address	2020-08-10 11:49:11 -06:00
William Ballenthin	fcb8c4a293	tests: lancelot: override parametrize for better naming	2020-08-09 15:46:34 -06:00
William Ballenthin	4e6b475ff6	tests: lancelot: add number tests	2020-08-08 13:55:52 -06:00
William Ballenthin	02a8ad1ea4	tests: add more lancelot feature tests	2020-08-08 13:52:22 -06:00
William Ballenthin	393b332f9c	feature: insn: better render negative offset	2020-08-08 13:52:01 -06:00
William Ballenthin	bf4a8dcd3e	setup: add dep on backports.lru	2020-08-08 13:51:50 -06:00
William Ballenthin	9bde11fa6f	extractor: lancelot: fix stackstring	2020-08-08 13:51:34 -06:00
William Ballenthin	43c6eec30b	extractors: begin to implement lancelot backend	2020-08-08 12:48:56 -06:00
Capa Bot	f7cd52826e	Sync capa rules submodule	2020-08-05 18:51:51 +00:00
Capa Bot	23d31c3c2c	Sync capa-testfiles submodule	2020-08-05 18:50:52 +00:00
Willi Ballenthin	732b47e845	changelog: fix @mike-hunhoff handle	2020-08-05 08:20:34 -06:00