Merge pull request #515 from Ana06/v1-6-1

changelog: add master (unreleased) to CHANGELOG
Add placeholder for master (unreleased changes) in CHANGELOG. Document this in the release checklist.
2025-12-14 16:40:44 -08:00 · 2021-04-07 18:03:56 +02:00 · 2021-04-07 17:50:19 +02:00 · 2021-04-07 17:50:16 +02:00 · 2021-04-07 13:53:32 +00:00 · 2021-04-07 11:11:43 +02:00
67 changed files with 5329 additions and 2226 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,9 @@
+# Set the default behavior, in case people don't have core.autocrlf set.
+* text=auto
+
+# Explicitly declare text files you want to always be normalized and converted
+# to native line endings on checkout.
+*.py text
+*.yml text
+*.md text
+*.txt text
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -5,7 +5,7 @@ about: Create a report to help us improve
 ---
 <!--
 # Is your bug report related to capa rules (for example a false positive)?
-We use sybmodules to separate code, rules and test data. If your issue is related to capa rules, please report it at https://github.com/fireeye/capa-rules/issues.
+We use submodules to separate code, rules and test data. If your issue is related to capa rules, please report it at https://github.com/fireeye/capa-rules/issues.

 # Have you checked that your issue isn't already filed?
 Please search if there is a similar issue at https://github.com/fireeye/capa/issues. If there is already a similar issue, please add more details there instead of opening a new one.
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -5,7 +5,7 @@ about: Suggest an idea for capa
 ---
 <!--
 # Is your issue related to capa rules (for example an idea for a new rule)?
-We use sybmodules to separate code, rules and test data. If your issue is related to capa rules, please report it at https://github.com/fireeye/capa-rules/issues.
+We use submodules to separate code, rules and test data. If your issue is related to capa rules, please report it at https://github.com/fireeye/capa-rules/issues.

 # Have you checked that your issue isn't already filed?
 Please search if there is a similar issue at https://github.com/fireeye/capa/issues. If there is already a similar issue, please add more details there instead of opening a new one.
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -0,0 +1,6 @@
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "weekly"
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -0,0 +1,32 @@
+<!--
+Thank you for contributing to capa! :heart:
+
+IMPORTANT NOTE
+It's most important that you submit your improvements. So even if you don't use this complete template we look forward to collaborating!
+
+Please read capa's CONTRIBUTING guide if you haven't done so already.
+It contains helpful information about how to contribute to capa. Check https://github.com/fireeye/capa/blob/master/.github/CONTRIBUTING.md
+
+PR template based on https://embeddedartistry.com/blog/2017/08/04/a-github-pull-request-template-for-your-projects/
+-->
+
+### Description
+
+<!-- Please describe the changes in this PR. Including your motivation and context helps us to review. -->
+
+closes # (issue)
+
+### Type of change
+
+Please update the [CHANGELOG.md](/CHANGELOG.md)
+
+- [ ] Bug fix (non-breaking change which fixes an issue)
+- [ ] New feature (non-breaking change which adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] This change requires a documentation update
+  - [ ] I have made the corresponding changes to the documentation
+
+### Tests
+
+- [ ] I have added tests that prove my fix is effective or that my feature works
+- [ ] No new tests needed
--- a/.github/pyinstaller/hooks/hook-smda.py
+++ b/.github/pyinstaller/hooks/hook-smda.py
@@ -0,0 +1,5 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+import PyInstaller.utils.hooks
+
+# ref: https://groups.google.com/g/pyinstaller/c/amWi0-66uZI/m/miPoKfWjBAAJ
+binaries = PyInstaller.utils.hooks.collect_dynamic_libs("capstone")
--- a/.github/pyinstaller/hooks/hook-vivisect.py
+++ b/.github/pyinstaller/hooks/hook-vivisect.py
@@ -13,3 +13,144 @@ from PyInstaller.utils.hooks import copy_metadata
 #
 # ref: https://github.com/pyinstaller/pyinstaller/issues/1713#issuecomment-162682084
 datas = copy_metadata("vivisect")
+
+excludedimports = [
+    # viv gui requires these heavy libraries,
+    # but viv as a library doesn't.
+    # they shouldn't be installed in our configuration,
+    # but we'll ensure they don't slip in here (such as on developers' systems).
+    "PyQt5",
+    "qt5",
+    "pyqtwebengine",
+    # the above are imported by these viv modules.
+    # so really, we'd want to exclude these submodules of viv.
+    # but I don't think this works.
+    "vqt",
+    "vdb.qt",
+    "envi.qt",
+    # unused by capa
+    "pyasn1",
+]
+
+hiddenimports = [
+    # vivisect does manual/runtime importing of its modules,
+    # so declare the things that could be imported here.
+    "vivisect",
+    "vivisect.analysis",
+    "vivisect.analysis.amd64",
+    "vivisect.analysis.amd64",
+    "vivisect.analysis.amd64.emulation",
+    "vivisect.analysis.amd64.golang",
+    "vivisect.analysis.crypto",
+    "vivisect.analysis.crypto",
+    "vivisect.analysis.crypto.constants",
+    "vivisect.analysis.elf",
+    "vivisect.analysis.elf",
+    "vivisect.analysis.elf.elfplt",
+    "vivisect.analysis.elf.libc_start_main",
+    "vivisect.analysis.generic",
+    "vivisect.analysis.generic",
+    "vivisect.analysis.generic.codeblocks",
+    "vivisect.analysis.generic.emucode",
+    "vivisect.analysis.generic.entrypoints",
+    "vivisect.analysis.generic.funcentries",
+    "vivisect.analysis.generic.impapi",
+    "vivisect.analysis.generic.mkpointers",
+    "vivisect.analysis.generic.pointers",
+    "vivisect.analysis.generic.pointertables",
+    "vivisect.analysis.generic.relocations",
+    "vivisect.analysis.generic.strconst",
+    "vivisect.analysis.generic.switchcase",
+    "vivisect.analysis.generic.thunks",
+    "vivisect.analysis.generic.noret",
+    "vivisect.analysis.i386",
+    "vivisect.analysis.i386",
+    "vivisect.analysis.i386.calling",
+    "vivisect.analysis.i386.golang",
+    "vivisect.analysis.i386.importcalls",
+    "vivisect.analysis.i386.instrhook",
+    "vivisect.analysis.i386.thunk_bx",
+    "vivisect.analysis.ms",
+    "vivisect.analysis.ms",
+    "vivisect.analysis.ms.hotpatch",
+    "vivisect.analysis.ms.localhints",
+    "vivisect.analysis.ms.msvc",
+    "vivisect.analysis.ms.msvcfunc",
+    "vivisect.analysis.ms.vftables",
+    "vivisect.analysis.pe",
+    "vivisect.impapi.posix.amd64",
+    "vivisect.impapi.posix.i386",
+    "vivisect.impapi.windows",
+    "vivisect.impapi.windows.amd64",
+    "vivisect.impapi.windows.i386",
+    "vivisect.impapi.winkern.i386",
+    "vivisect.impapi.winkern.amd64",
+    "vivisect.parsers.blob",
+    "vivisect.parsers.elf",
+    "vivisect.parsers.ihex",
+    "vivisect.parsers.macho",
+    "vivisect.parsers.pe",
+    "vivisect.storage",
+    "vivisect.storage.basicfile",
+    "vstruct.constants",
+    "vstruct.constants.ntstatus",
+    "vstruct.defs",
+    "vstruct.defs.arm7",
+    "vstruct.defs.bmp",
+    "vstruct.defs.dns",
+    "vstruct.defs.elf",
+    "vstruct.defs.gif",
+    "vstruct.defs.ihex",
+    "vstruct.defs.inet",
+    "vstruct.defs.java",
+    "vstruct.defs.kdcom",
+    "vstruct.defs.macho",
+    "vstruct.defs.macho.const",
+    "vstruct.defs.macho.fat",
+    "vstruct.defs.macho.loader",
+    "vstruct.defs.macho.stabs",
+    "vstruct.defs.minidump",
+    "vstruct.defs.pcap",
+    "vstruct.defs.pe",
+    "vstruct.defs.pptp",
+    "vstruct.defs.rar",
+    "vstruct.defs.swf",
+    "vstruct.defs.win32",
+    "vstruct.defs.windows",
+    "vstruct.defs.windows.win_5_1_i386",
+    "vstruct.defs.windows.win_5_1_i386.ntdll",
+    "vstruct.defs.windows.win_5_1_i386.ntoskrnl",
+    "vstruct.defs.windows.win_5_1_i386.win32k",
+    "vstruct.defs.windows.win_5_2_i386",
+    "vstruct.defs.windows.win_5_2_i386.ntdll",
+    "vstruct.defs.windows.win_5_2_i386.ntoskrnl",
+    "vstruct.defs.windows.win_5_2_i386.win32k",
+    "vstruct.defs.windows.win_6_1_amd64",
+    "vstruct.defs.windows.win_6_1_amd64.ntdll",
+    "vstruct.defs.windows.win_6_1_amd64.ntoskrnl",
+    "vstruct.defs.windows.win_6_1_amd64.win32k",
+    "vstruct.defs.windows.win_6_1_i386",
+    "vstruct.defs.windows.win_6_1_i386.ntdll",
+    "vstruct.defs.windows.win_6_1_i386.ntoskrnl",
+    "vstruct.defs.windows.win_6_1_i386.win32k",
+    "vstruct.defs.windows.win_6_1_wow64",
+    "vstruct.defs.windows.win_6_1_wow64.ntdll",
+    "vstruct.defs.windows.win_6_2_amd64",
+    "vstruct.defs.windows.win_6_2_amd64.ntdll",
+    "vstruct.defs.windows.win_6_2_amd64.ntoskrnl",
+    "vstruct.defs.windows.win_6_2_amd64.win32k",
+    "vstruct.defs.windows.win_6_2_i386",
+    "vstruct.defs.windows.win_6_2_i386.ntdll",
+    "vstruct.defs.windows.win_6_2_i386.ntoskrnl",
+    "vstruct.defs.windows.win_6_2_i386.win32k",
+    "vstruct.defs.windows.win_6_2_wow64",
+    "vstruct.defs.windows.win_6_2_wow64.ntdll",
+    "vstruct.defs.windows.win_6_3_amd64",
+    "vstruct.defs.windows.win_6_3_amd64.ntdll",
+    "vstruct.defs.windows.win_6_3_amd64.ntoskrnl",
+    "vstruct.defs.windows.win_6_3_i386",
+    "vstruct.defs.windows.win_6_3_i386.ntdll",
+    "vstruct.defs.windows.win_6_3_i386.ntoskrnl",
+    "vstruct.defs.windows.win_6_3_wow64",
+    "vstruct.defs.windows.win_6_3_wow64.ntdll",
+]
--- a/.github/pyinstaller/pyinstaller.spec
+++ b/.github/pyinstaller/pyinstaller.spec
@@ -16,9 +16,10 @@ with open('./capa/version.py', 'wb') as f:
    #                 - commits since
    #                   g------- git hash fragment
    version = (subprocess.check_output(["git", "describe", "--always", "--tags", "--long"])
+               .decode("utf-8")
               .strip()
               .replace("tags/", ""))
-    f.write("__version__ = '%s'" % version)
+    f.write(("__version__ = '%s'" % version).encode("utf-8"))

 a = Analysis(
    # when invoking pyinstaller from the project root,
@@ -41,128 +42,6 @@ a = Analysis(
        # ref: https://stackoverflow.com/a/62278462/87207
        (os.path.dirname(wcwidth.__file__), 'wcwidth')
    ],
-    hiddenimports=[
-        # vivisect does manual/runtime importing of its modules,
-        # so declare the things that could be imported here.
-        "vivisect",
-        "vivisect.analysis",
-        "vivisect.analysis.amd64",
-        "vivisect.analysis.amd64",
-        "vivisect.analysis.amd64.emulation",
-        "vivisect.analysis.amd64.golang",
-        "vivisect.analysis.crypto",
-        "vivisect.analysis.crypto",
-        "vivisect.analysis.crypto.constants",
-        "vivisect.analysis.elf",
-        "vivisect.analysis.elf",
-        "vivisect.analysis.elf.elfplt",
-        "vivisect.analysis.elf.libc_start_main",
-        "vivisect.analysis.generic",
-        "vivisect.analysis.generic",
-        "vivisect.analysis.generic.codeblocks",
-        "vivisect.analysis.generic.emucode",
-        "vivisect.analysis.generic.entrypoints",
-        "vivisect.analysis.generic.funcentries",
-        "vivisect.analysis.generic.impapi",
-        "vivisect.analysis.generic.mkpointers",
-        "vivisect.analysis.generic.pointers",
-        "vivisect.analysis.generic.pointertables",
-        "vivisect.analysis.generic.relocations",
-        "vivisect.analysis.generic.strconst",
-        "vivisect.analysis.generic.switchcase",
-        "vivisect.analysis.generic.thunks",
-        "vivisect.analysis.i386",
-        "vivisect.analysis.i386",
-        "vivisect.analysis.i386.calling",
-        "vivisect.analysis.i386.golang",
-        "vivisect.analysis.i386.importcalls",
-        "vivisect.analysis.i386.instrhook",
-        "vivisect.analysis.i386.thunk_bx",
-        "vivisect.analysis.ms",
-        "vivisect.analysis.ms",
-        "vivisect.analysis.ms.hotpatch",
-        "vivisect.analysis.ms.localhints",
-        "vivisect.analysis.ms.msvc",
-        "vivisect.analysis.ms.msvcfunc",
-        "vivisect.analysis.ms.vftables",
-        "vivisect.analysis.pe",
-        "vivisect.impapi.posix.amd64",
-        "vivisect.impapi.posix.i386",
-        "vivisect.impapi.windows",
-        "vivisect.impapi.windows.amd64",
-        "vivisect.impapi.windows.i386",
-        "vivisect.impapi.winkern.i386",
-        "vivisect.impapi.winkern.amd64",
-        "vivisect.parsers.blob",
-        "vivisect.parsers.elf",
-        "vivisect.parsers.ihex",
-        "vivisect.parsers.macho",
-        "vivisect.parsers.pe",
-        "vivisect.parsers.utils",
-        "vivisect.storage",
-        "vivisect.storage.basicfile",
-        "vstruct.constants",
-        "vstruct.constants.ntstatus",
-        "vstruct.defs",
-        "vstruct.defs.arm7",
-        "vstruct.defs.bmp",
-        "vstruct.defs.dns",
-        "vstruct.defs.elf",
-        "vstruct.defs.gif",
-        "vstruct.defs.ihex",
-        "vstruct.defs.inet",
-        "vstruct.defs.java",
-        "vstruct.defs.kdcom",
-        "vstruct.defs.macho",
-        "vstruct.defs.macho.const",
-        "vstruct.defs.macho.fat",
-        "vstruct.defs.macho.loader",
-        "vstruct.defs.macho.stabs",
-        "vstruct.defs.minidump",
-        "vstruct.defs.pcap",
-        "vstruct.defs.pe",
-        "vstruct.defs.pptp",
-        "vstruct.defs.rar",
-        "vstruct.defs.swf",
-        "vstruct.defs.win32",
-        "vstruct.defs.windows",
-        "vstruct.defs.windows.win_5_1_i386",
-        "vstruct.defs.windows.win_5_1_i386.ntdll",
-        "vstruct.defs.windows.win_5_1_i386.ntoskrnl",
-        "vstruct.defs.windows.win_5_1_i386.win32k",
-        "vstruct.defs.windows.win_5_2_i386",
-        "vstruct.defs.windows.win_5_2_i386.ntdll",
-        "vstruct.defs.windows.win_5_2_i386.ntoskrnl",
-        "vstruct.defs.windows.win_5_2_i386.win32k",
-        "vstruct.defs.windows.win_6_1_amd64",
-        "vstruct.defs.windows.win_6_1_amd64.ntdll",
-        "vstruct.defs.windows.win_6_1_amd64.ntoskrnl",
-        "vstruct.defs.windows.win_6_1_amd64.win32k",
-        "vstruct.defs.windows.win_6_1_i386",
-        "vstruct.defs.windows.win_6_1_i386.ntdll",
-        "vstruct.defs.windows.win_6_1_i386.ntoskrnl",
-        "vstruct.defs.windows.win_6_1_i386.win32k",
-        "vstruct.defs.windows.win_6_1_wow64",
-        "vstruct.defs.windows.win_6_1_wow64.ntdll",
-        "vstruct.defs.windows.win_6_2_amd64",
-        "vstruct.defs.windows.win_6_2_amd64.ntdll",
-        "vstruct.defs.windows.win_6_2_amd64.ntoskrnl",
-        "vstruct.defs.windows.win_6_2_amd64.win32k",
-        "vstruct.defs.windows.win_6_2_i386",
-        "vstruct.defs.windows.win_6_2_i386.ntdll",
-        "vstruct.defs.windows.win_6_2_i386.ntoskrnl",
-        "vstruct.defs.windows.win_6_2_i386.win32k",
-        "vstruct.defs.windows.win_6_2_wow64",
-        "vstruct.defs.windows.win_6_2_wow64.ntdll",
-        "vstruct.defs.windows.win_6_3_amd64",
-        "vstruct.defs.windows.win_6_3_amd64.ntdll",
-        "vstruct.defs.windows.win_6_3_amd64.ntoskrnl",
-        "vstruct.defs.windows.win_6_3_i386",
-        "vstruct.defs.windows.win_6_3_i386.ntdll",
-        "vstruct.defs.windows.win_6_3_i386.ntoskrnl",
-        "vstruct.defs.windows.win_6_3_wow64",
-        "vstruct.defs.windows.win_6_3_wow64.ntdll",
-    ],
    # when invoking pyinstaller from the project root,
    # this gets run from the project root.
    hookspath=['.github/pyinstaller/hooks'],
@@ -180,6 +59,25 @@ a = Analysis(
        # since we don't spawn a notebook, we can safely remove these.
        "IPython",
        "ipywidgets",
+
+        # these are pulled in by networkx
+        # but we don't need to compute the strongly connected components.
+        "numpy",
+        "scipy",
+        "matplotlib",
+        "pandas",
+        "pytest",
+
+        # deps from viv that we don't use.
+        # this duplicates the entries in `hook-vivisect`,
+        # but works better this way.
+        "vqt",
+        "vdb.qt",
+        "envi.qt",
+        "PyQt5",
+        "qt5",
+        "pyqtwebengine",
+        "pyasn1"
    ])

 a.binaries = a.binaries - TOC([
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -2,7 +2,7 @@ name: build

 on:
  release:
-    types: [created, edited, published]
+    types: [edited, published]

 jobs:
  build:
@@ -15,10 +15,10 @@ jobs:
            # use old linux so that the shared library versioning is more portable
            artifact_name: capa
            asset_name: linux
-          - os: windows-latest
+          - os: windows-2019
            artifact_name: capa.exe
            asset_name: windows
-          - os: macos-latest
+          - os: macos-10.15
            artifact_name: capa
            asset_name: macos
    steps:
@@ -26,19 +26,14 @@ jobs:
        uses: actions/checkout@v2
        with:
          submodules: true
-      - name: Set up Python 2.7
+      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
-          python-version: 2.7
-      - if: matrix.os == 'ubuntu-latest'
+          python-version: 3.9
+      - if: matrix.os == 'ubuntu-16.04'
        run: sudo apt-get install -y libyaml-dev
-      - if: matrix.os == 'windows-latest'
-        run: |
-          choco install vcredist2008
-          choco install --ignore-dependencies vcpython27
      - name: Install PyInstaller
-        # pyinstaller 4 doesn't support Python 2.7
-        run: pip install 'pyinstaller==3.*'
+        run: pip install 'pyinstaller==4.2'
      - name: Install capa
        run: pip install -e .
      - name: Build standalone executable
@@ -52,7 +47,7 @@ jobs:

  zip:
    name: zip ${{ matrix.asset_name }}
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
    needs: build
    strategy:
      matrix:
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -9,7 +9,7 @@ on:

 jobs:
  deploy:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
--- a/.github/workflows/tag.yml
+++ b/.github/workflows/tag.yml
@@ -0,0 +1,24 @@
+name: tag
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  tag:
+    name: Tag capa rules
+    runs-on: ubuntu-20.04
+    steps:
+    - name: Checkout capa-rules
+      uses: actions/checkout@v2
+      with:
+        repository: fireeye/capa-rules
+        token: ${{ secrets.CAPA_TOKEN }}
+    - name: Tag capa-rules
+      run: git tag ${{ github.event.release.tag_name }}
+    - name: Push tag to capa-rules
+      uses: ad-m/github-push-action@master
+      with:
+        repository: fireeye/capa-rules
+        github_token: ${{ secrets.CAPA_TOKEN }}
+        tags: true
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -8,7 +8,7 @@ on:

 jobs:
  code_style:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
    steps:
    - name: Checkout capa
      uses: actions/checkout@v2
@@ -24,7 +24,7 @@ jobs:
      run: black -l 120 --check .

  rule_linter:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
    steps:
    - name: Checkout capa with rules submodule
      uses: actions/checkout@v2
@@ -41,30 +41,39 @@ jobs:
      run: python scripts/lint.py rules/

  tests:
-    name: Tests in ${{ matrix.python }}
-    runs-on: ubuntu-latest
+    name: Tests in ${{ matrix.python-version }} on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
    needs: [code_style, rule_linter]
    strategy:
+      fail-fast: false
      matrix:
+        os: [ubuntu-20.04, windows-2019, macos-10.15]
+        # across all operating systems
+        python-version: [3.6, 3.9]
        include:
-          - python: 2.7
-          - python: 3.6
-          - python: 3.7
-          - python: 3.8
-          - python: '3.9.0-rc.1' # Python latest
+          # on Ubuntu run these as well
+          - os: ubuntu-20.04
+            python-version: 2.7
+          - os: ubuntu-20.04
+            python-version: 3.7
+          - os: ubuntu-20.04
+            python-version: 3.8
    steps:
    - name: Checkout capa with submodules
      uses: actions/checkout@v2
      with:
        submodules: true
-    - name: Set up Python ${{ matrix.python }}
+    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
-        python-version: ${{ matrix.python }}
+        python-version: ${{ matrix.python-version }}
    - name: Install pyyaml
+      if: matrix.os == 'ubuntu-20.04'
      run: sudo apt-get install -y libyaml-dev
+    - name: Install Microsoft Visual C++ 9.0
+      if: matrix.os == 'windows-2019' && matrix.python-version == '2.7'
+      run: choco install vcpython27
    - name: Install capa
      run: pip install -e .[dev]
    - name: Run tests
      run: pytest tests/
-
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,275 @@
 # Change Log

+## master (unreleased)
+
+### New Features
+
+### New Rules
+
+### Bug Fixes
+
+### Changes
+
+### Development
+
+### Raw diffs
+- [capa v1.6.1...master](https://github.com/fireeye/capa/compare/v1.6.1...master)
+- [capa-rules v1.6.1...master](https://github.com/fireeye/capa-rules/compare/v1.6.1...master)
+
+
+## v1.6.1 (2021-04-07)
+
+This release includes several bug fixes, such as a vivisect issue that prevented capa from working on Windows with Python 3. It also adds 17 new rules and a bunch of improvements in the rules and IDA rule generator. We appreciate everyone who opened issues, provided feedback, and contributed code and rules.
+
+### Upcoming changes
+
+**This is the very last capa release that supports Python 2.** The next release will be v2.0 and will have breaking changes, including the removal of Python 2 support.
+
+### New features
+
+- explorer: add support for multi-line tab and SHIFT + Tab #474 @mike-hunhoff
+
+![multi-line tab in rule generator](doc/img/changelog/tab.gif)
+
+### New Rules (17)
+
+- encrypt data using RC4 with custom key via WinAPI @MalwareMechanic
+- encrypt data using Curve25519 @dandonov
+- packaged as an IExpress self-extracting archive @recvfrom
+- create registry key via offline registry library @johnk3r
+- open registry key via offline registry library @johnk3r
+- query registry key via offline registry library @johnk3r
+- set registry key via offline registry library @johnk3r
+- delete registry key via offline registry library @johnk3r
+- enumerate PE sections @Ana06
+- inject DLL reflectively @Ana06
+- inspect section memory permissions @Ana06
+- parse PE exports @Ana06
+- rebuild import table @Ana06
+- compare security identifiers @mike-hunhoff
+- get user security identifier @mike-hunhoff
+- listen for remote procedure calls @mike-hunhoff
+- query remote server for available data @mike-hunhoff
+
+### Bug Fixes
+
+- vivisect: update to v1.0.1 which includes bug fix for #459 (capa failed in Windows with Python 3 and vivisect) #512 @williballenthin
+- explorer: fix initialize rules directory #464 @mike-hunhoff
+- explorer: support subscope rules #493 @mike-hunhoff
+- explorer: add checks to validate matched data when searching #500 @mike-hunhoff
+- features, explorer: add support for string features with special characters e.g. '\n' #468 @mike-hunhoff
+
+### Changes
+
+- vivisect: raises `IncompatibleVivVersion` instead of `UnicodeDecodeError` when using incompatible Python 2 `.viv` files with Python3 #479 @Ana06
+- explorer: improve settings modification #465 @mike-hunhoff
+- rules: improvements @mr-tz, @re-fox, @mike-hunhoff
+- rules, lint: enforce string with double quotes formatting in rules #468 @mike-hunhoff
+- lint: ensure LF end of line #485 #486 @mr-tz
+- setup: pin dependencies #513 #504 @Ana06 @mr-tz
+
+### Development
+
+- ci: test on Windows, Ubuntu, macOS across Python versions #470 @mr-tz @Ana06
+- ci: pin OS versions #491 @williballenthin
+- ci: tag capa-rules on release #476 @Ana06
+- doc: document release process #476 @Ana06
+- doc: Improve README badges #477 #478 @Ana06 @mr-tz
+- doc: update capa explorer documentation #503 @mike-hunhoff
+- doc: add PR template #495 @mr-tz
+- changelog: document incompatibility of viv files #475 @Ana06
+- rule loading: ignore files starting with .git #492 @mr-tz
+
+### Raw diffs
+
+  - [capa v1.6.0...v1.6.1](https://github.com/fireeye/capa/compare/v1.6.0...v1.6.1)
+  - [capa-rules v1.6.0...v1.6.1](https://github.com/fireeye/capa-rules/compare/v1.6.0...v1.6.1)
+
+
+## v1.6.0 (2021-03-09)
+
+This release adds the capa explorer rule generator plugin for IDA Pro, vivisect support for Python 3 and 12 new rules. We appreciate everyone who opened issues, provided feedback, and contributed code and rules. Thank you also to the vivisect development team (@rakuy0, @atlas0fd00m) for the Python 3 support (`vivisect==1.0.0`) and the fixes for Python 2 (`vivisect==0.2.1`).
+
+### Rule Generator IDA Plugin
+
+The capa explorer IDA plugin now helps you quickly build new capa rules using features extracted directly from your IDA database. Without leaving the plugin interface you can use the features extracted by capa explorer to develop and test new rules and save your work directly to your capa rules directory. To get started select the new `Rule Generator` tab, navigate to a function in the IDA `Disassembly` view, and click `Analyze`. For more information check out the capa explorer [readme](https://github.com/fireeye/capa/blob/master/capa/ida/plugin/README.md).
+
+![](doc/img/rulegen_expanded.png)
+
+### Python 2/3 vivisect workspace compatibility
+
+This version of capa adds Python 3 support in vivisect. Note that `.viv` files (generated by vivisect) are not compatible between Python 2 and Python 3. When updating to Python 3 you need to delete all the `.viv` files for capa to work.
+
+If you get the following error (or a similar one), you most likely need to delete `.viv` files:
+```
+UnicodeDecodeError: 'ascii' codec can't decode byte 0x90 in position 2: ordinal not in range(128)
+```
+
+### Upcoming changes
+
+**This is the last capa release that supports Python 2.** The next release will be v2.0 and will have breaking changes, including the removal of Python 2 support.
+
+If you have workflows that rely on the Python 2 version and need future maintenance, please reach out. We may be able to supply limited backports of key fixes and features.
+
+### New features
+
+- explorer: Add capa explorer rule generator plugin for IDA Pro. Now capa explorer helps you build new capa rules!  #426, #438, #439 @mike-hunhoff
+- python: Python 3 support in vivisect #421 @Ana06
+- main: Add backend option in Python 3 to select the backend to be used (either SMDA or vivisect) #421 @Ana06
+- python: Python 3 support in IDA #429, #437 @mike-hunhoff
+- ci: test pyinstaller CI #452 @williballenthin
+- scripts: enable multiple backends in `show-features.py` #429 @mike-hunhoff
+- scripts: add `scripts/vivisect-py2-vs-py3.sh`  to compare vivisect Python 2 vs 3 (can easily be modified to test run times and compare different versions) #421 @Ana06
+
+### New Rules (12)
+
+- patch process command line @re-fox @williballenthin (graduated from nursery)
+- compiled with dmd @re-fox
+- compiled with exe4j @johnk3r
+- compiled from Visual Basic @williballenthin
+- capture screenshot in Go @TcM1911
+- compiled with Nim @mike-hunhoff
+- linked against Go process enumeration library @TcM1911
+- linked against Go registry library @TcM1911
+- linked against Go WMI library @TcM1911
+- linked against Go static asset library @TcM1911
+- inspect load icon resource @mike-hunhoff
+- linked against XZip @mr-tz
+
+### Bug Fixes
+
+- ida: check for unmapped addresses when resolving data references #436 @mike-hunhoff
+
+### Changes
+
+- setup: vivisect v1.0.0 is the default backend for Python3 (it was SMDA before) #421 @Ana06
+- setup: bump vivisect to 0.2.1 #454 @mr-tz
+- linter: adding ntoskrnl, ntdll overlap lint #428 @mike-hunhoff
+- ci: use py3.9 and pyinstaller 4.2 to build standalone binaries #452 @williballenthin
+- scripts: remove old migration script #450 @williballenthin
+
+### Development
+
+- main: factor out common cli argument handling #450 @williballenthin
+
+### Raw diffs
+
+  - [capa v1.5.1...v1.6.0](https://github.com/fireeye/capa/compare/v1.5.1...v1.6.0)
+  - [capa-rules v1.5.1...v1.6.0](https://github.com/fireeye/capa-rules/compare/v1.5.1...v1.6.0)
+
+
+## v1.5.1 (2021-02-09)
+
+This release fixes the version number that we forgot to update for v1.5.0 (therefore, v1.5.0 was not published to pypi). It also includes 1 new rule and some rule improvements.
+
+### New Rules (1)
+
+- encrypt data using vest @re-fox
+
+### Raw diffs
+
+  - [capa v1.5.0...v1.5.1](https://github.com/fireeye/capa/compare/v1.5.0...v1.5.1)
+  - [capa-rules v1.5.0...v1.5.1](https://github.com/fireeye/capa-rules/compare/v1.5.0...v1.5.1)
+
+
+## v1.5.0 (2021-02-05)
+
+This release brings support for running capa under Python 3 via [SMDA](https://github.com/danielplohmann/smda), more thorough CI testing and linting, better extraction of strings and byte features, and 50 (!) new rules. We appreciate everyone who opened issues, provided feedback, and contributed code and rules. A special shout out to the following new project contributors:
+
+  - @johnk3r
+  - @doomedraven
+  - @stvemillertime
+  - @itreallynick
+  - @0x534a
+  
+@dzbeck also added [Malware Behavior Catalog](https://github.com/MBCProject/mbc-markdown) (MBC) and ATT&CK mappings for many rules.
+
+Download a standalone binary below and check out the readme [here on GitHub](https://github.com/fireeye/capa/). Report issues on our [issue tracker](https://github.com/fireeye/capa/issues) and contribute new rules at [capa-rules](https://github.com/fireeye/capa-rules/).
+
+
+### New Features
+
+  - py3 support via SMDA #355 @danielplohmann @jcrussell
+  - scripts: example of using capa as a library #372, #380 @doomedraven
+  - ci: enable dependabot #373 @mr-tz
+  - ci: lint rules @mr-tz
+  - ci: lint rule format #401 @mr-tz
+  - freeze: add base address #391 @mr-tz
+  - json: meta: add base address #412 @mr-tz
+
+### New Rules (50)
+
+  - 64-bit execution via heavens gate @recvfrom
+  - contain anti-disasm techniques @mr-tz
+  - check for microsoft office emulation @re-fox
+  - check for windows sandbox via device @re-fox
+  - check for windows sandbox via dns suffix @re-fox
+  - check for windows sandbox via genuine state @re-fox
+  - check for windows sandbox via process name @re-fox
+  - check for windows sandbox via registry @re-fox
+  - capture microphone audio @re-fox
+  - capture public ip @re-fox
+  - get domain trust relationships @johnk3r
+  - check HTTP status code @mr-tz
+  - compiled with perl2exe @re-fox
+  - compiled with ps2exe @re-fox
+  - compiled with pyarmor @stvemillertime, @itreallynick
+  - validate payment card number using luhn algorithm @re-fox
+  - hash data using fnv @re-fox @mr-tz
+  - generate random numbers via WinAPI @mike-hunhoff @johnk3r
+  - enumerate files recursively @re-fox
+  - get file system object information @mike-hunhoff
+  - read virtual disk @re-fox
+  - register minifilter driver @mike-hunhoff
+  - start minifilter driver @mike-hunhoff
+  - enumerate gui resources @johnk3r
+  - simulate CTRL ALT DEL @mike-hunhoff
+  - hijack thread execution @0x534a
+  - inject dll @0x534a
+  - inject pe @0x534a
+  - create or open registry key @mike-hunhoff
+  - delete registry value @mike-hunhoff
+  - query or enumerate registry key @mike-hunhoff
+  - query or enumerate registry value @mike-hunhoff
+  - resume thread @0x534a
+  - suspend thread @0x534a
+  - allocate memory @0x534a
+  - allocate RW memory @0x534a
+  - contain pusha popa sequence @mr-tz
+  - create or open file @mike-hunhoff
+  - open process @0x534a
+  - open thread @0x534a
+  - get kernel32 base address @mr-tz
+  - get ntdll base address @mr-tz
+  - encrypt or decrypt data via BCrypt @mike-hunhoff
+  - generate random numbers using the Delphi LCG @williballenthin
+  - hash data via BCrypt @mike-hunhoff
+  - migrate process to active window station @williballenthin
+  - patch process command line @williballenthin
+  - resolve function by hash @williballenthin
+  - persist via Winlogon Helper DLL registry key @0x534a
+  - schedule task via command line @0x534a
+
+### Bug Fixes
+
+  - doc: pyinstaller build process @mr-tz
+  - ida: better bytes extraction #409 @mike-hunhoff
+  - viv: better unicode string extraction #364 @mike-hunhoff
+  - viv: better unicode string extraction #378 @mr-tz
+  - viv: more xor instructions #379 @mr-tz
+  - viv: decrease logging verbosity #381 @mr-tz
+  - rules: fix api description syntax #403 @mike-hunhoff
+  - main: disable progress background thread #410 @mike-hunhoff
+  
+### Changes
+
+  - rules: return lib rules for scopes #398 @mr-tz
+  
+### Raw diffs
+
+  - [capa v1.4.1...v1.5.0](https://github.com/fireeye/capa/compare/v1.4.1...v1.5.0)
+  - [capa-rules v1.4.0...v1.5.0](https://github.com/fireeye/capa-rules/compare/v1.4.0...v1.5.0)
+
 ## v1.4.1 (2020-10-23)

 This release fixes an issue building capa on our CI server, which prevented us from building standalone binaries for v1.4.1.
--- a/README.md
+++ b/README.md
@@ -1,7 +1,10 @@
 ![capa](.github/logo.png)

+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
+[![Last release](https://img.shields.io/github/v/release/fireeye/capa)](https://github.com/fireeye/capa/releases)
+[![Number of rules](https://img.shields.io/badge/rules-485-blue.svg)](https://github.com/fireeye/capa-rules)
 [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
-[![Number of rules](https://img.shields.io/badge/rules-414-blue.svg)](https://github.com/fireeye/capa-rules)
+[![Downloads](https://img.shields.io/github/downloads/fireeye/capa/total)](https://github.com/fireeye/capa/releases)
 [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

 capa detects capabilities in executable files.
@@ -146,11 +149,10 @@ rule:
 The [github.com/fireeye/capa-rules](https://github.com/fireeye/capa-rules) repository contains hundreds of standard library rules that are distributed with capa.
 Please learn to write rules and contribute new entries as you find interesting techniques in malware.

-If you use IDA Pro, then you use can use the [capa explorer IDA plugin](capa/ida/plugin/).
-capa explorer lets you quickly identify and navigate to interesting areas of a program and dissect capa rule matches at
-the assembly level.
+If you use IDA Pro, then you can use the [capa explorer](capa/ida/plugin/) plugin.
+capa explorer helps you identify interesting areas of a program and build new capa rules using features extracted directly from your IDA Pro database.

-![capa + IDA Pro integration](doc/img/ida_plugin_intro.gif)
+![capa + IDA Pro integration](doc/img/explorer_expanded.png)

 # further information
 ## capa
--- a/capa/features/init.py
+++ b/capa/features/init.py
@@ -38,6 +38,20 @@ def hex_string(h):
    return " ".join(h[i : i + 2] for i in range(0, len(h), 2)).upper()


+def escape_string(s):
+    """render `s` via repr(), drop the surrounding quotes, and escape double quotes"""
+    quoted = repr(s)
+    # some reprs carry a one-character prefix before the opening quote:
+    #   u'hello\r\nworld' -> hello\\r\\nworld
+    #   'hello\r\nworld'  -> hello\\r\\nworld
+    first = 1 if quoted.startswith(('"', "'")) else 2
+    inner = quoted[first:-1]
+    # repr() may escape "'" in some edge cases, remove
+    inner = inner.replace("\\'", "'")
+    # repr() does not escape '"', add
+    return inner.replace('"', '\\"')
+
+
 class Feature(object):
    def __init__(self, value, arch=None, description=None):
        """
--- a/capa/features/extractors/helpers.py
+++ b/capa/features/extractors/helpers.py
@@ -42,7 +42,9 @@ def is_ordinal(symbol):
    """
    is the given symbol an ordinal that is prefixed by "#"?
    """
+    if symbol:
        return symbol[0] == "#"
+    return False


 def generate_symbols(dll, symbol):
--- a/capa/features/extractors/ida/helpers.py
+++ b/capa/features/extractors/ida/helpers.py
@@ -166,6 +166,10 @@ def basic_block_size(bb):

 def read_bytes_at(ea, count):
    """ """
+    # check if byte has a value, see get_wide_byte doc
+    if not idc.is_loaded(ea):
+        return b""
+
    segm_end = idc.get_segm_end(ea)
    if ea + count > segm_end:
        return idc.get_bytes(ea, segm_end - ea)
@@ -347,6 +351,10 @@ def find_data_reference_from_insn(insn, max_depth=10):
            # break if circular reference
            break

+        if not idaapi.is_mapped(data_refs[0]):
+            # break if address is not mapped
+            break
+
        depth += 1
        if depth > max_depth:
            # break if max depth
--- a/capa/features/extractors/ida/insn.py
+++ b/capa/features/extractors/ida/insn.py
@@ -148,6 +148,9 @@ def extract_insn_bytes_features(f, bb, insn):
    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
+    if idaapi.is_call_insn(insn):
+        return
+
    ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
    if ref != insn.ea:
        extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
@@ -302,7 +305,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
-    if insn.itype != idaapi.NN_xor:
+    if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor):
        return
    if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2):
        return
--- a/capa/features/extractors/smda/init.py
+++ b/capa/features/extractors/smda/init.py
@@ -0,0 +1,52 @@
+import sys
+import types
+
+from smda.common.SmdaReport import SmdaReport
+from smda.common.SmdaInstruction import SmdaInstruction
+
+import capa.features.extractors.smda.file
+import capa.features.extractors.smda.insn
+import capa.features.extractors.smda.function
+import capa.features.extractors.smda.basicblock
+from capa.main import UnsupportedRuntimeError
+from capa.features.extractors import FeatureExtractor
+
+
+class SmdaFeatureExtractor(FeatureExtractor):
+    def __init__(self, smda_report: SmdaReport, path):
+        super(SmdaFeatureExtractor, self).__init__()
+        if sys.version_info < (3, 0):
+            raise UnsupportedRuntimeError("SMDA should only be used with Python 3.")
+        self.smda_report = smda_report
+        self.path = path
+
+    def get_base_address(self):
+        return self.smda_report.base_addr
+
+    def extract_file_features(self):
+        # file-scope features need both the SMDA report and the input file on disk
+        yield from capa.features.extractors.smda.file.extract_features(self.smda_report, self.path)
+
+    def get_functions(self):
+        # functions are handed out exactly as SMDA reports them
+        yield from self.smda_report.getFunctions()
+
+    def extract_function_features(self, f):
+        # delegate to the function-scope handlers
+        yield from capa.features.extractors.smda.function.extract_features(f)
+
+    def get_basic_blocks(self, f):
+        # basic blocks come straight from the SMDA function object
+        yield from f.getBlocks()
+
+    def extract_basic_block_features(self, f, bb):
+        # delegate to the basic-block-scope handlers
+        yield from capa.features.extractors.smda.basicblock.extract_features(f, bb)
+
+    def get_instructions(self, f, bb):
+        # instructions come straight from the SMDA basic block object
+        yield from bb.getInstructions()
+
+    def extract_insn_features(self, f, bb, insn):
+        # delegate to the instruction-scope handlers
+        yield from capa.features.extractors.smda.insn.extract_features(f, bb, insn)
--- a/capa/features/extractors/smda/basicblock.py
+++ b/capa/features/extractors/smda/basicblock.py
@@ -0,0 +1,131 @@
+import sys
+import string
+import struct
+
+from capa.features import Characteristic
+from capa.features.basicblock import BasicBlock
+from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
+
+
+def _bb_has_tight_loop(f, bb):
+    """
+    a tight loop is a basic block that lists its own start among its entries in f.blockrefs
+    """
+    return bb.offset in f.blockrefs.get(bb.offset, ())
+
+
+def extract_bb_tight_loop(f, bb):
+    """ yield the "tight loop" characteristic at bb.offset when _bb_has_tight_loop(f, bb) holds """
+    if _bb_has_tight_loop(f, bb):
+        yield Characteristic("tight loop"), bb.offset
+
+
+def _bb_has_stackstring(f, bb):
+    """
+    heuristically detect stackstring construction:
+      - the block moves more than MIN_STACKSTRING_LEN printable constant bytes to the stack
+    """
+    printable_bytes = 0
+    for smda_ins in bb.getInstructions():
+        if is_mov_imm_to_stack(smda_ins):
+            printable_bytes += get_printable_len(smda_ins.getDetailed())
+        if printable_bytes > MIN_STACKSTRING_LEN:
+            return True
+    return False
+
+
+def get_operands(smda_ins):
+    return list(map(str.strip, smda_ins.operands.split(",")))
+
+
+def extract_stackstring(f, bb):
+    """ yield the "stack string" characteristic at bb.offset when _bb_has_stackstring(f, bb) holds """
+    if _bb_has_stackstring(f, bb):
+        yield Characteristic("stack string"), bb.offset
+
+
+def is_mov_imm_to_stack(smda_ins):
+    """
+    Return True if the instruction moves an immediate into a stack-relative destination
+    """
+    if not smda_ins.mnemonic.startswith("mov"):
+        return False
+
+    operands = get_operands(smda_ins)
+    if len(operands) != 2:
+        # not two operands
+        return False
+    dst, src = operands
+
+    try:
+        # the source must parse as a hex immediate
+        int(src, 16)
+    except ValueError:
+        return False
+
+    # the destination must mention a stack or frame pointer register
+    stack_registers = ("ebp", "rbp", "esp", "rsp")
+    return any(regname in dst for regname in stack_registers)
+
+
+def is_printable_ascii(chars):
+    return not any(c >= 127 or chr(c) not in string.printable for c in chars)
+
+
+def is_printable_utf16le(chars):
+    # every odd byte must be NUL and every even byte printable ASCII; always returns a bool (never None)
+    return all(c == 0x00 for c in chars[1::2]) and is_printable_ascii(chars[::2])
+
+
+def get_printable_len(instr):
+    """
+    Return string length if all operand bytes are ascii or utf16-le printable
+
+    Works on a capstone instruction
+    """
+    # should have exactly two operands for mov immediate
+    if len(instr.operands) != 2:
+        return 0
+
+    imm = instr.operands[1].value.imm
+    imm_size = instr.imm_size
+    # struct format and bit mask for each supported immediate width (in bytes)
+    layouts = {
+        1: ("<B", 0xFF),
+        2: ("<H", 0xFFFF),
+        4: ("<I", 0xFFFFFFFF),
+        8: ("<Q", 0xFFFFFFFFFFFFFFFF),
+    }
+    if imm_size not in layouts:
+        raise ValueError("Unhandled operand data type 0x%x." % imm_size)
+    fmt, mask = layouts[imm_size]
+    chars = struct.pack(fmt, imm & mask)
+
+    if is_printable_ascii(chars):
+        return imm_size
+    if is_printable_utf16le(chars):
+        return imm_size // 2
+    return 0
+
+
+def extract_features(f, bb):
+    """
+    extract features from the given basic block.
+
+    args:
+      f (smda.common.SmdaFunction): the function from which to extract features
+      bb (smda.common.SmdaBasicBlock): the basic block to process.
+
+    yields:
+      Tuple[Feature, VA]: a feature and its location; BasicBlock() at bb.offset comes first.
+    """
+    yield BasicBlock(), bb.offset
+    for bb_handler in BASIC_BLOCK_HANDLERS:
+        for feature, va in bb_handler(f, bb):
+            yield feature, va
+
+
+BASIC_BLOCK_HANDLERS = (
+    extract_bb_tight_loop,
+    extract_stackstring,
+)
--- a/capa/features/extractors/smda/file.py
+++ b/capa/features/extractors/smda/file.py
@@ -0,0 +1,139 @@
+import struct
+
+# if we have SMDA we definitely have lief
+import lief
+
+import capa.features.extractors.helpers
+import capa.features.extractors.strings
+from capa.features import String, Characteristic
+from capa.features.file import Export, Import, Section
+
+
+def carve(pbytes, offset=0):
+    """
+    Yield (offset, i) tuples for embedded PEs in pbytes, where i is the byte value passed to xor_static
+
+    Based on the version from vivisect:
+    https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
+    And its IDA adaptation:
+    capa/features/extractors/ida/file.py
+    """
+    mz_xor = [
+        (
+            capa.features.extractors.helpers.xor_static(b"MZ", i),
+            capa.features.extractors.helpers.xor_static(b"PE", i),
+            i,
+        )
+        for i in range(256)
+    ]
+
+    pblen = len(pbytes)
+    todo = [(pbytes.find(mzx, offset), mzx, pex, i) for mzx, pex, i in mz_xor]
+    todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != -1]
+
+    while len(todo):
+
+        off, mzx, pex, i = todo.pop()
+
+        # The MZ header has one field we will check
+        # e_lfanew is at 0x3c
+        e_lfanew = off + 0x3C
+        if pblen < (e_lfanew + 4):
+            continue
+
+        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(pbytes[e_lfanew : e_lfanew + 4], i))[0]
+
+        nextres = pbytes.find(mzx, off + 1)
+        if nextres != -1:
+            todo.append((nextres, mzx, pex, i))
+
+        peoff = off + newoff
+        if pblen < (peoff + 2):
+            continue
+
+        if pbytes[peoff : peoff + 2] == pex:
+            yield (off, i)
+
+
+def extract_file_embedded_pe(smda_report, file_path):
+    # carving starts at offset 1, presumably so the file's own header at offset 0 is not reported
+    with open(file_path, "rb") as fh:
+        contents = fh.read()
+    for pe_offset, _xor_key in carve(contents, 1):
+        yield Characteristic("embedded pe"), pe_offset
+
+
+def extract_file_export_names(smda_report, file_path):
+    # when lief.parse() returns None there is nothing to iterate and nothing is yielded
+    parsed = lief.parse(file_path)
+    for exported in () if parsed is None else parsed.exported_functions:
+        yield Export(exported.name), exported.address
+
+
+def extract_file_import_names(smda_report, file_path):
+    # extract import table info via LIEF; va is computed per entry so ordinal-only imports get their own address
+    lief_binary = lief.parse(file_path)
+    if not isinstance(lief_binary, lief.PE.Binary):
+        return
+    for imported_library in lief_binary.imports:
+        library_name = imported_library.name.lower()
+        library_name = library_name[:-4] if library_name.endswith(".dll") else library_name
+        for func in imported_library.entries:
+            va = func.iat_address + smda_report.base_addr
+            if func.name:
+                for name in capa.features.extractors.helpers.generate_symbols(library_name, func.name):
+                    yield Import(name), va
+            elif func.is_ordinal:
+                for name in capa.features.extractors.helpers.generate_symbols(library_name, "#%s" % func.ordinal):
+                    yield Import(name), va
+
+
+def extract_file_section_names(smda_report, file_path):
+    # only PE files are handled here
+    # each section is located at the optional header's image base plus the section's virtual address
+    parsed = lief.parse(file_path)
+    if isinstance(parsed, lief.PE.Binary) and parsed and parsed.sections:
+        image_base = parsed.optional_header.imagebase
+        for section in parsed.sections:
+            yield Section(section.name), image_base + section.virtual_address
+
+
+def extract_file_strings(smda_report, file_path):
+    """extract ASCII and UTF-16 LE strings from file"""
+    with open(file_path, "rb") as fh:
+        data = fh.read()
+
+    extractors = (
+        capa.features.extractors.strings.extract_ascii_strings,
+        capa.features.extractors.strings.extract_unicode_strings,
+    )
+    for extract in extractors:
+        for found in extract(data):
+            yield String(found.s), found.offset
+
+
+def extract_features(smda_report, file_path):
+    """
+    extract file features from the given SMDA report and input file
+
+    args:
+      smda_report (smda.common.SmdaReport): a SmdaReport
+      file_path: path to the input file
+
+    yields:
+      Tuple[Feature, VA]: a feature and its location.
+    """
+
+    for file_handler in FILE_HANDLERS:
+        # call each handler exactly once; a second, never-iterated generator was created here before
+        for feature, va in file_handler(smda_report, file_path):
+            yield feature, va
+
+
+FILE_HANDLERS = (
+    extract_file_embedded_pe,
+    extract_file_export_names,
+    extract_file_import_names,
+    extract_file_section_names,
+    extract_file_strings,
+)
--- a/capa/features/extractors/smda/function.py
+++ b/capa/features/extractors/smda/function.py
@@ -0,0 +1,38 @@
+from capa.features import Characteristic
+from capa.features.extractors import loops
+
+
+def extract_function_calls_to(f):
+    for inref in f.inrefs:  # one "calls to" feature per entry of f.inrefs, located at that entry
+        yield Characteristic("calls to"), inref
+
+
+def extract_function_loop(f):
+    """
+    yield the "loop" characteristic at f.offset when loops.has_loop finds a loop in the block graph
+    """
+    # flatten the block reference map into (source, destination) edge pairs
+    edges = [
+        (src, dst) for src, dsts in f.blockrefs.items() for dst in dsts
+    ]
+
+    if edges and loops.has_loop(edges):
+        yield Characteristic("loop"), f.offset
+
+
+def extract_features(f):
+    """
+    extract features from the given function by running every handler in FUNCTION_HANDLERS.
+
+    args:
+      f (smda.common.SmdaFunction): the function from which to extract features
+
+    yields:
+      Tuple[Feature, VA]: a feature and its location.
+    """
+    for func_handler in FUNCTION_HANDLERS:
+        for feature, va in func_handler(f):
+            yield feature, va
+
+
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
--- a/capa/features/extractors/smda/insn.py
+++ b/capa/features/extractors/smda/insn.py
@@ -0,0 +1,393 @@
+import re
+import string
+import struct
+
+from smda.common.SmdaReport import SmdaReport
+
+import capa.features.extractors.helpers
+from capa.features import (
+    ARCH_X32,
+    ARCH_X64,
+    MAX_BYTES_FEATURE_SIZE,
+    THUNK_CHAIN_DEPTH_DELTA,
+    Bytes,
+    String,
+    Characteristic,
+)
+from capa.features.insn import API, Number, Offset, Mnemonic
+
+# security cookie checks may perform non-zeroing XORs, these are expected within a certain
+# byte range within the first and returning basic blocks, this helps to reduce FP features
+SECURITY_COOKIE_BYTES_DELTA = 0x40
+PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
+PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
+
+
+def get_arch(smda_report):
+    # only intel is supported; an intel report with any other bitness yields None
+    if smda_report.architecture != "intel":
+        raise NotImplementedError
+    bitness = smda_report.bitness
+    if bitness == 32:
+        return ARCH_X32
+    return ARCH_X64 if bitness == 64 else None
+
+
+def extract_insn_api_features(f, bb, insn):
+    """parse API features from the given instruction, following short thunk chains to the API they wrap."""
+    def api_names(api_entry):
+        # entries are split at "!" into dll and api; the dll part is cut at its first "." and lowercased
+        dll_name, api_name = api_entry.split("!")
+        dll_name = dll_name.split(".")[0].lower()
+        return capa.features.extractors.helpers.generate_symbols(dll_name, api_name)
+
+    if insn.offset in f.apirefs:
+        for name in api_names(f.apirefs[insn.offset]):
+            yield API(name), insn.offset
+        return
+    if insn.offset not in f.outrefs:
+        return
+    current_function = f
+    current_instruction = insn
+    for _ in range(THUNK_CHAIN_DEPTH_DELTA):
+        if not (current_function and len(current_function.outrefs[current_instruction.offset]) == 1):
+            return
+        target = current_function.outrefs[current_instruction.offset][0]
+        referenced_function = current_function.smda_report.getFunction(target)
+        if not referenced_function:
+            return
+        # TODO SMDA: implement this function for both jmp and call, checking if function has 1 instruction which refs an API
+        if referenced_function.isApiThunk():
+            api_entry = referenced_function.apirefs.get(target)
+            if api_entry:
+                for name in api_names(api_entry):
+                    yield API(name), insn.offset
+            # the chain ends at a thunk: stop instead of re-yielding the same API on every remaining iteration
+            return
+        if referenced_function.num_instructions != 1 or referenced_function.num_outrefs != 1:
+            return
+        current_function = referenced_function
+        current_instruction = [i for i in referenced_function.getInstructions()][0]
+
+
+def extract_insn_number_features(f, bb, insn):
+    """parse number features from the given instruction; operands that are not hex immediates are skipped."""
+    # example:
+    #     push    3136B0h         ; dwControlCode
+    operands = [o.strip() for o in insn.operands.split(",")]
+    if insn.mnemonic == "add" and operands[0] in ["esp", "rsp"]:
+        # skip things like:
+        #    .text:00401140                 call    sub_407E2B
+        #    .text:00401145                 add     esp, 0Ch
+        return
+    for operand in operands:
+        try:
+            value = int(operand, 16)
+        except ValueError:
+            # only parsing is guarded: a bare except around the yields would also swallow GeneratorExit
+            continue
+        yield Number(value), insn.offset
+        yield Number(value, arch=get_arch(f.smda_report)), insn.offset
+
+
+def read_bytes(smda_report, va, num_bytes=None):
+    """
+    read up to num_bytes (default: MAX_BYTES_FEATURE_SIZE) from the given address.
+
+    returns None when the report carries no buffer; reads past the end are truncated.
+    """
+    rva = va - smda_report.base_addr
+    image = smda_report.buffer
+    if image is None:
+        return
+    wanted = MAX_BYTES_FEATURE_SIZE if num_bytes is None else num_bytes
+
+    # slicing clamps at the end of the buffer, so no explicit bounds check is needed
+    return image[rva : rva + wanted]
+
+
+def derefs(smda_report, p):
+    """
+    recursively follow the given pointer, yielding the valid memory addresses along the way.
+    useful when you may have a pointer to string, or pointer to pointer to string, etc.
+    pointers are read as 4-byte little-endian values; the walk stops when one cannot be read.
+
+    based on the implementation in viv/insn.py
+    """
+    depth = 0
+    while True:
+        if not smda_report.isAddrWithinMemoryImage(p):
+            return
+        yield p
+
+        bytes_ = read_bytes(smda_report, p, num_bytes=4)
+        if bytes_ is None or len(bytes_) < 4:
+            # no buffer, or fewer than four bytes left before the end of the buffer
+            return
+        val = struct.unpack("<I", bytes_)[0]
+
+        # sanity: pointer points to self
+        if val == p:
+            return
+
+        # sanity: avoid chains of pointers that are unreasonably deep
+        depth += 1
+        if depth > 10:
+            return
+        p = val
+
+
+def extract_insn_bytes_features(f, bb, insn):
+    """
+    parse byte sequence features from the given instruction.
+
+    every address on the pointer chain of each data reference is read;
+    unreadable locations and all-zero reads are skipped.
+
+    example:
+        #     push    offset iid_004118d4_IShellLinkA ; riid
+    """
+    for data_ref in insn.getDataRefs():
+        for address in derefs(f.smda_report, data_ref):
+            data = read_bytes(f.smda_report, address)
+            if data is not None and not capa.features.extractors.helpers.all_zeros(data):
+                yield Bytes(data), insn.offset
+
+
+def detect_ascii_len(smda_report, offset):
+    """return the length of the NUL-terminated printable ASCII run at offset, else 0; never reads out of bounds."""
+    buf = smda_report.buffer
+    if buf is None:
+        return 0
+    start = rva = offset - smda_report.base_addr
+    # 0 <= rva rejects addresses below the image base, which would otherwise index from the end
+    while 0 <= rva < len(buf) and buf[rva] < 127 and chr(buf[rva]) in string.printable:
+        rva += 1
+    # only a run that is terminated by NUL inside the buffer counts as a string
+    if 0 <= rva < len(buf) and buf[rva] == 0:
+        return rva - start
+    return 0
+
+
+def detect_unicode_len(smda_report, offset):
+    """return the byte length of the NUL-terminated UTF-16 LE (ASCII-range) run at offset, else 0."""
+    buf = smda_report.buffer
+    if buf is None:
+        return 0
+    start = rva = offset - smda_report.base_addr
+    if rva < 0:
+        # below the image base: a negative index would silently read from the end of the buffer
+        return 0
+    # every character occupies two bytes, so both must lie inside the buffer
+    while rva + 1 < len(buf) and buf[rva] < 127 and chr(buf[rva]) in string.printable and buf[rva + 1] == 0:
+        rva += 2
+    if rva + 1 < len(buf) and buf[rva] == 0 and buf[rva + 1] == 0:
+        return rva - start
+    return 0
+
+
+def read_string(smda_report, offset):
+    alen = detect_ascii_len(smda_report, offset)
+    if alen > 1:
+        return read_bytes(smda_report, offset, alen).decode("utf-8")
+    ulen = detect_unicode_len(smda_report, offset)
+    if ulen > 2:  # explicit little-endian: plain "utf-16" without a BOM follows the host byte order
+        return read_bytes(smda_report, offset, ulen).decode("utf-16-le")
+
+
+def extract_insn_string_features(f, bb, insn):
+    """parse string features from the given instruction."""
+    # example:
+    #     push    offset aAcr     ; "ACR  > "
+    report = f.smda_report
+    for data_ref in insn.getDataRefs():
+        # every address on the pointer chain is a candidate string location
+        for text in (read_string(report, va) for va in derefs(report, data_ref)):
+            if text:
+                yield String(text.rstrip("\x00")), insn.offset
+
+
+def extract_insn_offset_features(f, bb, insn):
+    """parse structure offset features from the given instruction."""
+    # examples:
+    #
+    #     mov eax, [esi + 4]
+    #     mov eax, [esi + ecx + 16384]
+    for operand in (o.strip() for o in insn.operands.split(",")):
+        if "ptr" not in operand:
+            continue
+        # operands that mention esp, ebp or rbp are skipped
+        if any(reg in operand for reg in ("esp", "ebp", "rbp")):
+            continue
+        # prefer a hex displacement, then a single decimal digit; default to offset 0
+        match = PATTERN_HEXNUM.search(operand)
+        base = 16
+        if not match:
+            match, base = PATTERN_SINGLENUM.search(operand), 10
+        number = 0
+        if match:
+            magnitude = int(match.group("num"), base)
+            number = -magnitude if match.group().startswith("-") else magnitude
+        yield Offset(number), insn.offset
+        yield Offset(number, arch=get_arch(f.smda_report)), insn.offset
+
+
+def is_security_cookie(f, bb, insn):
+    """
+    check if an instruction is related to security cookie checks
+    """
+    # security cookie check should use SP or BP
+    second_operand = insn.operands.split(",")[1].strip()
+    if second_operand not in ["esp", "ebp", "rsp", "rbp"]:
+        return False
+    for index, block in enumerate(f.getBlocks()):
+        block_instructions = list(block.getInstructions())
+        # the first and last instruction of the block anchor the two byte windows below
+        first, last = block_instructions[0], block_instructions[-1]
+        # expect security cookie init in first basic block within first bytes (instructions)
+        if index == 0 and insn.offset < first.offset + SECURITY_COOKIE_BYTES_DELTA:
+            return True
+        # ... or within last bytes (instructions) before a return
+        if last.mnemonic.startswith("ret") and insn.offset > last.offset - SECURITY_COOKIE_BYTES_DELTA:
+            return True
+    return False
+
+
+def extract_insn_nzxor_characteristic_features(f, bb, insn):
+    """
+    parse non-zeroing XOR instruction from the given instruction.
+    ignore expected non-zeroing XORs, e.g. security cookies.
+    """
+    # scalar and SSE xor variants are all considered
+    if insn.mnemonic not in ("xor", "xorpd", "xorps", "pxor"):
+        return
+
+    operands = [o.strip() for o in insn.operands.split(",")]
+    # identical operands: the xor merely zeroes its destination
+    is_zeroing = operands[0] == operands[1]
+    # the security cookie check is only consulted for non-zeroing xors
+    if is_zeroing or is_security_cookie(f, bb, insn):
+        return
+
+    yield Characteristic("nzxor"), insn.offset
+
+
+def extract_insn_mnemonic_features(f, bb, insn):
+    """yield a Mnemonic feature for insn.mnemonic, located at insn.offset."""
+    yield Mnemonic(insn.mnemonic), insn.offset
+
+
+def extract_insn_peb_access_characteristic_features(f, bb, insn):
+    """
+    parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
+    """
+
+    if insn.mnemonic not in ["push", "mov"]:
+        return
+
+    # an operand counts when it names the segment together with the matching constant
+    for operand in (o.strip() for o in insn.operands.split(",")):
+        fs_peb = "fs:" in operand and "0x30" in operand
+        gs_peb = "gs:" in operand and "0x60" in operand
+        if fs_peb or gs_peb:
+            yield Characteristic("peb access"), insn.offset
+
+
+def extract_insn_segment_access_features(f, bb, insn):
+    """ parse the instruction for access to fs or gs """
+    # at most one feature per operand; fs wins when an operand mentions both segments
+    for operand in map(str.strip, insn.operands.split(",")):
+        for prefix, label in (("fs:", "fs access"), ("gs:", "gs access")):
+            if prefix in operand:
+                yield Characteristic(label), insn.offset
+                break
+
+
+def extract_insn_cross_section_cflow(f, bb, insn):
+    """
+    inspect the instruction for a CALL or JMP that crosses section boundaries.
+    """
+    if insn.mnemonic not in ["call", "jmp"] or insn.offset in f.apirefs:
+        return
+
+    smda_report = insn.smda_function.smda_report
+    if insn.offset in f.outrefs:
+        targets = f.outrefs[insn.offset]
+    elif insn.operands.startswith("0x"):
+        targets = [int(insn.operands, 16)]
+    else:
+        return
+    for target in targets:
+        if smda_report.getSection(insn.offset) != smda_report.getSection(target):
+            yield Characteristic("cross section flow"), insn.offset
+
+
+# this is a feature that's most relevant at the function scope,
+# however, its most efficient to extract at the instruction scope.
+def extract_function_calls_from(f, bb, insn):
+    if insn.mnemonic != "call":
+        return
+
+    for outref in f.outrefs[insn.offset] if insn.offset in f.outrefs else ():
+        yield Characteristic("calls from"), outref
+        if outref == f.offset:
+            # if we found a jump target and it's the function address
+            # mark as recursive
+            yield Characteristic("recursive call"), outref
+
+    # instructions listed in f.apirefs are reported at the calling instruction itself
+    if insn.offset in f.apirefs:
+        yield Characteristic("calls from"), insn.offset
+
+
+# this is a feature that's most relevant at the function or basic block scope,
+# however, its most efficient to extract at the instruction scope.
+def extract_function_indirect_call_characteristic_features(f, bb, insn):
+    """
+    extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
+    does not include calls like => call ds:dword_ABD4974
+    """
+    if insn.mnemonic != "call":
+        return
+    if insn.operands.startswith("0x"):
+        return  # operand is an immediate target; bare return, this is a generator
+    if "qword ptr" in insn.operands and "rip" in insn.operands:
+        return  # rip-relative memory operand
+    if insn.operands.startswith("dword ptr [0x"):
+        return  # memory operand at a constant address
+    # call edx
+    # call dword ptr [eax+50h]
+    # call qword ptr [rsp+78h]
+    yield Characteristic("indirect call"), insn.offset
+
+
+def extract_features(f, bb, insn):
+    """
+    extract features from the given insn by running every handler in INSTRUCTION_HANDLERS.
+
+    args:
+      f (smda.common.SmdaFunction): the function to process.
+      bb (smda.common.SmdaBasicBlock): the basic block to process.
+      insn (smda.common.SmdaInstruction): the instruction to process.
+
+    yields:
+      Tuple[Feature, VA]: a feature and its location.
+    """
+    for insn_handler in INSTRUCTION_HANDLERS:
+        for feature, va in insn_handler(f, bb, insn):
+            yield feature, va
+
+
+INSTRUCTION_HANDLERS = (
+    extract_insn_api_features,
+    extract_insn_number_features,
+    extract_insn_string_features,
+    extract_insn_bytes_features,
+    extract_insn_offset_features,
+    extract_insn_nzxor_characteristic_features,
+    extract_insn_mnemonic_features,
+    extract_insn_peb_access_characteristic_features,
+    extract_insn_cross_section_cflow,
+    extract_insn_segment_access_features,
+    extract_function_calls_from,
+    extract_function_indirect_call_characteristic_features,
+)
--- a/capa/features/extractors/viv/init.py
+++ b/capa/features/extractors/viv/init.py
@@ -8,11 +8,7 @@

 import types

-import file
-import insn
-import function
 import viv_utils
-import basicblock

 import capa.features.extractors
 import capa.features.extractors.viv.file
@@ -42,7 +38,7 @@ def add_va_int_cast(o):
    this bit of skullduggery lets use cast viv-utils objects as ints.
    the correct way of doing this is to update viv-utils (or subclass the objects here).
    """
-    setattr(o, "__int__", types.MethodType(get_va, o, type(o)))
+    setattr(o, "__int__", types.MethodType(get_va, o))
    return o


--- a/capa/features/extractors/viv/basicblock.py
+++ b/capa/features/extractors/viv/basicblock.py
@@ -125,11 +125,16 @@ def get_printable_len(oper):


 def is_printable_ascii(chars):
-    return all(ord(c) < 127 and c in string.printable for c in chars)
+    try:
+        chars_str = chars.decode("ascii")
+    except UnicodeDecodeError:
+        return False
+    else:
+        return all(c in string.printable for c in chars_str)


 def is_printable_utf16le(chars):
-    if all(c == "\x00" for c in chars[1::2]):
+    if all(c in (0, b"\x00") for c in chars[1::2]):  # bytes iterate as ints on py3, as 1-char strs on py2
         return is_printable_ascii(chars[::2])


--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -239,7 +239,7 @@ def read_bytes(vw, va):
    """
    segm = vw.getSegment(va)
    if not segm:
-        raise envi.SegmentationViolation()
+        raise envi.SegmentationViolation(va)

    segm_end = segm[0] + segm[1]
    try:
@@ -258,10 +258,10 @@ def extract_insn_bytes_features(f, bb, insn):
    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
-    for oper in insn.opers:
    if insn.mnem == "call":
-            continue
+        return

+    for oper in insn.opers:
        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
            v = oper.getOperValue(oper)
        elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
@@ -311,6 +311,10 @@ def read_string(vw, offset):
                # vivisect seems to mis-detect the end unicode strings
                # off by one, too short
                ulen += 1
+            else:
+                # vivisect seems to mis-detect the end unicode strings
+                # off by two, too short
+                ulen += 2
            return read_memory(vw, offset, ulen).decode("utf-16")

    raise ValueError("not a string", offset)
@@ -325,6 +329,9 @@ def extract_insn_string_features(f, bb, insn):
    for oper in insn.opers:
        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
            v = oper.getOperValue(oper)
+        elif isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper):
+            # like 0x10056CB4 in `lea eax, dword [0x10056CB4]`
+            v = oper.imm
        elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
            # like 0x401000 in `mov eax, 0x401000[2 * ebx]`
            v = oper.imm
@@ -415,7 +422,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
    parse non-zeroing XOR instruction from the given instruction.
    ignore expected non-zeroing XORs, e.g. security cookies.
    """
-    if insn.mnem != "xor":
+    if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"):
        return

    if insn.opers[0] == insn.opers[1]:
@@ -492,6 +499,10 @@ def extract_insn_cross_section_cflow(f, bb, insn):
    inspect the instruction for a CALL or JMP that crosses section boundaries.
    """
    for va, flags in insn.getBranches():
+        if va is None:
+            # va may be none for dynamic branches that haven't been resolved, such as `jmp eax`.
+            continue
+
        if flags & envi.BR_FALL:
            continue

--- a/capa/features/freeze.py
+++ b/capa/features/freeze.py
@@ -5,6 +5,7 @@ json format:

    {
      'version': 1,
+      'base address': int(base address),
      'functions': {
        int(function va): {
          'basic blocks': {
@@ -86,6 +87,7 @@ def dumps(extractor):
    """
    ret = {
        "version": 1,
+        "base address": extractor.get_base_address(),
        "functions": {},
        "scopes": {
            "file": [],
@@ -147,6 +149,7 @@ def loads(s):
        raise ValueError("unsupported freeze format version: %d" % (doc.get("version")))

    features = {
+        "base address": doc.get("base address"),
        "file features": [],
        "functions": {},
    }
@@ -261,6 +264,15 @@ def main(argv=None):
    parser.add_argument(
        "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
    )
+    if sys.version_info >= (3, 0):
+        parser.add_argument(
+            "-b",
+            "--backend",
+            type=str,
+            help="select the backend to use",
+            choices=(capa.main.BACKEND_VIV, capa.main.BACKEND_SMDA),
+            default=capa.main.BACKEND_VIV,
+        )
    args = parser.parse_args(args=argv)

    if args.quiet:
@@ -273,7 +285,8 @@ def main(argv=None):
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

-    extractor = capa.main.get_extractor(args.sample, args.format)
+    backend = args.backend if sys.version_info > (3, 0) else capa.main.BACKEND_VIV
+    extractor = capa.main.get_extractor(args.sample, args.format, backend)
    with open(args.output, "wb") as f:
        f.write(dump(extractor))

--- a/capa/features/insn.py
+++ b/capa/features/insn.py
@@ -16,7 +16,7 @@ class API(Feature):
            modname, _, impname = name.rpartition(".")
            name = modname.lower() + "." + impname

-        super(API, self).__init__(name, description)
+        super(API, self).__init__(name, description=description)


 class Number(Feature):
--- a/capa/ida/helpers/init.py
+++ b/capa/ida/helpers/init.py
@@ -82,14 +82,26 @@ def get_func_start_ea(ea):
    return f if f is None else f.start_ea


-def collect_metadata():
+def get_file_md5():
+    """get MD5 of the input file from IDA; non-string values are converted with capa.features.bytes_to_str"""
    md5 = idautils.GetInputFileMD5()
    if not isinstance(md5, six.string_types):
        md5 = capa.features.bytes_to_str(md5)
+    return md5

+
+def get_file_sha256():
+    """get SHA256 of the input file from IDA; non-string values are converted with capa.features.bytes_to_str"""
    sha256 = idaapi.retrieve_input_file_sha256()
    if not isinstance(sha256, six.string_types):
        sha256 = capa.features.bytes_to_str(sha256)
+    return sha256
+
+
+def collect_metadata():
+    """ """
+    md5 = get_file_md5()
+    sha256 = get_file_sha256()

    return {
        "timestamp": datetime.datetime.now().isoformat(),
@@ -103,6 +115,7 @@ def collect_metadata():
        "analysis": {
            "format": idaapi.get_file_type_name(),
            "extractor": "ida",
+            "base_address": idaapi.get_imagebase(),
        },
        "version": capa.version.__version__,
    }
--- a/capa/ida/plugin/README.md
+++ b/capa/ida/plugin/README.md
@@ -1,49 +1,35 @@
 ![capa explorer](../../../.github/capa-explorer-logo.png)

-capa explorer is an IDA Pro plugin written in Python that integrates the FLARE team's open-source framework, capa, with IDA. capa is a framework that uses a well-defined collection of rules to 
+capa explorer is an IDAPython plugin that integrates the FLARE team's open-source framework, capa, with IDA Pro. capa is a framework that uses a well-defined collection of rules to 
 identify capabilities in a program. You can run capa against a PE file or shellcode and it tells you what it thinks the program can do. For example, it might suggest that 
-the program is a backdoor, can install services, or relies on HTTP to communicate. You can use capa explorer to run capa directly on an IDA database without requiring access
-to the source binary. Once a database has been analyzed, capa explorer can be used to quickly identify and navigate to interesting areas of a program 
-and dissect capa rule matches at the assembly level.
+the program is a backdoor, can install services, or relies on HTTP to communicate. capa explorer runs capa directly against your IDA Pro database (IDB) without requiring access
+to the original binary file. Once a database has been analyzed, capa explorer helps you identify interesting areas of a program and build new capa rules using features extracted from your IDB.

 We love using capa explorer during malware analysis because it teaches us what parts of a program suggest a behavior. As we click on rows, capa explorer jumps directly 
-to important addresses in the IDA Pro database and highlights key features in the Disassembly view so they stand out visually. To illustrate, we use capa explorer to 
+to important addresses in the IDB and highlights key features in the Disassembly view so they stand out visually. To illustrate, we use capa explorer to 
 analyze Lab 14-02 from [Practical Malware Analysis](https://nostarch.com/malware) (PMA) available [here](https://practicalmalwareanalysis.com/labs/). Our goal is to understand 
 the program's functionality.

 After loading Lab 14-02 into IDA and analyzing the database with capa explorer, we see that capa detected a rule match for `self delete via COMSPEC environment variable`:

-![](../../../doc/img/ida_plugin_example_1.png)
+![](../../../doc/img/explorer_condensed.png)

-We can use capa explorer to navigate the IDA Disassembly view directly to the suspect function and get an assembly-level breakdown of why capa matched `self delete via COMSPEC environment variable` 
-for this particular function.
+We can use capa explorer to navigate our Disassembly view directly to the suspect function and get an assembly-level breakdown of why capa matched `self delete via COMSPEC environment variable`.

-![](../../../doc/img/ida_plugin_example_2.png)
+![](../../../doc/img/explorer_expanded.png)

 Using the `Rule Information` and `Details` columns capa explorer shows us that the suspect function matched `self delete via COMSPEC environment variable` because it contains capa rule matches for `create process`, `get COMSPEC environment variable`,
-and `query environment variable`, references to the strings `COMSPEC`, ` > nul`, and `/c del`, and calls to the Windows API functions `GetEnvironmentVariableA` and `ShellExecuteEx`.
+and `query environment variable`, references to the strings `COMSPEC`, ` > nul`, and `/c del `, and calls to the Windows API functions `GetEnvironmentVariableA` and `ShellExecuteEx`.
+
+capa explorer also helps you build new capa rules. To start, select the `Rule Generator` tab, navigate to a function in your Disassembly view,
+and click `Analyze`. capa explorer will extract features from the function and display them in the `Features` pane. You can add features listed in this pane to the `Editor` pane
+by either double-clicking a feature or using multi-select + right-click to add multiple features at once. The `Preview` and `Editor` panes help edit your rule. Use the `Preview` pane
+to modify the rule text directly and the `Editor` pane to construct and rearrange your hierarchy of statements and features. When you finish a rule you can save it directly to a file by clicking `Save`.
+
+![](../../../doc/img/rulegen_expanded.png)

 For more information on the FLARE team's open-source framework, capa, check out the overview in our first [blog](https://www.fireeye.com/blog/threat-research/2020/07/capa-automatically-identify-malware-capabilities.html).

-## Features
-
-![](../../../doc/img/ida_plugin_intro.gif)
-
-* Display capa results in an interactive tree view of rule matches and their locations in the current database
-* Search for keywords or phrases found in the `Rule Information`, `Address`, or `Details` columns
-* Display rule source content when a user hovers their cursor over a rule match
-* Double-click `Address` column to view associated feature in the IDA Disassembly view
-* Limit tree view results to the function currently displayed in the IDA Disassembly view; update results as a user navigates to different functions
-* Export results as formatted JSON by navigating to `File > Export results...`
-* Remember a user's capa rules directory for future runs; change capa rules directory by navigating to `Rules > Change rules directory...`
-* Automatically re-analyze database when user performs a program rebase
-* Automatically update results when IDA is used to rename a function
-* Select one or more checkboxes to highlight the associated addresses in the IDA Disassembly view
-* Right-click a function match to rename it; the new function name is propagated to the current IDA database
-* Right-click to copy a result by column or by row
-* Sort results by column
-* Reset tree view and IDA Disassembly view highlighting by clicking `Reset`
-
 ## Getting Started

 ### Requirements
@@ -56,7 +42,7 @@ If you encounter issues with your specific setup, please open a new [Issue](http

 ### Supported File Types

-capa explorer is limited to the file types supported by capa, which includes:
+capa explorer is limited to the file types supported by capa, which include:

 * Windows 32-bit and 64-bit PE files
 * Windows 32-bit and 64-bit shellcode
@@ -74,38 +60,48 @@ You can install capa explorer using the following steps:

 ### Usage

-1. Run IDA and analyze a supported file type (select the `Manual Load` and `Load Resources` options in IDA for best results)
+1. Open IDA and analyze a supported file type (select the `Manual Load` and `Load Resources` options in IDA for best results)
 2. Open capa explorer in IDA by navigating to `Edit > Plugins > FLARE capa explorer` or using the keyboard shortcut `Alt+F5`
-3. Click the `Analyze` button
+3. Select the `Program Analysis` tab
+4. Click the `Analyze` button

 When running capa explorer for the first time you are prompted to select a file directory containing capa rules. The plugin conveniently
-remembers your selection for future runs; you can change this selection by navigating to `Rules > Change rules directory...`. We recommend 
+remembers your selection for future runs; you can change this selection and other default settings by clicking `Settings`. We recommend 
 downloading and using the [standard collection of capa rules](https://github.com/fireeye/capa-rules) when getting started with the plugin.

-#### Tips
+#### Tips for Program Analysis

 * Start analysis by clicking the `Analyze` button
-* Reset the plugin user interface and remove highlighting from IDA disassembly view by clicking the `Reset` button
-* Change your capa rules directory by navigating to `Rules > Change rules directory...` from the plugin menu
+* Reset the plugin user interface and remove highlighting from your Disassembly view by clicking the `Reset` button
+* Change your capa rules directory and other default settings by clicking `Settings`
 * Hover your cursor over a rule match to view the source content of the rule
-* Double-click the `Address` column to navigate the IDA Disassembly view to the associated feature
+* Double-click the `Address` column to navigate your Disassembly view to the address of the associated feature
 * Double-click a result in the `Rule Information` column to expand its children
-* Select a checkbox in the `Rule Information` column to highlight the address of the associated feature in the IDA Dissasembly view
+* Select a checkbox in the `Rule Information` column to highlight the address of the associated feature in your Disassembly view
+
+#### Tips for Rule Generator
+
+* Navigate to a function in your Disassembly view and click `Analyze` to get started
+* Double-click or use multi-select + right-click to add features from the `Features` pane to the `Editor` pane
+* Right-click features in the `Editor` pane to make context-specific modifications
+* Drag-and-drop (single click + multi-select support) features in the `Editor` pane to construct your hierarchy of statements and features
+* Right-click anywhere in the `Editor` pane not on a feature to remove all features
+* Add descriptions or comments to a feature by editing the corresponding column in the `Editor` pane
+* Directly edit rule text and metadata fields using the `Preview` pane
+* Change the default rule author and default rule scope displayed in the `Preview` pane by clicking `Settings`

 ## Development

-Because capa explorer is packaged with capa you will need to install capa locally for development.
-
-You can install capa locally by following the steps outlined in `Method 3: Inspecting the capa source code` of the [capa 
+capa explorer is packaged with capa so you will need to install capa locally for development. You can install capa locally by following the steps outlined in `Method 3: Inspecting the capa source code` of the [capa 
 installation guide](https://github.com/fireeye/capa/blob/master/doc/installation.md#method-3-inspecting-the-capa-source-code). Once installed, copy [capa_explorer.py](https://raw.githubusercontent.com/fireeye/capa/master/capa/ida/plugin/capa_explorer.py) 
-to your IDA plugins directory to run the plugin in IDA.
+to your plugins directory to install capa explorer in IDA.

 ### Components

 capa explorer consists of two main components:

-* An IDA [feature extractor](https://github.com/fireeye/capa/tree/master/capa/features/extractors/ida) built on top of IDA's binary analysis engine
-  * This component uses IDAPython to extract [capa features](https://github.com/fireeye/capa-rules/blob/master/doc/format.md#extracted-features) from the IDA database such as strings, 
+* A [feature extractor](https://github.com/fireeye/capa/tree/master/capa/features/extractors/ida) built on top of IDA's binary analysis engine
+  * This component uses IDAPython to extract [capa features](https://github.com/fireeye/capa-rules/blob/master/doc/format.md#extracted-features) from your IDBs such as strings, 
 disassembly, and control flow; these extracted features are used by capa to find feature combinations that result in a rule match
 * An [interactive user interface](https://github.com/fireeye/capa/tree/master/capa/ida/plugin) for displaying and exploring capa rule matches
-  * This component integrates the IDA feature extractor and capa, providing an interactive user interface to dissect rule matches found by capa using features extracted by the IDA feature extractor
+  * This component integrates the feature extractor and capa, providing an interactive user interface to dissect rule matches found by capa using features extracted directly from your IDBs
--- a/capa/ida/plugin/form.py
+++ b/capa/ida/plugin/form.py
--- a/capa/ida/plugin/item.py
+++ b/capa/ida/plugin/item.py
@@ -35,20 +35,19 @@ def location_to_hex(location):
 class CapaExplorerDataItem(object):
    """store data for CapaExplorerDataModel"""

-    def __init__(self, parent, data):
+    def __init__(self, parent, data, can_check=True):
        """initialize item"""
        self.pred = parent
        self._data = data
        self.children = []
        self._checked = False
+        self._can_check = can_check

        # default state for item
-        self.flags = (
-            QtCore.Qt.ItemIsEnabled
-            | QtCore.Qt.ItemIsSelectable
-            | QtCore.Qt.ItemIsTristate
-            | QtCore.Qt.ItemIsUserCheckable
-        )
+        self.flags = QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable
+
+        if self._can_check:
+            self.flags = self.flags | QtCore.Qt.ItemIsUserCheckable | QtCore.Qt.ItemIsTristate

        if self.pred:
            self.pred.appendChild(self)
@@ -70,6 +69,10 @@ class CapaExplorerDataItem(object):
        """
        self._checked = checked

+    def canCheck(self):
+        """get whether item can be checked (the can_check value stored at construction)"""
+        return self._can_check
+
    def isChecked(self):
        """get item is checked"""
        return self._checked
@@ -165,7 +168,7 @@ class CapaExplorerRuleItem(CapaExplorerDataItem):

    fmt = "%s (%d matches)"

-    def __init__(self, parent, name, namespace, count, source):
+    def __init__(self, parent, name, namespace, count, source, can_check=True):
        """initialize item

        @param parent: parent node
@@ -175,7 +178,7 @@ class CapaExplorerRuleItem(CapaExplorerDataItem):
        @param source: rule source (tooltip)
        """
        display = self.fmt % (name, count) if count > 1 else name
-        super(CapaExplorerRuleItem, self).__init__(parent, [display, "", namespace])
+        super(CapaExplorerRuleItem, self).__init__(parent, [display, "", namespace], can_check)
        self._source = source

    @property
@@ -208,14 +211,14 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):

    fmt = "function(%s)"

-    def __init__(self, parent, location):
+    def __init__(self, parent, location, can_check=True):
        """initialize item

        @param parent: parent node
        @param location: virtual address of function as seen by IDA
        """
        super(CapaExplorerFunctionItem, self).__init__(
-            parent, [self.fmt % idaapi.get_name(location), location_to_hex(location), ""]
+            parent, [self.fmt % idaapi.get_name(location), location_to_hex(location), ""], can_check
        )

    @property
--- a/capa/ida/plugin/model.py
+++ b/capa/ida/plugin/model.py
@@ -6,7 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from collections import deque
+from collections import deque, defaultdict

 import idc
 import idaapi
@@ -110,6 +110,8 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):

        if role == QtCore.Qt.CheckStateRole and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION:
            # inform view how to display content of checkbox - un/checked
+            if not item.canCheck():
+                return None
            return QtCore.Qt.Checked if item.isChecked() else QtCore.Qt.Unchecked

        if role == QtCore.Qt.FontRole and column in (
@@ -424,14 +426,28 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        for child in match.get("children", []):
            self.render_capa_doc_match(parent2, child, doc)

-    def render_capa_doc(self, doc):
-        """render capa features specified in doc
-
-        @param doc: capa result doc
-        """
-        # inform model that changes are about to occur
-        self.beginResetModel()
+    def render_capa_doc_by_function(self, doc):
+        """render rule matches grouped under one non-checkable function item per function start address"""
+        matches_by_function = {}
+        for rule in rutils.capability_rules(doc):
+            for ea in rule["matches"].keys():
+                ea = capa.ida.helpers.get_func_start_ea(ea)
+                if ea is None:
+                    # file scope, skip for rendering in this mode
+                    continue
+                if None is matches_by_function.get(ea, None):
+                    matches_by_function[ea] = CapaExplorerFunctionItem(self.root_node, ea, can_check=False)
+                CapaExplorerRuleItem(
+                    matches_by_function[ea],
+                    rule["meta"]["name"],
+                    rule["meta"].get("namespace"),
+                    len(rule["matches"]),
+                    rule["source"],
+                    can_check=False,
+                )

+    def render_capa_doc_by_program(self, doc):
+        """ """
        for rule in rutils.capability_rules(doc):
            rule_name = rule["meta"]["name"]
            rule_namespace = rule["meta"].get("namespace")
@@ -451,6 +467,19 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):

                self.render_capa_doc_match(parent2, match, doc)

+    def render_capa_doc(self, doc, by_function):
+        """render capa features specified in doc, grouped by function or by program
+        @param doc: capa result doc
+        @param by_function: truthy to use render_capa_doc_by_function, otherwise render_capa_doc_by_program
+        """
+        # inform model that changes are about to occur
+        self.beginResetModel()
+
+        if by_function:
+            self.render_capa_doc_by_function(doc)
+        else:
+            self.render_capa_doc_by_program(doc)
+
        # inform model changes have ended
        self.endResetModel()

@@ -459,13 +488,17 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):

        @param feature: capa feature read from doc
        """
-        if feature[feature["type"]]:
+        key = feature["type"]
+        value = feature[feature["type"]]
+        if value:
+            if key == "string":
+                value = '"%s"' % capa.features.escape_string(value)
            if feature.get("description", ""):
-                return "%s(%s = %s)" % (feature["type"], feature[feature["type"]], feature["description"])
+                return "%s(%s = %s)" % (key, value, feature["description"])
            else:
-                return "%s(%s)" % (feature["type"], feature[feature["type"]])
+                return "%s(%s)" % (key, value)
        else:
-            return "%s" % feature["type"]
+            return "%s" % key

    def render_capa_doc_feature_node(self, parent, feature, locations, doc):
        """process capa doc feature node
@@ -522,7 +555,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            )

        if feature["type"] == "regex":
-            return CapaExplorerStringViewItem(parent, display, location, feature["match"])
+            return CapaExplorerStringViewItem(
+                parent, display, location, '"%s"' % capa.features.escape_string(feature["match"])
+            )

        if feature["type"] == "basicblock":
            return CapaExplorerBlockItem(parent, location)
@@ -547,7 +582,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):

        if feature["type"] in ("string",):
            # display string preview
-            return CapaExplorerStringViewItem(parent, display, location, feature[feature["type"]])
+            return CapaExplorerStringViewItem(
+                parent, display, location, '"%s"' % capa.features.escape_string(feature[feature["type"]])
+            )

        if feature["type"] in ("import", "export"):
            # display no preview
--- a/capa/ida/plugin/view.py
+++ b/capa/ida/plugin/view.py
@@ -5,15 +5,936 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import re
+from collections import Counter

 import idc
-from PyQt5 import QtCore, QtWidgets
+from PyQt5 import QtGui, QtCore, QtWidgets

+import capa.rules
+import capa.engine
+import capa.ida.helpers
+import capa.features.basicblock
 from capa.ida.plugin.item import CapaExplorerFunctionItem
 from capa.ida.plugin.model import CapaExplorerDataModel

 MAX_SECTION_SIZE = 750

+# default colors used in views
+COLOR_GREEN_RGB = (79, 121, 66)
+COLOR_BLUE_RGB = (37, 147, 215)
+
+
+def calc_level_by_indent(line, prev_level=0):
+    """return the indentation width of line (two less for "description" lines); blank lines return prev_level"""
+    if not len(line.strip()):
+        # blank line, which may occur for comments so we simply use the last level
+        return prev_level
+    stripped = line.lstrip()
+    if stripped.startswith("description"):
+        # need to adjust two spaces when encountering string description
+        line = line[2:]
+    # calc line level based on preceding whitespace
+    return len(line) - len(stripped)
+
+
+def parse_feature_for_node(feature):
+    """split a rule line into stripped (feature, description, comment); lines starting with "#" are returned whole as feature"""
+    description = ""
+    comment = ""
+
+    if feature.startswith("- count"):
+        # count is weird, we need to handle special
+        # first, we need to grab the comment, if exists
+        # next, we need to check for an embedded description
+        feature, _, comment = feature.partition("#")
+        m = re.search(r"- count\(([a-zA-Z]+)\((.+)\s+=\s+(.+)\)\):\s*(.+)", feature)
+        if m:
+            # reconstruct count without description
+            feature, value, description, count = m.groups()
+            feature = "- count(%s(%s)): %s" % (feature, value, count)
+    elif not feature.startswith("#"):
+        feature, _, comment = feature.partition("#")
+        feature, _, description = feature.partition("=")
+
+    return map(lambda o: o.strip(), (feature, description, comment))
+
+
+def parse_node_for_feature(feature, description, comment, depth):
+    """format a node's feature, description and comment as rule text indented (depth * 2) + 4 spaces, ending with a newline"""
+    depth = (depth * 2) + 4
+    display = ""
+
+    if feature.startswith("#"):
+        display += "%s%s\n" % (" " * depth, feature)
+    elif description:
+        if feature.startswith(("- and", "- or", "- optional", "- basic block", "- not")):
+            display += "%s%s" % (" " * depth, feature)
+            if comment:
+                display += " # %s" % comment
+            display += "\n%s- description: %s\n" % (" " * (depth + 2), description)
+        elif feature.startswith("- string"):
+            display += "%s%s" % (" " * depth, feature)
+            if comment:
+                display += " # %s" % comment
+            display += "\n%sdescription: %s\n" % (" " * (depth + 2), description)
+        elif feature.startswith("- count"):
+            # count is weird, we need to format description based on feature type, so we parse with regex
+            # assume format - count(<feature_name>(<feature_value>)): <count>
+            m = re.search(r"- count\(([a-zA-Z]+)\((.+)\)\): (.+)", feature)
+            if m:
+                name, value, count = m.groups()
+                if name in ("string",):
+                    display += "%s%s" % (" " * depth, feature)
+                    if comment:
+                        display += " # %s" % comment
+                    display += "\n%sdescription: %s\n" % (" " * (depth + 2), description)
+                else:
+                    display += "%s- count(%s(%s = %s)): %s" % (
+                        " " * depth,
+                        name,
+                        value,
+                        description,
+                        count,
+                    )
+                    if comment:
+                        display += " # %s\n" % comment
+        else:
+            display += "%s%s = %s" % (" " * depth, feature, description)
+            if comment:
+                display += " # %s\n" % comment
+    else:
+        display += "%s%s" % (" " * depth, feature)
+        if comment:
+            display += " # %s\n" % comment
+
+    return display if display.endswith("\n") else display + "\n"
+
+
+def yaml_to_nodes(s):
+    level = 0  # NOTE(review): never updated in this loop, so blank lines always get level 0 - confirm intended
+    for line in s.splitlines():
+        feature, description, comment = parse_feature_for_node(line.strip())
+
+        o = QtWidgets.QTreeWidgetItem(None)
+
+        # set node attributes: indentation level, then node type chosen from the feature text prefix
+        setattr(o, "capa_level", calc_level_by_indent(line, level))
+
+        if feature.startswith(("- and:", "- or:", "- not:", "- basic block:", "- optional:")):
+            setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_expression())
+        elif feature.startswith("#"):
+            setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_comment())
+        else:
+            setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_feature())
+
+        # set node text: column 0 = feature, column 1 = description, column 2 = comment
+        for (i, v) in enumerate((feature, description, comment)):
+            o.setText(i, v)
+
+        yield o
+
+
+def iterate_tree(o):
+    """yield each item visited by a QTreeWidgetItemIterator constructed from o"""
+    itr = QtWidgets.QTreeWidgetItemIterator(o)
+    while itr.value():
+        yield itr.value()
+        itr += 1
+
+
+def calc_item_depth(o):
+    """count the ancestors of o by following parent() until it returns a falsy value"""
+    depth = 0
+    while True:
+        if not o.parent():
+            break
+        depth += 1
+        o = o.parent()
+    return depth
+
+
+def build_action(o, display, data, slot):
+    """create a QAction labeled display, parented to o and carrying data; when triggered it calls slot(action)"""
+    action = QtWidgets.QAction(display, o)
+
+    action.setData(data)
+    action.triggered.connect(lambda checked: slot(action))
+
+    return action
+
+
+def build_context_menu(o, actions):
+    """build a QMenu from actions: QMenu entries become submenus, other entries are unpacked into build_action(o, *entry)"""
+    menu = QtWidgets.QMenu()
+
+    for action in actions:
+        if isinstance(action, QtWidgets.QMenu):
+            menu.addMenu(action)
+        else:
+            menu.addAction(build_action(o, *action))
+
+    return menu
+
+
+class CapaExplorerRulgenPreview(QtWidgets.QTextEdit):
+
+    INDENT = " " * 2
+
+    def __init__(self, parent=None):
+        """initialize the text edit: bold Courier font, no line wrapping, horizontal scroll bar shown as needed"""
+        super(CapaExplorerRulgenPreview, self).__init__(parent)
+
+        self.setFont(QtGui.QFont("Courier", weight=QtGui.QFont.Bold))
+        self.setLineWrapMode(QtWidgets.QTextEdit.NoWrap)
+        self.setHorizontalScrollBarPolicy(QtCore.Qt.ScrollBarAsNeeded)
+
+    def reset_view(self):
+        """clear the preview text"""
+        self.clear()
+
+    def load_preview_meta(self, ea, author, scope):
+        """set the preview text to a default rule header using author, scope and the file MD5 (plus ea when truthy) as example"""
+        metadata_default = [
+            "# generated using capa explorer for IDA Pro",
+            "rule:",
+            "  meta:",
+            "    name: <insert_name>",
+            "    namespace: <insert_namespace>",
+            "    author: %s" % author,
+            "    scope: %s" % scope,
+            "    references: <insert_references>",
+            "    examples:",
+            "      - %s:0x%X" % (capa.ida.helpers.get_file_md5().upper(), ea)
+            if ea
+            else "      - %s" % (capa.ida.helpers.get_file_md5().upper()),
+            "  features:",
+        ]
+        self.setText("\n".join(metadata_default))
+
+    def keyPressEvent(self, e):
+        """intercept key presses to implement Tab (indent) and SHIFT + Tab (dedent) using INDENT spaces"""
+        if e.key() in (QtCore.Qt.Key_Tab, QtCore.Qt.Key_Backtab):
+            # apparently it's not easy to implement tabs as spaces, or multi-line tab or SHIFT + Tab
+            # so we need to implement it ourselves so we can retain properly formatted capa rules
+            # when a user uses the Tab key
+            if self.textCursor().selection().isEmpty():
+                # no selection: only Tab is handled (inserts INDENT); SHIFT + Tab is ignored
+                if e.key() == QtCore.Qt.Key_Tab:
+                    self.insertPlainText(self.INDENT)
+            else:
+                # multi-line tab or SHIFT + Tab
+                cur = self.textCursor()
+                select_start_ppos = cur.selectionStart()
+                select_end_ppos = cur.selectionEnd()
+
+                # remember the scroll position, it is restored after the text is replaced below
+                scroll_ppos = self.verticalScrollBar().sliderPosition()
+
+                # determine lineno for first selected line, and column
+                cur.setPosition(select_start_ppos)
+                start_lineno = self.count_previous_lines_from_block(cur.block())
+                start_lineco = cur.columnNumber()
+
+                # determine lineno for last selected line
+                cur.setPosition(select_end_ppos)
+                end_lineno = self.count_previous_lines_from_block(cur.block())
+
+                # now we need to indent or dedent the selected lines. for now, we read the text, modify
+                # the lines between start_lineno and end_lineno accordingly, and then reset the view
+                # this might not be the best solution, but it avoids messing around with cursor positions
+                # to determine the beginning of lines
+
+                plain = self.toPlainText().splitlines()
+
+                if e.key() == QtCore.Qt.Key_Tab:
+                    # user Tab, indent selected lines
+                    lines_modified = end_lineno - start_lineno
+                    first_modified = True
+                    change = [self.INDENT + line for line in plain[start_lineno : end_lineno + 1]]
+                else:
+                    # user SHIFT + Tab, dedent selected lines
+                    lines_modified = 0
+                    first_modified = False
+                    change = []
+                    for (lineno, line) in enumerate(plain[start_lineno : end_lineno + 1]):
+                        if line.startswith(self.INDENT):
+                            if lineno == 0:
+                                # keep track if first line is modified, so we can properly display
+                                # the text selection later
+                                first_modified = True
+                            lines_modified += 1
+                            line = line[len(self.INDENT) :]
+                        change.append(line)
+
+                # apply modifications, and reset view
+                plain[start_lineno : end_lineno + 1] = change
+                self.setPlainText("\n".join(plain) + "\n")
+
+                # now we need to properly adjust the selection positions, so users don't have to
+                # re-select when indenting or dedenting the same lines repeatedly
+                if e.key() == QtCore.Qt.Key_Tab:
+                    # user Tab, increase selection positions
+                    select_start_ppos += len(self.INDENT)
+                    select_end_ppos += (lines_modified * len(self.INDENT)) + len(self.INDENT)
+                elif lines_modified:
+                    # user SHIFT + Tab, decrease selection positions
+                    if start_lineco not in (0, 1) and first_modified:
+                        # only decrease start position if not in first column
+                        select_start_ppos -= len(self.INDENT)
+                    select_end_ppos -= lines_modified * len(self.INDENT)
+
+                # apply updated selection and restore previous scroll position
+                self.set_selection(select_start_ppos, select_end_ppos, len(self.toPlainText()))
+                self.verticalScrollBar().setSliderPosition(scroll_ppos)
+        else:
+            # every other key keeps the default QTextEdit behavior
+            super(CapaExplorerRulgenPreview, self).keyPressEvent(e)
+
+    def count_previous_lines_from_block(self, block):
+        """sum the line counts of every valid block that precedes `block`"""
+        total = 0
+        prev = block.previous()
+        while prev.isValid():
+            total += prev.lineCount()
+            prev = prev.previous()
+        return total
+
+    def set_selection(self, start, end, max):
+        """select the text from position `start` to `end`, with `end` clamped to `max`"""
+        clamped_end = end if end < max else max
+
+        selection = self.textCursor()
+        selection.setPosition(start)
+        # KeepAnchor leaves the anchor at `start` so the range becomes the selection
+        selection.setPosition(clamped_end, QtGui.QTextCursor.KeepAnchor)
+        self.setTextCursor(selection)
+
+
+class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
+
+    updated = QtCore.pyqtSignal()
+
+    def __init__(self, preview, parent=None):
+        """initialize the rule editor tree; `preview` is the text widget rewritten by update_preview"""
+        super(CapaExplorerRulgenEditor, self).__init__(parent)
+
+        self.preview = preview
+
+        self.setHeaderLabels(["Feature", "Description", "Comment"])
+        self.header().setSectionResizeMode(QtWidgets.QHeaderView.ResizeToContents)
+        self.header().setStretchLastSection(False)
+        self.setExpandsOnDoubleClick(False)
+        self.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
+        self.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
+        self.setSelectionMode(QtWidgets.QAbstractItemView.ExtendedSelection)
+        self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")
+
+        # enable drag and drop
+        self.setDragEnabled(True)
+        self.setAcceptDrops(True)
+        self.setDragDropMode(QtWidgets.QAbstractItemView.InternalMove)
+
+        # connect slots
+        self.itemChanged.connect(self.slot_item_changed)
+        self.customContextMenuRequested.connect(self.slot_custom_context_menu_requested)
+        self.itemDoubleClicked.connect(self.slot_item_double_clicked)
+
+        self.root = None
+        self.reset_view()
+
+        # set by slot_item_double_clicked, consumed by slot_item_changed
+        self.is_editing = False
+
+    @staticmethod
+    def get_column_feature_index():
+        """index of the "Feature" column"""
+        return 0
+
+    @staticmethod
+    def get_column_description_index():
+        """index of the "Description" column"""
+        return 1
+
+    @staticmethod
+    def get_column_comment_index():
+        """index of the "Comment" column"""
+        return 2
+
+    @staticmethod
+    def get_node_type_expression():
+        """value stored in an item's `capa_type` attribute to mark an expression node"""
+        return 0
+
+    @staticmethod
+    def get_node_type_feature():
+        """value stored in an item's `capa_type` attribute to mark a feature node"""
+        return 1
+
+    @staticmethod
+    def get_node_type_comment():
+        """value stored in an item's `capa_type` attribute to mark a comment node"""
+        return 2
+
+    def dragMoveEvent(self, e):
+        """defer to the parent class drag-move handling"""
+        super(CapaExplorerRulgenEditor, self).dragMoveEvent(e)
+
+    def dragEnterEvent(self, e):
+        """defer to the parent class drag-enter handling"""
+        super(CapaExplorerRulgenEditor, self).dragEnterEvent(e)
+
+    # the handler was originally spelled `dragEventEnter`: Qt never calls that name, and its super() lookup
+    # would have raised AttributeError. keep the old spelling as an alias so existing references resolve.
+    dragEventEnter = dragEnterEvent
+
+    def dropEvent(self, e):
+        """handle a drop only when it lands on a valid item, then refresh the preview and expand the tree"""
+        if not self.indexAt(e.pos()).isValid():
+            return
+
+        super(CapaExplorerRulgenEditor, self).dropEvent(e)
+
+        # self.prune_expressions()
+        self.update_preview()
+        self.expandAll()
+
+    def reset_view(self):
+        """forget the root node and remove all items from the tree"""
+        self.root = None
+        self.clear()
+
+    def slot_item_changed(self, item, column):
+        """refresh the preview once after an edit started by slot_item_double_clicked (is_editing set)"""
+        if self.is_editing:
+            self.update_preview()
+            self.is_editing = False
+
+    def slot_remove_selected(self, action):
+        """remove every selected item from the tree; removing the root node also resets self.root"""
+        for o in self.selectedItems():
+            if o == self.root:
+                # the root is a top-level item, so it is taken from the tree itself, not from a parent
+                self.takeTopLevelItem(self.indexOfTopLevelItem(o))
+                self.root = None
+                continue
+            o.parent().removeChild(o)
+
+    def slot_nest_features(self, action):
+        """move the selected feature/comment nodes under a new expression node named by action.data()[0]"""
+        # create a new parent under root node, by default; new node added last position in tree
+        new_parent = self.new_expression_node(self.root, (action.data()[0], ""))
+
+        if "basic block" in action.data()[0]:
+            # add default child expression when nesting under basic block
+            new_parent.setExpanded(True)
+            new_parent = self.new_expression_node(new_parent, ("- or:", ""))
+
+        for o in self.get_features(selected=True):
+            # take child from its parent by index, add to new parent
+            new_parent.addChild(o.parent().takeChild(o.parent().indexOfChild(o)))
+
+        # ensure new parent expanded
+        new_parent.setExpanded(True)
+
+    def slot_edit_expression(self, action):
+        """change the text of an expression node; action.data() is (new expression text, node to modify)"""
+        expression, o = action.data()
+        if "basic block" in expression and "basic block" not in o.text(
+            CapaExplorerRulgenEditor.get_column_feature_index()
+        ):
+            # changing to a "basic block" expression from a different expression:
+            # move the existing children under a new default "- or:" child node
+            children = o.takeChildren()
+            new_parent = self.new_expression_node(o, ("- or:", ""))
+            for child in children:
+                new_parent.addChild(child)
+            new_parent.setExpanded(True)
+        o.setText(CapaExplorerRulgenEditor.get_column_feature_index(), expression)
+
+    def slot_clear_all(self, action):
+        """remove all nodes from the editor by resetting the view"""
+        self.reset_view()
+
+    def slot_custom_context_menu_requested(self, pos):
+        """show the context menu matching what is under `pos`, then refresh the preview"""
+        if not self.indexAt(pos).isValid():
+            # user selected invalid index
+            self.load_custom_context_menu_invalid_index(pos)
+        elif self.itemAt(pos).capa_type == CapaExplorerRulgenEditor.get_node_type_expression():
+            # user selected expression node
+            self.load_custom_context_menu_expression(pos)
+        else:
+            # user selected feature node (any non-expression node type ends up here)
+            self.load_custom_context_menu_feature(pos)
+
+        self.update_preview()
+
+    def slot_item_double_clicked(self, o, column):
+        """start editing the item when its comment or description column is double-clicked"""
+        if column in (
+            CapaExplorerRulgenEditor.get_column_comment_index(),
+            CapaExplorerRulgenEditor.get_column_description_index(),
+        ):
+            # the item is editable only for the duration of this editItem call
+            o.setFlags(o.flags() | QtCore.Qt.ItemIsEditable)
+            self.editItem(o, column)
+            o.setFlags(o.flags() & ~QtCore.Qt.ItemIsEditable)
+            # tells slot_item_changed to refresh the preview on the next change
+            self.is_editing = True
+
+    def update_preview(self):
+        """regenerate the features section of the preview text from the tree, then emit `updated`
+
+        text up to and including the first "features:" is kept; when the marker is missing, a features
+        section is appended to the right-stripped text instead.
+        """
+        marker = "features:"
+        current = self.preview.toPlainText()
+        marker_at = current.find(marker)
+
+        if marker_at == -1:
+            parts = [current.rstrip(), "\n  features:\n"]
+        else:
+            parts = [current[: marker_at + len(marker)], "\n"]
+
+        for item in iterate_tree(self):
+            feature, description, comment = [item.text(column).strip() for column in range(3)]
+            parts.append(parse_node_for_feature(feature, description, comment, calc_item_depth(item)))
+
+        rule_text = "".join(parts)
+
+        # FIXME we avoid circular update by disabling signals when updating
+        # the preview. Preferably we would refactor the code to avoid this
+        # in the first place
+        self.preview.blockSignals(True)
+        self.preview.setPlainText(rule_text)
+        self.preview.blockSignals(False)
+
+        # emit signal so views can update
+        self.updated.emit()
+
+    def load_custom_context_menu_invalid_index(self, pos):
+        """show the context menu used when no item is under `pos`; it only offers "Remove all\""""
+        actions = (("Remove all", (), self.slot_clear_all),)
+
+        menu = build_context_menu(self.parent(), actions)
+        menu.exec_(self.viewport().mapToGlobal(pos))
+
+    def load_custom_context_menu_feature(self, pos):
+        """show the feature context menu: a "Nest feature(s)" submenu plus "Remove selection\""""
+        actions = (("Remove selection", (), self.slot_remove_selected),)
+
+        sub_actions = (
+            ("and", ("- and:",), self.slot_nest_features),
+            ("or", ("- or:",), self.slot_nest_features),
+            ("not", ("- not:",), self.slot_nest_features),
+            ("optional", ("- optional:",), self.slot_nest_features),
+            ("basic block", ("- basic block:",), self.slot_nest_features),
+        )
+
+        # build submenu with modify actions
+        sub_menu = build_context_menu(self.parent(), sub_actions)
+        sub_menu.setTitle("Nest feature%s" % ("" if len(tuple(self.get_features(selected=True))) == 1 else "s"))
+
+        # build main menu with submenu + main actions
+        menu = build_context_menu(self.parent(), (sub_menu,) + actions)
+
+        menu.exec_(self.viewport().mapToGlobal(pos))
+
+    def load_custom_context_menu_expression(self, pos):
+        """show the expression context menu: a "Modify" submenu plus "Remove expression\""""
+        target = self.itemAt(pos)
+
+        # one "Modify" entry per expression type, each carrying (new expression text, node to modify)
+        sub_actions = tuple(
+            (label, ("- %s:" % label, target), self.slot_edit_expression)
+            for label in ("and", "or", "not", "optional", "basic block")
+        )
+        sub_menu = build_context_menu(self.parent(), sub_actions)
+        sub_menu.setTitle("Modify")
+
+        # main menu is the submenu followed by the main actions
+        actions = (("Remove expression", (), self.slot_remove_selected),)
+        menu = build_context_menu(self.parent(), (sub_menu,) + actions)
+
+        menu.exec_(self.viewport().mapToGlobal(pos))
+
+    def style_expression_node(self, o):
+        """render the feature column of an expression node in bold"""
+        font = QtGui.QFont()
+        font.setBold(True)
+
+        o.setFont(CapaExplorerRulgenEditor.get_column_feature_index(), font)
+
+    def style_feature_node(self, o):
+        """render the feature column of a feature node in medium-weight Courier with the green brush color"""
+        font = QtGui.QFont()
+        brush = QtGui.QBrush()
+
+        font.setFamily("Courier")
+        font.setWeight(QtGui.QFont.Medium)
+        brush.setColor(QtGui.QColor(*COLOR_GREEN_RGB))
+
+        o.setFont(CapaExplorerRulgenEditor.get_column_feature_index(), font)
+        o.setForeground(CapaExplorerRulgenEditor.get_column_feature_index(), brush)
+
+    def style_comment_node(self, o):
+        """render the feature column of a comment node in bold Courier"""
+        font = QtGui.QFont()
+        font.setBold(True)
+        font.setFamily("Courier")
+
+        o.setFont(CapaExplorerRulgenEditor.get_column_feature_index(), font)
+
+    def set_expression_node(self, o):
+        """tag `o` as an expression node via its `capa_type` attribute and apply the expression style"""
+        setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_expression())
+        self.style_expression_node(o)
+
+    def set_feature_node(self, o):
+        """tag `o` as a feature node, disallow dropping items onto it, and apply the feature style"""
+        setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_feature())
+        o.setFlags(o.flags() & ~QtCore.Qt.ItemIsDropEnabled)
+        self.style_feature_node(o)
+
+    def set_comment_node(self, o):
+        """tag `o` as a comment node, disallow dropping items onto it, and apply the comment style"""
+        setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_comment())
+        o.setFlags(o.flags() & ~QtCore.Qt.ItemIsDropEnabled)
+
+        self.style_comment_node(o)
+
+    def new_expression_node(self, parent, values=()):
+        """create an expression node under `parent`, setting column i to values[i]; returns the node"""
+        o = QtWidgets.QTreeWidgetItem(parent)
+        self.set_expression_node(o)
+        for (i, v) in enumerate(values):
+            o.setText(i, v)
+        return o
+
+    def new_feature_node(self, parent, values=()):
+        """create a feature node under `parent`, setting column i to values[i]; returns the node"""
+        o = QtWidgets.QTreeWidgetItem(parent)
+        self.set_feature_node(o)
+        for (i, v) in enumerate(values):
+            o.setText(i, v)
+        return o
+
+    def new_comment_node(self, parent, values=()):
+        """create a comment node under `parent`, setting column i to values[i]; returns the node"""
+        o = QtWidgets.QTreeWidgetItem(parent)
+        self.set_comment_node(o)
+        for (i, v) in enumerate(values):
+            o.setText(i, v)
+        return o
+
+    def update_features(self, features):
+        """add `features` to the root expression node, then expand the tree and refresh the preview
+
+        a feature that occurs once is added as "- name: value"; a feature that occurs several times is
+        added as a count(...) entry. a default "- or:" root node is created when there is none.
+        """
+        if not self.root:
+            # root node does not exist, create default node, set expanded
+            self.root = self.new_expression_node(self, ("- or:", ""))
+
+        def render_value(feature):
+            # string features are escaped and wrapped in double quotes
+            if isinstance(feature, (capa.features.String,)):
+                return '"%s"' % capa.features.escape_string(feature.get_value_str())
+            return feature.get_value_str()
+
+        occurrences = Counter(features)
+
+        # first pass: features seen exactly once
+        for (feature, count) in occurrences.items():
+            if count != 1:
+                continue
+            value = render_value(feature)
+            self.new_feature_node(self.root, ("- %s: %s" % (feature.name.lower(), value), ""))
+
+        # second pass: features seen more than once
+        for (feature, count) in occurrences.items():
+            if not count > 1:
+                continue
+            if feature.value:
+                value = render_value(feature)
+                display = "- count(%s(%s)): %d" % (feature.name.lower(), value, count)
+            else:
+                display = "- count(%s): %d" % (feature.name.lower(), count)
+            self.new_feature_node(self.root, (display, ""))
+
+        self.expandAll()
+        self.update_preview()
+
+    def load_features_from_yaml(self, rule_text, update_preview=False):
+        """rebuild the editor tree from the features section of `rule_text`
+
+        args:
+          rule_text (str): rule text; everything after the first "features:" is passed to yaml_to_nodes.
+          update_preview (bool): when True, also write `rule_text` to the preview with signals blocked.
+
+        the view is always reset first. nothing is loaded when the text has no "features:" marker or
+        when no nodes are produced.
+        """
+
+        def strip_prefix(text, prefix):
+            # str.lstrip(prefix) strips a *set of characters*, not a prefix, so it also ate the start of
+            # the description itself (e.g. "- description: send data" became "ata"); slice instead
+            return text[len(prefix) :].lstrip()
+
+        def add_node(parent, node):
+            text = node.text(0)
+            if text.startswith("description:"):
+                description = strip_prefix(text, "description:")
+                if parent.childCount():
+                    # attach the description to the most recently added child
+                    parent.child(parent.childCount() - 1).setText(1, description)
+                else:
+                    parent.setText(1, description)
+            elif text.startswith("- description:"):
+                parent.setText(1, strip_prefix(text, "- description:"))
+            else:
+                parent.addChild(node)
+
+        def build(parent, nodes):
+            if nodes:
+                child_lvl = nodes[0].capa_level
+                while nodes:
+                    node = nodes.pop(0)
+                    if node.capa_level == child_lvl:
+                        add_node(parent, node)
+                    elif node.capa_level > child_lvl:
+                        # deeper level: put the node back and recurse under the last added child
+                        nodes.insert(0, node)
+                        build(parent.child(parent.childCount() - 1), nodes)
+                    else:
+                        # shallower level: attach to the parent's parent when there is one
+                        parent = parent.parent() if parent.parent() else parent
+                        add_node(parent, node)
+
+        self.reset_view()
+
+        # check for lack of features block
+        if -1 == rule_text.find("features:"):
+            return
+
+        rule_features = rule_text[rule_text.find("features:") + len("features:") :].strip()
+        rule_nodes = list(yaml_to_nodes(rule_features))
+
+        # check for lack of nodes
+        if not rule_nodes:
+            return
+
+        # capa_type indexes the setter that tags and styles each node
+        for o in rule_nodes:
+            (self.set_expression_node, self.set_feature_node, self.set_comment_node)[o.capa_type](o)
+
+        # the first node becomes the root of the tree
+        self.root = rule_nodes.pop(0)
+        self.addTopLevelItem(self.root)
+
+        if update_preview:
+            self.preview.blockSignals(True)
+            self.preview.setPlainText(rule_text)
+            self.preview.blockSignals(False)
+
+        build(self.root, rule_nodes)
+
+        self.expandAll()
+
+    def get_features(self, selected=False, ignore=()):
+        """yield feature and comment nodes not in `ignore`; only selected ones when `selected` is True"""
+        for feature in filter(
+            lambda o: o.capa_type
+            in (CapaExplorerRulgenEditor.get_node_type_feature(), CapaExplorerRulgenEditor.get_node_type_comment()),
+            tuple(iterate_tree(self)),
+        ):
+            if feature in ignore:
+                continue
+            if selected and not feature.isSelected():
+                continue
+            yield feature
+
+    def get_expressions(self, selected=False, ignore=()):
+        """yield expression nodes not in `ignore`; only selected ones when `selected` is True"""
+        for expression in filter(
+            lambda o: o.capa_type == CapaExplorerRulgenEditor.get_node_type_expression(), tuple(iterate_tree(self))
+        ):
+            if expression in ignore:
+                continue
+            if selected and not expression.isSelected():
+                continue
+            yield expression
+
+
+class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
+    def __init__(self, editor, parent=None):
+        """initialize the features tree; `editor` receives features through its update_features method"""
+        super(CapaExplorerRulegenFeatures, self).__init__(parent)
+
+        # tree items created by parse_features_for_tree, keyed by feature type and by feature
+        self.parent_items = {}
+        self.editor = editor
+
+        self.setHeaderLabels(["Feature", "Virtual Address"])
+        self.header().setSectionResizeMode(QtWidgets.QHeaderView.ResizeToContents)
+        self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")
+
+        self.setExpandsOnDoubleClick(False)
+        self.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
+        self.setSelectionMode(QtWidgets.QAbstractItemView.ExtendedSelection)
+
+        # connect slots
+        self.itemDoubleClicked.connect(self.slot_item_double_clicked)
+        self.customContextMenuRequested.connect(self.slot_custom_context_menu_requested)
+
+        self.reset_view()
+
+    @staticmethod
+    def get_column_feature_index():
+        """index of the "Feature" column"""
+        return 0
+
+    @staticmethod
+    def get_column_address_index():
+        """index of the "Virtual Address" column"""
+        return 1
+
+    @staticmethod
+    def get_node_type_parent():
+        """value stored in an item's `capa_type` attribute to mark a parent node"""
+        return 0
+
+    @staticmethod
+    def get_node_type_leaf():
+        """value stored in an item's `capa_type` attribute to mark a leaf node"""
+        return 1
+
+    def reset_view(self):
+        """remove all items from the tree"""
+        self.clear()
+
+    def slot_add_selected_features(self, action):
+        """send the data stored under role 0x100 of every selected item to the editor"""
+        selected = [item.data(0, 0x100) for item in self.selectedItems()]
+        if selected:
+            self.editor.update_features(selected)
+
+    def slot_custom_context_menu_requested(self, pos):
+        """show an "Add feature(s)" context menu for the current selection; no menu when nothing is selected"""
+        count = len(self.selectedItems())
+        if not count:
+            return
+
+        # singular label for one item, counted label otherwise
+        label = "Add feature" if count == 1 else "Add %d features" % count
+        actions = [(label, (), self.slot_add_selected_features)]
+
+        menu = build_context_menu(self.parent(), actions)
+        menu.exec_(self.viewport().mapToGlobal(pos))
+
+    def slot_item_double_clicked(self, o, column):
+        """jump to the item's address on a non-empty address cell, otherwise add a leaf's feature to the editor"""
+        if column == CapaExplorerRulegenFeatures.get_column_address_index() and o.text(column):
+            # the address column text is parsed as base 16
+            idc.jumpto(int(o.text(column), 0x10))
+        elif o.capa_type == CapaExplorerRulegenFeatures.get_node_type_leaf():
+            self.editor.update_features([o.data(0, 0x100)])
+
+    def show_all_items(self):
+        """unhide and collapse every item in the tree"""
+        for o in iterate_tree(self):
+            o.setHidden(False)
+            o.setExpanded(False)
+
+    def filter_items_by_text(self, text):
+        """hide items whose feature value lacks `text` (case-insensitive); empty `text` shows everything"""
+        if text:
+            for o in iterate_tree(self):
+                data = o.data(0, 0x100)
+                if data:
+                    to_match = data.get_value_str()
+                    if not to_match or text.lower() not in to_match.lower():
+                        o.setHidden(True)
+                        continue
+                # items without stored data, and matching items, stay visible and are expanded
+                o.setHidden(False)
+                o.setExpanded(True)
+        else:
+            self.show_all_items()
+
+    def style_parent_node(self, o):
+        """render the feature column of a parent node in bold"""
+        font = QtGui.QFont()
+        font.setBold(True)
+
+        o.setFont(CapaExplorerRulegenFeatures.get_column_feature_index(), font)
+
+    def style_leaf_node(self, o):
+        """render a leaf node in bold Courier; green brush color for the feature, blue for the address"""
+        font = QtGui.QFont("Courier", weight=QtGui.QFont.Bold)
+        brush = QtGui.QBrush()
+
+        o.setFont(CapaExplorerRulegenFeatures.get_column_feature_index(), font)
+        o.setFont(CapaExplorerRulegenFeatures.get_column_address_index(), font)
+
+        brush.setColor(QtGui.QColor(*COLOR_GREEN_RGB))
+        o.setForeground(CapaExplorerRulegenFeatures.get_column_feature_index(), brush)
+
+        brush.setColor(QtGui.QColor(*COLOR_BLUE_RGB))
+        o.setForeground(CapaExplorerRulegenFeatures.get_column_address_index(), brush)
+
+    def set_parent_node(self, o):
+        """make `o` non-selectable, tag it as a parent node via `capa_type`, and apply the parent style"""
+        o.setFlags(o.flags() & ~QtCore.Qt.ItemIsSelectable)
+        setattr(o, "capa_type", CapaExplorerRulegenFeatures.get_node_type_parent())
+        self.style_parent_node(o)
+
+    def set_leaf_node(self, o):
+        """tag `o` as a leaf node via its `capa_type` attribute and apply the leaf style"""
+        setattr(o, "capa_type", CapaExplorerRulegenFeatures.get_node_type_leaf())
+        self.style_leaf_node(o)
+
+    def new_parent_node(self, parent, data, feature=None):
+        """create a parent node under `parent` with column i set to data[i]; returns the node
+
+        a truthy `feature` is stored on the node under data role 0x100
+        """
+        o = QtWidgets.QTreeWidgetItem(parent)
+
+        self.set_parent_node(o)
+        for (i, v) in enumerate(data):
+            o.setText(i, v)
+        if feature:
+            o.setData(0, 0x100, feature)
+
+        return o
+
+    def new_leaf_node(self, parent, data, feature=None):
+        """create a leaf node under `parent` with column i set to data[i]; returns the node
+
+        a truthy `feature` is stored on the node under data role 0x100
+        """
+        o = QtWidgets.QTreeWidgetItem(parent)
+
+        self.set_leaf_node(o)
+        for (i, v) in enumerate(data):
+            o.setText(i, v)
+        if feature:
+            o.setData(0, 0x100, feature)
+
+        return o
+
+    def load_features(self, file_features, func_features=None):
+        """populate the tree with file scope features and, when given, function/basic block scope features
+
+        args:
+          file_features: mapping of feature to its addresses, shown under "File Scope".
+          func_features: optional mapping of feature to its addresses, shown under
+            "Function/Basic Block Scope"; skipped when empty or None.
+        """
+        # `func_features` defaults to None rather than a shared mutable `{}`
+        self.parse_features_for_tree(self.new_parent_node(self, ("File Scope",)), file_features)
+        if func_features:
+            self.parse_features_for_tree(self.new_parent_node(self, ("Function/Basic Block Scope",)), func_features)
+
+    def parse_features_for_tree(self, parent, features):
+        """add `features` below `parent`, grouped by feature type
+
+        args:
+          parent: tree item that receives one parent node per feature type.
+          features: mapping of feature to a collection of addresses.
+
+        a feature with several addresses becomes a parent node with one leaf per address; a feature with
+        a single address becomes a leaf showing that address. basic block features are skipped.
+        `features` is only read, so the same mapping can be loaded again later.
+        """
+        self.parent_items = {}
+
+        def format_address(e):
+            # falsy addresses (None, 0) are shown as an empty cell
+            return "%X" % e if e else ""
+
+        def format_feature(feature):
+            """format a feature as name(value), quoting and escaping string values"""
+            name = feature.name.lower()
+            value = feature.get_value_str()
+            if isinstance(feature, (capa.features.String,)):
+                value = '"%s"' % capa.features.escape_string(value)
+            return "%s(%s)" % (name, value)
+
+        for (feature, eas) in sorted(features.items(), key=lambda k: sorted(k[1])):
+            if isinstance(feature, capa.features.basicblock.BasicBlock):
+                # filter basic blocks for now, we may want to add these back in some time
+                # in the future
+                continue
+
+            # level 0
+            if type(feature) not in self.parent_items:
+                self.parent_items[type(feature)] = self.new_parent_node(parent, (feature.name.lower(),))
+
+            # level 1
+            if feature not in self.parent_items:
+                if len(eas) > 1:
+                    self.parent_items[feature] = self.new_parent_node(
+                        self.parent_items[type(feature)], (format_feature(feature),), feature=feature
+                    )
+                else:
+                    self.parent_items[feature] = self.new_leaf_node(
+                        self.parent_items[type(feature)], (format_feature(feature),), feature=feature
+                    )
+
+            # level n > 1
+            if len(eas) > 1:
+                for ea in sorted(eas):
+                    self.new_leaf_node(
+                        self.parent_items[feature], (format_feature(feature), format_address(ea)), feature=feature
+                    )
+            else:
+                # read the single address without mutating the caller's collection: `eas.pop()` emptied
+                # it, so loading the same features again raised on the empty collection. an empty
+                # collection now yields an empty address cell.
+                ea = next(iter(eas), None)
+                for (i, v) in enumerate((format_feature(feature), format_address(ea))):
+                    self.parent_items[feature].setText(i, v)
+                self.parent_items[feature].setData(0, 0x100, feature)
+

 class CapaExplorerQtreeView(QtWidgets.QTreeView):
    """tree view used to display hierarchical capa results
--- a/capa/main.py
+++ b/capa/main.py
@@ -32,7 +32,9 @@ import capa.features.extractors
 from capa.helpers import oint, get_file_taste

 RULES_PATH_DEFAULT_STRING = "(embedded rules)"
-SUPPORTED_FILE_MAGIC = set(["MZ"])
+SUPPORTED_FILE_MAGIC = set([b"MZ"])
+BACKEND_VIV = "vivisect"
+BACKEND_SMDA = "smda"


 logger = logging.getLogger("capa")
@@ -40,8 +42,11 @@ logger = logging.getLogger("capa")

 def set_vivisect_log_level(level):
    logging.getLogger("vivisect").setLevel(level)
+    logging.getLogger("vivisect.base").setLevel(level)
+    logging.getLogger("vivisect.impemu").setLevel(level)
    logging.getLogger("vtrace").setLevel(level)
    logging.getLogger("envi").setLevel(level)
+    logging.getLogger("envi.codeflow").setLevel(level)


 def find_function_capabilities(ruleset, extractor, f):
@@ -112,7 +117,13 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
        }
    }

-    for f in tqdm.tqdm(list(extractor.get_functions()), disable=disable_progress, desc="matching", unit=" functions"):
+    pbar = tqdm.tqdm
+    if disable_progress:
+        # do not use tqdm to avoid unnecessary side effects when caller intends
+        # to disable progress completely
+        pbar = lambda s, *args, **kwargs: s
+
+    for f in pbar(list(extractor.get_functions()), desc="matching", unit=" functions"):
        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
        meta["feature_counts"]["functions"][f.__int__()] = feature_count
        logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
@@ -271,6 +282,8 @@ def get_workspace(path, format, should_save=True):
        vw = get_shellcode_vw(path, arch="i386", should_save=should_save)
    elif format == "sc64":
        vw = get_shellcode_vw(path, arch="amd64", should_save=should_save)
+    else:
+        raise ValueError("unexpected format: " + format)
    logger.debug("%s", get_meta_str(vw))
    return vw

@@ -294,17 +307,43 @@ class UnsupportedRuntimeError(RuntimeError):
    pass


-def get_extractor_py3(path, format, disable_progress=False):
-    raise UnsupportedRuntimeError()
+def get_extractor_py3(path, format, backend, disable_progress=False):
+    """
+    create a feature extractor for the given sample under Python 3.
+
+    args:
+      path (str): path to the input sample.
+      format (str): file format, passed to `get_workspace` by the vivisect backend.
+      backend (str): BACKEND_SMDA selects SMDA; any other value selects vivisect.
+      disable_progress (bool): when True, the "analyzing program" spinner is disabled.
+    """
+    if backend == BACKEND_SMDA:
+        from smda.SmdaConfig import SmdaConfig
+        from smda.Disassembler import Disassembler
+
+        import capa.features.extractors.smda
+
+        smda_report = None
+        with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
+            config = SmdaConfig()
+            config.STORE_BUFFER = True
+            smda_disasm = Disassembler(config)
+            smda_report = smda_disasm.disassembleFile(path)
+
+        return capa.features.extractors.smda.SmdaFeatureExtractor(smda_report, path)
+    else:
+        import capa.features.extractors.viv
+
+        with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
+            # the workspace is saved explicitly below so that a failed write can be tolerated
+            vw = get_workspace(path, format, should_save=False)
+
+            try:
+                vw.saveWorkspace()
+            except IOError:
+                # see #168 for discussion around how to handle non-writable directories
+                logger.info("source directory is not writable, won't save intermediate workspace")
+
+        return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)


-def get_extractor(path, format, disable_progress=False):
+def get_extractor(path, format, backend, disable_progress=False):
    """
    raises:
      UnsupportedFormatError:
    """
    if sys.version_info >= (3, 0):
-        return get_extractor_py3(path, format, disable_progress=disable_progress)
+        return get_extractor_py3(path, format, backend, disable_progress=disable_progress)
    else:
        return get_extractor_py2(path, format, disable_progress=disable_progress)

@@ -340,8 +379,8 @@ def get_rules(rule_path, disable_progress=False):

            for file in files:
                if not file.endswith(".yml"):
-                    if not (file.endswith(".md") or file.endswith(".git") or file.endswith(".txt")):
-                        # expect to see readme.md, format.md, and maybe a .git directory
+                    if not (file.startswith(".git") or file.endswith((".git", ".md", ".txt"))):
+                        # expect to see .git* files, readme.md, format.md, and maybe a .git directory
                        # other things maybe are rules, but are mis-named.
                        logger.warning("skipping non-.yml file: %s", file)
                    continue
@@ -351,7 +390,13 @@ def get_rules(rule_path, disable_progress=False):

    rules = []

-    for rule_path in tqdm.tqdm(list(rule_paths), disable=disable_progress, desc="loading ", unit="     rules"):
+    pbar = tqdm.tqdm
+    if disable_progress:
+        # do not use tqdm to avoid unnecessary side effects when caller intends
+        # to disable progress completely
+        pbar = lambda s, *args, **kwargs: s
+
+    for rule_path in pbar(list(rule_paths), desc="loading ", unit="     rules"):
        try:
            rule = capa.rules.Rule.from_yaml_file(rule_path)
        except capa.rules.InvalidRule:
@@ -401,10 +446,76 @@ def collect_metadata(argv, sample_path, rules_path, format, extractor):
    }


-def main(argv=None):
-    if argv is None:
-        argv = sys.argv[1:]
+def install_common_args(parser, wanted=None):
+    """
+    register a common set of command line arguments for re-use by main & scripts.
+    these are things like logging/coloring/etc.
+    also enable callers to opt-in to common arguments, like specifying the input sample.

+    this routine lets many scripts use the same language for cli arguments.
+    see `handle_common_args` to do common configuration.
+
+    args:
+      parser (argparse.ArgumentParser): a parser to update in place, adding common arguments.
+      wanted (Set[str]): collection of arguments to opt-into, including:
+        - "sample": required positional argument to input file.
+        - "format": flag to override file format.
+        - "backend": flag to override analysis backend under py3.
+        - "rules": flag to override path to capa rules.
+        - "tag": flag to override/specify which rules to match.
+    """
+    if wanted is None:
+        wanted = set()
+
+    #
+    # common arguments that all scripts will have
+    #
+
+    parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__))
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)"
+    )
+    parser.add_argument(
+        "-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)"
+    )
+    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
+    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
+    parser.add_argument(
+        "--color",
+        type=str,
+        choices=("auto", "always", "never"),
+        default="auto",
+        help="enable ANSI color codes in results, default: only during interactive session",
+    )
+
+    #
+    # arguments that may be opted into:
+    #
+    #   - sample
+    #   - format
+    #   - rules
+    #   - tag
+    #
+
+    if "sample" in wanted:
+        if sys.version_info >= (3, 0):
+            parser.add_argument(
+                # Python 3 str handles non-ASCII arguments correctly
+                "sample",
+                type=str,
+                help="path to sample to analyze",
+            )
+        else:
+            parser.add_argument(
+                # in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters
+                # https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works
+                # in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/)
+                "sample",
+                type=lambda s: s.decode(sys.getfilesystemencoding()),
+                help="path to sample to analyze",
+            )
+
+    if "format" in wanted:
        formats = [
            ("auto", "(default) detect file type automatically"),
            ("pe", "Windows PE file"),
@@ -413,6 +524,83 @@ def main(argv=None):
            ("freeze", "features previously frozen by capa"),
        ]
        format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
+        parser.add_argument(
+            "-f",
+            "--format",
+            choices=[f[0] for f in formats],
+            default="auto",
+            help="select sample format, %s" % format_help,
+        )
+
+    if "backend" in wanted and sys.version_info >= (3, 0):
+        parser.add_argument(
+            "-b",
+            "--backend",
+            type=str,
+            help="select the backend to use",
+            choices=(BACKEND_VIV, BACKEND_SMDA),
+            default=BACKEND_VIV,
+        )
+
+    if "rules" in wanted:
+        parser.add_argument(
+            "-r",
+            "--rules",
+            type=str,
+            default=RULES_PATH_DEFAULT_STRING,
+            help="path to rule file or directory, use embedded rules by default",
+        )
+
+    if "tag" in wanted:
+        parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values")
+
+
+def handle_common_args(args):
+    """
+    handle the global config specified by `install_common_args`,
+    such as configuring logging/coloring/etc.
+
+    args:
+      args (argparse.Namespace): parsed arguments that included at least `install_common_args` args.
+    """
+    if args.quiet:
+        logging.basicConfig(level=logging.WARNING)
+        logging.getLogger().setLevel(logging.WARNING)
+    elif args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+        logging.getLogger().setLevel(logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+        logging.getLogger().setLevel(logging.INFO)
+
+    # disable vivisect-related logging, it's verbose and not relevant for capa users
+    set_vivisect_log_level(logging.CRITICAL)
+
+    # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
+    # tqdm bails when trying to render the progress bar in this setup.
+    # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
+    # see #380 and: https://stackoverflow.com/a/3259271/87207
+    import codecs
+
+    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
+
+    if args.color == "always":
+        colorama.init(strip=False)
+    elif args.color == "auto":
+        # colorama will detect:
+        #  - when on Windows console, and fixup coloring, and
+        #  - when not an interactive session, and disable coloring
+        # renderers should use coloring and assume it will be stripped out if necessary.
+        colorama.init()
+    elif args.color == "never":
+        colorama.init(strip=True)
+    else:
+        raise RuntimeError("unexpected --color value: " + args.color)
+
+
+def main(argv=None):
+    if argv is None:
+        argv = sys.argv[1:]

    desc = "The FLARE team's open-source tool to identify capabilities in executable files."
    epilog = textwrap.dedent(
@@ -446,56 +634,10 @@ def main(argv=None):
    parser = argparse.ArgumentParser(
        description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
    )
-    parser.add_argument(
-        # in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters
-        # https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works
-        # in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/)
-        "sample",
-        type=lambda s: s.decode(sys.getfilesystemencoding()),
-        help="path to sample to analyze",
-    )
-    parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__))
-    parser.add_argument(
-        "-r",
-        "--rules",
-        type=str,
-        default=RULES_PATH_DEFAULT_STRING,
-        help="path to rule file or directory, use embedded rules by default",
-    )
-    parser.add_argument(
-        "-f", "--format", choices=[f[0] for f in formats], default="auto", help="select sample format, %s" % format_help
-    )
-    parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values")
+    install_common_args(parser, {"sample", "format", "backend", "rules", "tag"})
    parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
-    parser.add_argument(
-        "-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)"
-    )
-    parser.add_argument(
-        "-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)"
-    )
-    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
-    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
-    parser.add_argument(
-        "--color",
-        type=str,
-        choices=("auto", "always", "never"),
-        default="auto",
-        help="enable ANSI color codes in results, default: only during interactive session",
-    )
    args = parser.parse_args(args=argv)
-
-    if args.quiet:
-        logging.basicConfig(level=logging.WARNING)
-        logging.getLogger().setLevel(logging.WARNING)
-    elif args.debug:
-        logging.basicConfig(level=logging.DEBUG)
-        logging.getLogger().setLevel(logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO)
-        logging.getLogger().setLevel(logging.INFO)
-
-    # disable vivisect-related logging, it's verbose and not relevant for capa users
-    set_vivisect_log_level(logging.CRITICAL)
+    handle_common_args(args)

    try:
        taste = get_file_taste(args.sample)
@@ -505,14 +647,6 @@ def main(argv=None):
        logger.error("%s", e.args[0])
        return -1

-    # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
-    # tqdm bails when trying to render the progress bar in this setup.
-    # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
-    # see #380 and: https://stackoverflow.com/a/3259271/87207
-    import codecs
-
-    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
-
    if args.rules == RULES_PATH_DEFAULT_STRING:
        logger.debug("-" * 80)
        logger.debug(" Using default embedded rules.")
@@ -550,7 +684,7 @@ def main(argv=None):
            # during the load of the RuleSet, we extract subscope statements into their own rules
            # that are subsequently `match`ed upon. this inflates the total rule count.
            # so, filter out the subscope rules when reporting total number of loaded rules.
-            len(filter(lambda r: "capa/subscope-rule" not in r.meta, rules.rules.values())),
+            len([i for i in filter(lambda r: "capa/subscope-rule" not in r.meta, rules.rules.values())]),
        )
        if args.tag:
            rules = rules.filter_rules_by_meta(args.tag)
@@ -569,7 +703,8 @@ def main(argv=None):
    else:
        format = args.format
        try:
-            extractor = get_extractor(args.sample, args.format, disable_progress=args.quiet)
+            backend = args.backend if sys.version_info > (3, 0) else BACKEND_VIV
+            extractor = get_extractor(args.sample, args.format, backend, disable_progress=args.quiet)
        except UnsupportedFormatError:
            logger.error("-" * 80)
            logger.error(" Input file does not appear to be a PE file.")
@@ -602,19 +737,6 @@ def main(argv=None):
        if not (args.verbose or args.vverbose or args.json):
            return -1

-    if args.color == "always":
-        colorama.init(strip=False)
-    elif args.color == "auto":
-        # colorama will detect:
-        #  - when on Windows console, and fixup coloring, and
-        #  - when not an interactive session, and disable coloring
-        # renderers should use coloring and assume it will be stripped out if necessary.
-        colorama.init()
-    elif args.color == "never":
-        colorama.init(strip=True)
-    else:
-        raise RuntimeError("unexpected --color value: " + args.color)
-
    if args.json:
        print(capa.render.render_json(meta, rules, capabilities))
    elif args.vverbose:
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -56,7 +56,11 @@ def render_statement(ostream, match, statement, indent=0):
        child = statement["child"]

        if child[child["type"]]:
-            value = rutils.bold2(child[child["type"]])
+            if child["type"] == "string":
+                value = '"%s"' % capa.features.escape_string(child[child["type"]])
+            else:
+                value = child[child["type"]]
+            value = rutils.bold2(value)
            if child.get("description"):
                ostream.write("count(%s(%s = %s)): " % (child["type"], value, child["description"]))
            else:
@@ -90,6 +94,9 @@ def render_feature(ostream, match, feature, indent=0):
        key = "string"  # render string for regex to mirror the rule source
        value = feature["match"]  # the match provides more information than the value for regex

+    if key == "string":
+        value = '"%s"' % capa.features.escape_string(value)
+
    ostream.write(key)
    ostream.write(": ")

--- a/capa/rules.py
+++ b/capa/rules.py
@@ -6,6 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

+import re
 import uuid
 import codecs
 import logging
@@ -600,6 +601,9 @@ class Rule(object):
        # use block mode, not inline json-like mode
        y.default_flow_style = False

+        # leave quotes unchanged
+        y.preserve_quotes = True
+
        # indent lists by two spaces below their parent
        #
        #     features:
@@ -614,16 +618,20 @@ class Rule(object):
        return y

    @classmethod
-    def from_yaml(cls, s):
+    def from_yaml(cls, s, use_ruamel=False):
+        if use_ruamel:
+            # ruamel enables nice formatting and doc roundtripping with comments
+            doc = cls._get_ruamel_yaml_parser().load(s)
+        else:
            # use pyyaml because it can be much faster than ruamel (pure python)
            doc = yaml.load(s, Loader=cls._get_yaml_loader())
        return cls.from_dict(doc, s)

    @classmethod
-    def from_yaml_file(cls, path):
+    def from_yaml_file(cls, path, use_ruamel=False):
        with open(path, "rb") as f:
            try:
-                return cls.from_yaml(f.read().decode("utf-8"))
+                return cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel)
            except InvalidRule as e:
                raise InvalidRuleWithPath(path, str(e))

@@ -716,7 +724,20 @@ class Rule(object):
        # tweaking `ruamel.indent()` doesn't quite give us the control we want.
        # so, add the two extra spaces that we've determined we need through experimentation.
        # see #263
-        doc = doc.replace("  description:", "    description:")
+        # only do this for the features section, so the meta description doesn't get reformatted
+        # assumes features section always exists
+        features_offset = doc.find("features")
+        doc = doc[:features_offset] + doc[features_offset:].replace("  description:", "    description:")
+
+        # for negative hex numbers, yaml dump outputs:
+        # - offset: !!int '0x-30'
+        # we prefer:
+        # - offset: -0x30
+        # the below regex makes these adjustments and while ugly, we don't have to explore the ruamel.yaml insides
+        doc = re.sub(r"!!int '0x-([0-9a-fA-F]+)'", r"-0x\1", doc)
+
+        # normalize CRLF to LF
+        doc = doc.replace("\r\n", "\n")
        return doc


@@ -866,7 +887,8 @@ class RuleSet(object):
        given a collection of rules, collect the rules that are needed at the given scope.
        these rules are ordered topologically.

-        don't include "lib" rules, unless they are dependencies of other rules.
+        don't include auto-generated "subscope" rules.
+        we want to include general "lib" rules here - even if they are not dependencies of other rules, see #398
        """
        scope_rules = set([])

@@ -875,7 +897,7 @@ class RuleSet(object):
        #  at lower scope, e.g. function scope.
        # so, we find all dependencies of all rules, and later will filter them down.
        for rule in rules:
-            if rule.meta.get("lib", False):
+            if rule.meta.get("capa/subscope-rule", False):
                continue

            scope_rules.update(get_rules_and_dependencies(rules, rule.name))
--- a/capa/version.py
+++ b/capa/version.py
@@ -1 +1 @@
-__version__ = "1.4.0"
+__version__ = "1.6.1"
--- a/doc/img/changelog/tab.gif
+++ b/doc/img/changelog/tab.gif
--- a/doc/img/explorer_condensed.png
+++ b/doc/img/explorer_condensed.png
--- a/doc/img/explorer_expanded.png
+++ b/doc/img/explorer_expanded.png
--- a/doc/img/ida_plugin_example_1.png
+++ b/doc/img/ida_plugin_example_1.png
--- a/doc/img/ida_plugin_example_2.png
+++ b/doc/img/ida_plugin_example_2.png
--- a/doc/img/ida_plugin_intro.gif
+++ b/doc/img/ida_plugin_intro.gif
--- a/doc/img/rulegen_expanded.png
+++ b/doc/img/rulegen_expanded.png
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -74,8 +74,20 @@ Note that some development dependencies (including the black code formatter) req
 To check the code style, formatting and run the tests you can run the script `scripts/ci.sh`.
 You can run it with the argument `no_tests` to skip the tests and only run the code style and formatting: `scripts/ci.sh no_tests`

-### 3. Setup hooks [optional]
+### 3. Compile binary using PyInstaller
+We compile capa standalone binaries using PyInstaller. To reproduce the build process check out the source code as described above and follow these steps.

+#### Install PyInstaller:
+For Python 2.7: `$ pip install 'pyinstaller==3.*'` (PyInstaller 4 doesn't support Python 2.7)
+
+For Python 3: `$ pip install pyinstaller`
+
+#### Run PyInstaller
+`$ pyinstaller .github/pyinstaller/pyinstaller.spec`
+
+You can find the compiled binary in the created directory `dist/`.
+
+### 4. Setup hooks [optional]
 If you plan to contribute to capa, you may want to setup the hooks.
 Run `scripts/setup-hooks.sh` to set the following hooks up:
 - The `pre-commit` hook runs checks before every `git commit`.
@@ -84,4 +96,3 @@ Run `scripts/setup-hooks.sh` to set the following hooks up:
 - The `pre-push` hook runs checks before every `git push`.
  It runs `scripts/ci.sh` aborting the push if there are code style or rule linter offenses or if the tests fail.
  This way you can ensure everything is alright before sending a pull request.
-
--- a/doc/release.md
+++ b/doc/release.md
@@ -0,0 +1,44 @@
+# Release checklist
+
+- [ ] Ensure all [milestoned issues/PRs](https://github.com/fireeye/capa/milestones) are addressed, or reassign to a new milestone.
+- [ ] Add the `dont merge` label to all PRs that are close to being ready to merge (or merge them if they are ready) in [capa](https://github.com/fireeye/capa/pulls) and [capa-rules](https://github.com/fireeye/capa-rules/pulls).
+- [ ] Ensure the [CI workflow succeeds in master](https://github.com/fireeye/capa/actions/workflows/tests.yml?query=branch%3Amaster).
+- [ ] Ensure that `python scripts/lint.py rules/ --thorough` succeeds (only `missing examples` offenses are allowed in the nursery).
+- [ ] Review changes
+  - capa https://github.com/fireeye/capa/compare/\<last-release\>...master
+  - capa-rules https://github.com/fireeye/capa-rules/compare/\<last-release\>...master
+- [ ] Update [CHANGELOG.md](https://github.com/fireeye/capa/blob/master/CHANGELOG.md)
+  - Do not forget to add a nice introduction thanking contributors
+  - Remember that we need a major release if we introduce breaking changes
+  - Sections
+    - New Features
+    - New Rules
+    - Bug Fixes
+    - Changes
+    - Development
+    - Raw diffs
+  - Update `Raw diffs` links
+  - Create placeholder for `master (unreleased)` section
+    ```
+    ## master (unreleased)
+
+    ### New Features
+
+    ### New Rules
+
+    ### Bug Fixes
+
+    ### Changes
+
+    ### Development
+
+    ### Raw diffs
+    - [capa <release>...master](https://github.com/fireeye/capa/compare/<release>...master)
+    - [capa-rules <release>...master](https://github.com/fireeye/capa-rules/compare/<release>...master)
+    ```
+- [ ] Update [capa/version.py](https://github.com/fireeye/capa/blob/master/capa/version.py)
+- [ ] Create a PR with the updated [CHANGELOG.md](https://github.com/fireeye/capa/blob/master/CHANGELOG.md) and [capa/version.py](https://github.com/fireeye/capa/blob/master/capa/version.py). Copy this checklist in the PR description.
+- [ ] After PR review, merge the PR and [create the release in GH](https://github.com/fireeye/capa/releases/new) using text from the [CHANGELOG.md](https://github.com/fireeye/capa/blob/master/CHANGELOG.md).
+- [ ] Verify GH actions [upload artifacts](https://github.com/fireeye/capa/releases), [publish to PyPI](https://pypi.org/project/flare-capa) and [create a tag in capa rules](https://github.com/fireeye/capa-rules/tags) upon completion.
+- [ ] [Spread the word](https://twitter.com)
+
--- a/2
+++ b/2
--- a/scripts/bulk-process.py
+++ b/scripts/bulk-process.py
@@ -65,6 +65,7 @@ import multiprocessing.pool

 import capa
 import capa.main
+import capa.rules
 import capa.render

 logger = logging.getLogger("capa")
@@ -95,7 +96,7 @@ def get_capa_results(args):
    rules, format, path = args
    logger.info("computing capa results for: %s", path)
    try:
-        extractor = capa.main.get_extractor(path, format, disable_progress=True)
+        extractor = capa.main.get_extractor(path, format, capa.main.BACKEND_VIV, disable_progress=True)
    except capa.main.UnsupportedFormatError:
        # i'm 100% sure if multiprocessing will reliably raise exceptions across process boundaries.
        # so instead, return an object with explicit success/failure status.
@@ -139,42 +140,14 @@ def main(argv=None):
        argv = sys.argv[1:]

        parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+        capa.main.install_common_args(parser, wanted={"rules"})
        parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze")
-        parser.add_argument(
-            "-r",
-            "--rules",
-            type=str,
-            default="(embedded rules)",
-            help="Path to rule file or directory, use embedded rules by default",
-        )
-        parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
-        parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
        parser.add_argument(
            "-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor"
        )
        parser.add_argument("--no-mp", action="store_true", help="disable subprocesses")
        args = parser.parse_args(args=argv)
-
-        if args.quiet:
-            logging.basicConfig(level=logging.ERROR)
-            logging.getLogger().setLevel(logging.ERROR)
-        elif args.debug:
-            logging.basicConfig(level=logging.DEBUG)
-            logging.getLogger().setLevel(logging.DEBUG)
-        else:
-            logging.basicConfig(level=logging.INFO)
-            logging.getLogger().setLevel(logging.INFO)
-
-        # disable vivisect-related logging, it's verbose and not relevant for capa users
-        capa.main.set_vivisect_log_level(logging.CRITICAL)
-
-        # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
-        # tqdm bails when trying to render the progress bar in this setup.
-        # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
-        # see #380 and: https://stackoverflow.com/a/3259271/87207
-        import codecs
-
-        codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
+        capa.main.handle_common_args(args)

        if args.rules == "(embedded rules)":
            logger.info("using default embedded rules")
--- a/scripts/capa_as_library.py
+++ b/scripts/capa_as_library.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+
+import json
+import collections
+
+import capa.main
+import capa.rules
+import capa.engine
+import capa.render
+import capa.features
+import capa.render.utils as rutils
+from capa.engine import *
+from capa.render import convert_capabilities_to_result_document
+
+# edit this to set the path for file to analyze and rule directory
+RULES_PATH = "/tmp/capa/rules/"
+
+# load rules from disk
+rules = capa.main.get_rules(RULES_PATH, disable_progress=True)
+rules = capa.rules.RuleSet(rules)
+
+# == Render dictionary helpers
+def render_meta(doc, ostream):
+    ostream["md5"] = doc["meta"]["sample"]["md5"]
+    ostream["sha1"] = doc["meta"]["sample"]["sha1"]
+    ostream["sha256"] = doc["meta"]["sample"]["sha256"]
+    ostream["path"] = doc["meta"]["sample"]["path"]
+
+
+def find_subrule_matches(doc):
+    """
+    collect the rule names that have been matched as a subrule match.
+    this way we can avoid displaying entries for things that are too specific.
+    """
+    matches = set([])
+
+    def rec(node):
+        if not node["success"]:
+            # there's probably a bug here for rules that do `not: match: ...`
+            # but we don't have any examples of this yet
+            return
+
+        elif node["node"]["type"] == "statement":
+            for child in node["children"]:
+                rec(child)
+
+        elif node["node"]["type"] == "feature":
+            if node["node"]["feature"]["type"] == "match":
+                matches.add(node["node"]["feature"]["match"])
+
+    for rule in rutils.capability_rules(doc):
+        for node in rule["matches"].values():
+            rec(node)
+
+    return matches
+
+
+def render_capabilities(doc, ostream):
+    """
+    example::
+        {'CAPABILITY': {'accept command line arguments': 'host-interaction/cli',
+                'allocate thread local storage (2 matches)': 'host-interaction/process',
+                'check for time delay via GetTickCount': 'anti-analysis/anti-debugging/debugger-detection',
+                'check if process is running under wine': 'anti-analysis/anti-emulation/wine',
+                'contain a resource (.rsrc) section': 'executable/pe/section/rsrc',
+                'write file (3 matches)': 'host-interaction/file-system/write'}
+        }
+    """
+    subrule_matches = find_subrule_matches(doc)
+
+    ostream["CAPABILITY"] = dict()
+    for rule in rutils.capability_rules(doc):
+        if rule["meta"]["name"] in subrule_matches:
+            # rules that are also matched by other rules should not get rendered by default.
+            # this cuts down on the amount of output while giving approx the same detail.
+            # see #224
+            continue
+
+        count = len(rule["matches"])
+        if count == 1:
+            capability = rule["meta"]["name"]
+        else:
+            capability = "%s (%d matches)" % (rule["meta"]["name"], count)
+
+        ostream["CAPABILITY"].setdefault(rule["meta"]["namespace"], list())
+        ostream["CAPABILITY"][rule["meta"]["namespace"]].append(capability)
+
+
+def render_attack(doc, ostream):
+    """
+    example::
+        {'ATT&CK': {'COLLECTION': ['Input Capture::Keylogging [T1056.001]'],
+            'DEFENSE EVASION': ['Obfuscated Files or Information [T1027]',
+                                'Virtualization/Sandbox Evasion::System Checks '
+                                '[T1497.001]'],
+            'DISCOVERY': ['File and Directory Discovery [T1083]',
+                          'Query Registry [T1012]',
+                          'System Information Discovery [T1082]'],
+            'EXECUTION': ['Shared Modules [T1129]']}
+        }
+    """
+    ostream["ATTCK"] = dict()
+    tactics = collections.defaultdict(set)
+    for rule in rutils.capability_rules(doc):
+        if not rule["meta"].get("att&ck"):
+            continue
+
+        for attack in rule["meta"]["att&ck"]:
+            tactic, _, rest = attack.partition("::")
+            if "::" in rest:
+                technique, _, rest = rest.partition("::")
+                subtechnique, _, id = rest.rpartition(" ")
+                tactics[tactic].add((technique, subtechnique, id))
+            else:
+                technique, _, id = rest.rpartition(" ")
+                tactics[tactic].add((technique, id))
+
+    for tactic, techniques in sorted(tactics.items()):
+        inner_rows = []
+        for spec in sorted(techniques):
+            if len(spec) == 2:
+                technique, id = spec
+                inner_rows.append("%s %s" % (technique, id))
+            elif len(spec) == 3:
+                technique, subtechnique, id = spec
+                inner_rows.append("%s::%s %s" % (technique, subtechnique, id))
+            else:
+                raise RuntimeError("unexpected ATT&CK spec format")
+        ostream["ATTCK"].setdefault(tactic.upper(), inner_rows)
+
+
+def render_mbc(doc, ostream):
+    """
+    example::
+        {'MBC': {'ANTI-BEHAVIORAL ANALYSIS': ['Debugger Detection::Timing/Delay Check '
+                                      'GetTickCount [B0001.032]',
+                                      'Emulator Detection [B0004]',
+                                      'Virtual Machine Detection::Instruction '
+                                      'Testing [B0009.029]',
+                                      'Virtual Machine Detection [B0009]'],
+         'COLLECTION': ['Keylogging::Polling [F0002.002]'],
+         'CRYPTOGRAPHY': ['Encrypt Data::RC4 [C0027.009]',
+                          'Generate Pseudo-random Sequence::RC4 PRGA '
+                          '[C0021.004]']}
+        }
+    """
+    ostream["MBC"] = dict()
+    objectives = collections.defaultdict(set)
+    for rule in rutils.capability_rules(doc):
+        if not rule["meta"].get("mbc"):
+            continue
+
+        mbcs = rule["meta"]["mbc"]
+        if not isinstance(mbcs, list):
+            raise ValueError("invalid rule: MBC mapping is not a list")
+
+        for mbc in mbcs:
+            objective, _, rest = mbc.partition("::")
+            if "::" in rest:
+                behavior, _, rest = rest.partition("::")
+                method, _, id = rest.rpartition(" ")
+                objectives[objective].add((behavior, method, id))
+            else:
+                behavior, _, id = rest.rpartition(" ")
+                objectives[objective].add((behavior, id))
+
+    for objective, behaviors in sorted(objectives.items()):
+        inner_rows = []
+        for spec in sorted(behaviors):
+            if len(spec) == 2:
+                behavior, id = spec
+                inner_rows.append("%s %s" % (behavior, id))
+            elif len(spec) == 3:
+                behavior, method, id = spec
+                inner_rows.append("%s::%s %s" % (behavior, method, id))
+            else:
+                raise RuntimeError("unexpected MBC spec format")
+        ostream["MBC"].setdefault(objective.upper(), inner_rows)
+
+
+def render_dictionary(doc):
+    ostream = dict()
+    render_meta(doc, ostream)
+    render_attack(doc, ostream)
+    render_mbc(doc, ostream)
+    render_capabilities(doc, ostream)
+
+    return ostream
+
+
+# ==== render dictionary helpers
+def capa_details(file_path, output_format="dictionary"):
+
+    # extract features and find capabilities
+    extractor = capa.main.get_extractor(file_path, "auto", capa.main.BACKEND_VIV, disable_progress=True)
+    capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
+
+    # collect metadata (used only to make rendering more complete)
+    meta = capa.main.collect_metadata("", file_path, RULES_PATH, "auto", extractor)
+    meta["analysis"].update(counts)
+
+    capa_output = False
+    if output_format == "dictionary":
+        # ...as python dictionary, simplified like the texttable output but as a dictionary
+        doc = convert_capabilities_to_result_document(meta, rules, capabilities)
+        capa_output = render_dictionary(doc)
+    elif output_format == "json":
+        # render results
+        # ...as json
+        capa_output = json.loads(capa.render.render_json(meta, rules, capabilities))
+    elif output_format == "texttable":
+        # ...as human readable text table
+        capa_output = capa.render.render_default(meta, rules, capabilities)
+
+    return capa_output
--- a/scripts/capafmt.py
+++ b/scripts/capafmt.py
@@ -38,6 +38,12 @@ def main(argv=None):
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
+    parser.add_argument(
+        "-c",
+        "--check",
+        action="store_true",
+        help="Don't output (reformatted) rule, only return status. 0 = no changes, 1 = would reformat",
+    )
    args = parser.parse_args(args=argv)

    if args.verbose:
@@ -50,12 +56,24 @@ def main(argv=None):
    logging.basicConfig(level=level)
    logging.getLogger("capafmt").setLevel(level)

-    rule = capa.rules.Rule.from_yaml_file(args.path)
+    rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True)
+    reformatted_rule = rule.to_yaml()
+
+    if args.check:
+        if rule.definition == reformatted_rule:
+            logger.info("rule is formatted correctly, nice! (%s)", rule.name)
+            return 0
+        else:
+            logger.info("rule requires reformatting (%s)", rule.name)
+            if "\r\n" in rule.definition:
+                logger.info("please make sure that the file uses LF (\\n) line endings only")
+            return 1
+
    if args.in_place:
        with open(args.path, "wb") as f:
-            f.write(rule.to_yaml().encode("utf-8"))
+            f.write(reformatted_rule.encode("utf-8"))
    else:
-        print(rule.to_yaml().rstrip("\n"))
+        print(reformatted_rule)

    return 0

--- a/scripts/import-to-ida.py
+++ b/scripts/import-to-ida.py
@@ -31,10 +31,8 @@ See the License for the specific language governing permissions and limitations
 import json
 import logging

-import idc
 import idautils
 import ida_funcs
-import ida_idaapi
 import ida_kernwin

 logger = logging.getLogger("capa")
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -15,7 +15,9 @@ See the License for the specific language governing permissions and limitations
 """
 import os
 import sys
+import time
 import string
+import difflib
 import hashlib
 import logging
 import os.path
@@ -23,7 +25,10 @@ import argparse
 import itertools
 import posixpath

+import ruamel.yaml
+
 import capa.main
+import capa.rules
 import capa.engine
 import capa.features
 import capa.features.insn
@@ -32,7 +37,11 @@ logger = logging.getLogger("capa.lint")


 class Lint(object):
+    WARN = "WARN"
+    FAIL = "FAIL"
+
    name = "lint"
+    level = FAIL
    recommendation = ""

    def check_rule(self, ctx, rule):
@@ -194,7 +203,7 @@ class DoesntMatchExample(Lint):
                continue

            try:
-                extractor = capa.main.get_extractor(path, "auto")
+                extractor = capa.main.get_extractor(path, "auto", capa.main.BACKEND_VIV, disable_progress=True)
                capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
            except Exception as e:
                logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
@@ -232,7 +241,7 @@ class LibRuleNotInLibDirectory(Lint):
        if "lib" not in rule.meta:
            return False

-        return "/lib/" not in get_normpath(rule.meta["capa/path"])
+        return "lib/" not in get_normpath(rule.meta["capa/path"])


 class LibRuleHasNamespace(Lint):
@@ -276,6 +285,91 @@ class FeatureNegativeNumber(Lint):
        return False


+class FeatureNtdllNtoskrnlApi(Lint):
+    """
+    warn about API features that name `ntdll` or `ntoskrnl` as their module.
+    """
+
+    name = "feature api may overlap with ntdll and ntoskrnl"
+    level = Lint.WARN
+    # pristine message template; it is never overwritten, so every violation is formatted from scratch
+    recommendation_template = (
+        "check if {:s} is exported by both ntdll and ntoskrnl; if true, consider removing {:s} "
+        "module requirement to improve detection"
+    )
+    # kept for compatibility: until a violation is found this is the template itself
+    recommendation = recommendation_template
+
+    def check_features(self, ctx, features):
+        """
+        return True at the first API feature whose module part is `ntdll` or `ntoskrnl`, else False.
+
+        on a violation, `self.recommendation` is set to the message for that API and module.
+        """
+        for feature in features:
+            if isinstance(feature, capa.features.insn.API):
+                modname, _, impname = feature.value.rpartition(".")
+                if modname in ("ntdll", "ntoskrnl"):
+                    # format the template, not `self.recommendation`: a single lint instance lives in
+                    # FEATURE_LINTS, and a message that was already formatted has no placeholders left,
+                    # so re-formatting it would report the first API again for every later violation.
+                    self.recommendation = self.recommendation_template.format(impname, modname)
+                    return True
+        return False
+
+
+class FormatLineFeedEOL(Lint):
+    """
+    flag rules whose text contains CRLF (\\r\\n) line endings.
+    """
+
+    name = "line(s) end with CRLF (\\r\\n)"
+    recommendation = "convert line endings to LF (\\n) for example using dos2unix"
+
+    def check_rule(self, ctx, rule):
+        """
+        return True (violation) when the rule definition contains a CRLF sequence, else False.
+        """
+        # `str.split` always yields at least one element, so comparing the number of
+        # parts against zero can never detect a CRLF; test for the sequence directly.
+        return "\r\n" in rule.definition
+
+
+class FormatSingleEmptyLineEOF(Lint):
+    """
+    flag rules whose text does not end with a newline, or ends with two newlines.
+    """
+
+    name = "EOF format"
+    recommendation = "end file with a single empty line"
+
+    def check_rule(self, ctx, rule):
+        """
+        return False when the definition ends with "\\n" but not with "\\n\\n", True otherwise.
+        """
+        text = rule.definition
+        ends_with_one_newline = text.endswith("\n") and not text.endswith("\n\n")
+        return not ends_with_one_newline
+
+
+class FormatIncorrect(Lint):
+    """
+    flag rules whose text differs from the result of parsing and re-serializing it.
+    """
+
+    name = "rule format incorrect"
+    recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}"
+
+    def check_rule(self, ctx, rule):
+        """
+        return True when the rule definition is not identical to its re-serialized form.
+
+        on a violation, `self.recommendation` is set to the template filled with a line diff;
+        a hint about LF line endings is appended when the definition contains CRLF.
+        """
+        actual = rule.definition
+        expected = capa.rules.Rule.from_yaml(actual, use_ruamel=True).to_yaml()
+
+        if actual == expected:
+            return False
+
+        template = self.recommendation_template
+        if "\r\n" in actual:
+            template = template + "\nplease make sure that the file uses LF (\\n) line endings only"
+
+        # keep the line endings on both sides so that the joined delta is rendered line by line
+        delta = difflib.ndiff(actual.splitlines(True), expected.splitlines(True))
+        self.recommendation = template.format("".join(delta))
+        return True
+
+
+class FormatStringQuotesIncorrect(Lint):
+    """
+    flag values following a `string` scalar that are unquoted or single-quoted in the rule text.
+    """
+
+    name = "rule string quotes incorrect"
+
+    def check_rule(self, ctx, rule):
+        """
+        walk the YAML parse events of the rule definition and inspect the event after each `string` scalar.
+
+        returns True, and sets `self.recommendation`, at the first such scalar value that has
+        no quotes or single quotes; returns False when no such value is found.
+        """
+        # NOTE(review): `events` is advanced both by the `for` loop and by `next()` below,
+        # so it is presumably a one-shot iterator of parse events - confirm against the parser.
+        events = capa.rules.Rule._get_ruamel_yaml_parser().parse(rule.definition)
+        for key in events:
+            # only scalars whose text is exactly "string" are of interest
+            if not (isinstance(key, ruamel.yaml.ScalarEvent) and key.value == "string"):
+                continue
+            value = next(events)  # assume value is next event
+            if not isinstance(value, ruamel.yaml.ScalarEvent):
+                # ignore non-scalar
+                continue
+            if value.value.startswith("/") and value.value.endswith(("/", "/i")):
+                # ignore regex for now
+                continue
+            if value.style is None:
+                # no quotes
+                self.recommendation = 'add double quotes to "%s"' % value.value
+                return True
+            if value.style == "'":
+                # single quote
+                self.recommendation = 'change single quotes to double quotes for "%s"' % value.value
+                return True
+        return False
+
+
 def run_lints(lints, ctx, rule):
    for lint in lints:
        if lint.check_rule(ctx, rule):
@@ -325,14 +419,7 @@ def lint_meta(ctx, rule):
    return run_lints(META_LINTS, ctx, rule)


-FEATURE_LINTS = (
-    FeatureStringTooShort(),
-    FeatureNegativeNumber(),
-)
-
-
-def get_normpath(path):
-    return posixpath.normpath(path).replace(os.sep, "/")
+FEATURE_LINTS = (FeatureStringTooShort(), FeatureNegativeNumber(), FeatureNtdllNtoskrnlApi())


 def lint_features(ctx, rule):
@@ -340,6 +427,22 @@ def lint_features(ctx, rule):
    return run_feature_lints(FEATURE_LINTS, ctx, features)


+FORMAT_LINTS = (
+    FormatLineFeedEOL(),
+    FormatSingleEmptyLineEOF(),
+    FormatStringQuotesIncorrect(),
+    FormatIncorrect(),
+)
+
+
+def lint_format(ctx, rule):
+    """
+    run every lint in FORMAT_LINTS against the rule and return what run_lints() returns.
+    """
+    format_violations = run_lints(FORMAT_LINTS, ctx, rule)
+    return format_violations
+
+
+def get_normpath(path):
+    """
+    normalize the path with posixpath.normpath, then replace the platform separator (os.sep) with "/".
+    """
+    normalized = posixpath.normpath(path)
+    return normalized.replace(os.sep, "/")
+
+
 def get_features(ctx, rule):
    # get features from rule and all dependencies including subscopes and matched rules
    features = []
@@ -390,6 +493,7 @@ def lint_rule(ctx, rule):
            lint_meta(ctx, rule),
            lint_logic(ctx, rule),
            lint_features(ctx, rule),
+            lint_format(ctx, rule),
        )
    )

@@ -406,25 +510,28 @@ def lint_rule(ctx, rule):
            )
        )

-        level = "WARN" if is_nursery_rule(rule) else "FAIL"
-
        for violation in violations:
            print(
                "%s  %s: %s: %s"
                % (
                    "    " if is_nursery_rule(rule) else "",
-                    level,
+                    Lint.WARN if is_nursery_rule(rule) else violation.level,
                    violation.name,
                    violation.recommendation,
                )
            )

-    elif len(violations) == 0 and is_nursery_rule(rule):
+        print("")
+
+    lints_failed = any(map(lambda v: v.level == Lint.FAIL, violations))
+
+    if not lints_failed and is_nursery_rule(rule):
        print("")
        print("%s%s" % ("    (nursery) ", rule.name))
-        print("%s  %s: %s: %s" % ("    ", "WARN", "no violations", "Graduate the rule"))
+        print("%s  %s: %s: %s" % ("    ", Lint.WARN, "no lint failures", "Graduate the rule"))
+        print("")

-    return len(violations) > 0 and not is_nursery_rule(rule)
+    return lints_failed and not is_nursery_rule(rule)


 def lint(ctx, rules):
@@ -492,7 +599,8 @@ def main(argv=None):

    samples_path = os.path.join(os.path.dirname(__file__), "..", "tests", "data")

-    parser = argparse.ArgumentParser(description="A program.")
+    parser = argparse.ArgumentParser(description="Lint capa rules.")
+    capa.main.install_common_args(parser, wanted={"tag"})
    parser.add_argument("rules", type=str, help="Path to rules")
    parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples")
    parser.add_argument(
@@ -500,31 +608,28 @@ def main(argv=None):
        action="store_true",
        help="Enable thorough linting - takes more time, but does a better job",
    )
-    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
-    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
    args = parser.parse_args(args=argv)
+    capa.main.handle_common_args(args)

-    if args.verbose:
-        level = logging.DEBUG
-    elif args.quiet:
-        level = logging.ERROR
+    if args.debug:
+        logging.getLogger("capa").setLevel(logging.DEBUG)
+        logging.getLogger("viv_utils").setLevel(logging.DEBUG)
    else:
-        level = logging.INFO
+        logging.getLogger("capa").setLevel(logging.ERROR)
+        logging.getLogger("viv_utils").setLevel(logging.ERROR)

-    logging.basicConfig(level=level)
-    logging.getLogger("capa.lint").setLevel(level)
-
-    capa.main.set_vivisect_log_level(logging.CRITICAL)
-    logging.getLogger("capa").setLevel(logging.CRITICAL)
+    time0 = time.time()

    try:
-        rules = capa.main.get_rules(args.rules)
+        rules = capa.main.get_rules(args.rules, disable_progress=True)
        rules = capa.rules.RuleSet(rules)
        logger.info("successfully loaded %s rules", len(rules))
-    except IOError as e:
-        logger.error("%s", str(e))
-        return -1
-    except capa.rules.InvalidRule as e:
+        if args.tag:
+            rules = rules.filter_rules_by_meta(args.tag)
+            logger.debug("selected %s rules", len(rules))
+            for i, r in enumerate(rules.rules, 1):
+                logger.debug(" %d. %s", i, r)
+    except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
        logger.error("%s", str(e))
        return -1

@@ -542,8 +647,12 @@ def main(argv=None):
    }

    did_violate = lint(ctx, rules)
+
+    min, sec = divmod(time.time() - time0, 60)
+    logger.debug("lints ran for ~ %02d:%02dm", min, sec)
+
    if not did_violate:
-        logger.info("no suggestions, nice!")
+        logger.info("no lints failed, nice!")
        return 0
    else:
        return 1
--- a/scripts/migrate-rules.py
+++ b/scripts/migrate-rules.py
@@ -1,167 +0,0 @@
-#!/usr/bin/env python
-"""
-migrate rules and their namespaces.
-
-example:
-
-    $ python scripts/migrate-rules.py migration.csv ./rules ./new-rules
-
-Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
-You may obtain a copy of the License at: [package root]/LICENSE.txt
-Unless required by applicable law or agreed to in writing, software distributed under the License
- is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and limitations under the License.
-"""
-import os
-import csv
-import sys
-import logging
-import os.path
-import argparse
-import collections
-
-import capa.rules
-
-logger = logging.getLogger("migrate-rules")
-
-
-def read_plan(plan_path):
-    with open(plan_path, "rb") as f:
-        return list(
-            csv.DictReader(
-                f,
-                restkey="other",
-                fieldnames=(
-                    "existing path",
-                    "existing name",
-                    "existing rule-category",
-                    "proposed name",
-                    "proposed namespace",
-                    "ATT&CK",
-                    "MBC",
-                    "comment1",
-                ),
-            )
-        )
-
-
-def read_rules(rule_directory):
-    rules = {}
-    for root, dirs, files in os.walk(rule_directory):
-        for file in files:
-            path = os.path.join(root, file)
-            if not path.endswith(".yml"):
-                logger.info("skipping file: %s", path)
-                continue
-
-            rule = capa.rules.Rule.from_yaml_file(path)
-            rules[rule.name] = rule
-
-            if "nursery" in path:
-                rule.meta["capa/nursery"] = True
-    return rules
-
-
-def main(argv=None):
-    if argv is None:
-        argv = sys.argv[1:]
-
-    parser = argparse.ArgumentParser(description="migrate rules.")
-    parser.add_argument("plan", type=str, help="Path to CSV describing migration")
-    parser.add_argument("source", type=str, help="Source directory of rules")
-    parser.add_argument("destination", type=str, help="Destination directory of rules")
-    args = parser.parse_args(args=argv)
-
-    logging.basicConfig(level=logging.INFO)
-    logging.getLogger().setLevel(logging.INFO)
-
-    plan = read_plan(args.plan)
-    logger.info("read %d plan entries", len(plan))
-
-    rules = read_rules(args.source)
-    logger.info("read %d rules", len(rules))
-
-    planned_rules = set([row["existing name"] for row in plan])
-    unplanned_rules = [rule for (name, rule) in rules.items() if name not in planned_rules]
-
-    if unplanned_rules:
-        logger.error("plan does not account for %d rules:" % (len(unplanned_rules)))
-        for rule in unplanned_rules:
-            logger.error("  " + rule.name)
-        return -1
-
-    # pairs of strings (needle, replacement)
-    match_translations = []
-
-    for row in plan:
-        if not row["existing name"]:
-            continue
-
-        rule = rules[row["existing name"]]
-
-        if rule.meta["name"] != row["proposed name"]:
-            logger.info("renaming rule '%s' -> '%s'", rule.meta["name"], row["proposed name"])
-
-            # assume the yaml is formatted like `- match: $rule-name`.
-            # but since its been linted, this should be ok.
-            match_translations.append(("- match: " + rule.meta["name"], "- match: " + row["proposed name"]))
-
-            rule.meta["name"] = row["proposed name"]
-            rule.name = row["proposed name"]
-
-        if "rule-category" in rule.meta:
-            logger.info("deleting rule category '%s'", rule.meta["rule-category"])
-            del rule.meta["rule-category"]
-
-        rule.meta["namespace"] = row["proposed namespace"]
-
-        if row["ATT&CK"] != "n/a" and row["ATT&CK"] != "":
-            tag = row["ATT&CK"]
-            name, _, id = tag.rpartition(" ")
-            tag = "%s [%s]" % (name, id)
-            rule.meta["att&ck"] = [tag]
-
-        if row["MBC"] != "n/a" and row["MBC"] != "":
-            tag = row["MBC"]
-            rule.meta["mbc"] = [tag]
-
-    for rule in rules.values():
-        filename = rule.name
-        filename = filename.lower()
-        filename = filename.replace(" ", "-")
-        filename = filename.replace("(", "")
-        filename = filename.replace(")", "")
-        filename = filename.replace("+", "")
-        filename = filename.replace("/", "")
-        filename = filename + ".yml"
-
-        try:
-            if rule.meta.get("capa/nursery"):
-                directory = os.path.join(args.destination, "nursery")
-            elif rule.meta.get("lib"):
-                directory = os.path.join(args.destination, "lib")
-            else:
-                directory = os.path.join(args.destination, rule.meta.get("namespace"))
-            os.makedirs(directory)
-        except OSError:
-            pass
-        else:
-            logger.info("created namespace: %s", directory)
-
-        path = os.path.join(directory, filename)
-        logger.info("writing rule %s", path)
-
-        doc = rule.to_yaml().decode("utf-8")
-        for (needle, replacement) in match_translations:
-            doc = doc.replace(needle, replacement)
-
-        with open(path, "wb") as f:
-            f.write(doc.encode("utf-8"))
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/scripts/show-capabilities-by-function.py
+++ b/scripts/show-capabilities-by-function.py
@@ -63,7 +63,6 @@ import capa.render
 import capa.features
 import capa.render.utils as rutils
 import capa.features.freeze
-import capa.features.extractors.viv
 from capa.helpers import get_file_taste

 logger = logging.getLogger("capa.show-capabilities-by-function")
@@ -111,48 +110,10 @@ def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

-        formats = [
-            ("auto", "(default) detect file type automatically"),
-            ("pe", "Windows PE file"),
-            ("sc32", "32-bit shellcode"),
-            ("sc64", "64-bit shellcode"),
-            ("freeze", "features previously frozen by capa"),
-        ]
-        format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
-
    parser = argparse.ArgumentParser(description="detect capabilities in programs.")
-        parser.add_argument("sample", type=str, help="Path to sample to analyze")
-        parser.add_argument(
-            "-r",
-            "--rules",
-            type=str,
-            default="(embedded rules)",
-            help="Path to rule file or directory, use embedded rules by default",
-        )
-        parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values")
-        parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
-        parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
-        parser.add_argument(
-            "-f",
-            "--format",
-            choices=[f[0] for f in formats],
-            default="auto",
-            help="Select sample format, %s" % format_help,
-        )
+    capa.main.install_common_args(parser, wanted={"format", "sample", "rules", "tag"})
    args = parser.parse_args(args=argv)
-
-        if args.quiet:
-            logging.basicConfig(level=logging.ERROR)
-            logging.getLogger().setLevel(logging.ERROR)
-        elif args.debug:
-            logging.basicConfig(level=logging.DEBUG)
-            logging.getLogger().setLevel(logging.DEBUG)
-        else:
-            logging.basicConfig(level=logging.INFO)
-            logging.getLogger().setLevel(logging.INFO)
-
-        # disable vivisect-related logging, it's verbose and not relevant for capa users
-        capa.main.set_vivisect_log_level(logging.CRITICAL)
+    capa.main.handle_common_args(args)

    try:
        taste = get_file_taste(args.sample)
@@ -160,14 +121,6 @@ def main(argv=None):
        logger.error("%s", str(e))
        return -1

-        # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
-        # tqdm bails when trying to render the progress bar in this setup.
-        # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
-        # see #380 and: https://stackoverflow.com/a/3259271/87207
-        import codecs
-
-        codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
-
    if args.rules == "(embedded rules)":
        logger.info("-" * 80)
        logger.info(" Using default embedded rules.")
@@ -208,9 +161,7 @@ def main(argv=None):
            logger.error(
                " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
            )
-                logger.error(
-                    " If you don't know the input file type, you can try using the `file` utility to guess it."
-                )
+            logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
            logger.error("-" * 80)
            return -1
        except capa.main.UnsupportedRuntimeError:
@@ -220,9 +171,7 @@ def main(argv=None):
            logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
            logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
            logger.error(" ")
-                logger.error(
-                    " If you're seeing this message on the command line, please ensure you're running Python 2.7."
-                )
+            logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
            logger.error("-" * 80)
            return -1

--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -71,41 +71,56 @@ import argparse
 import capa.main
 import capa.rules
 import capa.engine
+import capa.helpers
 import capa.features
 import capa.features.freeze
-import capa.features.extractors.viv
+
+logger = logging.getLogger("capa.show-features")


 def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

-    formats = [
-        ("auto", "(default) detect file type automatically"),
-        ("pe", "Windows PE file"),
-        ("sc32", "32-bit shellcode"),
-        ("sc64", "64-bit shellcode"),
-        ("freeze", "features previously frozen by capa"),
-    ]
-    format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
-
    parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample")
-    parser.add_argument("sample", type=str, help="Path to sample to analyze")
-    parser.add_argument(
-        "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
-    )
+    capa.main.install_common_args(parser, wanted={"format", "sample"})
+
    parser.add_argument("-F", "--function", type=lambda x: int(x, 0x10), help="Show features for specific function")
    args = parser.parse_args(args=argv)
+    capa.main.handle_common_args(args)

-    logging.basicConfig(level=logging.INFO)
-    logging.getLogger().setLevel(logging.INFO)
+    try:
+        taste = capa.helpers.get_file_taste(args.sample)
+    except IOError as e:
+        logger.error("%s", str(e))
+        return -1

-    if args.format == "freeze":
+    if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
        with open(args.sample, "rb") as f:
            extractor = capa.features.freeze.load(f.read())
    else:
-        vw = capa.main.get_workspace(args.sample, args.format)
-        extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
+        try:
+            extractor = capa.main.get_extractor(args.sample, args.format, capa.main.BACKEND_VIV)
+        except capa.main.UnsupportedFormatError:
+            logger.error("-" * 80)
+            logger.error(" Input file does not appear to be a PE file.")
+            logger.error(" ")
+            logger.error(
+                " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
+            )
+            logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
+            logger.error("-" * 80)
+            return -1
+        except capa.main.UnsupportedRuntimeError:
+            logger.error("-" * 80)
+            logger.error(" Unsupported runtime or Python interpreter.")
+            logger.error(" ")
+            logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
+            logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
+            logger.error(" ")
+            logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
+            logger.error("-" * 80)
+            return -1

    if not args.function:
        for feature, va in extractor.extract_file_features():
@@ -118,15 +133,13 @@ def main(argv=None):

    if args.function:
        if args.format == "freeze":
-            functions = filter(lambda f: f == args.function, functions)
+            functions = tuple(filter(lambda f: f == args.function, functions))
        else:
-            functions = filter(lambda f: f.va == args.function, functions)
+            functions = tuple(filter(lambda f: capa.helpers.oint(f) == args.function, functions))

-            if args.function not in [f.va for f in functions]:
-                print("0x%X not a function, creating it" % args.function)
-                vw.makeFunction(args.function)
-                functions = extractor.get_functions()
-                functions = filter(lambda f: f.va == args.function, functions)
+            if args.function not in [capa.helpers.oint(f) for f in functions]:
+                print("0x%X not a function" % args.function)
+                return -1

        if len(functions) == 0:
            print("0x%X not a function")
@@ -154,7 +167,7 @@ def ida_main():
    functions = extractor.get_functions()

    if function:
-        functions = filter(lambda f: f.start_ea == function, functions)
+        functions = tuple(filter(lambda f: f.start_ea == function, functions))

        if len(functions) == 0:
            print("0x%X not a function" % function)
--- a/scripts/vivisect-py2-vs-py3.sh
+++ b/scripts/vivisect-py2-vs-py3.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# Compare scripts/show-features.py under Python 2 and Python 3 for each file in tests/data:
+# 1. time both interpreters and flag runs where Python 3 is notably slower,
+# 2. count the output lines matching insn/file/func/bb and print the average py3/py2 ratio.
+
+# Round the decimal number $1 to the nearest integer; the result is stored in the global `int`.
+int() {
+  int=$(bc <<< "scale=0; ($1 + 0.5)/1")
+}
+
+export TIMEFORMAT='%3R' # `time` reports only the elapsed seconds, with 3 decimals
+threshold_time=90 # Warn if the py2 time is less than this percentage of the py3 time
+threshold_py3_time=60 # Do not warn if it doesn't take at least 1 minute to run
+rm tests/data/*.viv 2>/dev/null
+mkdir -p results
+for file in tests/data/*
+do
+  # Keep the raw path and quote every expansion, so that names with white spaces work
+  file_name=$(basename "$file")
+  echo "$file_name"
+
+  # `time` is the bash keyword: its report goes to the stderr of the group, which is captured
+  rm "$file.viv" 2>/dev/null
+  py3_time=$( { time python3 scripts/show-features.py "$file" >> "results/p3-$file_name.out" 2>/dev/null; } 2>&1 )
+  rm "$file.viv" 2>/dev/null
+  py2_time=$( { time python2 scripts/show-features.py "$file" >> "results/p2-$file_name.out" 2>/dev/null; } 2>&1 )
+
+  int "$py3_time"
+  if (($int > $threshold_py3_time))
+  then
+    percentage=$(bc <<< "scale=3; $py2_time/$py3_time*100 + 0.5")
+    int "$percentage"
+    # Compare the percentage with the percentage threshold (not with the py3 seconds threshold)
+    if (($int < $threshold_time))
+    then
+      echo -n "  SLOWER ($percentage): "
+    fi
+  fi
+  echo "  PY2($py2_time) PY3($py3_time)"
+done
+
+threshold_features=98
+count=0
+average=0
+# Count the output lines matching $1 in the py2 and py3 results of $file_name (global),
+# add the py3/py2 percentage to the running sum and report it if it is below threshold_features.
+results_for() {
+  py3=$(cat "results/p3-$file_name.out" | grep "$1" | wc -l)
+  py2=$(cat "results/p2-$file_name.out" | grep "$1" | wc -l)
+  if (($py2 > 0))
+  then
+    percentage=$(bc <<< "scale=2; 100*$py3/$py2")
+    average=$(bc <<< "scale=2; $percentage + $average")
+    count=$(($count + 1))
+    int "$percentage"
+    if (($int < $threshold_features))
+    then
+      echo -e "$1: py2($py2) py3($py3) $percentage% - $file_name"
+    fi
+  fi
+}
+
+rm tests/data/*.viv 2>/dev/null
+echo -e '\nRESULTS:'
+for file in tests/data/*
+do
+  file_name=$(basename "$file")
+  if test -f "results/p2-$file_name.out"; then
+    results_for 'insn'
+    results_for 'file'
+    results_for 'func'
+    results_for 'bb'
+  fi
+done
+
+# Avoid dividing by zero when no result was collected
+if (($count > 0))
+then
+  average=$(bc <<< "scale=2; $average/$count")
+  echo "TOTAL: $average"
+else
+  echo "TOTAL: n/a"
+fi
--- a/setup.py
+++ b/setup.py
@@ -11,30 +11,33 @@ import sys

 import setuptools

-# halo==0.0.30 is the last version to support py2.7
 requirements = [
-    "six",
-    "tqdm",
-    "pyyaml",
-    "tabulate",
-    "colorama",
-    "termcolor",
-    "ruamel.yaml",
-    "wcwidth",
-    "halo==0.0.30",
+    "six==1.15.0",
+    "tqdm==4.60.0",
+    "pyyaml==5.4.1",
+    "tabulate==0.8.9",
+    "colorama==0.4.4",
+    "termcolor==1.1.0",
+    "wcwidth==0.2.5",
    "ida-settings==2.1.0",
+    "viv-utils==0.6.0",
 ]

 if sys.version_info >= (3, 0):
    # py3
-    requirements.append("networkx")
+    requirements.append("halo==0.0.31")
+    requirements.append("networkx==2.5.1")
+    requirements.append("ruamel.yaml==0.17.0")
+    requirements.append("vivisect==1.0.1")
+    requirements.append("smda==1.5.13")
 else:
    # py2
    requirements.append("enum34==1.1.6")  # v1.1.6 is needed by halo 0.0.30 / spinners 0.0.24
-    requirements.append("vivisect==0.1.0")
-    requirements.append("viv-utils")
+    requirements.append("halo==0.0.30")  # halo==0.0.30 is the last version to support py2.7
+    requirements.append("vivisect==0.2.1")
    requirements.append("networkx==2.2")  # v2.2 is last version supported by Python 2.7
-    requirements.append("backports.functools-lru-cache")
+    requirements.append("ruamel.yaml==0.16.13")  # last version tested with Python 2.7
+    requirements.append("backports.functools-lru-cache==1.6.1")

 # this sets __version__
 # via: http://stackoverflow.com/a/7071358/87207
@@ -74,13 +77,13 @@ setuptools.setup(
    install_requires=requirements,
    extras_require={
        "dev": [
-            "pytest",
-            "pytest-sugar",
-            "pytest-instafail",
-            "pytest-cov",
-            "pycodestyle",
-            "black ; python_version>'3.0'",
-            "isort",
+            "pytest==4.6.11",  # TODO: Change to 6.2.3 when removing py2
+            "pytest-sugar==0.9.4",
+            "pytest-instafail==0.4.2",
+            "pytest-cov==2.11.1",
+            "pycodestyle==2.7.0",
+            "black==20.8b1 ; python_version>'3.0'",
+            "isort==4.3.21",  # TODO: Change to 5.8.0 when removing py2
        ]
    },
    zip_safe=False,
--- a/tests/data
+++ b/tests/data
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -10,6 +10,7 @@
 import os
 import sys
 import os.path
+import binascii
 import contextlib
 import collections

@@ -78,7 +79,33 @@ def get_viv_extractor(path):
        vw = capa.main.get_workspace(path, "sc64", should_save=False)
    else:
        vw = capa.main.get_workspace(path, "auto", should_save=True)
-    return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
+    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
+    fixup_viv(path, extractor)
+    return extractor
+
+
+def fixup_viv(path, extractor):
+    """
+    vivisect fixups to overcome differences between backends
+
+    only paths containing "3b13b" are affected; the workspace is reached via `extractor.vw`.
+    """
+    if "3b13b" not in path:
+        return
+
+    # vivisect only recognizes calling thunk function at 0x10001573
+    extractor.vw.makeFunction(0x10006860)
+
+
+@lru_cache()
+def get_smda_extractor(path):
+    """
+    disassemble the file at `path` with SMDA and wrap the report in a SmdaFeatureExtractor.
+
+    the SMDA configuration has STORE_BUFFER enabled; calls are memoized by lru_cache.
+    """
+    from smda.SmdaConfig import SmdaConfig
+    from smda.Disassembler import Disassembler
+
+    import capa.features.extractors.smda
+
+    smda_config = SmdaConfig()
+    smda_config.STORE_BUFFER = True
+    smda_report = Disassembler(smda_config).disassembleFile(path)
+
+    return capa.features.extractors.smda.SmdaFeatureExtractor(smda_report, path)


@lru_cache()
@@ -129,6 +156,8 @@ def get_data_path_by_name(name):
        return os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
    elif name == "al-khaser x86":
        return os.path.join(CD, "data", "al-khaser_x86.exe_")
+    elif name == "al-khaser x64":
+        return os.path.join(CD, "data", "al-khaser_x64.exe_")
    elif name.startswith("39c05"):
        return os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_")
    elif name.startswith("499c2"):
@@ -149,8 +178,12 @@ def get_data_path_by_name(name):
        return os.path.join(CD, "data", "82BF6347ACF15E5D883715DC289D8A2B.exe_")
    elif name.startswith("pingtaest"):
        return os.path.join(CD, "data", "ping_täst.exe_")
+    elif name.startswith("77329"):
+        return os.path.join(CD, "data", "773290480d5445f11d3dc1b800728966.exe_")
+    elif name.startswith("3b13b"):
+        return os.path.join(CD, "data", "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_")
    else:
-        raise ValueError("unexpected sample fixture")
+        raise ValueError("unexpected sample fixture: %s" % name)


 def get_sample_md5_by_name(name):
@@ -169,6 +202,8 @@ def get_sample_md5_by_name(name):
        return "c8403fb05244e23a7931c766409b5e22"
    elif name == "al-khaser x86":
        return "db648cd247281954344f1d810c6fd590"
+    elif name == "al-khaser x64":
+        return "3cb21ae76ff3da4b7e02d77ff76e82be"
    elif name.startswith("39c05"):
        return "b7841b9d5dc1f511a93cc7576672ec0c"
    elif name.startswith("499c2"):
@@ -187,8 +222,13 @@ def get_sample_md5_by_name(name):
        return "64d9f7d96b99467f36e22fada623c3bb"
    elif name.startswith("82bf6"):
        return "82bf6347acf15e5d883715dc289d8a2b"
+    elif name.startswith("77329"):
+        return "773290480d5445f11d3dc1b800728966"
+    elif name.startswith("3b13b"):
+        # file name is SHA256 hash
+        return "56a6ffe6a02941028cc8235204eef31d"
    else:
-        raise ValueError("unexpected sample fixture")
+        raise ValueError("unexpected sample fixture: %s" % name)


 def resolve_sample(sample):
@@ -377,7 +417,7 @@ FEATURE_PRESENCE_TESTS = [
    ),
    ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
    # insn/api: x64 nested thunk
-    ("82bf6", "function=0x140059342", capa.features.insn.API("ElfClearEventLogFile"), True),
+    ("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
    # insn/api: call via jmp
    ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
    ("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
@@ -392,16 +432,21 @@ FEATURE_PRESENCE_TESTS = [
    ("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True),
    ("mimikatz", "function=0x40105D", capa.features.String("ACR  > "), True),
    ("mimikatz", "function=0x40105D", capa.features.String("nope"), False),
+    ("773290...", "function=0x140001140", capa.features.String(r"%s:\\OfficePackagesForWDAG"), True),
    # insn/regex, issue #262
    ("pma16-01", "function=0x4021B0", capa.features.Regex("HTTP/1.0"), True),
    ("pma16-01", "function=0x4021B0", capa.features.Regex("www.practicalmalwareanalysis.com"), False),
    # insn/string, pointer to string
    ("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True),
+    # insn/string, direct memory reference
+    ("mimikatz", "function=0x46D6CE", capa.features.String("(null)"), True),
    # insn/bytes
    ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardControl".encode("utf-16le")), True),
    ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True),
    ("mimikatz", "function=0x40105D", capa.features.Bytes("ACR  > ".encode("utf-16le")), True),
    ("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False),
+    # IDA features included byte sequences read from invalid memory, fixed in #409
+    ("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False),
    # insn/bytes, pointer to bytes
    ("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True),
    # insn/characteristic(nzxor)
@@ -409,6 +454,9 @@ FEATURE_PRESENCE_TESTS = [
    ("mimikatz", "function=0x40105D", capa.features.Characteristic("nzxor"), False),
    # insn/characteristic(nzxor): no security cookies
    ("mimikatz", "function=0x46D534", capa.features.Characteristic("nzxor"), False),
+    # insn/characteristic(nzxor): xorps
+    # viv needs fixup to recognize function, see above
+    ("3b13b...", "function=0x10006860", capa.features.Characteristic("nzxor"), True),
    # insn/characteristic(peb access)
    ("kernel32-64", "function=0x1800017D0", capa.features.Characteristic("peb access"), True),
    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("peb access"), False),
@@ -472,11 +520,7 @@ def do_test_feature_count(get_extractor, sample, scope, feature, expected):


 def get_extractor(path):
-    if sys.version_info >= (3, 0):
-        raise RuntimeError("no supported py3 backends yet")
-    else:
    extractor = get_viv_extractor(path)
-
    # overload the extractor so that the fixture exposes `extractor.path`
    setattr(extractor, "path", path)
    return extractor
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -7,6 +7,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import sys
+import json
 import textwrap

 import pytest
@@ -19,7 +20,6 @@ import capa.features
 from capa.engine import *


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_main(z9324d_extractor):
    # tests rules can be loaded successfully and all output modes
    path = z9324d_extractor.path
@@ -29,7 +29,6 @@ def test_main(z9324d_extractor):
    assert capa.main.main([path]) == 0


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_main_single_rule(z9324d_extractor, tmpdir):
    # tests a single rule can be loaded successfully
    RULE_CONTENT = textwrap.dedent(
@@ -58,7 +57,6 @@ def test_main_single_rule(z9324d_extractor, tmpdir):
    )


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_main_non_ascii_filename(pingtaest_extractor, tmpdir, capsys):
    # on py2.7, need to be careful about str (which can hold bytes)
    #  vs unicode (which is only unicode characters).
@@ -71,18 +69,22 @@ def test_main_non_ascii_filename(pingtaest_extractor, tmpdir, capsys):
    std = capsys.readouterr()
    # but here, we have to use a unicode instance,
    # because capsys has decoded the output for us.
+    if sys.version_info >= (3, 0):
+        assert pingtaest_extractor.path in std.out
+    else:
        assert pingtaest_extractor.path.decode("utf-8") in std.out


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_main_non_ascii_filename_nonexistent(tmpdir, caplog):
    NON_ASCII_FILENAME = "täst_not_there.exe"
    assert capa.main.main(["-q", NON_ASCII_FILENAME]) == -1

+    if sys.version_info >= (3, 0):
+        assert NON_ASCII_FILENAME in caplog.text
+    else:
        assert NON_ASCII_FILENAME.decode("utf-8") in caplog.text


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_main_shellcode(z499c2_extractor):
    path = z499c2_extractor.path
    assert capa.main.main([path, "-vv", "-f", "sc32"]) == 0
@@ -137,7 +139,6 @@ def test_ruleset():
    assert len(rules.basic_block_rules) == 1


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_match_across_scopes_file_function(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
@@ -201,7 +202,6 @@ def test_match_across_scopes_file_function(z9324d_extractor):
    assert ".text section and install service" in capabilities


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_match_across_scopes(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
@@ -264,7 +264,6 @@ def test_match_across_scopes(z9324d_extractor):
    assert "kill thread program" in capabilities


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_subscope_bb_rules(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
@@ -289,7 +288,6 @@ def test_subscope_bb_rules(z9324d_extractor):
    assert "test rule" in capabilities


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_byte_matching(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
@@ -312,7 +310,6 @@ def test_byte_matching(z9324d_extractor):
    assert "byte match test" in capabilities


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_count_bb(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
@@ -336,7 +333,6 @@ def test_count_bb(z9324d_extractor):
    assert "count bb" in capabilities


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_fix262(pma16_01_extractor, capsys):
    # tests rules can be loaded successfully and all output modes
    path = pma16_01_extractor.path
@@ -347,7 +343,6 @@ def test_fix262(pma16_01_extractor, capsys):
    assert "www.practicalmalwareanalysis.com" not in std.out


-@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
 def test_not_render_rules_also_matched(z9324d_extractor, capsys):
    # rules that are also matched by other rules should not get rendered by default.
    # this cuts down on the amount of output while giving approx the same detail.
@@ -371,3 +366,20 @@ def test_not_render_rules_also_matched(z9324d_extractor, capsys):
    assert "act as TCP client" in std.out
    assert "connect TCP socket" in std.out
    assert "create TCP socket" in std.out
+
+
+# tests that main works with the different backends
+def test_backend_option(capsys):
+    if sys.version_info > (3, 0):
+        path = get_data_path_by_name("pma16-01")
+        assert capa.main.main([path, "-j", "-b", capa.main.BACKEND_VIV]) == 0
+        std = capsys.readouterr()
+        std_json = json.loads(std.out)
+        assert std_json["meta"]["analysis"]["extractor"] == "VivisectFeatureExtractor"
+        assert len(std_json["rules"]) > 0
+
+        assert capa.main.main([path, "-j", "-b", capa.main.BACKEND_SMDA]) == 0
+        std = capsys.readouterr()
+        std_json = json.loads(std.out)
+        assert std_json["meta"]["analysis"]["extractor"] == "SmdaFeatureExtractor"
+        assert len(std_json["rules"]) > 0
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -282,7 +282,8 @@ def test_lib_rules():
            ),
        ]
    )
-    assert len(rules.function_rules) == 1
+    # lib rules are added to the rule set
+    assert len(rules.function_rules) == 2


 def test_subscope_rules():
@@ -680,6 +681,25 @@ def test_explicit_string_values_int():
    assert (String("0x123") in children) == True


+def test_string_values_special_characters():
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+            features:
+                - or:
+                    - string: "hello\\r\\nworld"
+                    - string: "bye\\nbye"
+                      description: "test description"
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+    children = list(r.statement.get_children())
+    assert (String("hello\r\nworld") in children) == True
+    assert (String("bye\nbye") in children) == True
+
+
 def test_regex_values_always_string():
    rules = [
        capa.rules.Rule.from_yaml(
--- a/tests/test_smda_features.py
+++ b/tests/test_smda_features.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import sys
+
+from fixtures import *
+
+
+@parametrize(
+    "sample,scope,feature,expected",
+    FEATURE_PRESENCE_TESTS,
+    indirect=["sample", "scope"],
+)
+@pytest.mark.xfail(sys.version_info < (3, 0), reason="SMDA only works on py3")
+@pytest.mark.xfail(sys.platform == "win32", reason="SMDA bug: https://github.com/danielplohmann/smda/issues/20")
+def test_smda_features(sample, scope, feature, expected):
+    do_test_feature_presence(get_smda_extractor, sample, scope, feature, expected)
+
+
+@parametrize(
+    "sample,scope,feature,expected",
+    FEATURE_COUNT_TESTS,
+    indirect=["sample", "scope"],
+)
+def test_smda_feature_counts(sample, scope, feature, expected):
+    with xfail(sys.version_info < (3, 0), reason="SMDA only works on py3"):
+        do_test_feature_count(get_smda_extractor, sample, scope, feature, expected)
--- a/tests/test_viv_features.py
+++ b/tests/test_viv_features.py
@@ -16,7 +16,6 @@ from fixtures import *
    indirect=["sample", "scope"],
 )
 def test_viv_features(sample, scope, feature, expected):
-    with xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2"):
    do_test_feature_presence(get_viv_extractor, sample, scope, feature, expected)


@@ -26,5 +25,4 @@ def test_viv_features(sample, scope, feature, expected):
    indirect=["sample", "scope"],
 )
 def test_viv_feature_counts(sample, scope, feature, expected):
-    with xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2"):
    do_test_feature_count(get_viv_extractor, sample, scope, feature, expected)