extractors: Do not use generate_api_features

`generate_api_features` was merged with the implementation of `generate_import_features` and replaced by `generate_symbols` (2b2656c2a3). Use the new function in the miasm backend implementation.
extractors: add required loc_db
2025-12-08 22:00:36 -08:00 · 2021-02-05 15:41:13 +01:00 · 2021-02-05 15:41:04 +01:00 · 2021-02-03 15:07:31 +01:00 · 2021-02-03 15:07:31 +01:00 · 2021-02-03 12:50:56 +01:00
85 changed files with 6313 additions and 2627 deletions
--- a/.github/capa-explorer-logo.png
+++ b/.github/capa-explorer-logo.png
--- a/.github/capa-ida.jpg
+++ b/.github/capa-ida.jpg
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -0,0 +1,6 @@
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "weekly"
--- a/.github/pyinstaller/pyinstaller.spec
+++ b/.github/pyinstaller/pyinstaller.spec
@@ -44,7 +44,6 @@ a = Analysis(
    hiddenimports=[
        # vivisect does manual/runtime importing of its modules,
        # so declare the things that could be imported here.
-        "pycparser",
        "vivisect",
        "vivisect.analysis",
        "vivisect.analysis.amd64",
@@ -92,11 +91,13 @@ a = Analysis(
        "vivisect.impapi.windows",
        "vivisect.impapi.windows.amd64",
        "vivisect.impapi.windows.i386",
+        "vivisect.impapi.winkern.i386",
+        "vivisect.impapi.winkern.amd64",
        "vivisect.parsers.blob",
        "vivisect.parsers.elf",
        "vivisect.parsers.ihex",
        "vivisect.parsers.macho",
-        "vivisect.parsers.parse_pe",
+        "vivisect.parsers.pe",
        "vivisect.parsers.utils",
        "vivisect.storage",
        "vivisect.storage.basicfile",
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -2,7 +2,7 @@ name: build

 on:
  release:
-    types: [created, edited]
+    types: [edited, published]

 jobs:
  build:
@@ -14,38 +14,69 @@ jobs:
          - os: ubuntu-16.04
            # use old linux so that the shared library versioning is more portable
            artifact_name: capa
-            asset_name: capa-linux
+            asset_name: linux
          - os: windows-latest
            artifact_name: capa.exe
-            asset_name: capa-windows.exe
+            asset_name: windows
          - os: macos-latest
            artifact_name: capa
-            asset_name: capa-macos
+            asset_name: macos
    steps:
-    - name: Checkout capa
-      uses: actions/checkout@v2
-      with:
-        submodules: true
-    - name: Set up Python 2.7
-      uses: actions/setup-python@v2
-      with:
-        python-version: 2.7
-    - name: Install PyInstaller
-      run: pip install pyinstaller
-    - name: Install capa
-      run: pip install -e .
-    - name: Build standalone executable
-      run: pyinstaller .github/pyinstaller/pyinstaller.spec
-    - name: Does it run?
-      run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_"
-    - uses: actions/upload-artifact@v2
-      with:
-        name: ${{ matrix.asset_name }}
-        path: dist/${{ matrix.artifact_name }}
-    - name: Upload binaries to GH Release
-      uses: svenstaro/upload-release-action@v2
-      with:
-        repo_token: ${{ secrets.CAPA_TOKEN }}
-        file: dist/${{ matrix.artifact_name }}
-        asset_name: ${{ matrix.asset_name }}
-        tag: ${{ github.ref }}
+      - name: Checkout capa
+        uses: actions/checkout@v2
+        with:
+          submodules: true
+      - name: Set up Python 2.7
+        uses: actions/setup-python@v2
+        with:
+          python-version: 2.7
+      - if: matrix.os == 'ubuntu-16.04'  # must equal the Linux `os` value in the build matrix
+        run: sudo apt-get install -y libyaml-dev
+      - if: matrix.os == 'windows-latest'
+        run: |
+          choco install vcredist2008
+          choco install --ignore-dependencies vcpython27
+      - name: Install PyInstaller
+        # pyinstaller 4 doesn't support Python 2.7
+        run: pip install 'pyinstaller==3.*'
+      - name: Install capa
+        run: pip install -e .
+      - name: Build standalone executable
+        run: pyinstaller .github/pyinstaller/pyinstaller.spec
+      - name: Does it run?
+        run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_"
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ matrix.asset_name }}
+          path: dist/${{ matrix.artifact_name }}
+
+  zip:
+    name: zip ${{ matrix.asset_name }}
+    runs-on: ubuntu-latest
+    needs: build
+    strategy:
+      matrix:
+        include:
+          - asset_name: linux
+            artifact_name: capa
+          - asset_name: windows
+            artifact_name: capa.exe
+          - asset_name: macos
+            artifact_name: capa
+    steps:
+      - name: Download ${{ matrix.asset_name }}
+        uses: actions/download-artifact@v2
+        with:
+          name: ${{ matrix.asset_name }}
+      - name: Set executable flag
+        run: chmod +x ${{ matrix.artifact_name }}
+      - name: Set zip name
+        run: echo "zip_name=capa-${GITHUB_REF#refs/tags/}-${{ matrix.asset_name }}.zip" >> $GITHUB_ENV
+      - name: Zip ${{ matrix.artifact_name }} into ${{ env.zip_name }}
+        run: zip ${{ env.zip_name }} ${{ matrix.artifact_name }}
+      - name: Upload ${{ env.zip_name }} to GH Release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN}}
+          file: ${{ env.zip_name }}
+          tag: ${{ github.ref }}
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,29 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: publish to pypi
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '2.7'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools wheel twine
+      - name: Build and publish
+        env:
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          python setup.py sdist bdist_wheel
+          twine upload --skip-existing dist/*
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -41,17 +41,28 @@ jobs:
      run: python scripts/lint.py rules/

  tests:
+    name: Tests in ${{ matrix.python }}
    runs-on: ubuntu-latest
    needs: [code_style, rule_linter]
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - python: 2.7
+          - python: 3.7
+          - python: 3.8
+          - python: 3.9.1
    steps:
    - name: Checkout capa with submodules
      uses: actions/checkout@v2
      with:
        submodules: true
-    - name: Set up Python 2.7
+    - name: Set up Python ${{ matrix.python }}
      uses: actions/setup-python@v2
      with:
-        python-version: 2.7
+        python-version: ${{ matrix.python }}
+    - name: Install pyyaml
+      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
      run: pip install -e .[dev]
    - name: Run tests
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "rules"]
 	path = rules
-	url = git@github.com:fireeye/capa-rules.git
+	url = ../capa-rules.git
 [submodule "tests/data"]
 	path = tests/data
-	url = git@github.com:fireeye/capa-testfiles.git
+	url = ../capa-testfiles.git
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,303 @@
 # Change Log

+## v1.4.1 (2020-10-23)
+
+This release fixes an issue building capa on our CI server, which prevented us from building standalone binaries for v1.4.1.
+
+### Bug Fixes
+
+  - install VC dependencies for Python 2.7 during Windows build
+  
+### Raw diffs
+
+  - [capa v1.4.0...v1.4.1](https://github.com/fireeye/capa/compare/v1.4.0...v1.4.1)
+  - [capa-rules v1.4.0...v1.4.1](https://github.com/fireeye/capa-rules/compare/v1.4.0...v1.4.1)  
+
+## v1.4.0 (2020-10-23)
+
+This capa release includes changes to the rule parsing, enhanced feature extraction, various bug fixes, and improved capa scripts. Everyone should benefit from the improved functionality and performance. The community helped to add 69 new rules. We appreciate everyone who opened issues, provided feedback, and contributed code and rules. A special shout out to the following new project contributors:
+
+  - @mwilliams31
+  - @yt0ng
+
+@dzbeck added [Malware Behavior Catalog](https://github.com/MBCProject/mbc-markdown) (MBC) and ATT&CK mappings for 86 rules.
+
+Download a standalone binary below and check out the readme [here on GitHub](https://github.com/fireeye/capa/). Report issues on our [issue tracker](https://github.com/fireeye/capa/issues) and contribute new rules at [capa-rules](https://github.com/fireeye/capa-rules/).
+
+### New features
+
+  - script that demonstrates bulk processing @williballenthin #307
+  - main: render MBC table @mr-tz #332
+  - ida backend: improve detection of APIs called via two or more chained thunks @mike-hunhoff #340
+  - viv backend: improve detection of APIs called via two or more chained thunks @mr-tz #341
+  - features: extract APIs called via jmp instruction @mr-tz #337
+
+### New rules
+
+  - clear the Windows event log @mike-hunhoff
+  - crash the Windows event logging service @mike-hunhoff
+  - packed with kkrunchy @re-fox
+  - packed with nspack @re-fox
+  - packed with pebundle @re-fox
+  - packed with pelocknt @re-fox
+  - packed with peshield @re-fox
+  - packed with petite @re-fox
+  - packed with rlpack @re-fox
+  - packed with upack @re-fox
+  - packed with y0da crypter @re-fox
+  - compiled with rust @re-fox
+  - compute adler32 checksum @mwilliams31
+  - encrypt-data-using-hc-128 @recvfrom
+  - manipulate console @williballenthin
+  - references logon banner @re-fox
+  - terminate process via fastfail @re-fox
+  - delete volume shadow copies @mr-tz
+  - authenticate HMAC @mr-tz
+  - compiled from EPL @williballenthin
+  - compiled with Go @williballenthin
+  - create Restart Manager session @mike-hunhoff
+  - decode data using Base64 via WinAPI @mike-hunhoff
+  - empty recycle bin quietly @mwilliams31
+  - enumerate network shares @mike-hunhoff
+  - hook routines via microsoft detours @williballenthin
+  - hooked by API Override @williballenthin
+  - impersonate user @mike-hunhoff
+  - the @williballenthin packer detection package, thanks to Hexacorn for the data, see https://www.hexacorn.com/blog/2016/12/15/pe-section-names-re-visited/
+    - packed with CCG
+    - packed with Crunch
+    - packed with Dragon Armor
+    - packed with enigma
+    - packed with Epack
+    - packed with MaskPE
+    - packed with MEW
+    - packed with Mpress
+    - packed with Neolite
+    - packed with PECompact
+    - packed with Pepack
+    - packed with Perplex
+    - packed with ProCrypt
+    - packed with RPCrypt
+    - packed with SeauSFX
+    - packed with Shrinker
+    - packed with Simple Pack
+    - packed with StarForce
+    - packed with SVKP
+    - packed with Themida
+    - packed with TSULoader
+    - packed with VProtect
+    - packed with WWPACK
+    - rebuilt by ImpRec
+    - packaged as a Pintool
+    - packaged as a CreateInstall installer
+    - packaged as a WinZip self-extracting archive
+  - reference 114DNS DNS server @williballenthin
+  - reference AliDNS DNS server @williballenthin
+  - reference Cloudflare DNS server @williballenthin
+  - reference Comodo Secure DNS server @williballenthin
+  - reference Google Public DNS server @williballenthin
+  - reference Hurricane Electric DNS server @williballenthin
+  - reference kornet DNS server @williballenthin
+  - reference L3 DNS server @williballenthin
+  - reference OpenDNS DNS server @williballenthin
+  - reference Quad9 DNS server @williballenthin
+  - reference Verisign DNS server @williballenthin
+  - run as service @mike-hunhoff
+  - schedule task via ITaskService @mike-hunhoff
+  - references DNS over HTTPS endpoints @yt0ng
+
+### Bug fixes
+
+  - ida plugin: fix tree-view exception @mike-hunhoff #315
+  - ida plugin: fix feature count @mike-hunhoff
+  - main: fix reported total rule count @williballenthin #325
+  - features: fix handling of API names with multiple periods @mike-hunhoff #329
+  - ida backend: find all byte sequences instead of only first @mike-hunhoff #335
+  - features: display 0 value @mr-tz #338
+  - ida backend: extract ordinal and name imports @mr-tz #343
+  - show-features: improvements and support within IDA @mr-tz #342
+  - main: sanity check MBC rendering @williballenthin
+  - main: handle sample path that contains non-ASCII characters @mr-tz #328
+
+### Changes
+
+  - rules: use yaml.CLoader for better performance @williballenthin #306
+  - rules: parse descriptions for statements @mr-tz #312
+
+### Raw diffs
+
+  - [capa v1.3.0...v1.4.0](https://github.com/fireeye/capa/compare/v1.3.0...v1.4.0)
+  - [capa-rules v1.3.0...v1.4.0](https://github.com/fireeye/capa-rules/compare/v1.3.0...v1.4.0)
+
+## v1.3.0 (2020-09-14)
+
+This release brings newly updated mappings to the [Malware Behavior Catalog version 2.0](https://github.com/MBCProject/mbc-markdown), many enhancements to the IDA Pro plugin, [flare-capa on PyPI](https://pypi.org/project/flare-capa/), a bunch of bug fixes to improve feature extraction, and four new rules. We received contributions from ten reverse engineers, including seven new ones:
+
+  - @dzbeck
+  - @recvfrom
+  - @toomanybananas
+  - @cclauss 
+  - @adamprescott91 
+  - @weslambert
+  - @stevemk14ebr 
+  
+Download a standalone binary below and check out the readme [here on GitHub](https://github.com/fireeye/capa/). Report issues on our [issue tracker](https://github.com/fireeye/capa/issues) and contribute new rules at [capa-rules](https://github.com/fireeye/capa-rules/).
+
+### Key changes to IDA Plugin
+
+The IDA Pro integration is now distributed as a real plugin, instead of a script. This enables a few things:
+
+  - keyboard shortcuts and file menu integration
+  - updates distributed via PyPI/`pip install --upgrade` without touching your `%IDADIR%`
+  - generally doing things the "right way"
+
+How to get this new version? It's easy: download [capa_explorer.py](https://raw.githubusercontent.com/fireeye/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory and update your capa installation (incidentally, this is a good opportunity to migrate to `pip install flare-capa` instead of git checkouts). Now you should see the plugin listed in the `Edit > Plugins > FLARE capa explorer` menu in IDA.
+
+Please refer to the plugin [readme](https://github.com/fireeye/capa/blob/master/capa/ida/plugin/README.md) for additional information on installing and using the IDA Pro plugin.
+
+Please open an issue in this repository if you notice anything weird.
+ 
+### New features
+
+  - ida plugin: now a real plugin, not a script @mike-hunhoff 
+  - core: distributed via PyPI as [flare-capa](https://pypi.org/project/flare-capa/) @williballenthin 
+  - features: enable automatic A/W handling for imports @williballenthin @Ana06 #246 
+  - ida plugin: persist rules directory setting via [ida-settings](https://github.com/williballenthin/ida-settings) @williballenthin #268
+  - ida plugin: add search bar to results view @williballenthin #285
+  - ida plugin: add `Analyze` and `Reset` buttons to tree view @mike-hunhoff #304
+  - ida plugin: add status label to tree view @mike-hunhoff
+  - ida plugin: add progress indicator @mike-hunhoff, @mr-tz
+
+### New rules
+
+  - compiled with py2exe @re-fox
+  - resolve path using msvcrt @re-fox 
+  - decompress data using QuickLZ @edeca
+  - encrypt data using sosemanuk @recvfrom 
+
+### Bug fixes
+
+  - rule: reduce FP in DNS resolution @toomanybananas
+  - engine: report correct strings matched via regex @williballenthin #262 
+  - formatter: correctly format descriptions in two-line syntax @williballenthin @recvfrom #263 
+  - viv: better extract offsets from SibOper operands @williballenthin @edeca #276 
+  - import-to-ida: fix import error @cclauss 
+  - viv: don't write settings to ~/.viv/viv.json @williballenthin @rakuy0 @weslambert #244
+  - ida plugin: remove dependency loop that resulted in unnecessary overhead @mike-hunhoff #303
+  - ida plugin: correctly highlight regex matches in IDA Disassembly view @mike-hunhoff #305
+  - ida plugin: better handle rule directory prompt and failure case @stevemk14ebr @mike-hunhoff #309
+
+### Changes
+
+  - rules: update meta mapping to MBC 2.0! @dzbeck
+  - render: don't display rules that are also matched by other rules @williballenthin @Ana06 #224
+  - ida plugin: simplify tabs, removing summary and adding detail to results view @williballenthin #286
+  - ida plugin: analysis is no longer automatically started when plugin is first opened @mike-hunhoff #304
+  - ida plugin: user must manually select a capa rules directory before analysis can be performed @mike-hunhoff
+  - ida plugin: user interface controls are disabled until analysis is performed @mike-hunhoff #304
+
+### Raw diffs
+
+  - [capa v1.2.0...v1.3.0](https://github.com/fireeye/capa/compare/v1.2.0...v1.3.0)
+  - [capa-rules v1.2.0...v1.3.0](https://github.com/fireeye/capa-rules/compare/v1.2.0...v1.3.0)
+
+## v1.2.0 (2020-08-31)
+
+This release brings UI enhancements, especially for the IDA Pro plugin, 
+investment towards py3 support,
+fixes some bugs identified by the community, 
+and 46 (!) new rules.
+We received contributions from ten reverse engineers, including five new ones:
+
+  - @agithubuserlol
+  - @recvfrom
+  - @D4nch3n
+  - @edeca
+  - @winniepe 
+  
+Download a standalone binary below and check out the readme [here on GitHub](https://github.com/fireeye/capa/).
+Report issues on our [issue tracker](https://github.com/fireeye/capa/issues)
+and contribute new rules at [capa-rules](https://github.com/fireeye/capa-rules/).
+ 
+### New features
+
+  - ida plugin: display arch flavors @mike-hunhoff
+  - ida plugin: display block descriptions @mike-hunhoff
+  - ida backend: extract features from nested pointers @mike-hunhoff
+  - main: show more progress output @williballenthin
+  - core: pin dependency versions #258 @recvfrom
+
+### New rules
+  - bypass UAC via AppInfo ALPC @agithubuserlol
+  - bypass UAC via token manipulation @agithubuserlol
+  - check for sandbox and av modules @re-fox
+  - check for sandbox username @re-fox
+  - check if process is running under wine @re-fox
+  - validate credit card number using luhn algorithm @re-fox
+  - validate credit card number using luhn algorithm with no lookup table @re-fox
+  - hash data using FNV @edeca @mr-tz
+  - link many functions at runtime @mr-tz
+  - reference public RSA key @mr-tz
+  - packed with ASPack @williballenthin
+  - delete internet cache @mike-hunhoff
+  - enumerate internet cache @mike-hunhoff
+  - send ICMP echo request @mike-hunhoff
+  - check for debugger via API @mike-hunhoff
+  - check for hardware breakpoints @mike-hunhoff
+  - check for kernel debugger via shared user data structure @mike-hunhoff
+  - check for protected handle exception @mike-hunhoff
+  - check for software breakpoints @mike-hunhoff
+  - check for trap flag exception @mike-hunhoff
+  - check for unexpected memory writes @mike-hunhoff
+  - check process job object @mike-hunhoff
+  - reference anti-VM strings targeting Parallels @mike-hunhoff
+  - reference anti-VM strings targeting Qemu @mike-hunhoff
+  - reference anti-VM strings targeting VirtualBox @mike-hunhoff
+  - reference anti-VM strings targeting VirtualPC @mike-hunhoff
+  - reference anti-VM strings targeting VMWare @mike-hunhoff
+  - reference anti-VM strings targeting Xen @mike-hunhoff
+  - reference analysis tools strings @mike-hunhoff
+  - reference WMI statements @mike-hunhoff
+  - get number of processor cores @mike-hunhoff
+  - get number of processors @mike-hunhoff
+  - enumerate disk properties @mike-hunhoff
+  - get disk size @mike-hunhoff
+  - get process heap flags @mike-hunhoff
+  - get process heap force flags @mike-hunhoff
+  - get Explorer PID @mike-hunhoff
+  - delay execution @mike-hunhoff
+  - check for process debug object @mike-hunhoff
+  - check license value @mike-hunhoff
+  - check ProcessDebugFlags @mike-hunhoff
+  - check ProcessDebugPort @mike-hunhoff
+  - check SystemKernelDebuggerInformation @mike-hunhoff
+  - check thread yield allowed @mike-hunhoff
+  - enumerate system firmware tables @mike-hunhoff
+  - get system firmware table @mike-hunhoff
+  - hide thread from debugger @mike-hunhoff
+
+### Bug fixes
+
+  - ida backend: extract unmapped immediate number features @mike-hunhoff
+  - ida backend: fix stack cookie check #257 @mike-hunhoff
+  - viv backend: better extract gs segment access @williballenthin
+  - core: enable counting of string features #241 @D4nch3n @williballenthin
+  - core: enable descriptions on feature with arch flavors @mike-hunhoff
+  - core: update git links for non-SSH access #259 @recvfrom
+
+### Changes
+
+  - ida plugin: better default display showing first level nesting @winniepe
+  - remove unused `characteristic(switch)` feature @ana06
+  - prepare testing infrastructure for multiple backends/py3 @williballenthin
+  - ci: zip build artifacts @ana06
+  - ci: build all supported python versions @ana06
+  - code style and formatting @mr-tz
+
+### Raw diffs
+
+  - [capa v1.1.0...v1.2.0](https://github.com/fireeye/capa/compare/v1.1.0...v1.2.0)
+  - [capa-rules v1.1.0...v1.2.0](https://github.com/fireeye/capa-rules/compare/v1.1.0...v1.2.0)
+
 ## v1.1.0 (2020-08-05)

 This release brings new rule format updates, such as adding `offset/x32` and negative offsets,
@@ -38,15 +336,15 @@ Download a standalone binary below and checkout the readme [here on GitHub](http
  - hash data using sha1 via x86 extensions @re-fox
  - hash data using sha256 via x86 extensions @re-fox
  - capture network configuration via ipconfig @re-fox
-  - hash data via WinCrypt @michael-hunhoff
-  - get file attributes @michael-hunhoff
-  - allocate thread local storage @michael-hunhoff
-  - get thread local storage value @michael-hunhoff
-  - set thread local storage @michael-hunhoff
-  - get session integrity level @michael-hunhoff
-  - add file to cabinet file @michael-hunhoff
-  - flush cabinet file @michael-hunhoff
-  - open cabinet file @michael-hunhoff
+  - hash data via WinCrypt @mike-hunhoff
+  - get file attributes @mike-hunhoff
+  - allocate thread local storage @mike-hunhoff
+  - get thread local storage value @mike-hunhoff
+  - set thread local storage @mike-hunhoff
+  - get session integrity level @mike-hunhoff
+  - add file to cabinet file @mike-hunhoff
+  - flush cabinet file @mike-hunhoff
+  - open cabinet file @mike-hunhoff
  - gather firefox profile information @re-fox
  - encrypt data using skipjack @re-fox
  - encrypt data using camellia @re-fox
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 ![capa](.github/logo.png)

 [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
-[![Number of rules](https://img.shields.io/badge/rules-293-blue.svg)](https://github.com/fireeye/capa-rules)
+[![Number of rules](https://img.shields.io/badge/rules-455-blue.svg)](https://github.com/fireeye/capa-rules)
 [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

 capa detects capabilities in executable files.
@@ -71,7 +71,7 @@ Alternatively, you can fetch a nightly build of a standalone binary from one of

 To use capa as a library or integrate with another tool, see [doc/installation.md](doc/installation.md) for further setup instructions.

-For more information about how to use capa, including running it as an IDA script/plugin see [doc/usage.md](doc/usage.md).
+For more information about how to use capa, see [doc/usage.md](doc/usage.md).

 # example

@@ -146,12 +146,11 @@ rule:
 The [github.com/fireeye/capa-rules](https://github.com/fireeye/capa-rules) repository contains hundreds of standard library rules that are distributed with capa.
 Please learn to write rules and contribute new entries as you find interesting techniques in malware.

-If you use IDA Pro, then you use can use the [IDA Pro plugin for capa](./capa/ida/ida_capa_explorer.py).
-This script adds new user interface elements to IDA, including an interactive tree view of rule matches and their locations within the current database.
-As you select the checkboxes, the plugin will highlight the addresses associated with the features.
-We use this plugin all the time to quickly jump to interesting parts of a program.
+If you use IDA Pro, then you can use the [capa explorer IDA plugin](capa/ida/plugin/).
+capa explorer lets you quickly identify and navigate to interesting areas of a program and dissect capa rule matches at
+the assembly level.

-![capa + IDA Pro integration](.github/capa-ida.jpg)
+![capa + IDA Pro integration](doc/img/ida_plugin_intro.gif)

 # further information
 ## capa
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -6,7 +6,6 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-import sys
 import copy
 import collections

--- a/capa/features/init.py
+++ b/capa/features/init.py
@@ -16,6 +16,9 @@ import capa.engine
 logger = logging.getLogger(__name__)
 MAX_BYTES_FEATURE_SIZE = 0x100

+# thunks may be chained so we specify a delta to control the depth to which these chains are explored
+THUNK_CHAIN_DEPTH_DELTA = 5
+
 # identifiers for supported architectures names that tweak a feature
 # for example, offset/x32
 ARCH_X32 = "x32"
@@ -74,7 +77,7 @@ class Feature(object):
        return self.value

    def __str__(self):
-        if self.value:
+        if self.value is not None:
            if self.description:
                return "%s(%s = %s)" % (self.name, self.get_value_str(), self.description)
            else:
@@ -139,7 +142,6 @@ class Regex(String):
            raise ValueError(
                "invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % value
            )
-        self.match = None

    def evaluate(self, ctx):
        for feature, locations in ctx.items():
@@ -151,17 +153,45 @@ class Regex(String):
            # using this mode is more convenient for rule authors,
            # so that they don't have to prefix/suffix their terms like: /.*foo.*/.
            if self.re.search(feature.value):
-                self.match = feature.value
-                return capa.engine.Result(True, self, [], locations=locations)
+                # unlike other features, we cannot put a reference to `self` directly in a `Result`.
+                # this is because `self` may match on many strings, so we can't stuff the matched value into it.
+                # instead, return a new instance that has a reference to both the regex and the matched value.
+                # see #262.
+                return capa.engine.Result(True, _MatchedRegex(self, feature.value), [], locations=locations)

-        return capa.engine.Result(False, self, [])
+        return capa.engine.Result(False, _MatchedRegex(self, None), [])
+
+    def __str__(self):
+        return "regex(string =~ %s)" % self.value
+
+
+class _MatchedRegex(Regex):
+    """
+    this represents a specific instance of a regular expression feature match.
+    treat it the same as a `Regex` except it has the `match` field that contains the complete string that matched.
+
+    note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
+    """
+
+    def __init__(self, regex, match):
+        """
+        args:
+          regex (Regex): the regex feature that matches
+          match (string|None): the matching string or None if it doesn't match
+        """
+        super(_MatchedRegex, self).__init__(regex.value, description=regex.description)
+        # we want this to collide with the name of `Regex` above,
+        # so that it works nicely with the renderers.
+        self.name = "regex"
+        # this may be None if the regex doesn't match
+        self.match = match

    def __str__(self):
        return 'regex(string =~ %s, matched = "%s")' % (self.value, self.match)


 class StringFactory(object):
-    def __new__(self, value, description):
+    def __new__(self, value, description=None):
        if value.startswith("/") and (value.endswith("/") or value.endswith("/i")):
            return Regex(value, description=description)
        return String(value, description=description)
--- a/capa/features/extractors/init.py
+++ b/capa/features/extractors/init.py
@@ -8,6 +8,8 @@

 import abc

+from capa.helpers import oint
+

 class FeatureExtractor(object):
    """
@@ -35,6 +37,12 @@ class FeatureExtractor(object):
        #
        super(FeatureExtractor, self).__init__()

+    def block_offset(self, bb):
+        return oint(bb)
+
+    def function_offset(self, f):
+        return oint(f)
+
    @abc.abstractmethod
    def get_base_address(self):
        """
@@ -196,7 +204,7 @@ class NullFeatureExtractor(FeatureExtractor):
            'functions': {
                0x401000: {
                    'features': [
-                        (0x401000, capa.features.Characteristic('switch')),
+                        (0x401000, capa.features.Characteristic('nzxor')),
                    ],
                    'basic blocks': {
                        0x401000: {
--- a/capa/features/extractors/helpers.py
+++ b/capa/features/extractors/helpers.py
@@ -9,6 +9,7 @@
 import sys
 import builtins

+from capa.features.file import Import
 from capa.features.insn import API

 MIN_STACKSTRING_LEN = 8
@@ -21,25 +22,34 @@ def xor_static(data, i):
        return "".join(chr(ord(c) ^ i) for c in data)


-def is_aw_function(function_name):
+def is_aw_function(symbol):
    """
    is the given function name an A/W function?
    these are variants of functions that, on Windows, accept either a narrow or wide string.
    """
-    if len(function_name) < 2:
+    if len(symbol) < 2:
        return False

    # last character should be 'A' or 'W'
-    if function_name[-1] not in ("A", "W"):
+    if symbol[-1] not in ("A", "W"):
        return False

    # second to last character should be a lowercase letter or digit
-    return "a" <= function_name[-2] <= "z" or "0" <= function_name[-2] <= "9"
+    return "a" <= symbol[-2] <= "z" or "0" <= symbol[-2] <= "9"


-def generate_api_features(apiname, va):
+def is_ordinal(symbol):
    """
-    for a given function name and address, generate API names.
+    is the given symbol an ordinal that is prefixed by "#"?
+    """
+    if symbol:
+        return symbol[0] == "#"
+    return False
+
+
+def generate_symbols(dll, symbol):
+    """
+    for a given dll and symbol name, generate variants.
    we over-generate features to make matching easier.
    these include:
      - kernel32.CreateFileA
@@ -47,22 +57,20 @@ def generate_api_features(apiname, va):
      - CreateFileA
      - CreateFile
    """
-    # (kernel32.CreateFileA, 0x401000)
-    yield API(apiname), va
+    # kernel32.CreateFileA
+    yield "%s.%s" % (dll, symbol)

-    if is_aw_function(apiname):
-        # (kernel32.CreateFile, 0x401000)
-        yield API(apiname[:-1]), va
+    if not is_ordinal(symbol):
+        # CreateFileA
+        yield symbol

-    if "." in apiname:
-        modname, impname = apiname.split(".")
-        # strip modname to support importname-only matching
-        # (CreateFileA, 0x401000)
-        yield API(impname), va
+    if is_aw_function(symbol):
+        # kernel32.CreateFile
+        yield "%s.%s" % (dll, symbol[:-1])

-        if is_aw_function(impname):
-            # (CreateFile, 0x401000)
-            yield API(impname[:-1]), va
+        if not is_ordinal(symbol):
+            # CreateFile
+            yield symbol[:-1]


 def all_zeros(bytez):
--- a/capa/features/extractors/ida/init.py
+++ b/capa/features/extractors/ida/init.py
@@ -75,7 +75,7 @@ class IdaFeatureExtractor(FeatureExtractor):
            yield feature, ea

    def get_basic_blocks(self, f):
-        for bb in idaapi.FlowChart(f, flags=idaapi.FC_PREDS):
+        for bb in capa.features.extractors.ida.helpers.get_function_blocks(f):
            yield add_ea_int_cast(bb)

    def extract_basic_block_features(self, f, bb):
--- a/capa/features/extractors/ida/basicblock.py
+++ b/capa/features/extractors/ida/basicblock.py
@@ -20,10 +20,10 @@ from capa.features.extractors.helpers import MIN_STACKSTRING_LEN


 def get_printable_len(op):
-    """ Return string length if all operand bytes are ascii or utf16-le printable
+    """Return string length if all operand bytes are ascii or utf16-le printable

-        args:
-            op (IDA op_t)
+    args:
+        op (IDA op_t)
    """
    op_val = capa.features.extractors.ida.helpers.mask_op_val(op)

@@ -62,10 +62,10 @@ def get_printable_len(op):


 def is_mov_imm_to_stack(insn):
-    """ verify instruction moves immediate onto stack
+    """verify instruction moves immediate onto stack

-        args:
-            insn (IDA insn_t)
+    args:
+        insn (IDA insn_t)
    """
    if insn.Op2.type != idaapi.o_imm:
        return False
@@ -80,13 +80,13 @@ def is_mov_imm_to_stack(insn):


 def bb_contains_stackstring(f, bb):
-    """ check basic block for stackstring indicators
+    """check basic block for stackstring indicators

-        true if basic block contains enough moves of constant bytes to the stack
+    true if basic block contains enough moves of constant bytes to the stack

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
    """
    count = 0
    for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
@@ -98,33 +98,33 @@ def bb_contains_stackstring(f, bb):


 def extract_bb_stackstring(f, bb):
-    """ extract stackstring indicators from basic block
+    """extract stackstring indicators from basic block

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
    """
    if bb_contains_stackstring(f, bb):
        yield Characteristic("stack string"), bb.start_ea


 def extract_bb_tight_loop(f, bb):
-    """ extract tight loop indicators from a basic block
+    """extract tight loop indicators from a basic block

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
    """
    if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bb):
        yield Characteristic("tight loop"), bb.start_ea


 def extract_features(f, bb):
-    """ extract basic block features
+    """extract basic block features

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
    """
    for bb_handler in BASIC_BLOCK_HANDLERS:
        for (feature, ea) in bb_handler(f, bb):
--- a/capa/features/extractors/ida/file.py
+++ b/capa/features/extractors/ida/file.py
@@ -20,13 +20,13 @@ from capa.features.file import Export, Import, Section


 def check_segment_for_pe(seg):
-    """ check segment for embedded PE
+    """check segment for embedded PE

-        adapted for IDA from:
-        https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
+    adapted for IDA from:
+    https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19

-        args:
-            seg (IDA segment_t)
+    args:
+        seg (IDA segment_t)
    """
    seg_max = seg.end_ea
    mz_xor = [
@@ -37,11 +37,11 @@ def check_segment_for_pe(seg):
        )
        for i in range(256)
    ]
-    todo = [
-        (capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i)
-        for mzx, pex, i in mz_xor
-    ]
-    todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != idaapi.BADADDR]
+
+    todo = []
+    for (mzx, pex, i) in mz_xor:
+        for off in capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx):
+            todo.append((off, mzx, pex, i))

    while len(todo):
        off, mzx, pex, i = todo.pop()
@@ -61,17 +61,16 @@ def check_segment_for_pe(seg):
        if idc.get_bytes(peoff, 2) == pex:
            yield (off, i)

-        nextres = capa.features.extractors.ida.helpers.find_byte_sequence(off + 1, seg.end_ea, mzx)
-        if nextres != -1:
+        for nextres in capa.features.extractors.ida.helpers.find_byte_sequence(off + 1, seg.end_ea, mzx):
            todo.append((nextres, mzx, pex, i))


 def extract_file_embedded_pe():
-    """ extract embedded PE features
+    """extract embedded PE features

-        IDA must load resource sections for this to be complete
-            - '-R' from console
-            - Check 'Load resource sections' when opening binary in IDA manually
+    IDA must load resource sections for this to be complete
+        - '-R' from console
+        - Check 'Load resource sections' when opening binary in IDA manually
    """
    for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
        for (ea, _) in check_segment_for_pe(seg):
@@ -85,41 +84,54 @@ def extract_file_export_names():


 def extract_file_import_names():
-    """ extract function imports
+    """extract function imports

-        1. imports by ordinal:
-         - modulename.#ordinal
+    1. imports by ordinal:
+     - modulename.#ordinal

-        2. imports by name, results in two features to support importname-only
-           matching:
-         - modulename.importname
-         - importname
+    2. imports by name, results in two features to support importname-only
+       matching:
+     - modulename.importname
+     - importname
    """
    for (ea, info) in capa.features.extractors.ida.helpers.get_file_imports().items():
-        if info[1]:
-            yield Import("%s.%s" % (info[0], info[1])), ea
-            yield Import(info[1]), ea
-        if info[2]:
-            yield Import("%s.#%s" % (info[0], str(info[2]))), ea
+        if info[1] and info[2]:
+            # e.g. in mimikatz: ('cabinet', 'FCIAddFile', 11L)
+            # extract by name here and by ordinal below
+            for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]):
+                yield Import(name), ea
+            dll = info[0]
+            symbol = "#%d" % (info[2])
+        elif info[1]:
+            dll = info[0]
+            symbol = info[1]
+        elif info[2]:
+            dll = info[0]
+            symbol = "#%d" % (info[2])
+        else:
+            continue
+
+        for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
+            yield Import(name), ea


 def extract_file_section_names():
-    """ extract section names
+    """extract section names

-        IDA must load resource sections for this to be complete
-            - '-R' from console
-            - Check 'Load resource sections' when opening binary in IDA manually
+    IDA must load resource sections for this to be complete
+        - '-R' from console
+        - Check 'Load resource sections' when opening binary in IDA manually
    """
    for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
        yield Section(idaapi.get_segm_name(seg)), seg.start_ea


 def extract_file_strings():
-    """ extract ASCII and UTF-16 LE strings
+    """extract ASCII and UTF-16 LE strings

-        IDA must load resource sections for this to be complete
-            - '-R' from console
-            - Check 'Load resource sections' when opening binary in IDA manually
+    IDA must load resource sections for this to be complete
+        - '-R' from console
+        - Check 'Load resource sections' when opening binary in IDA manually
    """
    for seg in capa.features.extractors.ida.helpers.get_segments():
        seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)
--- a/capa/features/extractors/ida/function.py
+++ b/capa/features/extractors/ida/function.py
@@ -14,31 +14,21 @@ from capa.features import Characteristic
 from capa.features.extractors import loops


-def extract_function_switch(f):
-    """ extract switch indicators from a function
-
-        arg:
-            f (IDA func_t)
-    """
-    if capa.features.extractors.ida.helpers.is_function_switch_statement(f):
-        yield Characteristic("switch"), f.start_ea
-
-
 def extract_function_calls_to(f):
-    """ extract callers to a function
+    """extract callers to a function

-        args:
-            f (IDA func_t)
+    args:
+        f (IDA func_t)
    """
    for ea in idautils.CodeRefsTo(f.start_ea, True):
        yield Characteristic("calls to"), ea


 def extract_function_loop(f):
-    """ extract loop indicators from a function
+    """extract loop indicators from a function

-        args:
-            f (IDA func_t)
+    args:
+        f (IDA func_t)
    """
    edges = []

@@ -52,27 +42,27 @@ def extract_function_loop(f):


 def extract_recursive_call(f):
-    """ extract recursive function call
+    """extract recursive function call

-        args:
-            f (IDA func_t)
+    args:
+        f (IDA func_t)
    """
    if capa.features.extractors.ida.helpers.is_function_recursive(f):
        yield Characteristic("recursive call"), f.start_ea


 def extract_features(f):
-    """ extract function features
+    """extract function features

-        arg:
-            f (IDA func_t)
+    arg:
+        f (IDA func_t)
    """
    for func_handler in FUNCTION_HANDLERS:
        for (feature, ea) in func_handler(f):
            yield feature, ea


-FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_switch, extract_function_loop, extract_recursive_call)
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)


 def main():
--- a/capa/features/extractors/ida/helpers.py
+++ b/capa/features/extractors/ida/helpers.py
@@ -12,31 +12,39 @@ import string
 import idc
 import idaapi
 import idautils
+import ida_bytes


 def find_byte_sequence(start, end, seq):
-    """ find byte sequence
+    """yield all ea of a given byte sequence

-        args:
-            start: min virtual address
-            end: max virtual address
-            seq: bytes to search e.g. b'\x01\x03'
+    args:
+        start: min virtual address
+        end: max virtual address
+        seq: bytes to search e.g. b"\x01\x03"
    """
    if sys.version_info[0] >= 3:
-        return idaapi.find_binary(start, end, " ".join(["%02x" % b for b in seq]), 0, idaapi.SEARCH_DOWN)
+        seq = " ".join(["%02x" % b for b in seq])
    else:
-        return idaapi.find_binary(start, end, " ".join(["%02x" % ord(b) for b in seq]), 0, idaapi.SEARCH_DOWN)
+        seq = " ".join(["%02x" % ord(b) for b in seq])
+
+    while True:
+        ea = idaapi.find_binary(start, end, seq, 0, idaapi.SEARCH_DOWN)
+        if ea == idaapi.BADADDR:
+            break
+        start = ea + 1
+        yield ea


 def get_functions(start=None, end=None, skip_thunks=False, skip_libs=False):
-    """ get functions, range optional
+    """get functions, range optional

-        args:
-            start: min virtual address
-            end: max virtual address
+    args:
+        start: min virtual address
+        end: max virtual address

-        ret:
-            yield func_t*
+    ret:
+        yield func_t*
    """
    for ea in idautils.Functions(start=start, end=end):
        f = idaapi.get_func(ea)
@@ -45,10 +53,10 @@ def get_functions(start=None, end=None, skip_thunks=False, skip_libs=False):


 def get_segments(skip_header_segments=False):
-    """ get list of segments (sections) in the binary image
+    """get list of segments (sections) in the binary image

-        args:
-            skip_header_segments: IDA may load header segments - skip if set
+    args:
+        skip_header_segments: IDA may load header segments - skip if set
    """
    for n in range(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
@@ -57,9 +65,9 @@ def get_segments(skip_header_segments=False):


 def get_segment_buffer(seg):
-    """ return bytes stored in a given segment
+    """return bytes stored in a given segment

-        decrease buffer size until IDA is able to read bytes from the segment
+    decrease buffer size until IDA is able to read bytes from the segment
    """
    buff = b""
    sz = seg.end_ea - seg.start_ea
@@ -97,13 +105,13 @@ def get_file_imports():


 def get_instructions_in_range(start, end):
-    """ yield instructions in range
+    """yield instructions in range

-        args:
-            start: virtual address (inclusive)
-            end: virtual address (exclusive)
-        yield:
-            (insn_t*)
+    args:
+        start: virtual address (inclusive)
+        end: virtual address (exclusive)
+    yield:
+        (insn_t*)
    """
    for head in idautils.Heads(start, end):
        insn = idautils.DecodeInstruction(head)
@@ -158,6 +166,10 @@ def basic_block_size(bb):

 def read_bytes_at(ea, count):
    """ """
+    # check if byte has a value, see get_wide_byte doc
+    if not idc.is_loaded(ea):
+        return b""
+
    segm_end = idc.get_segm_end(ea)
    if ea + count > segm_end:
        return idc.get_bytes(ea, segm_end - ea)
@@ -183,10 +195,10 @@ def find_string_at(ea, min=4):


 def get_op_phrase_info(op):
-    """ parse phrase features from operand
+    """parse phrase features from operand

-        Pretty much dup of sark's implementation:
-            https://github.com/tmr232/Sark/blob/master/sark/code/instruction.py#L28-L73
+    Pretty much dup of sark's implementation:
+        https://github.com/tmr232/Sark/blob/master/sark/code/instruction.py#L28-L73
    """
    if op.type not in (idaapi.o_phrase, idaapi.o_displ):
        return {}
@@ -229,6 +241,12 @@ def is_op_read(insn, op):
    return idaapi.has_cf_use(insn.get_canon_feature(), op.n)


+def is_op_offset(insn, op):
+    """ Check if an operand has been marked as an offset (by auto-analysis or manually) """
+    flags = idaapi.get_flags(insn.ea)
+    return ida_bytes.is_off(flags, op.n)
+
+
 def is_sp_modified(insn):
    """ determine if instruction modifies SP, ESP, RSP """
    for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,)):
@@ -269,15 +287,15 @@ def is_op_stack_var(ea, index):


 def mask_op_val(op):
-    """ mask value by data type
+    """mask value by data type

-        necessary due to a bug in AMD64
+    necessary due to a bug in AMD64

-        Example:
-            .rsrc:0054C12C mov [ebp+var_4], 0FFFFFFFFh
+    Example:
+        .rsrc:0054C12C mov [ebp+var_4], 0FFFFFFFFh

-            insn.Op2.dtype == idaapi.dt_dword
-            insn.Op2.value == 0xffffffffffffffff
+        insn.Op2.dtype == idaapi.dt_dword
+        insn.Op2.value == 0xffffffffffffffff
    """
    masks = {
        idaapi.dt_byte: 0xFF,
@@ -289,10 +307,10 @@ def mask_op_val(op):


 def is_function_recursive(f):
-    """ check if function is recursive
+    """check if function is recursive

-        args:
-            f (IDA func_t)
+    args:
+        f (IDA func_t)
    """
    for ref in idautils.CodeRefsTo(f.start_ea, True):
        if f.contains(ref):
@@ -300,30 +318,14 @@ def is_function_recursive(f):
    return False


-def is_function_switch_statement(f):
-    """ check a function for switch statement indicators
-
-        adapted from:
-        https://reverseengineering.stackexchange.com/questions/17548/calc-switch-cases-in-idapython-cant-iterate-over-results?rq=1
-
-        arg:
-            f (IDA func_t)
-    """
-    for (start, end) in idautils.Chunks(f.start_ea):
-        for head in idautils.Heads(start, end):
-            if idaapi.get_switch_info(head):
-                return True
-    return False
-
-
 def is_basic_block_tight_loop(bb):
-    """ check basic block loops to self
+    """check basic block loops to self

-        true if last instruction in basic block branches to basic block start
+    true if last instruction in basic block branches to basic block start

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
    """
    bb_end = idc.prev_head(bb.end_ea)
    if bb.start_ea < bb_end:
@@ -331,3 +333,47 @@ def is_basic_block_tight_loop(bb):
            if ref == bb.start_ea:
                return True
    return False
+
+
+def find_data_reference_from_insn(insn, max_depth=10):
+    """ search for data reference from instruction, return address of instruction if no reference exists """
+    depth = 0
+    ea = insn.ea
+
+    while True:
+        data_refs = list(idautils.DataRefsFrom(ea))
+
+        if len(data_refs) != 1:
+            # break if no refs or more than one ref (assume nested pointers only have one data reference)
+            break
+
+        if ea == data_refs[0]:
+            # break if circular reference
+            break
+
+        depth += 1
+        if depth > max_depth:
+            # break if max depth
+            break
+
+        ea = data_refs[0]
+
+    return ea
+
+
+def get_function_blocks(f):
+    """yield basic blocks contained in specified function
+
+    args:
+        f (IDA func_t)
+    yield:
+        block (IDA BasicBlock)
+    """
+    # leverage idaapi.FC_NOEXT flag to ignore useless external blocks referenced by the function
+    for block in idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT)):
+        yield block
+
+
+def is_basic_block_return(bb):
+    """ check if basic block is return block """
+    return bb.type == idaapi.fcb_ret
--- a/capa/features/extractors/ida/insn.py
+++ b/capa/features/extractors/ida/insn.py
@@ -12,8 +12,20 @@ import idautils

 import capa.features.extractors.helpers
 import capa.features.extractors.ida.helpers
-from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
-from capa.features.insn import Number, Offset, Mnemonic
+from capa.features import (
+    ARCH_X32,
+    ARCH_X64,
+    MAX_BYTES_FEATURE_SIZE,
+    THUNK_CHAIN_DEPTH_DELTA,
+    Bytes,
+    String,
+    Characteristic,
+)
+from capa.features.insn import API, Number, Offset, Mnemonic
+
+# security cookie checks may perform non-zeroing XORs; these are expected within a certain
+# byte range within the first and returning basic blocks. skipping them helps to reduce FP features
+SECURITY_COOKIE_BYTES_DELTA = 0x40


 def get_arch(ctx):
@@ -42,51 +54,63 @@ def get_imports(ctx):

 def check_for_api_call(ctx, insn):
    """ check instruction for API call """
-    if not idaapi.is_call_insn(insn):
+    if not insn.get_canon_mnem() in ("call", "jmp"):
        return

-    for ref in idautils.CodeRefsFrom(insn.ea, False):
+    info = ()
+    ref = insn.ea
+
+    # attempt to resolve API calls by following chained thunks to a reasonable depth
+    for _ in range(THUNK_CHAIN_DEPTH_DELTA):
+        # assume only one code/data ref when resolving "call" or "jmp"
+        try:
+            ref = tuple(idautils.CodeRefsFrom(ref, False))[0]
+        except IndexError:
+            try:
+                # thunks may be marked as data refs
+                ref = tuple(idautils.DataRefsFrom(ref))[0]
+            except IndexError:
+                break
+
        info = get_imports(ctx).get(ref, ())
        if info:
-            yield "%s.%s" % (info[0], info[1])
-        else:
-            f = idaapi.get_func(ref)
-            # check if call to thunk
-            # TODO: first instruction might not always be the thunk
-            if f and (f.flags & idaapi.FUNC_THUNK):
-                for thunk_ref in idautils.DataRefsFrom(ref):
-                    # TODO: always data ref for thunk??
-                    info = get_imports(ctx).get(thunk_ref, ())
-                    if info:
-                        yield "%s.%s" % (info[0], info[1])
+            break
+
+        f = idaapi.get_func(ref)
+        if not f or not (f.flags & idaapi.FUNC_THUNK):
+            break
+
+    if info:
+        yield "%s.%s" % (info[0], info[1])


 def extract_insn_api_features(f, bb, insn):
-    """ parse instruction API features
+    """parse instruction API features

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)

-        example:
-            call dword [0x00473038]
+    example:
+        call dword [0x00473038]
    """
    for api in check_for_api_call(f.ctx, insn):
-        for (feature, ea) in capa.features.extractors.helpers.generate_api_features(api, insn.ea):
-            yield feature, ea
+        dll, _, symbol = api.rpartition(".")
+        for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
+            yield API(name), insn.ea


 def extract_insn_number_features(f, bb, insn):
-    """ parse instruction number features
+    """parse instruction number features

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)

-        example:
-            push    3136B0h         ; dwControlCode
+    example:
+        push    3136B0h         ; dwControlCode
    """
    if idaapi.is_ret_insn(insn):
        # skip things like:
@@ -98,61 +122,70 @@ def extract_insn_number_features(f, bb, insn):
        #   .text:00401145 add esp, 0Ch
        return

-    for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm,)):
-        const = capa.features.extractors.ida.helpers.mask_op_val(op)
-        if not idaapi.is_mapped(const):
-            yield Number(const), insn.ea
-            yield Number(const, arch=get_arch(f.ctx)), insn.ea
+    for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm, idaapi.o_mem)):
+        # skip things like:
+        #   .text:00401100 shr eax, offset loc_C
+        if capa.features.extractors.ida.helpers.is_op_offset(insn, op):
+            continue
+
+        if op.type == idaapi.o_imm:
+            const = capa.features.extractors.ida.helpers.mask_op_val(op)
+        else:
+            const = op.addr
+
+        yield Number(const), insn.ea
+        yield Number(const, arch=get_arch(f.ctx)), insn.ea


 def extract_insn_bytes_features(f, bb, insn):
-    """ parse referenced byte sequences
+    """parse referenced byte sequences

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)

-        example:
-            push    offset iid_004118d4_IShellLinkA ; riid
+    example:
+        push    offset iid_004118d4_IShellLinkA ; riid
    """
    if idaapi.is_call_insn(insn):
-        # ignore call instructions
        return

-    for ref in idautils.DataRefsFrom(insn.ea):
+    ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
+    if ref != insn.ea:
        extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
        if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
            yield Bytes(extracted_bytes), insn.ea


 def extract_insn_string_features(f, bb, insn):
-    """ parse instruction string features
+    """parse instruction string features

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)

-        example:
-            push offset aAcr     ; "ACR  > "
+    example:
+        push offset aAcr     ; "ACR  > "
    """
-    for ref in idautils.DataRefsFrom(insn.ea):
+    ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
+    if ref != insn.ea:
        found = capa.features.extractors.ida.helpers.find_string_at(ref)
        if found:
            yield String(found), insn.ea


 def extract_insn_offset_features(f, bb, insn):
-    """ parse instruction structure offset features
+    """parse instruction structure offset features

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)

-        example:
-            .text:0040112F cmp [esi+4], ebx
+    example:
+        .text:0040112F cmp [esi+4], ebx
    """
    for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_phrase, idaapi.o_displ)):
        if capa.features.extractors.ida.helpers.is_op_stack_var(insn.ea, op.n):
@@ -174,11 +207,11 @@ def extract_insn_offset_features(f, bb, insn):


 def contains_stack_cookie_keywords(s):
-    """ check if string contains stack cookie keywords
+    """check if string contains stack cookie keywords

-        Examples:
-            xor     ecx, ebp ; StackCookie
-            mov     eax, ___security_cookie
+    Examples:
+        xor     ecx, ebp ; StackCookie
+        mov     eax, ___security_cookie
    """
    if not s:
        return False
@@ -189,30 +222,30 @@ def contains_stack_cookie_keywords(s):


 def bb_stack_cookie_registers(bb):
-    """ scan basic block for stack cookie operations
+    """scan basic block for stack cookie operations

-        yield registers ids that may have been used for stack cookie operations
+    yield register ids that may have been used for stack cookie operations

-        assume instruction that sets stack cookie and nzxor exist in same block
-        and stack cookie register is not modified prior to nzxor
+    assume instruction that sets stack cookie and nzxor exist in same block
+    and stack cookie register is not modified prior to nzxor

-        Example:
-            .text:004062DA mov     eax, ___security_cookie <-- stack cookie
-            .text:004062DF mov     ecx, eax
-            .text:004062E1 mov     ebx, [esi]
-            .text:004062E3 and     ecx, 1Fh
-            .text:004062E6 mov     edi, [esi+4]
-            .text:004062E9 xor     ebx, eax
-            .text:004062EB mov     esi, [esi+8]
-            .text:004062EE xor     edi, eax <-- ignore
-            .text:004062F0 xor     esi, eax <-- ignore
-            .text:004062F2 ror     edi, cl
-            .text:004062F4 ror     esi, cl
-            .text:004062F6 ror     ebx, cl
-            .text:004062F8 cmp     edi, esi
-            .text:004062FA jnz     loc_40639D
+    Example:
+        .text:004062DA mov     eax, ___security_cookie <-- stack cookie
+        .text:004062DF mov     ecx, eax
+        .text:004062E1 mov     ebx, [esi]
+        .text:004062E3 and     ecx, 1Fh
+        .text:004062E6 mov     edi, [esi+4]
+        .text:004062E9 xor     ebx, eax
+        .text:004062EB mov     esi, [esi+8]
+        .text:004062EE xor     edi, eax <-- ignore
+        .text:004062F0 xor     esi, eax <-- ignore
+        .text:004062F2 ror     edi, cl
+        .text:004062F4 ror     esi, cl
+        .text:004062F6 ror     ebx, cl
+        .text:004062F8 cmp     edi, esi
+        .text:004062FA jnz     loc_40639D

-        TODO: this is expensive, but necessary?...
+    TODO: this is expensive, but necessary?...
    """
    for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
        if contains_stack_cookie_keywords(idc.GetDisasm(insn.ea)):
@@ -222,12 +255,37 @@ def bb_stack_cookie_registers(bb):
                    yield op.reg


+def is_nzxor_stack_cookie_delta(f, bb, insn):
+    """ check if nzxor exists within stack cookie delta """
+    # security cookie check should use SP or BP
+    if not capa.features.extractors.ida.helpers.is_frame_register(insn.Op2.reg):
+        return False
+
+    f_bbs = tuple(capa.features.extractors.ida.helpers.get_function_blocks(f))
+
+    # expect security cookie init in first basic block within first bytes (instructions)
+    if capa.features.extractors.ida.helpers.is_basic_block_equal(bb, f_bbs[0]) and insn.ea < (
+        bb.start_ea + SECURITY_COOKIE_BYTES_DELTA
+    ):
+        return True
+
+    # ... or within last bytes (instructions) before a return
+    if capa.features.extractors.ida.helpers.is_basic_block_return(bb) and insn.ea > (
+        bb.start_ea + capa.features.extractors.ida.helpers.basic_block_size(bb) - SECURITY_COOKIE_BYTES_DELTA
+    ):
+        return True
+
+    return False
+
+
 def is_nzxor_stack_cookie(f, bb, insn):
    """ check if nzxor is related to stack cookie """
    if contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)):
        # Example:
        #   xor     ecx, ebp        ; StackCookie
        return True
+    if is_nzxor_stack_cookie_delta(f, bb, insn):
+        return True
    stack_cookie_regs = tuple(bb_stack_cookie_registers(bb))
    if any(op_reg in stack_cookie_regs for op_reg in (insn.Op1.reg, insn.Op2.reg)):
        # Example:
@@ -238,16 +296,16 @@ def is_nzxor_stack_cookie(f, bb, insn):


 def extract_insn_nzxor_characteristic_features(f, bb, insn):
-    """ parse instruction non-zeroing XOR instruction
+    """parse instruction non-zeroing XOR instruction

-        ignore expected non-zeroing XORs, e.g. security cookies
+    ignore expected non-zeroing XORs, e.g. security cookies

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)
    """
-    if insn.itype != idaapi.NN_xor:
+    if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor):
        return
    if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2):
        return
@@ -257,23 +315,23 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):


 def extract_insn_mnemonic_features(f, bb, insn):
-    """ parse instruction mnemonic features
+    """parse instruction mnemonic features

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)
    """
    yield Mnemonic(insn.get_canon_mnem()), insn.ea


 def extract_insn_peb_access_characteristic_features(f, bb, insn):
-    """ parse instruction peb access
+    """parse instruction peb access

-        fs:[0x30] on x86, gs:[0x60] on x64
+    fs:[0x30] on x86, gs:[0x60] on x64

-        TODO:
-            IDA should be able to do this..
+    TODO:
+        IDA should be able to do this..
    """
    if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
        return
@@ -290,10 +348,10 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):


 def extract_insn_segment_access_features(f, bb, insn):
-    """ parse instruction fs or gs access
+    """parse instruction fs or gs access

-        TODO:
-            IDA should be able to do this...
+    TODO:
+        IDA should be able to do this...
    """
    if all(map(lambda op: op.type != idaapi.o_mem, insn.ops)):
        # try to optimize for only memory references
@@ -311,12 +369,12 @@ def extract_insn_segment_access_features(f, bb, insn):


 def extract_insn_cross_section_cflow(f, bb, insn):
-    """ inspect the instruction for a CALL or JMP that crosses section boundaries
+    """inspect the instruction for a CALL or JMP that crosses section boundaries

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)
    """
    for ref in idautils.CodeRefsFrom(insn.ea, False):
        if ref in get_imports(f.ctx).keys():
@@ -331,14 +389,14 @@ def extract_insn_cross_section_cflow(f, bb, insn):


 def extract_function_calls_from(f, bb, insn):
-    """ extract functions calls from features
+    """extract functions calls from features

-        most relevant at the function scope, however, its most efficient to extract at the instruction scope
+    most relevant at the function scope, however, its most efficient to extract at the instruction scope

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)
    """
    if idaapi.is_call_insn(insn):
        for ref in idautils.CodeRefsFrom(insn.ea, False):
@@ -346,28 +404,28 @@ def extract_function_calls_from(f, bb, insn):


 def extract_function_indirect_call_characteristic_features(f, bb, insn):
-    """ extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
-        does not include calls like => call ds:dword_ABD4974
+    """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
+    does not include calls like => call ds:dword_ABD4974

-        most relevant at the function or basic block scope;
-        however, its most efficient to extract at the instruction scope
+    most relevant at the function or basic block scope;
+    however, its most efficient to extract at the instruction scope

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)
    """
    if idaapi.is_call_insn(insn) and idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
        yield Characteristic("indirect call"), insn.ea


 def extract_features(f, bb, insn):
-    """ extract instruction features
+    """extract instruction features

-        args:
-            f (IDA func_t)
-            bb (IDA BasicBlock)
-            insn (IDA insn_t)
+    args:
+        f (IDA func_t)
+        bb (IDA BasicBlock)
+        insn (IDA insn_t)
    """
    for inst_handler in INSTRUCTION_HANDLERS:
        for (feature, ea) in inst_handler(f, bb, insn):
--- a/capa/features/extractors/loops.py
+++ b/capa/features/extractors/loops.py
@@ -11,14 +11,14 @@ from networkx.algorithms.components import strongly_connected_components


 def has_loop(edges, threshold=2):
-    """ check if a list of edges representing a directed graph contains a loop
+    """check if a list of edges representing a directed graph contains a loop

-        args:
-            edges: list of edge sets representing a directed graph i.e. [(1, 2), (2, 1)]
-            threshold: min number of nodes contained in loop
+    args:
+        edges: list of edge sets representing a directed graph i.e. [(1, 2), (2, 1)]
+        threshold: min number of nodes contained in loop

-        returns:
-            bool
+    returns:
+        bool
    """
    g = nx.DiGraph()
    g.add_edges_from(edges)
--- a/capa/features/extractors/miasm/init.py
+++ b/capa/features/extractors/miasm/init.py
@@ -0,0 +1,107 @@
+# Copyright (C) 2020 FireEye, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: https://github.com/fireeye/capa/blob/master/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import miasm.analysis.binary
+import miasm.analysis.machine
+from miasm.core.locationdb import LocationDB
+
+import capa.features.extractors.miasm.file
+import capa.features.extractors.miasm.insn
+import capa.features.extractors.miasm.function
+import capa.features.extractors.miasm.basicblock
+from capa.features.extractors import FeatureExtractor
+
+
+class MiasmFeatureExtractor(FeatureExtractor):
+    """
+    feature extractor that uses miasm to parse and disassemble the input buffer.
+
+    the merged CFG is built once in the constructor and stored in `self.cfg`;
+    functions are identified by miasm loc_keys.
+    """
+
+    def __init__(self, buf):
+        """
+        args:
+          buf: raw content of the sample, handed to miasm's `Container.from_string`
+        """
+        super(MiasmFeatureExtractor, self).__init__()
+        self.buf = buf
+        self.loc_db = LocationDB()
+        self.container = miasm.analysis.binary.Container.from_string(buf, self.loc_db)
+        # NOTE(review): named `pe` and later passed to miasm's PE loader helpers,
+        # so presumably only PE inputs are supported - confirm
+        self.pe = self.container.executable
+        self.machine = miasm.analysis.machine.Machine(self.container.arch)
+        self.cfg = self._build_cfg()
+
+    def get_base_address(self):
+        # NOTE(review): this returns the container's entry point, not an image base - confirm this is intended
+        return self.container.entry_point
+
+    def extract_file_features(self):
+        """yield (feature, va) pairs produced by the miasm file-scope extractors"""
+        for feature, va in capa.features.extractors.miasm.file.extract_file_features(self):
+            yield feature, va
+
+    # TODO: Improve this function (it just considers all loc_keys target of calls a function), port to miasm
+    def get_functions(self):
+        """
+        returns all loc_keys which are the argument of any call function
+        """
+        # loc_keys already yielded, so that each function is reported once
+        functions = set()
+
+        for block in self.cfg.blocks:
+            for line in block.lines:
+                if line.is_subcall() and line.args[0].is_loc():
+                    loc_key = line.args[0].loc_key
+                    if loc_key not in functions:
+                        functions.add(loc_key)
+                        yield loc_key
+
+    def extract_function_features(self, loc_key):
+        """yield (feature, va) pairs for the function that starts at `loc_key`"""
+        for feature, va in capa.features.extractors.miasm.function.extract_features(self, loc_key):
+            yield feature, va
+
+    def block_offset(self, bb):
+        """return the offset of the first instruction of basic block `bb`"""
+        return bb.lines[0].offset
+
+    def function_offset(self, f):
+        """return the offset of the first instruction of the block that loc_key `f` resolves to"""
+        return self.cfg.loc_key_to_block(f).lines[0].offset
+
+    def get_basic_blocks(self, loc_key):
+        """
+        get the basic blocks of the function represented by loc_key
+        """
+        block = self.cfg.loc_key_to_block(loc_key)
+        # disassemble again from the function start without following calls,
+        # so the resulting CFG is limited to this function
+        disassembler = self.machine.dis_engine(self.container.bin_stream, loc_db=self.loc_db, follow_call=False)
+        cfg = disassembler.dis_multiblock(self.block_offset(block))
+        return cfg.blocks
+
+    def extract_basic_block_features(self, _, bb):
+        """yield (feature, va) pairs for basic block `bb`; the function argument is unused"""
+        for feature, va in capa.features.extractors.miasm.basicblock.extract_features(bb):
+            yield feature, va
+
+    def get_instructions(self, _, bb):
+        """return the instructions of basic block `bb`; the function argument is unused"""
+        return bb.lines
+
+    def extract_insn_features(self, f, bb, insn):
+        """yield (feature, va) pairs for instruction `insn`"""
+        for feature, va in capa.features.extractors.miasm.insn.extract_features(self, f, bb, insn):
+            yield feature, va
+
+    def _get_entry_points(self):
+        """return the set of addresses to start disassembly from: the entry point plus all exports"""
+        entry_points = {self.get_base_address()}
+
+        # NOTE(review): `miasm.jitter.loader.pe` is not imported explicitly by this module;
+        # presumably it is loaded as a side effect of the miasm imports above - confirm
+        for _, va in miasm.jitter.loader.pe.get_export_name_addr_list(self.pe):
+            entry_points.add(va)
+
+        return entry_points
+
+    # This is more efficient than using the `blocks` argument in `dis_multiblock`
+    # See http://www.williballenthin.com/post/2020-01-12-miasm-part-2
+    # TODO: port this efficiency improvement to miasm
+    def _build_cfg(self):
+        """disassemble from every entry point and merge the results into a single CFG"""
+        loc_db = self.container.loc_db
+        disassembler = self.machine.dis_engine(self.container.bin_stream, follow_call=True, loc_db=loc_db)
+        # shared across the dis_multiblock calls below
+        job_done = set()
+        cfgs = {}
+
+        for va in self._get_entry_points():
+            cfgs[va] = disassembler.dis_multiblock(va, job_done=job_done)
+
+        # NOTE(review): `miasm.core.asmblock` is not imported explicitly by this module either - confirm
+        complete_cfs = miasm.core.asmblock.AsmCFG(loc_db)
+        for cfg in cfgs.values():
+            complete_cfs.merge(cfg)
+
+        disassembler.apply_splitting(complete_cfs)
+        return complete_cfs
--- a/capa/features/extractors/miasm/basicblock.py
+++ b/capa/features/extractors/miasm/basicblock.py
@@ -0,0 +1,134 @@
+# Copyright (C) 2020 FireEye, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: https://github.com/fireeye/capa/blob/master/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import sys
+import string
+import struct
+
+from capa.features import Characteristic
+from capa.features.basicblock import BasicBlock
+from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
+
+
+# TODO: Avoid this duplication (this code is in __init__ as well)
+def block_offset(bb):
+    """return the offset of the first instruction of the given basic block"""
+    first_insn = bb.lines[0]
+    return first_insn.offset
+
+
+def extract_bb_tight_loop(bb):
+    """yield the "tight loop" characteristic when one of the block's outgoing constraints targets the block itself"""
+    for constraint in bb.bto:
+        if constraint.loc_key == bb.loc_key:
+            yield Characteristic("tight loop"), block_offset(bb)
+            # report the block once, however many self-references it has
+            return
+
+
+def is_mov_imm_to_stack(instr):
+    """
+    Return True if the instruction is a MOV* of an immediate into memory addressed through a stack register
+    """
+    if not instr.name.startswith("MOV"):
+        return False
+
+    operands = instr.args
+    if len(operands) != 2:
+        # not two operands
+        return False
+    dst, src = operands
+
+    if not src.is_int():
+        return False
+
+    if not dst.is_mem():
+        return False
+
+    # should detect things like `@8[ESP + 0x8]` and `EBP` and not fail in other cases
+    rendered = str(dst)
+    return any(register in rendered for register in ("EBP", "RBP", "ESP", "RSP"))
+
+
+def is_printable_ascii(chars):
+    """
+    Return True if every byte in `chars` is below 127 and is a `string.printable` character
+
+    `chars` is a byte string: iterating it yields ints on Python 3 and
+    one-character strings on Python 2, hence the two branches.
+    """
+    if sys.version_info >= (3, 0):
+        return all(c < 127 and chr(c) in string.printable for c in chars)
+    else:
+        return all(ord(c) < 127 and c in string.printable for c in chars)
+
+
+def is_printable_utf16le(chars):
+    """
+    Return True if `chars` looks like UTF-16LE text restricted to printable ASCII:
+    every second byte is zero and the remaining bytes are printable ASCII
+    """
+    # bytearray yields ints on both Python 2 and Python 3; comparing the
+    # elements against b"\x00" is always False on Python 3
+    if all(c == 0 for c in bytearray(chars[1::2])):
+        return is_printable_ascii(chars[::2])
+    return False
+
+
+def get_printable_len(insn):
+    """
+    Return string length if all operand bytes are ascii or utf16-le printable
+
+    args:
+      insn: a two-operand instruction whose source is an immediate and whose destination is memory
+
+    returns:
+      int: number of characters encoded by the immediate, or 0 if it is not printable
+
+    raises:
+      ValueError: if the operands are not (memory, immediate)
+    """
+    dst, src = insn.args
+
+    if not src.is_int():
+        raise ValueError("unexpected operand type")
+
+    if not dst.is_mem():
+        raise ValueError("unexpected operand type")
+
+    if isinstance(src.arg, int):
+        val = src.arg
+    else:
+        val = src.arg.arg
+
+    # number of bytes needed to represent the value
+    size = (val.bit_length() + 7) // 8
+
+    if size == 0:
+        return 0
+
+    # widen to the next width that struct can pack, so that 3-, 5-, 6- and 7-byte
+    # values are handled rather than leaving `chars` unassigned
+    for width, fmt in ((1, "<B"), (2, "<H"), (4, "<I"), (8, "<Q")):
+        if size <= width:
+            size = width
+            chars = struct.pack(fmt, val)
+            break
+    else:
+        # wider than 8 bytes: not something we can treat as a string chunk
+        return 0
+
+    if is_printable_ascii(chars):
+        return size
+
+    if is_printable_utf16le(chars):
+        # integer division: the result is a character count
+        return size // 2
+
+    return 0
+
+
+def extract_stackstring(bb):
+    """yield the "stack string" characteristic once the block has moved more than MIN_STACKSTRING_LEN printable characters onto the stack"""
+    printable_total = 0
+    for insn in bb.lines:
+        if is_mov_imm_to_stack(insn):
+            printable_total += get_printable_len(insn)
+        if printable_total <= MIN_STACKSTRING_LEN:
+            continue
+        yield Characteristic("stack string"), block_offset(bb)
+        return
+
+
+def extract_features(bb):
+    """
+    extract features from the given basic block.
+
+    args:
+      bb (miasm.core.asmblock.AsmBlock): the basic block to process.
+
+    yields:
+      Tuple[Feature, VA]: a feature and the location at which it was found.
+    """
+    # every basic block yields the BasicBlock feature itself, before the handlers run
+    yield BasicBlock(), block_offset(bb)
+    for handler in BASIC_BLOCK_HANDLERS:
+        for found, address in handler(bb):
+            yield found, address
+
+
+# basic block scope extractors run by extract_features; each is called with the block and yields (feature, va) pairs
+BASIC_BLOCK_HANDLERS = (
+    extract_bb_tight_loop,
+    extract_stackstring,
+)
--- a/capa/features/extractors/miasm/file.py
+++ b/capa/features/extractors/miasm/file.py
@@ -0,0 +1,102 @@
+# Copyright (C) 2020 FireEye, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: https://github.com/fireeye/capa/blob/master/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import re
+
+import miasm.analysis.binary
+
+import capa.features.extractors.strings
+from capa.features import String, Characteristic
+from capa.features.file import Export, Import, Section
+
+
+def extract_file_embedded_pe(extractor):
+    """
+    extract embedded PE features
+
+    every "MZ" byte pair after the start of the file is tried as the start of a PE;
+    those that miasm parses as a PE are reported.
+
+    yields:
+      Tuple[Feature, int]: the "embedded pe" characteristic and the file offset of the embedded PE
+    """
+    buf = extractor.buf
+    # start searching at offset 1: the "MZ" at offset 0 is the file itself, not an embedded PE
+    for match in re.compile(b"MZ").finditer(buf, 1):
+        offset = match.start()
+        try:
+            subcontainer = miasm.analysis.binary.ContainerPE.from_string(buf[offset:], loc_db=extractor.loc_db)
+        except Exception:
+            # most "MZ" byte pairs are not PE headers; a parse failure
+            # only means that there is no embedded PE at this offset
+            continue
+        if isinstance(subcontainer, miasm.analysis.binary.ContainerPE):
+            yield Characteristic("embedded pe"), offset
+
+
+def extract_file_export_names(extractor):
+    """
+    yield an Export feature and its address for each export that has a name
+    """
+    exports = miasm.jitter.loader.pe.get_export_name_addr_list(extractor.pe)
+    for name, address in exports:
+        # Only use func names and not ordinals
+        if not isinstance(name, str):
+            continue
+        yield Export(name), address
+
+
+def extract_file_import_names(extractor):
+    """
+    extract imported function names and their addresses
+    1. imports by ordinal:
+     - modulename.#ordinal
+    2. imports by name, results in two features to support importname-only matching:
+     - modulename.importname
+     - importname
+
+    yields:
+      Tuple[Feature, VA]: an Import feature and the address it is imported at
+    """
+    for ((dll, symbol), va_set) in miasm.jitter.loader.pe.get_import_address_pe(extractor.pe).items():
+        # remove a three-letter extension such as ".dll", but only when one is present,
+        # so that module names stored without an extension are not truncated
+        dll_name = dll[:-4] if dll[-4:-3] == "." else dll
+        for va in va_set:
+            if isinstance(symbol, int):
+                yield Import("%s.#%s" % (dll_name, symbol)), va
+            else:
+                yield Import("%s.%s" % (dll_name, symbol)), va
+                yield Import(symbol), va
+
+
+def extract_file_section_names(extractor):
+    """
+    yield a Section feature and its address for each entry in the section header list
+    """
+    for sect in extractor.pe.SHList.shlist:
+        # the raw name is NUL padded: keep what precedes the first NUL
+        raw_name, _, _ = sect.name.partition(b"\x00")
+        yield Section(raw_name.decode("ascii")), sect.addr
+
+
+def extract_file_strings(extractor):
+    """
+    yield a String feature and its offset for each ASCII string, then each UTF-16 LE string, found in the file
+    """
+    string_extractors = (
+        capa.features.extractors.strings.extract_ascii_strings,
+        capa.features.extractors.strings.extract_unicode_strings,
+    )
+    for extract in string_extractors:
+        for found in extract(extractor.buf):
+            yield String(found.s), found.offset
+
+
+def extract_file_features(extractor):
+    """
+    extract file features by running every handler in FILE_HANDLERS
+
+    args:
+      extractor: the feature extractor, passed unchanged to each file handler
+
+    yields:
+      Tuple[Feature, VA]: a feature and its location.
+    """
+    for handler in FILE_HANDLERS:
+        for found, address in handler(extractor):
+            yield found, address
+
+
+# file scope extractors run by extract_file_features; each is called with the extractor and yields (feature, va) pairs
+FILE_HANDLERS = (
+    extract_file_embedded_pe,
+    extract_file_export_names,
+    extract_file_import_names,
+    extract_file_section_names,
+    extract_file_strings,
+)
--- a/capa/features/extractors/miasm/function.py
+++ b/capa/features/extractors/miasm/function.py
@@ -0,0 +1,50 @@
+# Copyright (C) 2020 FireEye, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: https://github.com/fireeye/capa/blob/master/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+from capa.features import Characteristic
+
+
+def extract_function_calls_to(extractor, loc_key):
+    """
+    yield the "calls to" characteristic, at the call instruction's offset,
+    for each predecessor block whose call instruction targets `loc_key`
+    """
+    cfg = extractor.cfg
+    for pred_key in cfg.predecessors(loc_key):
+        call = cfg.loc_key_to_block(pred_key).get_subcall_instr()
+        if not call or not call.is_subcall():
+            continue
+        target = call.args[0]
+        if target.is_loc() and target.loc_key == loc_key:
+            yield Characteristic("calls to"), call.offset
+
+
+def extract_function_loop(extractor, loc_key):
+    """
+    yield the "loop" characteristic if the CFG disassembled from the function start has a loop
+    """
+    entry_block = extractor.cfg.loc_key_to_block(loc_key)
+    start = extractor.block_offset(entry_block)
+    # calls are not followed, so the CFG is limited to this function
+    engine = extractor.machine.dis_engine(extractor.container.bin_stream, loc_db=extractor.loc_db, follow_call=False)
+    if not engine.dis_multiblock(start).has_loop():
+        return
+    yield Characteristic("loop"), start
+
+
+def extract_features(extractor, loc_key):
+    """
+    extract features from the given function by running every handler in FUNCTION_HANDLERS.
+
+    args:
+      extractor: the feature extractor, passed unchanged to each function handler
+      loc_key (LocKey): LocKey which represents the beginning of the function
+
+    yields:
+      Tuple[Feature, VA]: a feature and the location at which it was found.
+    """
+    for handler in FUNCTION_HANDLERS:
+        for found, address in handler(extractor, loc_key):
+            yield found, address
+
+
+# function scope extractors run by extract_features; each is called with (extractor, loc_key) and yields (feature, va) pairs
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
--- a/capa/features/extractors/miasm/insn.py
+++ b/capa/features/extractors/miasm/insn.py
@@ -0,0 +1,126 @@
+# Copyright (C) 2020 FireEye, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: https://github.com/fireeye/capa/blob/master/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import miasm.expression.expression
+
+import capa.features.extractors.helpers
+from capa.features.insn import Mnemonic
+
+
+# TODO: remove duplication (similar code in file.py)
+# TODO: this function should be cached
+def get_imports(pe):
+    """
+    map each import address to the name of the import
+
+    args:
+      pe: the parsed executable, as accepted by miasm's `get_import_address_pe`
+
+    returns:
+      Dict[VA, str]: "modulename.importname" for imports by name, "modulename.#ordinal" for imports by ordinal
+    """
+    imports = {}
+    for ((dll, symbol), va_set) in miasm.jitter.loader.pe.get_import_address_pe(pe).items():
+        # remove a three-letter extension such as ".dll", but only when one is present,
+        # so that module names stored without an extension are not truncated
+        dll_name = dll[:-4] if dll[-4:-3] == "." else dll
+        if isinstance(symbol, int):
+            name = "%s.#%s" % (dll_name, symbol)
+        else:
+            name = "%s.%s" % (dll_name, symbol)
+        for va in va_set:
+            imports[va] = name
+    return imports
+
+
+def extract_insn_api_features(extractor, _f, _bb, insn):
+    """
+    parse API features from the given instruction.
+
+    only calls through a memory operand with a constant address are handled;
+    the address is looked up in the import table.
+
+    yields:
+      Tuple[Feature, VA]: an API feature for each name variant of the imported symbol, and the instruction offset
+    """
+    # local import: this module only imports Mnemonic from capa.features.insn at the top
+    from capa.features.insn import API
+
+    if not insn.is_subcall():
+        return
+
+    arg = insn.args[0]
+    if not isinstance(arg, miasm.expression.expression.ExprMem):
+        return
+    if not isinstance(arg.ptr, miasm.expression.expression.ExprInt):
+        return
+
+    target = int(arg.ptr)
+    imports = get_imports(extractor.pe)
+    if target in imports:
+        dll, _, symbol = imports[target].rpartition(".")
+        # generate_symbols yields plain names, which must be wrapped into features
+        for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
+            yield API(name), insn.offset
+
+
+def extract_insn_number_features(extractor, f, bb, insn):
+    """
+    parse number features from the given instruction.
+
+    not implemented for this backend yet: always raises NotImplementedError.
+    """
+    raise NotImplementedError()
+
+
+def extract_insn_string_features(extractor, f, bb, insn):
+    """
+    parse string features from the given instruction.
+
+    not implemented for this backend yet: always raises NotImplementedError.
+    """
+    raise NotImplementedError()
+
+
+def extract_insn_offset_features(extractor, f, bb, insn):
+    """
+    parse structure offset features from the given instruction.
+
+    not implemented for this backend yet: always raises NotImplementedError.
+    """
+    raise NotImplementedError()
+
+
+def extract_insn_nzxor_characteristic_features(extractor, f, bb, insn):
+    """
+    parse non-zeroing XOR instruction from the given instruction.
+    ignore expected non-zeroing XORs, e.g. security cookies.
+
+    not implemented for this backend yet: always raises NotImplementedError.
+    """
+    raise NotImplementedError()
+
+
+def extract_insn_mnemonic_features(extractor, f, bb, insn):
+    """yield a Mnemonic feature built from the instruction's name, located at the instruction's offset"""
+    mnemonic = insn.name
+    yield Mnemonic(mnemonic), insn.offset
+
+
+def extract_insn_peb_access_characteristic_features(extractor, f, bb, insn):
+    """
+    parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
+
+    not implemented for this backend yet: always raises NotImplementedError.
+    """
+    raise NotImplementedError()
+
+
+def extract_insn_segment_access_features(extractor, f, bb, insn):
+    """
+    parse the instruction for access to fs or gs
+
+    not implemented for this backend yet: always raises NotImplementedError.
+    """
+    raise NotImplementedError()
+
+
+def extract_insn_cross_section_cflow(extractor, f, bb, insn):
+    """
+    inspect the instruction for a CALL or JMP that crosses section boundaries.
+
+    not implemented for this backend yet: always raises NotImplementedError.
+    """
+    raise NotImplementedError()
+
+
+# this is a feature that's most relevant at the function scope,
+# however, it's most efficient to extract at the instruction scope.
+# NOTE(review): unlike the other handlers in this module, this one takes no `extractor`
+# argument, although extract_features calls handlers as (extractor, f, bb, insn) -
+# confirm the intended signature before enabling it in INSTRUCTION_HANDLERS
+def extract_function_calls_from(f, bb, insn):
+    """not implemented for this backend yet: always raises NotImplementedError."""
+    raise NotImplementedError()
+
+
+def extract_features(extractor, f, bb, insn):
+    """
+    extract features from the given insn by running every handler in INSTRUCTION_HANDLERS.
+
+    args:
+      extractor (MiasmFeatureExtractor)
+      f (miasm.expression.expression.LocKey): the function from which to extract features
+      bb (miasm.core.asmblock.AsmBlock): the basic block to process.
+      insn (Instruction): the instruction to process.
+
+    yields:
+      Tuple[Feature, VA]: a feature and the location at which it was found.
+    """
+    for handler in INSTRUCTION_HANDLERS:
+        for found, address in handler(extractor, f, bb, insn):
+            yield found, address
+
+
+# instruction scope extractors run by extract_features.
+# the commented-out entries are handlers that are not implemented yet (they raise
+# NotImplementedError) or that are not defined in this module.
+INSTRUCTION_HANDLERS = (
+    extract_insn_api_features,
+    # extract_insn_number_features,
+    # extract_insn_string_features,
+    # extract_insn_bytes_features,
+    # extract_insn_offset_features,
+    # extract_insn_nzxor_characteristic_features,
+    extract_insn_mnemonic_features,
+    # extract_insn_peb_access_characteristic_features,
+    # extract_insn_cross_section_cflow,
+    # extract_insn_segment_access_features,
+    # extract_function_calls_from,
+    # extract_function_indirect_call_characteristic_features,
+)
--- a/capa/features/extractors/smda/init.py
+++ b/capa/features/extractors/smda/init.py
@@ -0,0 +1,52 @@
+import sys
+import types
+
+from smda.common.SmdaReport import SmdaReport
+from smda.common.SmdaInstruction import SmdaInstruction
+
+import capa.features.extractors.smda.file
+import capa.features.extractors.smda.insn
+import capa.features.extractors.smda.function
+import capa.features.extractors.smda.basicblock
+from capa.main import UnsupportedRuntimeError
+from capa.features.extractors import FeatureExtractor
+
+
+class SmdaFeatureExtractor(FeatureExtractor):
+    """
+    feature extractor that reads functions, basic blocks and instructions from an SMDA report
+    and delegates feature extraction to the capa.features.extractors.smda modules.
+    """
+
+    def __init__(self, smda_report: SmdaReport, path):
+        """
+        args:
+          smda_report (SmdaReport): the SMDA analysis result for the sample
+          path: path to the sample; passed to the file scope extractors
+
+        raises:
+          UnsupportedRuntimeError: when running on Python 2
+        """
+        super().__init__()
+        if sys.version_info < (3, 0):
+            raise UnsupportedRuntimeError("SMDA should only be used with Python 3.")
+        self.smda_report = smda_report
+        self.path = path
+
+    def get_base_address(self):
+        """return the base address recorded in the SMDA report"""
+        return self.smda_report.base_addr
+
+    def extract_file_features(self):
+        """yield (feature, va) pairs at file scope"""
+        yield from capa.features.extractors.smda.file.extract_features(self.smda_report, self.path)
+
+    def get_functions(self):
+        """yield each function of the SMDA report"""
+        yield from self.smda_report.getFunctions()
+
+    def extract_function_features(self, f):
+        """yield (feature, va) pairs for function `f`"""
+        yield from capa.features.extractors.smda.function.extract_features(f)
+
+    def get_basic_blocks(self, f):
+        """yield each basic block of function `f`"""
+        yield from f.getBlocks()
+
+    def extract_basic_block_features(self, f, bb):
+        """yield (feature, va) pairs for basic block `bb` of function `f`"""
+        yield from capa.features.extractors.smda.basicblock.extract_features(f, bb)
+
+    def get_instructions(self, f, bb):
+        """yield each instruction of basic block `bb`"""
+        yield from bb.getInstructions()
+
+    def extract_insn_features(self, f, bb, insn):
+        """yield (feature, va) pairs for instruction `insn`"""
+        yield from capa.features.extractors.smda.insn.extract_features(f, bb, insn)
--- a/capa/features/extractors/smda/basicblock.py
+++ b/capa/features/extractors/smda/basicblock.py
@@ -0,0 +1,131 @@
+import sys
+import string
+import struct
+
+from capa.features import Characteristic
+from capa.features.basicblock import BasicBlock
+from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
+
+
+def _bb_has_tight_loop(f, bb):
+    """
+    return True if the basic block lists its own offset among its outgoing block references
+    """
+    if bb.offset not in f.blockrefs:
+        return False
+    return bb.offset in f.blockrefs[bb.offset]
+
+
+def extract_bb_tight_loop(f, bb):
+    """yield the "tight loop" characteristic when the basic block branches back to its own start"""
+    if not _bb_has_tight_loop(f, bb):
+        return
+    yield Characteristic("tight loop"), bb.offset
+
+
+def _bb_has_stackstring(f, bb):
+    """
+    detect potential stackstring creation: true once the printable characters moved
+    as immediates onto the stack in this basic block exceed MIN_STACKSTRING_LEN
+    """
+    printable_total = 0
+    for insn in bb.getInstructions():
+        if is_mov_imm_to_stack(insn):
+            printable_total += get_printable_len(insn.getDetailed())
+        if printable_total <= MIN_STACKSTRING_LEN:
+            continue
+        return True
+    return False
+
+
+def get_operands(smda_ins):
+    """split the instruction's operand string at commas and strip whitespace around each operand"""
+    operands = []
+    for part in smda_ins.operands.split(","):
+        operands.append(part.strip())
+    return operands
+
+
+def extract_stackstring(f, bb):
+    """yield the "stack string" characteristic when the basic block looks like it builds a string on the stack"""
+    if not _bb_has_stackstring(f, bb):
+        return
+    yield Characteristic("stack string"), bb.offset
+
+
+def is_mov_imm_to_stack(smda_ins):
+    """
+    Return True if the instruction is a mov* whose source parses as a hexadecimal
+    immediate and whose destination mentions a stack or frame pointer register
+    """
+    if not smda_ins.mnemonic.startswith("mov"):
+        return False
+
+    operands = [part.strip() for part in smda_ins.operands.split(",")]
+    if len(operands) != 2:
+        # not two operands
+        return False
+    dst, src = operands
+
+    try:
+        int(src, 16)
+    except ValueError:
+        # source is not an immediate
+        return False
+
+    return any(regname in dst for regname in ("ebp", "rbp", "esp", "rsp"))
+
+
+def is_printable_ascii(chars):
+    """Return True if every byte value in `chars` is below 127 and is a `string.printable` character"""
+    return all(c < 127 and chr(c) in string.printable for c in chars)
+
+
+def is_printable_utf16le(chars):
+    """
+    Return True if `chars` looks like UTF-16LE text restricted to printable ASCII:
+    every second byte is zero and the remaining bytes are printable ASCII
+    """
+    if all(c == 0x00 for c in chars[1::2]):
+        return is_printable_ascii(chars[::2])
+    # explicit, so that the predicate always returns a bool
+    return False
+
+
+def get_printable_len(instr):
+    """
+    Return the number of characters in the immediate operand if all of its bytes
+    are ascii or utf16-le printable, else 0
+
+    Works on a capstone instruction
+
+    raises:
+      ValueError: if the immediate size is not 1, 2, 4 or 8 bytes
+    """
+    # should have exactly two operands for mov immediate
+    if len(instr.operands) != 2:
+        return 0
+
+    op_value = instr.operands[1].value.imm
+
+    # immediate size in bytes -> (struct format, mask that truncates the value to that size)
+    packers = {
+        1: ("<B", 0xFF),
+        2: ("<H", 0xFFFF),
+        4: ("<I", 0xFFFFFFFF),
+        8: ("<Q", 0xFFFFFFFFFFFFFFFF),
+    }
+    if instr.imm_size not in packers:
+        raise ValueError("Unhandled operand data type 0x%x." % instr.imm_size)
+    fmt, mask = packers[instr.imm_size]
+    chars = struct.pack(fmt, op_value & mask)
+
+    if is_printable_ascii(chars):
+        return instr.imm_size
+    if is_printable_utf16le(chars):
+        return instr.imm_size // 2
+
+    return 0
+
+
+def extract_features(f, bb):
+    """
+    extract features from the given basic block.
+
+    args:
+      f (smda.common.SmdaFunction): the function from which to extract features
+      bb (smda.common.SmdaBasicBlock): the basic block to process.
+
+    yields:
+      Tuple[Feature, VA]: a feature and the location at which it was found.
+    """
+    # every basic block yields the BasicBlock feature itself, before the handlers run
+    yield BasicBlock(), bb.offset
+    for handler in BASIC_BLOCK_HANDLERS:
+        for found, address in handler(f, bb):
+            yield found, address
+
+
+# basic block scope extractors run by extract_features; each is called with (f, bb) and yields (feature, va) pairs
+BASIC_BLOCK_HANDLERS = (
+    extract_bb_tight_loop,
+    extract_stackstring,
+)
--- a/capa/features/extractors/smda/file.py
+++ b/capa/features/extractors/smda/file.py
@@ -0,0 +1,139 @@
+import struct
+
+# if we have SMDA we definitely have lief
+import lief
+
+import capa.features.extractors.helpers
+import capa.features.extractors.strings
+from capa.features import String, Characteristic
+from capa.features.file import Export, Import, Section
+
+
+def carve(pbytes, offset=0):
+    """
+    Yield an (offset, xor_key) tuple for each embedded PE found, including
+    PEs that are XOR-encoded with a single-byte key
+
+    Based on the version from vivisect:
+    https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
+    And its IDA adaptation:
+    capa/features/extractors/ida/file.py
+
+    args:
+      pbytes (bytes): the buffer to search
+      offset (int): position in `pbytes` at which the search starts
+    """
+    # the "MZ" and "PE" signatures as they appear under each single-byte XOR key
+    mz_xor = [
+        (
+            capa.features.extractors.helpers.xor_static(b"MZ", i),
+            capa.features.extractors.helpers.xor_static(b"PE", i),
+            i,
+        )
+        for i in range(256)
+    ]
+
+    pblen = len(pbytes)
+    # worklist seeded with the first hit of each encoded "MZ"; keys without a hit are dropped
+    todo = [(pbytes.find(mzx, offset), mzx, pex, i) for mzx, pex, i in mz_xor]
+    todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != -1]
+
+    while len(todo):
+
+        off, mzx, pex, i = todo.pop()
+
+        # The MZ header has one field we will check
+        # e_lfanew is at 0x3c
+        e_lfanew = off + 0x3C
+        if pblen < (e_lfanew + 4):
+            continue
+
+        # decode e_lfanew with the same key: the offset of the PE header relative to the MZ header
+        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(pbytes[e_lfanew : e_lfanew + 4], i))[0]
+
+        # queue the next occurrence of this encoded "MZ", if any
+        nextres = pbytes.find(mzx, off + 1)
+        if nextres != -1:
+            todo.append((nextres, mzx, pex, i))
+
+        peoff = off + newoff
+        if pblen < (peoff + 2):
+            continue
+
+        if pbytes[peoff : peoff + 2] == pex:
+            yield (off, i)
+
+
+def extract_file_embedded_pe(smda_report, file_path):
+    """yield the "embedded pe" characteristic and file offset for each PE carved from the file, searching from offset 1"""
+    with open(file_path, "rb") as handle:
+        content = handle.read()
+
+    for pe_offset, _xor_key in carve(content, 1):
+        yield Characteristic("embedded pe"), pe_offset
+
+
+def extract_file_export_names(smda_report, file_path):
+    """yield an Export feature and its address for each exported function that LIEF reports"""
+    binary = lief.parse(file_path)
+    if binary is None:
+        return
+    for exported in binary.exported_functions:
+        yield Export(exported.name), exported.address
+
+
+def extract_file_import_names(smda_report, file_path):
+    """
+    extract imported function names and their addresses, via LIEF
+
+    nothing is yielded when LIEF does not parse the file as a PE.
+
+    yields:
+      Tuple[Feature, VA]: an Import feature for each name variant of the import, and the import's address
+    """
+    # extract import table info via LIEF
+    lief_binary = lief.parse(file_path)
+    if not isinstance(lief_binary, lief.PE.Binary):
+        return
+    for imported_library in lief_binary.imports:
+        library_name = imported_library.name.lower()
+        library_name = library_name[:-4] if library_name.endswith(".dll") else library_name
+        for func in imported_library.entries:
+            # computed before branching, so that imports by ordinal report their own address
+            va = func.iat_address + smda_report.base_addr
+            if func.name:
+                for name in capa.features.extractors.helpers.generate_symbols(library_name, func.name):
+                    yield Import(name), va
+            elif func.is_ordinal:
+                for name in capa.features.extractors.helpers.generate_symbols(library_name, "#%s" % func.ordinal):
+                    yield Import(name), va
+
+
+def extract_file_section_names(smda_report, file_path):
+    """
+    yield a Section feature and its address (image base plus the section's virtual address)
+    for each section; nothing is yielded when LIEF does not parse the file as a PE
+    """
+    binary = lief.parse(file_path)
+    if not isinstance(binary, lief.PE.Binary):
+        return
+    if not (binary and binary.sections):
+        return
+    image_base = binary.optional_header.imagebase
+    for sect in binary.sections:
+        yield Section(sect.name), image_base + sect.virtual_address
+
+
+def extract_file_strings(smda_report, file_path):
+    """
+    yield a String feature and its offset for each ASCII string, then each UTF-16 LE string, found in the file
+    """
+    with open(file_path, "rb") as handle:
+        content = handle.read()
+
+    string_extractors = (
+        capa.features.extractors.strings.extract_ascii_strings,
+        capa.features.extractors.strings.extract_unicode_strings,
+    )
+    for extract in string_extractors:
+        for found in extract(content):
+            yield String(found.s), found.offset
+
+
+def extract_features(smda_report, file_path):
+    """
+    extract file features by running every handler in FILE_HANDLERS
+
+    args:
+      smda_report (smda.common.SmdaReport): a SmdaReport
+      file_path: path to the input file
+
+    yields:
+      Tuple[Feature, VA]: a feature and its location.
+    """
+
+    for file_handler in FILE_HANDLERS:
+        for feature, va in file_handler(smda_report, file_path):
+            yield feature, va
+
+
+# file scope extractors run by extract_features; each is called with (smda_report, file_path) and yields (feature, va) pairs
+FILE_HANDLERS = (
+    extract_file_embedded_pe,
+    extract_file_export_names,
+    extract_file_import_names,
+    extract_file_section_names,
+    extract_file_strings,
+)
--- a/capa/features/extractors/smda/function.py
+++ b/capa/features/extractors/smda/function.py
@@ -0,0 +1,38 @@
+from capa.features import Characteristic
+from capa.features.extractors import loops
+
+
+def extract_function_calls_to(f):
+    """yield a "calls to" characteristic located at each entry of `f.inrefs`."""
+    for source in f.inrefs:
+        yield Characteristic("calls to"), source
+
+
+def extract_function_loop(f):
+    """
+    yield a "loop" characteristic, located at the function start,
+    when `loops.has_loop` reports a loop among the block-to-block edges
+    """
+    # flatten the {source block: [target blocks]} mapping into (source, target) pairs
+    edges = [(src, dst) for src, dsts in f.blockrefs.items() for dst in dsts]
+
+    if not edges:
+        return
+    if loops.has_loop(edges):
+        yield Characteristic("loop"), f.offset
+
+
+def extract_features(f):
+    """
+    run every function-scope handler against the given function.
+
+    args:
+      f (smda.common.SmdaFunction): the function from which to extract features
+
+    yields:
+      Tuple[Feature, VA]: each feature and the location its handler reported.
+    """
+    for handler in FUNCTION_HANDLERS:
+        for pair in handler(f):
+            feature, va = pair
+            yield feature, va
+
+
+# function-scope extractors, invoked in this order by `extract_features`; each takes the function and yields (feature, location) pairs.
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
--- a/capa/features/extractors/smda/insn.py
+++ b/capa/features/extractors/smda/insn.py
@@ -0,0 +1,393 @@
+import re
+import string
+import struct
+
+from smda.common.SmdaReport import SmdaReport
+
+import capa.features.extractors.helpers
+from capa.features import (
+    ARCH_X32,
+    ARCH_X64,
+    MAX_BYTES_FEATURE_SIZE,
+    THUNK_CHAIN_DEPTH_DELTA,
+    Bytes,
+    String,
+    Characteristic,
+)
+from capa.features.insn import API, Number, Offset, Mnemonic
+
+# security cookie checks may perform non-zeroing XORs, these are expected within a certain
+# byte range within the first and returning basic blocks, this helps to reduce FP features
+SECURITY_COOKIE_BYTES_DELTA = 0x40
+# a sign, a space, and a hexadecimal number inside an operand string, like `+ 0x10` or `- 0x4000`
+PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
+# a sign, a space, and a single decimal digit, like `+ 4`; only the first digit of a longer number is captured
+PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
+
+
+def get_arch(smda_report):
+    """
+    map the report's architecture and bitness to the matching ARCH_* constant.
+
+    raises:
+      NotImplementedError: if the architecture is not "intel".
+    """
+    if smda_report.architecture != "intel":
+        raise NotImplementedError
+
+    if smda_report.bitness == 32:
+        return ARCH_X32
+    if smda_report.bitness == 64:
+        return ARCH_X64
+    # intel with any other bitness: no constant applies, the result is None
+    return None
+
+
+def extract_insn_api_features(f, bb, insn):
+    """
+    parse API features from the given instruction.
+
+    handles API references recorded directly for the instruction, as well as references that
+    reach an API through a chain of at most THUNK_CHAIN_DEPTH_DELTA single-instruction functions.
+    all features are located at the address of the given instruction.
+    """
+
+    def symbols_for(api_entry):
+        # entries are formatted like `<dll>!<api>`; drop the file extension and lowercase the dll name
+        dll_name, api_name = api_entry.split("!")
+        dll_name = dll_name.split(".")[0].lower()
+        return capa.features.extractors.helpers.generate_symbols(dll_name, api_name)
+
+    if insn.offset in f.apirefs:
+        for name in symbols_for(f.apirefs[insn.offset]):
+            yield API(name), insn.offset
+        return
+
+    if insn.offset not in f.outrefs:
+        return
+
+    current_function = f
+    current_instruction = insn
+    for _ in range(THUNK_CHAIN_DEPTH_DELTA):
+        if not current_function or current_instruction.offset not in current_function.outrefs:
+            return
+        targets = current_function.outrefs[current_instruction.offset]
+        if len(targets) != 1:
+            # no single unambiguous target: not a chain that can be followed
+            return
+
+        target = targets[0]
+        referenced_function = current_function.smda_report.getFunction(target)
+        if not referenced_function:
+            return
+
+        # TODO SMDA: implement this function for both jmp and call, checking if function has 1 instruction which refs an API
+        if referenced_function.isApiThunk():
+            api_entry = referenced_function.apirefs[target] if target in referenced_function.apirefs else None
+            if api_entry:
+                for name in symbols_for(api_entry):
+                    yield API(name), insn.offset
+            # the chain ends here; stop so the same API is not reported again on each remaining iteration
+            return
+
+        if referenced_function.num_instructions == 1 and referenced_function.num_outrefs == 1:
+            # single-instruction function with one outgoing reference: follow it
+            current_function = referenced_function
+            current_instruction = [i for i in referenced_function.getInstructions()][0]
+        else:
+            # neither an API thunk nor a forwarding stub: nothing further to resolve
+            return
+
+
+def extract_insn_number_features(f, bb, insn):
+    """
+    parse number features from the given instruction.
+
+    each operand that parses as a hexadecimal integer is emitted twice:
+    once without architecture, and once tagged with the architecture of the report.
+    """
+    # example:
+    #
+    #     push    3136B0h         ; dwControlCode
+    operands = [o.strip() for o in insn.operands.split(",")]
+    if insn.mnemonic == "add" and operands[0] in ["esp", "rsp"]:
+        # skip things like:
+        #
+        #    .text:00401140                 call    sub_407E2B
+        #    .text:00401145                 add     esp, 0Ch
+        return
+    for operand in operands:
+        # guard only the parse: a handler around the yields would also
+        # intercept GeneratorExit and hide unrelated errors
+        try:
+            value = int(operand, 16)
+        except ValueError:
+            # operand is not a plain number
+            continue
+        yield Number(value), insn.offset
+        yield Number(value, arch=get_arch(f.smda_report)), insn.offset
+
+
+def read_bytes(smda_report, va, num_bytes=None):
+    """
+    read bytes from the report's buffer, starting at the given virtual address.
+
+    reads `num_bytes` when given, otherwise MAX_BYTES_FEATURE_SIZE.
+    fewer bytes are returned when the buffer ends first; None when the report has no buffer.
+    """
+
+    start = va - smda_report.base_addr
+    data = smda_report.buffer
+    if data is None:
+        return
+    count = MAX_BYTES_FEATURE_SIZE if num_bytes is None else num_bytes
+    # slicing clamps at the end of the buffer, so a short read needs no special case
+    return data[start : start + count]
+
+
+def derefs(smda_report, p):
+    """
+    recursively follow the given pointer, yielding the valid memory addresses along the way.
+    useful when you may have a pointer to string, or pointer to pointer to string, etc.
+
+    this is a "do what i mean" type of helper function.
+
+    based on the implementation in viv/insn.py
+    """
+    depth = 0
+    while True:
+        if not smda_report.isAddrWithinMemoryImage(p):
+            return
+        yield p
+
+        bytes_ = read_bytes(smda_report, p, num_bytes=4)
+        if bytes_ is None or len(bytes_) < 4:
+            # not enough bytes can be read, such as at the end of the buffer
+            return
+        # the next pointer is read as a 32-bit little-endian value, independent of the host byte order
+        val = struct.unpack("<I", bytes_)[0]
+
+        # sanity: pointer points to self
+        if val == p:
+            return
+
+        # sanity: avoid chains of pointers that are unreasonably deep
+        depth += 1
+        if depth > 10:
+            return
+
+        p = val
+
+
+def extract_insn_bytes_features(f, bb, insn):
+    """
+    parse byte sequence features from the data referenced by the given instruction.
+    example:
+        #     push    offset iid_004118d4_IShellLinkA ; riid
+    """
+    for ref in insn.getDataRefs():
+        # follow the reference and any pointers it leads to
+        for address in derefs(f.smda_report, ref):
+            data = read_bytes(f.smda_report, address)
+            # nothing to report when there is no buffer or the bytes are all zero
+            if data is None or capa.features.extractors.helpers.all_zeros(data):
+                continue
+
+            yield Bytes(data), insn.offset
+
+
+def detect_ascii_len(smda_report, offset):
+    """
+    measure the NUL-terminated printable ASCII string at the given virtual address.
+
+    returns:
+      int: the number of characters before the terminating NUL, or 0 if the report has no buffer,
+        the address is outside the buffer, or the string is not printable or not terminated.
+    """
+    buf = smda_report.buffer
+    if buf is None:
+        return 0
+    rva = offset - smda_report.base_addr
+    if rva < 0:
+        # a negative index would silently wrap around to the end of the buffer
+        return 0
+    ascii_len = 0
+    while rva + ascii_len < len(buf):
+        char = buf[rva + ascii_len]
+        if char == 0:
+            return ascii_len
+        if char >= 127 or chr(char) not in string.printable:
+            return 0
+        ascii_len += 1
+    # reached the end of the buffer without finding a terminator
+    return 0
+
+
+def detect_unicode_len(smda_report, offset):
+    """
+    measure the NUL-terminated UTF-16 LE string of printable ASCII characters at the given virtual address.
+
+    returns:
+      int: the number of bytes before the two-byte terminator, or 0 if the report has no buffer,
+        the address is outside the buffer, or the string is not printable or not terminated.
+    """
+    buf = smda_report.buffer
+    if buf is None:
+        return 0
+    rva = offset - smda_report.base_addr
+    if rva < 0:
+        # a negative index would silently wrap around to the end of the buffer
+        return 0
+    unicode_len = 0
+    # each character needs two bytes: the printable low byte and a zero high byte
+    while rva + unicode_len + 1 < len(buf):
+        char = buf[rva + unicode_len]
+        second_char = buf[rva + unicode_len + 1]
+        if char == 0 and second_char == 0:
+            return unicode_len
+        if second_char != 0 or char >= 127 or chr(char) not in string.printable:
+            return 0
+        unicode_len += 2
+    # reached the end of the buffer without finding a terminator
+    return 0
+
+
+def read_string(smda_report, offset):
+    """
+    read the ASCII or UTF-16 LE string at the given virtual address.
+
+    returns:
+      Optional[str]: the decoded string, or None if no string of at least two characters is found.
+    """
+    alen = detect_ascii_len(smda_report, offset)
+    if alen > 1:
+        return read_bytes(smda_report, offset, alen).decode("utf-8")
+    ulen = detect_unicode_len(smda_report, offset)
+    if ulen > 2:
+        # the detector expects the low byte first, so decode explicitly as little-endian
+        # rather than relying on the host byte order
+        return read_bytes(smda_report, offset, ulen).decode("utf-16-le")
+    return None
+
+
+def extract_insn_string_features(f, bb, insn):
+    """parse string features from the data referenced by the given instruction."""
+    # example:
+    #
+    #     push    offset aAcr     ; "ACR  > "
+    for ref in insn.getDataRefs():
+        for address in derefs(f.smda_report, ref):
+            text = read_string(f.smda_report, address)
+            if not text:
+                # no string at this address, or an empty one
+                continue
+            yield String(text.rstrip("\x00")), insn.offset
+
+
+def extract_insn_offset_features(f, bb, insn):
+    """
+    parse structure offset features from the given instruction.
+
+    each memory operand yields its signed displacement (0 when none is found), once without
+    architecture and once tagged with the architecture of the report.
+    """
+    # examples:
+    #
+    #     mov eax, [esi + 4]
+    #     mov eax, [esi + ecx + 16384]
+    operands = [o.strip() for o in insn.operands.split(",")]
+    for operand in operands:
+        if "ptr" not in operand:
+            continue
+        # skip stack and frame pointer relative accesses;
+        # `rsp` is included to match the other register checks in this file
+        if any(reg in operand for reg in ("esp", "ebp", "rsp", "rbp")):
+            continue
+        number = 0
+        # prefer a hexadecimal displacement, and fall back to a single decimal digit
+        match = re.search(PATTERN_HEXNUM, operand)
+        base = 16
+        if not match:
+            match = re.search(PATTERN_SINGLENUM, operand)
+            base = 10
+        if match:
+            number = int(match.group("num"), base)
+            # the whole match starts with the sign character
+            if match.group().startswith("-"):
+                number = -number
+        yield Offset(number), insn.offset
+        yield Offset(number, arch=get_arch(f.smda_report)), insn.offset
+
+
+def is_security_cookie(f, bb, insn):
+    """
+    check if an instruction is related to security cookie checks
+    """
+    # security cookie check should use SP or BP
+    second_operand = [o.strip() for o in insn.operands.split(",")][1]
+    if second_operand not in ["esp", "ebp", "rsp", "rbp"]:
+        return False
+    for position, block in enumerate(f.getBlocks()):
+        instructions = list(block.getInstructions())
+        # expect security cookie init in first basic block within first bytes (instructions)
+        if position == 0 and insn.offset < (instructions[0].offset + SECURITY_COOKIE_BYTES_DELTA):
+            return True
+        # ... or within last bytes (instructions) before a return
+        last = instructions[-1]
+        if last.mnemonic.startswith("ret") and insn.offset > (last.offset - SECURITY_COOKIE_BYTES_DELTA):
+            return True
+    return False
+
+
+def extract_insn_nzxor_characteristic_features(f, bb, insn):
+    """
+    parse non-zeroing XOR instruction from the given instruction.
+    ignore expected non-zeroing XORs, e.g. security cookies.
+    """
+
+    if insn.mnemonic in ("xor", "xorpd", "xorps", "pxor"):
+        parts = [part.strip() for part in insn.operands.split(",")]
+        # identical operands are skipped; of the rest, security cookie checks are also ignored
+        if parts[0] != parts[1] and not is_security_cookie(f, bb, insn):
+            yield Characteristic("nzxor"), insn.offset
+
+
+def extract_insn_mnemonic_features(f, bb, insn):
+    """yield the mnemonic of the given instruction, located at the instruction."""
+    mnemonic = insn.mnemonic
+    yield Mnemonic(mnemonic), insn.offset
+
+
+def extract_insn_peb_access_characteristic_features(f, bb, insn):
+    """
+    parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
+    """
+
+    if insn.mnemonic not in ["push", "mov"]:
+        return
+
+    operands = [o.strip() for o in insn.operands.split(",")]
+    for operand in operands:
+        # match the offset as a complete number, so that e.g. 0x300 is not mistaken for 0x30
+        if "fs:" in operand and re.search(r"0x30(?![0-9a-fA-F])", operand):
+            yield Characteristic("peb access"), insn.offset
+        elif "gs:" in operand and re.search(r"0x60(?![0-9a-fA-F])", operand):
+            yield Characteristic("peb access"), insn.offset
+
+
+def extract_insn_segment_access_features(f, bb, insn):
+    """ yield "fs access" / "gs access" for each operand that uses the fs or gs segment """
+    for operand in (part.strip() for part in insn.operands.split(",")):
+        if "fs:" in operand:
+            yield Characteristic("fs access"), insn.offset
+        elif "gs:" in operand:
+            yield Characteristic("gs access"), insn.offset
+
+
+def extract_insn_cross_section_cflow(f, bb, insn):
+    """
+    inspect the instruction for a CALL or JMP that crosses section boundaries.
+    """
+    if insn.mnemonic not in ["call", "jmp"]:
+        return
+    if insn.offset in f.apirefs:
+        # API references are not reported as cross section flow
+        return
+
+    report = insn.smda_function.smda_report
+    if insn.offset in f.outrefs:
+        targets = f.outrefs[insn.offset]
+    elif insn.operands.startswith("0x"):
+        # no recorded reference: fall back to the immediate target in the operand string
+        targets = [int(insn.operands, 16)]
+    else:
+        return
+
+    for target in targets:
+        if report.getSection(insn.offset) != report.getSection(target):
+            yield Characteristic("cross section flow"), insn.offset
+
+
+# this is a feature that's most relevant at the function scope,
+# however, it's most efficient to extract at the instruction scope.
+def extract_function_calls_from(f, bb, insn):
+    """
+    extract the targets called by the given instruction.
+
+    yields "calls from" located at each outgoing reference of a call instruction,
+    plus "recursive call" when a reference targets the containing function itself.
+    calls to APIs are located at the call instruction.
+    """
+    if insn.mnemonic != "call":
+        return
+
+    if insn.offset in f.outrefs:
+        for outref in f.outrefs[insn.offset]:
+            yield Characteristic("calls from"), outref
+
+            if outref == f.offset:
+                # if we found a jump target and it's the function address
+                # mark as recursive
+                yield Characteristic("recursive call"), outref
+    if insn.offset in f.apirefs:
+        # `f.apirefs` holds the API name as a string, which is not a valid location;
+        # use the address of the calling instruction instead
+        yield Characteristic("calls from"), insn.offset
+
+
+# this is a feature that's most relevant at the function or basic block scope,
+# however, it's most efficient to extract at the instruction scope.
+def extract_function_indirect_call_characteristic_features(f, bb, insn):
+    """
+    extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
+    does not include calls like => call ds:dword_ABD4974
+    """
+    if insn.mnemonic != "call":
+        return
+    # this is a generator, so the exclusions below end it with a bare return
+    if insn.operands.startswith("0x"):
+        # direct call to an immediate address
+        return
+    if "qword ptr" in insn.operands and "rip" in insn.operands:
+        # rip-relative memory operand
+        return
+    if insn.operands.startswith("dword ptr [0x"):
+        # memory operand at an absolute address
+        return
+    # call edx
+    # call dword ptr [eax+50h]
+    # call qword ptr [rsp+78h]
+    yield Characteristic("indirect call"), insn.offset
+
+
+def extract_features(f, bb, insn):
+    """
+    run every instruction-scope handler against the given instruction.
+
+    args:
+      f (smda.common.SmdaFunction): the function to process.
+      bb (smda.common.SmdaBasicBlock): the basic block to process.
+      insn (smda.common.SmdaInstruction): the instruction to process.
+
+    yields:
+      Tuple[Feature, VA]: each feature and the location its handler reported.
+    """
+    for handler in INSTRUCTION_HANDLERS:
+        for pair in handler(f, bb, insn):
+            feature, va = pair
+            yield feature, va
+
+
+# instruction-scope extractors, invoked in this order by `extract_features`.
+# each is called as handler(f, bb, insn) and yields (feature, location) pairs.
+INSTRUCTION_HANDLERS = (
+    extract_insn_api_features,
+    extract_insn_number_features,
+    extract_insn_string_features,
+    extract_insn_bytes_features,
+    extract_insn_offset_features,
+    extract_insn_nzxor_characteristic_features,
+    extract_insn_mnemonic_features,
+    extract_insn_peb_access_characteristic_features,
+    extract_insn_cross_section_cflow,
+    extract_insn_segment_access_features,
+    extract_function_calls_from,
+    extract_function_indirect_call_characteristic_features,
+)
--- a/capa/features/extractors/viv/file.py
+++ b/capa/features/extractors/viv/file.py
@@ -8,6 +8,7 @@

 import PE.carve as pe_carve  # vivisect PE

+import capa.features.extractors.helpers
 import capa.features.extractors.strings
 from capa.features import String, Characteristic
 from capa.features.file import Export, Import, Section
@@ -41,11 +42,9 @@ def extract_file_import_names(vw, file_path):
        if is_viv_ord_impname(impname):
            # replace ord prefix with #
            impname = "#%s" % impname[len("ord") :]
-            tinfo = "%s.%s" % (modname, impname)
-            yield Import(tinfo), va
-        else:
-            yield Import(tinfo), va
-            yield Import(impname), va
+
+        for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
+            yield Import(name), va


 def is_viv_ord_impname(impname):
--- a/capa/features/extractors/viv/function.py
+++ b/capa/features/extractors/viv/function.py
@@ -25,45 +25,6 @@ def interface_extract_function_XXX(f):
    yield NotImplementedError("feature"), NotImplementedError("virtual address")


-def get_switches(vw):
-    """
-    caching accessor to vivisect workspace switch constructs.
-    """
-    if "switches" in vw.metadata:
-        return vw.metadata["switches"]
-    else:
-        # addresses of switches in the program
-        switches = set()
-
-        for case_va, _ in filter(lambda t: "case" in t[1], vw.getNames()):
-            # assume that the xref to a case location is a switch construct
-            for switch_va, _, _, _ in vw.getXrefsTo(case_va):
-                switches.add(switch_va)
-
-        vw.metadata["switches"] = switches
-        return switches
-
-
-def get_functions_with_switch(vw):
-    if "functions_with_switch" in vw.metadata:
-        return vw.metadata["functions_with_switch"]
-    else:
-        functions = set()
-        for switch in get_switches(vw):
-            functions.add(vw.getFunction(switch))
-        vw.metadata["functions_with_switch"] = functions
-        return functions
-
-
-def extract_function_switch(f):
-    """
-    parse if a function contains a switch statement based on location names
-    method can be optimized
-    """
-    if f.va in get_functions_with_switch(f.vw):
-        yield Characteristic("switch"), f.va
-
-
 def extract_function_calls_to(f):
    for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
        yield Characteristic("calls to"), src
@@ -106,4 +67,4 @@ def extract_features(f):
            yield feature, va


-FUNCTION_HANDLERS = (extract_function_switch, extract_function_calls_to, extract_function_loop)
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
--- a/capa/features/extractors/viv/helpers.py
+++ b/capa/features/extractors/viv/helpers.py
@@ -0,0 +1,20 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+from vivisect.const import XR_TO, REF_CODE
+
+
+def get_coderef_from(vw, va):
+    """
+    look up the code references that originate at the specified va.
+    return the `tova` of the first one, or None if there is none.
+    """
+    xrefs = vw.getXrefsFrom(va, REF_CODE)
+    if len(xrefs) == 0:
+        return None
+    return xrefs[0][XR_TO]
--- a/capa/features/extractors/viv/indirect_calls.py
+++ b/capa/features/extractors/viv/indirect_calls.py
@@ -132,7 +132,7 @@ def is_indirect_call(vw, va, insn=None):
    if insn is None:
        insn = vw.parseOpcode(va)

-    return insn.mnem == "call" and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
+    return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)


 def resolve_indirect_call(vw, va, insn=None):
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -7,12 +7,20 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import envi.memory
-import vivisect.const
 import envi.archs.i386.disasm

 import capa.features.extractors.helpers
-from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
-from capa.features.insn import Number, Offset, Mnemonic
+import capa.features.extractors.viv.helpers
+from capa.features import (
+    ARCH_X32,
+    ARCH_X64,
+    MAX_BYTES_FEATURE_SIZE,
+    THUNK_CHAIN_DEPTH_DELTA,
+    Bytes,
+    String,
+    Characteristic,
+)
+from capa.features.insn import API, Number, Offset, Mnemonic
 from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call

 # security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -47,11 +55,15 @@ def get_imports(vw):
    """
    caching accessor to vivisect workspace imports
    avoids performance issues in vivisect when collecting locations
+
+    returns: Dict[int, Tuple[str, str]]
    """
    if "imports" in vw.metadata:
        return vw.metadata["imports"]
    else:
-        imports = {p[0]: p[3] for p in vw.getImports()}
+        imports = {
+            p[0]: (p[3].rpartition(".")[0], p[3].replace(".ord", ".#").rpartition(".")[2]) for p in vw.getImports()
+        }
        vw.metadata["imports"] = imports
        return imports

@@ -63,35 +75,51 @@ def extract_insn_api_features(f, bb, insn):
    #
    #    call dword [0x00473038]

-    if insn.mnem != "call":
+    if insn.mnem not in ("call", "jmp"):
        return

+    if insn.mnem == "jmp":
+        if f.vw.getFunctionMeta(f.va, "Thunk"):
+            return
+
    # traditional call via IAT
    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
        oper = insn.opers[0]
        target = oper.getOperAddr(insn)

        imports = get_imports(f.vw)
-        if target in imports.keys():
-            for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va):
-                yield feature, va
+        if target in imports:
+            dll, symbol = imports[target]
+            for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
+                yield API(name), insn.va

    # call via thunk on x86,
    # see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
    #
    # this is also how calls to internal functions may be decoded on x64.
    # see Lab21-01.exe_:0x140001178
-    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
-        target = insn.opers[0].getOperValue(insn)
+    #
+    # follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
+    # 0x140059342 (viv) / 0x14005E0C0 (IDA)
+    # 14005E0FF call    j_ElfClearEventLogFileW (14005AAF8)
+    #   14005AAF8 jmp     ElfClearEventLogFileW (14005E196)
+    #     14005E196 jmp     cs:__imp_ElfClearEventLogFileW

-        try:
-            thunk = f.vw.getFunctionMeta(target, "Thunk")
-        except vivisect.exc.InvalidFunction:
+    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
+        imports = get_imports(f.vw)
+        target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, insn.va)
+        if not target:
            return
-        else:
-            if thunk:
-                for feature, va in capa.features.extractors.helpers.generate_api_features(thunk, insn.va):
-                    yield feature, va
+
+        for _ in range(THUNK_CHAIN_DEPTH_DELTA):
+            if target in imports:
+                dll, symbol = imports[target]
+                for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
+                    yield API(name), insn.va
+
+            target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, target)
+            if not target:
+                return

    # call via import on x64
    # see Lab21-01.exe_:0x14000118C
@@ -100,9 +128,10 @@ def extract_insn_api_features(f, bb, insn):
        target = op.getOperAddr(insn)

        imports = get_imports(f.vw)
-        if target in imports.keys():
-            for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va):
-                yield feature, va
+        if target in imports:
+            dll, symbol = imports[target]
+            for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
+                yield API(name), insn.va

    elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
        try:
@@ -116,9 +145,10 @@ def extract_insn_api_features(f, bb, insn):
            return

        imports = get_imports(f.vw)
-        if target in imports.keys():
-            for feature, va in capa.features.extractors.helpers.generate_api_features(imports[target], insn.va):
-                yield feature, va
+        if target in imports:
+            dll, symbol = imports[target]
+            for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
+                yield API(name), insn.va


 def extract_insn_number_features(f, bb, insn):
@@ -128,10 +158,13 @@ def extract_insn_number_features(f, bb, insn):
    #     push    3136B0h         ; dwControlCode
    for oper in insn.opers:
        # this is for both x32 and x64
-        if not isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
+        if not isinstance(oper, (envi.archs.i386.disasm.i386ImmOper, envi.archs.i386.disasm.i386ImmMemOper)):
            continue

-        v = oper.getOperValue(oper)
+        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
+            v = oper.getOperValue(oper)
+        else:
+            v = oper.getOperAddr(oper)

        if f.vw.probeMemory(v, 1, envi.memory.MM_READ):
            # this is a valid address
@@ -162,7 +195,12 @@ def derefs(vw, p):
            return
        yield p

-        next = vw.readMemoryPtr(p)
+        try:
+            next = vw.readMemoryPtr(p)
+        except Exception:
+            # if not enough bytes can be read, such as end of the section.
+            # unfortunately, viv returns a plain old generic `Exception` for this.
+            return

        # sanity: pointer points to self
        if next == p:
@@ -220,10 +258,10 @@ def extract_insn_bytes_features(f, bb, insn):
    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
-    for oper in insn.opers:
-        if insn.mnem == "call":
-            continue
+    if insn.mnem == "call":
+        return

+    for oper in insn.opers:
        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
            v = oper.getOperValue(oper)
        elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
@@ -273,6 +311,10 @@ def read_string(vw, offset):
                # vivisect seems to mis-detect the end unicode strings
                # off by one, too short
                ulen += 1
+            else:
+                # vivisect seems to mis-detect the end unicode strings
+                # off by two, too short
+                ulen += 2
            return read_memory(vw, offset, ulen).decode("utf-16")

    raise ValueError("not a string", offset)
@@ -287,6 +329,9 @@ def extract_insn_string_features(f, bb, insn):
    for oper in insn.opers:
        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
            v = oper.getOperValue(oper)
+        elif isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper):
+            # like 0x10056CB4 in `lea eax, dword [0x10056CB4]`
+            v = oper.imm
        elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
            # like 0x401000 in `mov eax, 0x401000[2 * ebx]`
            v = oper.imm
@@ -310,25 +355,38 @@ def extract_insn_offset_features(f, bb, insn):
    #
    #     .text:0040112F    cmp     [esi+4], ebx
    for oper in insn.opers:
+
        # this is for both x32 and x64
-        if not isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
-            continue
+        # like [esi + 4]
+        #       reg   ^
+        #             disp
+        if isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
+            if oper.reg == envi.archs.i386.disasm.REG_ESP:
+                continue

-        if oper.reg == envi.archs.i386.disasm.REG_ESP:
-            continue
+            if oper.reg == envi.archs.i386.disasm.REG_EBP:
+                continue

-        if oper.reg == envi.archs.i386.disasm.REG_EBP:
-            continue
+            # TODO: do x64 support for real.
+            if oper.reg == envi.archs.amd64.disasm.REG_RBP:
+                continue

-        # TODO: do x64 support for real.
-        if oper.reg == envi.archs.amd64.disasm.REG_RBP:
-            continue
+            # viv already decodes offsets as signed
+            v = oper.disp

-        # viv already decodes offsets as signed
-        v = oper.disp
+            yield Offset(v), insn.va
+            yield Offset(v, arch=get_arch(f.vw)), insn.va

-        yield Offset(v), insn.va
-        yield Offset(v, arch=get_arch(f.vw)), insn.va
+        # like: [esi + ecx + 16384]
+        #        reg   ^     ^
+        #              index ^
+        #                    disp
+        elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
+            # viv already decodes offsets as signed
+            v = oper.disp
+
+            yield Offset(v), insn.va
+            yield Offset(v, arch=get_arch(f.vw)), insn.va


 def is_security_cookie(f, bb, insn):
@@ -364,7 +422,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
    parse non-zeroing XOR instruction from the given instruction.
    ignore expected non-zeroing XORs, e.g. security cookies.
    """
-    if insn.mnem != "xor":
+    if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"):
        return

    if insn.opers[0] == insn.opers[1]:
@@ -390,7 +448,9 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
    if insn.mnem not in ["push", "mov"]:
        return

-    if "fs" in insn.getPrefixName():
+    prefix = insn.getPrefixName()
+
+    if "fs" in prefix:
        for oper in insn.opers:
            # examples
            #
@@ -403,10 +463,12 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
                isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
            ):
                yield Characteristic("peb access"), insn.va
-    elif "gs" in insn.getPrefixName():
+    elif "gs" in prefix:
        for oper in insn.opers:
-            if (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60) or (
-                isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60
+            if (
+                (isinstance(oper, envi.archs.amd64.disasm.i386RegMemOper) and oper.disp == 0x60)
+                or (isinstance(oper, envi.archs.amd64.disasm.i386SibOper) and oper.imm == 0x60)
+                or (isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60)
            ):
                yield Characteristic("peb access"), insn.va
    else:
--- a/capa/features/freeze.py
+++ b/capa/features/freeze.py
@@ -5,6 +5,7 @@ json format:

    {
      'version': 1,
+      'base address': int(base address),
      'functions': {
        int(function va): {
          'basic blocks': {
@@ -84,7 +85,17 @@ def dumps(extractor):
    returns:
      str: the serialized features.
    """
-    ret = {"version": 1, "functions": {}, "scopes": {"file": [], "function": [], "basic block": [], "instruction": [],}}
+    ret = {
+        "version": 1,
+        "base address": extractor.get_base_address(),
+        "functions": {},
+        "scopes": {
+            "file": [],
+            "function": [],
+            "basic block": [],
+            "instruction": [],
+        },
+    }

    for feature, va in extractor.extract_file_features():
        ret["scopes"]["file"].append(serialize_feature(feature) + (hex(va), ()))
@@ -99,14 +110,33 @@ def dumps(extractor):
            ret["functions"][hex(f)][hex(bb)] = []

            for feature, va in extractor.extract_basic_block_features(f, bb):
-                ret["scopes"]["basic block"].append(serialize_feature(feature) + (hex(va), (hex(f), hex(bb),)))
+                ret["scopes"]["basic block"].append(
+                    serialize_feature(feature)
+                    + (
+                        hex(va),
+                        (
+                            hex(f),
+                            hex(bb),
+                        ),
+                    )
+                )

-            for insn, insnva in sorted([(insn, int(insn)) for insn in extractor.get_instructions(f, bb)]):
+            for insnva, insn in sorted(
+                [(insn.__int__(), insn) for insn in extractor.get_instructions(f, bb)], key=lambda p: p[0]
+            ):
                ret["functions"][hex(f)][hex(bb)].append(hex(insnva))

                for feature, va in extractor.extract_insn_features(f, bb, insn):
                    ret["scopes"]["instruction"].append(
-                        serialize_feature(feature) + (hex(va), (hex(f), hex(bb), hex(insnva),))
+                        serialize_feature(feature)
+                        + (
+                            hex(va),
+                            (
+                                hex(f),
+                                hex(bb),
+                                hex(insnva),
+                            ),
+                        )
                    )
    return json.dumps(ret)

@@ -119,6 +149,7 @@ def loads(s):
        raise ValueError("unsupported freeze format version: %d" % (doc.get("version")))

    features = {
+        "base address": doc.get("base address"),
        "file features": [],
        "functions": {},
    }
@@ -245,12 +276,7 @@ def main(argv=None):
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

-    vw = capa.main.get_workspace(args.sample, args.format)
-
-    # don't import this at top level to support ida/py3 backend
-    import capa.features.extractors.viv
-
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, args.sample)
+    extractor = capa.main.get_extractor(args.sample, args.format)
    with open(args.output, "wb") as f:
        f.write(dump(extractor))

--- a/capa/features/insn.py
+++ b/capa/features/insn.py
@@ -13,10 +13,10 @@ class API(Feature):
    def __init__(self, name, description=None):
        # Downcase library name if given
        if "." in name:
-            modname, impname = name.split(".")
+            modname, _, impname = name.rpartition(".")
            name = modname.lower() + "." + impname

-        super(API, self).__init__(name, description)
+        super(API, self).__init__(name, description=description)


 class Number(Feature):
@@ -37,4 +37,4 @@ class Offset(Feature):

 class Mnemonic(Feature):
    def __init__(self, value, description=None):
-        super(Mnemonic, self).__init__(value, description=description)
+        super(Mnemonic, self).__init__(value.lower(), description=description)
--- a/capa/ida/explorer/init.py
+++ b/capa/ida/explorer/init.py
--- a/capa/ida/explorer/proxy.py
+++ b/capa/ida/explorer/proxy.py
@@ -1,89 +0,0 @@
-# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at: [package root]/LICENSE.txt
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and limitations under the License.
-
-from PyQt5 import QtCore
-
-from capa.ida.explorer.model import CapaExplorerDataModel
-
-
-class CapaExplorerSortFilterProxyModel(QtCore.QSortFilterProxyModel):
-    def __init__(self, parent=None):
-        """ """
-        super(CapaExplorerSortFilterProxyModel, self).__init__(parent)
-
-    def lessThan(self, left, right):
-        """ true if the value of the left item is less than value of right item
-
-            @param left: QModelIndex*
-            @param right: QModelIndex*
-
-            @retval True/False
-        """
-        ldata = left.internalPointer().data(left.column())
-        rdata = right.internalPointer().data(right.column())
-
-        if (
-            ldata
-            and rdata
-            and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS
-            and left.column() == right.column()
-        ):
-            # convert virtual address before compare
-            return int(ldata, 16) < int(rdata, 16)
-        else:
-            # compare as lowercase
-            return ldata.lower() < rdata.lower()
-
-    def filterAcceptsRow(self, row, parent):
-        """ true if the item in the row indicated by the given row and parent
-            should be included in the model; otherwise returns false
-
-            @param row: int
-            @param parent: QModelIndex*
-
-            @retval True/False
-        """
-        if self.filter_accepts_row_self(row, parent):
-            return True
-
-        alpha = parent
-        while alpha.isValid():
-            if self.filter_accepts_row_self(alpha.row(), alpha.parent()):
-                return True
-            alpha = alpha.parent()
-
-        if self.index_has_accepted_children(row, parent):
-            return True
-
-        return False
-
-    def add_single_string_filter(self, column, string):
-        """ add fixed string filter
-
-            @param column: key column
-            @param string: string to sort
-        """
-        self.setFilterKeyColumn(column)
-        self.setFilterFixedString(string)
-
-    def index_has_accepted_children(self, row, parent):
-        """ """
-        model_index = self.sourceModel().index(row, 0, parent)
-
-        if model_index.isValid():
-            for idx in range(self.sourceModel().rowCount(model_index)):
-                if self.filter_accepts_row_self(idx, model_index):
-                    return True
-                if self.index_has_accepted_children(idx, model_index):
-                    return True
-
-        return False
-
-    def filter_accepts_row_self(self, row, parent):
-        """ """
-        return super(CapaExplorerSortFilterProxyModel, self).filterAcceptsRow(row, parent)
--- a/capa/ida/helpers/init.py
+++ b/capa/ida/helpers/init.py
@@ -46,7 +46,6 @@ def is_supported_ida_version():
        logger.warning(
            "Your IDA Pro version is: %s. Supported versions are: %s." % (version, ", ".join(SUPPORTED_IDA_VERSIONS))
        )
-        capa.ida.helpers.inform_user_ida_ui(warning_msg)
        return False
    return True

@@ -62,7 +61,6 @@ def is_supported_file_type():
        )
        logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
        logger.error("-" * 80)
-        inform_user_ida_ui("capa does not support the format of this file")
        return False
    return True

@@ -102,6 +100,10 @@ def collect_metadata():
            "sha256": sha256,
            "path": idaapi.get_input_file_path(),
        },
-        "analysis": {"format": idaapi.get_file_type_name(), "extractor": "ida",},
+        "analysis": {
+            "format": idaapi.get_file_type_name(),
+            "extractor": "ida",
+            "base_address": idaapi.get_imagebase(),
+        },
        "version": capa.version.__version__,
    }
--- a/capa/ida/ida_capa_explorer.py
+++ b/capa/ida/ida_capa_explorer.py
@@ -1,573 +0,0 @@
-# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at: [package root]/LICENSE.txt
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and limitations under the License.
-
-import os
-import json
-import logging
-import collections
-
-import idaapi
-from PyQt5 import QtGui, QtCore, QtWidgets
-
-import capa.main
-import capa.rules
-import capa.ida.helpers
-import capa.render.utils as rutils
-import capa.features.extractors.ida
-from capa.ida.explorer.view import CapaExplorerQtreeView
-from capa.ida.explorer.model import CapaExplorerDataModel
-from capa.ida.explorer.proxy import CapaExplorerSortFilterProxyModel
-
-PLUGIN_NAME = "capa explorer"
-
-logger = logging.getLogger("capa")
-
-
-class CapaExplorerIdaHooks(idaapi.UI_Hooks):
-    def __init__(self, screen_ea_changed_hook, action_hooks):
-        """ facilitate IDA UI hooks
-
-            @param screen_ea_changed_hook: function hook for IDA screen ea changed
-            @param action_hooks: dict of IDA action handles
-        """
-        super(CapaExplorerIdaHooks, self).__init__()
-
-        self.screen_ea_changed_hook = screen_ea_changed_hook
-        self.process_action_hooks = action_hooks
-        self.process_action_handle = None
-        self.process_action_meta = {}
-
-    def preprocess_action(self, name):
-        """ called prior to action completed
-
-            @param name: name of action defined by idagui.cfg
-
-            @retval must be 0
-        """
-        self.process_action_handle = self.process_action_hooks.get(name, None)
-
-        if self.process_action_handle:
-            self.process_action_handle(self.process_action_meta)
-
-        # must return 0 for IDA
-        return 0
-
-    def postprocess_action(self):
-        """ called after action completed """
-        if not self.process_action_handle:
-            return
-
-        self.process_action_handle(self.process_action_meta, post=True)
-        self.reset()
-
-    def screen_ea_changed(self, curr_ea, prev_ea):
-        """ called after screen location is changed
-
-            @param curr_ea: current location
-            @param prev_ea: prev location
-        """
-        self.screen_ea_changed_hook(idaapi.get_current_widget(), curr_ea, prev_ea)
-
-    def reset(self):
-        """ reset internal state """
-        self.process_action_handle = None
-        self.process_action_meta.clear()
-
-
-class CapaExplorerForm(idaapi.PluginForm):
-    def __init__(self):
-        """ """
-        super(CapaExplorerForm, self).__init__()
-
-        self.form_title = PLUGIN_NAME
-        self.file_loc = __file__
-
-        self.parent = None
-        self.ida_hooks = None
-        self.doc = None
-
-        # models
-        self.model_data = None
-        self.model_proxy = None
-
-        # user interface elements
-        self.view_limit_results_by_function = None
-        self.view_tree = None
-        self.view_summary = None
-        self.view_attack = None
-        self.view_tabs = None
-        self.view_menu_bar = None
-
-    def OnCreate(self, form):
-        """ """
-        self.parent = self.FormToPyQtWidget(form)
-        self.load_interface()
-        self.load_capa_results()
-        self.load_ida_hooks()
-
-        self.view_tree.reset()
-
-        logger.info("form created.")
-
-    def Show(self):
-        """ """
-        return idaapi.PluginForm.Show(
-            self, self.form_title, options=(idaapi.PluginForm.WOPN_TAB | idaapi.PluginForm.WCLS_CLOSE_LATER)
-        )
-
-    def OnClose(self, form):
-        """ form is closed """
-        self.unload_ida_hooks()
-        self.ida_reset()
-
-        logger.info("form closed.")
-
-    def load_interface(self):
-        """ load user interface """
-        # load models
-        self.model_data = CapaExplorerDataModel()
-        self.model_proxy = CapaExplorerSortFilterProxyModel()
-        self.model_proxy.setSourceModel(self.model_data)
-
-        # load tree
-        self.view_tree = CapaExplorerQtreeView(self.model_proxy, self.parent)
-
-        # load summary table
-        self.load_view_summary()
-        self.load_view_attack()
-
-        # load parent tab and children tab views
-        self.load_view_tabs()
-        self.load_view_checkbox_limit_by()
-        self.load_view_summary_tab()
-        self.load_view_attack_tab()
-        self.load_view_tree_tab()
-
-        # load menu bar and sub menus
-        self.load_view_menu_bar()
-        self.load_file_menu()
-
-        # load parent view
-        self.load_view_parent()
-
-    def load_view_tabs(self):
-        """ load tabs """
-        tabs = QtWidgets.QTabWidget()
-        self.view_tabs = tabs
-
-    def load_view_menu_bar(self):
-        """ load menu bar """
-        bar = QtWidgets.QMenuBar()
-        self.view_menu_bar = bar
-
-    def load_view_summary(self):
-        """ load capa summary table """
-        table_headers = [
-            "Capability",
-            "Namespace",
-        ]
-
-        table = QtWidgets.QTableWidget()
-
-        table.setColumnCount(len(table_headers))
-        table.verticalHeader().setVisible(False)
-        table.setSortingEnabled(False)
-        table.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
-        table.setFocusPolicy(QtCore.Qt.NoFocus)
-        table.setSelectionMode(QtWidgets.QAbstractItemView.NoSelection)
-        table.setHorizontalHeaderLabels(table_headers)
-        table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
-        table.setShowGrid(False)
-        table.setStyleSheet("QTableWidget::item { padding: 25px; }")
-
-        self.view_summary = table
-
-    def load_view_attack(self):
-        """ load MITRE ATT&CK table """
-        table_headers = [
-            "ATT&CK Tactic",
-            "ATT&CK Technique ",
-        ]
-
-        table = QtWidgets.QTableWidget()
-
-        table.setColumnCount(len(table_headers))
-        table.verticalHeader().setVisible(False)
-        table.setSortingEnabled(False)
-        table.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
-        table.setFocusPolicy(QtCore.Qt.NoFocus)
-        table.setSelectionMode(QtWidgets.QAbstractItemView.NoSelection)
-        table.setHorizontalHeaderLabels(table_headers)
-        table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
-        table.setShowGrid(False)
-        table.setStyleSheet("QTableWidget::item { padding: 25px; }")
-
-        self.view_attack = table
-
-    def load_view_checkbox_limit_by(self):
-        """ load limit results by function checkbox """
-        check = QtWidgets.QCheckBox("Limit results to current function")
-        check.setChecked(False)
-        check.stateChanged.connect(self.slot_checkbox_limit_by_changed)
-
-        self.view_limit_results_by_function = check
-
-    def load_view_parent(self):
-        """ load view parent """
-        layout = QtWidgets.QVBoxLayout()
-
-        layout.addWidget(self.view_tabs)
-        layout.setMenuBar(self.view_menu_bar)
-
-        self.parent.setLayout(layout)
-
-    def load_view_tree_tab(self):
-        """ load capa tree tab view """
-        layout = QtWidgets.QVBoxLayout()
-        layout.addWidget(self.view_limit_results_by_function)
-        layout.addWidget(self.view_tree)
-
-        tab = QtWidgets.QWidget()
-        tab.setLayout(layout)
-
-        self.view_tabs.addTab(tab, "Tree View")
-
-    def load_view_summary_tab(self):
-        """ load capa summary tab view """
-        layout = QtWidgets.QVBoxLayout()
-        layout.addWidget(self.view_summary)
-
-        tab = QtWidgets.QWidget()
-        tab.setLayout(layout)
-
-        self.view_tabs.addTab(tab, "Summary")
-
-    def load_view_attack_tab(self):
-        """ load MITRE ATT&CK tab view """
-        layout = QtWidgets.QVBoxLayout()
-        layout.addWidget(self.view_attack)
-
-        tab = QtWidgets.QWidget()
-        tab.setLayout(layout)
-
-        self.view_tabs.addTab(tab, "MITRE")
-
-    def load_file_menu(self):
-        """ load file menu actions """
-        actions = (
-            ("Reset view", "Reset plugin view", self.reset),
-            ("Run analysis", "Run capa analysis on current database", self.reload),
-            ("Export results...", "Export capa results as JSON file", self.export_json),
-        )
-
-        menu = self.view_menu_bar.addMenu("File")
-        for (name, _, handle) in actions:
-            action = QtWidgets.QAction(name, self.parent)
-            action.triggered.connect(handle)
-            menu.addAction(action)
-
-    def export_json(self):
-        """ export capa results as JSON file """
-        if not self.doc:
-            idaapi.info("No capa results to export.")
-            return
-        path = idaapi.ask_file(True, "*.json", "Choose file")
-        if os.path.exists(path) and 1 != idaapi.ask_yn(1, "File already exists. Overwrite?"):
-            return
-        with open(path, "wb") as export_file:
-            export_file.write(
-                json.dumps(self.doc, sort_keys=True, cls=capa.render.CapaJsonObjectEncoder).encode("utf-8")
-            )
-
-    def load_ida_hooks(self):
-        """ load IDA Pro UI hooks """
-        action_hooks = {
-            "MakeName": self.ida_hook_rename,
-            "EditFunction": self.ida_hook_rename,
-        }
-
-        self.ida_hooks = CapaExplorerIdaHooks(self.ida_hook_screen_ea_changed, action_hooks)
-        self.ida_hooks.hook()
-
-    def unload_ida_hooks(self):
-        """ unload IDA Pro UI hooks """
-        if self.ida_hooks:
-            self.ida_hooks.unhook()
-
-    def ida_hook_rename(self, meta, post=False):
-        """ hook for IDA rename action
-
-            called twice, once before action and once after
-            action completes
-
-            @param meta: metadata cache
-            @param post: indicates pre or post action
-        """
-        location = idaapi.get_screen_ea()
-        if not location or not capa.ida.helpers.is_func_start(location):
-            return
-
-        curr_name = idaapi.get_name(location)
-
-        if post:
-            # post action update data model w/ current name
-            self.model_data.update_function_name(meta.get("prev_name", ""), curr_name)
-        else:
-            # pre action so save current name for replacement later
-            meta["prev_name"] = curr_name
-
-    def ida_hook_screen_ea_changed(self, widget, new_ea, old_ea):
-        """ hook for IDA screen ea changed
-
-            @param widget: IDA widget type
-            @param new_ea: destination ea
-            @param old_ea: source ea
-         """
-        if not self.view_limit_results_by_function.isChecked():
-            # ignore if checkbox not selected
-            return
-
-        if idaapi.get_widget_type(widget) != idaapi.BWN_DISASM:
-            # ignore views other than asm
-            return
-
-        # attempt to map virtual addresses to function start addresses
-        new_func_start = capa.ida.helpers.get_func_start_ea(new_ea)
-        old_func_start = capa.ida.helpers.get_func_start_ea(old_ea)
-
-        if new_func_start and new_func_start == old_func_start:
-            # navigated within the same function - do nothing
-            return
-
-        if new_func_start:
-            # navigated to new function - filter for function start virtual address
-            match = capa.ida.explorer.item.location_to_hex(new_func_start)
-        else:
-            # navigated to virtual address not in valid function - clear filter
-            match = ""
-
-        # filter on virtual address to avoid updating filter string if function name is changed
-        self.model_proxy.add_single_string_filter(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS, match)
-        self.view_tree.resize_columns_to_content()
-
-    def load_capa_results(self):
-        """ run capa analysis and render results in UI """
-        logger.info("-" * 80)
-        logger.info(" Using default embedded rules.")
-        logger.info(" ")
-        logger.info(" You can see the current default rule set here:")
-        logger.info("     https://github.com/fireeye/capa-rules")
-        logger.info("-" * 80)
-
-        rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
-        rules = capa.main.get_rules(rules_path)
-        rules = capa.rules.RuleSet(rules)
-
-        meta = capa.ida.helpers.collect_metadata()
-
-        capabilities, counts = capa.main.find_capabilities(
-            rules, capa.features.extractors.ida.IdaFeatureExtractor(), True
-        )
-        meta["analysis"].update(counts)
-
-        # support binary files specifically for x86/AMD64 shellcode
-        # warn user binary file is loaded but still allow capa to process it
-        # TODO: check specific architecture of binary files based on how user configured IDA processors
-        if idaapi.get_file_type_name() == "Binary file":
-            logger.warning("-" * 80)
-            logger.warning(" Input file appears to be a binary file.")
-            logger.warning(" ")
-            logger.warning(
-                " capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA."
-            )
-            logger.warning(
-                " This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64."
-            )
-            logger.warning(" If you don't know the input file type, you can try using the `file` utility to guess it.")
-            logger.warning("-" * 80)
-
-            capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
-
-        if capa.main.has_file_limitation(rules, capabilities, is_standalone=False):
-            capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
-
-        logger.info("analysis completed.")
-
-        self.doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
-
-        self.model_data.render_capa_doc(self.doc)
-        self.render_capa_doc_summary()
-        self.render_capa_doc_mitre_summary()
-
-        self.set_view_tree_default_sort_order()
-
-        logger.info("render views completed.")
-
-    def set_view_tree_default_sort_order(self):
-        """ set capa tree view default sort order """
-        self.view_tree.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)
-
-    def render_capa_doc_summary(self):
-        """ render capa summary results """
-        for (row, rule) in enumerate(rutils.capability_rules(self.doc)):
-            count = len(rule["matches"])
-
-            if count == 1:
-                capability = rule["meta"]["name"]
-            else:
-                capability = "%s (%d matches)" % (rule["meta"]["name"], count)
-
-            self.view_summary.setRowCount(row + 1)
-
-            self.view_summary.setItem(row, 0, self.render_new_table_header_item(capability))
-            self.view_summary.setItem(row, 1, QtWidgets.QTableWidgetItem(rule["meta"]["namespace"]))
-
-        # resize columns to content
-        self.view_summary.resizeColumnsToContents()
-
-    def render_capa_doc_mitre_summary(self):
-        """ render capa MITRE ATT&CK results """
-        tactics = collections.defaultdict(set)
-
-        for rule in rutils.capability_rules(self.doc):
-            if not rule["meta"].get("att&ck"):
-                continue
-
-            for attack in rule["meta"]["att&ck"]:
-                tactic, _, rest = attack.partition("::")
-                if "::" in rest:
-                    technique, _, rest = rest.partition("::")
-                    subtechnique, _, id = rest.rpartition(" ")
-                    tactics[tactic].add((technique, subtechnique, id))
-                else:
-                    technique, _, id = rest.rpartition(" ")
-                    tactics[tactic].add((technique, id))
-
-        column_one = []
-        column_two = []
-
-        for (tactic, techniques) in sorted(tactics.items()):
-            column_one.append(tactic.upper())
-            # add extra space when more than one technique
-            column_one.extend(["" for i in range(len(techniques) - 1)])
-
-            for spec in sorted(techniques):
-                if len(spec) == 2:
-                    technique, id = spec
-                    column_two.append("%s %s" % (technique, id))
-                elif len(spec) == 3:
-                    technique, subtechnique, id = spec
-                    column_two.append("%s::%s %s" % (technique, subtechnique, id))
-                else:
-                    raise RuntimeError("unexpected ATT&CK spec format")
-
-        self.view_attack.setRowCount(max(len(column_one), len(column_two)))
-
-        for row, value in enumerate(column_one):
-            self.view_attack.setItem(row, 0, self.render_new_table_header_item(value))
-
-        for row, value in enumerate(column_two):
-            self.view_attack.setItem(row, 1, QtWidgets.QTableWidgetItem(value))
-
-        # resize columns to content
-        self.view_attack.resizeColumnsToContents()
-
-    def render_new_table_header_item(self, text):
-        """ create new table header item with default style """
-        item = QtWidgets.QTableWidgetItem(text)
-        item.setForeground(QtGui.QColor(88, 139, 174))
-
-        font = QtGui.QFont()
-        font.setBold(True)
-
-        item.setFont(font)
-
-        return item
-
-    def ida_reset(self):
-        """ reset IDA UI """
-        self.model_data.reset()
-        self.view_tree.reset()
-        self.view_limit_results_by_function.setChecked(False)
-        self.set_view_tree_default_sort_order()
-
-    def reload(self):
-        """ reload views and re-run capa analysis """
-        self.ida_reset()
-        self.model_proxy.invalidate()
-        self.model_data.clear()
-        self.view_summary.setRowCount(0)
-        self.load_capa_results()
-
-        logger.info("reload complete.")
-        idaapi.info("%s reload completed." % PLUGIN_NAME)
-
-    def reset(self):
-        """ reset UI elements
-
-            e.g. checkboxes and IDA highlighting
-        """
-        self.ida_reset()
-
-        logger.info("reset completed.")
-        idaapi.info("%s reset completed." % PLUGIN_NAME)
-
-    def slot_menu_bar_hovered(self, action):
-        """ display menu action tooltip
-
-            @param action: QtWidgets.QAction*
-
-            @reference: https://stackoverflow.com/questions/21725119/why-wont-qtooltips-appear-on-qactions-within-a-qmenu
-        """
-        QtWidgets.QToolTip.showText(
-            QtGui.QCursor.pos(), action.toolTip(), self.view_menu_bar, self.view_menu_bar.actionGeometry(action)
-        )
-
-    def slot_checkbox_limit_by_changed(self):
-        """ slot activated if checkbox clicked
-
-            if checked, configure function filter if screen location is located
-            in function, otherwise clear filter
-        """
-        match = ""
-        if self.view_limit_results_by_function.isChecked():
-            location = capa.ida.helpers.get_func_start_ea(idaapi.get_screen_ea())
-            if location:
-                match = capa.ida.explorer.item.location_to_hex(location)
-
-        self.model_proxy.add_single_string_filter(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS, match)
-
-        self.view_tree.resize_columns_to_content()
-
-
-def main():
-    """ TODO: move to idaapi.plugin_t class """
-    logging.basicConfig(level=logging.INFO)
-
-    if not capa.ida.helpers.is_supported_ida_version():
-        return -1
-
-    if not capa.ida.helpers.is_supported_file_type():
-        return -1
-
-    global CAPA_EXPLORER_FORM
-
-    try:
-        # there is an instance, reload it
-        CAPA_EXPLORER_FORM
-        CAPA_EXPLORER_FORM.Close()
-        CAPA_EXPLORER_FORM = CapaExplorerForm()
-    except Exception:
-        # there is no instance yet
-        CAPA_EXPLORER_FORM = CapaExplorerForm()
-
-    CAPA_EXPLORER_FORM.Show()
-
-
-if __name__ == "__main__":
-    main()
--- a/capa/ida/plugin/README.md
+++ b/capa/ida/plugin/README.md
@@ -0,0 +1,111 @@
+![capa explorer](../../../.github/capa-explorer-logo.png)
+
+capa explorer is an IDA Pro plugin written in Python that integrates the FLARE team's open-source framework, capa, with IDA. capa is a framework that uses a well-defined collection of rules to 
+identify capabilities in a program. You can run capa against a PE file or shellcode and it tells you what it thinks the program can do. For example, it might suggest that 
+the program is a backdoor, can install services, or relies on HTTP to communicate. You can use capa explorer to run capa directly on an IDA database without requiring access
+to the source binary. Once a database has been analyzed, capa explorer can be used to quickly identify and navigate to interesting areas of a program 
+and dissect capa rule matches at the assembly level.
+
+We love using capa explorer during malware analysis because it teaches us what parts of a program suggest a behavior. As we click on rows, capa explorer jumps directly 
+to important addresses in the IDA Pro database and highlights key features in the Disassembly view so they stand out visually. To illustrate, we use capa explorer to 
+analyze Lab 14-02 from [Practical Malware Analysis](https://nostarch.com/malware) (PMA) available [here](https://practicalmalwareanalysis.com/labs/). Our goal is to understand 
+the program's functionality.
+
+After loading Lab 14-02 into IDA and analyzing the database with capa explorer, we see that capa detected a rule match for `self delete via COMSPEC environment variable`:
+
+![](../../../doc/img/ida_plugin_example_1.png)
+
+We can use capa explorer to navigate the IDA Disassembly view directly to the suspect function and get an assembly-level breakdown of why capa matched `self delete via COMSPEC environment variable` 
+for this particular function.
+
+![](../../../doc/img/ida_plugin_example_2.png)
+
+Using the `Rule Information` and `Details` columns capa explorer shows us that the suspect function matched `self delete via COMSPEC environment variable` because it contains capa rule matches for `create process`, `get COMSPEC environment variable`,
+and `query environment variable`, references to the strings `COMSPEC`, ` > nul`, and `/c del`, and calls to the Windows API functions `GetEnvironmentVariableA` and `ShellExecuteEx`.
+
+For more information on the FLARE team's open-source framework, capa, check out the overview in our first [blog](https://www.fireeye.com/blog/threat-research/2020/07/capa-automatically-identify-malware-capabilities.html).
+
+## Features
+
+![](../../../doc/img/ida_plugin_intro.gif)
+
+* Display capa results in an interactive tree view of rule matches and their locations in the current database
+* Search for keywords or phrases found in the `Rule Information`, `Address`, or `Details` columns
+* Display rule source content when a user hovers their cursor over a rule match
+* Double-click `Address` column to view associated feature in the IDA Disassembly view
+* Limit tree view results to the function currently displayed in the IDA Disassembly view; update results as a user navigates to different functions
+* Export results as formatted JSON by navigating to `File > Export results...`
+* Remember a user's capa rules directory for future runs; change capa rules directory by navigating to `Rules > Change rules directory...`
+* Automatically re-analyze database when user performs a program rebase
+* Automatically update results when IDA is used to rename a function
+* Select one or more checkboxes to highlight the associated addresses in the IDA Disassembly view
+* Right-click a function match to rename it; the new function name is propagated to the current IDA database
+* Right-click to copy a result by column or by row
+* Sort results by column
+* Reset tree view and IDA Disassembly view highlighting by clicking `Reset`
+
+## Getting Started
+
+### Requirements
+
+capa explorer supports the following IDA setups:
+
+* IDA Pro 7.4+ with Python 2.7 or Python 3.
+
+If you encounter issues with your specific setup, please open a new [Issue](https://github.com/fireeye/capa/issues).
+
+### Supported File Types
+
+capa explorer is limited to the file types supported by capa, which include:
+
+* Windows 32-bit and 64-bit PE files
+* Windows 32-bit and 64-bit shellcode
+
+### Installation
+
+You can install capa explorer using the following steps:
+
+1. Install capa and its dependencies from PyPI for the Python interpreter used by your IDA installation:
+    ```
+    $ pip install flare-capa
+    ```
+2. Download the [standard collection of capa rules](https://github.com/fireeye/capa-rules) (capa explorer needs capa rules to analyze a database)
+3. Copy [capa_explorer.py](https://raw.githubusercontent.com/fireeye/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory
+
+### Usage
+
+1. Run IDA and analyze a supported file type (select the `Manual Load` and `Load Resources` options in IDA for best results)
+2. Open capa explorer in IDA by navigating to `Edit > Plugins > FLARE capa explorer` or using the keyboard shortcut `Alt+F5`
+3. Click the `Analyze` button
+
+When running capa explorer for the first time you are prompted to select a file directory containing capa rules. The plugin conveniently
+remembers your selection for future runs; you can change this selection by navigating to `Rules > Change rules directory...`. We recommend 
+downloading and using the [standard collection of capa rules](https://github.com/fireeye/capa-rules) when getting started with the plugin.
+
+#### Tips
+
+* Start analysis by clicking the `Analyze` button
+* Reset the plugin user interface and remove highlighting from IDA disassembly view by clicking the `Reset` button
+* Change your capa rules directory by navigating to `Rules > Change rules directory...` from the plugin menu
+* Hover your cursor over a rule match to view the source content of the rule
+* Double-click the `Address` column to navigate the IDA Disassembly view to the associated feature
+* Double-click a result in the `Rule Information` column to expand its children
+* Select a checkbox in the `Rule Information` column to highlight the address of the associated feature in the IDA Disassembly view
+
+## Development
+
+Because capa explorer is packaged with capa you will need to install capa locally for development.
+
+You can install capa locally by following the steps outlined in `Method 3: Inspecting the capa source code` of the [capa 
+installation guide](https://github.com/fireeye/capa/blob/master/doc/installation.md#method-3-inspecting-the-capa-source-code). Once installed, copy [capa_explorer.py](https://raw.githubusercontent.com/fireeye/capa/master/capa/ida/plugin/capa_explorer.py) 
+to your IDA plugins directory to run the plugin in IDA.
+
+### Components
+
+capa explorer consists of two main components:
+
+* An IDA [feature extractor](https://github.com/fireeye/capa/tree/master/capa/features/extractors/ida) built on top of IDA's binary analysis engine
+  * This component uses IDAPython to extract [capa features](https://github.com/fireeye/capa-rules/blob/master/doc/format.md#extracted-features) from the IDA database such as strings, 
+disassembly, and control flow; these extracted features are used by capa to find feature combinations that result in a rule match
+* An [interactive user interface](https://github.com/fireeye/capa/tree/master/capa/ida/plugin) for displaying and exploring capa rule matches
+  * This component integrates the IDA feature extractor and capa, providing an interactive user interface to dissect rule matches found by capa using features extracted by the IDA feature extractor
--- a/capa/ida/plugin/init.py
+++ b/capa/ida/plugin/init.py
@@ -0,0 +1,117 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import logging
+
+import idaapi
+import ida_kernwin
+
+from capa.ida.helpers import is_supported_file_type, is_supported_ida_version
+from capa.ida.plugin.form import CapaExplorerForm
+from capa.ida.plugin.icon import ICON
+
+logger = logging.getLogger(__name__)
+
+
+class CapaExplorerPlugin(idaapi.plugin_t):
+
+    # Mandatory definitions
+    PLUGIN_NAME = "FLARE capa explorer"
+    PLUGIN_VERSION = "1.0.0"
+    PLUGIN_AUTHORS = "michael.hunhoff@mandiant.com, william.ballenthin@mandiant.com, moritz.raabe@mandiant.com"
+
+    wanted_name = PLUGIN_NAME
+    wanted_hotkey = "ALT-F5"
+    comment = "IDA Pro plugin for the FLARE team's capa tool to identify capabilities in executable files."
+    website = "https://github.com/fireeye/capa"
+    help = "See https://github.com/fireeye/capa/blob/master/doc/usage.md"
+    version = ""
+    flags = 0
+
+    def __init__(self):
+        """initialize plugin"""
+        self.form = None
+
+    def init(self):
+        """called when IDA is loading the plugin"""
+        logging.basicConfig(level=logging.INFO)
+
+        # do not load plugin if IDA version/file type not supported
+        if not is_supported_ida_version():
+            return idaapi.PLUGIN_SKIP
+        if not is_supported_file_type():
+            return idaapi.PLUGIN_SKIP
+        return idaapi.PLUGIN_OK
+
+    def term(self):
+        """called when IDA is unloading the plugin"""
+        pass
+
+    def run(self, arg):
+        """called when IDA is running the plugin as a script"""
+        self.form = CapaExplorerForm(self.PLUGIN_NAME)
+        return True
+
+
+# set the capa plugin icon.
+#
+# TL;DR: temporarily install a UI hook to set the icon.
+#
+# Long form:
+#
+# in the IDAPython `plugin_t` life cycle,
+#   - `init` decides if a plugin should be registered
+#   - `run` executes the main logic (shows the window)
+#   - `term` cleans this up
+#
+# we want to associate an icon with the plugin action - which is created by IDA.
+# however, this action is created by IDA *after* `init` is called.
+# so, we can't do this in `plugin_t.init`.
+# we also can't spawn a thread and do it after a delay,
+#  since `ida_kernwin.update_action_icon` must be called from the main thread.
+# so we need to register a callback that's invoked from the main thread after the plugin is registered.
+#
+# after a lot of guess-and-check, we can use `UI_Hooks.updated_actions` to
+#  receive notifications after IDA has created an action for each plugin.
+# so, create this hook, wait for capa plugin to load, set the icon, and unhook.
+
+
+class OnUpdatedActionsHook(ida_kernwin.UI_Hooks):
+    """register a callback to be invoked each time the UI actions are updated"""
+
+    def __init__(self, cb):
+        super(OnUpdatedActionsHook, self).__init__()
+        self.cb = cb
+
+    def updated_actions(self):
+        if self.cb():
+            # uninstall the callback once it has run successfully
+            self.unhook()
+
+
+def install_icon():
+    plugin_name = CapaExplorerPlugin.PLUGIN_NAME
+    action_name = "Edit/Plugins/" + plugin_name
+
+    if action_name not in ida_kernwin.get_registered_actions():
+        # keep the hook registered
+        return False
+
+    # resource leak here. need to call `ida_kernwin.free_custom_icon`?
+    # however, since we're not cycling this icon a lot, it's probably ok.
+    # expect to leak exactly one icon per application load.
+    icon = ida_kernwin.load_custom_icon(data=ICON)
+
+    ida_kernwin.update_action_icon(action_name, icon)
+
+    # uninstall the hook
+    return True
+
+
+h = OnUpdatedActionsHook(install_icon)
+h.hook()
--- a/capa/ida/plugin/capa_explorer.py
+++ b/capa/ida/plugin/capa_explorer.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+from capa.ida.plugin import CapaExplorerPlugin
+
+
+def PLUGIN_ENTRY():
+    """mandatory entry point for IDAPython plugins
+
+    copy this script to your IDA plugins directory and start the plugin by navigating to Edit > Plugins in IDA Pro
+    """
+    return CapaExplorerPlugin()
--- a/capa/ida/plugin/form.py
+++ b/capa/ida/plugin/form.py
@@ -0,0 +1,766 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import os
+import json
+import logging
+import collections
+
+import idaapi
+import ida_kernwin
+import ida_settings
+from PyQt5 import QtGui, QtCore, QtWidgets
+
+import capa.main
+import capa.rules
+import capa.ida.helpers
+import capa.render.utils as rutils
+import capa.features.extractors.ida
+from capa.ida.plugin.icon import QICON
+from capa.ida.plugin.view import CapaExplorerQtreeView
+from capa.ida.plugin.hooks import CapaExplorerIdaHooks
+from capa.ida.plugin.model import CapaExplorerDataModel
+from capa.ida.plugin.proxy import CapaExplorerRangeProxyModel, CapaExplorerSearchProxyModel
+
+logger = logging.getLogger(__name__)
+settings = ida_settings.IDASettings("capa")
+
+
+class UserCancelledError(Exception):
+    """throw exception when user cancels action"""
+
+    pass
+
+
+class CapaExplorerProgressIndicator(QtCore.QObject):
+    """implement progress signal, used during feature extraction"""
+
+    progress = QtCore.pyqtSignal(str)
+
+    def __init__(self):
+        """initialize signal object"""
+        super(CapaExplorerProgressIndicator, self).__init__()
+
+    def update(self, text):
+        """emit progress update
+
+        check if user cancelled action, raise exception for parent function to catch
+        """
+        if ida_kernwin.user_cancelled():
+            raise UserCancelledError("user cancelled")
+        self.progress.emit("extracting features from %s" % text)
+
+
+class CapaExplorerFeatureExtractor(capa.features.extractors.ida.IdaFeatureExtractor):
+    """subclass the IdaFeatureExtractor
+
+    track progress during feature extraction, also allow user to cancel feature extraction
+    """
+
+    def __init__(self):
+        super(CapaExplorerFeatureExtractor, self).__init__()
+        self.indicator = CapaExplorerProgressIndicator()
+
+    def extract_function_features(self, f):
+        self.indicator.update("function at 0x%X" % f.start_ea)
+        return super(CapaExplorerFeatureExtractor, self).extract_function_features(f)
+
+
+class CapaExplorerForm(idaapi.PluginForm):
+    """form element for plugin interface"""
+
+    def __init__(self, name):
+        """initialize form elements"""
+        super(CapaExplorerForm, self).__init__()
+
+        self.form_title = name
+        self.rule_path = ""
+        self.process_total = 0
+        self.process_count = 0
+
+        self.parent = None
+        self.ida_hooks = None
+        self.doc = None
+
+        # models
+        self.model_data = None
+        self.range_model_proxy = None
+        self.search_model_proxy = None
+
+        # UI controls
+        self.view_limit_results_by_function = None
+        self.view_search_bar = None
+        self.view_tree = None
+        self.view_attack = None
+        self.view_tabs = None
+        self.view_menu_bar = None
+        self.view_status_label = None
+        self.view_buttons = None
+        self.view_analyze_button = None
+        self.view_reset_button = None
+
+        self.Show()
+
+    def OnCreate(self, form):
+        """called when plugin form is created
+
+        load interface and install hooks but do not analyze database
+        """
+        self.parent = self.FormToPyQtWidget(form)
+        self.parent.setWindowIcon(QICON)
+        self.load_interface()
+        self.load_ida_hooks()
+
+    def Show(self):
+        """creates form if not already created, else brings plugin to front"""
+        return super(CapaExplorerForm, self).Show(
+            self.form_title,
+            options=(
+                idaapi.PluginForm.WOPN_TAB
+                | idaapi.PluginForm.WOPN_RESTORE
+                | idaapi.PluginForm.WCLS_CLOSE_LATER
+                | idaapi.PluginForm.WCLS_SAVE
+            ),
+        )
+
+    def OnClose(self, form):
+        """called when form is closed
+
+        ensure any plugin modifications (e.g. hooks and UI changes) are reset before the plugin is closed
+        """
+        self.unload_ida_hooks()
+        self.model_data.reset()
+
+    def load_interface(self):
+        """load user interface"""
+        # load models
+        self.model_data = CapaExplorerDataModel()
+
+        # model <- filter range <- filter search <- view
+
+        self.range_model_proxy = CapaExplorerRangeProxyModel()
+        self.range_model_proxy.setSourceModel(self.model_data)
+
+        self.search_model_proxy = CapaExplorerSearchProxyModel()
+        self.search_model_proxy.setSourceModel(self.range_model_proxy)
+
+        self.view_tree = CapaExplorerQtreeView(self.search_model_proxy, self.parent)
+        self.load_view_attack()
+
+        # load parent tab and children tab views
+        self.load_view_tabs()
+        self.load_view_checkbox_limit_by()
+        self.load_view_search_bar()
+        self.load_view_tree_tab()
+        self.load_view_attack_tab()
+        self.load_view_status_label()
+        self.load_view_buttons()
+
+        # load menu bar and sub menus
+        self.load_view_menu_bar()
+        self.load_file_menu()
+        self.load_rules_menu()
+
+        # load parent view
+        self.load_view_parent()
+
+        self.disable_controls()
+
+    def load_view_tabs(self):
+        """load tabs"""
+        tabs = QtWidgets.QTabWidget()
+        self.view_tabs = tabs
+
+    def load_view_menu_bar(self):
+        """load menu bar"""
+        bar = QtWidgets.QMenuBar()
+        self.view_menu_bar = bar
+
+    def load_view_attack(self):
+        """load MITRE ATT&CK table"""
+        table_headers = [
+            "ATT&CK Tactic",
+            "ATT&CK Technique ",
+        ]
+
+        table = QtWidgets.QTableWidget()
+
+        table.setColumnCount(len(table_headers))
+        table.verticalHeader().setVisible(False)
+        table.setSortingEnabled(False)
+        table.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
+        table.setFocusPolicy(QtCore.Qt.NoFocus)
+        table.setSelectionMode(QtWidgets.QAbstractItemView.NoSelection)
+        table.setHorizontalHeaderLabels(table_headers)
+        table.horizontalHeader().setDefaultAlignment(QtCore.Qt.AlignLeft)
+        table.setShowGrid(False)
+        table.setStyleSheet("QTableWidget::item { padding: 25px; }")
+
+        self.view_attack = table
+
+    def load_view_checkbox_limit_by(self):
+        """load limit results by function checkbox"""
+        check = QtWidgets.QCheckBox("Limit results to current function")
+        check.setChecked(False)
+        check.stateChanged.connect(self.slot_checkbox_limit_by_changed)
+
+        self.view_limit_results_by_function = check
+
+    def load_view_status_label(self):
+        """load status label"""
+        label = QtWidgets.QLabel()
+        label.setAlignment(QtCore.Qt.AlignLeft)
+        label.setText("Click Analyze to get started...")
+
+        self.view_status_label = label
+
+    def load_view_buttons(self):
+        """load the button controls"""
+        analyze_button = QtWidgets.QPushButton("Analyze")
+        analyze_button.setToolTip("Run capa analysis on IDB")
+        reset_button = QtWidgets.QPushButton("Reset")
+        reset_button.setToolTip("Reset capa explorer and IDA user interfaces")
+
+        analyze_button.clicked.connect(self.slot_analyze)
+        reset_button.clicked.connect(self.slot_reset)
+
+        layout = QtWidgets.QHBoxLayout()
+        layout.addWidget(analyze_button)
+        layout.addWidget(reset_button)
+        layout.addStretch(1)
+
+        self.view_analyze_button = analyze_button
+        self.view_reset_button = reset_button
+        self.view_buttons = layout
+
+    def load_view_search_bar(self):
+        """load the search bar control"""
+        line = QtWidgets.QLineEdit()
+        line.setPlaceholderText("search...")
+        line.textChanged.connect(self.slot_limit_results_to_search)
+
+        self.view_search_bar = line
+
+    def load_view_parent(self):
+        """load view parent"""
+        layout = QtWidgets.QVBoxLayout()
+
+        layout.addWidget(self.view_tabs)
+        layout.addWidget(self.view_status_label)
+        layout.addLayout(self.view_buttons)
+        layout.setMenuBar(self.view_menu_bar)
+
+        self.parent.setLayout(layout)
+
+    def load_view_tree_tab(self):
+        """load tree view tab"""
+        layout = QtWidgets.QVBoxLayout()
+        layout.addWidget(self.view_limit_results_by_function)
+        layout.addWidget(self.view_search_bar)
+        layout.addWidget(self.view_tree)
+
+        tab = QtWidgets.QWidget()
+        tab.setLayout(layout)
+
+        self.view_tabs.addTab(tab, "Tree View")
+
+    def load_view_attack_tab(self):
+        """load MITRE ATT&CK view tab"""
+        layout = QtWidgets.QVBoxLayout()
+        layout.addWidget(self.view_attack)
+
+        tab = QtWidgets.QWidget()
+        tab.setLayout(layout)
+
+        self.view_tabs.addTab(tab, "MITRE")
+
+    def load_file_menu(self):
+        """load file menu controls"""
+        actions = (("Export results...", "Export capa results as JSON file", self.slot_export_json),)
+        self.load_menu("File", actions)
+
+    def load_rules_menu(self):
+        """load rules menu controls"""
+        actions = (("Change rules directory...", "Select new rules directory", self.slot_change_rules_dir),)
+        self.load_menu("Rules", actions)
+
+    def load_menu(self, title, actions):
+        """load menu actions
+
+        @param title: menu name displayed in UI
+        @param actions: tuple of tuples containing action name, tooltip, and slot function
+        """
+        menu = self.view_menu_bar.addMenu(title)
+        for (name, _, slot) in actions:
+            action = QtWidgets.QAction(name, self.parent)
+            action.triggered.connect(slot)
+            menu.addAction(action)
+
+    def slot_export_json(self):
+        """export capa results as JSON file"""
+        if not self.doc:
+            idaapi.info("No capa results to export.")
+            return
+
+        path = idaapi.ask_file(True, "*.json", "Choose file")
+
+        # user cancelled, entered blank input, etc.
+        if not path:
+            return
+
+        # check file exists, ask to override
+        if os.path.exists(path) and 1 != idaapi.ask_yn(1, "The selected file already exists. Overwrite?"):
+            return
+
+        with open(path, "wb") as export_file:
+            export_file.write(
+                json.dumps(self.doc, sort_keys=True, cls=capa.render.CapaJsonObjectEncoder).encode("utf-8")
+            )
+
+    def load_ida_hooks(self):
+        """load IDA UI hooks"""
+        # map named action (defined in idagui.cfg) to Python function
+        action_hooks = {
+            "MakeName": self.ida_hook_rename,
+            "EditFunction": self.ida_hook_rename,
+            "RebaseProgram": self.ida_hook_rebase,
+        }
+
+        self.ida_hooks = CapaExplorerIdaHooks(self.ida_hook_screen_ea_changed, action_hooks)
+        self.ida_hooks.hook()
+
+    def unload_ida_hooks(self):
+        """unload IDA Pro UI hooks
+
+        must be called before plugin is completely destroyed
+        """
+        if self.ida_hooks:
+            self.ida_hooks.unhook()
+
+    def ida_hook_rename(self, meta, post=False):
+        """function hook for IDA "MakeName" and "EditFunction" actions
+
+        called twice, once before action and once after action completes
+
+        @param meta: dict of key/value pairs set when action first called (may be empty)
+        @param post: False if action first call, True if action second call
+        """
+        location = idaapi.get_screen_ea()
+        if not location or not capa.ida.helpers.is_func_start(location):
+            return
+
+        curr_name = idaapi.get_name(location)
+
+        if post:
+            # post action update data model w/ current name
+            self.model_data.update_function_name(meta.get("prev_name", ""), curr_name)
+        else:
+            # pre action so save current name for replacement later
+            meta["prev_name"] = curr_name
+
+    def ida_hook_screen_ea_changed(self, widget, new_ea, old_ea):
+        """function hook for IDA "screen ea changed" action
+
+        called twice, once before action and once after action completes. this hook is currently only relevant
+        for limiting results displayed in the UI
+
+        @param widget: IDA widget type
+        @param new_ea: destination ea
+        @param old_ea: source ea
+        """
+        if not self.view_limit_results_by_function.isChecked():
+            # ignore if limit checkbox not selected
+            return
+
+        if idaapi.get_widget_type(widget) != idaapi.BWN_DISASM:
+            # ignore views not the assembly view
+            return
+
+        if idaapi.get_func(new_ea) == idaapi.get_func(old_ea):
+            # user navigated same function - ignore
+            return
+
+        self.limit_results_to_function(idaapi.get_func(new_ea))
+        self.view_tree.reset_ui()
+
+    def ida_hook_rebase(self, meta, post=False):
+        """function hook for IDA "RebaseProgram" action
+
+        called twice, once before action and once after action completes
+
+        @param meta: dict of key/value pairs set when action first called (may be empty)
+        @param post: False if action first call, True if action second call
+        """
+        if post:
+            if idaapi.get_imagebase() != meta.get("prev_base", -1):
+                capa.ida.helpers.inform_user_ida_ui("Running capa analysis again after program rebase")
+                self.slot_analyze()
+        else:
+            meta["prev_base"] = idaapi.get_imagebase()
+            self.model_data.reset()
+
+    def load_capa_results(self):
+        """run capa analysis and render results in UI
+
+        note: this function must always return, exception or not, in order for plugin to safely close the IDA
+        wait box
+        """
+        # new analysis, new doc
+        self.doc = None
+        self.process_total = 0
+        self.process_count = 1
+
+        def update_wait_box(text):
+            """update the IDA wait box"""
+            ida_kernwin.replace_wait_box("capa explorer...%s" % text)
+
+        def slot_progress_feature_extraction(text):
+            """slot function to handle feature extraction progress updates"""
+            update_wait_box("%s (%d of %d)" % (text, self.process_count, self.process_total))
+            self.process_count += 1
+
+        extractor = CapaExplorerFeatureExtractor()
+        extractor.indicator.progress.connect(slot_progress_feature_extraction)
+
+        update_wait_box("calculating analysis")
+
+        try:
+            self.process_total += len(tuple(extractor.get_functions()))
+        except Exception as e:
+            logger.error("Failed to calculate analysis (error: %s).", e)
+            return False
+
+        if ida_kernwin.user_cancelled():
+            logger.info("User cancelled analysis.")
+            return False
+
+        update_wait_box("loading rules")
+
+        try:
+            # resolve rules directory - check self and settings first, then ask user
+            if not self.rule_path:
+                if "rule_path" in settings and os.path.exists(settings["rule_path"]):
+                    self.rule_path = settings["rule_path"]
+                else:
+                    idaapi.info("Please select a file directory containing capa rules.")
+                    rule_path = self.ask_user_directory()
+                    if not rule_path:
+                        logger.warning(
+                            "You must select a file directory containing capa rules before analysis can be run. The standard collection of capa rules can be downloaded from https://github.com/fireeye/capa-rules."
+                        )
+                        return False
+                    self.rule_path = rule_path
+                    settings.user["rule_path"] = rule_path
+        except Exception as e:
+            logger.error("Failed to load capa rules (error: %s).", e)
+            return False
+
+        if ida_kernwin.user_cancelled():
+            logger.info("User cancelled analysis.")
+            return False
+
+        rule_path = self.rule_path
+
+        try:
+            if not os.path.exists(rule_path):
+                raise IOError("rule path %s does not exist or cannot be accessed" % rule_path)
+
+            rule_paths = []
+            if os.path.isfile(rule_path):
+                rule_paths.append(rule_path)
+            elif os.path.isdir(rule_path):
+                for root, dirs, files in os.walk(rule_path):
+                    if ".github" in root:
+                        # the .github directory contains CI config in capa-rules
+                        # this includes some .yml files
+                        # these are not rules
+                        continue
+                    for file in files:
+                        if not file.endswith(".yml"):
+                            if not (file.endswith(".md") or file.endswith(".git") or file.endswith(".txt")):
+                                # expect to see readme.md, format.md, and maybe a .git directory
+                                # other things maybe are rules, but are mis-named.
+                                logger.warning("skipping non-.yml file: %s", file)
+                            continue
+                        rule_path = os.path.join(root, file)
+                        rule_paths.append(rule_path)
+
+            rules = []
+            total_paths = len(rule_paths)
+            for (i, rule_path) in enumerate(rule_paths):
+                update_wait_box("loading capa rules from %s (%d of %d)" % (self.rule_path, i + 1, total_paths))
+                if ida_kernwin.user_cancelled():
+                    raise UserCancelledError("user cancelled")
+                try:
+                    rule = capa.rules.Rule.from_yaml_file(rule_path)
+                except capa.rules.InvalidRule:
+                    raise
+                else:
+                    rule.meta["capa/path"] = rule_path
+                    if capa.main.is_nursery_rule_path(rule_path):
+                        rule.meta["capa/nursery"] = True
+                    rules.append(rule)
+
+            rule_count = len(rules)
+            rules = capa.rules.RuleSet(rules)
+        except UserCancelledError:
+            logger.info("User cancelled analysis.")
+            return False
+        except Exception as e:
+            capa.ida.helpers.inform_user_ida_ui("Failed to load capa rules from %s" % self.rule_path)
+            logger.error("Failed to load rules from %s (error: %s).", self.rule_path, e)
+            logger.error(
+                "Make sure your file directory contains properly formatted capa rules. You can download the standard collection of capa rules from https://github.com/fireeye/capa-rules."
+            )
+            self.rule_path = ""
+            settings.user.del_value("rule_path")
+            return False
+
+        if ida_kernwin.user_cancelled():
+            logger.info("User cancelled analysis.")
+            return False
+
+        update_wait_box("extracting features")
+
+        try:
+            meta = capa.ida.helpers.collect_metadata()
+            capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
+            meta["analysis"].update(counts)
+        except UserCancelledError:
+            logger.info("User cancelled analysis.")
+            return False
+        except Exception as e:
+            logger.error("Failed to extract capabilities from database (error: %s)", e)
+            return False
+
+        update_wait_box("checking for file limitations")
+
+        try:
+            # support binary files specifically for x86/AMD64 shellcode
+            # warn user binary file is loaded but still allow capa to process it
+            # TODO: check specific architecture of binary files based on how user configured IDA processors
+            if idaapi.get_file_type_name() == "Binary file":
+                logger.warning("-" * 80)
+                logger.warning(" Input file appears to be a binary file.")
+                logger.warning(" ")
+                logger.warning(
+                    " capa currently only supports analyzing binary files containing x86/AMD64 shellcode with IDA."
+                )
+                logger.warning(
+                    " This means the results may be misleading or incomplete if the binary file loaded in IDA is not x86/AMD64."
+                )
+                logger.warning(
+                    " If you don't know the input file type, you can try using the `file` utility to guess it."
+                )
+                logger.warning("-" * 80)
+
+                capa.ida.helpers.inform_user_ida_ui("capa encountered file type warnings during analysis")
+
+            if capa.main.has_file_limitation(rules, capabilities, is_standalone=False):
+                capa.ida.helpers.inform_user_ida_ui("capa encountered file limitation warnings during analysis")
+        except Exception as e:
+            logger.error("Failed to check for file limitations (error: %s)", e)
+            return False
+
+        if ida_kernwin.user_cancelled():
+            logger.info("User cancelled analysis.")
+            return False
+
+        update_wait_box("rendering results")
+
+        try:
+            self.doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
+            self.model_data.render_capa_doc(self.doc)
+            self.render_capa_doc_mitre_summary()
+            self.enable_controls()
+            self.set_view_status_label("capa rules directory: %s (%d rules)" % (self.rule_path, rule_count))
+        except Exception as e:
+            logger.error("Failed to render results (error: %s)", e)
+            return False
+
+        return True
+
+    def render_capa_doc_mitre_summary(self):
+        """render MITRE ATT&CK results
+
+        groups the "att&ck" metadata of capability rules by tactic; column one of the table shows the
+        tactic (once per group) and column two shows the techniques that belong to it
+        """
+        grouped = collections.defaultdict(set)
+
+        for rule in rutils.capability_rules(self.doc):
+            # rules without ATT&CK metadata contribute nothing
+            for entry in rule["meta"].get("att&ck") or ():
+                # entries are parsed as "Tactic::Technique ID" or "Tactic::Technique::Subtechnique ID"
+                tactic, _, remainder = entry.partition("::")
+                if "::" not in remainder:
+                    technique, _, attack_id = remainder.rpartition(" ")
+                    grouped[tactic].add((technique, attack_id))
+                    continue
+                technique, _, remainder = remainder.partition("::")
+                subtechnique, _, attack_id = remainder.rpartition(" ")
+                grouped[tactic].add((technique, subtechnique, attack_id))
+
+        headers = []
+        values = []
+
+        for tactic in sorted(grouped):
+            techniques = grouped[tactic]
+            # tactic name on the first row of its group, blank cells for the remaining rows
+            headers.append(tactic.upper())
+            headers.extend([""] * (len(techniques) - 1))
+
+            for spec in sorted(techniques):
+                if len(spec) == 2:
+                    values.append("%s %s" % spec)
+                elif len(spec) == 3:
+                    values.append("%s::%s %s" % spec)
+                else:
+                    raise RuntimeError("unexpected ATT&CK spec format")
+
+        self.view_attack.setRowCount(max(len(headers), len(values)))
+
+        for row, text in enumerate(headers):
+            self.view_attack.setItem(row, 0, self.render_new_table_header_item(text))
+
+        for row, text in enumerate(values):
+            self.view_attack.setItem(row, 1, QtWidgets.QTableWidgetItem(text))
+
+        # resize columns to content
+        self.view_attack.resizeColumnsToContents()
+
+    def render_new_table_header_item(self, text):
+        """build a table cell styled as a header (bold font, custom foreground color)
+
+        @param text: header text to display
+
+        @retval new QTableWidgetItem
+        """
+        bold = QtGui.QFont()
+        bold.setBold(True)
+
+        header = QtWidgets.QTableWidgetItem(text)
+        header.setForeground(QtGui.QColor(37, 147, 215))
+        header.setFont(bold)
+        return header
+
+    def reset_view_tree(self):
+        """reset tree view UI controls
+
+        unchecks the limit-by-function checkbox, clears the search bar, then resets the tree view
+
+        called when user selects plugin reset from menu
+        """
+        # NOTE(review): these widget updates presumably fire their change slots - confirm before reordering
+        self.view_limit_results_by_function.setChecked(False)
+        self.view_search_bar.setText("")
+        self.view_tree.reset_ui()
+
+    def slot_analyze(self):
+        """run capa analysis and reload UI controls
+
+        clears the previous results, runs the analysis behind an IDA wait box, then resets the tree view
+        and updates the status label according to the outcome
+
+        called when user selects plugin reload from menu
+        """
+        self.range_model_proxy.invalidate()
+        self.search_model_proxy.invalidate()
+        self.model_data.reset()
+        self.model_data.clear()
+        self.disable_controls()
+        self.set_view_status_label("Loading...")
+
+        ida_kernwin.show_wait_box("capa explorer")
+        try:
+            success = self.load_capa_results()
+        finally:
+            # always dismiss the wait box, even when loading raises unexpectedly;
+            # otherwise the box would stay on screen after the failure
+            ida_kernwin.hide_wait_box()
+
+        self.reset_view_tree()
+
+        if not success:
+            self.set_view_status_label("Click Analyze to get started...")
+            logger.info("Analysis failed.")
+        else:
+            logger.info("Analysis completed.")
+
+    def slot_reset(self, checked):
+        """reset UI elements
+
+        e.g. checkboxes and IDA highlighting
+
+        @param checked: not used by this slot (presumably supplied by the triggering Qt signal - confirm)
+        """
+        # reset the model first, then the view controls
+        self.model_data.reset()
+        self.reset_view_tree()
+        logger.info("Reset completed.")
+
+    def slot_checkbox_limit_by_changed(self, state):
+        """slot activated when the limit-by-function checkbox changes
+
+        when checked, filter results to the function containing the current screen location;
+        when unchecked, remove the address range filter
+
+        @param state: checked state
+        """
+        if state != QtCore.Qt.Checked:
+            self.range_model_proxy.reset_address_range_filter()
+        else:
+            current_function = idaapi.get_func(idaapi.get_screen_ea())
+            self.limit_results_to_function(current_function)
+
+        self.view_tree.reset_ui()
+
+    def limit_results_to_function(self, f):
+        """restrict displayed results to the bounds of a function
+
+        installs an address range filter spanning the function, so basic blocks matched inside the
+        function remain part of the results
+
+        @param f: (IDA func_t) function to limit results to, may be None
+        """
+        if not f:
+            # no function at this location: filter on a range that matches nothing (assume address never -1)
+            self.range_model_proxy.add_address_range_filter(-1, -1)
+            return
+
+        self.range_model_proxy.add_address_range_filter(f.start_ea, f.end_ea)
+
+    def slot_limit_results_to_search(self, text):
+        """limit tree view results to search matches
+
+        reset view after filter to maintain level 1 expansion
+
+        @param text: query handed to the search proxy model
+        """
+        self.search_model_proxy.set_query(text)
+        # the view is reset without sorting (should_sort=False)
+        self.view_tree.reset_ui(should_sort=False)
+
+    def ask_user_directory(self):
+        """prompt the user for a rules directory using a Qt directory dialog
+
+        the dialog starts at the currently configured rule path
+
+        @retval selected directory converted to str
+        """
+        selected = QtWidgets.QFileDialog.getExistingDirectory(
+            self.parent, "Please select a capa rules directory", self.rule_path
+        )
+        return str(selected)
+
+    def slot_change_rules_dir(self):
+        """let the user pick a different rules directory
+
+        the selection is remembered on the form and stored in settings for future runs;
+        afterwards the user is asked whether to run the analysis right away
+        """
+        selected = self.ask_user_directory()
+        if not selected:
+            logger.warning("No rule directory selected, nothing to do.")
+            return
+
+        self.rule_path = selected
+        settings.user["rule_path"] = selected
+
+        run_now = idaapi.ask_yn(1, "Run analysis now?")
+        if run_now == 1:
+            self.slot_analyze()
+
+    def set_view_status_label(self, text):
+        """update status label control
+
+        replaces the text shown by the status label widget
+
+        @param text: updated text
+        """
+        self.view_status_label.setText(text)
+
+    def disable_controls(self):
+        """disable the reset button and the first two tabs of the form"""
+        self.view_reset_button.setEnabled(False)
+        for tab_index in (0, 1):
+            self.view_tabs.setTabEnabled(tab_index, False)
+
+    def enable_controls(self):
+        """enable the reset button and the first two tabs of the form"""
+        self.view_reset_button.setEnabled(True)
+        for tab_index in (0, 1):
+            self.view_tabs.setTabEnabled(tab_index, True)
--- a/capa/ida/plugin/hooks.py
+++ b/capa/ida/plugin/hooks.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import idaapi
+
+
+class CapaExplorerIdaHooks(idaapi.UI_Hooks):
+    def __init__(self, screen_ea_changed_hook, action_hooks):
+        """register the callbacks that IDA UI events are forwarded to
+
+        @param screen_ea_changed_hook: function hook called with (widget, curr_ea, prev_ea) on screen ea change
+        @param action_hooks: dict of IDA action handles, keyed by action name
+        """
+        super(CapaExplorerIdaHooks, self).__init__()
+
+        self.process_action_meta = {}
+        self.process_action_handle = None
+        self.process_action_hooks = action_hooks
+        self.screen_ea_changed_hook = screen_ea_changed_hook
+
+    def preprocess_action(self, name):
+        """called prior to action completed; runs the handle registered for the action, if any
+
+        @param name: name of action defined by idagui.cfg
+
+        @retval must be 0
+        """
+        handler = self.process_action_hooks.get(name)
+        self.process_action_handle = handler  # remembered for postprocess_action
+        if handler:
+            handler(self.process_action_meta)
+
+        # must return 0 for IDA
+        return 0
+
+    def postprocess_action(self):
+        """called after action completed; gives the remembered handle its post call, then resets"""
+        handler = self.process_action_handle
+        if handler:
+            handler(self.process_action_meta, post=True)
+            self.reset()
+
+    def screen_ea_changed(self, curr_ea, prev_ea):
+        """called after screen location is changed
+
+        @param curr_ea: current location
+        @param prev_ea: prev location
+        """
+        widget = idaapi.get_current_widget()
+        self.screen_ea_changed_hook(widget, curr_ea, prev_ea)
+
+    def reset(self):
+        """reset internal state: forget the remembered handle and clear its meta dict"""
+        self.process_action_meta.clear()
+        self.process_action_handle = None
--- a/capa/ida/plugin/icon.py
+++ b/capa/ida/plugin/icon.py
--- a/capa/ida/explorer/item.py
+++ b/capa/ida/explorer/item.py
@@ -17,9 +17,9 @@ import capa.ida.helpers


 def info_to_name(display):
-    """ extract root value from display name
+    """extract root value from display name

-        e.g. function(my_function) => my_function
+    e.g. function(my_function) => my_function
    """
    try:
        return display.split("(")[1].rstrip(")")
@@ -28,20 +28,21 @@ def info_to_name(display):


 def location_to_hex(location):
-    """ convert location to hex for display """
+    """convert location to hex for display"""
    return "%08X" % location


 class CapaExplorerDataItem(object):
-    """ store data for CapaExplorerDataModel """
+    """store data for CapaExplorerDataModel"""

    def __init__(self, parent, data):
-        """ """
+        """initialize item"""
        self.pred = parent
        self._data = data
        self.children = []
        self._checked = False

+        # default state for item
        self.flags = (
            QtCore.Qt.ItemIsEnabled
            | QtCore.Qt.ItemIsSelectable
@@ -53,117 +54,146 @@ class CapaExplorerDataItem(object):
            self.pred.appendChild(self)

    def setIsEditable(self, isEditable=False):
-        """ modify item flags to be editable or not """
+        """modify item editable flags
+
+        @param isEditable: True, can edit, False cannot edit
+        """
        if isEditable:
            self.flags |= QtCore.Qt.ItemIsEditable
        else:
            self.flags &= ~QtCore.Qt.ItemIsEditable

    def setChecked(self, checked):
-        """ set item as checked """
+        """set item as checked
+
+        @param checked: True, item checked, False item not checked
+        """
        self._checked = checked

    def isChecked(self):
-        """ get item is checked """
+        """get item is checked"""
        return self._checked

    def appendChild(self, item):
-        """ add child item
+        """add a new child to specified item

-            @param item: CapaExplorerDataItem*
+        @param item: CapaExplorerDataItem
        """
        self.children.append(item)

    def child(self, row):
-        """ get child row
+        """get child row

-            @param row: TODO
+        @param row: row number
        """
        return self.children[row]

    def childCount(self):
-        """ get child count """
+        """get child count"""
        return len(self.children)

    def columnCount(self):
-        """ get column count """
+        """get column count"""
        return len(self._data)

    def data(self, column):
-        """ get data at column """
+        """get data at column
+
+        @param column: column number
+        """
        try:
            return self._data[column]
        except IndexError:
            return None

    def parent(self):
-        """ get parent """
+        """get parent"""
        return self.pred

    def row(self):
-        """ get row location """
+        """get row location"""
        if self.pred:
            return self.pred.children.index(self)
        return 0

    def setData(self, column, value):
-        """ set data in column """
+        """set data in column
+
+        @param column: column number
+        @param value: value to set (assume str)
+        """
        self._data[column] = value

    def children(self):
-        """ yield children """
+        """yield children"""
        for child in self.children:
            yield child

    def removeChildren(self):
-        """ remove children from node """
+        """remove children"""
        del self.children[:]

    def __str__(self):
-        """ get string representation of columns """
+        """get string representation of columns
+
+        used for copy-n-paste operations
+        """
        return " ".join([data for data in self._data if data])

    @property
    def info(self):
-        """ return data stored in information column """
+        """return data stored in information column"""
        return self._data[0]

    @property
    def location(self):
-        """ return data stored in location column """
+        """return data stored in location column"""
        try:
+            # address stored as str, convert to int before return
            return int(self._data[1], 16)
        except ValueError:
            return None

    @property
    def details(self):
-        """ return data stored in details column """
+        """return data stored in details column"""
        return self._data[2]


 class CapaExplorerRuleItem(CapaExplorerDataItem):
-    """ store data relevant to capa function result """
+    """store data for rule result"""

    fmt = "%s (%d matches)"

-    def __init__(self, parent, display, count, source):
-        """ """
-        display = self.fmt % (display, count) if count > 1 else display
-        super(CapaExplorerRuleItem, self).__init__(parent, [display, "", ""])
+    def __init__(self, parent, name, namespace, count, source):
+        """initialize item
+
+        @param parent: parent node
+        @param name: rule name
+        @param namespace: rule namespace
+        @param count: number of matches for this rule
+        @param source: rule source (tooltip)
+        """
+        display = self.fmt % (name, count) if count > 1 else name
+        super(CapaExplorerRuleItem, self).__init__(parent, [display, "", namespace])
        self._source = source

    @property
    def source(self):
-        """ return rule contents for display """
+        """return rule source to display (tooltip)"""
        return self._source


 class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
-    """ store data relevant to capa function match result """
+    """store data for rule match"""

    def __init__(self, parent, display, source=""):
-        """ """
+        """initialize item
+
+        @param parent: parent node
+        @param display: text to display in UI
+        @param source: rule match source to display (tooltip)
+        """
        super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, "", ""])
        self._source = source

@@ -174,82 +204,125 @@ class CapaExplorerRuleMatchItem(CapaExplorerDataItem):


 class CapaExplorerFunctionItem(CapaExplorerDataItem):
-    """ store data relevant to capa function result """
+    """store data for function match"""

    fmt = "function(%s)"

    def __init__(self, parent, location):
-        """ """
+        """initialize item
+
+        @param parent: parent node
+        @param location: virtual address of function as seen by IDA
+        """
        super(CapaExplorerFunctionItem, self).__init__(
            parent, [self.fmt % idaapi.get_name(location), location_to_hex(location), ""]
        )

    @property
    def info(self):
-        """ """
+        """return function name"""
        info = super(CapaExplorerFunctionItem, self).info
        display = info_to_name(info)
        return display if display else info

    @info.setter
    def info(self, display):
-        """ """
+        """set function name
+
+        called when user changes function name in plugin UI
+
+        @param display: new function name to display
+        """
        self._data[0] = self.fmt % display


 class CapaExplorerSubscopeItem(CapaExplorerDataItem):
-    """ store data relevant to subscope """
+    """store data for subscope match"""

    fmt = "subscope(%s)"

    def __init__(self, parent, scope):
-        """ """
+        """initialize item
+
+        @param parent: parent node
+        @param scope: subscope name
+        """
        super(CapaExplorerSubscopeItem, self).__init__(parent, [self.fmt % scope, "", ""])


 class CapaExplorerBlockItem(CapaExplorerDataItem):
-    """ store data relevant to capa basic block result """
+    """store data for basic block match"""

    fmt = "basic block(loc_%08X)"

    def __init__(self, parent, location):
-        """ """
+        """initialize item
+
+        @param parent: parent node
+        @param location: virtual address of basic block as seen by IDA
+        """
        super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ""])


 class CapaExplorerDefaultItem(CapaExplorerDataItem):
-    """ store data relevant to capa default result """
+    """store data for default match e.g. statement (and, or)"""

    def __init__(self, parent, display, details="", location=None):
-        """ """
+        """initialize item
+
+        @param parent: parent node
+        @param display: text to display in UI
+        @param details: text to display in details section of UI
+        @param location: virtual address as seen by IDA
+        """
        location = location_to_hex(location) if location else ""
        super(CapaExplorerDefaultItem, self).__init__(parent, [display, location, details])


 class CapaExplorerFeatureItem(CapaExplorerDataItem):
-    """ store data relevant to capa feature result """
+    """store data for feature match"""

    def __init__(self, parent, display, location="", details=""):
-        """ """
+        """initialize item
+
+        @param parent: parent node
+        @param display: text to display in UI
+        @param details: text to display in details section of UI
+        @param location: virtual address as seen by IDA
+        """
        location = location_to_hex(location) if location else ""
        super(CapaExplorerFeatureItem, self).__init__(parent, [display, location, details])


 class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
-    """ store data relevant to an instruction preview """
+    """store data for instruction match"""

    def __init__(self, parent, display, location):
-        """ """
+        """initialize item
+
+        details section shows disassembly view for match
+
+        @param parent: parent node
+        @param display: text to display in UI
+        @param location: virtual address as seen by IDA
+        """
        details = capa.ida.helpers.get_disasm_line(location)
        super(CapaExplorerInstructionViewItem, self).__init__(parent, display, location=location, details=details)
        self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)


 class CapaExplorerByteViewItem(CapaExplorerFeatureItem):
-    """ store data relevant to byte preview """
+    """store data for byte match"""

    def __init__(self, parent, display, location):
-        """ """
+        """initialize item
+
+        details section shows byte preview for match
+
+        @param parent: parent node
+        @param display: text to display in UI
+        @param location: virtual address as seen by IDA
+        """
        byte_snap = idaapi.get_bytes(location, 32)

        if byte_snap:
@@ -266,9 +339,14 @@ class CapaExplorerByteViewItem(CapaExplorerFeatureItem):


 class CapaExplorerStringViewItem(CapaExplorerFeatureItem):
-    """ store data relevant to string preview """
+    """store data for string match"""

-    def __init__(self, parent, display, location):
-        """ """
-        super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location)
+    def __init__(self, parent, display, location, value):
+        """initialize item
+
+        @param parent: parent node
+        @param display: text to display in UI
+        @param location: virtual address as seen by IDA
+        """
+        super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location, details=value)
        self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)
--- a/capa/ida/explorer/model.py
+++ b/capa/ida/explorer/model.py
@@ -9,13 +9,12 @@
 from collections import deque

 import idc
-import six
 import idaapi
-from PyQt5 import Qt, QtGui, QtCore
+from PyQt5 import QtGui, QtCore

 import capa.ida.helpers
 import capa.render.utils as rutils
-from capa.ida.explorer.item import (
+from capa.ida.plugin.item import (
    CapaExplorerDataItem,
    CapaExplorerRuleItem,
    CapaExplorerBlockItem,
@@ -30,11 +29,11 @@ from capa.ida.explorer.item import (
 )

 # default highlight color used in IDA window
-DEFAULT_HIGHLIGHT = 0xD096FF
+DEFAULT_HIGHLIGHT = 0xE6C700


 class CapaExplorerDataModel(QtCore.QAbstractItemModel):
-    """ """
+    """model for displaying hierarchical results returned by capa"""

    COLUMN_INDEX_RULE_INFORMATION = 0
    COLUMN_INDEX_VIRTUAL_ADDRESS = 1
@@ -43,14 +42,16 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
    COLUMN_COUNT = 3

    def __init__(self, parent=None):
-        """ """
+        """initialize model"""
        super(CapaExplorerDataModel, self).__init__(parent)
+        # root node does not have parent, contains header columns
        self.root_node = CapaExplorerDataItem(None, ["Rule Information", "Address", "Details"])

    def reset(self):
-        """ """
-        # reset checkboxes and color highlights
-        # TODO: make less hacky
+        """reset UI elements (e.g. checkboxes, IDA color highlights)
+
+        called when view wants to reset UI display
+        """
        for idx in range(self.root_node.childCount()):
            root_index = self.index(idx, 0, QtCore.QModelIndex())
            for model_index in self.iterateChildrenIndexFromRootIndex(root_index, ignore_root=False):
@@ -59,17 +60,20 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                self.dataChanged.emit(model_index, model_index)

    def clear(self):
-        """ """
+        """clear model data
+
+        called when view wants to clear UI display
+        """
        self.beginResetModel()
        self.root_node.removeChildren()
        self.endResetModel()

    def columnCount(self, model_index):
-        """ get the number of columns for the children of the given parent
+        """return number of columns for the children of the given parent

-            @param model_index: QModelIndex*
+        @param model_index: QModelIndex

-            @retval column count
+        @retval column count
        """
        if model_index.isValid():
            return model_index.internalPointer().columnCount()
@@ -77,12 +81,14 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            return self.root_node.columnCount()

    def data(self, model_index, role):
-        """ get data stored under the given role for the item referred to by the index
+        """return data stored at given index by display role

-            @param model_index: QModelIndex*
-            @param role: QtCore.Qt.*
+        this function is used to control UI elements (e.g. text font, color, etc.) based on column, item type, etc.

-            @retval data to be displayed
+        @param model_index: QModelIndex
+        @param role: QtCore.Qt.*
+
+        @retval data to be displayed
        """
        if not model_index.isValid():
            return None
@@ -131,14 +137,14 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            )
            and column == CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION
        ):
-            # set bold font for top-level rules
+            # set bold font for important items
            font = QtGui.QFont()
            font.setBold(True)
            return font

        if role == QtCore.Qt.ForegroundRole and column == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS:
            # set color for virtual address column
-            return QtGui.QColor(88, 139, 174)
+            return QtGui.QColor(37, 147, 215)

        if (
            role == QtCore.Qt.ForegroundRole
@@ -151,11 +157,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        return None

    def flags(self, model_index):
-        """ get item flags for given index
+        """return item flags for given index

-            @param model_index: QModelIndex*
+        @param model_index: QModelIndex

-            @retval QtCore.Qt.ItemFlags
+        @retval QtCore.Qt.ItemFlags
        """
        if not model_index.isValid():
            return QtCore.Qt.NoItemFlags
@@ -163,13 +169,13 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        return model_index.internalPointer().flags

    def headerData(self, section, orientation, role):
-        """ get data for the given role and section in the header with the specified orientation
+        """return data for the given role and section in the header with the specified orientation

-            @param section: int
-            @param orientation: QtCore.Qt.Orientation
-            @param role: QtCore.Qt.DisplayRole
+        @param section: int
+        @param orientation: QtCore.Qt.Orientation
+        @param role: QtCore.Qt.DisplayRole

-            @retval header data list()
+        @retval header data
        """
        if orientation == QtCore.Qt.Horizontal and role == QtCore.Qt.DisplayRole:
            return self.root_node.data(section)
@@ -177,13 +183,13 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        return None

    def index(self, row, column, parent):
-        """ get index of the item in the model specified by the given row, column and parent index
+        """return index of the item by row, column, and parent index

-            @param row: int
-            @param column: int
-            @param parent: QModelIndex*
+        @param row: item row
+        @param column: item column
+        @param parent: QModelIndex of parent

-            @retval QModelIndex*
+        @retval QModelIndex of item
        """
        if not self.hasIndex(row, column, parent):
            return QtCore.QModelIndex()
@@ -201,13 +207,13 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            return QtCore.QModelIndex()

    def parent(self, model_index):
-        """ get parent of the model item with the given index
+        """return parent index by child index

-            if the item has no parent, an invalid QModelIndex* is returned
+        if the item has no parent, an invalid QModelIndex is returned

-            @param model_index: QModelIndex*
+        @param model_index: QModelIndex of child

-            @retval QModelIndex*
+        @retval QModelIndex of parent
        """
        if not model_index.isValid():
            return QtCore.QModelIndex()
@@ -221,12 +227,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        return self.createIndex(parent.row(), 0, parent)

    def iterateChildrenIndexFromRootIndex(self, model_index, ignore_root=True):
-        """ depth-first traversal of child nodes
+        """depth-first traversal of child nodes

-            @param model_index: QModelIndex*
-            @param ignore_root: if set, do not return root index
+        @param model_index: QModelIndex of starting item
+        @param ignore_root: True, do not yield root index, False yield root index

-            @retval yield QModelIndex*
+        @retval yield QModelIndex
        """
        visited = set()
        stack = deque((model_index,))
@@ -248,10 +254,10 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                    stack.append(child_index.child(idx, 0))

    def reset_ida_highlighting(self, item, checked):
-        """ reset IDA highlight for an item
+        """reset IDA highlight for item

-            @param item: capa explorer item
-            @param checked: indicates item is or not checked
+        @param item: CapaExplorerDataItem
+        @param checked: True, item checked, False item not checked
        """
        if not isinstance(
            item, (CapaExplorerStringViewItem, CapaExplorerInstructionViewItem, CapaExplorerByteViewItem)
@@ -275,13 +281,11 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                idc.set_color(item.location, idc.CIC_ITEM, item.ida_highlight)

    def setData(self, model_index, value, role):
-        """ set the role data for the item at index to value
+        """set data at index by role

-            @param model_index: QModelIndex*
-            @param value: QVariant*
-            @param role: QtCore.Qt.EditRole
-
-            @retval True/False
+        @param model_index: QModelIndex of item
+        @param value: value to set
+        @param role: QtCore.Qt.EditRole
        """
        if not model_index.isValid():
            return False
@@ -316,14 +320,13 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        return False

    def rowCount(self, model_index):
-        """ get the number of rows under the given parent
+        """return number of rows under item by index

-            when the parent is valid it means that is returning the number of
-            children of parent
+        when the parent is valid, the number of children of the parent is returned

-            @param model_index: QModelIndex*
+        @param model_index: QModelIndex

-            @retval row count
+        @retval row count
        """
        if model_index.column() > 0:
            return 0
@@ -336,24 +339,26 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        return item.childCount()

    def render_capa_doc_statement_node(self, parent, statement, locations, doc):
-        """ render capa statement read from doc
+        """render capa statement read from doc

-            @param parent: parent to which new child is assigned
-            @param statement: statement read from doc
-            @param locations: locations of children (applies to range only?)
-            @param doc: capa result doc
-
-            "statement": {
-                "type": "or"
-            },
+        @param parent: parent to which new child is assigned
+        @param statement: statement read from doc
+        @param locations: locations of children (applies to range only?)
+        @param doc: result doc
        """
        if statement["type"] in ("and", "or", "optional"):
-            return CapaExplorerDefaultItem(parent, statement["type"])
+            display = statement["type"]
+            if statement.get("description"):
+                display += " (%s)" % statement["description"]
+            return CapaExplorerDefaultItem(parent, display)
        elif statement["type"] == "not":
            # TODO: do we display 'not'
            pass
        elif statement["type"] == "some":
-            return CapaExplorerDefaultItem(parent, statement["count"] + " or more")
+            display = "%d or more" % statement["count"]
+            if statement.get("description"):
+                display += " (%s)" % statement["description"]
+            return CapaExplorerDefaultItem(parent, display)
        elif statement["type"] == "range":
            # `range` is a weird node, its almost a hybrid of statement + feature.
            # it is a specific feature repeated multiple times.
@@ -370,6 +375,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            else:
                display += "between %d and %d" % (statement["min"], statement["max"])

+            if statement.get("description"):
+                display += " (%s)" % statement["description"]
+
            parent2 = CapaExplorerFeatureItem(parent, display=display)

            for location in locations:
@@ -378,33 +386,19 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):

            return parent2
        elif statement["type"] == "subscope":
-            return CapaExplorerSubscopeItem(parent, statement[statement["type"]])
+            display = statement[statement["type"]]
+            if statement.get("description"):
+                display += " (%s)" % statement["description"]
+            return CapaExplorerSubscopeItem(parent, display)
        else:
            raise RuntimeError("unexpected match statement type: " + str(statement))

    def render_capa_doc_match(self, parent, match, doc):
-        """ render capa match read from doc
+        """render capa match read from doc

-            @param parent: parent node to which new child is assigned
-            @param match: match read from doc
-            @param doc: capa result doc
-
-            "matches": {
-                "0": {
-                    "children": [],
-                    "locations": [
-                        4317184
-                    ],
-                    "node": {
-                        "feature": {
-                            "section": ".rsrc",
-                            "type": "section"
-                        },
-                        "type": "feature"
-                    },
-                    "success": true
-                }
-            },
+        @param parent: parent node to which new child is assigned
+        @param match: match read from doc
+        @param doc: result doc
        """
        if not match["success"]:
            # TODO: display failed branches at some point? Help with debugging rules?
@@ -431,15 +425,19 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            self.render_capa_doc_match(parent2, child, doc)

    def render_capa_doc(self, doc):
-        """ render capa features specified in doc
+        """render capa features specified in doc

-            @param doc: capa result doc
+        @param doc: capa result doc
        """
        # inform model that changes are about to occur
        self.beginResetModel()

        for rule in rutils.capability_rules(doc):
-            parent = CapaExplorerRuleItem(self.root_node, rule["meta"]["name"], len(rule["matches"]), rule["source"])
+            rule_name = rule["meta"]["name"]
+            rule_namespace = rule["meta"].get("namespace")
+            parent = CapaExplorerRuleItem(
+                self.root_node, rule_name, rule_namespace, len(rule["matches"]), rule["source"]
+            )

            for (location, match) in doc["rules"][rule["meta"]["name"]]["matches"].items():
                if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
@@ -457,18 +455,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        self.endResetModel()

    def capa_doc_feature_to_display(self, feature):
-        """ convert capa doc feature type string to display string for ui
+        """convert capa doc feature type string to display string for ui

-            @param feature: capa feature read from doc
-
-            Example:
-                "feature": {
-                    "bytes": "01 14 02 00 00 00 00 00 C0 00 00 00 00 00 00 46",
-                    "description": "CLSID_ShellLink",
-                    "type": "bytes"
-                }
-
-                bytes(01 14 02 00 00 00 00 00 C0 00 00 00 00 00 00 46 = CLSID_ShellLink)
+        @param feature: capa feature read from doc
        """
        if feature[feature["type"]]:
            if feature.get("description", ""):
@@ -479,25 +468,24 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            return "%s" % feature["type"]

    def render_capa_doc_feature_node(self, parent, feature, locations, doc):
-        """ process capa doc feature node
+        """process capa doc feature node

-            @param parent: parent node to which child is assigned
-            @param feature: capa doc feature node
-            @param locations: locations identified for feature
-            @param doc: capa doc
-
-            Example:
-              "feature": {
-                "description": "FILE_WRITE_DATA",
-                "number": "0x2",
-                "type": "number"
-              }
+        @param parent: parent node to which child is assigned
+        @param feature: capa doc feature node
+        @param locations: locations identified for feature
+        @param doc: capa doc
        """
        display = self.capa_doc_feature_to_display(feature)

        if len(locations) == 1:
            # only one location for feature so no need to nest children
-            parent2 = self.render_capa_doc_feature(parent, feature, next(iter(locations)), doc, display=display,)
+            parent2 = self.render_capa_doc_feature(
+                parent,
+                feature,
+                next(iter(locations)),
+                doc,
+                display=display,
+            )
        else:
            # feature has multiple children, nest  under one parent feature node
            parent2 = CapaExplorerFeatureItem(parent, display)
@@ -508,27 +496,20 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        return parent2

    def render_capa_doc_feature(self, parent, feature, location, doc, display="-"):
-        """ render capa feature read from doc
+        """render capa feature read from doc

-            @param parent: parent node to which new child is assigned
-            @param feature: feature read from doc
-            @param doc: capa feature doc
-            @param location: address of feature
-            @param display: text to display in plugin ui
-
-            Example:
-              "feature": {
-                "description": "FILE_WRITE_DATA",
-                "number": "0x2",
-                "type": "number"
-              }
+        @param parent: parent node to which new child is assigned
+        @param feature: feature read from doc
+        @param doc: capa feature doc
+        @param location: address of feature
+        @param display: text to display in plugin UI
        """
        # special handling for characteristic pending type
        if feature["type"] == "characteristic":
            if feature[feature["type"]] in ("embedded pe",):
                return CapaExplorerByteViewItem(parent, display, location)

-            if feature[feature["type"]] in ("loop", "recursive call", "tight loop", "switch"):
+            if feature[feature["type"]] in ("loop", "recursive call", "tight loop"):
                return CapaExplorerFeatureItem(parent, display=display)

            # default to instruction view for all other characteristics
@@ -541,12 +522,22 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
            )

        if feature["type"] == "regex":
-            return CapaExplorerFeatureItem(parent, display, location, details=feature["match"])
+            return CapaExplorerStringViewItem(parent, display, location, feature["match"])

        if feature["type"] == "basicblock":
            return CapaExplorerBlockItem(parent, location)

-        if feature["type"] in ("bytes", "api", "mnemonic", "number", "offset"):
+        if feature["type"] in (
+            "bytes",
+            "api",
+            "mnemonic",
+            "number",
+            "offset",
+            "number/x32",
+            "number/x64",
+            "offset/x32",
+            "offset/x64",
+        ):
            # display instruction preview
            return CapaExplorerInstructionViewItem(parent, display, location)

@@ -556,7 +547,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):

        if feature["type"] in ("string",):
            # display string preview
-            return CapaExplorerStringViewItem(parent, display, location)
+            return CapaExplorerStringViewItem(parent, display, location, feature[feature["type"]])

        if feature["type"] in ("import", "export"):
            # display no preview
@@ -565,10 +556,12 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        raise RuntimeError("unexpected feature type: " + str(feature["type"]))

    def update_function_name(self, old_name, new_name):
-        """ update all instances of old function name with new function name
+        """update all instances of old function name with new function name

-            @param old_name: previous function name
-            @param new_name: new function name
+        called when user updates function name using plugin UI
+
+        @param old_name: old function name
+        @param new_name: new function name
        """
        # create empty root index for search
        root_index = self.index(0, 0, QtCore.QModelIndex())
--- a/capa/ida/plugin/proxy.py
+++ b/capa/ida/plugin/proxy.py
@@ -0,0 +1,226 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import six
+from PyQt5 import QtCore
+from PyQt5.QtCore import Qt
+
+from capa.ida.plugin.model import CapaExplorerDataModel
+
+
+class CapaExplorerRangeProxyModel(QtCore.QSortFilterProxyModel):
+    """filter results based on virtual address range as seen by IDA
+
+    implements filtering for "limit results by current function" checkbox in plugin UI
+
+    minimum and maximum virtual addresses are used to filter results to a specific address range. this allows
+    basic blocks to be included when limiting results to a specific function
+    """
+
+    def __init__(self, parent=None):
+        """initialize proxy filter"""
+        super(CapaExplorerRangeProxyModel, self).__init__(parent)
+        self.min_ea = None
+        self.max_ea = None
+
+    def lessThan(self, left, right):
+        """return True if left item is less than right item, else False
+
+        @param left: QModelIndex of left
+        @param right: QModelIndex of right
+        """
+        ldata = left.internalPointer().data(left.column())
+        rdata = right.internalPointer().data(right.column())
+
+        if (
+            ldata
+            and rdata
+            and left.column() == CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS
+            and left.column() == right.column()
+        ):
+            # convert virtual address before compare
+            return int(ldata, 16) < int(rdata, 16)
+        else:
+            # compare as lowercase
+            return ldata.lower() < rdata.lower()
+
+    def filterAcceptsRow(self, row, parent):
+        """return true if the item in the row indicated by the given row and parent should be included in the model;
+        otherwise return false
+
+        @param row: row number
+        @param parent: QModelIndex of parent
+        """
+        if self.filter_accepts_row_self(row, parent):
+            return True
+
+        alpha = parent
+        while alpha.isValid():
+            if self.filter_accepts_row_self(alpha.row(), alpha.parent()):
+                return True
+            alpha = alpha.parent()
+
+        if self.index_has_accepted_children(row, parent):
+            return True
+
+        return False
+
+    def index_has_accepted_children(self, row, parent):
+        """return True if parent has one or more children that match filter, else False
+
+        @param row: row number
+        @param parent: QModelIndex of parent
+        """
+        model_index = self.sourceModel().index(row, 0, parent)
+
+        if model_index.isValid():
+            for idx in range(self.sourceModel().rowCount(model_index)):
+                if self.filter_accepts_row_self(idx, model_index):
+                    return True
+                if self.index_has_accepted_children(idx, model_index):
+                    return True
+
+        return False
+
+    def filter_accepts_row_self(self, row, parent):
+        """return True if filter accepts row, else False
+
+        @param row: row number
+        @param parent: QModelIndex of parent
+        """
+        # filter not set
+        if self.min_ea is None and self.max_ea is None:
+            return True
+
+        index = self.sourceModel().index(row, 0, parent)
+        data = index.internalPointer().data(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS)
+
+        # virtual address may be empty
+        if not data:
+            return False
+
+        # convert virtual address str to int
+        ea = int(data, 16)
+
+        if self.min_ea <= ea and ea < self.max_ea:
+            return True
+
+        return False
+
+    def add_address_range_filter(self, min_ea, max_ea):
+        """add new address range filter
+
+        called when user checks "limit results by current function" in plugin UI
+
+        @param min_ea: minimum virtual address as seen by IDA
+        @param max_ea: maximum virtual address as seen by IDA
+        """
+        self.min_ea = min_ea
+        self.max_ea = max_ea
+
+        self.setFilterKeyColumn(CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS)
+        self.invalidateFilter()
+
+    def reset_address_range_filter(self):
+        """remove address range filter (accept all results)
+
+        called when user un-checks "limit results by current function" in plugin UI
+        """
+        self.min_ea = None
+        self.max_ea = None
+        self.invalidateFilter()
+
+
+class CapaExplorerSearchProxyModel(QtCore.QSortFilterProxyModel):
+    """A SortFilterProxyModel that accepts rows with a substring match for a configurable query.
+
+    Looks for matches in the text of all rows.
+    Displays the entire tree row if any of the tree branches match,
+     that is, you can filter by rule name, or also
+     filter by "characteristic(nzxor)" to filter matches with some feature.
+    """
+
+    def __init__(self, parent=None):
+        """initialize proxy filter with an empty query"""
+        super(CapaExplorerSearchProxyModel, self).__init__(parent)
+        self.query = ""
+        self.setFilterKeyColumn(-1)  # all columns
+
+    def filterAcceptsRow(self, row, parent):
+        """true if the item in the row indicated by the given row and parent
+        should be included in the model; otherwise returns false
+
+        @param row: int
+        @param parent: QModelIndex of parent
+
+        @retval True/False
+        """
+        # this row matches, accept it
+        if self.filter_accepts_row_self(row, parent):
+            return True
+
+        # the parent of this row matches, accept it
+        alpha = parent
+        while alpha.isValid():
+            if self.filter_accepts_row_self(alpha.row(), alpha.parent()):
+                return True
+            alpha = alpha.parent()
+
+        # this row is a parent, and a child matches, accept it
+        if self.index_has_accepted_children(row, parent):
+            return True
+
+        return False
+
+    def index_has_accepted_children(self, row, parent):
+        """returns True if any descendant of the given row should be accepted"""
+        source_model = self.sourceModel()
+        model_index = source_model.index(row, 0, parent)
+
+        if model_index.isValid():
+            for idx in range(source_model.rowCount(model_index)):
+                if self.filter_accepts_row_self(idx, model_index):
+                    return True
+                if self.index_has_accepted_children(idx, model_index):
+                    return True
+
+        return False
+
+    def filter_accepts_row_self(self, row, parent):
+        """returns True if the given row should be accepted"""
+        if self.query == "":
+            return True
+
+        source_model = self.sourceModel()
+
+        for column in (
+            CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION,
+            CapaExplorerDataModel.COLUMN_INDEX_VIRTUAL_ADDRESS,
+            CapaExplorerDataModel.COLUMN_INDEX_DETAILS,
+        ):
+            index = source_model.index(row, column, parent)
+            data = source_model.data(index, Qt.DisplayRole)
+
+            if not data:
+                continue
+
+            if not isinstance(data, six.string_types):
+                # sanity check: should already be a string, but double check
+                continue
+
+            # case-insensitive matching
+            if self.query.lower() in data.lower():
+                return True
+
+        return False
+
+    def set_query(self, query):
+        self.query = query
+        self.invalidateFilter()
+
+    def reset_query(self):
+        self.set_query("")
--- a/capa/ida/explorer/view.py
+++ b/capa/ida/explorer/view.py
@@ -7,25 +7,25 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import idc
-import idaapi
-from PyQt5 import QtGui, QtCore, QtWidgets
+from PyQt5 import QtCore, QtWidgets

-from capa.ida.explorer.item import CapaExplorerRuleItem, CapaExplorerFunctionItem
-from capa.ida.explorer.model import CapaExplorerDataModel
+from capa.ida.plugin.item import CapaExplorerFunctionItem
+from capa.ida.plugin.model import CapaExplorerDataModel
+
+MAX_SECTION_SIZE = 750


 class CapaExplorerQtreeView(QtWidgets.QTreeView):
-    """ capa explorer QTreeView implementation
+    """tree view used to display hierarchical capa results

-        view controls UI action responses and displays data from
-        CapaExplorerDataModel
+    view controls UI action responses and displays data from CapaExplorerDataModel

-        view does not modify CapaExplorerDataModel directly - data
-        modifications should be implemented in CapaExplorerDataModel
+    view does not modify CapaExplorerDataModel directly - data modifications should be implemented
+    in CapaExplorerDataModel
    """

    def __init__(self, model, parent=None):
-        """ initialize CapaExplorerQTreeView """
+        """initialize view"""
        super(CapaExplorerQtreeView, self).__init__(parent)

        self.setModel(model)
@@ -33,6 +33,9 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
        self.model = model
        self.parent = parent

+        # control when we resize columns
+        self.should_resize_columns = True
+
        # configure custom UI controls
        self.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
        self.setExpandsOnDoubleClick(False)
@@ -43,9 +46,12 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
        for idx in range(CapaExplorerDataModel.COLUMN_COUNT):
            self.header().setSectionResizeMode(idx, QtWidgets.QHeaderView.Interactive)

+        # disable stretch to enable horizontal scroll for last column, when needed
+        self.header().setStretchLastSection(False)
+
        # connect slots to resize columns when expanded or collapsed
-        self.expanded.connect(self.resize_columns_to_content)
-        self.collapsed.connect(self.resize_columns_to_content)
+        self.expanded.connect(self.slot_resize_columns_to_content)
+        self.collapsed.connect(self.slot_resize_columns_to_content)

        # connect slots
        self.customContextMenuRequested.connect(self.slot_custom_context_menu_requested)
@@ -53,45 +59,75 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):

        self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")

-    def reset(self):
-        """ reset user interface changes
+    def reset_ui(self, should_sort=True):
+        """reset user interface changes

-            called when view should reset any user interface changes
-            made since the last reset e.g. IDA window highlighting
+        called when view should reset UI display e.g. expand items, resize columns
+
+        @param should_sort: True, sort results after reset, False don't sort results after reset
        """
-        self.collapseAll()
-        self.resize_columns_to_content()
+        if should_sort:
+            self.sortByColumn(CapaExplorerDataModel.COLUMN_INDEX_RULE_INFORMATION, QtCore.Qt.AscendingOrder)

-    def resize_columns_to_content(self):
-        """ reset view columns to contents """
-        self.header().resizeSections(QtWidgets.QHeaderView.ResizeToContents)
+        self.should_resize_columns = False
+        self.expandToDepth(0)
+        self.should_resize_columns = True
+
+        self.slot_resize_columns_to_content()
+
+    def slot_resize_columns_to_content(self):
+        """resize view columns to fit contents, capping the width of the first column"""
+        if self.should_resize_columns:
+            self.header().resizeSections(QtWidgets.QHeaderView.ResizeToContents)
+
+            # limit size of first section
+            if self.header().sectionSize(0) > MAX_SECTION_SIZE:
+                self.header().resizeSection(0, MAX_SECTION_SIZE)

    def map_index_to_source_item(self, model_index):
-        """ map proxy model index to source model item
+        """map proxy model index to source model item

-            @param model_index: QModelIndex*
+        @param model_index: QModelIndex

-            @retval QObject*
+        @retval QObject
        """
-        return self.model.mapToSource(model_index).internalPointer()
+        # assume that self.model here is either:
+        #  - CapaExplorerDataModel, or
+        #  - QSortFilterProxyModel subclass
+        #
+        # The ProxyModels may be chained,
+        #  so keep resolving the index until we reach the CapaExplorerDataModel.
+
+        model = self.model
+        while not isinstance(model, CapaExplorerDataModel):
+            if not model_index.isValid():
+                raise ValueError("invalid index")
+
+            model_index = model.mapToSource(model_index)
+            model = model.sourceModel()
+
+        if not model_index.isValid():
+            raise ValueError("invalid index")
+
+        return model_index.internalPointer()

    def send_data_to_clipboard(self, data):
-        """ copy data to the clipboard
+        """copy data to the clipboard

-            @param data: data to be copied
+        @param data: data to be copied
        """
        clip = QtWidgets.QApplication.clipboard()
        clip.clear(mode=clip.Clipboard)
        clip.setText(data, mode=clip.Clipboard)

    def new_action(self, display, data, slot):
-        """ create action for context menu
+        """create action for context menu

-            @param display: text displayed to user in context menu
-            @param data: data passed to slot
-            @param slot: slot to connect
+        @param display: text displayed to user in context menu
+        @param data: data passed to slot
+        @param slot: slot to connect

-            @retval QAction*
+        @retval QAction
        """
        action = QtWidgets.QAction(display, self.parent)
        action.setData(data)
@@ -100,11 +136,11 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
        return action

    def load_default_context_menu_actions(self, data):
-        """ yield actions specific to function custom context menu
+        """yield actions specific to default custom context menu

-            @param data: tuple
+        @param data: tuple

-            @yield QAction*
+        @yield QAction
        """
        default_actions = (
            ("Copy column", data, self.slot_copy_column),
@@ -116,11 +152,11 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
            yield self.new_action(*action)

    def load_function_context_menu_actions(self, data):
-        """ yield actions specific to function custom context menu
+        """yield actions specific to function custom context menu

-            @param data: tuple
+        @param data: tuple

-            @yield QAction*
+        @yield QAction
        """
        function_actions = (("Rename function", data, self.slot_rename_function),)

@@ -133,15 +169,15 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
            yield action

    def load_default_context_menu(self, pos, item, model_index):
-        """ create default custom context menu
+        """create default custom context menu

-            creates custom context menu containing default actions
+        creates custom context menu containing default actions

-            @param pos: TODO
-            @param item: TODO
-            @param model_index: TODO
+        @param pos: cursor position
+        @param item: CapaExplorerDataItem
+        @param model_index: QModelIndex

-            @retval QMenu*
+        @retval QMenu
        """
        menu = QtWidgets.QMenu()

@@ -151,16 +187,15 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
        return menu

    def load_function_item_context_menu(self, pos, item, model_index):
-        """ create function custom context menu
+        """create function custom context menu

-            creates custom context menu containing actions specific to functions
-            and the default actions
+        creates custom context menu with both default actions and function actions

-            @param pos: TODO
-            @param item: TODO
-            @param model_index: TODO
+        @param pos: cursor position
+        @param item: CapaExplorerDataItem
+        @param model_index: QModelIndex

-            @retval QMenu*
+        @retval QMenu
        """
        menu = QtWidgets.QMenu()

@@ -170,43 +205,40 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
        return menu

    def show_custom_context_menu(self, menu, pos):
-        """ display custom context menu in view
+        """display custom context menu in view

-            @param menu: TODO
-            @param pos: TODO
+        @param menu: QMenu to display
+        @param pos: cursor position
        """
        if menu:
            menu.exec_(self.viewport().mapToGlobal(pos))

    def slot_copy_column(self, action):
-        """ slot connected to custom context menu
+        """slot connected to custom context menu

-            allows user to select a column and copy the data
-            to clipboard
+        allows user to select a column and copy the data to clipboard

-            @param action: QAction*
+        @param action: QAction
        """
        _, item, model_index = action.data()
        self.send_data_to_clipboard(item.data(model_index.column()))

    def slot_copy_row(self, action):
-        """ slot connected to custom context menu
+        """slot connected to custom context menu

-            allows user to select a row and copy the space-delimited
-            data to clipboard
+        allows user to select a row and copy the space-delimited data to clipboard

-            @param action: QAction*
+        @param action: QAction
        """
        _, item, _ = action.data()
        self.send_data_to_clipboard(str(item))

    def slot_rename_function(self, action):
-        """ slot connected to custom context menu
+        """slot connected to custom context menu

-            allows user to select a edit a function name and push
-            changes to IDA
+        allows user to edit a function name and push changes to IDA

-            @param action: QAction*
+        @param action: QAction
        """
        _, item, model_index = action.data()

@@ -216,12 +248,11 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
        item.setIsEditable(False)

    def slot_custom_context_menu_requested(self, pos):
-        """ slot connected to custom context menu request
+        """slot connected to custom context menu request

-            displays custom context menu to user containing action
-            relevant to the data item selected
+        displays custom context menu to user containing actions relevant to the item selected

-            @param pos: TODO
+        @param pos: cursor position
        """
        model_index = self.indexAt(pos)

@@ -229,6 +260,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
            return

        item = self.map_index_to_source_item(model_index)
+
        column = model_index.column()
        menu = None

@@ -243,9 +275,11 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
        self.show_custom_context_menu(menu, pos)

    def slot_double_click(self, model_index):
-        """ slot connected to double click event
+        """slot connected to double-click event

-            @param model_index: QModelIndex*
+        if address column clicked, navigate IDA to address, else un/expand item clicked
+
+        @param model_index: QModelIndex
        """
        if not model_index.isValid():
            return
--- a/capa/ida/plugin_helpers.py
+++ b/capa/ida/plugin_helpers.py
@@ -1,99 +0,0 @@
-# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at: [package root]/LICENSE.txt
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and limitations under the License.
-
-import os
-import logging
-
-import idc
-import idaapi
-from PyQt5.QtCore import Qt
-from PyQt5.QtWidgets import QTreeWidgetItem, QTreeWidgetItemIterator
-
-CAPA_EXTENSION = ".capas"
-
-
-logger = logging.getLogger("capa_ida")
-
-
-def get_input_file(freeze=True):
-    """
-    get input file path
-
-        freeze (bool): if True, get freeze file if it exists
-    """
-    # try original file in same directory as idb/i64 without idb/i64 file extension
-    input_file = idc.get_idb_path()[:-4]
-
-    if freeze:
-        # use frozen file if it exists
-        freeze_file_cand = "%s%s" % (input_file, CAPA_EXTENSION)
-        if os.path.isfile(freeze_file_cand):
-            return freeze_file_cand
-
-    if not os.path.isfile(input_file):
-        # TM naming
-        input_file = "%s.mal_" % idc.get_idb_path()[:-4]
-        if not os.path.isfile(input_file):
-            input_file = idaapi.ask_file(0, "*.*", "Please specify input file.")
-    if not input_file:
-        raise ValueError("could not find input file")
-    return input_file
-
-
-def get_orig_color_feature_vas(vas):
-    orig_colors = {}
-    for va in vas:
-        orig_colors[va] = idc.get_color(va, idc.CIC_ITEM)
-    return orig_colors
-
-
-def reset_colors(orig_colors):
-    if orig_colors:
-        for va, color in orig_colors.iteritems():
-            idc.set_color(va, idc.CIC_ITEM, orig_colors[va])
-
-
-def reset_selection(tree):
-    iterator = QTreeWidgetItemIterator(tree, QTreeWidgetItemIterator.Checked)
-    while iterator.value():
-        item = iterator.value()
-        item.setCheckState(0, Qt.Unchecked)  # column, state
-        iterator += 1
-
-
-def get_disasm_line(va):
-    return idc.generate_disasm_line(va, idc.GENDSM_FORCE_CODE)
-
-
-def get_selected_items(tree, skip_level_1=False):
-    selected = []
-    iterator = QTreeWidgetItemIterator(tree, QTreeWidgetItemIterator.Checked)
-    while iterator.value():
-        item = iterator.value()
-        if skip_level_1:
-            # hacky way to check if item is at level 1, if so, skip
-            # alternative, check if text in disasm column
-            if item.parent() and item.parent().parent() is None:
-                iterator += 1
-                continue
-        if item.text(1):
-            # logger.debug('selected %s, %s', item.text(0), item.text(1))
-            selected.append(int(item.text(1), 0x10))
-        iterator += 1
-    return selected
-
-
-def add_child_item(parent, values, feature=None):
-    child = QTreeWidgetItem(parent)
-    child.setFlags(child.flags() | Qt.ItemIsTristate | Qt.ItemIsUserCheckable)
-    for i, v in enumerate(values):
-        child.setText(i, v)
-        if feature:
-            child.setData(0, 0x100, feature)
-        child.setCheckState(0, Qt.Unchecked)
-    return child
--- a/capa/main.py
+++ b/capa/main.py
@@ -18,6 +18,7 @@ import datetime
 import textwrap
 import collections

+import halo
 import tqdm
 import colorama

@@ -28,7 +29,7 @@ import capa.version
 import capa.features
 import capa.features.freeze
 import capa.features.extractors
-from capa.helpers import oint, get_file_taste
+from capa.helpers import get_file_taste

 RULES_PATH_DEFAULT_STRING = "(embedded rules)"
 SUPPORTED_FILE_MAGIC = set(["MZ"])
@@ -39,8 +40,11 @@ logger = logging.getLogger("capa")

 def set_vivisect_log_level(level):
    logging.getLogger("vivisect").setLevel(level)
+    logging.getLogger("vivisect.base").setLevel(level)
+    logging.getLogger("vivisect.impemu").setLevel(level)
    logging.getLogger("vtrace").setLevel(level)
    logging.getLogger("envi").setLevel(level)
+    logging.getLogger("envi.codeflow").setLevel(level)


 def find_function_capabilities(ruleset, extractor, f):
@@ -68,14 +72,14 @@ def find_function_capabilities(ruleset, extractor, f):
                bb_features[feature].add(va)
                function_features[feature].add(va)

-        _, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, oint(bb))
+        _, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, extractor.block_offset(bb))

        for rule_name, res in matches.items():
            bb_matches[rule_name].extend(res)
            for va, _ in res:
                function_features[capa.features.MatchedRule(rule_name)].add(va)

-    _, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
+    _, function_matches = capa.engine.match(ruleset.function_rules, function_features, extractor.function_offset(f))
    return function_matches, bb_matches, len(function_features)


@@ -104,12 +108,23 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
    all_function_matches = collections.defaultdict(list)
    all_bb_matches = collections.defaultdict(list)

-    meta = {"feature_counts": {"file": 0, "functions": {},}}
+    meta = {
+        "feature_counts": {
+            "file": 0,
+            "functions": {},
+        }
+    }

-    for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
+    pbar = tqdm.tqdm
+    if disable_progress:
+        # do not use tqdm to avoid unnecessary side effects when caller intends
+        # to disable progress completely
+        pbar = lambda s, *args, **kwargs: s
+
+    for f in pbar(list(extractor.get_functions()), desc="matching", unit=" functions"):
        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
-        meta["feature_counts"]["functions"][f.__int__()] = feature_count
-        logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
+        meta["feature_counts"]["functions"][extractor.function_offset(f)] = feature_count
+        logger.debug("analyzed function 0x%x and extracted %d features", extractor.function_offset(f), feature_count)

        for rule_name, res in function_matches.items():
            all_function_matches[rule_name].extend(res)
@@ -269,16 +284,17 @@ def get_workspace(path, format, should_save=True):
    return vw


-def get_extractor_py2(path, format):
+def get_extractor_py2(path, format, disable_progress=False):
    import capa.features.extractors.viv

-    vw = get_workspace(path, format, should_save=False)
+    with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
+        vw = get_workspace(path, format, should_save=False)

-    try:
-        vw.saveWorkspace()
-    except IOError:
-        # see #168 for discussion around how to handle non-writable directories
-        logger.info("source directory is not writable, won't save intermediate workspace")
+        try:
+            vw.saveWorkspace()
+        except IOError:
+            # see #168 for discussion around how to handle non-writable directories
+            logger.info("source directory is not writable, won't save intermediate workspace")

    return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)

@@ -287,19 +303,39 @@ class UnsupportedRuntimeError(RuntimeError):
    pass


-def get_extractor_py3(path, format):
-    raise UnsupportedRuntimeError()
+def get_extractor_py3(path, format, disable_progress=False):
+    if False:  # TODO: How to decide which backend to use?
+        from smda.SmdaConfig import SmdaConfig
+        from smda.Disassembler import Disassembler
+
+        import capa.features.extractors.smda
+
+        smda_report = None
+        with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
+            config = SmdaConfig()
+            config.STORE_BUFFER = True
+            smda_disasm = Disassembler(config)
+            smda_report = smda_disasm.disassembleFile(path)
+
+        return capa.features.extractors.smda.SmdaFeatureExtractor(smda_report, path)
+    else:
+        import capa.features.extractors.miasm
+
+        with open(path, "rb") as f:
+            buf = f.read()
+
+        return capa.features.extractors.miasm.MiasmFeatureExtractor(buf)


-def get_extractor(path, format):
+def get_extractor(path, format, disable_progress=False):
    """
    raises:
      UnsupportedFormatError:
    """
    if sys.version_info >= (3, 0):
-        return get_extractor_py3(path, format)
+        return get_extractor_py3(path, format, disable_progress=disable_progress)
    else:
-        return get_extractor_py2(path, format)
+        return get_extractor_py2(path, format, disable_progress=disable_progress)


 def is_nursery_rule_path(path):
@@ -315,7 +351,7 @@ def is_nursery_rule_path(path):
    return "nursery" in path


-def get_rules(rule_path):
+def get_rules(rule_path, disable_progress=False):
    if not os.path.exists(rule_path):
        raise IOError("rule path %s does not exist or cannot be accessed" % rule_path)

@@ -343,7 +379,14 @@ def get_rules(rule_path):
                rule_paths.append(rule_path)

    rules = []
-    for rule_path in rule_paths:
+
+    pbar = tqdm.tqdm
+    if disable_progress:
+        # do not use tqdm to avoid unnecessary side effects when caller intends
+        # to disable progress completely
+        pbar = lambda s, *args, **kwargs: s
+
+    for rule_path in pbar(list(rule_paths), desc="loading ", unit="     rules"):
        try:
            rule = capa.rules.Rule.from_yaml_file(rule_path)
        except capa.rules.InvalidRule:
@@ -438,7 +481,23 @@ def main(argv=None):
    parser = argparse.ArgumentParser(
        description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
    )
-    parser.add_argument("sample", type=str, help="path to sample to analyze")
+
+    if sys.version_info >= (3, 0):
+        parser.add_argument(
+            # Python 3 str handles non-ASCII arguments correctly
+            "sample",
+            type=str,
+            help="path to sample to analyze",
+        )
+    else:
+        parser.add_argument(
+            # in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters
+            # https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works
+            # in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/)
+            "sample",
+            type=lambda s: s.decode(sys.getfilesystemencoding()),
+            help="path to sample to analyze",
+        )
    parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__))
    parser.add_argument(
        "-r",
@@ -485,7 +544,9 @@ def main(argv=None):
    try:
        taste = get_file_taste(args.sample)
    except IOError as e:
-        logger.error("%s", str(e))
+        # per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we
+        # handle the IOError separately and reach into the args
+        logger.error("%s", e.args[0])
        return -1

    # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
@@ -526,9 +587,15 @@ def main(argv=None):
        logger.debug("using rules path: %s", rules_path)

    try:
-        rules = get_rules(rules_path)
+        rules = get_rules(rules_path, disable_progress=args.quiet)
        rules = capa.rules.RuleSet(rules)
-        logger.debug("successfully loaded %s rules", len(rules))
+        logger.debug(
+            "successfully loaded %s rules",
+            # during the load of the RuleSet, we extract subscope statements into their own rules
+            # that are subsequently `match`ed upon. this inflates the total rule count.
+            # so, filter out the subscope rules when reporting total number of loaded rules.
+            len([i for i in filter(lambda r: "capa/subscope-rule" not in r.meta, rules.rules.values())]),
+        )
        if args.tag:
            rules = rules.filter_rules_by_meta(args.tag)
            logger.debug("selected %s rules", len(rules))
@@ -546,7 +613,7 @@ def main(argv=None):
    else:
        format = args.format
        try:
-            extractor = get_extractor(args.sample, args.format)
+            extractor = get_extractor(args.sample, args.format, disable_progress=args.quiet)
        except UnsupportedFormatError:
            logger.error("-" * 80)
            logger.error(" Input file does not appear to be a PE file.")
--- a/capa/render/init.py
+++ b/capa/render/init.py
@@ -16,15 +16,15 @@ import capa.engine

 def convert_statement_to_result_document(statement):
    """
-        "statement": {
-            "type": "or"
-        },
+    "statement": {
+        "type": "or"
+    },

-        "statement": {
-            "max": 9223372036854775808,
-            "min": 2,
-            "type": "range"
-        },
+    "statement": {
+        "max": 9223372036854775808,
+        "min": 2,
+        "type": "range"
+    },
    """
    statement_type = statement.name.lower()
    result = {"type": statement_type}
@@ -47,28 +47,28 @@ def convert_statement_to_result_document(statement):

 def convert_feature_to_result_document(feature):
    """
-        "feature": {
-            "number": 6,
-            "type": "number"
-        },
+    "feature": {
+        "number": 6,
+        "type": "number"
+    },

-        "feature": {
-            "api": "ws2_32.WSASocket",
-            "type": "api"
-        },
+    "feature": {
+        "api": "ws2_32.WSASocket",
+        "type": "api"
+    },

-        "feature": {
-            "match": "create TCP socket",
-            "type": "match"
-        },
+    "feature": {
+        "match": "create TCP socket",
+        "type": "match"
+    },

-        "feature": {
-            "characteristic": [
-                "loop",
-                true
-            ],
-            "type": "characteristic"
-        },
+    "feature": {
+        "characteristic": [
+            "loop",
+            true
+        ],
+        "type": "characteristic"
+    },
    """
    result = {"type": feature.name, feature.name: feature.get_value_str()}
    if feature.description:
@@ -80,15 +80,15 @@ def convert_feature_to_result_document(feature):

 def convert_node_to_result_document(node):
    """
-        "node": {
-            "type": "statement",
-            "statement": { ... }
-        },
+    "node": {
+        "type": "statement",
+        "statement": { ... }
+    },

-        "node": {
-            "type": "feature",
-            "feature": { ... }
-        },
+    "node": {
+        "type": "feature",
+        "feature": { ... }
+    },
    """

    if isinstance(node, capa.engine.Statement):
@@ -152,7 +152,10 @@ def convert_match_to_result_document(rules, capabilities, result):
            scope = rule.meta["scope"]
            doc["node"] = {
                "type": "statement",
-                "statement": {"type": "subscope", "subscope": scope,},
+                "statement": {
+                    "type": "subscope",
+                    "subscope": scope,
+                },
            }

        for location in doc["locations"]:
@@ -257,5 +260,7 @@ class CapaJsonObjectEncoder(json.JSONEncoder):

 def render_json(meta, rules, capabilities):
    return json.dumps(
-        convert_capabilities_to_result_document(meta, rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True,
+        convert_capabilities_to_result_document(meta, rules, capabilities),
+        cls=CapaJsonObjectEncoder,
+        sort_keys=True,
    )
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -36,6 +36,34 @@ def render_meta(doc, ostream):
    ostream.write("\n")


+def find_subrule_matches(doc):
+    """
+    collect the rule names that have been matched as a subrule match.
+    this way we can avoid displaying entries for things that are too specific.
+    """
+    matches = set([])
+
+    def rec(node):
+        if not node["success"]:
+            # there's probably a bug here for rules that do `not: match: ...`
+            # but we don't have any examples of this yet
+            return
+
+        elif node["node"]["type"] == "statement":
+            for child in node["children"]:
+                rec(child)
+
+        elif node["node"]["type"] == "feature":
+            if node["node"]["feature"]["type"] == "match":
+                matches.add(node["node"]["feature"]["match"])
+
+    for rule in rutils.capability_rules(doc):
+        for node in rule["matches"].values():
+            rec(node)
+
+    return matches
+
+
 def render_capabilities(doc, ostream):
    """
    example::
@@ -48,8 +76,16 @@ def render_capabilities(doc, ostream):
        | ...                                                   | ...                                             |
        +-------------------------------------------------------+-------------------------------------------------+
    """
+    subrule_matches = find_subrule_matches(doc)
+
    rows = []
    for rule in rutils.capability_rules(doc):
+        if rule["meta"]["name"] in subrule_matches:
+            # rules that are also matched by other rules should not get rendered by default.
+            # this cuts down on the amount of output while giving approx the same detail.
+            # see #224
+            continue
+
        count = len(rule["matches"])
        if count == 1:
            capability = rutils.bold(rule["meta"]["name"])
@@ -109,7 +145,12 @@ def render_attack(doc, ostream):
                inner_rows.append("%s::%s %s" % (rutils.bold(technique), subtechnique, id))
            else:
                raise RuntimeError("unexpected ATT&CK spec format")
-        rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),))
+        rows.append(
+            (
+                rutils.bold(tactic.upper()),
+                "\n".join(inner_rows),
+            )
+        )

    if rows:
        ostream.write(
@@ -120,6 +161,65 @@ def render_attack(doc, ostream):
        ostream.write("\n")


+def render_mbc(doc, ostream):
+    """
+    example::
+
+        +--------------------------+------------------------------------------------------------+
+        | MBC Objective            | MBC Behavior                                               |
+        |--------------------------+------------------------------------------------------------|
+        | ANTI-BEHAVIORAL ANALYSIS | Virtual Machine Detection::Instruction Testing [B0009.029] |
+        | COLLECTION               | Keylogging::Polling [F0002.002]                            |
+        | COMMUNICATION            | Interprocess Communication::Create Pipe [C0003.001]        |
+        |                          | Interprocess Communication::Write Pipe [C0003.004]         |
+        | IMPACT                   | Remote Access::Reverse Shell [B0022.001]                   |
+        +--------------------------+------------------------------------------------------------+
+    """
+    objectives = collections.defaultdict(set)
+    for rule in rutils.capability_rules(doc):
+        if not rule["meta"].get("mbc"):
+            continue
+
+        mbcs = rule["meta"]["mbc"]
+        if not isinstance(mbcs, list):
+            raise ValueError("invalid rule: MBC mapping is not a list")
+
+        for mbc in mbcs:
+            objective, _, rest = mbc.partition("::")
+            if "::" in rest:
+                behavior, _, rest = rest.partition("::")
+                method, _, id = rest.rpartition(" ")
+                objectives[objective].add((behavior, method, id))
+            else:
+                behavior, _, id = rest.rpartition(" ")
+                objectives[objective].add((behavior, id))
+
+    rows = []
+    for objective, behaviors in sorted(objectives.items()):
+        inner_rows = []
+        for spec in sorted(behaviors):
+            if len(spec) == 2:
+                behavior, id = spec
+                inner_rows.append("%s %s" % (rutils.bold(behavior), id))
+            elif len(spec) == 3:
+                behavior, method, id = spec
+                inner_rows.append("%s::%s %s" % (rutils.bold(behavior), method, id))
+            else:
+                raise RuntimeError("unexpected MBC spec format")
+        rows.append(
+            (
+                rutils.bold(objective.upper()),
+                "\n".join(inner_rows),
+            )
+        )
+
+    if rows:
+        ostream.write(
+            tabulate.tabulate(rows, headers=[width("MBC Objective", 25), width("MBC Behavior", 75)], tablefmt="psql")
+        )
+        ostream.write("\n")
+
+
 def render_default(doc):
    ostream = rutils.StringIO()

@@ -127,6 +227,8 @@ def render_default(doc):
    ostream.write("\n")
    render_attack(doc, ostream)
    ostream.write("\n")
+    render_mbc(doc, ostream)
+    ostream.write("\n")
    render_capabilities(doc, ostream)

    return ostream.getvalue()
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -6,13 +6,20 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

+import re
 import uuid
 import codecs
 import logging
 import binascii
 import functools

+try:
+    from functools import lru_cache
+except ImportError:
+    from backports.functools_lru_cache import lru_cache
+
 import six
+import yaml
 import ruamel.yaml

 import capa.engine
@@ -25,7 +32,6 @@ from capa.features import MAX_BYTES_FEATURE_SIZE

 logger = logging.getLogger(__name__)

-
 # these are the standard metadata fields, in the preferred order.
 # when reformatted, any custom keys will come after these.
 META_KEYS = (
@@ -69,7 +75,6 @@ SUPPORTED_FEATURES = {
    FUNCTION_SCOPE: {
        # plus basic block scope features, see below
        capa.features.basicblock.BasicBlock,
-        capa.features.Characteristic("switch"),
        capa.features.Characteristic("calls from"),
        capa.features.Characteristic("calls to"),
        capa.features.Characteristic("loop"),
@@ -263,7 +268,7 @@ def parse_description(s, value_type, description=None):
                raise InvalidRule(
                    "unexpected bytes value: byte sequences must be no larger than %s bytes" % MAX_BYTES_FEATURE_SIZE
                )
-        elif value_type in {"number", "offset"}:
+        elif value_type in ("number", "offset") or value_type.startswith(("number/", "offset/")):
            try:
                value = parse_int(value)
            except ValueError:
@@ -272,27 +277,63 @@ def parse_description(s, value_type, description=None):
    return value, description


+def pop_statement_description_entry(d):
+    """
+    extracts the description for statements and removes the description entry from the document
+    a statement can only have one description
+
+    example:
+    the features definition
+      - or:
+        - description: statement description
+        - number: 1
+          description: feature description
+
+    becomes
+      <statement>: [
+        { "description": "statement description" },  <-- extracted here
+        { "number": 1, "description": "feature description" }
+      ]
+    """
+    if not isinstance(d, list):
+        return None
+
+    # identify child of form '{ "description": <description> }'
+    descriptions = list(filter(lambda c: isinstance(c, dict) and len(c) == 1 and "description" in c, d))
+    if len(descriptions) > 1:
+        raise InvalidRule("statements can only have one description")
+
+    if not descriptions:
+        return None
+
+    description = descriptions[0]
+    d.remove(description)
+
+    return description["description"]
+
+
 def build_statements(d, scope):
    if len(d.keys()) > 2:
        raise InvalidRule("too many statements")

    key = list(d.keys())[0]
+    description = pop_statement_description_entry(d[key])
    if key == "and":
-        return And([build_statements(dd, scope) for dd in d[key]], description=d.get("description"))
+        return And([build_statements(dd, scope) for dd in d[key]], description=description)
    elif key == "or":
-        return Or([build_statements(dd, scope) for dd in d[key]], description=d.get("description"))
+        return Or([build_statements(dd, scope) for dd in d[key]], description=description)
    elif key == "not":
        if len(d[key]) != 1:
            raise InvalidRule("not statement must have exactly one child statement")
-        return Not(build_statements(d[key][0], scope), description=d.get("description"))
+        return Not(build_statements(d[key][0], scope), description=description)
    elif key.endswith(" or more"):
        count = int(key[: -len("or more")])
-        return Some(count, [build_statements(dd, scope) for dd in d[key]], description=d.get("description"))
+        return Some(count, [build_statements(dd, scope) for dd in d[key]], description=description)
    elif key == "optional":
        # `optional` is an alias for `0 or more`
        # which is useful for documenting behaviors,
        # like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
-        return Some(0, [build_statements(dd, scope) for dd in d[key]], description=d.get("description"))
+        return Some(0, [build_statements(dd, scope) for dd in d[key]], description=description)

    elif key == "function":
        if scope != FILE_SCOPE:
@@ -351,18 +392,18 @@ def build_statements(d, scope):

        count = d[key]
        if isinstance(count, int):
-            return Range(feature, min=count, max=count, description=d.get("description"))
+            return Range(feature, min=count, max=count, description=description)
        elif count.endswith(" or more"):
            min = parse_int(count[: -len(" or more")])
            max = None
-            return Range(feature, min=min, max=max, description=d.get("description"))
+            return Range(feature, min=min, max=max, description=description)
        elif count.endswith(" or fewer"):
            min = None
            max = parse_int(count[: -len(" or fewer")])
-            return Range(feature, min=min, max=max, description=d.get("description"))
+            return Range(feature, min=min, max=max, description=description)
        elif count.startswith("("):
            min, max = parse_range(count)
-            return Range(feature, min=min, max=max, description=d.get("description"))
+            return Range(feature, min=min, max=max, description=description)
        else:
            raise InvalidRule("unexpected range: %s" % (count))
    elif key == "string" and not isinstance(d[key], six.string_types):
@@ -386,26 +427,6 @@ def second(s):
    return s[1]


-# we use the ruamel.yaml parser because it supports roundtripping of documents with comments.
-yaml = ruamel.yaml.YAML(typ="rt")
-
-
-# use block mode, not inline json-like mode
-yaml.default_flow_style = False
-
-
-# indent lists by two spaces below their parent
-#
-#     features:
-#       - or:
-#         - mnemonic: aesdec
-#         - mnemonic: vaesdec
-yaml.indent(sequence=2, offset=2)
-
-# avoid word wrapping
-yaml.width = 4096
-
-
 class Rule(object):
    def __init__(self, name, scope, statement, meta, definition=""):
        super(Rule, self).__init__()
@@ -534,7 +555,7 @@ class Rule(object):
        return self.statement.evaluate(features)

    @classmethod
-    def from_dict(cls, d, s):
+    def from_dict(cls, d, definition):
        name = d["rule"]["meta"]["name"]
        # if scope is not specified, default to function scope.
        # this is probably the mode that rule authors will start with.
@@ -552,17 +573,65 @@ class Rule(object):
        if scope not in SUPPORTED_FEATURES.keys():
            raise InvalidRule("{:s} is not a supported scope".format(scope))

-        return cls(name, scope, build_statements(statements[0], scope), d["rule"]["meta"], s)
+        return cls(name, scope, build_statements(statements[0], scope), d["rule"]["meta"], definition)
+
+    @staticmethod
+    @lru_cache()
+    def _get_yaml_loader():
+        """pick the PyYAML loader class passed to `yaml.load` in `from_yaml`; the result is cached by `lru_cache`."""
+        try:
+            # prefer libyaml's CLoader for speed, see #306 (Linux: libyaml-dev; Windows: WHLs from pyyaml.org/pypi)
+            # NOTE(review): CLoader/Loader are not safe loaders; consider CSafeLoader/SafeLoader for untrusted rules
+            loader = yaml.CLoader
+            logger.debug("using libyaml CLoader.")
+        except AttributeError:  # PyYAML only exposes `CLoader` when its libyaml bindings are available
+            loader = yaml.Loader
+            logger.debug("unable to import libyaml CLoader, falling back to Python yaml parser.")
+            logger.debug("this will be slower to load rules.")
+
+        return loader
+
+    @staticmethod
+    def _get_ruamel_yaml_parser():
+        # use ruamel to enable nice formatting
+
+        # we use the ruamel.yaml parser because it supports roundtripping of documents with comments.
+        y = ruamel.yaml.YAML(typ="rt")
+
+        # use block mode, not inline json-like mode
+        y.default_flow_style = False
+
+        # leave quotes unchanged
+        y.preserve_quotes = True
+
+        # indent lists by two spaces below their parent
+        #
+        #     features:
+        #       - or:
+        #         - mnemonic: aesdec
+        #         - mnemonic: vaesdec
+        y.indent(sequence=2, offset=2)
+
+        # avoid word wrapping
+        y.width = 4096
+
+        return y

    @classmethod
-    def from_yaml(cls, s):
-        return cls.from_dict(yaml.load(s), s)
+    def from_yaml(cls, s, use_ruamel=False):
+        if use_ruamel:
+            # ruamel enables nice formatting and doc roundtripping with comments
+            doc = cls._get_ruamel_yaml_parser().load(s)
+        else:
+            # use pyyaml because it can be much faster than ruamel (pure python)
+            doc = yaml.load(s, Loader=cls._get_yaml_loader())
+        return cls.from_dict(doc, s)

    @classmethod
-    def from_yaml_file(cls, path):
+    def from_yaml_file(cls, path, use_ruamel=False):
        with open(path, "rb") as f:
            try:
-                return cls.from_yaml(f.read().decode("utf-8"))
+                return cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel)
            except InvalidRule as e:
                raise InvalidRuleWithPath(path, str(e))

@@ -576,12 +645,25 @@ class Rule(object):
        # but not for rule logic.
        # programmatic generation of rules is not yet supported.

-        definition = yaml.load(self.definition)
-        # definition retains a reference to `meta`,
-        # so we're updating that in place.
-        definition["rule"]["meta"] = self.meta
-        meta = self.meta
+        # use ruamel because it supports round tripping.
+        # pyyaml will lose the existing ordering of rule statements.
+        definition = self._get_ruamel_yaml_parser().load(self.definition)

+        # we want to apply any updates that have been made to `meta`.
+        # so we would like to assign it like this:
+        #
+        #     definition["rule"]["meta"] = self.meta
+        #
+        # however, `self.meta` is not ordered, it's just a dict, so subsequent formatting doesn't work.
+        # so, we'll manually copy the keys over, re-using the existing ordereddict/CommentedMap
+        meta = definition["rule"]["meta"]
+        for k in list(meta.keys()):  # snapshot the keys: we delete from `meta` while looping
+            if k not in self.meta:
+                del meta[k]
+        for k, v in self.meta.items():
+            meta[k] = v
+
+        # the name and scope of the rule instance overrides anything in meta.
        meta["name"] = self.name
        meta["scope"] = self.scope

@@ -618,14 +700,43 @@ class Rule(object):
            del meta[key]

        ostream = six.BytesIO()
-        yaml.dump(definition, ostream)
+        self._get_ruamel_yaml_parser().dump(definition, ostream)

        for key, value in hidden_meta.items():
            if value is None:
                continue
            meta[key] = value

-        return ostream.getvalue().decode("utf-8").rstrip("\n") + "\n"
+        doc = ostream.getvalue().decode("utf-8").rstrip("\n") + "\n"
+        # when we have something like:
+        #
+        #     and:
+        #       - string: foo
+        #         description: bar
+        #
+        # we want the `description` horizontally aligned with the start of the `string` (like above).
+        # however, ruamel will give us (which I don't think is even valid yaml):
+        #
+        #     and:
+        #       - string: foo
+        #      description: bar
+        #
+        # tweaking `ruamel.indent()` doesn't quite give us the control we want.
+        # so, add the two extra spaces that we've determined we need through experimentation.
+        # see #263
+        # only do this for the features section, so the meta description doesn't get reformatted
+        # assumes features section always exists
+        features_offset = doc.find("features")
+        doc = doc[:features_offset] + doc[features_offset:].replace("  description:", "    description:")
+
+        # for negative hex numbers, yaml dump outputs:
+        # - offset: !!int '0x-30'
+        # we prefer:
+        # - offset: -0x30
+        # the below regex makes these adjustments and while ugly, we don't have to explore the ruamel.yaml insides
+        doc = re.sub(r"!!int '0x-([0-9a-fA-F]+)'", r"-0x\1", doc)
+
+        return doc


 def get_rules_with_scope(rules, scope):
@@ -774,7 +885,8 @@ class RuleSet(object):
        given a collection of rules, collect the rules that are needed at the given scope.
        these rules are ordered topologically.

-        don't include "lib" rules, unless they are dependencies of other rules.
+        don't include auto-generated "subscope" rules.
+        we want to include general "lib" rules here - even if they are not dependencies of other rules, see #398
        """
        scope_rules = set([])

@@ -783,7 +895,7 @@ class RuleSet(object):
        #  at lower scope, e.g. function scope.
        # so, we find all dependencies of all rules, and later will filter them down.
        for rule in rules:
-            if rule.meta.get("lib", False):
+            if rule.meta.get("capa/subscope-rule", False):
                continue

            scope_rules.update(get_rules_and_dependencies(rules, rule.name))
--- a/capa/version.py
+++ b/capa/version.py
@@ -1 +1 @@
-__version__ = "1.1.0"
+__version__ = "1.4.0"
--- a/doc/img/capa_explorer.png
+++ b/doc/img/capa_explorer.png
--- a/doc/img/ida_plugin_example_1.png
+++ b/doc/img/ida_plugin_example_1.png
--- a/doc/img/ida_plugin_example_2.png
+++ b/doc/img/ida_plugin_example_2.png
--- a/doc/img/ida_plugin_intro.gif
+++ b/doc/img/ida_plugin_intro.gif
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -22,12 +22,15 @@ By default, on MacOS Catalina or greater, Gatekeeper will block execution of the
 ![approve dialog](img/approve.png)

 ## Method 2: Using capa as a Python library
-To install capa as a Python library, you'll need to install a few dependencies, and then use `pip` to fetch the capa module.
-Note: this technique doesn't pull the default rule set, so you should check it out separately from [capa-rules](https://github.com/fireeye/capa-rules/) and pass the directory to the entrypoint using `-r`.
+To install capa as a Python library use `pip` to fetch the `flare-capa` module.
+
+#### *Note*:
+This method is appropriate for integrating capa in an existing project.
+This technique doesn't pull the default rule set, so you should check it out separately from [capa-rules](https://github.com/fireeye/capa-rules/) and pass the directory to the entrypoint using `-r` or set the rules path in the IDA Pro plugin.
+Alternatively, see Method 3 below.

 ### 1. Install capa module
-Second, use `pip` to install the capa module to your local Python environment. This fetches the library code to your computer but does not keep editable source files around for you to hack on. If you'd like to edit the source files, see below.
-`$ pip install https://github.com/fireeye/capa/archive/master.zip`
+Use `pip` to install the capa module to your local Python environment. This fetches the library code to your computer but does not keep editable source files around for you to hack on. If you'd like to edit the source files, see below. `$ pip install flare-capa`

 ### 2. Use capa
 You can now import the `capa` module from a Python script or use the IDA Pro plugins from the `capa/ida` directory. For more information please see the [usage](usage.md) documentation.
@@ -71,8 +74,20 @@ Note that some development dependencies (including the black code formatter) req
 To check the code style, formatting and run the tests you can run the script `scripts/ci.sh`.
 You can run it with the argument `no_tests` to skip the tests and only run the code style and formatting: `scripts/ci.sh no_tests`

-### 3. Setup hooks [optional]
+### 3. Compile binary using PyInstaller
+We compile capa standalone binaries using PyInstaller. To reproduce the build process check out the source code as described above and follow these steps.

+#### Install PyInstaller:
+For Python 2.7: `$ pip install 'pyinstaller==3.*'` (PyInstaller 4 doesn't support Python 2.7)
+
+For Python 3: `$ pip install pyinstaller`
+
+#### Run Pyinstaller
+`$ pyinstaller .github/pyinstaller/pyinstaller.spec`
+
+You can find the compiled binary in the created directory `dist/`.
+
+### 4. Setup hooks [optional]
 If you plan to contribute to capa, you may want to setup the hooks.
 Run `scripts/setup-hooks.sh` to set the following hooks up:
 - The `pre-commit` hook runs checks before every `git commit`.
@@ -81,4 +96,3 @@ Run `scripts/setup-hooks.sh` to set the following hooks up:
 - The `pre-push` hook runs checks before every `git push`.
  It runs `scripts/ci.sh` aborting the push if there are code style or rule linter offenses or if the tests fail.
  This way you can ensure everything is alright before sending a pull request.
-
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -4,37 +4,10 @@ See `capa -h` for all supported arguments and usage examples.

 ## tips and tricks

-  - [match only rules by given author or namespace](#only-run-selected-rules)
-  - [IDA Pro capa explorer](#capa-explorer)
-  - [IDA Pro rule generator](#rule-generator)
-
 ### only run selected rules
 Use the `-t` option to run rules with the given metadata value (see the rule fields `rule.meta.*`).
 For example, `capa -t william.ballenthin@mandiant.com` runs rules that reference Willi's email address (probably as the author), or
 `capa -t communication` runs rules with the namespace `communication`.

-### IDA Pro integrations
-You can run capa from within IDA Pro. Run `capa/main.py` via `File - Script file...` (or ALT + F7). 
-When running in IDA, capa uses IDA's disassembly and file analysis as its backend. 
-These results may vary from the standalone version that uses vivisect.
-IDA's analysis is generally a bit faster and more thorough than vivisect's, so you might prefer this mode.
-
-When run under IDA, capa supports both Python 2 and Python 3 interpreters.
-If you encounter issues with your specific setup, please open a new [Issue](https://github.com/fireeye/capa/issues).
-
-Additionally, capa comes with an IDA Pro plugin located in the `capa/ida` directory: the explorer.
-
-#### capa explorer
-The capa explorer allows you to interactively display and browse capabilities capa identified in a binary.
-As you select rules or logic, capa will highlight the addresses that support its analysis conclusions.
-We like to use capa to help find the most interesting parts of a program, such as where the C2 mechanism might be.
-
-![capa explorer](img/capa_explorer.png)
-
-To install the plugin, you'll need to be running IDA Pro 7.4 or 7.5 with either Python 2 or Python 3.
-Next make sure pip commands are run using the Python install that is configured for your IDA install:
-
-  1. Only if running Python 2.7, run command `$ pip install https://github.com/williballenthin/vivisect/zipball/master`
-  2. Run `$ pip install .` from capa root directory
-  3. Open IDA and navigate to `File > Script file…` or `Alt+F7`
-  4. Navigate to `<capa_install_dir>\capa\ida\` and choose `ida_capa_explorer.py`
+### IDA Pro plugin: capa explorer
+Please check out the [capa explorer documentation](/capa/ida/plugin/README.md).
--- a/2
+++ b/2
--- a/scripts/bulk-process.py
+++ b/scripts/bulk-process.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+"""
+bulk-process
+
+Invoke capa recursively against a directory of samples
+and emit a JSON document mapping the file paths to their results.
+
+By default, this will use subprocesses for parallelism.
+Use `-n/--parallelism` to change the subprocess count from
+ the default of current CPU count.
+Use `--no-mp` to use threads instead of processes,
+ which is probably not useful unless you set `--parallelism=1`.
+
+example:
+
+    $ python scripts/bulk-process.py /tmp/suspicious
+    {
+      "/tmp/suspicious/suspicious.dll_": {
+        "rules": {
+          "encode data using XOR": {
+            "matches": {
+              "268440358": {
+              [...]
+      "/tmp/suspicious/1.dll_": { ... }
+      "/tmp/suspicious/2.dll_": { ... }
+    }
+
+
+usage:
+
+    usage: bulk-process.py [-h] [-r RULES] [-d] [-q] [-n PARALLELISM] [--no-mp]
+                           input
+
+    detect capabilities in programs.
+
+    positional arguments:
+      input                 Path to directory of files to recursively analyze
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      -r RULES, --rules RULES
+                            Path to rule file or directory, use embedded rules by
+                            default
+      -d, --debug           Enable debugging output on STDERR
+      -q, --quiet           Disable all output but errors
+      -n PARALLELISM, --parallelism PARALLELISM
+                            parallelism factor
+      --no-mp               disable subprocesses
+
+Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+You may obtain a copy of the License at: [package root]/LICENSE.txt
+Unless required by applicable law or agreed to in writing, software distributed under the License
+ is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License.
+"""
+import sys
+import json
+import logging
+import os.path
+import argparse
+import multiprocessing
+import multiprocessing.pool
+
+import capa
+import capa.main
+import capa.render
+
+logger = logging.getLogger("capa")
+
+
+def get_capa_results(args):
+    """
+    run capa against the file at the given path, using the given rules.
+
+    args is a tuple, containing:
+      rules (capa.rules.RuleSet): the rules to match
+      format (str): the name of the sample file format
+      path (str): the file system path to the sample to process
+
+    args is a tuple because i'm not quite sure how to unpack multiple arguments using `map`.
+
+    returns a dict with two required keys:
+      path (str): the file system path of the sample to process
+      status (str): either "error" or "ok"
+
+    when status == "error", then a human readable message is found in property "error".
+    when status == "ok", then the capa results are found in the property "ok".
+
+    the capa results are a dictionary with the following keys:
+      meta (dict): the meta analysis results
+      capabilities (dict): the matched capabilities and their result objects
+    """
+    rules, format, path = args
+    logger.info("computing capa results for: %s", path)
+    try:
+        extractor = capa.main.get_extractor(path, format, disable_progress=True)
+    except capa.main.UnsupportedFormatError:
+        # i'm not 100% sure that multiprocessing will reliably raise exceptions across process boundaries.
+        # so instead, return an object with explicit success/failure status.
+        #
+        # if success, then status=ok, and results found in property "ok"
+        # if error, then status=error, and human readable message in property "error"
+        return {
+            "path": path,
+            "status": "error",
+            "error": "input file does not appear to be a PE file: %s" % path,
+        }
+    except capa.main.UnsupportedRuntimeError:
+        return {
+            "path": path,
+            "status": "error",
+            "error": "unsupported runtime or Python interpreter",
+        }
+    except Exception as e:
+        return {
+            "path": path,
+            "status": "error",
+            "error": "unexpected error: %s" % (e),
+        }
+
+    meta = capa.main.collect_metadata("", path, "", format, extractor)
+    capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
+    meta["analysis"].update(counts)
+
+    return {
+        "path": path,
+        "status": "ok",
+        "ok": {
+            "meta": meta,
+            "capabilities": capabilities,
+        },
+    }
+
+
+def main(argv=None):
+    """
+    run capa against every file under the input directory and print a JSON document of the results.
+    argv defaults to `sys.argv[1:]`; returns 0 on success, or -1 when the rules cannot be loaded.
+    """
+    if argv is None:
+        argv = sys.argv[1:]
+
+    parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+    parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze")
+    parser.add_argument(
+        "-r",
+        "--rules",
+        type=str,
+        default="(embedded rules)",
+        help="Path to rule file or directory, use embedded rules by default",
+    )
+    parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
+    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
+    parser.add_argument("-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor")
+    parser.add_argument("--no-mp", action="store_true", help="disable subprocesses")
+    args = parser.parse_args(args=argv)
+
+    if args.quiet:
+        logging.basicConfig(level=logging.ERROR)
+        logging.getLogger().setLevel(logging.ERROR)
+    elif args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+        logging.getLogger().setLevel(logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+        logging.getLogger().setLevel(logging.INFO)
+
+    # disable vivisect-related logging, it's verbose and not relevant for capa users
+    capa.main.set_vivisect_log_level(logging.CRITICAL)
+
+    # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
+    # tqdm bails when trying to render the progress bar in this setup.
+    # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
+    # see #380 and: https://stackoverflow.com/a/3259271/87207
+    import codecs
+
+    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
+
+    if args.rules == "(embedded rules)":
+        logger.info("using default embedded rules")
+        logger.debug("detected running from source")
+        args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
+        logger.debug("default rule path (source method): %s", args.rules)
+    else:
+        logger.info("using rules path: %s", args.rules)
+
+    try:
+        rules = capa.main.get_rules(args.rules)
+        rules = capa.rules.RuleSet(rules)
+        logger.info("successfully loaded %s rules", len(rules))
+    except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
+        logger.error("%s", str(e))
+        return -1
+
+    samples = []
+    for (base, directories, files) in os.walk(args.input):
+        for file in files:
+            samples.append(os.path.join(base, file))
+
+    def pmap(f, args, parallelism=multiprocessing.cpu_count()):
+        """apply the given function f to the given args using subprocesses"""
+        return multiprocessing.Pool(parallelism).imap(f, args)
+
+    def tmap(f, args, parallelism=multiprocessing.cpu_count()):
+        """apply the given function f to the given args using threads"""
+        return multiprocessing.pool.ThreadPool(parallelism).imap(f, args)
+
+    def map(f, args, parallelism=None):
+        """apply the given function f to the given args in the current thread"""
+        for arg in args:
+            yield f(arg)
+
+    if args.no_mp:
+        if args.parallelism == 1:
+            logger.debug("using current thread mapper")
+            mapper = map
+        else:
+            logger.debug("using threading mapper")
+            mapper = tmap
+    else:
+        logger.debug("using process mapper")
+        mapper = pmap
+
+    results = {}
+    for result in mapper(get_capa_results, [(rules, "pe", sample) for sample in samples], parallelism=args.parallelism):
+        if result["status"] == "error":
+            logger.warning(result["error"])
+        elif result["status"] == "ok":
+            meta = result["ok"]["meta"]
+            capabilities = result["ok"]["capabilities"]
+            # our renderer expects to emit a json document for a single sample
+            # so we deserialize the json document, store it in a larger dict, and we'll subsequently re-encode.
+            results[result["path"]] = json.loads(capa.render.render_json(meta, rules, capabilities))
+        else:
+            raise ValueError("unexpected status: %s" % (result["status"]))
+
+    print(json.dumps(results))
+
+    logger.info("done.")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/scripts/capa_as_library.py
+++ b/scripts/capa_as_library.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+
+import json
+import collections
+
+import capa.main
+import capa.rules
+import capa.engine
+import capa.features
+import capa.render.utils as rutils
+from capa.engine import *
+from capa.render import convert_capabilities_to_result_document
+
+# edit this to set the path for file to analyze and rule directory
+RULES_PATH = "/tmp/capa/rules/"
+
+# load rules from disk
+rules = capa.main.get_rules(RULES_PATH, disable_progress=True)
+rules = capa.rules.RuleSet(rules)
+
+# == Render dictionary helpers
+def render_meta(doc, ostream):
+    """copy the sample hashes and path from the result document into `ostream`."""
+    sample = doc["meta"]["sample"]
+    for field in ("md5", "sha1", "sha256", "path"):
+        ostream[field] = sample[field]
+
+
+def find_subrule_matches(doc):
+    """
+    gather the names of rules that were matched through a `match` feature in another rule's match tree.
+    callers use this set to avoid displaying entries for things that are too specific.
+    """
+    subrule_names = set([])
+
+    def visit(node):
+        if not node["success"]:
+            # rules that do `not: match: ...` are probably mishandled here, but none exist yet
+            return
+
+        kind = node["node"]["type"]
+        if kind == "statement":
+            for child in node["children"]:
+                visit(child)
+        elif kind == "feature":
+            feature = node["node"]["feature"]
+            if feature["type"] == "match":
+                subrule_names.add(feature["match"])
+
+    for rule in rutils.capability_rules(doc):
+        for match in rule["matches"].values():
+            visit(match)
+
+    return subrule_names
+
+
+def render_capabilities(doc, ostream):
+    """
+    group matched rule names (with match counts) by namespace under ostream["CAPABILITY"], for example::
+        {'CAPABILITY': {'host-interaction/cli': ['accept command line arguments'],
+                'host-interaction/process': ['allocate thread local storage (2 matches)'],
+                'anti-analysis/anti-debugging/debugger-detection': ['check for time delay via GetTickCount'],
+                'anti-analysis/anti-emulation/wine': ['check if process is running under wine'],
+                'executable/pe/section/rsrc': ['contain a resource (.rsrc) section'],
+                'host-interaction/file-system/write': ['write file (3 matches)']}
+        }
+    """
+    subrule_matches = find_subrule_matches(doc)
+
+    ostream["CAPABILITY"] = dict()
+    for rule in rutils.capability_rules(doc):
+        if rule["meta"]["name"] in subrule_matches:
+            # rules that are also matched by other rules should not get rendered by default.
+            # this cuts down on the amount of output while giving approx the same detail.
+            # see #224
+            continue
+
+        count = len(rule["matches"])
+        if count == 1:
+            capability = rule["meta"]["name"]
+        else:
+            capability = "%s (%d matches)" % (rule["meta"]["name"], count)
+
+        ostream["CAPABILITY"].setdefault(rule["meta"]["namespace"], list())
+        ostream["CAPABILITY"][rule["meta"]["namespace"]].append(capability)
+
+
+def render_attack(doc, ostream):
+    """
+    collect the ATT&CK techniques of matched rules per tactic under ostream["ATTCK"], for example::
+        {'ATTCK': {'COLLECTION': ['Input Capture::Keylogging [T1056.001]'],
+            'DEFENSE EVASION': ['Obfuscated Files or Information [T1027]',
+                                'Virtualization/Sandbox Evasion::System Checks '
+                                '[T1497.001]'],
+            'DISCOVERY': ['File and Directory Discovery [T1083]',
+                          'Query Registry [T1012]',
+                          'System Information Discovery [T1082]'],
+            'EXECUTION': ['Shared Modules [T1129]']}
+        }
+    """
+    ostream["ATTCK"] = dict()
+    tactics = collections.defaultdict(set)
+    for rule in rutils.capability_rules(doc):
+        if not rule["meta"].get("att&ck"):
+            continue
+
+        for attack in rule["meta"]["att&ck"]:
+            tactic, _, rest = attack.partition("::")
+            if "::" in rest:
+                technique, _, rest = rest.partition("::")
+                subtechnique, _, id = rest.rpartition(" ")
+                tactics[tactic].add((technique, subtechnique, id))
+            else:
+                technique, _, id = rest.rpartition(" ")
+                tactics[tactic].add((technique, id))
+
+    for tactic, techniques in sorted(tactics.items()):
+        inner_rows = []
+        for spec in sorted(techniques):
+            if len(spec) == 2:
+                technique, id = spec
+                inner_rows.append("%s %s" % (technique, id))
+            elif len(spec) == 3:
+                technique, subtechnique, id = spec
+                inner_rows.append("%s::%s %s" % (technique, subtechnique, id))
+            else:
+                raise RuntimeError("unexpected ATT&CK spec format")
+        ostream["ATTCK"].setdefault(tactic.upper(), inner_rows)
+
+
+def render_mbc(doc, ostream):
+    """
+    example::
+        {'MBC': {'ANTI-BEHAVIORAL ANALYSIS': ['Debugger Detection::Timing/Delay Check '
+                                      'GetTickCount [B0001.032]',
+                                      'Emulator Detection [B0004]',
+                                      'Virtual Machine Detection::Instruction '
+                                      'Testing [B0009.029]',
+                                      'Virtual Machine Detection [B0009]'],
+         'COLLECTION': ['Keylogging::Polling [F0002.002]'],
+         'CRYPTOGRAPHY': ['Encrypt Data::RC4 [C0027.009]',
+                          'Generate Pseudo-random Sequence::RC4 PRGA '
+                          '[C0021.004]']}
+        }
+    """
+    ostream["MBC"] = dict()
+    grouped = collections.defaultdict(set)
+    for rule in rutils.capability_rules(doc):
+        mbcs = rule["meta"].get("mbc")
+        if not mbcs:
+            continue
+        if not isinstance(mbcs, list):
+            raise ValueError("invalid rule: MBC mapping is not a list")
+
+        for mbc in mbcs:
+            # each entry reads `objective::behavior[::method] id`
+            objective, _, tail = mbc.partition("::")
+            if "::" not in tail:
+                behavior, _, id = tail.rpartition(" ")
+                grouped[objective].add((behavior, id))
+                continue
+            behavior, _, tail = tail.partition("::")
+            method, _, id = tail.rpartition(" ")
+            grouped[objective].add((behavior, method, id))
+
+    # emit one sorted list of rows per objective, keyed by the upper-cased objective name
+    for objective, behaviors in sorted(grouped.items()):
+        inner_rows = []
+        for spec in sorted(behaviors):
+            # 3-tuples carry a method, 2-tuples do not
+            if len(spec) == 3:
+                inner_rows.append("%s::%s %s" % spec)
+            elif len(spec) == 2:
+                inner_rows.append("%s %s" % spec)
+            else:
+                raise RuntimeError("unexpected MBC spec format")
+        ostream["MBC"].setdefault(objective.upper(), inner_rows)
+
+
+def render_dictionary(doc):
+    """build a plain dict from `doc` holding the sample metadata, ATT&CK, MBC, and capability sections."""
+    ostream = dict()
+    # each renderer fills in its own section of the shared dict
+    for render in (render_meta, render_attack, render_mbc, render_capabilities):
+        render(doc, ostream)
+
+    return ostream
+
+
+# ==== render dictionary helpers
+def capa_details(file_path, output_format="dictionary"):
+    """
+    analyze `file_path` with the module-level `rules` and render the results as `output_format`:
+    "dictionary", "json", or "texttable". any other value yields False.
+    """
+    # extract features and find capabilities
+    extractor = capa.main.get_extractor(file_path, "auto", disable_progress=True)
+    capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
+
+    # collect metadata (used only to make rendering more complete)
+    meta = capa.main.collect_metadata("", file_path, RULES_PATH, "auto", extractor)
+    meta["analysis"].update(counts)
+
+    if output_format == "dictionary":
+        # python dictionary holding a simplified version of the text table
+        return render_dictionary(convert_capabilities_to_result_document(meta, rules, capabilities))
+    if output_format == "json":
+        # the json result document, parsed back into python objects
+        return json.loads(capa.render.render_json(meta, rules, capabilities))
+    if output_format == "texttable":
+        # human readable text table
+        return capa.render.render_default(meta, rules, capabilities)
+    # unrecognized output format
+    return False
--- a/scripts/capafmt.py
+++ b/scripts/capafmt.py
@@ -38,6 +38,12 @@ def main(argv=None):
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
+    parser.add_argument(
+        "-c",
+        "--check",
+        action="store_true",
+        help="Don't output (reformatted) rule, only return status. 0 = no changes, 1 = would reformat",
+    )
    args = parser.parse_args(args=argv)

    if args.verbose:
@@ -50,12 +56,22 @@ def main(argv=None):
    logging.basicConfig(level=level)
    logging.getLogger("capafmt").setLevel(level)

-    rule = capa.rules.Rule.from_yaml_file(args.path)
+    rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True)
+    reformatted_rule = rule.to_yaml()
+
+    if args.check:
+        if rule.definition == reformatted_rule:
+            logger.info("rule is formatted correctly, nice! (%s)", rule.name)
+            return 0
+        else:
+            logger.info("rule requires reformatting (%s)", rule.name)
+            return 1
+
    if args.in_place:
        with open(args.path, "wb") as f:
-            f.write(rule.to_yaml().encode("utf-8"))
+            f.write(reformatted_rule.encode("utf-8"))
    else:
-        print(rule.to_yaml().rstrip("\n"))
+        print(reformatted_rule)

    return 0

--- a/scripts/import-to-bn.py
+++ b/scripts/import-to-bn.py
@@ -1,112 +1,111 @@
-"""
-Binary Ninja plugin that imports a capa report,
-produced via `capa --json /path/to/sample`,
-into the current database.
-
-It will mark up functions with their capa matches, like:
-
-    ; capa: print debug messages (host-interaction/log/debug/write-event)
-    ; capa: delete service (host-interaction/service/delete)
-    ; Attributes: bp-based frame
-
-    public UninstallService
-    UninstallService proc near
-    ...
-
-To use, invoke from the Binary Ninja Tools menu, or from the 
-command-palette.
-
-Adapted for Binary Ninja by @psifertex
-
-This script will verify that the report matches the workspace.
-Check the log window for any errors, and/or the summary of changes.
-
-Derived from: https://github.com/fireeye/capa/blob/master/scripts/import-to-ida.py
-"""
-import os
-import json
-
-from binaryninja import *
-
-
-def append_func_cmt(bv, va, cmt):
-    """
-    add the given comment to the given function, 
-    if it doesn't already exist.
-    """
-    func = bv.get_function_at(va)
-    if not func:
-        raise ValueError("not a function")
-
-    if cmt in func.comment:
-        return
-
-    func.comment = func.comment + "\n" + cmt
-
-
-def load_analysis(bv):
-    shortname = os.path.splitext(os.path.basename(bv.file.filename))[0]
-    dirname = os.path.dirname(bv.file.filename)
-    log_info(f"dirname: {dirname}\nshortname: {shortname}\n")
-    if os.access(os.path.join(dirname, shortname + ".js"), os.R_OK):
-        path = os.path.join(dirname, shortname + ".js")
-    elif os.access(os.path.join(dirname, shortname + ".json"), os.R_OK):
-        path = os.path.join(dirname, shortname + ".json")
-    else:
-        path = interaction.get_open_filename_input("capa report:", "JSON (*.js *.json);;All Files (*)")
-    if not path or not os.access(path, os.R_OK):
-        log_error("Invalid filename.")
-        return 0
-    log_info("Using capa file %s" % path)
-
-    with open(path, "rb") as f:
-        doc = json.loads(f.read().decode("utf-8"))
-
-    if "meta" not in doc or "rules" not in doc:
-        log_error("doesn't appear to be a capa report")
-        return -1
-
-    a = doc["meta"]["sample"]["md5"].lower()
-    md5 = Transform["MD5"]
-    rawhex = Transform["RawHex"]
-    b = rawhex.encode(md5.encode(bv.parent_view.read(bv.parent_view.start, bv.parent_view.end))).decode("utf-8")
-    if not a == b:
-        log_error("sample mismatch")
-        return -2
-
-    rows = []
-    for rule in doc["rules"].values():
-        if rule["meta"].get("lib"):
-            continue
-        if rule["meta"].get("capa/subscope"):
-            continue
-        if rule["meta"]["scope"] != "function":
-            continue
-
-        name = rule["meta"]["name"]
-        ns = rule["meta"].get("namespace", "")
-        for va in rule["matches"].keys():
-            va = int(va)
-            rows.append((ns, name, va))
-
-    # order by (namespace, name) so that like things show up together
-    rows = sorted(rows)
-    for ns, name, va in rows:
-        if ns:
-            cmt = "%s (%s)" % (name, ns)
-        else:
-            cmt = "%s" % (name,)
-
-        log_info("0x%x: %s" % (va, cmt))
-        try:
-            # message will look something like:
-            #
-            #     capa: delete service (host-interaction/service/delete)
-            append_func_cmt(bv, va, "capa: " + cmt)
-        except ValueError:
-            continue
-
-    log_info("ok")
-
-
-PluginCommand.register("Load capa file", "Loads an analysis file from capa", load_analysis)
+"""
+Binary Ninja plugin that imports a capa report,
+produced via `capa --json /path/to/sample`,
+into the current database.
+
+It will mark up functions with their capa matches, like:
+
+    ; capa: print debug messages (host-interaction/log/debug/write-event)
+    ; capa: delete service (host-interaction/service/delete)
+    ; Attributes: bp-based frame
+
+    public UninstallService
+    UninstallService proc near
+    ...
+
+To use, invoke from the Binary Ninja Tools menu, or from the command-palette.
+
+Adapted for Binary Ninja by @psifertex
+
+This script will verify that the report matches the workspace.
+Check the log window for any errors, and/or the summary of changes.
+
+Derived from: https://github.com/fireeye/capa/blob/master/scripts/import-to-ida.py
+"""
+import os
+import json
+
+from binaryninja import *
+
+
+def append_func_cmt(bv, va, cmt):
+    """
+    append `cmt` to the comment of the function at `va`,
+    unless the comment already contains it.
+
+    raises ValueError when `bv` has no function at `va`.
+    """
+    target = bv.get_function_at(va)
+    if not target:
+        raise ValueError("not a function")
+
+    if cmt not in target.comment:
+        target.comment = target.comment + "\n" + cmt
+
+
+def load_analysis(bv):
+    """
+    locate a capa JSON report for the open file, compare its MD5 with the MD5
+    of the bytes read from `bv.parent_view`, and comment each matched function.
+
+    returns 0 if no readable report is chosen, -1 if the document lacks
+    "meta"/"rules", -2 on MD5 mismatch, and None once comments are applied.
+    """
+    dirname, filename = os.path.split(bv.file.filename)
+    shortname = os.path.splitext(filename)[0]
+    log_info(f"dirname: {dirname}\nshortname: {shortname}\n")
+
+    # prefer a report sitting next to the sample; otherwise ask the user
+    for ext in (".js", ".json"):
+        path = os.path.join(dirname, shortname + ext)
+        if os.access(path, os.R_OK):
+            break
+    else:
+        path = interaction.get_open_filename_input("capa report:", "JSON (*.js *.json);;All Files (*)")
+    if not path or not os.access(path, os.R_OK):
+        log_error("Invalid filename.")
+        return 0
+    log_info("Using capa file %s" % path)
+
+    with open(path, "rb") as report:
+        doc = json.loads(report.read().decode("utf-8"))
+
+    if not ("meta" in doc and "rules" in doc):
+        log_error("doesn't appear to be a capa report")
+        return -1
+
+    expected = doc["meta"]["sample"]["md5"].lower()
+    md5, rawhex = Transform["MD5"], Transform["RawHex"]
+    raw = bv.parent_view
+    actual = rawhex.encode(md5.encode(raw.read(raw.start, raw.end))).decode("utf-8")
+    if expected != actual:
+        log_error("sample mismatch")
+        return -2
+
+    rows = []
+    for rule in doc["rules"].values():
+        meta = rule["meta"]
+        # only non-library, non-subscope, function-scoped rules are imported
+        if meta.get("lib") or meta.get("capa/subscope") or meta["scope"] != "function":
+            continue
+
+        name, ns = meta["name"], meta.get("namespace", "")
+        rows.extend((ns, name, int(va)) for va in rule["matches"].keys())
+
+    # order by (namespace, name) so that like things show up together
+    for ns, name, va in sorted(rows):
+        cmt = "%s (%s)" % (name, ns) if ns else "%s" % (name,)
+
+        log_info("0x%x: %s" % (va, cmt))
+        try:
+            # message will look something like:
+            #
+            #     capa: delete service (host-interaction/service/delete)
+            append_func_cmt(bv, va, "capa: " + cmt)
+        except ValueError:
+            continue
+
+    log_info("ok")
+
+
+PluginCommand.register("Load capa file", "Loads an analysis file from capa", load_analysis)
--- a/scripts/import-to-ida.py
+++ b/scripts/import-to-ida.py
@@ -1,117 +1,118 @@
-"""
-IDA Pro script that imports a capa report,
-produced via `capa --json /path/to/sample`,
-into the current database.
-
-It will mark up functions with their capa matches, like:
-
-    ; capa: print debug messages (host-interaction/log/debug/write-event)
-    ; capa: delete service (host-interaction/service/delete)
-    ; Attributes: bp-based frame
-
-    public UninstallService
-    UninstallService proc near
-    ...
-
-To use, invoke from the IDA Pro scripting dialog,
-such as via Alt-F9,
-and then select the existing capa report from the file system.
-
-This script will verify that the report matches the workspace.
-Check the output window for any errors, and/or the summary of changes.
-
-Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
-You may obtain a copy of the License at: [package root]/LICENSE.txt
-Unless required by applicable law or agreed to in writing, software distributed under the License
- is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and limitations under the License.
-"""
-import json
-import logging
-
-import idc
-import idautils
-import ida_idaapi
-import ida_kernwin
-
-logger = logging.getLogger("capa")
-
-
-def append_func_cmt(va, cmt, repeatable=False):
-    """
-    add the given comment to the given function, 
-    if it doesn't already exist.
-    """
-    func = ida_funcs.get_func(va)
-    if not func:
-        raise ValueError("not a function")
-
-    existing = ida_funcs.get_func_cmt(func, repeatable) or ""
-    if cmt in existing:
-        return
-
-    new = existing + "\n" + cmt
-    ida_funcs.set_func_cmt(func, new, repeatable)
-
-
-def main():
-    path = ida_kernwin.ask_file(False, "*", "capa report")
-    if not path:
-        return 0
-
-    with open(path, "rb") as f:
-        doc = json.loads(f.read().decode("utf-8"))
-
-    if "meta" not in doc or "rules" not in doc:
-        logger.error("doesn't appear to be a capa report")
-        return -1
-
-    # in IDA 7.4, the MD5 hash may be truncated, for example:
-    # wanted: 84882c9d43e23d63b82004fae74ebb61
-    # found: b'84882C9D43E23D63B82004FAE74EBB6\x00'
-    #
-    # see: https://github.com/idapython/bin/issues/11
-    a = doc["meta"]["sample"]["md5"].lower()
-    b = idautils.GetInputFileMD5().decode("ascii").lower().rstrip("\x00")
-    if not a.startswith(b):
-        logger.error("sample mismatch")
-        return -2
-
-    rows = []
-    for rule in doc["rules"].values():
-        if rule["meta"].get("lib"):
-            continue
-        if rule["meta"].get("capa/subscope"):
-            continue
-        if rule["meta"]["scope"] != "function":
-            continue
-
-        name = rule["meta"]["name"]
-        ns = rule["meta"].get("namespace", "")
-        for va in rule["matches"].keys():
-            va = int(va)
-            rows.append((ns, name, va))
-
-    # order by (namespace, name) so that like things show up together
-    rows = sorted(rows)
-    for ns, name, va in rows:
-        if ns:
-            cmt = "%s (%s)" % (name, ns)
-        else:
-            cmt = "%s" % (name,)
-
-        logger.info("0x%x: %s", va, cmt)
-        try:
-            # message will look something like:
-            #
-            #     capa: delete service (host-interaction/service/delete)
-            append_func_cmt(va, "capa: " + cmt, repeatable=False)
-        except ValueError:
-            continue
-
-    logger.info("ok")
-
-
-main()
+"""
+IDA Pro script that imports a capa report,
+produced via `capa --json /path/to/sample`,
+into the current database.
+
+It will mark up functions with their capa matches, like:
+
+    ; capa: print debug messages (host-interaction/log/debug/write-event)
+    ; capa: delete service (host-interaction/service/delete)
+    ; Attributes: bp-based frame
+
+    public UninstallService
+    UninstallService proc near
+    ...
+
+To use, invoke from the IDA Pro scripting dialog,
+such as via Alt-F9,
+and then select the existing capa report from the file system.
+
+This script will verify that the report matches the workspace.
+Check the output window for any errors, and/or the summary of changes.
+
+Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+You may obtain a copy of the License at: [package root]/LICENSE.txt
+Unless required by applicable law or agreed to in writing, software distributed under the License
+ is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License.
+"""
+import json
+import logging
+
+import idc
+import idautils
+import ida_funcs
+import ida_idaapi
+import ida_kernwin
+
+logger = logging.getLogger("capa")
+
+
+def append_func_cmt(va, cmt, repeatable=False):
+    """
+    append `cmt` to the comment of the function that ida_funcs.get_func(va)
+    resolves, unless the comment already includes it.
+
+    `repeatable` is forwarded to ida_funcs.get_func_cmt/set_func_cmt.
+    raises ValueError when ida_funcs.get_func finds no function.
+    """
+    owner = ida_funcs.get_func(va)
+    if not owner:
+        raise ValueError("not a function")
+
+    current = ida_funcs.get_func_cmt(owner, repeatable) or ""
+    if cmt not in current:
+        ida_funcs.set_func_cmt(owner, current + "\n" + cmt, repeatable)
+
+
+def main():
+    """
+    ask for a capa JSON report, check that it belongs to the loaded input
+    file, and add a "capa: ..." comment to each matched function.
+
+    returns 0 when no path is chosen, -1 if the document lacks
+    "meta"/"rules", -2 on MD5 mismatch, and None once comments are applied.
+    """
+    path = ida_kernwin.ask_file(False, "*", "capa report")
+    if not path:
+        return 0
+
+    with open(path, "rb") as report:
+        doc = json.loads(report.read().decode("utf-8"))
+
+    if not ("meta" in doc and "rules" in doc):
+        logger.error("doesn't appear to be a capa report")
+        return -1
+
+    # in IDA 7.4, the MD5 hash may be truncated, for example:
+    # wanted: 84882c9d43e23d63b82004fae74ebb61
+    # found: b'84882C9D43E23D63B82004FAE74EBB6\x00'
+    #
+    # so compare by prefix rather than by equality.
+    # see: https://github.com/idapython/bin/issues/11
+    wanted = doc["meta"]["sample"]["md5"].lower()
+    found = idautils.GetInputFileMD5().decode("ascii").lower().rstrip("\x00")
+    if not wanted.startswith(found):
+        logger.error("sample mismatch")
+        return -2
+
+    rows = []
+    for rule in doc["rules"].values():
+        meta = rule["meta"]
+        # only non-library, non-subscope, function-scoped rules are imported
+        if meta.get("lib") or meta.get("capa/subscope") or meta["scope"] != "function":
+            continue
+
+        name = meta["name"]
+        ns = meta.get("namespace", "")
+        rows.extend((ns, name, int(va)) for va in rule["matches"].keys())
+
+    # order by (namespace, name) so that like things show up together
+    for ns, name, va in sorted(rows):
+        cmt = "%s (%s)" % (name, ns) if ns else "%s" % (name,)
+
+        logger.info("0x%x: %s", va, cmt)
+        try:
+            # message will look something like:
+            #
+            #     capa: delete service (host-interaction/service/delete)
+            append_func_cmt(va, "capa: " + cmt, repeatable=False)
+        except ValueError:
+            continue
+
+    logger.info("ok")
+
+
+main()
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -15,7 +15,9 @@ See the License for the specific language governing permissions and limitations
 """
 import os
 import sys
+import time
 import string
+import difflib
 import hashlib
 import logging
 import os.path
@@ -24,6 +26,7 @@ import itertools
 import posixpath

 import capa.main
+import capa.rules
 import capa.engine
 import capa.features
 import capa.features.insn
@@ -194,7 +197,7 @@ class DoesntMatchExample(Lint):
                continue

            try:
-                extractor = capa.main.get_extractor(path, "auto")
+                extractor = capa.main.get_extractor(path, "auto", disable_progress=True)
                capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
            except Exception as e:
                logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
@@ -232,7 +235,7 @@ class LibRuleNotInLibDirectory(Lint):
        if "lib" not in rule.meta:
            return False

-        return "/lib/" not in get_normpath(rule.meta["capa/path"])
+        return "lib/" not in get_normpath(rule.meta["capa/path"])


 class LibRuleHasNamespace(Lint):
@@ -276,6 +279,32 @@ class FeatureNegativeNumber(Lint):
        return False


+class FormatSingleEmptyLineEOF(Lint):
+    name = "EOF format"
+    recommendation = "end file with a single empty line"
+
+    def check_rule(self, ctx, rule):
+        # violation unless the definition ends with exactly one newline
+        text = rule.definition
+        return text.endswith("\n\n") or not text.endswith("\n")
+
+
+class FormatIncorrect(Lint):
+    name = "rule format incorrect"
+    recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}"
+
+    def check_rule(self, ctx, rule):
+        # re-parse the definition with use_ruamel=True and serialize it again
+        current = rule.definition
+        formatted = capa.rules.Rule.from_yaml(current, use_ruamel=True).to_yaml()
+        if current == formatted:
+            return False
+        # embed a line-by-line diff in the recommendation shown to the user
+        delta = difflib.ndiff(current.splitlines(True), formatted.splitlines(True))
+        self.recommendation = self.recommendation_template.format("".join(delta))
+        return True
+
+
 def run_lints(lints, ctx, rule):
    for lint in lints:
        if lint.check_rule(ctx, rule):
@@ -331,15 +360,25 @@ FEATURE_LINTS = (
 )


-def get_normpath(path):
-    return posixpath.normpath(path).replace(os.sep, "/")
-
-
 def lint_features(ctx, rule):
    features = get_features(ctx, rule)
    return run_feature_lints(FEATURE_LINTS, ctx, features)


+FORMAT_LINTS = (
+    FormatSingleEmptyLineEOF(),
+    FormatIncorrect(),
+)
+
+
+def lint_format(ctx, rule):
+    return run_lints(FORMAT_LINTS, ctx, rule)
+
+
+def get_normpath(path):
+    return posixpath.normpath(path).replace(os.sep, "/")
+
+
 def get_features(ctx, rule):
    # get features from rule and all dependencies including subscopes and matched rules
    features = []
@@ -390,6 +429,7 @@ def lint_rule(ctx, rule):
            lint_meta(ctx, rule),
            lint_logic(ctx, rule),
            lint_features(ctx, rule),
+            lint_format(ctx, rule),
        )
    )

@@ -399,7 +439,11 @@ def lint_rule(ctx, rule):
        print("")
        print(
            "%s%s %s"
-            % ("    (nursery) " if is_nursery_rule(rule) else "", rule.name, ("(%s)" % category) if category else "",)
+            % (
+                "    (nursery) " if is_nursery_rule(rule) else "",
+                rule.name,
+                ("(%s)" % category) if category else "",
+            )
        )

        level = "WARN" if is_nursery_rule(rule) else "FAIL"
@@ -407,7 +451,12 @@ def lint_rule(ctx, rule):
        for violation in violations:
            print(
                "%s  %s: %s: %s"
-                % ("    " if is_nursery_rule(rule) else "", level, violation.name, violation.recommendation,)
+                % (
+                    "    " if is_nursery_rule(rule) else "",
+                    level,
+                    violation.name,
+                    violation.recommendation,
+                )
            )

    elif len(violations) == 0 and is_nursery_rule(rule):
@@ -487,8 +536,11 @@ def main(argv=None):
    parser.add_argument("rules", type=str, help="Path to rules")
    parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples")
    parser.add_argument(
-        "--thorough", action="store_true", help="Enable thorough linting - takes more time, but does a better job",
+        "--thorough",
+        action="store_true",
+        help="Enable thorough linting - takes more time, but does a better job",
    )
+    parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values")
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
    args = parser.parse_args(args=argv)
@@ -505,15 +557,20 @@ def main(argv=None):

    capa.main.set_vivisect_log_level(logging.CRITICAL)
    logging.getLogger("capa").setLevel(logging.CRITICAL)
+    logging.getLogger("viv_utils").setLevel(logging.CRITICAL)
+
+    time0 = time.time()

    try:
-        rules = capa.main.get_rules(args.rules)
+        rules = capa.main.get_rules(args.rules, disable_progress=True)
        rules = capa.rules.RuleSet(rules)
        logger.info("successfully loaded %s rules", len(rules))
-    except IOError as e:
-        logger.error("%s", str(e))
-        return -1
-    except capa.rules.InvalidRule as e:
+        if args.tag:
+            rules = rules.filter_rules_by_meta(args.tag)
+            logger.debug("selected %s rules", len(rules))
+            for i, r in enumerate(rules.rules, 1):
+                logger.debug(" %d. %s", i, r)
+    except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
        logger.error("%s", str(e))
        return -1

@@ -531,6 +588,10 @@ def main(argv=None):
    }

    did_violate = lint(ctx, rules)
+
+    min, sec = divmod(time.time() - time0, 60)
+    logger.debug("lints ran for ~ %02d:%02dm", min, sec)
+
    if not did_violate:
        logger.info("no suggestions, nice!")
        return 0
--- a/scripts/show-capabilities-by-function.py
+++ b/scripts/show-capabilities-by-function.py
@@ -71,22 +71,22 @@ logger = logging.getLogger("capa.show-capabilities-by-function")

 def render_matches_by_function(doc):
    """
-        like:
+    like:

-            function at 0x1000321a with 33 features:
-              - get hostname
-              - initialize Winsock library
-            function at 0x10003286 with 63 features:
-              - create thread
-              - terminate thread
-            function at 0x10003415 with 116 features:
-              - write file
-              - send data
-              - link function at runtime
-              - create HTTP request
-              - get common file path
-              - send HTTP request
-              - connect to HTTP server
+        function at 0x1000321a with 33 features:
+          - get hostname
+          - initialize Winsock library
+        function at 0x10003286 with 63 features:
+          - create thread
+          - terminate thread
+        function at 0x10003415 with 116 features:
+          - write file
+          - send data
+          - link function at runtime
+          - create HTTP request
+          - get common file path
+          - send HTTP request
+          - connect to HTTP server
    """
    ostream = rutils.StringIO()

--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -1,5 +1,13 @@
 #!/usr/bin/env python2
 """
+Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+You may obtain a copy of the License at: [package root]/LICENSE.txt
+Unless required by applicable law or agreed to in writing, software distributed under the License
+ is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License.
+
 show-features

 Show the features that capa extracts from the given sample,
@@ -55,14 +63,6 @@ Example::
    insn: 0x10001027: number(0x1)
    insn: 0x10001027: mnemonic(shl)
    ...
-
-Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
-You may obtain a copy of the License at: [package root]/LICENSE.txt
-Unless required by applicable law or agreed to in writing, software distributed under the License
- is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and limitations under the License.
 """
 import sys
 import logging
@@ -89,12 +89,12 @@ def main(argv=None):
    ]
    format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])

-    parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+    parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample")
    parser.add_argument("sample", type=str, help="Path to sample to analyze")
    parser.add_argument(
        "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
    )
-    parser.add_argument("-F", "--function", type=lambda x: int(x, 0), help="Show features for specific function")
+    parser.add_argument("-F", "--function", type=lambda x: int(x, 0x10), help="Show features for specific function")
    args = parser.parse_args(args=argv)

    logging.basicConfig(level=logging.INFO)
@@ -122,6 +122,50 @@ def main(argv=None):
        else:
            functions = filter(lambda f: f.va == args.function, functions)

+            if args.function not in [f.va for f in functions]:
+                print("0x%X not a function, creating it" % args.function)
+                vw.makeFunction(args.function)
+                functions = extractor.get_functions()
+                functions = filter(lambda f: f.va == args.function, functions)
+
+        if len(functions) == 0:
+            print("0x%X not a function")
+            return -1
+
+    print_features(functions, extractor)
+
+    return 0
+
+
+def ida_main():
+    """
+    IDA Pro entry point: print the features of the function under the cursor,
+    or the file features when its start address resolves to a falsy value.
+    """
+    function = idc.get_func_attr(idc.here(), idc.FUNCATTR_START)
+    print("getting features for current function 0x%X" % function)
+
+    extractor = capa.features.extractors.ida.IdaFeatureExtractor()
+
+    if not function:
+        for feature, va in extractor.extract_file_features():
+            if va:
+                print("file: 0x%08x: %s" % (va, feature))
+            else:
+                print("file: 0x00000000: %s" % (feature))
+        return 0
+
+    # materialize as a list: on py3 `filter` is lazy and has no len()
+    functions = [f for f in extractor.get_functions() if f.start_ea == function]
+    if len(functions) == 0:
+        print("0x%X not a function" % function)
+        return -1
+
+    print_features(functions, extractor)
+    return 0
+
+
+def print_features(functions, extractor):
    for f in functions:
        for feature, va in extractor.extract_function_features(f):
            print("func: 0x%08x: %s" % (va, feature))
@@ -138,8 +182,9 @@ def main(argv=None):
                        # may be an issue while piping to less and encountering non-ascii characters
                        continue

-    return 0
-

 if __name__ == "__main__":
-    sys.exit(main())
+    if capa.main.is_runtime_ida():
+        ida_main()
+    else:
+        sys.exit(main())
--- a/setup.py
+++ b/setup.py
@@ -11,36 +11,66 @@ import sys

 import setuptools

-requirements = ["six", "tqdm", "pyyaml", "tabulate", "colorama", "termcolor", "ruamel.yaml", "wcwidth"]
+requirements = [
+    "six",
+    "tqdm",
+    "pyyaml",
+    "tabulate",
+    "colorama",
+    "termcolor",
+    "ruamel.yaml",
+    "wcwidth",
+    "ida-settings==2.1.0",
+]

 if sys.version_info >= (3, 0):
    # py3
+    requirements.append("halo")
    requirements.append("networkx")
+    requirements.append("smda==1.5.13")
 else:
    # py2
-    requirements.append("enum34")
-    requirements.append("vivisect @ https://github.com/williballenthin/vivisect/tarball/v0.0.20200804#egg=vivisect")
+    requirements.append("enum34==1.1.6")  # v1.1.6 is needed by halo 0.0.30 / spinners 0.0.24
+    requirements.append("halo==0.0.30")  # halo==0.0.30 is the last version to support py2.7
+    requirements.append("vivisect==0.1.0")
    requirements.append("viv-utils")
    requirements.append("networkx==2.2")  # v2.2 is last version supported by Python 2.7
+    requirements.append("backports.functools-lru-cache")

 # this sets __version__
 # via: http://stackoverflow.com/a/7071358/87207
 # and: http://stackoverflow.com/a/2073599/87207
-with open(os.path.join("capa", "version.py"), "rb") as f:
+with open(os.path.join("capa", "version.py"), "r") as f:
    exec(f.read())


+# via: https://packaging.python.org/guides/making-a-pypi-friendly-readme/
+this_directory = os.path.abspath(os.path.dirname(__file__))
+with open(os.path.join(this_directory, "README.md"), "r") as f:
+    long_description = f.read()
+
+
 setuptools.setup(
    name="flare-capa",
    version=__version__,
    description="The FLARE team's open-source tool to identify capabilities in executable files.",
-    long_description="",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
    author="Willi Ballenthin, Moritz Raabe",
    author_email="william.ballenthin@mandiant.com, moritz.raabe@mandiant.com",
    url="https://www.github.com/fireeye/capa",
+    project_urls={
+        "Documentation": "https://github.com/fireeye/capa/tree/master/doc",
+        "Rules": "https://github.com/fireeye/capa-rules",
+        "Rules Documentation": "https://github.com/fireeye/capa-rules/tree/master/doc",
+    },
    packages=setuptools.find_packages(exclude=["tests"]),
    package_dir={"capa": "capa"},
-    entry_points={"console_scripts": ["capa=capa.main:main",]},
+    entry_points={
+        "console_scripts": [
+            "capa=capa.main:main",
+        ]
+    },
    include_package_data=True,
    install_requires=requirements,
    extras_require={
@@ -55,12 +85,15 @@ setuptools.setup(
        ]
    },
    zip_safe=False,
-    keywords="capa",
+    keywords="capa malware analysis capability detection FLARE",
    classifiers=[
-        "Development Status :: 3 - Alpha",
+        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
+        "Intended Audience :: Information Technology",
+        "License :: OSI Approved :: Apache Software License",
        "Natural Language :: English",
-        "Programming Language :: Python :: 2",
+        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
+        "Topic :: Security",
    ],
 )
--- a/tests/data
+++ b/tests/data
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -7,79 +8,607 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import os
+import sys
 import os.path
+import binascii
+import contextlib
 import collections

 import pytest
-import viv_utils
+
+import capa.main
+import capa.features.file
+import capa.features.insn
+import capa.features.basicblock
+from capa.features import ARCH_X32, ARCH_X64
+
+try:
+    from functools import lru_cache
+except ImportError:
+    from backports.functools_lru_cache import lru_cache
+

 CD = os.path.dirname(__file__)


-Sample = collections.namedtuple("Sample", ["vw", "path"])
+@contextlib.contextmanager
+def xfail(condition, reason=None):
+    """
+    context manager that wraps a block that is expected to fail in some cases.
+    when it does fail (and is expected), then mark this as pytest.xfail.
+    if it's unexpected, raise an exception, so the test fails.
+
+    only `Exception` subclasses count as a failure of the block:
+    KeyboardInterrupt, SystemExit, and pytest's own skip/fail outcomes propagate unchanged.
+
+    example::
+
+        # this test:
+        #  - passes on py3 if foo() works
+        #  - fails  on py3 if foo() fails
+        #  - xfails on py2 if foo() fails
+        #  - fails  on py2 if foo() works
+        with xfail(sys.version_info < (3, 0), reason="py2 doesn't foo"):
+            foo()
+    """
+    try:
+        # do the block
+        yield
+    except Exception:
+        # a bare `except:` would also trap BaseException (e.g. ctrl-c) and report it as an expected failure.
+        if condition:
+            # we expected the test to fail, so raise and register this via pytest
+            pytest.xfail(reason)
+        else:
+            # we don't expect an exception, so the test should fail
+            raise
+    else:
+        # when success was expected and no exception was raised, there is nothing to do.
+        if condition:
+            # we expected an exception, but didn't find one. that's an error.
+            raise RuntimeError("expected to fail, but didn't")
+
+
+@lru_cache()
+def get_viv_extractor(path):
+    """create the vivisect-based feature extractor for the file at `path`; results are memoized by path."""
+    import capa.features.extractors.viv
+
+    # shellcode samples are recognized by their file name and need an explicit format.
+    # `should_save` is enabled only when the format is auto-detected.
+    fmt = "sc32" if "raw32" in path else ("sc64" if "raw64" in path else "auto")
+    workspace = capa.main.get_workspace(path, fmt, should_save=(fmt == "auto"))
+    viv_extractor = capa.features.extractors.viv.VivisectFeatureExtractor(workspace, path)
+    # work around known differences between vivisect and the other backends
+    fixup_viv(path, viv_extractor)
+    return viv_extractor
+
+
+def fixup_viv(path, extractor):
+    """vivisect fixups for specific samples, to overcome differences between backends."""
+    if "3b13b" not in path:
+        return
+    # vivisect only recognizes calling thunk function at 0x10001573,
+    # so register the function at 0x10006860 by hand.
+    extractor.vw.makeFunction(0x10006860)
+
+
+@lru_cache()
+def get_smda_extractor(path):
+    """create the SMDA-based feature extractor for the file at `path`; results are memoized by path."""
+    from smda.SmdaConfig import SmdaConfig
+    from smda.Disassembler import Disassembler
+
+    import capa.features.extractors.smda
+
+    smda_config = SmdaConfig()
+    # presumably makes the report retain the raw file buffer - confirm against the SMDA documentation
+    smda_config.STORE_BUFFER = True
+    smda_report = Disassembler(smda_config).disassembleFile(path)
+    return capa.features.extractors.smda.SmdaFeatureExtractor(smda_report, path)
+
+
+@lru_cache()
+def get_miasm_extractor(path):
+    """create the miasm-based feature extractor from the bytes of the file at `path`; memoized by path."""
+    import capa.features.extractors.miasm
+
+    with open(path, "rb") as f:
+        buf = f.read()
+
+    return capa.features.extractors.miasm.MiasmFeatureExtractor(buf)
+
+
+@lru_cache()
+def extract_file_features(extractor):
+    """index the file-scope features of `extractor`: feature -> set of addresses at which it was seen."""
+    index = collections.defaultdict(set)
+    for feature, va in extractor.extract_file_features():
+        index[feature].add(va)
+    return index
+
+
+# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
+def extract_function_features(extractor, f):
+    """index the features of function `f`, including those of all its basic blocks and instructions."""
+    index = collections.defaultdict(set)
+    for bb in extractor.get_basic_blocks(f):
+        for feature, vas in extract_basic_block_features(extractor, f, bb).items():
+            index[feature].update(vas)
+    for feature, va in extractor.extract_function_features(f):
+        index[feature].add(va)
+    return index
+
+
+# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
+def extract_basic_block_features(extractor, f, bb):
+    """index the features of basic block `bb` of function `f`, including those of its instructions."""
+    index = collections.defaultdict(set)
+    for insn in extractor.get_instructions(f, bb):
+        for feature, va in extractor.extract_insn_features(f, bb, insn):
+            index[feature].add(va)
+    for feature, va in extractor.extract_basic_block_features(f, bb):
+        index[feature].add(va)
+    return index
+
+
+def get_data_path_by_name(name):
+    """
+    resolve a sample fixture name to the path of its file in the `data` directory next to this module.
+
+    well-known samples are matched by their exact name.
+    hash-named samples are matched by prefix, so `c9188`, `c9188...`, and `c91887...` all work.
+    raises ValueError when the name matches no known sample.
+    """
+    # samples addressed by an exact name
+    exact_names = {
+        "mimikatz": "mimikatz.exe_",
+        "kernel32": "kernel32.dll_",
+        "kernel32-64": "kernel32-64.dll_",
+        "pma12-04": "Practical Malware Analysis Lab 12-04.exe_",
+        "pma16-01": "Practical Malware Analysis Lab 16-01.exe_",
+        "pma21-01": "Practical Malware Analysis Lab 21-01.exe_",
+        "al-khaser x86": "al-khaser_x86.exe_",
+        "al-khaser x64": "al-khaser_x64.exe_",
+    }
+    # samples addressed by the leading characters of the name; the first matching prefix wins
+    name_prefixes = (
+        ("39c05", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_"),
+        ("499c2", "499c2a85f6e8142c3f48d4251c9c7cd6.raw32"),
+        ("9324d", "9324d1a8ae37a36ae560c37448c9705a.exe_"),
+        ("a1982", "a198216798ca38f280dc413f8c57f2c2.exe_"),
+        ("a933a", "a933a1a402775cfa94b6bee0963f4b46.dll_"),
+        ("bfb9b", "bfb9b5391a13d0afd787e87ab90f14f5.dll_"),
+        ("c9188", "c91887d861d9bd4a5872249b641bc9f9.exe_"),
+        ("64d9f", "64d9f7d96b99467f36e22fada623c3bb.dll_"),
+        ("82bf6", "82BF6347ACF15E5D883715DC289D8A2B.exe_"),
+        ("pingtaest", "ping_täst.exe_"),
+        ("77329", "773290480d5445f11d3dc1b800728966.exe_"),
+        ("3b13b", "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_"),
+    )
+
+    # exact names are consulted before prefixes
+    if name in exact_names:
+        return os.path.join(CD, "data", exact_names[name])
+
+    for prefix, filename in name_prefixes:
+        if name.startswith(prefix):
+            return os.path.join(CD, "data", filename)
+    raise ValueError("unexpected sample fixture: %s" % name)
+
+
+def get_sample_md5_by_name(name):
+    """
+    used by IDA tests to ensure the correct IDB is loaded.
+
+    resolve a sample fixture name to the MD5 of the sample, as a lowercase hex string.
+    well-known samples are matched by their exact name; hash-named samples are matched by prefix.
+    raises ValueError when the name matches no known sample.
+    """
+    # samples addressed by an exact name
+    exact_names = {
+        "mimikatz": "5f66b82558ca92e54e77f216ef4c066c",
+        "kernel32": "e80758cf485db142fca1ee03a34ead05",
+        "kernel32-64": "a8565440629ac87f6fef7d588fe3ff0f",
+        "pma12-04": "56bed8249e7c2982a90e54e1e55391a2",
+        "pma16-01": "7faafc7e4a5c736ebfee6abbbc812d80",
+        "pma21-01": "c8403fb05244e23a7931c766409b5e22",
+        "al-khaser x86": "db648cd247281954344f1d810c6fd590",
+        "al-khaser x64": "3cb21ae76ff3da4b7e02d77ff76e82be",
+    }
+    # samples addressed by the leading characters of the name; the first matching prefix wins
+    # NOTE(review): `pingtaest` has a data path but no MD5 here - confirm whether that is intended.
+    name_prefixes = (
+        ("39c05", "b7841b9d5dc1f511a93cc7576672ec0c"),
+        ("499c2", "499c2a85f6e8142c3f48d4251c9c7cd6"),
+        ("9324d", "9324d1a8ae37a36ae560c37448c9705a"),
+        ("a1982", "a198216798ca38f280dc413f8c57f2c2"),
+        ("a933a", "a933a1a402775cfa94b6bee0963f4b46"),
+        ("bfb9b", "bfb9b5391a13d0afd787e87ab90f14f5"),
+        ("c9188", "c91887d861d9bd4a5872249b641bc9f9"),
+        ("64d9f", "64d9f7d96b99467f36e22fada623c3bb"),
+        ("82bf6", "82bf6347acf15e5d883715dc289d8a2b"),
+        ("77329", "773290480d5445f11d3dc1b800728966"),
+        # file name is SHA256 hash
+        ("3b13b", "56a6ffe6a02941028cc8235204eef31d"),
+    )
+
+    # exact names are consulted before prefixes
+    if name in exact_names:
+        return exact_names[name]
+    for prefix, md5 in name_prefixes:
+        if name.startswith(prefix):
+            return md5
+    raise ValueError("unexpected sample fixture: %s" % name)
+
+
+def resolve_sample(sample):
+    return get_data_path_by_name(sample)


@pytest.fixture
-def mimikatz():
-    path = os.path.join(CD, "data", "mimikatz.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def sample(request):
+    return resolve_sample(request.param)
+
+
+def get_function(extractor, fva):
+    for f in extractor.get_functions():
+        if extractor.function_offset(f) == fva:
+            return f
+    raise ValueError("function not found")
+
+
+def get_basic_block(extractor, f, va):
+    for bb in extractor.get_basic_blocks(f):
+        if extractor.block_offset(bb) == va:
+            return bb
+    raise ValueError("basic block not found")
+
+
+def resolve_scope(scope):
+    """
+    build a function that, given an extractor, returns the features found in the given scope.
+    `scope` is one of: `file`, `function=0x401000`, or `function=0x401000,bb=0x40100A`.
+    the result is named after `scope` so that assertion messages can mention it.
+    """
+    if scope == "file":
+
+        def inner(extractor):
+            return extract_file_features(extractor)
+
+    elif "bb=" in scope:
+        # like `function=0x401000,bb=0x40100A`
+        fspec, _, bbspec = scope.partition(",")
+        fva = int(fspec.partition("=")[2], 0x10)
+        bbva = int(bbspec.partition("=")[2], 0x10)
+
+        def inner(extractor):
+            f = get_function(extractor, fva)
+            return extract_basic_block_features(extractor, f, get_basic_block(extractor, f, bbva))
+
+    elif scope.startswith("function"):
+        # like `function=0x401000`
+        va = int(scope.partition("=")[2], 0x10)
+
+        def inner(extractor):
+            return extract_function_features(extractor, get_function(extractor, va))
+
+    else:
+        raise ValueError("unexpected scope fixture")
+
+    inner.__name__ = scope
+    return inner


@pytest.fixture
-def sample_a933a1a402775cfa94b6bee0963f4b46():
-    path = os.path.join(CD, "data", "a933a1a402775cfa94b6bee0963f4b46.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def scope(request):
+    return resolve_scope(request.param)
+
+
+def make_test_id(values):
+    return "-".join(map(str, values))
+
+
+def parametrize(params, values, **kwargs):
+    """
+    wrapper around `pytest.mark.parametrize` that derives a readable test ID for each case.
+    without explicit IDs, pytest renders objects (such as features) as an opaque value.
+    ref: https://docs.pytest.org/en/2.9.0/example/parametrize.html#different-options-for-test-ids
+    a rendered ID might look something like:
+        mimikatz-function=0x403BAC-api(CryptDestroyKey)-True
+    """
+    readable_ids = [make_test_id(case) for case in values]
+    return pytest.mark.parametrize(params, values, ids=readable_ids, **kwargs)
+
+
+FEATURE_PRESENCE_TESTS = [
+    # file/characteristic("embedded pe")
+    ("pma12-04", "file", capa.features.Characteristic("embedded pe"), True),
+    # file/string
+    ("mimikatz", "file", capa.features.String("SCardControl"), True),
+    ("mimikatz", "file", capa.features.String("SCardTransmit"), True),
+    ("mimikatz", "file", capa.features.String("ACR  > "), True),
+    ("mimikatz", "file", capa.features.String("nope"), False),
+    # file/sections
+    ("mimikatz", "file", capa.features.file.Section(".text"), True),
+    ("mimikatz", "file", capa.features.file.Section(".nope"), False),
+    # IDA doesn't extract unmapped sections by default
+    # ("mimikatz", "file", capa.features.file.Section(".rsrc"), True),
+    # file/exports
+    ("kernel32", "file", capa.features.file.Export("BaseThreadInitThunk"), True),
+    ("kernel32", "file", capa.features.file.Export("lstrlenW"), True),
+    ("kernel32", "file", capa.features.file.Export("nope"), False),
+    # file/imports
+    ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True),
+    ("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True),
+    ("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True),
+    ("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True),
+    ("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True),
+    ("mimikatz", "file", capa.features.file.Import("#11"), False),
+    ("mimikatz", "file", capa.features.file.Import("#nope"), False),
+    ("mimikatz", "file", capa.features.file.Import("nope"), False),
+    ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True),
+    ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True),
+    ("mimikatz", "file", capa.features.file.Import("CryptAcquireContextW"), True),
+    ("mimikatz", "file", capa.features.file.Import("CryptAcquireContext"), True),
+    # function/characteristic(loop)
+    ("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True),
+    ("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False),
+    # bb/characteristic(tight loop)
+    ("mimikatz", "function=0x402EC4", capa.features.Characteristic("tight loop"), True),
+    ("mimikatz", "function=0x401000", capa.features.Characteristic("tight loop"), False),
+    # bb/characteristic(stack string)
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("stack string"), True),
+    ("mimikatz", "function=0x401000", capa.features.Characteristic("stack string"), False),
+    # bb/characteristic(tight loop)
+    ("mimikatz", "function=0x402EC4,bb=0x402F8E", capa.features.Characteristic("tight loop"), True),
+    ("mimikatz", "function=0x401000,bb=0x401000", capa.features.Characteristic("tight loop"), False),
+    # insn/mnemonic
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("push"), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("movzx"), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("xor"), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("in"), False),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("out"), False),
+    # insn/number
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x3136B0), True),
+    # insn/number: stack adjustments
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False),
+    # insn/number: arch flavors
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False),
+    # insn/offset
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0xC), True),
+    # insn/offset, issue #276
+    ("64d9f", "function=0x10001510,bb=0x100015B0", capa.features.insn.Offset(0x4000), True),
+    # insn/offset: stack references
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x8), False),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x10), False),
+    # insn/offset: negative
+    ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
+    ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
+    # insn/offset: arch flavors
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True),
+    ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False),
+    # insn/api
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptImportKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptDestroyKey"), True),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False),
+    ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False),
+    # insn/api: thunk
+    ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
+    # insn/api: x64
+    (
+        "kernel32-64",
+        "function=0x180001010",
+        capa.features.insn.API("RtlVirtualUnwind"),
+        True,
+    ),
+    ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
+    # insn/api: x64 thunk
+    (
+        "kernel32-64",
+        "function=0x1800202B0",
+        capa.features.insn.API("RtlCaptureContext"),
+        True,
+    ),
+    ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
+    # insn/api: x64 nested thunk
+    ("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
+    # insn/api: call via jmp
+    ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
+    ("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
+    # TODO ignore thunk functions that call via jmp?
+    # insn/api: resolve indirect calls
+    ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
+    ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),
+    ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True),
+    ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True),
+    # insn/string
+    ("mimikatz", "function=0x40105D", capa.features.String("SCardControl"), True),
+    ("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True),
+    ("mimikatz", "function=0x40105D", capa.features.String("ACR  > "), True),
+    ("mimikatz", "function=0x40105D", capa.features.String("nope"), False),
+    ("773290...", "function=0x140001140", capa.features.String(r"%s:\\OfficePackagesForWDAG"), True),
+    # insn/regex, issue #262
+    ("pma16-01", "function=0x4021B0", capa.features.Regex("HTTP/1.0"), True),
+    ("pma16-01", "function=0x4021B0", capa.features.Regex("www.practicalmalwareanalysis.com"), False),
+    # insn/string, pointer to string
+    ("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True),
+    # insn/string, direct memory reference
+    ("mimikatz", "function=0x46D6CE", capa.features.String("(null)"), True),
+    # insn/bytes
+    ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardControl".encode("utf-16le")), True),
+    ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True),
+    ("mimikatz", "function=0x40105D", capa.features.Bytes("ACR  > ".encode("utf-16le")), True),
+    ("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False),
+    # IDA features included byte sequences read from invalid memory, fixed in #409
+    ("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False),
+    # insn/bytes, pointer to bytes
+    ("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True),
+    # insn/characteristic(nzxor)
+    ("mimikatz", "function=0x410DFC", capa.features.Characteristic("nzxor"), True),
+    ("mimikatz", "function=0x40105D", capa.features.Characteristic("nzxor"), False),
+    # insn/characteristic(nzxor): no security cookies
+    ("mimikatz", "function=0x46D534", capa.features.Characteristic("nzxor"), False),
+    # insn/characteristic(nzxor): xorps
+    # viv needs fixup to recognize function, see above
+    ("3b13b...", "function=0x10006860", capa.features.Characteristic("nzxor"), True),
+    # insn/characteristic(peb access)
+    ("kernel32-64", "function=0x1800017D0", capa.features.Characteristic("peb access"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("peb access"), False),
+    # insn/characteristic(gs access)
+    ("kernel32-64", "function=0x180001068", capa.features.Characteristic("gs access"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("gs access"), False),
+    # insn/characteristic(cross section flow)
+    ("a1982...", "function=0x4014D0", capa.features.Characteristic("cross section flow"), True),
+    # insn/characteristic(cross section flow): imports don't count
+    ("kernel32-64", "function=0x180001068", capa.features.Characteristic("cross section flow"), False),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("cross section flow"), False),
+    # insn/characteristic(recursive call)
+    ("39c05...", "function=0x10003100", capa.features.Characteristic("recursive call"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("recursive call"), False),
+    # insn/characteristic(indirect call)
+    ("mimikatz", "function=0x4175FF", capa.features.Characteristic("indirect call"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("indirect call"), False),
+    # insn/characteristic(calls from)
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls from"), True),
+    ("mimikatz", "function=0x4702FD", capa.features.Characteristic("calls from"), False),
+    # function/characteristic(calls to)
+    ("mimikatz", "function=0x40105D", capa.features.Characteristic("calls to"), True),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls to"), False),
+]
+
+FEATURE_PRESENCE_TESTS_IDA = [
+    # file/imports
+    # IDA can recover more names of APIs imported by ordinal
+    ("mimikatz", "file", capa.features.file.Import("cabinet.FCIAddFile"), True),
+]
+
+FEATURE_COUNT_TESTS = [
+    ("mimikatz", "function=0x40E5C2", capa.features.basicblock.BasicBlock(), 7),
+    ("mimikatz", "function=0x4702FD", capa.features.Characteristic("calls from"), 0),
+    ("mimikatz", "function=0x40E5C2", capa.features.Characteristic("calls from"), 3),
+    ("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls to"), 0),
+    ("mimikatz", "function=0x40B1F1", capa.features.Characteristic("calls to"), 3),
+]
+
+
+def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
+    """assert that `feature` is (or, when `expected` is false, is not) among the features of `scope`."""
+    extractor = get_extractor(sample)
+    features = scope(extractor)
+    # the two messages differ only in the stated expectation
+    template = "%s should be found in %s" if expected else "%s should not be found in %s"
+    msg = template % (str(feature), scope.__name__)
+    assert feature.evaluate(features) == expected, msg
+
+
+def do_test_feature_count(get_extractor, sample, scope, feature, expected):
+    """assert that `scope` reports exactly `expected` locations for `feature`."""
+    found = len(scope(get_extractor(sample))[feature])
+    msg = "%s should be found %d times in %s, found: %d" % (
+        str(feature),
+        expected,
+        scope.__name__,
+        found,
+    )
+    assert found == expected, msg
+
+
+def get_extractor(path):
+    """pick the feature extractor backend for the running interpreter and build it for `path`."""
+    if sys.version_info < (3, 0):
+        extractor = get_viv_extractor(path)
+    else:
+        # TODO: How to decide which backend to use?
+        # `get_smda_extractor` is the alternative py3 backend, but miasm is hardcoded for now.
+        extractor = get_miasm_extractor(path)
+
+    # overload the extractor so that the fixture exposes `extractor.path`
+    extractor.path = path
+    return extractor


@pytest.fixture
-def kernel32():
-    path = os.path.join(CD, "data", "kernel32.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def mimikatz_extractor():
+    return get_extractor(get_data_path_by_name("mimikatz"))


@pytest.fixture
-def sample_a198216798ca38f280dc413f8c57f2c2():
-    path = os.path.join(CD, "data", "a198216798ca38f280dc413f8c57f2c2.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def a933a_extractor():
+    return get_extractor(get_data_path_by_name("a933a..."))


@pytest.fixture
-def sample_9324d1a8ae37a36ae560c37448c9705a():
-    path = os.path.join(CD, "data", "9324d1a8ae37a36ae560c37448c9705a.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def kernel32_extractor():
+    return get_extractor(get_data_path_by_name("kernel32"))


@pytest.fixture
-def pma_lab_12_04():
-    path = os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def a1982_extractor():
+    return get_extractor(get_data_path_by_name("a1982..."))


@pytest.fixture
-def sample_bfb9b5391a13d0afd787e87ab90f14f5():
-    path = os.path.join(CD, "data", "bfb9b5391a13d0afd787e87ab90f14f5.dll_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def z9324d_extractor():
+    return get_extractor(get_data_path_by_name("9324d..."))


@pytest.fixture
-def sample_lab21_01():
-    path = os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def pma12_04_extractor():
+    return get_extractor(get_data_path_by_name("pma12-04"))


@pytest.fixture
-def sample_c91887d861d9bd4a5872249b641bc9f9():
-    path = os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
-    return Sample(viv_utils.getWorkspace(path), path)
+def pma16_01_extractor():
+    return get_extractor(get_data_path_by_name("pma16-01"))


@pytest.fixture
-def sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41():
-    path = os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_",)
-    return Sample(viv_utils.getWorkspace(path), path)
+def bfb9b_extractor():
+    return get_extractor(get_data_path_by_name("bfb9b..."))


@pytest.fixture
-def sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32():
-    path = os.path.join(CD, "data", "499c2a85f6e8142c3f48d4251c9c7cd6.raw32")
-    return Sample(viv_utils.getShellcodeWorkspace(path), path)
+def pma21_01_extractor():
+    return get_extractor(get_data_path_by_name("pma21-01"))
+
+
+@pytest.fixture
+def c9188_extractor():
+    return get_extractor(get_data_path_by_name("c9188..."))
+
+
+@pytest.fixture
+def z39c05_extractor():
+    return get_extractor(get_data_path_by_name("39c05..."))
+
+
+@pytest.fixture
+def z499c2_extractor():
+    return get_extractor(get_data_path_by_name("499c2..."))
+
+
+@pytest.fixture
+def al_khaser_x86_extractor():
+    return get_extractor(get_data_path_by_name("al-khaser x86"))
+
+
+@pytest.fixture
+def pingtaest_extractor():
+    return get_extractor(get_data_path_by_name("pingtaest"))
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -59,7 +59,13 @@ def test_some():
    )
    assert (
        Some(2, [Number(1), Number(2), Number(3)]).evaluate(
-            {Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}, Number(4): {1},}
+            {
+                Number(0): {1},
+                Number(1): {1},
+                Number(2): {1},
+                Number(3): {1},
+                Number(4): {1},
+            }
        )
        == True
    )
@@ -258,7 +264,9 @@ def test_match_matched_rules():
    ]

    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.insn.Number(100): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.insn.Number(100): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule1") in features
    assert capa.features.MatchedRule("test rule2") in features
@@ -266,7 +274,9 @@ def test_match_matched_rules():
    # the ordering of the rules must not matter,
    # the engine should match rules in an appropriate order.
    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(reversed(rules)), {capa.features.insn.Number(100): {1}}, 0x0,
+        capa.engine.topologically_order_rules(reversed(rules)),
+        {capa.features.insn.Number(100): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule1") in features
    assert capa.features.MatchedRule("test rule2") in features
@@ -312,22 +322,30 @@ def test_regex():
        ),
    ]
    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.insn.Number(100): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.insn.Number(100): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule") not in features

    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.String("aaaa"): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.String("aaaa"): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule") not in features

    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.String("aBBBBa"): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.String("aBBBBa"): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule") not in features

    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.String("abbbba"): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.String("abbbba"): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule") in features
    assert capa.features.MatchedRule("rule with implied wildcards") in features
@@ -350,7 +368,9 @@ def test_regex_ignorecase():
        ),
    ]
    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.String("aBBBBa"): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.String("aBBBBa"): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule") in features

@@ -429,7 +449,9 @@ def test_match_namespace():
    ]

    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.insn.API("CreateFile"): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.insn.API("CreateFile"): {1}},
+        0x0,
    )
    assert "CreateFile API" in matches
    assert "file-create" in matches
@@ -439,7 +461,9 @@ def test_match_namespace():
    assert capa.features.MatchedRule("file/create/CreateFile") in features

    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.insn.API("WriteFile"): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.insn.API("WriteFile"): {1}},
+        0x0,
    )
    assert "WriteFile API" in matches
    assert "file-create" not in matches
--- a/tests/test_fmt.py
+++ b/tests/test_fmt.py
@@ -92,6 +92,8 @@ def test_rule_reformat_order():


 def test_rule_reformat_meta_update():
+    # test updating the rule content after parsing
+
    rule = textwrap.dedent(
        """
        rule:
@@ -112,3 +114,24 @@ def test_rule_reformat_meta_update():
    rule = capa.rules.Rule.from_yaml(rule)
    rule.name = "test rule"
    assert rule.to_yaml() == EXPECTED
+
+
+def test_rule_reformat_string_description():
+    # the `description` should be aligned with the preceding feature name.
+    # see #263
+    src = textwrap.dedent(
+        """
+        rule:
+          meta:
+            name: test rule
+            author: user@domain.com
+            scope: function
+          features:
+            - and:
+              - string: foo
+                description: bar
+        """
+    ).lstrip()
+
+    rule = capa.rules.Rule.from_yaml(src)
+    assert rule.to_yaml() == src
--- a/tests/test_freeze.py
+++ b/tests/test_freeze.py
@@ -5,9 +5,10 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
+import sys
 import textwrap

+import pytest
 from fixtures import *

 import capa.main
@@ -20,13 +21,19 @@ import capa.features.extractors
 EXTRACTOR = capa.features.extractors.NullFeatureExtractor(
    {
        "base address": 0x401000,
-        "file features": [(0x402345, capa.features.Characteristic("embedded pe")),],
+        "file features": [
+            (0x402345, capa.features.Characteristic("embedded pe")),
+        ],
        "functions": {
            0x401000: {
-                "features": [(0x401000, capa.features.Characteristic("switch")),],
+                "features": [
+                    (0x401000, capa.features.Characteristic("indirect call")),
+                ],
                "basic blocks": {
                    0x401000: {
-                        "features": [(0x401000, capa.features.Characteristic("tight loop")),],
+                        "features": [
+                            (0x401000, capa.features.Characteristic("tight loop")),
+                        ],
                        "instructions": {
                            0x401000: {
                                "features": [
@@ -34,7 +41,11 @@ EXTRACTOR = capa.features.extractors.NullFeatureExtractor(
                                    (0x401000, capa.features.Characteristic("nzxor")),
                                ],
                            },
-                            0x401002: {"features": [(0x401002, capa.features.insn.Mnemonic("mov")),],},
+                            0x401002: {
+                                "features": [
+                                    (0x401002, capa.features.insn.Mnemonic("mov")),
+                                ],
+                            },
                        },
                    },
                },
@@ -104,17 +115,14 @@ def compare_extractors_viv_null(viv_ext, null_ext):
      viv_ext (capa.features.extractors.viv.VivisectFeatureExtractor)
      null_ext (capa.features.extractors.NullFeatureExtractor)
    """
-
-    # TODO: ordering of these things probably doesn't work yet
-
    assert list(viv_ext.extract_file_features()) == list(null_ext.extract_file_features())
-    assert to_int(list(viv_ext.get_functions())) == list(null_ext.get_functions())
+    assert list(map(to_int, viv_ext.get_functions())) == list(null_ext.get_functions())
    for f in viv_ext.get_functions():
-        assert to_int(list(viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
+        assert list(map(to_int, viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
        assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(to_int(f)))

        for bb in viv_ext.get_basic_blocks(f):
-            assert to_int(list(viv_ext.get_instructions(f, bb))) == list(
+            assert list(map(to_int, viv_ext.get_instructions(f, bb))) == list(
                null_ext.get_instructions(to_int(f), to_int(bb))
            )
            assert list(viv_ext.extract_basic_block_features(f, bb)) == list(
@@ -129,10 +137,7 @@ def compare_extractors_viv_null(viv_ext, null_ext):

 def to_int(o):
    """helper to get int value of extractor items"""
-    if isinstance(o, list):
-        return map(lambda x: capa.helpers.oint(x), o)
-    else:
-        return capa.helpers.oint(o)
+    return capa.helpers.oint(o)


 def test_freeze_s_roundtrip():
@@ -169,18 +174,22 @@ def test_serialize_features():
    roundtrip_feature(capa.features.file.Import("#11"))


-def test_freeze_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
+def test_freeze_sample(tmpdir, z9324d_extractor):
    # tmpdir fixture handles cleanup
    o = tmpdir.mkdir("capa").join("test.frz").strpath
-    assert capa.features.freeze.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, o, "-v"]) == 0
+    path = z9324d_extractor.path
+    assert capa.features.freeze.main([path, o, "-v"]) == 0


-def test_freeze_load_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a):
+@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
+def test_freeze_load_sample(tmpdir, z9324d_extractor):
    o = tmpdir.mkdir("capa").join("test.frz")
-    viv_extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
+
    with open(o.strpath, "wb") as f:
-        f.write(capa.features.freeze.dump(viv_extractor))
-    null_extractor = capa.features.freeze.load(o.open("rb").read())
-    compare_extractors_viv_null(viv_extractor, null_extractor)
+        f.write(capa.features.freeze.dump(z9324d_extractor))
+
+    with open(o.strpath, "rb") as f:
+        null_extractor = capa.features.freeze.load(f.read())
+
+    compare_extractors_viv_null(z9324d_extractor, null_extractor)
--- a/tests/test_ida_features.py
+++ b/tests/test_ida_features.py
@@ -1,24 +1,25 @@
 # run this script from within IDA with ./tests/data/mimikatz.exe open
+import sys
 import logging
+import os.path
 import binascii
 import traceback
-import collections

 import pytest

-import capa.features
-import capa.features.file
-import capa.features.insn
-import capa.features.basicblock
-from capa.features import ARCH_X32, ARCH_X64
+try:
+    sys.path.append(os.path.dirname(__file__))
+    from fixtures import *
+finally:
+    sys.path.pop()
+

 logger = logging.getLogger("test_ida_features")


-def check_input_file():
+def check_input_file(wanted):
    import idautils

-    wanted = "5f66b82558ca92e54e77f216ef4c066c"
    # some versions (7.4) of IDA return a truncated version of the MD5.
    # https://github.com/idapython/bin/issues/11
    try:
@@ -27,12 +28,13 @@ def check_input_file():
        # in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
        # rather than the hex digest
        found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower()
+
    if not wanted.startswith(found):
-        raise RuntimeError("please run the tests against `mimikatz.exe`")
+        raise RuntimeError("please run the tests against sample with MD5: `%s`" % (wanted))


-def get_extractor():
-    check_input_file()
+def get_ida_extractor(_path):
+    check_input_file("5f66b82558ca92e54e77f216ef4c066c")

    # have to import import this inline so pytest doesn't bail outside of IDA
    import capa.features.extractors.ida
@@ -40,232 +42,50 @@ def get_extractor():
    return capa.features.extractors.ida.IdaFeatureExtractor()


-def extract_file_features():
-    extractor = get_extractor()
-    features = set([])
-    for feature, va in extractor.extract_file_features():
-        features.add(feature)
-    return features
-
-
-def extract_function_features(f):
-    extractor = get_extractor()
-    features = collections.defaultdict(set)
-    for bb in extractor.get_basic_blocks(f):
-        for insn in extractor.get_instructions(f, bb):
-            for feature, va in extractor.extract_insn_features(f, bb, insn):
-                features[feature].add(va)
-        for feature, va in extractor.extract_basic_block_features(f, bb):
-            features[feature].add(va)
-    for feature, va in extractor.extract_function_features(f):
-        features[feature].add(va)
-    return features
-
-
-def extract_basic_block_features(f, bb):
-    extractor = get_extractor()
-    features = collections.defaultdict(set)
-    for insn in extractor.get_instructions(f, bb):
-        for feature, va in extractor.extract_insn_features(f, bb, insn):
-            features[feature].add(va)
-    for feature, va in extractor.extract_basic_block_features(f, bb):
-        features[feature].add(va)
-    return features
-
-
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_api_features():
-    f = get_extractor().get_function(0x403BAC)
-    features = extract_function_features(f)
-    assert capa.features.insn.API("advapi32.CryptAcquireContextW") in features
-    assert capa.features.insn.API("advapi32.CryptAcquireContext") in features
-    assert capa.features.insn.API("advapi32.CryptGenKey") in features
-    assert capa.features.insn.API("advapi32.CryptImportKey") in features
-    assert capa.features.insn.API("advapi32.CryptDestroyKey") in features
-    assert capa.features.insn.API("CryptAcquireContextW") in features
-    assert capa.features.insn.API("CryptAcquireContext") in features
-    assert capa.features.insn.API("CryptGenKey") in features
-    assert capa.features.insn.API("CryptImportKey") in features
-    assert capa.features.insn.API("CryptDestroyKey") in features
+def test_ida_features():
+    for (sample, scope, feature, expected) in FEATURE_PRESENCE_TESTS + FEATURE_PRESENCE_TESTS_IDA:
+        id = make_test_id((sample, scope, feature, expected))

-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_string_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.String("SCardControl") in features
-    assert capa.features.String("SCardTransmit") in features
-    assert capa.features.String("ACR  > ") in features
-    # other strings not in this function
-    assert capa.features.String("bcrypt.dll") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_byte_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    wanted = capa.features.Bytes("SCardControl".encode("utf-16le"))
-    # use `==` rather than `is` because the result is not `True` but a truthy value.
-    assert wanted.evaluate(features) == True
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_number_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Number(0xFF) in features
-    assert capa.features.insn.Number(0x3136B0) in features
-    # the following are stack adjustments
-    assert capa.features.insn.Number(0xC) not in features
-    assert capa.features.insn.Number(0x10) not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_number_arch_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Number(0xFF) in features
-    assert capa.features.insn.Number(0xFF, arch=ARCH_X32) in features
-    assert capa.features.insn.Number(0xFF, arch=ARCH_X64) not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_offset_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Offset(0x0) in features
-    assert capa.features.insn.Offset(0x4) in features
-    assert capa.features.insn.Offset(0xC) in features
-    # the following are stack references
-    assert capa.features.insn.Offset(0x8) not in features
-    assert capa.features.insn.Offset(0x10) not in features
-
-    # this function has the following negative offsets
-    # movzx   ecx, byte ptr [eax-1]
-    # movzx   eax, byte ptr [eax-2]
-    f = get_extractor().get_function(0x4011FB)
-    features = extract_function_features(f)
-    assert capa.features.insn.Offset(-0x1) in features
-    assert capa.features.insn.Offset(-0x2) in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_offset_arch_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Offset(0x0) in features
-    assert capa.features.insn.Offset(0x0, arch=ARCH_X32) in features
-    assert capa.features.insn.Offset(0x0, arch=ARCH_X64) not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_nzxor_features():
-    f = get_extractor().get_function(0x410DFC)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("nzxor") in features  # 0x0410F0B
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_mnemonic_features():
-    f = get_extractor().get_function(0x40105D)
-    features = extract_function_features(f)
-    assert capa.features.insn.Mnemonic("push") in features
-    assert capa.features.insn.Mnemonic("movzx") in features
-    assert capa.features.insn.Mnemonic("xor") in features
-
-    assert capa.features.insn.Mnemonic("in") not in features
-    assert capa.features.insn.Mnemonic("out") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_file_section_name_features():
-    features = extract_file_features()
-    assert capa.features.file.Section(".idata") in features
-    assert capa.features.file.Section(".text") in features
-    assert capa.features.file.Section(".nope") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_tight_loop_features():
-    extractor = get_extractor()
-
-    f = extractor.get_function(0x402EC4)
-    for bb in extractor.get_basic_blocks(f):
-        if bb.__int__() != 0x402F8E:
+        try:
+            check_input_file(get_sample_md5_by_name(sample))
+        except RuntimeError:
+            print("SKIP %s" % (id))
            continue
-        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop") in features
-        assert capa.features.basicblock.BasicBlock() in features
+
+        scope = resolve_scope(scope)
+        sample = resolve_sample(sample)
+
+        try:
+            do_test_feature_presence(get_ida_extractor, sample, scope, feature, expected)
+        except Exception as e:
+            print("FAIL %s" % (id))
+            traceback.print_exc()
+        else:
+            print("OK   %s" % (id))


@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_tight_loop_bb_features():
-    extractor = get_extractor()
+def test_ida_feature_counts():
+    for (sample, scope, feature, expected) in FEATURE_COUNT_TESTS:
+        id = make_test_id((sample, scope, feature, expected))

-    f = extractor.get_function(0x402EC4)
-    for bb in extractor.get_basic_blocks(f):
-        if bb.__int__() != 0x402F8E:
+        try:
+            check_input_file(get_sample_md5_by_name(sample))
+        except RuntimeError:
+            print("SKIP %s" % (id))
            continue
-        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop") in features
-        assert capa.features.basicblock.BasicBlock() in features

+        scope = resolve_scope(scope)
+        sample = resolve_sample(sample)

-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_file_import_name_features():
-    features = extract_file_features()
-    assert capa.features.file.Import("advapi32.CryptSetHashParam") in features
-    assert capa.features.file.Import("CryptSetHashParam") in features
-    assert capa.features.file.Import("kernel32.IsWow64Process") in features
-    assert capa.features.file.Import("msvcrt.exit") in features
-    assert capa.features.file.Import("cabinet.#11") in features
-    assert capa.features.file.Import("#11") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_stackstring_features():
-    f = get_extractor().get_function(0x4556E5)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("stack string") in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_switch_features():
-    f = get_extractor().get_function(0x409411)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("switch") in features
-
-    f = get_extractor().get_function(0x409393)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("switch") not in features
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_function_calls_to():
-    # this function is used in a function pointer
-    f = get_extractor().get_function(0x4011FB)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("calls to") not in features
-
-    # __FindPESection is called once
-    f = get_extractor().get_function(0x470360)
-    features = extract_function_features(f)
-    assert len(features[capa.features.Characteristic("calls to")]) == 1
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_function_calls_from():
-    f = get_extractor().get_function(0x4011FB)
-    features = extract_function_features(f)
-    assert capa.features.Characteristic("calls from") in features
-    assert len(features[capa.features.Characteristic("calls from")]) == 3
-
-
-@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
-def test_basic_block_count():
-    f = get_extractor().get_function(0x4011FB)
-    features = extract_function_features(f)
-    assert len(features[capa.features.basicblock.BasicBlock()]) == 15
+        try:
+            do_test_feature_count(get_ida_extractor, sample, scope, feature, expected)
+        except Exception as e:
+            print("FAIL %s" % (id))
+            traceback.print_exc()
+        else:
+            print("OK   %s" % (id))


 if __name__ == "__main__":
@@ -279,10 +99,6 @@ if __name__ == "__main__":
        test = getattr(sys.modules[__name__], name)
        logger.debug("invoking test: %s", name)
        sys.stderr.flush()
-        try:
-            test()
-        except AssertionError as e:
-            print("FAIL %s" % (name))
-            traceback.print_exc()
-        else:
-            print("OK   %s" % (name))
+        test()
+
+    print("DONE")
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -5,28 +6,29 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
+import sys
 import textwrap

+import pytest
 from fixtures import *

 import capa.main
 import capa.rules
 import capa.engine
 import capa.features
-import capa.features.extractors.viv
 from capa.engine import *


-def test_main(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_main(z9324d_extractor):
    # tests rules can be loaded successfully and all output modes
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-vv"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-j"]) == 0
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path]) == 0
+    path = z9324d_extractor.path
+    assert capa.main.main([path, "-vv"]) == 0
+    assert capa.main.main([path, "-v"]) == 0
+    assert capa.main.main([path, "-j"]) == 0
+    assert capa.main.main([path]) == 0


-def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
+def test_main_single_rule(z9324d_extractor, tmpdir):
    # tests a single rule can be loaded successfully
    RULE_CONTENT = textwrap.dedent(
        """
@@ -38,16 +40,56 @@ def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir):
              - string: test
        """
    )
+    path = z9324d_extractor.path
    rule_file = tmpdir.mkdir("capa").join("rule.yml")
    rule_file.write(RULE_CONTENT)
-    assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, "-v", "-r", rule_file.strpath,]) == 0
+    assert (
+        capa.main.main(
+            [
+                path,
+                "-v",
+                "-r",
+                rule_file.strpath,
+            ]
+        )
+        == 0
+    )


-def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32):
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-vv", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-v", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-j", "-f", "sc32"]) == 0
-    assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, "-f", "sc32"]) == 0
+def test_main_non_ascii_filename(pingtaest_extractor, tmpdir, capsys):
+    # on py2.7, need to be careful about str (which can hold bytes)
+    #  vs unicode (which is only unicode characters).
+    # on py3, this should not be needed.
+    #
+    # here we print a string with unicode characters in it
+    # (specifically, a byte string with utf-8 bytes in it, see file encoding)
+    assert capa.main.main(["-q", pingtaest_extractor.path]) == 0
+
+    std = capsys.readouterr()
+    # but here, we have to use a unicode instance,
+    # because capsys has decoded the output for us.
+    if sys.version_info >= (3, 0):
+        assert pingtaest_extractor.path in std.out
+    else:
+        assert pingtaest_extractor.path.decode("utf-8") in std.out
+
+
+def test_main_non_ascii_filename_nonexistent(tmpdir, caplog):
+    NON_ASCII_FILENAME = "täst_not_there.exe"
+    assert capa.main.main(["-q", NON_ASCII_FILENAME]) == -1
+
+    if sys.version_info >= (3, 0):
+        assert NON_ASCII_FILENAME in caplog.text
+    else:
+        assert NON_ASCII_FILENAME.decode("utf-8") in caplog.text
+
+
+def test_main_shellcode(z499c2_extractor):
+    path = z499c2_extractor.path
+    assert capa.main.main([path, "-vv", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-v", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-j", "-f", "sc32"]) == 0
+    assert capa.main.main([path, "-f", "sc32"]) == 0


 def test_ruleset():
@@ -73,7 +115,7 @@ def test_ruleset():
                            name: function rule
                            scope: function
                        features:
-                          - characteristic: switch
+                          - characteristic: tight loop
                    """
                )
            ),
@@ -96,7 +138,7 @@ def test_ruleset():
    assert len(rules.basic_block_rules) == 1


-def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_match_across_scopes_file_function(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a function (0x4073F0)
@@ -153,16 +195,13 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
            ),
        ]
    )
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "install service" in capabilities
    assert ".text section" in capabilities
    assert ".text section and install service" in capabilities


-def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_match_across_scopes(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a basic block (including at least 0x403685)
@@ -218,16 +257,13 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
            ),
        ]
    )
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "tight loop" in capabilities
    assert "kill thread loop" in capabilities
    assert "kill thread program" in capabilities


-def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_subscope_bb_rules(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -247,14 +283,11 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
        ]
    )
    # tight loop at 0x403685
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "test rule" in capabilities


-def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_byte_matching(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -272,15 +305,11 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
            )
        ]
    )
-
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "byte match test" in capabilities


-def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
+def test_count_bb(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
@@ -299,9 +328,40 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
            )
        ]
    )
-
-    extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
-        sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
-    )
-    capabilities, meta = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "count bb" in capabilities
+
+
+def test_fix262(pma16_01_extractor, capsys):
+    # regression test for #262 (per the test name): `-t "send HTTP request"` output shows `HTTP/1.0` but not the domain string
+    path = pma16_01_extractor.path
+    assert capa.main.main([path, "-vv", "-t", "send HTTP request", "-q"]) == 0
+
+    std = capsys.readouterr()
+    assert "HTTP/1.0" in std.out
+    assert "www.practicalmalwareanalysis.com" not in std.out
+
+
+def test_not_render_rules_also_matched(z9324d_extractor, capsys):
+    # rules that are also matched by other rules should not get rendered by default.
+    # this cuts down on the amount of output while giving approx the same detail.
+    # see #224
+    path = z9324d_extractor.path
+
+    # `act as TCP client` matches on
+    # `connect TCP socket` matches on
+    # `create TCP socket`
+    #
+    # so only `act as TCP client` should be displayed
+    assert capa.main.main([path]) == 0
+    std = capsys.readouterr()
+    assert "act as TCP client" in std.out
+    assert "connect TCP socket" not in std.out
+    assert "create TCP socket" not in std.out
+
+    # this strategy only applies to the default renderer, not any verbose renderer
+    assert capa.main.main([path, "-v"]) == 0
+    std = capsys.readouterr()
+    assert "act as TCP client" in std.out
+    assert "connect TCP socket" in std.out
+    assert "create TCP socket" in std.out
--- a/tests/test_miasm_features.py
+++ b/tests/test_miasm_features.py
@@ -0,0 +1,29 @@
+# Copyright (C) 2020 FireEye, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: https://github.com/fireeye/capa/blob/master/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import sys
+
+from fixtures import *
+
+
+@parametrize(
+    "sample,scope,feature,expected",
+    FEATURE_PRESENCE_TESTS,
+    indirect=["sample", "scope"],
+)
+def test_miasm_features(sample, scope, feature, expected):
+    do_test_feature_presence(get_miasm_extractor, sample, scope, feature, expected)
+
+
+@parametrize(
+    "sample,scope,feature,expected",
+    FEATURE_COUNT_TESTS,
+    indirect=["sample", "scope"],
+)
+def test_miasm_feature_counts(sample, scope, feature, expected):
+    do_test_feature_count(get_miasm_extractor, sample, scope, feature, expected)
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -69,46 +69,63 @@ def test_rule_yaml_complex():
    assert r.evaluate({Number(6): {1}, Number(7): {1}, Number(8): {1}}) == False


-def test_rule_yaml_descriptions():
+def test_rule_descriptions():
    rule = textwrap.dedent(
        """
        rule:
-            meta:
-                name: test rule
-            features:
+          meta:
+            name: test rule
+          features:
+            - and:
+              - description: and description
+              - number: 1 = number description
+              - string: mystring
+                description: string description
+              - string: '/myregex/'
+                description: regex description
+              # TODO - count(number(2 = number description)): 2
+              - or:
+                - description: or description
                - and:
-                    - number: 1 = This is the number 1
-                    - string: This program cannot be run in DOS mode.
-                      description: MS-DOS stub message
-                    - string: '/SELECT.*FROM.*WHERE/i'
-                      description: SQL WHERE Clause
-                    - count(number(2 = AF_INET/SOCK_DGRAM)): 2
-                    - or:
-                        - and:
-                            - offset: 0x50 = IMAGE_NT_HEADERS.OptionalHeader.SizeOfImage
-                            - offset: 0x34 = IMAGE_NT_HEADERS.OptionalHeader.ImageBase
-                          description: 32-bits
-                        - and:
-                            - offset: 0x50 = IMAGE_NT_HEADERS64.OptionalHeader.SizeOfImage
-                            - offset: 0x30 = IMAGE_NT_HEADERS64.OptionalHeader.ImageBase
-                          description: 64-bits
-                      description: PE headers offsets
+                  - offset: 0x50 = offset description
+                  - offset: 0x34 = offset description
+                  - description: and description
+                - and:
+                  - description: and description
+                  - offset/x64: 0x50 = offset/x64 description
+                  - offset/x64: 0x30 = offset/x64 description
        """
    )
    r = capa.rules.Rule.from_yaml(rule)
-    assert (
-        r.evaluate(
-            {
-                Number(1): {1},
-                Number(2): {2, 3},
-                String("This program cannot be run in DOS mode."): {4},
-                String("SELECT password FROM hidden_table WHERE user == admin"): {5},
-                Offset(0x50): {6},
-                Offset(0x30): {7},
-            }
+
+    def rec(statement):
+        # statements are checked against their lowercased name, any other node against its name as-is
+        is_statement = isinstance(statement, capa.engine.Statement)
+        expected_name = statement.name.lower() if is_statement else statement.name
+        assert statement.description == expected_name + " description"
+        for child in (statement.get_children() if is_statement else ()):
+            rec(child)
+
+    rec(r.statement)
+
+
+def test_invalid_rule_statement_descriptions():
+    # statements can only have one description
+    with pytest.raises(capa.rules.InvalidRule):
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                  meta:
+                    name: test rule
+                  features:
+                    - or:
+                      - number: 1 = This is the number 1
+                      - description: description
+                      - description: another description (invalid)
+                """
+            )
        )
-        == True
-    )


 def test_rule_yaml_not():
@@ -162,6 +179,23 @@ def test_rule_yaml_count_range():
    assert r.evaluate({Number(100): {1, 2, 3}}) == False


+def test_rule_yaml_count_string():
+    """`count(string(foo)): 2` must match only when the string occurs at exactly two locations."""
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+            features:
+                - count(string(foo)): 2
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+    # the empty case uses set(): a bare {} is an empty dict, unlike the other location sets
+    for locations, expected in ((set(), False), ({1}, False), ({1, 2}, True), ({1, 2, 3}, False)):
+        assert r.evaluate({String("foo"): locations}) == expected
+
+
 def test_invalid_rule_feature():
    with pytest.raises(capa.rules.InvalidRule):
        capa.rules.Rule.from_yaml(
@@ -248,7 +282,8 @@ def test_lib_rules():
            ),
        ]
    )
-    assert len(rules.function_rules) == 1
+    # lib rules are added to the rule set
+    assert len(rules.function_rules) == 2


 def test_subscope_rules():
@@ -267,7 +302,7 @@ def test_subscope_rules():
                                - function:
                                    - and:
                                        - characteristic: nzxor
-                                        - characteristic: switch
+                                        - characteristic: loop
                    """
                )
            )
@@ -466,6 +501,21 @@ def test_number_arch():
    assert r.evaluate({Number(2, arch=ARCH_X64): {1}}) == False


+def test_number_arch_symbol():  # an arch-qualified number written with a `= description` suffix must still match
+    r = capa.rules.Rule.from_yaml(
+        textwrap.dedent(
+            """
+            rule:
+                meta:
+                    name: test rule
+                features:
+                    - number/x32: 2 = some constant
+            """
+        )
+    )
+    assert r.evaluate({Number(2, arch=ARCH_X32, description="some constant"): {1}}) == True
+
+
 def test_offset_symbol():
    rule = textwrap.dedent(
        """
@@ -529,6 +579,21 @@ def test_offset_arch():
    assert r.evaluate({Offset(2, arch=ARCH_X64): {1}}) == False


+def test_offset_arch_symbol():  # an arch-qualified offset written with a `= description` suffix must still match
+    r = capa.rules.Rule.from_yaml(
+        textwrap.dedent(
+            """
+            rule:
+                meta:
+                    name: test rule
+                features:
+                    - offset/x32: 2 = some constant
+            """
+        )
+    )
+    assert r.evaluate({Offset(2, arch=ARCH_X32, description="some constant"): {1}}) == True
+
+
 def test_invalid_offset():
    with pytest.raises(capa.rules.InvalidRule):
        r = capa.rules.Rule.from_yaml(
@@ -633,12 +698,16 @@ def test_regex_values_always_string():
        ),
    ]
    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.String("123"): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.String("123"): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule") in features

    features, matches = capa.engine.match(
-        capa.engine.topologically_order_rules(rules), {capa.features.String("0x123"): {1}}, 0x0,
+        capa.engine.topologically_order_rules(rules),
+        {capa.features.String("0x123"): {1}},
+        0x0,
    )
    assert capa.features.MatchedRule("test rule") in features

--- a/tests/test_smda_features.py
+++ b/tests/test_smda_features.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import sys
+
+from fixtures import *
+
+
+@parametrize("sample,scope,feature,expected", FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"])
+def test_smda_features(sample, scope, feature, expected):
+    """
+    check one FEATURE_PRESENCE_TESTS case using get_smda_extractor.
+    the check runs under xfail when the interpreter is older than Python 3.
+    """
+    with xfail(sys.version_info < (3, 0), reason="SMDA only works on py3"):
+        do_test_feature_presence(get_smda_extractor, sample, scope, feature, expected)
+
+
+@parametrize("sample,scope,feature,expected", FEATURE_COUNT_TESTS, indirect=["sample", "scope"])
+def test_smda_feature_counts(sample, scope, feature, expected):
+    """
+    check one FEATURE_COUNT_TESTS case using get_smda_extractor.
+    the check runs under xfail when the interpreter is older than Python 3.
+    """
+    with xfail(sys.version_info < (3, 0), reason="SMDA only works on py3"):
+        do_test_feature_count(get_smda_extractor, sample, scope, feature, expected)
--- a/tests/test_viv_features.py
+++ b/tests/test_viv_features.py
@@ -5,340 +5,26 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import sys

-import viv_utils
 from fixtures import *

-import capa.features
-import capa.features.file
-import capa.features.insn
-import capa.features.basicblock
-import capa.features.extractors.viv.file
-import capa.features.extractors.viv.insn
-import capa.features.extractors.viv.function
-import capa.features.extractors.viv.basicblock
-from capa.features import ARCH_X32, ARCH_X64

-
-def extract_file_features(vw, path):
-    features = set([])
-    for feature, va in capa.features.extractors.viv.file.extract_features(vw, path):
-        features.add(feature)
-    return features
-
-
-def extract_function_features(f):
-    features = collections.defaultdict(set)
-    for bb in f.basic_blocks:
-        for insn in bb.instructions:
-            for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
-                features[feature].add(va)
-        for feature, va in capa.features.extractors.viv.basicblock.extract_features(f, bb):
-            features[feature].add(va)
-    for feature, va in capa.features.extractors.viv.function.extract_features(f):
-        features[feature].add(va)
-    return features
-
-
-def extract_basic_block_features(f, bb):
-    features = set({})
-    for insn in bb.instructions:
-        for feature, _ in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
-            features.add(feature)
-    for feature, _ in capa.features.extractors.viv.basicblock.extract_features(f, bb):
-        features.add(feature)
-    return features
-
-
-def test_api_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x403BAC))
-    assert capa.features.insn.API("advapi32.CryptAcquireContextW") in features
-    assert capa.features.insn.API("advapi32.CryptAcquireContext") in features
-    assert capa.features.insn.API("advapi32.CryptGenKey") in features
-    assert capa.features.insn.API("advapi32.CryptImportKey") in features
-    assert capa.features.insn.API("advapi32.CryptDestroyKey") in features
-    assert capa.features.insn.API("CryptAcquireContextW") in features
-    assert capa.features.insn.API("CryptAcquireContext") in features
-    assert capa.features.insn.API("CryptGenKey") in features
-    assert capa.features.insn.API("CryptImportKey") in features
-    assert capa.features.insn.API("CryptDestroyKey") in features
-
-
-def test_api_features_64_bit(sample_a198216798ca38f280dc413f8c57f2c2):
-    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4011B0))
-    assert capa.features.insn.API("kernel32.GetStringTypeA") in features
-    assert capa.features.insn.API("kernel32.GetStringTypeW") not in features
-    assert capa.features.insn.API("kernel32.GetStringType") in features
-    assert capa.features.insn.API("GetStringTypeA") in features
-    assert capa.features.insn.API("GetStringType") in features
-    # call via thunk in IDA Pro
-    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401CB0))
-    assert capa.features.insn.API("msvcrt.vfprintf") in features
-    assert capa.features.insn.API("vfprintf") in features
-
-
-def test_string_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.String("SCardControl") in features
-    assert capa.features.String("SCardTransmit") in features
-    assert capa.features.String("ACR  > ") in features
-    # other strings not in this function
-    assert capa.features.String("bcrypt.dll") not in features
-
-
-def test_string_pointer_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF))
-    assert capa.features.String("INPUTEVENT") in features
-
-
-def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-    wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61")
-    # use `==` rather than `is` because the result is not `True` but a truthy value.
-    assert wanted.evaluate(features) == True
-
-
-def test_byte_features64(sample_lab21_01):
-    features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400010C0))
-    wanted = capa.features.Bytes(b"\x32\xA2\xDF\x2D\x99\x2B\x00\x00")
-    # use `==` rather than `is` because the result is not `True` but a truthy value.
-    assert wanted.evaluate(features) == True
-
-
-def test_bytes_pointer_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x44EDEF))
-    assert capa.features.Bytes("INPUTEVENT".encode("utf-16le")).evaluate(features) == True
-
-
-def test_number_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Number(0xFF) in features
-    assert capa.features.insn.Number(0x3136B0) in features
-    # the following are stack adjustments
-    assert capa.features.insn.Number(0xC) not in features
-    assert capa.features.insn.Number(0x10) not in features
-
-
-def test_number_arch_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Number(0xFF) in features
-    assert capa.features.insn.Number(0xFF, arch=ARCH_X32) in features
-    assert capa.features.insn.Number(0xFF, arch=ARCH_X64) not in features
-
-
-def test_offset_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Offset(0x0) in features
-    assert capa.features.insn.Offset(0x4) in features
-    assert capa.features.insn.Offset(0xC) in features
-    # the following are stack references
-    assert capa.features.insn.Offset(0x8) not in features
-    assert capa.features.insn.Offset(0x10) not in features
-
-    # this function has the following negative offsets
-    # movzx   ecx, byte ptr [eax-1]
-    # movzx   eax, byte ptr [eax-2]
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4011FB))
-    assert capa.features.insn.Offset(-0x1) in features
-    assert capa.features.insn.Offset(-0x2) in features
-
-
-def test_offset_arch_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Offset(0x0) in features
-    assert capa.features.insn.Offset(0x0, arch=ARCH_X32) in features
-    assert capa.features.insn.Offset(0x0, arch=ARCH_X64) not in features
-
-
-def test_nzxor_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x410DFC))
-    assert capa.features.Characteristic("nzxor") in features  # 0x0410F0B
-
-
-def get_bb_insn(f, va):
-    """fetch the BasicBlock and Instruction instances for the given VA in the given function."""
-    for bb in f.basic_blocks:
-        for insn in bb.instructions:
-            if insn.va == va:
-                return (bb, insn)
-    raise KeyError(va)
-
-
-def test_is_security_cookie(mimikatz):
-    # not a security cookie check
-    f = viv_utils.Function(mimikatz.vw, 0x410DFC)
-    for va in [0x0410F0B]:
-        bb, insn = get_bb_insn(f, va)
-        assert capa.features.extractors.viv.insn.is_security_cookie(f, bb, insn) == False
-
-    # security cookie initial set and final check
-    f = viv_utils.Function(mimikatz.vw, 0x46C54A)
-    for va in [0x46C557, 0x46C63A]:
-        bb, insn = get_bb_insn(f, va)
-        assert capa.features.extractors.viv.insn.is_security_cookie(f, bb, insn) == True
-
-
-def test_mnemonic_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x40105D))
-    assert capa.features.insn.Mnemonic("push") in features
-    assert capa.features.insn.Mnemonic("movzx") in features
-    assert capa.features.insn.Mnemonic("xor") in features
-
-    assert capa.features.insn.Mnemonic("in") not in features
-    assert capa.features.insn.Mnemonic("out") not in features
-
-
-def test_peb_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
-    assert capa.features.Characteristic("peb access") in features
-
-
-def test_file_section_name_features(mimikatz):
-    features = extract_file_features(mimikatz.vw, mimikatz.path)
-    assert capa.features.file.Section(".rsrc") in features
-    assert capa.features.file.Section(".text") in features
-    assert capa.features.file.Section(".nope") not in features
-
-
-def test_tight_loop_features(mimikatz):
-    f = viv_utils.Function(mimikatz.vw, 0x402EC4)
-    for bb in f.basic_blocks:
-        if bb.va != 0x402F8E:
-            continue
-        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop") in features
-        assert capa.features.basicblock.BasicBlock() in features
-
-
-def test_tight_loop_bb_features(mimikatz):
-    f = viv_utils.Function(mimikatz.vw, 0x402EC4)
-    for bb in f.basic_blocks:
-        if bb.va != 0x402F8E:
-            continue
-        features = extract_basic_block_features(f, bb)
-        assert capa.features.Characteristic("tight loop") in features
-        assert capa.features.basicblock.BasicBlock() in features
-
-
-def test_file_export_name_features(kernel32):
-    features = extract_file_features(kernel32.vw, kernel32.path)
-    assert capa.features.file.Export("BaseThreadInitThunk") in features
-    assert capa.features.file.Export("lstrlenW") in features
-
-
-def test_file_import_name_features(mimikatz):
-    features = extract_file_features(mimikatz.vw, mimikatz.path)
-    assert capa.features.file.Import("advapi32.CryptSetHashParam") in features
-    assert capa.features.file.Import("CryptSetHashParam") in features
-    assert capa.features.file.Import("kernel32.IsWow64Process") in features
-    assert capa.features.file.Import("msvcrt.exit") in features
-    assert capa.features.file.Import("cabinet.#11") in features
-    assert capa.features.file.Import("#11") not in features
-
-
-def test_cross_section_flow_features(sample_a198216798ca38f280dc413f8c57f2c2):
-    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4014D0))
-    assert capa.features.Characteristic("cross section flow") in features
-
-    # this function has calls to some imports,
-    # which should not trigger cross-section flow characteristic
-    features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x401563))
-    assert capa.features.Characteristic("cross section flow") not in features
-
-
-def test_segment_access_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA6FEC))
-    assert capa.features.Characteristic("fs access") in features
-
-
-def test_thunk_features(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x407970))
-    assert capa.features.insn.API("kernel32.CreateToolhelp32Snapshot") in features
-    assert capa.features.insn.API("CreateToolhelp32Snapshot") in features
-
-
-def test_file_embedded_pe(pma_lab_12_04):
-    features = extract_file_features(pma_lab_12_04.vw, pma_lab_12_04.path)
-    assert capa.features.Characteristic("embedded pe") in features
-
-
-def test_stackstring_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x4556E5))
-    assert capa.features.Characteristic("stack string") in features
-
-
-def test_switch_features(mimikatz):
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409411))
-    assert capa.features.Characteristic("switch") in features
-
-    features = extract_function_features(viv_utils.Function(mimikatz.vw, 0x409393))
-    assert capa.features.Characteristic("switch") not in features
-
-
-def test_recursive_call_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
-    features = extract_function_features(
-        viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003100)
-    )
-    assert capa.features.Characteristic("recursive call") in features
-
-    features = extract_function_features(
-        viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007B00)
-    )
-    assert capa.features.Characteristic("recursive call") not in features
-
-
-def test_loop_feature(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41):
-    features = extract_function_features(
-        viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10003D30)
-    )
-    assert capa.features.Characteristic("loop") in features
-
-    features = extract_function_features(
-        viv_utils.Function(sample_39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.vw, 0x10007250)
-    )
-    assert capa.features.Characteristic("loop") not in features
-
-
-def test_file_string_features(sample_bfb9b5391a13d0afd787e87ab90f14f5):
-    features = extract_file_features(
-        sample_bfb9b5391a13d0afd787e87ab90f14f5.vw, sample_bfb9b5391a13d0afd787e87ab90f14f5.path,
-    )
-    assert capa.features.String("WarStop") in features  # ASCII, offset 0x40EC
-    assert capa.features.String("cimage/png") in features  # UTF-16 LE, offset 0x350E
-
-
-def test_function_calls_to(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-    assert capa.features.Characteristic("calls to") in features
-    assert len(features[capa.features.Characteristic("calls to")]) == 1
-
-
-def test_function_calls_to64(sample_lab21_01):
-    features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400052D0))  # memcpy
-    assert capa.features.Characteristic("calls to") in features
-    assert len(features[capa.features.Characteristic("calls to")]) == 8
-
-
-def test_function_calls_from(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-    assert capa.features.Characteristic("calls from") in features
-    assert len(features[capa.features.Characteristic("calls from")]) == 23
-
-
-def test_basic_block_count(sample_9324d1a8ae37a36ae560c37448c9705a):
-    features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60))
-    assert len(features[capa.features.basicblock.BasicBlock()]) == 26
-
-
-def test_indirect_call_features(sample_a933a1a402775cfa94b6bee0963f4b46):
-    features = extract_function_features(viv_utils.Function(sample_a933a1a402775cfa94b6bee0963f4b46.vw, 0xABA68A0))
-    assert capa.features.Characteristic("indirect call") in features
-    assert len(features[capa.features.Characteristic("indirect call")]) == 3
-
-
-def test_indirect_calls_resolved(sample_c91887d861d9bd4a5872249b641bc9f9):
-    features = extract_function_features(viv_utils.Function(sample_c91887d861d9bd4a5872249b641bc9f9.vw, 0x401A77))
-    assert capa.features.insn.API("kernel32.CreatePipe") in features
-    assert capa.features.insn.API("kernel32.SetHandleInformation") in features
-    assert capa.features.insn.API("kernel32.CloseHandle") in features
-    assert capa.features.insn.API("kernel32.WriteFile") in features
+@parametrize("sample,scope,feature,expected", FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"])
+def test_viv_features(sample, scope, feature, expected):
+    """
+    check one FEATURE_PRESENCE_TESTS case using get_viv_extractor.
+    the check runs under xfail when the interpreter is Python 3 or newer.
+    """
+    with xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2"):
+        do_test_feature_presence(get_viv_extractor, sample, scope, feature, expected)
+
+
+@parametrize("sample,scope,feature,expected", FEATURE_COUNT_TESTS, indirect=["sample", "scope"])
+def test_viv_feature_counts(sample, scope, feature, expected):
+    """
+    check one FEATURE_COUNT_TESTS case using get_viv_extractor.
+    the check runs under xfail when the interpreter is Python 3 or newer.
+    """
+    with xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2"):
+        do_test_feature_count(get_viv_extractor, sample, scope, feature, expected)