From 623bac1a406de9f113fa479c034d401467aeefc2 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 16:19:16 -0600
Subject: [PATCH 01/26] engine: statement: document that the order of children
 is important

---
 capa/engine.py | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index 7a2dea31..f64e1975 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -119,7 +119,13 @@ class Result:
 
 
 class And(Statement):
-    """match if all of the children evaluate to True."""
+    """
+    match if all of the children evaluate to True.
+
+    the order of evaluation is dicated by the property
+    `And.children` (type: List[Statement|Feature]).
+    a query optimizer may safely manipulate the order of these children.
+    """
 
     def __init__(self, children, description=None):
         super(And, self).__init__(description=description)
@@ -129,13 +135,25 @@ class And(Statement):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.and"] += 1  
 
-        results = [child.evaluate(ctx) for child in self.children]
-        success = all(results)
-        return Result(success, self, results)
+        results = []
+        for child in self.children:
+            result = child.evaluate(ctx) 
+            results.append(result)
+            if not result:
+                # short circuit
+                return Result(False, self, results)
+
+        return Result(True, self, results)
 
 
 class Or(Statement):
-    """match if any of the children evaluate to True."""
+    """
+    match if any of the children evaluate to True.
+ 
+    the order of evaluation is dicated by the property
+    `Or.children` (type: List[Statement|Feature]).
+    a query optimizer may safely manipulate the order of these children.
+    """
 
     def __init__(self, children, description=None):
         super(Or, self).__init__(description=description)
@@ -167,7 +185,13 @@ class Not(Statement):
 
 
 class Some(Statement):
-    """match if at least N of the children evaluate to True."""
+    """
+    match if at least N of the children evaluate to True.
+
+    the order of evaluation is dicated by the property
+    `Some.children` (type: List[Statement|Feature]).
+    a query optimizer may safely manipulate the order of these children.
+    """
 
     def __init__(self, count, children, description=None):
         super(Some, self).__init__(description=description)

From 8d9f418b2bc4305d04cd873e2f387a139a42701b Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 16:20:22 -0600
Subject: [PATCH 02/26] rules: optimize by cost

---
 capa/rules.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/capa/rules.py b/capa/rules.py
index 6960e02b..caafb428 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -961,6 +961,8 @@ class RuleSet:
         if len(rules) == 0:
             raise InvalidRuleSet("no rules selected")
 
+        rules = self._optimize_rules(rules)
+
         self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
         self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
         self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE)
@@ -1038,3 +1040,55 @@ class RuleSet:
                     rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name)))
                     break
         return RuleSet(list(rules_filtered))
+
+    @staticmethod
+    def _get_node_cost(node):
+        if isinstance(node, (capa.features.common.OS, capa.features.common.Arch, capa.features.common.Format)):
+            return 0
+
+        # elif "everything else":
+        #   return 1
+        #
+        # this should be all hash-lookup features.
+        # see below.
+ 
+        elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex)):
+            return 2
+
+        elif isinstance(node, (ceng.Not, ceng.Range)):
+            return 3
+
+        elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
+            return 4
+        
+        else:
+            # this should be all hash-lookup features.
+            return 1
+
+    @staticmethod
+    def _optimize_statement(statement):
+        # this routine operates in-place
+
+        if isinstance(statement, (ceng.And, ceng.Or, ceng.Some)):
+            # has .children
+            statement.children = sorted(statement.children, key=lambda n: -RuleSet._get_node_cost(n))
+            return
+        elif isinstance(statement, (ceng.Not, ceng.Range)):
+            # has .child
+            RuleSet._optimize_statement(statement.child)
+            return
+        else:
+            # appears to be "simple"
+            return
+
+    @staticmethod
+    def _optimize_rule(rule):
+        # operates in-place
+        RuleSet._optimize_statement(rule.statement)
+
+    @staticmethod
+    def _optimize_rules(rules):
+        logger.debug("optimizing %d rules", len(rules))
+        for rule in rules:
+            RuleSet._optimize_rule(rule)
+        return rules

From 18ba986eba853108a0f48a274c61bb71ed904974 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 16:32:12 -0600
Subject: [PATCH 03/26] engine: or: short circuit

---
 capa/engine.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index f64e1975..871119c9 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -162,10 +162,16 @@ class Or(Statement):
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.or"] += 1  
- 
-        results = [child.evaluate(ctx) for child in self.children]
-        success = any(results)
-        return Result(success, self, results)
+
+        results = []
+        for child in self.children:
+            result = child.evaluate(ctx)
+            results.append(result)
+            if result:
+                # short circuit as soon as we hit one match
+                return Result(True, self, results)
+
+        return Result(False, self, results)
 
 
 class Not(Statement):

From a329147d28b159ae40e4686e8a5d1ed27d97fd77 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 16:32:23 -0600
Subject: [PATCH 04/26] engine: some: short circuit

---
 capa/engine.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index 871119c9..0f78a6c1 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -207,14 +207,16 @@ class Some(Statement):
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.some"] += 1  
-  
-        results = [child.evaluate(ctx) for child in self.children]
-        # note that here we cast the child result as a bool
-        # because we've overridden `__bool__` above.
-        #
-        # we can't use `if child is True` because the instance is not True.
-        success = sum([1 for child in results if bool(child) is True]) >= self.count
-        return Result(success, self, results)
+ 
+        results = []
+        for child in self.children:
+            result = child.evaluate(ctx)
+            results.append(result)
+            if len(results) >= self.count:
+                # short circuit as soon as we hit the threshold
+                return Result(True, self, results)
+
+        return Result(False, self, results)
 
 
 class Range(Statement):

From e63f072e409433af763c05891e5d15357e999214 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 16:39:00 -0600
Subject: [PATCH 05/26] rules: optimizer: use recursive cost of statements

---
 capa/rules.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/capa/rules.py b/capa/rules.py
index caafb428..57ebc791 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -1056,10 +1056,10 @@ class RuleSet:
             return 2
 
         elif isinstance(node, (ceng.Not, ceng.Range)):
-            return 3
+            return RuleSet._get_node_cost(node.child)
 
         elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
-            return 4
+            return sum(map(RuleSet._get_node_cost, node.children))
         
         else:
             # this should be all hash-lookup features.

From d573b83c947892d62ffa2118afe308d2954f0372 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 5 Nov 2021 16:49:38 -0600
Subject: [PATCH 06/26] rule: optimization: add some documentation

---
 capa/rules.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/capa/rules.py b/capa/rules.py
index 57ebc791..038f4d73 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -1044,6 +1044,8 @@ class RuleSet:
     @staticmethod
     def _get_node_cost(node):
         if isinstance(node, (capa.features.common.OS, capa.features.common.Arch, capa.features.common.Format)):
+            # we assume these are the most restrictive features:
+            # authors commonly use them at the start of rules to restrict the category of samples to inspect
             return 0
 
         # elif "everything else":
@@ -1053,16 +1055,26 @@ class RuleSet:
         # see below.
  
         elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex)):
+            # substring and regex features require a full scan of each string
+            # which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
+            #
+            # TODO: compute the average cost of these feature relative to hash feature
+            # and adjust the factor accordingly.
             return 2
 
         elif isinstance(node, (ceng.Not, ceng.Range)):
+            # the cost of these nodes are defined by the complexity of their single child.
             return RuleSet._get_node_cost(node.child)
 
         elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
+            # the cost of these nodes is the full cost of their children
+            # as this is the worst-case scenario.
             return sum(map(RuleSet._get_node_cost, node.children))
         
         else:
             # this should be all hash-lookup features.
+            # we give this a arbitrary weight of 1.
+            # the only thing more "important" than this is checking OS/Arch/Format.
             return 1
 
     @staticmethod
@@ -1083,7 +1095,7 @@ class RuleSet:
 
     @staticmethod
     def _optimize_rule(rule):
-        # operates in-place
+        # this routine operates in-place
         RuleSet._optimize_statement(rule.statement)
 
     @staticmethod

From d86c3f4d48557094acfd56b860a8b2f125384af1 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 11:50:16 -0700
Subject: [PATCH 07/26] common: move Result to capa.features.common from capa.engine

fixes circular import error in capa.features.freeze
---
 capa/engine.py          | 50 ++-----------------------------
 capa/features/common.py | 65 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 57 insertions(+), 58 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index 0f78a6c1..d7ff81f6 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -13,7 +13,7 @@ from typing import Set, Dict, List, Tuple, Union, Mapping, Iterable
 import capa.perf
 import capa.rules
 import capa.features.common
-from capa.features.common import Feature
+from capa.features.common import Result, Feature
 
 # a collection of features and the locations at which they are found.
 #
@@ -46,15 +46,9 @@ class Statement:
     def __repr__(self):
         return str(self)
 
-    def evaluate(self, features: FeatureSet) -> "Result":
+    def evaluate(self, features: FeatureSet) -> Result:
         """
         classes that inherit `Statement` must implement `evaluate`
-
-        args:
-          ctx (defaultdict[Feature, set[VA]])
-
-        returns:
-          Result
         """
         raise NotImplementedError()
 
@@ -78,46 +72,6 @@ class Statement:
                     children[i] = new
 
 
-class Result:
-    """
-    represents the results of an evaluation of statements against features.
-
-    instances of this class should behave like a bool,
-    e.g. `assert Result(True, ...) == True`
-
-    instances track additional metadata about evaluation results.
-    they contain references to the statement node (e.g. an And statement),
-     as well as the children Result instances.
-
-    we need this so that we can render the tree of expressions and their results.
-    """
-
-    def __init__(self, success: bool, statement: Union[Statement, Feature], children: List["Result"], locations=None):
-        """
-        args:
-          success (bool)
-          statement (capa.engine.Statement or capa.features.Feature)
-          children (list[Result])
-          locations (iterable[VA])
-        """
-        super(Result, self).__init__()
-        self.success = success
-        self.statement = statement
-        self.children = children
-        self.locations = locations if locations is not None else ()
-
-    def __eq__(self, other):
-        if isinstance(other, bool):
-            return self.success == other
-        return False
-
-    def __bool__(self):
-        return self.success
-
-    def __nonzero__(self):
-        return self.success
-
-
 class And(Statement):
     """
     match if all of the children evaluate to True.
diff --git a/capa/features/common.py b/capa/features/common.py
index 9fa5d8bf..0f01ef52 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -10,10 +10,9 @@ import re
 import codecs
 import logging
 import collections
-from typing import Set, Dict, Union
+from typing import Set, Dict, List, Union
 
 import capa.perf
-import capa.engine
 import capa.features
 import capa.features.extractors.elf
 
@@ -47,6 +46,52 @@ def escape_string(s: str) -> str:
     return s
 
 
+class Result:
+    """
+    represents the results of an evaluation of statements against features.
+
+    instances of this class should behave like a bool,
+    e.g. `assert Result(True, ...) == True`
+
+    instances track additional metadata about evaluation results.
+    they contain references to the statement node (e.g. an And statement),
+     as well as the children Result instances.
+
+    we need this so that we can render the tree of expressions and their results.
+    """
+
+    def __init__(
+        self,
+        success: bool,
+        statement: Union["capa.engine.Statement", "Feature"],
+        children: List["Result"],
+        locations=None,
+    ):
+        """
+        args:
+          success (bool)
+          statement (capa.engine.Statement or capa.features.Feature)
+          children (list[Result])
+          locations (iterable[VA])
+        """
+        super(Result, self).__init__()
+        self.success = success
+        self.statement = statement
+        self.children = children
+        self.locations = locations if locations is not None else ()
+
+    def __eq__(self, other):
+        if isinstance(other, bool):
+            return self.success == other
+        return False
+
+    def __bool__(self):
+        return self.success
+
+    def __nonzero__(self):
+        return self.success
+
+
 class Feature:
     def __init__(self, value: Union[str, int, bytes], bitness=None, description=None):
         """
@@ -97,10 +142,10 @@ class Feature:
     def __repr__(self):
         return str(self)
 
-    def evaluate(self, ctx: Dict["Feature", Set[int]]) -> "capa.engine.Result":
+    def evaluate(self, ctx: Dict["Feature", Set[int]]) -> "Result":
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature." + self.name] += 1
-        return capa.engine.Result(self in ctx, self, [], locations=ctx.get(self, []))
+        return Result(self in ctx, self, [], locations=ctx.get(self, []))
 
     def freeze_serialize(self):
         if self.bitness is not None:
@@ -176,9 +221,9 @@ class Substring(String):
             # unlike other features, we cannot return put a reference to `self` directly in a `Result`.
             # this is because `self` may match on many strings, so we can't stuff the matched value into it.
             # instead, return a new instance that has a reference to both the substring and the matched values.
-            return capa.engine.Result(True, _MatchedSubstring(self, matches), [], locations=locations)
+            return Result(True, _MatchedSubstring(self, matches), [], locations=locations)
         else:
-            return capa.engine.Result(False, _MatchedSubstring(self, None), [])
+            return Result(False, _MatchedSubstring(self, None), [])
 
     def __str__(self):
         return "substring(%s)" % self.value
@@ -269,9 +314,9 @@ class Regex(String):
             # this is because `self` may match on many strings, so we can't stuff the matched value into it.
             # instead, return a new instance that has a reference to both the regex and the matched values.
             # see #262.
-            return capa.engine.Result(True, _MatchedRegex(self, matches), [], locations=locations)
+            return Result(True, _MatchedRegex(self, matches), [], locations=locations)
         else:
-            return capa.engine.Result(False, _MatchedRegex(self, None), [])
+            return Result(False, _MatchedRegex(self, None), [])
 
     def __str__(self):
         return "regex(string =~ %s)" % self.value
@@ -326,9 +371,9 @@ class Bytes(Feature):
                 continue
 
             if feature.value.startswith(self.value):
-                return capa.engine.Result(True, self, [], locations=locations)
+                return Result(True, self, [], locations=locations)
 
-        return capa.engine.Result(False, self, [])
+        return Result(False, self, [])
 
     def get_value_str(self):
         return hex_string(bytes_to_str(self.value))

From 35fa50dbee0f4d0e2d10689abd6539795f0d5dd0 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 11:50:37 -0700
Subject: [PATCH 08/26] pep8

---
 capa/engine.py          | 20 ++++++++++----------
 capa/features/common.py | 10 +++++-----
 capa/rules.py           |  6 +++---
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index d7ff81f6..601ddd34 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -87,11 +87,11 @@ class And(Statement):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.and"] += 1  
+        capa.perf.counters["evaluate.feature.and"] += 1
 
         results = []
         for child in self.children:
-            result = child.evaluate(ctx) 
+            result = child.evaluate(ctx)
             results.append(result)
             if not result:
                 # short circuit
@@ -103,7 +103,7 @@ class And(Statement):
 class Or(Statement):
     """
     match if any of the children evaluate to True.
- 
+
     the order of evaluation is dicated by the property
     `Or.children` (type: List[Statement|Feature]).
     a query optimizer may safely manipulate the order of these children.
@@ -115,7 +115,7 @@ class Or(Statement):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.or"] += 1  
+        capa.perf.counters["evaluate.feature.or"] += 1
 
         results = []
         for child in self.children:
@@ -137,8 +137,8 @@ class Not(Statement):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.not"] += 1  
-  
+        capa.perf.counters["evaluate.feature.not"] += 1
+
         results = [self.child.evaluate(ctx)]
         success = not results[0]
         return Result(success, self, results)
@@ -160,8 +160,8 @@ class Some(Statement):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.some"] += 1  
- 
+        capa.perf.counters["evaluate.feature.some"] += 1
+
         results = []
         for child in self.children:
             result = child.evaluate(ctx)
@@ -184,8 +184,8 @@ class Range(Statement):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.range"] += 1  
-  
+        capa.perf.counters["evaluate.feature.range"] += 1
+
         count = len(ctx.get(self.child, []))
         if self.min == 0 and count == 0:
             return Result(True, self, [])
diff --git a/capa/features/common.py b/capa/features/common.py
index 0f01ef52..a40201e3 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -190,7 +190,7 @@ class Substring(String):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.substring"] += 1 
+        capa.perf.counters["evaluate.feature.substring"] += 1
 
         # mapping from string value to list of locations.
         # will unique the locations later on.
@@ -278,8 +278,8 @@ class Regex(String):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.regex"] += 1 
-                                                        
+        capa.perf.counters["evaluate.feature.regex"] += 1
+
         # mapping from string value to list of locations.
         # will unique the locations later on.
         matches = collections.defaultdict(list)
@@ -364,8 +364,8 @@ class Bytes(Feature):
 
     def evaluate(self, ctx):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.bytes"] += 1 
-                                                        
+        capa.perf.counters["evaluate.feature.bytes"] += 1
+
         for feature, locations in ctx.items():
             if not isinstance(feature, (Bytes,)):
                 continue
diff --git a/capa/rules.py b/capa/rules.py
index 038f4d73..b49f7ee1 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -622,7 +622,7 @@ class Rule:
 
     def evaluate(self, features: FeatureSet):
         capa.perf.counters["evaluate.feature"] += 1
-        capa.perf.counters["evaluate.feature.rule"] += 1 
+        capa.perf.counters["evaluate.feature.rule"] += 1
         return self.statement.evaluate(features)
 
     @classmethod
@@ -1053,7 +1053,7 @@ class RuleSet:
         #
         # this should be all hash-lookup features.
         # see below.
- 
+
         elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex)):
             # substring and regex features require a full scan of each string
             # which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
@@ -1070,7 +1070,7 @@ class RuleSet:
             # the cost of these nodes is the full cost of their children
             # as this is the worst-case scenario.
             return sum(map(RuleSet._get_node_cost, node.children))
-        
+
         else:
             # this should be all hash-lookup features.
             # we give this a arbitrary weight of 1.

From a995b53c380edc345757d50060e9d768661de41c Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 11:50:49 -0700
Subject: [PATCH 09/26] perf: add reset routine

---
 capa/perf.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/capa/perf.py b/capa/perf.py
index d1e4083d..c7b416c0 100644
--- a/capa/perf.py
+++ b/capa/perf.py
@@ -1,3 +1,8 @@
 import collections
 
 counters = collections.Counter()
+
+
+def reset():
+    global counters
+    counters = collections.Counter()

From 480df323e5153a7cb89403c2ef0c657f514b8d69 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 11:51:09 -0700
Subject: [PATCH 10/26] scripts: add py script for profiling time

---
 scripts/profile-time.py | 115 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 scripts/profile-time.py

diff --git a/scripts/profile-time.py b/scripts/profile-time.py
new file mode 100644
index 00000000..3c47b67b
--- /dev/null
+++ b/scripts/profile-time.py
@@ -0,0 +1,115 @@
+import sys
+import timeit
+import logging
+import argparse
+import subprocess
+
+import tqdm
+import tabulate
+
+import capa.main
+import capa.perf
+import capa.rules
+import capa.engine
+import capa.helpers
+import capa.features
+import capa.features.common
+import capa.features.freeze
+
+logger = logging.getLogger("capa.profile")
+
+
+def main(argv=None):
+    if argv is None:
+        argv = sys.argv[1:]
+
+    label = subprocess.run(
+        "git show --pretty=oneline --abbrev-commit | head -n 1", shell=True, capture_output=True, text=True
+    ).stdout.strip()
+    is_dirty = (
+        subprocess.run(
+            "git status | grep 'modified: ' | grep -v 'rules' | grep -v 'tests/data'",
+            shell=True,
+            capture_output=True,
+            text=True,
+        ).stdout
+        != ""
+    )
+
+    if is_dirty:
+        label += " (dirty)"
+
+    parser = argparse.ArgumentParser(description="Profile capa performance")
+    capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "rules"})
+
+    parser.add_argument("--number", type=int, default=3, help="batch size of profile collection")
+    parser.add_argument("--repeat", type=int, default=30, help="batch count of profile collection")
+    parser.add_argument("--label", type=str, default=label, help="description of the profile collection")
+
+    args = parser.parse_args(args=argv)
+    capa.main.handle_common_args(args)
+
+    try:
+        taste = capa.helpers.get_file_taste(args.sample)
+    except IOError as e:
+        logger.error("%s", str(e))
+        return -1
+
+    try:
+        with capa.main.timing("load rules"):
+            rules = capa.rules.RuleSet(capa.main.get_rules(args.rules, disable_progress=True))
+    except (IOError) as e:
+        logger.error("%s", str(e))
+        return -1
+
+    try:
+        sig_paths = capa.main.get_signatures(args.signatures)
+    except (IOError) as e:
+        logger.error("%s", str(e))
+        return -1
+
+    if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
+        with open(args.sample, "rb") as f:
+            extractor = capa.features.freeze.load(f.read())
+    else:
+        extractor = capa.main.get_extractor(
+            args.sample, args.format, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False
+        )
+
+    with tqdm.tqdm(total=args.number * args.repeat) as pbar:
+
+        def do_iteration():
+            capa.perf.reset()
+            capa.main.find_capabilities(rules, extractor, disable_progress=True)
+            pbar.update(1)
+
+        samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)
+
+    logger.debug("perf: find capabilities: min: %0.2fs" % (min(samples) / float(args.number)))
+    logger.debug("perf: find capabilities: avg: %0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)))
+    logger.debug("perf: find capabilities: max: %0.2fs" % (max(samples) / float(args.number)))
+
+    for (counter, count) in capa.perf.counters.most_common():
+        logger.debug("perf: counter: {:}: {:,}".format(counter, count))
+
+    print(
+        tabulate.tabulate(
+            [
+                (
+                    args.label,
+                    "{:,}".format(capa.perf.counters["evaluate.feature"]),
+                    "%0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)),
+                    "%0.2fs" % (min(samples) / float(args.number)),
+                    "%0.2fs" % (max(samples) / float(args.number)),
+                )
+            ],
+            headers=["label", "count(evaluations)", "avg(time)", "min(time)", "max(time)"],
+            tablefmt="github",
+        )
+    )
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())

From e3496b0660d0c25318770e9ecd54b2c83b74928f Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 12:10:22 -0700
Subject: [PATCH 11/26] engine: move optimizer into its own module

---
 capa/optimizer.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 capa/rules.py     | 67 ++-------------------------------------------
 2 files changed, 72 insertions(+), 65 deletions(-)
 create mode 100644 capa/optimizer.py

diff --git a/capa/optimizer.py b/capa/optimizer.py
new file mode 100644
index 00000000..462bdf0f
--- /dev/null
+++ b/capa/optimizer.py
@@ -0,0 +1,70 @@
+import logging
+
+import capa.engine as ceng
+import capa.features.common
+
+logger = logging.getLogger(__name__)
+
+
+def get_node_cost(node):
+    if isinstance(node, (capa.features.common.OS, capa.features.common.Arch, capa.features.common.Format)):
+        # we assume these are the most restrictive features:
+        # authors commonly use them at the start of rules to restrict the category of samples to inspect
+        return 0
+
+    # elif "everything else":
+    #   return 1
+    #
+    # this should be all hash-lookup features.
+    # see below.
+
+    elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex)):
+        # substring and regex features require a full scan of each string
+        # which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
+        #
+        # TODO: compute the average cost of these feature relative to hash feature
+        # and adjust the factor accordingly.
+        return 2
+
+    elif isinstance(node, (ceng.Not, ceng.Range)):
+        # the cost of these nodes are defined by the complexity of their single child.
+        return get_node_cost(node.child)
+
+    elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
+        # the cost of these nodes is the full cost of their children
+        # as this is the worst-case scenario.
+        return sum(map(get_node_cost, node.children))
+
+    else:
+        # this should be all hash-lookup features.
+        # we give this a arbitrary weight of 1.
+        # the only thing more "important" than this is checking OS/Arch/Format.
+        return 1
+
+
+def optimize_statement(statement):
+    # this routine operates in-place
+
+    if isinstance(statement, (ceng.And, ceng.Or, ceng.Some)):
+        # has .children
+        statement.children = sorted(statement.children, key=lambda n: -get_node_cost(n))
+        return
+    elif isinstance(statement, (ceng.Not, ceng.Range)):
+        # has .child
+        optimize_statement(statement.child)
+        return
+    else:
+        # appears to be "simple"
+        return
+
+
+def optimize_rule(rule):
+    # this routine operates in-place
+    optimize_statement(rule.statement)
+
+
+def optimize_rules(rules):
+    logger.debug("optimizing %d rules", len(rules))
+    for rule in rules:
+        optimize_rule(rule)
+    return rules
diff --git a/capa/rules.py b/capa/rules.py
index b49f7ee1..fb550ad4 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -30,6 +30,7 @@ import ruamel.yaml
 import capa.perf
 import capa.engine as ceng
 import capa.features
+import capa.optimizer
 import capa.features.file
 import capa.features.insn
 import capa.features.common
@@ -961,7 +962,7 @@ class RuleSet:
         if len(rules) == 0:
             raise InvalidRuleSet("no rules selected")
 
-        rules = self._optimize_rules(rules)
+        rules = capa.optimizer.optimize_rules(rules)
 
         self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
         self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
@@ -1040,67 +1041,3 @@ class RuleSet:
                     rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name)))
                     break
         return RuleSet(list(rules_filtered))
-
-    @staticmethod
-    def _get_node_cost(node):
-        if isinstance(node, (capa.features.common.OS, capa.features.common.Arch, capa.features.common.Format)):
-            # we assume these are the most restrictive features:
-            # authors commonly use them at the start of rules to restrict the category of samples to inspect
-            return 0
-
-        # elif "everything else":
-        #   return 1
-        #
-        # this should be all hash-lookup features.
-        # see below.
-
-        elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex)):
-            # substring and regex features require a full scan of each string
-            # which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
-            #
-            # TODO: compute the average cost of these feature relative to hash feature
-            # and adjust the factor accordingly.
-            return 2
-
-        elif isinstance(node, (ceng.Not, ceng.Range)):
-            # the cost of these nodes are defined by the complexity of their single child.
-            return RuleSet._get_node_cost(node.child)
-
-        elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
-            # the cost of these nodes is the full cost of their children
-            # as this is the worst-case scenario.
-            return sum(map(RuleSet._get_node_cost, node.children))
-
-        else:
-            # this should be all hash-lookup features.
-            # we give this a arbitrary weight of 1.
-            # the only thing more "important" than this is checking OS/Arch/Format.
-            return 1
-
-    @staticmethod
-    def _optimize_statement(statement):
-        # this routine operates in-place
-
-        if isinstance(statement, (ceng.And, ceng.Or, ceng.Some)):
-            # has .children
-            statement.children = sorted(statement.children, key=lambda n: -RuleSet._get_node_cost(n))
-            return
-        elif isinstance(statement, (ceng.Not, ceng.Range)):
-            # has .child
-            RuleSet._optimize_statement(statement.child)
-            return
-        else:
-            # appears to be "simple"
-            return
-
-    @staticmethod
-    def _optimize_rule(rule):
-        # this routine operates in-place
-        RuleSet._optimize_statement(rule.statement)
-
-    @staticmethod
-    def _optimize_rules(rules):
-        logger.debug("optimizing %d rules", len(rules))
-        for rule in rules:
-            RuleSet._optimize_rule(rule)
-        return rules

From 96813c37b7ac54a0d2a5af5168e705bee0e46f0b Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 13:48:33 -0700
Subject: [PATCH 12/26] remove old import

---
 capa/optimizer.py | 70 -----------------------------------------------
 capa/rules.py     |  3 --
 2 files changed, 73 deletions(-)
 delete mode 100644 capa/optimizer.py

diff --git a/capa/optimizer.py b/capa/optimizer.py
deleted file mode 100644
index 462bdf0f..00000000
--- a/capa/optimizer.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import logging
-
-import capa.engine as ceng
-import capa.features.common
-
-logger = logging.getLogger(__name__)
-
-
-def get_node_cost(node):
-    if isinstance(node, (capa.features.common.OS, capa.features.common.Arch, capa.features.common.Format)):
-        # we assume these are the most restrictive features:
-        # authors commonly use them at the start of rules to restrict the category of samples to inspect
-        return 0
-
-    # elif "everything else":
-    #   return 1
-    #
-    # this should be all hash-lookup features.
-    # see below.
-
-    elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex)):
-        # substring and regex features require a full scan of each string
-        # which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
-        #
-        # TODO: compute the average cost of these feature relative to hash feature
-        # and adjust the factor accordingly.
-        return 2
-
-    elif isinstance(node, (ceng.Not, ceng.Range)):
-        # the cost of these nodes are defined by the complexity of their single child.
-        return get_node_cost(node.child)
-
-    elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
-        # the cost of these nodes is the full cost of their children
-        # as this is the worst-case scenario.
-        return sum(map(get_node_cost, node.children))
-
-    else:
-        # this should be all hash-lookup features.
-        # we give this a arbitrary weight of 1.
-        # the only thing more "important" than this is checking OS/Arch/Format.
-        return 1
-
-
-def optimize_statement(statement):
-    # this routine operates in-place
-
-    if isinstance(statement, (ceng.And, ceng.Or, ceng.Some)):
-        # has .children
-        statement.children = sorted(statement.children, key=lambda n: -get_node_cost(n))
-        return
-    elif isinstance(statement, (ceng.Not, ceng.Range)):
-        # has .child
-        optimize_statement(statement.child)
-        return
-    else:
-        # appears to be "simple"
-        return
-
-
-def optimize_rule(rule):
-    # this routine operates in-place
-    optimize_statement(rule.statement)
-
-
-def optimize_rules(rules):
-    logger.debug("optimizing %d rules", len(rules))
-    for rule in rules:
-        optimize_rule(rule)
-    return rules
diff --git a/capa/rules.py b/capa/rules.py
index fb550ad4..2753f19d 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -30,7 +30,6 @@ import ruamel.yaml
 import capa.perf
 import capa.engine as ceng
 import capa.features
-import capa.optimizer
 import capa.features.file
 import capa.features.insn
 import capa.features.common
@@ -962,8 +961,6 @@ class RuleSet:
         if len(rules) == 0:
             raise InvalidRuleSet("no rules selected")
 
-        rules = capa.optimizer.optimize_rules(rules)
-
         self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
         self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
         self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE)

From d987719889da426e81c64768caa3f3d0108889b7 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 13:53:37 -0700
Subject: [PATCH 13/26] engine: some: correctly count satisfied children

---
 capa/engine.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/capa/engine.py b/capa/engine.py
index 601ddd34..5d2383c6 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -163,10 +163,14 @@ class Some(Statement):
         capa.perf.counters["evaluate.feature.some"] += 1
 
         results = []
+        satisfied_children_count = 0
         for child in self.children:
             result = child.evaluate(ctx)
             results.append(result)
-            if len(results) >= self.count:
+            if result:
+                satisfied_children_count += 1
+
+            if satisfied_children_count >= self.count:
                 # short circuit as soon as we hit the threshold
                 return Result(True, self, results)
 

From 1a8405167930e977344bbd28701070c2c1c566bf Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 14:07:31 -0700
Subject: [PATCH 14/26] changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f0667535..49770247 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 ### New Features
 
+- engine: short circuit logic nodes for better performance #824 @williballenthin
+
 ### Breaking Changes
 
 ### New Rules (3)

From 9fa9c6a5d099364e12dc2fb6555d4aeff3348e05 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 14:07:44 -0700
Subject: [PATCH 15/26] tests: add test demonstrating short circuiting

---
 tests/test_engine.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/test_engine.py b/tests/test_engine.py
index ce421759..b130f2f2 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -533,3 +533,21 @@ def test_render_offset():
     assert str(capa.features.insn.Offset(1)) == "offset(0x1)"
     assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X32)) == "offset/x32(0x1)"
     assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)"
+
+
+def test_short_circuit_order():
+    # base cases.
+    assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}) == True
+    assert Or([Number(1), Number(2)]).evaluate({Number(2): {1}}) == True
+
+    # with short circuiting, only the children up until the first satisfied child are captured.
+    assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {1}}).children) == 1
+    assert len(Or([Number(1), Number(2)]).evaluate({Number(2): {1}}).children) == 2
+    assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {1}, Number(2): {1}}).children) == 1
+
+    # and its guaranteed that children are evaluated in order.
+    assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}).children[0].statement == Number(1)
+    assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}).children[0].statement != Number(2)
+
+    assert Or([Number(1), Number(2)]).evaluate({Number(2): {1}}).children[1].statement == Number(2)
+    assert Or([Number(1), Number(2)]).evaluate({Number(2): {1}}).children[1].statement != Number(1)

From 3e74da96a6f7b437e8b1f563ae2d1b3dc46ffc82 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 14:55:11 -0700
Subject: [PATCH 16/26] engine: make short circuiting configurable

---
 capa/engine.py          | 104 ++++++++++++++++++++++++++--------------
 capa/features/common.py |  16 +++++--
 capa/rules.py           |   4 +-
 tests/test_engine.py    |  10 +++-
 4 files changed, 91 insertions(+), 43 deletions(-)

diff --git a/capa/engine.py b/capa/engine.py
index 5d2383c6..b3a62f46 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -46,9 +46,12 @@ class Statement:
     def __repr__(self):
         return str(self)
 
-    def evaluate(self, features: FeatureSet) -> Result:
+    def evaluate(self, features: FeatureSet, short_circuit=True) -> Result:
         """
         classes that inherit `Statement` must implement `evaluate`
+
+        args:
+            short_circuit (bool): if true, then statements like and/or/some may short circuit.
         """
         raise NotImplementedError()
 
@@ -85,19 +88,24 @@ class And(Statement):
         super(And, self).__init__(description=description)
         self.children = children
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, short_circuit=True):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.and"] += 1
 
-        results = []
-        for child in self.children:
-            result = child.evaluate(ctx)
-            results.append(result)
-            if not result:
-                # short circuit
-                return Result(False, self, results)
+        if short_circuit:
+            results = []
+            for child in self.children:
+                result = child.evaluate(ctx, short_circuit=short_circuit)
+                results.append(result)
+                if not result:
+                    # short circuit
+                    return Result(False, self, results)
 
-        return Result(True, self, results)
+            return Result(True, self, results)
+        else:
+            results = [child.evaluate(ctx, short_circuit=short_circuit) for child in self.children]
+            success = all(results)
+            return Result(success, self, results)
 
 
 class Or(Statement):
@@ -113,19 +121,24 @@ class Or(Statement):
         super(Or, self).__init__(description=description)
         self.children = children
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, short_circuit=True):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.or"] += 1
 
-        results = []
-        for child in self.children:
-            result = child.evaluate(ctx)
-            results.append(result)
-            if result:
-                # short circuit as soon as we hit one match
-                return Result(True, self, results)
+        if short_circuit:
+            results = []
+            for child in self.children:
+                result = child.evaluate(ctx, short_circuit=short_circuit)
+                results.append(result)
+                if result:
+                    # short circuit as soon as we hit one match
+                    return Result(True, self, results)
 
-        return Result(False, self, results)
+            return Result(False, self, results)
+        else:
+            results = [child.evaluate(ctx, short_circuit=short_circuit) for child in self.children]
+            success = any(results)
+            return Result(success, self, results)
 
 
 class Not(Statement):
@@ -135,11 +148,11 @@ class Not(Statement):
         super(Not, self).__init__(description=description)
         self.child = child
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, short_circuit=True):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.not"] += 1
 
-        results = [self.child.evaluate(ctx)]
+        results = [self.child.evaluate(ctx, short_circuit=short_circuit)]
         success = not results[0]
         return Result(success, self, results)
 
@@ -158,23 +171,32 @@ class Some(Statement):
         self.count = count
         self.children = children
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, short_circuit=True):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.some"] += 1
 
-        results = []
-        satisfied_children_count = 0
-        for child in self.children:
-            result = child.evaluate(ctx)
-            results.append(result)
-            if result:
-                satisfied_children_count += 1
+        if short_circuit:
+            results = []
+            satisfied_children_count = 0
+            for child in self.children:
+                result = child.evaluate(ctx, short_circuit=short_circuit)
+                results.append(result)
+                if result:
+                    satisfied_children_count += 1
 
-            if satisfied_children_count >= self.count:
-                # short circuit as soon as we hit the threshold
-                return Result(True, self, results)
+                if satisfied_children_count >= self.count:
+                    # short circuit as soon as we hit the threshold
+                    return Result(True, self, results)
 
-        return Result(False, self, results)
+            return Result(False, self, results)
+        else:
+            results = [child.evaluate(ctx, short_circuit=short_circuit) for child in self.children]
+            # note that here we cast the child result as a bool
+            # because we've overridden `__bool__` above.
+            #
+            # we can't use `if child is True` because the instance is not True.
+            success = sum([1 for child in results if bool(child) is True]) >= self.count
+            return Result(success, self, results)
 
 
 class Range(Statement):
@@ -186,7 +208,7 @@ class Range(Statement):
         self.min = min if min is not None else 0
         self.max = max if max is not None else (1 << 64 - 1)
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, **kwargs):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.range"] += 1
 
@@ -214,7 +236,7 @@ class Subscope(Statement):
         self.scope = scope
         self.child = child
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, **kwargs):
         raise ValueError("cannot evaluate a subscope directly!")
 
 
@@ -272,8 +294,18 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tupl
     features = collections.defaultdict(set, copy.copy(features))
 
     for rule in rules:
-        res = rule.evaluate(features)
+        res = rule.evaluate(features, short_circuit=True)
         if res:
+            # we first matched the rule with short circuiting enabled.
+            # this is much faster than without short circuiting.
+            # however, we want to collect all results thoroughly,
+            # so once we've found a match quickly,
+            # go back and capture results without short circuiting.
+            res = rule.evaluate(features, short_circuit=False)
+
+            # sanity check
+            assert bool(res) is True
+
             results[rule.name].append((va, res))
             # we need to update the current `features`
             # because subsequent iterations of this loop may use newly added features,
diff --git a/capa/features/common.py b/capa/features/common.py
index 3a4e71e9..6b867766 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -146,7 +146,7 @@ class Feature:
     def __repr__(self):
         return str(self)
 
-    def evaluate(self, ctx: Dict["Feature", Set[int]]) -> Result:
+    def evaluate(self, ctx: Dict["Feature", Set[int]], **kwargs) -> Result:
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature." + self.name] += 1
         return Result(self in ctx, self, [], locations=ctx.get(self, []))
@@ -192,7 +192,7 @@ class Substring(String):
         super(Substring, self).__init__(value, description=description)
         self.value = value
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, short_circuit=True):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.substring"] += 1
 
@@ -210,6 +210,10 @@ class Substring(String):
 
             if self.value in feature.value:
                 matches[feature.value].extend(locations)
+                if short_circuit:
+                    # we found one matching string, thats sufficient to match.
+                    # don't collect other matching strings in this mode.
+                    break
 
         if matches:
             # finalize: defaultdict -> dict
@@ -280,7 +284,7 @@ class Regex(String):
                 "invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % value
             )
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, short_circuit=True):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.regex"] += 1
 
@@ -302,6 +306,10 @@ class Regex(String):
             # so that they don't have to prefix/suffix their terms like: /.*foo.*/.
             if self.re.search(feature.value):
                 matches[feature.value].extend(locations)
+                if short_circuit:
+                    # we found one matching string, thats sufficient to match.
+                    # don't collect other matching strings in this mode.
+                    break
 
         if matches:
             # finalize: defaultdict -> dict
@@ -366,7 +374,7 @@ class Bytes(Feature):
         super(Bytes, self).__init__(value, description=description)
         self.value = value
 
-    def evaluate(self, ctx):
+    def evaluate(self, ctx, **kwargs):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.bytes"] += 1
 
diff --git a/capa/rules.py b/capa/rules.py
index 2753f19d..00dc0837 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -620,10 +620,10 @@ class Rule:
         for new_rule in self._extract_subscope_rules_rec(self.statement):
             yield new_rule
 
-    def evaluate(self, features: FeatureSet):
+    def evaluate(self, features: FeatureSet, short_circuit=True):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.rule"] += 1
-        return self.statement.evaluate(features)
+        return self.statement.evaluate(features, short_circuit=short_circuit)
 
     @classmethod
     def from_dict(cls, d, definition):
diff --git a/tests/test_engine.py b/tests/test_engine.py
index b130f2f2..b07c89e6 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -535,7 +535,15 @@ def test_render_offset():
     assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)"
 
 
-def test_short_circuit_order():
+def test_short_circuit():
+    assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}) == True
+
+    # with short circuiting, only the children up until the first satisfied child are captured.
+    assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {1}}, short_circuit=True).children) == 1
+    assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {1}}, short_circuit=False).children) == 2
+
+
+def test_eval_order():
     # base cases.
     assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}) == True
     assert Or([Number(1), Number(2)]).evaluate({Number(2): {1}}) == True

From 0b517c51d87634d72db8c67c0028673709bc3777 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 15:22:01 -0700
Subject: [PATCH 17/26] main: remove perf messages

---
 capa/main.py | 54 +++++++++++++++++++++++-----------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/capa/main.py b/capa/main.py
index 5aaa97d6..52c28d27 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -169,28 +169,27 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
     n_funcs = len(functions)
 
     pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions")
-    with timing("match functions"):
-        for f in pb:
-            function_address = int(f)
+    for f in pb:
+        function_address = int(f)
 
-            if extractor.is_library_function(function_address):
-                function_name = extractor.get_function_name(function_address)
-                logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
-                meta["library_functions"][function_address] = function_name
-                n_libs = len(meta["library_functions"])
-                percentage = 100 * (n_libs / n_funcs)
-                if isinstance(pb, tqdm.tqdm):
-                    pb.set_postfix_str("skipped %d library functions (%d%%)" % (n_libs, percentage))
-                continue
+        if extractor.is_library_function(function_address):
+            function_name = extractor.get_function_name(function_address)
+            logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
+            meta["library_functions"][function_address] = function_name
+            n_libs = len(meta["library_functions"])
+            percentage = 100 * (n_libs / n_funcs)
+            if isinstance(pb, tqdm.tqdm):
+                pb.set_postfix_str("skipped %d library functions (%d%%)" % (n_libs, percentage))
+            continue
 
-            function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
-            meta["feature_counts"]["functions"][function_address] = feature_count
-            logger.debug("analyzed function 0x%x and extracted %d features", function_address, feature_count)
+        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
+        meta["feature_counts"]["functions"][function_address] = feature_count
+        logger.debug("analyzed function 0x%x and extracted %d features", function_address, feature_count)
 
-            for rule_name, res in function_matches.items():
-                all_function_matches[rule_name].extend(res)
-            for rule_name, res in bb_matches.items():
-                all_bb_matches[rule_name].extend(res)
+        for rule_name, res in function_matches.items():
+            all_function_matches[rule_name].extend(res)
+        for rule_name, res in bb_matches.items():
+            all_bb_matches[rule_name].extend(res)
 
     # collection of features that captures the rule matches within function and BB scopes.
     # mapping from feature (matched rule) to set of addresses at which it matched.
@@ -200,8 +199,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
         rule = ruleset[rule_name]
         capa.engine.index_rule_matches(function_and_lower_features, rule, locations)
 
-    with timing("match file"):
-        all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
+    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
     meta["feature_counts"]["file"] = feature_count
 
     matches = {
@@ -413,11 +411,9 @@ def get_workspace(path, format, sigpaths):
     else:
         raise ValueError("unexpected format: " + format)
 
-    with timing("load FLIRT"):
-        viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
+    viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
 
-    with timing("viv analyze"):
-        vw.analyze()
+    vw.analyze()
 
     logger.debug("%s", get_meta_str(vw))
     return vw
@@ -905,9 +901,8 @@ def main(argv=None):
         return E_MISSING_FILE
 
     try:
-        with timing("load rules"):
-            rules = get_rules(args.rules, disable_progress=args.quiet)
-            rules = capa.rules.RuleSet(rules)
+        rules = get_rules(args.rules, disable_progress=args.quiet)
+        rules = capa.rules.RuleSet(rules)
 
         logger.debug(
             "successfully loaded %s rules",
@@ -1020,8 +1015,7 @@ def main(argv=None):
 
     meta = collect_metadata(argv, args.sample, args.rules, extractor)
 
-    with timing("find capabilities"):
-        capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
+    capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
     meta["analysis"].update(counts)
     meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
 

From 152d0f32443f8b1669173d61b351f81c24f4c9fb Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 15:34:59 -0700
Subject: [PATCH 18/26] ruleset: add query optimizer

---
 capa/optimizer.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 capa/rules.py     |  3 ++
 2 files changed, 73 insertions(+)
 create mode 100644 capa/optimizer.py

diff --git a/capa/optimizer.py b/capa/optimizer.py
new file mode 100644
index 00000000..462bdf0f
--- /dev/null
+++ b/capa/optimizer.py
@@ -0,0 +1,70 @@
+import logging
+
+import capa.engine as ceng
+import capa.features.common
+
+logger = logging.getLogger(__name__)
+
+
+def get_node_cost(node):
+    if isinstance(node, (capa.features.common.OS, capa.features.common.Arch, capa.features.common.Format)):
+        # we assume these are the most restrictive features:
+        # authors commonly use them at the start of rules to restrict the category of samples to inspect
+        return 0
+
+    # elif "everything else":
+    #   return 1
+    #
+    # this should be all hash-lookup features.
+    # see below.
+
+    elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex)):
+        # substring and regex features require a full scan of each string
+        # which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
+        #
+        # TODO: compute the average cost of these feature relative to hash feature
+        # and adjust the factor accordingly.
+        return 2
+
+    elif isinstance(node, (ceng.Not, ceng.Range)):
+        # the cost of these nodes are defined by the complexity of their single child.
+        return get_node_cost(node.child)
+
+    elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
+        # the cost of these nodes is the full cost of their children
+        # as this is the worst-case scenario.
+        return sum(map(get_node_cost, node.children))
+
+    else:
+        # this should be all hash-lookup features.
+        # we give this a arbitrary weight of 1.
+        # the only thing more "important" than this is checking OS/Arch/Format.
+        return 1
+
+
+def optimize_statement(statement):
+    # this routine operates in-place
+
+    if isinstance(statement, (ceng.And, ceng.Or, ceng.Some)):
+        # has .children
+        statement.children = sorted(statement.children, key=lambda n: -get_node_cost(n))
+        return
+    elif isinstance(statement, (ceng.Not, ceng.Range)):
+        # has .child
+        optimize_statement(statement.child)
+        return
+    else:
+        # appears to be "simple"
+        return
+
+
+def optimize_rule(rule):
+    # this routine operates in-place
+    optimize_statement(rule.statement)
+
+
+def optimize_rules(rules):
+    logger.debug("optimizing %d rules", len(rules))
+    for rule in rules:
+        optimize_rule(rule)
+    return rules
diff --git a/capa/rules.py b/capa/rules.py
index 00dc0837..2d53a0aa 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -30,6 +30,7 @@ import ruamel.yaml
 import capa.perf
 import capa.engine as ceng
 import capa.features
+import capa.optimizer
 import capa.features.file
 import capa.features.insn
 import capa.features.common
@@ -961,6 +962,8 @@ class RuleSet:
         if len(rules) == 0:
             raise InvalidRuleSet("no rules selected")
 
+        rules = capa.optimizer.optimize_rules(rules)
+
         self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
         self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
         self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE)

From e287dc9a32bef24ceaab907076d5377d6c892e18 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 15:54:14 -0700
Subject: [PATCH 19/26] optimizer: fix sort order

---
 capa/optimizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/capa/optimizer.py b/capa/optimizer.py
index 462bdf0f..9d14c6e6 100644
--- a/capa/optimizer.py
+++ b/capa/optimizer.py
@@ -47,7 +47,7 @@ def optimize_statement(statement):
 
     if isinstance(statement, (ceng.And, ceng.Or, ceng.Some)):
         # has .children
-        statement.children = sorted(statement.children, key=lambda n: -get_node_cost(n))
+        statement.children = sorted(statement.children, key=lambda n: get_node_cost(n))
         return
     elif isinstance(statement, (ceng.Not, ceng.Range)):
         # has .child

From 6909d6a54169879b04eb089d1e603adf6c50385c Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 8 Nov 2021 16:04:15 -0700
Subject: [PATCH 20/26] changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 665c3c1d..ccc9840d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### New Features
 
 - engine: short circuit logic nodes for better performance #824 @williballenthin
+- engine: add optimizer the order faster nodes first #829 @williballenthin
 
 ### Breaking Changes
 

From a68812b223a6bb74612c16ace2f2e95072db7ab2 Mon Sep 17 00:00:00 2001
From: Willi Ballenthin <willi.ballenthin@gmail.com>
Date: Tue, 9 Nov 2021 10:48:54 -0700
Subject: [PATCH 21/26] Update capa/engine.py

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
---
 capa/engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/capa/engine.py b/capa/engine.py
index b3a62f46..a690db90 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -79,7 +79,7 @@ class And(Statement):
     """
     match if all of the children evaluate to True.
 
-    the order of evaluation is dicated by the property
+    the order of evaluation is dictated by the property
     `And.children` (type: List[Statement|Feature]).
     a query optimizer may safely manipulate the order of these children.
     """

From 51af2d4a561378f6c903c8da536c8c5bfdb6e5fd Mon Sep 17 00:00:00 2001
From: Willi Ballenthin <willi.ballenthin@gmail.com>
Date: Tue, 9 Nov 2021 10:49:01 -0700
Subject: [PATCH 22/26] Update capa/engine.py

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
---
 capa/engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/capa/engine.py b/capa/engine.py
index a690db90..a9076c25 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -112,7 +112,7 @@ class Or(Statement):
     """
     match if any of the children evaluate to True.
 
-    the order of evaluation is dicated by the property
+    the order of evaluation is dictated by the property
     `Or.children` (type: List[Statement|Feature]).
     a query optimizer may safely manipulate the order of these children.
     """

From f427c5e9618bf0782083426dae2a1a2c44f3a778 Mon Sep 17 00:00:00 2001
From: Willi Ballenthin <willi.ballenthin@gmail.com>
Date: Tue, 9 Nov 2021 10:49:10 -0700
Subject: [PATCH 23/26] Update capa/engine.py

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
---
 capa/engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/capa/engine.py b/capa/engine.py
index a9076c25..29c0dc65 100644
--- a/capa/engine.py
+++ b/capa/engine.py
@@ -161,7 +161,7 @@ class Some(Statement):
     """
     match if at least N of the children evaluate to True.
 
-    the order of evaluation is dicated by the property
+    the order of evaluation is dictated by the property
     `Some.children` (type: List[Statement|Feature]).
     a query optimizer may safely manipulate the order of these children.
     """

From 7a4aee592be171c7ba7fde03623a1596db831b9e Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Tue, 9 Nov 2021 16:08:39 -0700
Subject: [PATCH 24/26] profile-time: add doc

---
 scripts/profile-time.py | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/scripts/profile-time.py b/scripts/profile-time.py
index 3c47b67b..3d6b5e07 100644
--- a/scripts/profile-time.py
+++ b/scripts/profile-time.py
@@ -1,3 +1,34 @@
+"""
+Invoke capa multiple times and record profiling information.
+Use the --number and --repeat options to change the number of iterations.
+By default, the script will emit a markdown table with a label pulled from git.
+
+Note: you can run this script against pre-generated .frz files to reduce the startup time.
+
+usage:
+
+    usage: profile-time.py [--number NUMBER] [--repeat REPEAT] [--label LABEL] sample
+
+    Profile capa performance
+
+    positional arguments:
+      sample                path to sample to analyze
+
+    optional arguments:
+      --number NUMBER       batch size of profile collection
+      --repeat REPEAT       batch count of profile collection
+      --label LABEL         description of the profile collection
+
+example:
+
+    $ python profile-time.py ./tests/data/kernel32.dll_.frz --number 1 --repeat 2
+
+    | label                                | count(evaluations)   | avg(time)   | min(time)   | max(time)   |
+    |--------------------------------------|----------------------|-------------|-------------|-------------|
+    | 18c30e4 main: remove perf debug msgs | 66,561,622           | 132.13s     | 125.14s     | 139.12s     |
+
+      ^^^ --label or git hash               
+"""
 import sys
 import timeit
 import logging
@@ -98,12 +129,16 @@ def main(argv=None):
                 (
                     args.label,
                     "{:,}".format(capa.perf.counters["evaluate.feature"]),
-                    "%0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)),
+                    # python documentation indicates that min(samples) should be preferred,
+                    # so let's put that first.
+                    #
+                    # https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
                     "%0.2fs" % (min(samples) / float(args.number)),
+                    "%0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)),
                     "%0.2fs" % (max(samples) / float(args.number)),
                 )
             ],
-            headers=["label", "count(evaluations)", "avg(time)", "min(time)", "max(time)"],
+            headers=["label", "count(evaluations)", "min(time)", "avg(time)", "max(time)"],
             tablefmt="github",
         )
     )

From ea386d02b68b6864586e433b8463a8060e220b85 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Tue, 9 Nov 2021 16:24:26 -0700
Subject: [PATCH 25/26] tests: add test demonstrating optimizer

---
 tests/test_optimizer.py | 65 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 tests/test_optimizer.py

diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
new file mode 100644
index 00000000..69a79bd6
--- /dev/null
+++ b/tests/test_optimizer.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2021 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import textwrap
+
+import pytest
+
+import capa.rules
+import capa.engine
+import capa.optimizer
+import capa.features.common
+from capa.engine import Or, And
+from capa.features.insn import Mnemonic
+from capa.features.common import Arch, Bytes, Substring
+
+
+def test_optimizer_order():
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+                scope: function
+            features:
+                - and:
+                    - substring: "foo"
+                    - arch: amd64
+                    - mnemonic: cmp
+                    - and:
+                      - bytes: 3
+                      - offset: 2
+                    - or:
+                      - number: 1
+                      - offset: 4
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    # before optimization
+    children = list(r.statement.get_children())
+    assert isinstance(children[0], Substring)
+    assert isinstance(children[1], Arch)
+    assert isinstance(children[2], Mnemonic)
+    assert isinstance(children[3], And)
+    assert isinstance(children[4], Or)
+
+    # after optimization
+    capa.optimizer.optimize_rules([r])
+    children = list(r.statement.get_children())
+
+    # cost: 0
+    assert isinstance(children[0], Arch)
+    # cost: 1
+    assert isinstance(children[1], Mnemonic)
+    # cost: 2
+    assert isinstance(children[2], Substring)
+    # cost: 3
+    assert isinstance(children[3], Or)
+    # cost: 4
+    assert isinstance(children[4], And)

From d4d801c246940904f959ad14dbdaefde51a672eb Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Tue, 9 Nov 2021 16:26:26 -0700
Subject: [PATCH 26/26] optimizer: tweak costs slightly

---
 capa/optimizer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/capa/optimizer.py b/capa/optimizer.py
index 9d14c6e6..0408bf07 100644
--- a/capa/optimizer.py
+++ b/capa/optimizer.py
@@ -18,7 +18,7 @@ def get_node_cost(node):
     # this should be all hash-lookup features.
     # see below.
 
-    elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex)):
+    elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex, capa.features.common.Bytes)):
         # substring and regex features require a full scan of each string
         # which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
         #
@@ -28,12 +28,12 @@ def get_node_cost(node):
 
     elif isinstance(node, (ceng.Not, ceng.Range)):
         # the cost of these nodes are defined by the complexity of their single child.
-        return get_node_cost(node.child)
+        return 1 + get_node_cost(node.child)
 
     elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
         # the cost of these nodes is the full cost of their children
         # as this is the worst-case scenario.
-        return sum(map(get_node_cost, node.children))
+        return 1 + sum(map(get_node_cost, node.children))
 
     else:
         # this should be all hash-lookup features.