mirror of
https://github.com/mandiant/capa.git
synced 2026-02-05 03:16:56 -08:00
3
.gitignore
vendored
3
.gitignore
vendored
@@ -115,3 +115,6 @@ isort-output.log
|
||||
black-output.log
|
||||
rule-linter-output.log
|
||||
.vscode
|
||||
scripts/perf/*.txt
|
||||
scripts/perf/*.svg
|
||||
scripts/perf/*.zip
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
|
||||
### Development
|
||||
|
||||
- add profiling infrastructure #828 @williballenthin
|
||||
|
||||
### Raw diffs
|
||||
- [capa <release>...master](https://github.com/mandiant/capa/compare/v3.0.3...master)
|
||||
- [capa-rules <release>...master](https://github.com/mandiant/capa-rules/compare/v3.0.3...master)
|
||||
|
||||
@@ -10,6 +10,7 @@ import copy
|
||||
import collections
|
||||
from typing import Set, Dict, List, Tuple, Union, Mapping, Iterable
|
||||
|
||||
import capa.perf
|
||||
import capa.rules
|
||||
import capa.features.common
|
||||
from capa.features.common import Result, Feature
|
||||
@@ -79,6 +80,9 @@ class And(Statement):
|
||||
self.children = children
|
||||
|
||||
def evaluate(self, ctx):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.and"] += 1
|
||||
|
||||
results = [child.evaluate(ctx) for child in self.children]
|
||||
success = all(results)
|
||||
return Result(success, self, results)
|
||||
@@ -92,6 +96,9 @@ class Or(Statement):
|
||||
self.children = children
|
||||
|
||||
def evaluate(self, ctx):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.or"] += 1
|
||||
|
||||
results = [child.evaluate(ctx) for child in self.children]
|
||||
success = any(results)
|
||||
return Result(success, self, results)
|
||||
@@ -105,6 +112,9 @@ class Not(Statement):
|
||||
self.child = child
|
||||
|
||||
def evaluate(self, ctx):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.not"] += 1
|
||||
|
||||
results = [self.child.evaluate(ctx)]
|
||||
success = not results[0]
|
||||
return Result(success, self, results)
|
||||
@@ -119,6 +129,9 @@ class Some(Statement):
|
||||
self.children = children
|
||||
|
||||
def evaluate(self, ctx):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.some"] += 1
|
||||
|
||||
results = [child.evaluate(ctx) for child in self.children]
|
||||
# note that here we cast the child result as a bool
|
||||
# because we've overridden `__bool__` above.
|
||||
@@ -138,6 +151,9 @@ class Range(Statement):
|
||||
self.max = max if max is not None else (1 << 64 - 1)
|
||||
|
||||
def evaluate(self, ctx):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.range"] += 1
|
||||
|
||||
count = len(ctx.get(self.child, []))
|
||||
if self.min == 0 and count == 0:
|
||||
return Result(True, self, [])
|
||||
|
||||
@@ -16,6 +16,7 @@ if TYPE_CHECKING:
|
||||
# circular import, otherwise
|
||||
import capa.engine
|
||||
|
||||
import capa.perf
|
||||
import capa.features
|
||||
import capa.features.extractors.elf
|
||||
|
||||
@@ -146,6 +147,8 @@ class Feature:
|
||||
return str(self)
|
||||
|
||||
def evaluate(self, ctx: Dict["Feature", Set[int]]) -> Result:
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature." + self.name] += 1
|
||||
return Result(self in ctx, self, [], locations=ctx.get(self, []))
|
||||
|
||||
def freeze_serialize(self):
|
||||
@@ -190,6 +193,9 @@ class Substring(String):
|
||||
self.value = value
|
||||
|
||||
def evaluate(self, ctx):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.substring"] += 1
|
||||
|
||||
# mapping from string value to list of locations.
|
||||
# will unique the locations later on.
|
||||
matches = collections.defaultdict(list)
|
||||
@@ -275,6 +281,9 @@ class Regex(String):
|
||||
)
|
||||
|
||||
def evaluate(self, ctx):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.regex"] += 1
|
||||
|
||||
# mapping from string value to list of locations.
|
||||
# will unique the locations later on.
|
||||
matches = collections.defaultdict(list)
|
||||
@@ -358,6 +367,9 @@ class Bytes(Feature):
|
||||
self.value = value
|
||||
|
||||
def evaluate(self, ctx):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.bytes"] += 1
|
||||
|
||||
for feature, locations in ctx.items():
|
||||
if not isinstance(feature, (Bytes,)):
|
||||
continue
|
||||
|
||||
12
capa/main.py
12
capa/main.py
@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import hashlib
|
||||
import logging
|
||||
import os.path
|
||||
@@ -17,6 +18,7 @@ import argparse
|
||||
import datetime
|
||||
import textwrap
|
||||
import itertools
|
||||
import contextlib
|
||||
import collections
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
@@ -26,6 +28,7 @@ import colorama
|
||||
from pefile import PEFormatError
|
||||
from elftools.common.exceptions import ELFError
|
||||
|
||||
import capa.perf
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
import capa.version
|
||||
@@ -65,6 +68,14 @@ E_UNSUPPORTED_IDA_VERSION = -19
|
||||
logger = logging.getLogger("capa")
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def timing(msg: str):
|
||||
t0 = time.time()
|
||||
yield
|
||||
t1 = time.time()
|
||||
logger.debug("perf: %s: %0.2fs", msg, t1 - t0)
|
||||
|
||||
|
||||
def set_vivisect_log_level(level):
|
||||
logging.getLogger("vivisect").setLevel(level)
|
||||
logging.getLogger("vivisect.base").setLevel(level)
|
||||
@@ -892,6 +903,7 @@ def main(argv=None):
|
||||
try:
|
||||
rules = get_rules(args.rules, disable_progress=args.quiet)
|
||||
rules = capa.rules.RuleSet(rules)
|
||||
|
||||
logger.debug(
|
||||
"successfully loaded %s rules",
|
||||
# during the load of the RuleSet, we extract subscope statements into their own rules
|
||||
|
||||
10
capa/perf.py
Normal file
10
capa/perf.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import collections
|
||||
from typing import Dict
|
||||
|
||||
# this structure is unstable and may change before the next major release.
|
||||
counters: Dict[str, int] = collections.Counter()
|
||||
|
||||
|
||||
def reset():
|
||||
global counters
|
||||
counters = collections.Counter()
|
||||
@@ -27,6 +27,7 @@ from typing import Any, Dict, List, Union, Iterator
|
||||
import yaml
|
||||
import ruamel.yaml
|
||||
|
||||
import capa.perf
|
||||
import capa.engine as ceng
|
||||
import capa.features
|
||||
import capa.features.file
|
||||
@@ -620,6 +621,8 @@ class Rule:
|
||||
yield new_rule
|
||||
|
||||
def evaluate(self, features: FeatureSet):
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature.rule"] += 1
|
||||
return self.statement.evaluate(features)
|
||||
|
||||
@classmethod
|
||||
|
||||
150
scripts/profile-time.py
Normal file
150
scripts/profile-time.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
Invoke capa multiple times and record profiling informations.
|
||||
Use the --number and --repeat options to change the number of iterations.
|
||||
By default, the script will emit a markdown table with a label pulled from git.
|
||||
|
||||
Note: you can run this script against pre-generated .frz files to reduce the startup time.
|
||||
|
||||
usage:
|
||||
|
||||
usage: profile-time.py [--number NUMBER] [--repeat REPEAT] [--label LABEL] sample
|
||||
|
||||
Profile capa performance
|
||||
|
||||
positional arguments:
|
||||
sample path to sample to analyze
|
||||
|
||||
optional arguments:
|
||||
--number NUMBER batch size of profile collection
|
||||
--repeat REPEAT batch count of profile collection
|
||||
--label LABEL description of the profile collection
|
||||
|
||||
example:
|
||||
|
||||
$ python profile-time.py ./tests/data/kernel32.dll_.frz --number 1 --repeat 2
|
||||
|
||||
| label | count(evaluations) | avg(time) | min(time) | max(time) |
|
||||
|--------------------------------------|----------------------|-------------|-------------|-------------|
|
||||
| 18c30e4 main: remove perf debug msgs | 66,561,622 | 132.13s | 125.14s | 139.12s |
|
||||
|
||||
^^^ --label or git hash
|
||||
"""
|
||||
import sys
|
||||
import timeit
|
||||
import logging
|
||||
import argparse
|
||||
import subprocess
|
||||
|
||||
import tqdm
|
||||
import tabulate
|
||||
|
||||
import capa.main
|
||||
import capa.perf
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
import capa.helpers
|
||||
import capa.features
|
||||
import capa.features.common
|
||||
import capa.features.freeze
|
||||
|
||||
logger = logging.getLogger("capa.profile")
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
if argv is None:
|
||||
argv = sys.argv[1:]
|
||||
|
||||
label = subprocess.run(
|
||||
"git show --pretty=oneline --abbrev-commit | head -n 1", shell=True, capture_output=True, text=True
|
||||
).stdout.strip()
|
||||
is_dirty = (
|
||||
subprocess.run(
|
||||
"git status | grep 'modified: ' | grep -v 'rules' | grep -v 'tests/data'",
|
||||
shell=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
).stdout
|
||||
!= ""
|
||||
)
|
||||
|
||||
if is_dirty:
|
||||
label += " (dirty)"
|
||||
|
||||
parser = argparse.ArgumentParser(description="Profile capa performance")
|
||||
capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "rules"})
|
||||
|
||||
parser.add_argument("--number", type=int, default=3, help="batch size of profile collection")
|
||||
parser.add_argument("--repeat", type=int, default=30, help="batch count of profile collection")
|
||||
parser.add_argument("--label", type=str, default=label, help="description of the profile collection")
|
||||
|
||||
args = parser.parse_args(args=argv)
|
||||
capa.main.handle_common_args(args)
|
||||
|
||||
try:
|
||||
taste = capa.helpers.get_file_taste(args.sample)
|
||||
except IOError as e:
|
||||
logger.error("%s", str(e))
|
||||
return -1
|
||||
|
||||
try:
|
||||
with capa.main.timing("load rules"):
|
||||
rules = capa.rules.RuleSet(capa.main.get_rules(args.rules, disable_progress=True))
|
||||
except (IOError) as e:
|
||||
logger.error("%s", str(e))
|
||||
return -1
|
||||
|
||||
try:
|
||||
sig_paths = capa.main.get_signatures(args.signatures)
|
||||
except (IOError) as e:
|
||||
logger.error("%s", str(e))
|
||||
return -1
|
||||
|
||||
if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
|
||||
with open(args.sample, "rb") as f:
|
||||
extractor = capa.features.freeze.load(f.read())
|
||||
else:
|
||||
extractor = capa.main.get_extractor(
|
||||
args.sample, args.format, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False
|
||||
)
|
||||
|
||||
with tqdm.tqdm(total=args.number * args.repeat) as pbar:
|
||||
|
||||
def do_iteration():
|
||||
capa.perf.reset()
|
||||
capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
||||
pbar.update(1)
|
||||
|
||||
samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)
|
||||
|
||||
logger.debug("perf: find capabilities: min: %0.2fs" % (min(samples) / float(args.number)))
|
||||
logger.debug("perf: find capabilities: avg: %0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)))
|
||||
logger.debug("perf: find capabilities: max: %0.2fs" % (max(samples) / float(args.number)))
|
||||
|
||||
for (counter, count) in capa.perf.counters.most_common():
|
||||
logger.debug("perf: counter: {:}: {:,}".format(counter, count))
|
||||
|
||||
print(
|
||||
tabulate.tabulate(
|
||||
[
|
||||
(
|
||||
args.label,
|
||||
"{:,}".format(capa.perf.counters["evaluate.feature"]),
|
||||
# python documentation indicates that min(samples) should be preferred,
|
||||
# so lets put that first.
|
||||
#
|
||||
# https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
|
||||
"%0.2fs" % (min(samples) / float(args.number)),
|
||||
"%0.2fs" % (sum(samples) / float(args.repeat) / float(args.number)),
|
||||
"%0.2fs" % (max(samples) / float(args.number)),
|
||||
)
|
||||
],
|
||||
headers=["label", "count(evaluations)", "min(time)", "avg(time)", "max(time)"],
|
||||
tablefmt="github",
|
||||
)
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user