replace tqdm, termcolor, tabulate with rich (#2374)

* logging: use rich handler for logging

* tqdm: remove unneeded redirecting_print_to_tqdm function

* tqdm: introduce `CapaProgressBar` rich `Progress` bar

* tqdm: replace tqdm with rich Progress bar

* tqdm: remove tqdm dependency

* termcolor: replace termcolor and update `scripts/`

* tests: update `test_render.py` to use rich.console.Console

* termcolor: remove termcolor dependency

* capa.render.utils: add `write` & `writeln` methods to subclass `Console`

* update markup util functions to use fmt strings

* tests: update `test_render.py` to use `capa.render.utils.Console`

* replace kwarg `end=""` with `write` and `writeln` methods

* tabulate: replace tabulate with `rich.table`

* tabulate: remove `tabulate` and its dependency `wcwidth`

* logging: handle logging in `capa.main`

* logging: set up logging in `capa.main`

this commit sets up logging in `capa.main` and uses a shared
`log_console` in `capa.helpers` for logging purposes

* changelog: replace packages with rich

* remove entry from pyinstaller and unneeded progress.update call

* update requirements.txt

* scripts: use `capa.helpers.log_console` in `CapaProgressBar`

* logging: configure root logger to use `RichHandler`

* remove unused import `inspect`
This commit is contained in:
Fariss
2024-09-27 09:34:21 +02:00
committed by GitHub
parent 558bf0fbf2
commit 51a4eb46b8
16 changed files with 890 additions and 806 deletions

View File

@@ -1,8 +1,5 @@
[mypy]
[mypy-tqdm.*]
ignore_missing_imports = True
[mypy-ruamel.*]
ignore_missing_imports = True

View File

@@ -2,7 +2,6 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
import sys
import wcwidth
import capa.rules.cache
from pathlib import Path
@@ -29,13 +28,6 @@ a = Analysis(
("../../rules", "rules"),
("../../sigs", "sigs"),
("../../cache", "cache"),
# capa.render.default uses tabulate that depends on wcwidth.
# it seems wcwidth uses a json file `version.json`
# and this doesn't get picked up by pyinstaller automatically.
# so we manually embed the wcwidth resources here.
#
# ref: https://stackoverflow.com/a/62278462/87207
(Path(wcwidth.__file__).parent, "wcwidth"),
],
# when invoking pyinstaller from the project root,
# this gets run from the project root.
@@ -48,11 +40,6 @@ a = Analysis(
"tkinter",
"_tkinter",
"Tkinter",
# tqdm provides renderers for ipython,
# however, this drags in a lot of dependencies.
# since we don't spawn a notebook, we can safely remove these.
"IPython",
"ipywidgets",
# these are pulled in by networkx
# but we don't need to compute the strongly connected components.
"numpy",

File diff suppressed because it is too large Load Diff

View File

@@ -6,20 +6,16 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import logging
import itertools
import collections
from typing import Any, Tuple
import tqdm
from typing import Any, List, Tuple
import capa.perf
import capa.features.freeze as frz
import capa.render.result_document as rdoc
from capa.rules import Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import redirecting_print_to_tqdm
from capa.capabilities.common import find_file_capabilities
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle, DynamicFeatureExtractor
@@ -139,38 +135,30 @@ def find_dynamic_capabilities(
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
assert isinstance(extractor, DynamicFeatureExtractor)
with redirecting_print_to_tqdm(disable_progress):
with tqdm.contrib.logging.logging_redirect_tqdm():
pbar = tqdm.tqdm
if disable_progress:
# do not use tqdm to avoid unnecessary side effects when caller intends
# to disable progress completely
def pbar(s, *args, **kwargs):
return s
processes: List[ProcessHandle] = list(extractor.get_processes())
n_processes: int = len(processes)
elif not sys.stderr.isatty():
# don't display progress bar when stderr is redirected to a file
def pbar(s, *args, **kwargs):
return s
with capa.helpers.CapaProgressBar(
console=capa.helpers.log_console, transient=True, disable=disable_progress
) as pbar:
task = pbar.add_task("matching", total=n_processes, unit="processes")
for p in processes:
process_matches, thread_matches, call_matches, feature_count = find_process_capabilities(
ruleset, extractor, p
)
feature_counts.processes += (
rdoc.ProcessFeatureCount(address=frz.Address.from_capa(p.address), count=feature_count),
)
logger.debug("analyzed %s and extracted %d features", p.address, feature_count)
processes = list(extractor.get_processes())
for rule_name, res in process_matches.items():
all_process_matches[rule_name].extend(res)
for rule_name, res in thread_matches.items():
all_thread_matches[rule_name].extend(res)
for rule_name, res in call_matches.items():
all_call_matches[rule_name].extend(res)
pb = pbar(processes, desc="matching", unit=" processes", leave=False)
for p in pb:
process_matches, thread_matches, call_matches, feature_count = find_process_capabilities(
ruleset, extractor, p
)
feature_counts.processes += (
rdoc.ProcessFeatureCount(address=frz.Address.from_capa(p.address), count=feature_count),
)
logger.debug("analyzed %s and extracted %d features", p.address, feature_count)
for rule_name, res in process_matches.items():
all_process_matches[rule_name].extend(res)
for rule_name, res in thread_matches.items():
all_thread_matches[rule_name].extend(res)
for rule_name, res in call_matches.items():
all_call_matches[rule_name].extend(res)
pbar.advance(task)
# collection of features that captures the rule matches within process and thread scopes.
# mapping from feature (matched rule) to set of addresses at which it matched.

View File

@@ -6,21 +6,18 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import time
import logging
import itertools
import collections
from typing import Any, Tuple
import tqdm.contrib.logging
from typing import Any, List, Tuple
import capa.perf
import capa.helpers
import capa.features.freeze as frz
import capa.render.result_document as rdoc
from capa.rules import Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import redirecting_print_to_tqdm
from capa.capabilities.common import find_file_capabilities
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
@@ -143,75 +140,58 @@ def find_static_capabilities(
library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
assert isinstance(extractor, StaticFeatureExtractor)
with redirecting_print_to_tqdm(disable_progress):
with tqdm.contrib.logging.logging_redirect_tqdm():
pbar = tqdm.tqdm
if capa.helpers.is_runtime_ghidra():
# Ghidrathon interpreter cannot properly handle
# the TMonitor thread that is created via a monitor_interval
# > 0
pbar.monitor_interval = 0
if disable_progress:
# do not use tqdm to avoid unnecessary side effects when caller intends
# to disable progress completely
def pbar(s, *args, **kwargs):
return s
functions: List[FunctionHandle] = list(extractor.get_functions())
n_funcs: int = len(functions)
n_libs: int = 0
percentage: float = 0
elif not sys.stderr.isatty():
# don't display progress bar when stderr is redirected to a file
def pbar(s, *args, **kwargs):
return s
functions = list(extractor.get_functions())
n_funcs = len(functions)
pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions", leave=False)
for f in pb:
t0 = time.time()
if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
library_functions += (
rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
)
n_libs = len(library_functions)
percentage = round(100 * (n_libs / n_funcs))
if isinstance(pb, tqdm.tqdm):
pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
continue
function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(
ruleset, extractor, f
with capa.helpers.CapaProgressBar(
console=capa.helpers.log_console, transient=True, disable=disable_progress
) as pbar:
task = pbar.add_task(
"matching", total=n_funcs, unit="functions", postfix=f"skipped {n_libs} library functions, {percentage}%"
)
for f in functions:
t0 = time.time()
if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
library_functions += (
rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
)
feature_counts.functions += (
rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
)
t1 = time.time()
n_libs = len(library_functions)
percentage = round(100 * (n_libs / n_funcs))
pbar.update(task, postfix=f"skipped {n_libs} library functions, {percentage}%")
pbar.advance(task)
continue
match_count = 0
for name, matches_ in itertools.chain(
function_matches.items(), bb_matches.items(), insn_matches.items()
):
# in practice, most matches are derived rules,
# like "check OS version/5bf4c7f39fd4492cbed0f6dc7d596d49"
# but when we log to the human, they really care about "real" rules.
if not ruleset.rules[name].is_subscope_rule():
match_count += len(matches_)
function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(ruleset, extractor, f)
feature_counts.functions += (
rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
)
t1 = time.time()
logger.debug(
"analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
f.address,
feature_count,
match_count,
t1 - t0,
)
match_count = 0
for name, matches_ in itertools.chain(function_matches.items(), bb_matches.items(), insn_matches.items()):
if not ruleset.rules[name].is_subscope_rule():
match_count += len(matches_)
for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res)
for rule_name, res in bb_matches.items():
all_bb_matches[rule_name].extend(res)
for rule_name, res in insn_matches.items():
all_insn_matches[rule_name].extend(res)
logger.debug(
"analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
f.address,
feature_count,
match_count,
t1 - t0,
)
for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res)
for rule_name, res in bb_matches.items():
all_bb_matches[rule_name].extend(res)
for rule_name, res in insn_matches.items():
all_insn_matches[rule_name].extend(res)
pbar.advance(task)
# collection of features that captures the rule matches within function, BB, and instruction scopes.
# mapping from feature (matched rule) to set of addresses at which it matched.

View File

@@ -10,7 +10,6 @@ import os
import sys
import gzip
import ctypes
import inspect
import logging
import tempfile
import contextlib
@@ -20,8 +19,21 @@ from pathlib import Path
from zipfile import ZipFile
from datetime import datetime
import tqdm
import msgspec.json
from rich.console import Console
from rich.progress import (
Task,
Text,
Progress,
BarColumn,
TextColumn,
SpinnerColumn,
ProgressColumn,
TimeElapsedColumn,
MofNCompleteColumn,
TaskProgressColumn,
TimeRemainingColumn,
)
from capa.exceptions import UnsupportedFormatError
from capa.features.common import (
@@ -51,6 +63,10 @@ EXTENSIONS_FREEZE = "frz"
logger = logging.getLogger("capa")
# shared console used to redirect logging to stderr
log_console: Console = Console(stderr=True)
def hex(n: int) -> str:
"""render the given number using upper case hex, like: 0x123ABC"""
if n < 0:
@@ -247,39 +263,6 @@ def get_format(sample: Path) -> str:
return FORMAT_UNKNOWN
@contextlib.contextmanager
def redirecting_print_to_tqdm(disable_progress):
"""
tqdm (progress bar) expects to have fairly tight control over console output.
so calls to `print()` will break the progress bar and make things look bad.
so, this context manager temporarily replaces the `print` implementation
with one that is compatible with tqdm.
via: https://stackoverflow.com/a/42424890/87207
"""
old_print = print # noqa: T202 [reserved word print used]
def new_print(*args, **kwargs):
# If tqdm.tqdm.write raises error, use builtin print
if disable_progress:
old_print(*args, **kwargs)
else:
try:
tqdm.tqdm.write(*args, **kwargs)
except Exception:
old_print(*args, **kwargs)
try:
# Globally replace print with new_print.
# Verified this works manually on Python 3.11:
# >>> import inspect
# >>> inspect.builtins
# <module 'builtins' (built-in)>
inspect.builtins.print = new_print # type: ignore
yield
finally:
inspect.builtins.print = old_print # type: ignore
def log_unsupported_format_error():
logger.error("-" * 80)
logger.error(" Input file does not appear to be a supported file.")
@@ -433,3 +416,47 @@ def is_cache_newer_than_rule_code(cache_dir: Path) -> bool:
return False
return True
class RateColumn(ProgressColumn):
    """Renders speed column in progress bar."""

    def render(self, task: "Task") -> Text:
        """
        render the task's speed followed by its unit, like: `12.3 functions/s`.

        the unit is read from the task field `unit` and defaults to "it".
        when the task has no speed yet (None or 0), the placeholder "00.0" is shown.
        """
        speed = f"{task.speed:>.1f}" if task.speed else "00.0"
        unit = task.fields.get("unit", "it")
        # build the Text directly rather than via `Text.from_markup`:
        # the unit is caller-supplied, so it must not be interpreted as rich markup
        # (a unit containing `[`/`]` would otherwise be swallowed or raise a markup error).
        return Text(f"{speed} {unit}/s", style="progress.data.speed")
class PostfixColumn(ProgressColumn):
    """Renders a postfix column in progress bar."""

    def render(self, task: "Task") -> Text:
        """render the task field `postfix` as plain text, or an empty text when it is not set."""
        postfix = task.fields.get("postfix", "")
        return Text(postfix)
class MofNCompleteColumnWithUnit(MofNCompleteColumn):
    """Renders completed/total count column with a unit."""

    def render(self, task: "Task") -> Text:
        """
        render the parent column's text, with the task field `unit` appended when present.

        when the task has no (or an empty) `unit` field, the parent's text is returned unchanged.
        """
        rendered = super().render(task)
        unit = task.fields.get("unit")
        if not unit:
            return rendered
        # `Text.append` mutates in place and returns the same Text instance.
        return rendered.append(f" {unit}")
class CapaProgressBar(Progress):
    """rich `Progress` bar using capa's own default column layout."""

    @classmethod
    def get_default_columns(cls):
        """
        return the default columns, in display order.

        the layout includes the unit-aware count column, the rate column,
        and the postfix column, which read the task fields `unit` and `postfix`.
        bare strings are entries placed between columns.
        """
        description = TextColumn("[progress.description]{task.description}")
        # elapsed and remaining time are shown together, like: `elapsed < remaining`
        timing = (TimeElapsedColumn(), "<", TimeRemainingColumn())
        return (
            SpinnerColumn(),
            description,
            TaskProgressColumn(),
            BarColumn(),
            MofNCompleteColumnWithUnit(),
            "",
            *timing,
            "",
            RateColumn(),
            PostfixColumn(),
        )

View File

@@ -22,6 +22,7 @@ from pathlib import Path
import colorama
from pefile import PEFormatError
from rich.logging import RichHandler
from elftools.common.exceptions import ELFError
import capa.perf
@@ -405,15 +406,23 @@ def handle_common_args(args):
ShouldExitError: if the program is invoked incorrectly and should exit.
"""
if args.quiet:
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
# use [/] after the logger name to reset any styling,
# and prevent the color from carrying over to the message
logformat = "[dim]%(name)s[/]: %(message)s"
# set markup=True to allow the use of Rich's markup syntax in log messages
rich_handler = RichHandler(markup=True, show_time=False, show_path=True, console=capa.helpers.log_console)
rich_handler.setFormatter(logging.Formatter(logformat))
# use RichHandler for root logger
logging.getLogger().addHandler(rich_handler)
# disable vivisect-related logging, it's verbose and not relevant for capa users
set_vivisect_log_level(logging.CRITICAL)

View File

@@ -9,28 +9,29 @@
import io
from typing import Dict, List, Tuple, Union, Iterator, Optional
import termcolor
import rich.console
from rich.progress import Text
import capa.render.result_document as rd
def bold(s: str) -> str:
def bold(s: str) -> Text:
"""draw attention to the given string"""
return termcolor.colored(s, "cyan")
return Text.from_markup(f"[cyan]{s}")
def bold2(s: str) -> str:
def bold2(s: str) -> Text:
"""draw attention to the given string, within a `bold` section"""
return termcolor.colored(s, "green")
return Text.from_markup(f"[green]{s}")
def mute(s: str) -> str:
def mute(s: str) -> Text:
"""draw attention away from the given string"""
return termcolor.colored(s, "dark_grey")
return Text.from_markup(f"[dim]{s}")
def warn(s: str) -> str:
return termcolor.colored(s, "yellow")
def warn(s: str) -> Text:
return Text.from_markup(f"[yellow]{s}")
def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]):
@@ -85,3 +86,17 @@ class StringIO(io.StringIO):
def writeln(self, s):
self.write(s)
self.write("\n")
class Console(rich.console.Console):
    """rich console that also exposes the `write`/`writeln` methods."""

    def writeln(self, *args, **kwargs) -> None:
        """
        prints the text with a new line at the end.

        all arguments are forwarded unchanged to `print`.
        """
        self.print(*args, **kwargs)

    def write(self, *args, **kwargs) -> None:
        """
        prints the text without a new line at the end.

        all arguments are forwarded to `print` with `end=""`;
        callers must not pass `end` themselves.
        """
        self.print(*args, end="", **kwargs)

View File

@@ -25,7 +25,8 @@ See the License for the specific language governing permissions and limitations
from typing import cast
import tabulate
from rich.text import Text
from rich.table import Table
import capa.rules
import capa.helpers
@@ -34,6 +35,7 @@ import capa.features.freeze as frz
import capa.render.result_document as rd
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.render.utils import Console
def format_address(address: frz.Address) -> str:
@@ -140,7 +142,7 @@ def render_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
)
def render_static_meta(ostream, meta: rd.StaticMetadata):
def render_static_meta(console: Console, meta: rd.StaticMetadata):
"""
like:
@@ -161,12 +163,16 @@ def render_static_meta(ostream, meta: rd.StaticMetadata):
total feature count 1918
"""
grid = Table.grid(padding=(0, 2))
grid.add_column(style="dim")
grid.add_column()
rows = [
("md5", meta.sample.md5),
("sha1", meta.sample.sha1),
("sha256", meta.sample.sha256),
("path", meta.sample.path),
("timestamp", meta.timestamp),
("timestamp", str(meta.timestamp)),
("capa version", meta.version),
("os", meta.analysis.os),
("format", meta.analysis.format),
@@ -175,18 +181,21 @@ def render_static_meta(ostream, meta: rd.StaticMetadata):
("extractor", meta.analysis.extractor),
("base address", format_address(meta.analysis.base_address)),
("rules", "\n".join(meta.analysis.rules)),
("function count", len(meta.analysis.feature_counts.functions)),
("library function count", len(meta.analysis.library_functions)),
("function count", str(len(meta.analysis.feature_counts.functions))),
("library function count", str(len(meta.analysis.library_functions))),
(
"total feature count",
meta.analysis.feature_counts.file + sum(f.count for f in meta.analysis.feature_counts.functions),
str(meta.analysis.feature_counts.file + sum(f.count for f in meta.analysis.feature_counts.functions)),
),
]
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
for row in rows:
grid.add_row(*row)
console.print(grid)
def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
def render_dynamic_meta(console: Console, meta: rd.DynamicMetadata):
"""
like:
@@ -205,12 +214,16 @@ def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
total feature count 1918
"""
table = Table.grid(padding=(0, 2))
table.add_column(style="dim")
table.add_column()
rows = [
("md5", meta.sample.md5),
("sha1", meta.sample.sha1),
("sha256", meta.sample.sha256),
("path", meta.sample.path),
("timestamp", meta.timestamp),
("timestamp", str(meta.timestamp)),
("capa version", meta.version),
("os", meta.analysis.os),
("format", meta.analysis.format),
@@ -218,26 +231,29 @@ def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
("analysis", meta.flavor.value),
("extractor", meta.analysis.extractor),
("rules", "\n".join(meta.analysis.rules)),
("process count", len(meta.analysis.feature_counts.processes)),
("process count", str(len(meta.analysis.feature_counts.processes))),
(
"total feature count",
meta.analysis.feature_counts.file + sum(p.count for p in meta.analysis.feature_counts.processes),
str(meta.analysis.feature_counts.file + sum(p.count for p in meta.analysis.feature_counts.processes)),
),
]
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
for row in rows:
table.add_row(*row)
console.print(table)
def render_meta(osstream, doc: rd.ResultDocument):
def render_meta(console: Console, doc: rd.ResultDocument):
if doc.meta.flavor == rd.Flavor.STATIC:
render_static_meta(osstream, cast(rd.StaticMetadata, doc.meta))
render_static_meta(console, cast(rd.StaticMetadata, doc.meta))
elif doc.meta.flavor == rd.Flavor.DYNAMIC:
render_dynamic_meta(osstream, cast(rd.DynamicMetadata, doc.meta))
render_dynamic_meta(console, cast(rd.DynamicMetadata, doc.meta))
else:
raise ValueError("invalid meta analysis")
def render_rules(ostream, doc: rd.ResultDocument):
def render_rules(console: Console, doc: rd.ResultDocument):
"""
like:
@@ -254,11 +270,15 @@ def render_rules(ostream, doc: rd.ResultDocument):
if count == 1:
capability = rutils.bold(rule.meta.name)
else:
capability = f"{rutils.bold(rule.meta.name)} ({count} matches)"
capability = Text.assemble(rutils.bold(rule.meta.name), f" ({count} matches)")
ostream.writeln(capability)
console.print(capability)
had_match = True
table = Table.grid(padding=(0, 2))
table.add_column(style="dim")
table.add_column()
rows = []
ns = rule.meta.namespace
@@ -310,23 +330,26 @@ def render_rules(ostream, doc: rd.ResultDocument):
rows.append(("matches", "\n".join(lines)))
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
ostream.write("\n")
for row in rows:
table.add_row(*row)
console.print(table)
console.print()
if not had_match:
ostream.writeln(rutils.bold("no capabilities found"))
console.print(rutils.bold("no capabilities found"))
def render_verbose(doc: rd.ResultDocument):
ostream = rutils.StringIO()
console = Console(highlight=False)
render_meta(ostream, doc)
ostream.write("\n")
with console.capture() as capture:
render_meta(console, doc)
console.print()
render_rules(console, doc)
console.print()
render_rules(ostream, doc)
ostream.write("\n")
return ostream.getvalue()
return capture.get()
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:

View File

@@ -9,7 +9,8 @@ import logging
import textwrap
from typing import Dict, Iterable, Optional
import tabulate
from rich.text import Text
from rich.table import Table
import capa.rules
import capa.helpers
@@ -22,6 +23,7 @@ import capa.render.result_document as rd
import capa.features.freeze.features as frzf
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.render.utils import Console
logger = logging.getLogger(__name__)
@@ -45,7 +47,7 @@ def hanging_indent(s: str, indent: int) -> str:
return textwrap.indent(s, prefix=prefix)[len(prefix) :]
def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address], indent: int):
def render_locations(console: Console, layout: rd.Layout, locations: Iterable[frz.Address], indent: int):
import capa.render.verbose as v
# it's possible to have an empty locations array here,
@@ -56,7 +58,7 @@ def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address
if len(locations) == 0:
return
ostream.write(" @ ")
console.write(" @ ")
location0 = locations[0]
if len(locations) == 1:
@@ -64,58 +66,58 @@ def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address
if location.type == frz.AddressType.CALL:
assert isinstance(layout, rd.DynamicLayout)
ostream.write(hanging_indent(v.render_call(layout, location), indent + 1))
console.write(hanging_indent(v.render_call(layout, location), indent + 1))
else:
ostream.write(v.format_address(locations[0]))
console.write(v.format_address(locations[0]))
elif location0.type == frz.AddressType.CALL and len(locations) > 1:
location = locations[0]
assert isinstance(layout, rd.DynamicLayout)
s = f"{v.render_call(layout, location)}\nand {(len(locations) - 1)} more..."
ostream.write(hanging_indent(s, indent + 1))
console.write(hanging_indent(s, indent + 1))
elif len(locations) > 4:
# don't display too many locations, because it becomes very noisy.
# probably only the first handful of locations will be useful for inspection.
ostream.write(", ".join(map(v.format_address, locations[0:4])))
ostream.write(f", and {(len(locations) - 4)} more...")
console.write(", ".join(map(v.format_address, locations[0:4])))
console.write(f", and {(len(locations) - 4)} more...")
elif len(locations) > 1:
ostream.write(", ".join(map(v.format_address, locations)))
console.write(", ".join(map(v.format_address, locations)))
else:
raise RuntimeError("unreachable")
def render_statement(ostream, layout: rd.Layout, match: rd.Match, statement: rd.Statement, indent: int):
ostream.write(" " * indent)
def render_statement(console: Console, layout: rd.Layout, match: rd.Match, statement: rd.Statement, indent: int):
console.write(" " * indent)
if isinstance(statement, rd.SubscopeStatement):
# emit `basic block:`
# rather than `subscope:`
ostream.write(statement.scope)
console.write(statement.scope)
ostream.write(":")
console.write(":")
if statement.description:
ostream.write(f" = {statement.description}")
ostream.writeln("")
console.write(f" = {statement.description}")
console.writeln()
elif isinstance(statement, (rd.CompoundStatement)):
# emit `and:` `or:` `optional:` `not:`
ostream.write(statement.type)
console.write(statement.type)
ostream.write(":")
console.write(":")
if statement.description:
ostream.write(f" = {statement.description}")
ostream.writeln("")
console.write(f" = {statement.description}")
console.writeln()
elif isinstance(statement, rd.SomeStatement):
ostream.write(f"{statement.count} or more:")
console.write(f"{statement.count} or more:")
if statement.description:
ostream.write(f" = {statement.description}")
ostream.writeln("")
console.write(f" = {statement.description}")
console.writeln()
elif isinstance(statement, rd.RangeStatement):
# `range` is a weird node, its almost a hybrid of statement+feature.
@@ -133,25 +135,25 @@ def render_statement(ostream, layout: rd.Layout, match: rd.Match, statement: rd.
value = rutils.bold2(value)
if child.description:
ostream.write(f"count({child.type}({value} = {child.description})): ")
console.write(f"count({child.type}({value} = {child.description})): ")
else:
ostream.write(f"count({child.type}({value})): ")
console.write(f"count({child.type}({value})): ")
else:
ostream.write(f"count({child.type}): ")
console.write(f"count({child.type}): ")
if statement.max == statement.min:
ostream.write(f"{statement.min}")
console.write(f"{statement.min}")
elif statement.min == 0:
ostream.write(f"{statement.max} or fewer")
console.write(f"{statement.max} or fewer")
elif statement.max == (1 << 64 - 1):
ostream.write(f"{statement.min} or more")
console.write(f"{statement.min} or more")
else:
ostream.write(f"between {statement.min} and {statement.max}")
console.write(f"between {statement.min} and {statement.max}")
if statement.description:
ostream.write(f" = {statement.description}")
render_locations(ostream, layout, match.locations, indent)
ostream.writeln("")
console.write(f" = {statement.description}")
render_locations(console, layout, match.locations, indent)
console.writeln()
else:
raise RuntimeError("unexpected match statement type: " + str(statement))
@@ -162,9 +164,9 @@ def render_string_value(s: str) -> str:
def render_feature(
ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, feature: frzf.Feature, indent: int
console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, feature: frzf.Feature, indent: int
):
ostream.write(" " * indent)
console.write(" " * indent)
key = feature.type
value: Optional[str]
@@ -205,14 +207,14 @@ def render_feature(
elif isinstance(feature, frzf.OperandOffsetFeature):
key = f"operand[{feature.index}].offset"
ostream.write(f"{key}: ")
console.write(f"{key}: ")
if value:
ostream.write(rutils.bold2(value))
console.write(rutils.bold2(value))
if feature.description:
ostream.write(capa.rules.DESCRIPTION_SEPARATOR)
ostream.write(feature.description)
console.write(capa.rules.DESCRIPTION_SEPARATOR)
console.write(feature.description)
if isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
# don't show the location of these global features
@@ -224,35 +226,32 @@ def render_feature(
elif isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
pass
else:
render_locations(ostream, layout, match.locations, indent)
ostream.write("\n")
render_locations(console, layout, match.locations, indent)
console.writeln()
else:
# like:
# regex: /blah/ = SOME_CONSTANT
# - "foo blah baz" @ 0x401000
# - "aaa blah bbb" @ 0x402000, 0x403400
ostream.write(key)
ostream.write(": ")
ostream.write(value)
ostream.write("\n")
console.writeln(f"{key}: {value}")
for capture, locations in sorted(match.captures.items()):
ostream.write(" " * (indent + 1))
ostream.write("- ")
ostream.write(rutils.bold2(render_string_value(capture)))
console.write(" " * (indent + 1))
console.write("- ")
console.write(rutils.bold2(render_string_value(capture)))
if isinstance(layout, rd.DynamicLayout) and rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
# like above, don't re-render calls when in call scope.
pass
else:
render_locations(ostream, layout, locations, indent=indent)
ostream.write("\n")
render_locations(console, layout, locations, indent=indent)
console.writeln()
def render_node(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, node: rd.Node, indent: int):
def render_node(console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, node: rd.Node, indent: int):
if isinstance(node, rd.StatementNode):
render_statement(ostream, layout, match, node.statement, indent=indent)
render_statement(console, layout, match, node.statement, indent=indent)
elif isinstance(node, rd.FeatureNode):
render_feature(ostream, layout, rule, match, node.feature, indent=indent)
render_feature(console, layout, rule, match, node.feature, indent=indent)
else:
raise RuntimeError("unexpected node type: " + str(node))
@@ -265,7 +264,9 @@ MODE_SUCCESS = "success"
MODE_FAILURE = "failure"
def render_match(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, indent=0, mode=MODE_SUCCESS):
def render_match(
console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, indent=0, mode=MODE_SUCCESS
):
child_mode = mode
if mode == MODE_SUCCESS:
# display only nodes that evaluated successfully.
@@ -297,13 +298,13 @@ def render_match(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Mat
else:
raise RuntimeError("unexpected mode: " + mode)
render_node(ostream, layout, rule, match, match.node, indent=indent)
render_node(console, layout, rule, match, match.node, indent=indent)
for child in match.children:
render_match(ostream, layout, rule, child, indent=indent + 1, mode=child_mode)
render_match(console, layout, rule, child, indent=indent + 1, mode=child_mode)
def render_rules(ostream, doc: rd.ResultDocument):
def render_rules(console: Console, doc: rd.ResultDocument):
"""
like:
@@ -350,13 +351,13 @@ def render_rules(ostream, doc: rd.ResultDocument):
if count == 1:
if rule.meta.lib:
lib_info = " (library rule)"
capability = f"{rutils.bold(rule.meta.name)}{lib_info}"
capability = Text.assemble(rutils.bold(rule.meta.name), f"{lib_info}")
else:
if rule.meta.lib:
lib_info = ", only showing first match of library rule"
capability = f"{rutils.bold(rule.meta.name)} ({count} matches{lib_info})"
capability = Text.assemble(rutils.bold(rule.meta.name), f" ({count} matches{lib_info})")
ostream.writeln(capability)
console.writeln(capability)
had_match = True
rows = []
@@ -402,7 +403,14 @@ def render_rules(ostream, doc: rd.ResultDocument):
if rule.meta.description:
rows.append(("description", rule.meta.description))
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
grid = Table.grid(padding=(0, 2))
grid.add_column(style="dim")
grid.add_column()
for row in rows:
grid.add_row(*row)
console.writeln(grid)
if capa.rules.Scope.FILE in rule.meta.scopes:
matches = doc.rules[rule.meta.name].matches
@@ -413,61 +421,58 @@ def render_rules(ostream, doc: rd.ResultDocument):
# so, lets be explicit about our assumptions and raise an exception if they fail.
raise RuntimeError(f"unexpected file scope match count: {len(matches)}")
_, first_match = matches[0]
render_match(ostream, doc.meta.analysis.layout, rule, first_match, indent=0)
render_match(console, doc.meta.analysis.layout, rule, first_match, indent=0)
else:
for location, match in sorted(doc.rules[rule.meta.name].matches):
if doc.meta.flavor == rd.Flavor.STATIC:
assert rule.meta.scopes.static is not None
ostream.write(rule.meta.scopes.static.value)
ostream.write(" @ ")
ostream.write(capa.render.verbose.format_address(location))
console.write(rule.meta.scopes.static.value + " @ ")
console.write(capa.render.verbose.format_address(location))
if rule.meta.scopes.static == capa.rules.Scope.BASIC_BLOCK:
func = frz.Address.from_capa(functions_by_bb[location.to_capa()])
ostream.write(f" in function {capa.render.verbose.format_address(func)}")
console.write(f" in function {capa.render.verbose.format_address(func)}")
elif doc.meta.flavor == rd.Flavor.DYNAMIC:
assert rule.meta.scopes.dynamic is not None
assert isinstance(doc.meta.analysis.layout, rd.DynamicLayout)
ostream.write(rule.meta.scopes.dynamic.value)
ostream.write(" @ ")
console.write(rule.meta.scopes.dynamic.value + " @ ")
if rule.meta.scopes.dynamic == capa.rules.Scope.PROCESS:
ostream.write(v.render_process(doc.meta.analysis.layout, location))
console.write(v.render_process(doc.meta.analysis.layout, location))
elif rule.meta.scopes.dynamic == capa.rules.Scope.THREAD:
ostream.write(v.render_thread(doc.meta.analysis.layout, location))
console.write(v.render_thread(doc.meta.analysis.layout, location))
elif rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
ostream.write(hanging_indent(v.render_call(doc.meta.analysis.layout, location), indent=1))
console.write(hanging_indent(v.render_call(doc.meta.analysis.layout, location), indent=1))
else:
capa.helpers.assert_never(rule.meta.scopes.dynamic)
else:
capa.helpers.assert_never(doc.meta.flavor)
ostream.write("\n")
render_match(ostream, doc.meta.analysis.layout, rule, match, indent=1)
console.writeln()
render_match(console, doc.meta.analysis.layout, rule, match, indent=1)
if rule.meta.lib:
# only show first match
break
ostream.write("\n")
console.writeln()
if not had_match:
ostream.writeln(rutils.bold("no capabilities found"))
console.writeln(rutils.bold("no capabilities found"))
def render_vverbose(doc: rd.ResultDocument):
ostream = rutils.StringIO()
console = Console(highlight=False)
capa.render.verbose.render_meta(ostream, doc)
ostream.write("\n")
with console.capture() as capture:
capa.render.verbose.render_meta(console, doc)
console.writeln()
render_rules(console, doc)
console.writeln()
render_rules(ostream, doc)
ostream.write("\n")
return ostream.getvalue()
return capture.get()
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:

View File

@@ -65,12 +65,8 @@ dependencies = [
# or minor otherwise).
# As specific constraints are identified, please provide
# comments and context.
"tqdm>=4",
"pyyaml>=6",
"tabulate>=0.9",
"colorama>=0.4",
"termcolor>=2",
"wcwidth>=0.2",
"ida-settings>=2",
"ruamel.yaml>=0.18",
"pefile>=2023.2.7",
@@ -146,8 +142,6 @@ dev = [
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
"types-PyYAML==6.0.8",
"types-tabulate==0.9.0.20240106",
"types-termcolor==1.1.4",
"types-psutil==6.0.0.20240901",
"types_requests==2.32.0.20240712",
"types-protobuf==5.27.0.20240920",
@@ -236,10 +230,7 @@ DEP002 = [
"types-protobuf",
"types-psutil",
"types-PyYAML",
"types-tabulate",
"types-termcolor",
"types_requests",
"wcwidth"
]
# dependencies imported but missing from definitions

View File

@@ -39,10 +39,6 @@ ruamel-yaml-clib==0.2.8
setuptools==75.1.0
six==1.16.0
sortedcontainers==2.4.0
tabulate==0.9.0
termcolor==2.4.0
tqdm==4.66.5
viv-utils==0.7.11
vivisect==1.2.1
wcwidth==0.2.13
msgspec==0.18.6

View File

@@ -31,11 +31,9 @@ from typing import Set, Dict, List
from pathlib import Path
from dataclasses import field, dataclass
import tqdm
import pydantic
import termcolor
import ruamel.yaml
import tqdm.contrib.logging
from rich import print
import capa.main
import capa.rules
@@ -51,18 +49,6 @@ from capa.render.result_document import RuleMetadata
logger = logging.getLogger("lint")
def red(s):
return termcolor.colored(s, "red")
def orange(s):
return termcolor.colored(s, "yellow")
def green(s):
return termcolor.colored(s, "green")
@dataclass
class Context:
"""
@@ -80,8 +66,8 @@ class Context:
class Lint:
WARN = orange("WARN")
FAIL = red("FAIL")
WARN = "[yellow]WARN[/yellow]"
FAIL = "[red]FAIL[/red]"
name = "lint"
level = FAIL
@@ -896,7 +882,7 @@ def lint_rule(ctx: Context, rule: Rule):
if (not lints_failed) and (not lints_warned) and has_examples:
print("")
print(f'{" (nursery) " if is_nursery_rule(rule) else ""} {rule.name}')
print(f" {Lint.WARN}: {green('no lint failures')}: Graduate the rule")
print(f" {Lint.WARN}: '[green]no lint failures[/green]': Graduate the rule")
print("")
else:
lints_failed = len(tuple(filter(lambda v: v.level == Lint.FAIL, violations)))
@@ -921,12 +907,15 @@ def lint(ctx: Context):
ret = {}
source_rules = [rule for rule in ctx.rules.rules.values() if not rule.is_subscope_rule()]
with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule", leave=False) as pbar:
with capa.helpers.redirecting_print_to_tqdm(False):
for rule in pbar:
name = rule.name
pbar.set_description(width(f"linting rule: {name}", 48))
ret[name] = lint_rule(ctx, rule)
n_rules: int = len(source_rules)
with capa.helpers.CapaProgressBar(transient=True, console=capa.helpers.log_console) as pbar:
task = pbar.add_task(description="linting", total=n_rules, unit="rule")
for rule in source_rules:
name = rule.name
pbar.update(task, description=width(f"linting rule: {name}", 48))
ret[name] = lint_rule(ctx, rule)
pbar.advance(task)
return ret
@@ -1020,18 +1009,18 @@ def main(argv=None):
logger.debug("lints ran for ~ %02d:%02dm", min, sec)
if warned_rules:
print(orange("rules with WARN:"))
print("[yellow]rules with WARN:[/yellow]")
for warned_rule in sorted(warned_rules):
print(" - " + warned_rule)
print()
if failed_rules:
print(red("rules with FAIL:"))
print("[red]rules with FAIL:[/red]")
for failed_rule in sorted(failed_rules):
print(" - " + failed_rule)
return 1
else:
logger.info(green("no lints failed, nice!"))
logger.info("[green]no lints failed, nice![/green]")
return 0

View File

@@ -42,9 +42,10 @@ import logging
import argparse
import subprocess
import tqdm
import humanize
import tabulate
from rich import box
from rich.table import Table
from rich.console import Console
import capa.main
import capa.perf
@@ -92,51 +93,61 @@ def main(argv=None):
except capa.main.ShouldExitError as e:
return e.status_code
with tqdm.tqdm(total=args.number * args.repeat, leave=False) as pbar:
with capa.helpers.CapaProgressBar(console=capa.helpers.log_console) as progress:
total_iterations = args.number * args.repeat
task = progress.add_task("profiling", total=total_iterations)
def do_iteration():
capa.perf.reset()
capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
pbar.update(1)
progress.advance(task)
samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)
logger.debug("perf: find capabilities: min: %0.2fs", (min(samples) / float(args.number)))
logger.debug("perf: find capabilities: avg: %0.2fs", (sum(samples) / float(args.repeat) / float(args.number)))
logger.debug(
"perf: find capabilities: avg: %0.2fs",
(sum(samples) / float(args.repeat) / float(args.number)),
)
logger.debug("perf: find capabilities: max: %0.2fs", (max(samples) / float(args.number)))
for counter, count in capa.perf.counters.most_common():
logger.debug("perf: counter: %s: %s", counter, count)
print(
tabulate.tabulate(
[(counter, humanize.intcomma(count)) for counter, count in capa.perf.counters.most_common()],
headers=["feature class", "evaluation count"],
tablefmt="github",
)
)
print()
console = Console()
print(
tabulate.tabulate(
[
(
args.label,
"{:,}".format(capa.perf.counters["evaluate.feature"]),
# python documentation indicates that min(samples) should be preferred,
# so lets put that first.
#
# https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
f"{(min(samples) / float(args.number)):.2f}s",
f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
f"{(max(samples) / float(args.number)):.2f}s",
)
],
headers=["label", "count(evaluations)", "min(time)", "avg(time)", "max(time)"],
tablefmt="github",
)
table1 = Table(box=box.MARKDOWN)
table1.add_column("feature class")
table1.add_column("evaluation count")
for counter, count in capa.perf.counters.most_common():
table1.add_row(counter, humanize.intcomma(count))
console.print(table1)
console.print()
table2 = Table(box=box.MARKDOWN)
table2.add_column("label")
table2.add_column("count(evaluations)", style="magenta")
table2.add_column("min(time)", style="green")
table2.add_column("avg(time)", style="yellow")
table2.add_column("max(time)", style="red")
table2.add_row(
args.label,
# python documentation indicates that min(samples) should be preferred,
# so lets put that first.
#
# https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
"{:,}".format(capa.perf.counters["evaluate.feature"]),
f"{(min(samples) / float(args.number)):.2f}s",
f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
f"{(max(samples) / float(args.number)):.2f}s",
)
console.print(table2)
return 0

View File

@@ -12,11 +12,12 @@ import sys
import typing
import logging
import argparse
from typing import Set, Tuple
from typing import Set, List, Tuple
from collections import Counter
import tabulate
from termcolor import colored
from rich import print
from rich.text import Text
from rich.table import Table
import capa.main
import capa.rules
@@ -77,23 +78,30 @@ def get_file_features(
return feature_map
def get_colored(s: str):
def get_colored(s: str) -> Text:
if "(" in s and ")" in s:
s_split = s.split("(", 1)
s_color = colored(s_split[1][:-1], "cyan")
return f"{s_split[0]}({s_color})"
return Text.assemble(s_split[0], "(", (s_split[1][:-1], "cyan"), ")")
else:
return colored(s, "cyan")
return Text(s, style="cyan")
def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]):
unused_features = []
unused_features: List[Tuple[str, Text]] = []
for feature, count in reversed(feature_map.most_common()):
if feature in rules_feature_set:
continue
unused_features.append((str(count), get_colored(str(feature))))
table = Table(title="Unused Features", box=None)
table.add_column("Count", style="dim")
table.add_column("Feature")
for count_str, feature_text in unused_features:
table.add_row(count_str, feature_text)
print("\n")
print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain"))
print(table)
print("\n")

View File

@@ -10,7 +10,6 @@ import textwrap
from unittest.mock import Mock
import fixtures
import rich.console
import capa.rules
import capa.render.utils
@@ -24,6 +23,7 @@ import capa.features.basicblock
import capa.render.result_document
import capa.render.result_document as rd
import capa.features.freeze.features
from capa.render.utils import Console
def test_render_number():
@@ -154,7 +154,7 @@ def test_render_meta_maec():
# capture the output of render_maec
f = io.StringIO()
console = rich.console.Console(file=f)
console = Console(file=f)
capa.render.default.render_maec(mock_rd, console)
output = f.getvalue()
@@ -198,7 +198,7 @@ def test_render_meta_maec():
],
)
def test_render_vverbose_feature(feature, expected):
ostream = capa.render.utils.StringIO()
console = Console(highlight=False)
addr = capa.features.freeze.Address.from_capa(capa.features.address.AbsoluteVirtualAddress(0x401000))
feature = capa.features.freeze.features.feature_from_capa(feature)
@@ -240,6 +240,8 @@ def test_render_vverbose_feature(feature, expected):
matches=(),
)
capa.render.vverbose.render_feature(ostream, layout, rm, matches, feature, indent=0)
with console.capture() as capture:
capa.render.vverbose.render_feature(console, layout, rm, matches, feature, indent=0)
assert ostream.getvalue().strip() == expected
output = capture.get().strip()
assert output == expected