Add redirect print to capa main

This commit is contained in:
Aayush Goel
2023-05-17 23:57:52 +05:30
parent ad611c2058
commit 275386806d
4 changed files with 69 additions and 62 deletions

View File

@@ -22,6 +22,7 @@
- improve ELF strtab and needed parsing @mr-tz - improve ELF strtab and needed parsing @mr-tz
- better handle exceptional cases when parsing ELF files [#1458](https://github.com/mandiant/capa/issues/1458) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - better handle exceptional cases when parsing ELF files [#1458](https://github.com/mandiant/capa/issues/1458) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- Improved testing coverage for Binary Ninja Backend [#1446](https://github.com/mandiant/capa/issues/1446) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - Improved testing coverage for Binary Ninja Backend [#1446](https://github.com/mandiant/capa/issues/1446) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- Add redirect print to tqdm for capa main [#749](https://github.com/mandiant/capa/issues/749) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- extractor: fix binja installation path detection does not work with Python 3.11 - extractor: fix binja installation path detection does not work with Python 3.11
### capa explorer IDA Pro plugin ### capa explorer IDA Pro plugin

View File

@@ -6,9 +6,13 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
import os import os
import inspect
import logging import logging
import contextlib
from typing import NoReturn from typing import NoReturn
import tqdm
from capa.exceptions import UnsupportedFormatError from capa.exceptions import UnsupportedFormatError
from capa.features.common import FORMAT_PE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format from capa.features.common import FORMAT_PE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format
@@ -85,6 +89,39 @@ def get_format(sample: str) -> str:
return FORMAT_UNKNOWN return FORMAT_UNKNOWN
@contextlib.contextmanager
def redirecting_print_to_tqdm(disable_progress):
    """
    tqdm (progress bar) expects to have fairly tight control over console output.
    so calls to `print()` will break the progress bar and make things look bad.
    so, this context manager temporarily replaces the `print` implementation
    with one that is compatible with tqdm.
    via: https://stackoverflow.com/a/42424890/87207

    args:
      disable_progress: when truthy, the replacement `print` forwards straight
        to the builtin `print` and never touches tqdm.

    the builtin `print` is restored when the context exits, including when the
    body raises.
    """
    old_print = print

    def new_print(*args, **kwargs):
        if disable_progress:
            # the caller asked for no progress output, so bypass tqdm entirely.
            old_print(*args, **kwargs)
            return

        try:
            tqdm.tqdm.write(*args, **kwargs)
        except Exception:
            # If tqdm.tqdm.write raises an error, use builtin print.
            # only `Exception` is caught here: KeyboardInterrupt and SystemExit
            # must propagate rather than be swallowed by the fallback.
            old_print(*args, **kwargs)

    try:
        # Globally replace print with new_print.
        # Verified this works manually on Python 3.11:
        #     >>> import inspect
        #     >>> inspect.builtins
        #     <module 'builtins' (built-in)>
        inspect.builtins.print = new_print  # type: ignore
        yield
    finally:
        inspect.builtins.print = old_print  # type: ignore
def log_unsupported_format_error(): def log_unsupported_format_error():
logger.error("-" * 80) logger.error("-" * 80)
logger.error(" Input file does not appear to be a PE or ELF file.") logger.error(" Input file does not appear to be a PE or ELF file.")

View File

@@ -53,6 +53,7 @@ from capa.helpers import (
get_file_taste, get_file_taste,
get_auto_format, get_auto_format,
log_unsupported_os_error, log_unsupported_os_error,
redirecting_print_to_tqdm,
log_unsupported_arch_error, log_unsupported_arch_error,
log_unsupported_format_error, log_unsupported_format_error,
) )
@@ -251,38 +252,39 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
"library_functions": {}, "library_functions": {},
} # type: Dict[str, Any] } # type: Dict[str, Any]
pbar = tqdm.tqdm with redirecting_print_to_tqdm(disable_progress):
if disable_progress: pbar = tqdm.tqdm
# do not use tqdm to avoid unnecessary side effects when caller intends if disable_progress:
# to disable progress completely # do not use tqdm to avoid unnecessary side effects when caller intends
def pbar(s, *args, **kwargs): # to disable progress completely
return s def pbar(s, *args, **kwargs):
return s
functions = list(extractor.get_functions()) functions = list(extractor.get_functions())
n_funcs = len(functions) n_funcs = len(functions)
pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions") pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions")
for f in pb: for f in pb:
if extractor.is_library_function(f.address): if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address) function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function 0x%x (%s)", f.address, function_name) logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
meta["library_functions"][f.address] = function_name meta["library_functions"][f.address] = function_name
n_libs = len(meta["library_functions"]) n_libs = len(meta["library_functions"])
percentage = round(100 * (n_libs / n_funcs)) percentage = round(100 * (n_libs / n_funcs))
if isinstance(pb, tqdm.tqdm): if isinstance(pb, tqdm.tqdm):
pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)") pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
continue continue
function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(ruleset, extractor, f) function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(ruleset, extractor, f)
meta["feature_counts"]["functions"][f.address] = feature_count meta["feature_counts"]["functions"][f.address] = feature_count
logger.debug("analyzed function 0x%x and extracted %d features", f.address, feature_count) logger.debug("analyzed function 0x%x and extracted %d features", f.address, feature_count)
for rule_name, res in function_matches.items(): for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res) all_function_matches[rule_name].extend(res)
for rule_name, res in bb_matches.items(): for rule_name, res in bb_matches.items():
all_bb_matches[rule_name].extend(res) all_bb_matches[rule_name].extend(res)
for rule_name, res in insn_matches.items(): for rule_name, res in insn_matches.items():
all_insn_matches[rule_name].extend(res) all_insn_matches[rule_name].extend(res)
# collection of features that captures the rule matches within function, BB, and instruction scopes. # collection of features that captures the rule matches within function, BB, and instruction scopes.
# mapping from feature (matched rule) to set of addresses at which it matched. # mapping from feature (matched rule) to set of addresses at which it matched.

View File

@@ -22,13 +22,11 @@ import time
import string import string
import difflib import difflib
import hashlib import hashlib
import inspect
import logging import logging
import pathlib import pathlib
import argparse import argparse
import itertools import itertools
import posixpath import posixpath
import contextlib
from typing import Set, Dict, List from typing import Set, Dict, List
from pathlib import Path from pathlib import Path
from dataclasses import field, dataclass from dataclasses import field, dataclass
@@ -866,37 +864,6 @@ def width(s, count):
return s.ljust(count) return s.ljust(count)
@contextlib.contextmanager
def redirecting_print_to_tqdm():
    """
    tqdm (progress bar) expects to have fairly tight control over console output.
    so calls to `print()` will break the progress bar and make things look bad.
    so, this context manager temporarily replaces the `print` implementation
    with one that is compatible with tqdm.
    via: https://stackoverflow.com/a/42424890/87207

    the builtin `print` is restored when the context exits, including when the
    body raises.
    """
    old_print = print

    def new_print(*args, **kwargs):
        try:
            tqdm.tqdm.write(*args, **kwargs)
        except Exception:
            # If tqdm.tqdm.write raises an error, use builtin print.
            # only `Exception` is caught here: KeyboardInterrupt and SystemExit
            # must propagate rather than be swallowed by the fallback.
            old_print(*args, **kwargs)

    try:
        # Globally replace print with new_print.
        # Verified this works manually on Python 3.11:
        #     >>> import inspect
        #     >>> inspect.builtins
        #     <module 'builtins' (built-in)>
        inspect.builtins.print = new_print  # type: ignore
        yield
    finally:
        inspect.builtins.print = old_print  # type: ignore
def lint(ctx: Context): def lint(ctx: Context):
""" """
Returns: Dict[string, Tuple(int, int)] Returns: Dict[string, Tuple(int, int)]
@@ -907,7 +874,7 @@ def lint(ctx: Context):
source_rules = [rule for rule in ctx.rules.rules.values() if not rule.is_subscope_rule()] source_rules = [rule for rule in ctx.rules.rules.values() if not rule.is_subscope_rule()]
with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule") as pbar: with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule") as pbar:
with redirecting_print_to_tqdm(): with capa.helpers.redirecting_print_to_tqdm(False):
for rule in pbar: for rule in pbar:
name = rule.name name = rule.name
pbar.set_description(width(f"linting rule: {name}", 48)) pbar.set_description(width(f"linting rule: {name}", 48))