refactor main to for ease of integration (#1948)

* main: split main into a bunch of "main routines"

[wip] since there are a few references to BinExport2
that are in progress elsewhre. Next commit will remove them.

* main: remove references to wip BinExport2 code

* changelog

* main: rename first position argument "input_file"

closes #1946

* main: linters

* main: move rule-related routines to capa.rules

ref #1821

* main: extract routines to capa.loader module

closes #1821

* add loader module

* loader: learn to load freeze format

* freeze: use new cli arg handling

* Update capa/loader.py

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>

* main: remove duplicate documentation

* main: add doc about where some functions live

* scripts: migrate to new main wrapper helper functions

* scripts: port to main routines

* main: better handle auto-detection of backend

* scripts: migrate bulk-process to main wrappers

* scripts: migrate scripts to main wrappers

* main: rename *_from_args to *_from_cli

* changelog

* cache-ruleset: remove duplication

* main: fix tag handling

* cache-ruleset: fix cli args

* cache-ruleset: fix special rule cli handling

* scripts: fix type bytes

* main: remove old TODO message

* loader: fix references to binja extractor

---------

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
This commit is contained in:
Willi Ballenthin
2024-01-29 13:59:05 +01:00
committed by GitHub
parent d2e1a47192
commit c3301d3b3f
26 changed files with 1321 additions and 1168 deletions

View File

@@ -36,7 +36,7 @@ example:
usage:
usage: bulk-process.py [-h] [-r RULES] [-d] [-q] [-n PARALLELISM] [--no-mp]
input
input_directory
detect capabilities in programs.
@@ -62,7 +62,6 @@ Unless required by applicable law or agreed to in writing, software distributed
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""
import os
import sys
import json
import logging
@@ -74,10 +73,10 @@ from pathlib import Path
import capa
import capa.main
import capa.rules
import capa.loader
import capa.render.json
import capa.capabilities.common
import capa.render.result_document as rd
from capa.features.common import OS_AUTO
logger = logging.getLogger("capa")
@@ -87,11 +86,8 @@ def get_capa_results(args):
run capa against the file at the given path, using the given rules.
args is a tuple, containing:
rules (capa.rules.RuleSet): the rules to match
signatures (List[str]): list of file system paths to signature files
format (str): the name of the sample file format
os (str): the name of the operating system
path (str): the file system path to the sample to process
rules, signatures, format, backend, os, input_file
as provided via the CLI arguments.
args is a tuple because i'm not quite sure how to unpack multiple arguments using `map`.
@@ -106,44 +102,58 @@ def get_capa_results(args):
meta (dict): the meta analysis results
capabilities (dict): the matched capabilities and their result objects
"""
rules, sigpaths, format, os_, path = args
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
logger.info("computing capa results for: %s", path)
rules, signatures, format_, backend, os_, input_file = args
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
capa.main.install_common_args(parser, wanted={"rules", "signatures", "format", "os", "backend", "input_file"})
argv = [
"--signatures",
signatures,
"--format",
format_,
"--backend",
backend,
"--os",
os_,
input_file,
]
if rules:
argv += ["--rules", rules]
args = parser.parse_args(args=argv)
try:
extractor = capa.main.get_extractor(
path, format, os_, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True
)
except capa.exceptions.UnsupportedFormatError:
# i'm 100% sure if multiprocessing will reliably raise exceptions across process boundaries.
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
input_format = capa.main.get_input_format_from_cli(args)
rules = capa.main.get_rules_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
sample_path = capa.main.get_sample_path_from_cli(args, backend)
if sample_path is None:
os_ = "unknown"
else:
os_ = capa.loader.get_os(sample_path)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
# i'm not 100% sure if multiprocessing will reliably raise exceptions across process boundaries.
# so instead, return an object with explicit success/failure status.
#
# if success, then status=ok, and results found in property "ok"
# if error, then status=error, and human readable message in property "error"
return {
"path": path,
"status": "error",
"error": f"input file does not appear to be a PE file: {path}",
}
except capa.exceptions.UnsupportedRuntimeError:
return {
"path": path,
"status": "error",
"error": "unsupported runtime or Python interpreter",
}
return {"path": input_file, "status": "error", "error": str(e), "status_code": e.status_code}
except Exception as e:
return {
"path": path,
"path": input_file,
"status": "error",
"error": f"unexpected error: {e}",
}
capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
meta = capa.main.collect_metadata([], path, format, os_, [], extractor, counts)
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
meta = capa.loader.collect_metadata(argv, args.input_file, format_, os_, [], extractor, counts)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)
doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
return {"path": path, "status": "ok", "ok": doc.model_dump()}
return {"path": input_file, "status": "ok", "ok": doc.model_dump()}
def main(argv=None):
@@ -151,30 +161,16 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
capa.main.install_common_args(parser, wanted={"rules", "signatures", "format", "os"})
parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze")
capa.main.install_common_args(parser, wanted={"rules", "signatures", "format", "os", "backend"})
parser.add_argument("input_directory", type=str, help="Path to directory of files to recursively analyze")
parser.add_argument(
"-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor"
)
parser.add_argument("--no-mp", action="store_true", help="disable subprocesses")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
try:
rules = capa.main.get_rules(args.rules)
logger.info("successfully loaded %s rules", len(rules))
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))
return -1
try:
sig_paths = capa.main.get_signatures(args.signatures)
except IOError as e:
logger.error("%s", str(e))
return -1
samples = []
for file in Path(args.input).rglob("*"):
for file in Path(args.input_directory).rglob("*"):
samples.append(file)
cpu_count = multiprocessing.cpu_count()
@@ -203,18 +199,22 @@ def main(argv=None):
logger.debug("using process mapper")
mapper = pmap
rules = args.rules
if rules == [capa.main.RULES_PATH_DEFAULT_STRING]:
rules = None
results = {}
for result in mapper(
get_capa_results,
[(rules, sig_paths, "pe", OS_AUTO, sample) for sample in samples],
[(rules, args.signatures, args.format, args.backend, args.os, str(sample)) for sample in samples],
parallelism=args.parallelism,
):
if result["status"] == "error":
logger.warning(result["error"])
elif result["status"] == "ok":
results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json(
exclude_none=True
)
doc = rd.ResultDocument.model_validate(result["ok"]).model_dump_json(exclude_none=True)
results[result["path"]] = json.loads(doc)
else:
raise ValueError(f"unexpected status: {result['status']}")

View File

@@ -36,20 +36,27 @@ def main(argv=None):
parser = argparse.ArgumentParser(description="Cache ruleset.")
capa.main.install_common_args(parser)
parser.add_argument("rules", type=str, action="append", help="Path to rules")
parser.add_argument("rules", type=str, help="Path to rules directory")
parser.add_argument("cache", type=str, help="Path to cache directory")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
if args.debug:
logging.getLogger("capa").setLevel(logging.DEBUG)
# don't use capa.main.handle_common_args
# because it expects a different format for the --rules argument
if args.quiet:
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
logging.getLogger("capa").setLevel(logging.ERROR)
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
try:
cache_dir = Path(args.cache)
cache_dir.mkdir(parents=True, exist_ok=True)
rules = capa.main.get_rules(args.rules, cache_dir)
rules = capa.rules.get_rules([Path(args.rules)], cache_dir)
logger.info("successfully loaded %s rules", len(rules))
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))

View File

@@ -723,36 +723,33 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Capa to YARA rule converter")
parser.add_argument("rules", type=str, help="Path to rules")
parser.add_argument("--private", "-p", action="store_true", help="Create private rules", default=False)
capa.main.install_common_args(parser, wanted={"tag"})
parser.add_argument("--private", "-p", action="store_true", help="Create private rules", default=False)
parser.add_argument("rules", type=str, help="Path to rules directory")
args = parser.parse_args(args=argv)
make_priv = args.private
if args.verbose:
level = logging.DEBUG
elif args.quiet:
level = logging.ERROR
# don't use capa.main.handle_common_args
# because it expects a different format for the --rules argument
if args.quiet:
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
level = logging.INFO
logging.basicConfig(level=level)
logging.getLogger("capa2yara").setLevel(level)
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
try:
rules = capa.main.get_rules([Path(args.rules)])
namespaces = capa.rules.index_rules_by_namespace(list(rules.rules.values()))
logger.info("successfully loaded %d rules (including subscope rules which will be ignored)", len(rules))
if args.tag:
rules = rules.filter_rules_by_meta(args.tag)
logger.debug("selected %d rules", len(rules))
for i, r in enumerate(rules.rules, 1):
logger.debug(" %d. %s", i, r)
rules = capa.rules.get_rules([Path(args.rules)])
logger.info("successfully loaded %s rules", len(rules))
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))
return -1
namespaces = capa.rules.index_rules_by_namespace(list(rules.rules.values()))
output_yar(
"// Rules from Mandiant's https://github.com/mandiant/capa-rules converted to YARA using https://github.com/mandiant/capa/blob/master/scripts/capa2yara.py by Arnim Rupp"
)
@@ -780,10 +777,10 @@ def main(argv=None):
cround += 1
logger.info("doing convert_rules(), round: %d", cround)
num_rules = len(converted_rules)
count_incomplete += convert_rules(rules, namespaces, cround, make_priv)
count_incomplete += convert_rules(rules, namespaces, cround, args.private)
# one last round to collect all unconverted rules
count_incomplete += convert_rules(rules, namespaces, 9000, make_priv)
count_incomplete += convert_rules(rules, namespaces, 9000, args.private)
stats = "\n// converted rules : " + str(len(converted_rules))
stats += "\n// among those are incomplete : " + str(count_incomplete)

View File

@@ -15,6 +15,7 @@ from pathlib import Path
import capa.main
import capa.rules
import capa.engine
import capa.loader
import capa.features
import capa.render.json
import capa.render.utils as rutils
@@ -168,19 +169,19 @@ def render_dictionary(doc: rd.ResultDocument) -> Dict[str, Any]:
# ==== render dictionary helpers
def capa_details(rules_path: Path, file_path: Path, output_format="dictionary"):
def capa_details(rules_path: Path, input_file: Path, output_format="dictionary"):
# load rules from disk
rules = capa.main.get_rules([rules_path])
rules = capa.rules.get_rules([rules_path])
# extract features and find capabilities
extractor = capa.main.get_extractor(
file_path, FORMAT_AUTO, OS_AUTO, capa.main.BACKEND_VIV, [], False, disable_progress=True
extractor = capa.loader.get_extractor(
input_file, FORMAT_AUTO, OS_AUTO, capa.main.BACKEND_VIV, [], should_save_workspace=False, disable_progress=True
)
capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
# collect metadata (used only to make rendering more complete)
meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, [rules_path], extractor, counts)
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
meta = capa.loader.collect_metadata([], input_file, FORMAT_AUTO, OS_AUTO, [rules_path], extractor, counts)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)
capa_output: Any = False
@@ -206,7 +207,7 @@ if __name__ == "__main__":
RULES_PATH = capa.main.get_default_root() / "rules"
parser = argparse.ArgumentParser(description="Extract capabilities from a file")
parser.add_argument("file", help="file to extract capabilities from")
parser.add_argument("input_file", help="file to extract capabilities from")
parser.add_argument("--rules", help="path to rules directory", default=RULES_PATH)
parser.add_argument(
"--output", help="output format", choices=["dictionary", "json", "texttable"], default="dictionary"
@@ -214,5 +215,5 @@ if __name__ == "__main__":
args = parser.parse_args()
if args.rules != RULES_PATH:
args.rules = Path(args.rules)
print(capa_details(args.rules, Path(args.file), args.output))
print(capa_details(args.rules, Path(args.input_file), args.output))
sys.exit(0)

View File

@@ -19,6 +19,7 @@ import logging
import argparse
from pathlib import Path
import capa.main
import capa.rules
logger = logging.getLogger("capafmt")
@@ -29,6 +30,7 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Capa rule formatter.")
capa.main.install_common_args(parser)
parser.add_argument("path", type=str, help="Path to rule to format")
parser.add_argument(
"-i",
@@ -37,8 +39,6 @@ def main(argv=None):
dest="in_place",
help="Format the rule in place, otherwise, write formatted rule to STDOUT",
)
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
parser.add_argument(
"-c",
"--check",
@@ -47,15 +47,10 @@ def main(argv=None):
)
args = parser.parse_args(args=argv)
if args.verbose:
level = logging.DEBUG
elif args.quiet:
level = logging.ERROR
else:
level = logging.INFO
logging.basicConfig(level=level)
logging.getLogger("capafmt").setLevel(level)
try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code
rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True)
reformatted_rule = rule.to_yaml()

View File

@@ -17,8 +17,8 @@ import logging
import argparse
import contextlib
from typing import BinaryIO
from pathlib import Path
import capa.main
import capa.helpers
import capa.features.extractors.elf
@@ -36,28 +36,16 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Detect the underlying OS for the given ELF file")
parser.add_argument("sample", type=str, help="path to ELF file")
logging_group = parser.add_argument_group("logging arguments")
logging_group.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
logging_group.add_argument(
"-q", "--quiet", action="store_true", help="disable all status output except fatal errors"
)
capa.main.install_common_args(parser, wanted={"input_file"})
args = parser.parse_args(args=argv)
if args.quiet:
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
try:
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
except capa.main.ShouldExitError as e:
return e.status_code
f = Path(args.sample).open("rb")
f = args.input_file.open("rb")
with contextlib.closing(f):
try:

View File

@@ -48,7 +48,7 @@ def find_overlapping_rules(new_rule_path, rules_path):
overlapping_rules = []
# capa.rules.RuleSet stores all rules in given paths
ruleset = capa.main.get_rules(rules_path)
ruleset = capa.rules.get_rules(rules_path)
for rule_name, rule in ruleset.rules.items():
rule_features = rule.extract_all_features()

View File

@@ -39,6 +39,7 @@ import tqdm.contrib.logging
import capa.main
import capa.rules
import capa.engine
import capa.loader
import capa.helpers
import capa.features.insn
import capa.capabilities.common
@@ -363,8 +364,14 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
format_ = capa.helpers.get_auto_format(nice_path)
logger.debug("analyzing sample: %s", nice_path)
extractor = capa.main.get_extractor(
nice_path, format_, OS_AUTO, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
extractor = capa.loader.get_extractor(
nice_path,
format_,
OS_AUTO,
capa.main.BACKEND_VIV,
DEFAULT_SIGNATURES,
should_save_workspace=False,
disable_progress=True,
)
capabilities, _ = capa.capabilities.common.find_capabilities(ctx.rules, extractor, disable_progress=True)
@@ -990,7 +997,11 @@ def main(argv=None):
help="Enable thorough linting - takes more time, but does a better job",
)
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code
if args.debug:
logging.getLogger("capa").setLevel(logging.DEBUG)
@@ -1002,16 +1013,9 @@ def main(argv=None):
time0 = time.time()
try:
rules = capa.main.get_rules(args.rules)
logger.info("successfully loaded %s rules", rules.source_rule_count)
if args.tag:
rules = rules.filter_rules_by_meta(args.tag)
logger.debug("selected %s rules", len(rules))
for i, r in enumerate(rules.rules, 1):
logger.debug(" %d. %s", i, r)
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))
return -1
rules = capa.main.get_rules_from_cli(args)
except capa.main.ShouldExitError as e:
return e.status_code
logger.info("collecting potentially referenced samples")
samples_path = Path(args.samples)

View File

@@ -62,6 +62,7 @@ import capa.engine
import capa.helpers
import capa.features
import capa.features.freeze
from capa.loader import BACKEND_VIV
logger = logging.getLogger("capa.match-function-id")
@@ -71,61 +72,53 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="FLIRT match each function")
parser.add_argument("sample", type=str, help="Path to sample to analyze")
capa.main.install_common_args(parser, wanted={"input_file", "signatures", "format"})
parser.add_argument(
"-F",
"--function",
type=lambda x: int(x, 0x10),
help="match a specific function by VA, rather than add functions",
)
parser.add_argument(
"--signature",
action="append",
dest="signatures",
type=str,
default=[],
help="use the given signatures to identify library functions, file system paths to .sig/.pat files.",
)
parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
args = parser.parse_args(args=argv)
if args.quiet:
logging.basicConfig(level=logging.ERROR)
logging.getLogger().setLevel(logging.ERROR)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
# disable vivisect-related logging, it's verbose and not relevant for capa users
capa.main.set_vivisect_log_level(logging.CRITICAL)
try:
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
input_format = capa.main.get_input_format_from_cli(args)
sig_paths = capa.main.get_signatures_from_cli(args, input_format, BACKEND_VIV)
except capa.main.ShouldExitError as e:
return e.status_code
analyzers = []
for sigpath in args.signatures:
sigs = viv_utils.flirt.load_flirt_signature(sigpath)
for sigpath in sig_paths:
sigs = viv_utils.flirt.load_flirt_signature(str(sigpath))
with capa.main.timing("flirt: compiling sigs"):
matcher = flirt.compile(sigs)
analyzer = viv_utils.flirt.FlirtFunctionAnalyzer(matcher, sigpath)
analyzer = viv_utils.flirt.FlirtFunctionAnalyzer(matcher, str(sigpath))
logger.debug("registering viv function analyzer: %s", repr(analyzer))
analyzers.append(analyzer)
vw = viv_utils.getWorkspace(args.sample, analyze=True, should_save=False)
vw = viv_utils.getWorkspace(str(args.input_file), analyze=True, should_save=False)
functions = vw.getFunctions()
if args.function:
functions = [args.function]
seen = set()
for function in functions:
logger.debug("matching function: 0x%04x", function)
for analyzer in analyzers:
name = viv_utils.flirt.match_function_flirt_signatures(analyzer.matcher, vw, function)
viv_utils.flirt.match_function_flirt_signatures(analyzer.matcher, vw, function)
name = viv_utils.get_function_name(vw, function)
if name:
print(f"0x{function:04x}: {name}")
key = (function, name)
if key in seen:
continue
else:
print(f"0x{function:04x}: {name}")
seen.add(key)
return 0

View File

@@ -41,7 +41,6 @@ import timeit
import logging
import argparse
import subprocess
from pathlib import Path
import tqdm
import tabulate
@@ -50,6 +49,7 @@ import capa.main
import capa.perf
import capa.rules
import capa.engine
import capa.loader
import capa.helpers
import capa.features
import capa.features.common
@@ -74,42 +74,22 @@ def main(argv=None):
label += " (dirty)"
parser = argparse.ArgumentParser(description="Profile capa performance")
capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "rules"})
capa.main.install_common_args(parser, wanted={"format", "os", "input_file", "signatures", "rules"})
parser.add_argument("--number", type=int, default=3, help="batch size of profile collection")
parser.add_argument("--repeat", type=int, default=30, help="batch count of profile collection")
parser.add_argument("--label", type=str, default=label, help="description of the profile collection")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
try:
taste = capa.helpers.get_file_taste(Path(args.sample))
except IOError as e:
logger.error("%s", str(e))
return -1
try:
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
input_format = capa.main.get_input_format_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
with capa.main.timing("load rules"):
rules = capa.main.get_rules(args.rules)
except IOError as e:
logger.error("%s", str(e))
return -1
try:
sig_paths = capa.main.get_signatures(args.signatures)
except IOError as e:
logger.error("%s", str(e))
return -1
if (args.format == "freeze") or (
args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste)
):
extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
else:
extractor = capa.main.get_extractor(
args.sample, args.format, args.os, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False
)
rules = capa.main.get_rules_from_cli(args)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
return e.status_code
with tqdm.tqdm(total=args.number * args.repeat, leave=False) as pbar:

View File

@@ -33,6 +33,7 @@ import logging
import argparse
from pathlib import Path
import capa.main
import capa.render.proto
import capa.render.result_document
@@ -44,26 +45,14 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Convert a capa JSON result document into the protobuf format")
capa.main.install_common_args(parser)
parser.add_argument("json", type=str, help="path to JSON result document file, produced by `capa --json`")
logging_group = parser.add_argument_group("logging arguments")
logging_group.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
logging_group.add_argument(
"-q", "--quiet", action="store_true", help="disable all status output except fatal errors"
)
args = parser.parse_args(args=argv)
if args.quiet:
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code
rd = capa.render.result_document.ResultDocument.from_file(Path(args.json))
pb = capa.render.proto.doc_to_pb2(rd)

View File

@@ -36,6 +36,7 @@ import logging
import argparse
from pathlib import Path
import capa.main
import capa.render.json
import capa.render.proto
import capa.render.proto.capa_pb2
@@ -49,28 +50,16 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Convert a capa protobuf result document into the JSON format")
capa.main.install_common_args(parser)
parser.add_argument(
"pb", type=str, help="path to protobuf result document file, produced by `proto-from-results.py`"
)
logging_group = parser.add_argument_group("logging arguments")
logging_group.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
logging_group.add_argument(
"-q", "--quiet", action="store_true", help="disable all status output except fatal errors"
)
args = parser.parse_args(args=argv)
if args.quiet:
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code
pb = Path(args.pb).read_bytes()

View File

@@ -55,13 +55,11 @@ Unless required by applicable law or agreed to in writing, software distributed
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""
import os
import sys
import logging
import argparse
import collections
from typing import Dict
from pathlib import Path
import colorama
@@ -76,10 +74,7 @@ import capa.render.verbose
import capa.features.freeze
import capa.capabilities.common
import capa.render.result_document as rd
from capa.helpers import get_file_taste
from capa.features.common import FORMAT_AUTO
from capa.features.freeze import Address
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor
logger = logging.getLogger("capa.show-capabilities-by-function")
@@ -142,67 +137,37 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
capa.main.install_common_args(parser, wanted={"format", "os", "backend", "sample", "signatures", "rules", "tag"})
capa.main.install_common_args(
parser, wanted={"format", "os", "backend", "input_file", "signatures", "rules", "tag"}
)
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
try:
taste = get_file_taste(Path(args.sample))
except IOError as e:
logger.error("%s", str(e))
return -1
try:
rules = capa.main.get_rules(args.rules)
logger.info("successfully loaded %s rules", len(rules))
if args.tag:
rules = rules.filter_rules_by_meta(args.tag)
logger.info("selected %s rules", len(rules))
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))
return -1
try:
sig_paths = capa.main.get_signatures(args.signatures)
except IOError as e:
logger.error("%s", str(e))
return -1
if (args.format == "freeze") or (args.format == FORMAT_AUTO and capa.features.freeze.is_freeze(taste)):
format_ = "freeze"
extractor: FeatureExtractor = capa.features.freeze.load(Path(args.sample).read_bytes())
else:
format_ = args.format
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
try:
extractor = capa.main.get_extractor(
args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace
)
assert isinstance(extractor, StaticFeatureExtractor)
except capa.exceptions.UnsupportedFormatError:
capa.helpers.log_unsupported_format_error()
return -1
except capa.exceptions.UnsupportedRuntimeError:
capa.helpers.log_unsupported_runtime_error()
return -1
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
input_format = capa.main.get_input_format_from_cli(args)
rules = capa.main.get_rules_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
sample_path = capa.main.get_sample_path_from_cli(args, backend)
if sample_path is None:
os_ = "unknown"
else:
os_ = capa.loader.get_os(sample_path)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
return e.status_code
capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor)
meta = capa.main.collect_metadata(argv, args.sample, format_, args.os, args.rules, extractor, counts)
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, counts)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)
if capa.capabilities.common.has_file_limitation(rules, capabilities):
# bail if capa encountered file limitation e.g. a packed binary
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
return -1
return capa.main.E_FILE_LIMITATION
# colorama will detect:
# - when on Windows console, and fixup coloring, and
# - when not an interactive session, and disable coloring
# renderers should use coloring and assume it will be stripped out if necessary.
colorama.init()
doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
print(render_matches_by_function(doc))
colorama.deinit()

View File

@@ -64,16 +64,15 @@ Example::
insn: 0x10001027: mnemonic(shl)
...
"""
import os
import sys
import logging
import argparse
from typing import Tuple
from pathlib import Path
import capa.main
import capa.rules
import capa.engine
import capa.loader
import capa.helpers
import capa.features
import capa.exceptions
@@ -81,17 +80,9 @@ import capa.render.verbose as v
import capa.features.freeze
import capa.features.address
import capa.features.extractors.pefile
from capa.helpers import get_auto_format, log_unsupported_runtime_error
from capa.helpers import assert_never
from capa.features.insn import API, Number
from capa.features.common import (
FORMAT_AUTO,
FORMAT_CAPE,
FORMAT_FREEZE,
DYNAMIC_FORMATS,
String,
Feature,
is_global_feature,
)
from capa.features.common import String, Feature, is_global_feature
from capa.features.extractors.base_extractor import FunctionHandle, StaticFeatureExtractor, DynamicFeatureExtractor
logger = logging.getLogger("capa.show-features")
@@ -106,56 +97,33 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample")
capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend"})
capa.main.install_common_args(parser, wanted={"input_file", "format", "os", "signatures", "backend"})
parser.add_argument("-F", "--function", type=str, help="Show features for specific function")
parser.add_argument("-P", "--process", type=str, help="Show features for specific process name")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
if args.function and args.backend == "pefile":
print("pefile backend does not support extracting function features")
return -1
try:
_ = capa.helpers.get_file_taste(Path(args.sample))
except IOError as e:
logger.error("%s", str(e))
return -1
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
try:
sig_paths = capa.main.get_signatures(args.signatures)
except IOError as e:
logger.error("%s", str(e))
return -1
format_ = args.format if args.format != FORMAT_AUTO else get_auto_format(args.sample)
if format_ == FORMAT_FREEZE:
# this should be moved above the previous if clause after implementing
# feature freeze for the dynamic analysis flavor
extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
else:
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
try:
extractor = capa.main.get_extractor(
args.sample, format_, args.os, args.backend, sig_paths, should_save_workspace
)
except capa.exceptions.UnsupportedFormatError as e:
if format_ == FORMAT_CAPE:
capa.helpers.log_unsupported_cape_report_error(str(e))
else:
capa.helpers.log_unsupported_format_error()
return -1
except capa.exceptions.UnsupportedRuntimeError:
log_unsupported_runtime_error()
if args.function and args.backend == "pefile":
print("pefile backend does not support extracting function features")
return -1
if format_ in DYNAMIC_FORMATS:
assert isinstance(extractor, DynamicFeatureExtractor)
input_format = capa.main.get_input_format_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
return e.status_code
if isinstance(extractor, DynamicFeatureExtractor):
print_dynamic_analysis(extractor, args)
else:
assert isinstance(extractor, StaticFeatureExtractor)
elif isinstance(extractor, StaticFeatureExtractor):
print_static_analysis(extractor, args)
else:
assert_never(extractor)
return 0

View File

@@ -8,13 +8,11 @@ Unless required by applicable law or agreed to in writing, software distributed
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""
import os
import sys
import typing
import logging
import argparse
from typing import Set, Tuple
from pathlib import Path
from collections import Counter
import tabulate
@@ -31,8 +29,7 @@ import capa.features.freeze
import capa.features.address
import capa.features.extractors.pefile
import capa.features.extractors.base_extractor
from capa.helpers import log_unsupported_runtime_error
from capa.features.common import Feature
from capa.features.common import FORMAT_FREEZE, Feature
from capa.features.extractors.base_extractor import FunctionHandle, StaticFeatureExtractor
logger = logging.getLogger("show-unused-features")
@@ -42,10 +39,9 @@ def format_address(addr: capa.features.address.Address) -> str:
return v.format_address(capa.features.freeze.Address.from_capa((addr)))
def get_rules_feature_set(rules_path) -> Set[Feature]:
ruleset = capa.main.get_rules(rules_path)
def get_rules_feature_set(rules: capa.rules.RuleSet) -> Set[Feature]:
rules_feature_set: Set[Feature] = set()
for _, rule in ruleset.rules.items():
for _, rule in rules.rules.items():
rules_feature_set.update(rule.extract_all_features())
return rules_feature_set
@@ -106,44 +102,23 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Show the features that capa doesn't have rules for yet")
capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend", "rules"})
capa.main.install_common_args(parser, wanted={"format", "os", "input_file", "signatures", "backend", "rules"})
parser.add_argument("-F", "--function", type=str, help="Show features for specific function")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
if args.function and args.backend == "pefile":
print("pefile backend does not support extracting function features")
return -1
try:
taste = capa.helpers.get_file_taste(Path(args.sample))
except IOError as e:
logger.error("%s", str(e))
return -1
try:
sig_paths = capa.main.get_signatures(args.signatures)
except IOError as e:
logger.error("%s", str(e))
return -1
if (args.format == "freeze") or (
args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste)
):
extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
else:
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
try:
extractor = capa.main.get_extractor(
args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace
)
except capa.exceptions.UnsupportedFormatError:
capa.helpers.log_unsupported_format_error()
return -1
except capa.exceptions.UnsupportedRuntimeError:
log_unsupported_runtime_error()
return -1
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
rules = capa.main.get_rules_from_cli(args)
input_format = capa.main.get_input_format_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
return e.status_code
assert isinstance(extractor, StaticFeatureExtractor), "only static analysis supported today"
@@ -159,7 +134,7 @@ def main(argv=None):
function_handles = tuple(extractor.get_functions())
if args.function:
if args.format == "freeze":
if input_format == FORMAT_FREEZE:
function_handles = tuple(filter(lambda fh: fh.address == args.function, function_handles))
else:
function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles))
@@ -174,7 +149,7 @@ def main(argv=None):
feature_map.update(get_file_features(function_handles, extractor))
rules_feature_set = get_rules_feature_set(args.rules)
rules_feature_set = get_rules_feature_set(rules)
print_unused_features(feature_map, rules_feature_set)
return 0
@@ -206,7 +181,8 @@ def ida_main():
feature_map.update(get_file_features(function_handles, extractor))
rules_path = capa.main.get_default_root() / "rules"
rules_feature_set = get_rules_feature_set([rules_path])
rules = capa.rules.get_rules([rules_path])
rules_feature_set = get_rules_feature_set(rules)
print_unused_features(feature_map, rules_feature_set)