mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 07:10:29 -08:00
Change os.path usage to pathlib.Path.
Change the types of args.rules and args.signatures in handle_common_args.
@@ -382,6 +382,7 @@ def load(buf: bytes) -> capa.features.extractors.base_extractor.FeatureExtractor
 def main(argv=None):
     import sys
     import argparse
+    from pathlib import Path

     import capa.main

@@ -398,8 +399,7 @@ def main(argv=None):

     extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False)

-    with open(args.output, "wb") as f:
-        f.write(dump(extractor))
+    Path(args.output).write_bytes(dump(extractor))

     return 0

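Note: the open/write pair collapses into a single pathlib call. A minimal sketch of the equivalence (the filename here is hypothetical):

    from pathlib import Path

    data = b"\x00\x01\x02"

    # before: explicit file handle management
    with open("out.bin", "wb") as f:
        f.write(data)

    # after: Path.write_bytes opens, writes, and closes in one call
    Path("out.bin").write_bytes(data)

    assert Path("out.bin").read_bytes() == data
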
@@ -10,6 +10,7 @@ import inspect
 import logging
 import contextlib
 from typing import NoReturn
+from pathlib import Path

 import tqdm

@@ -31,8 +32,8 @@ def hex(n: int) -> str:
     return f"0x{(n):X}"


-def get_file_taste(sample_path: str) -> bytes:
-    if not os.path.exists(sample_path):
+def get_file_taste(sample_path: Path) -> bytes:
+    if not sample_path.exists():
         raise IOError(f"sample path {sample_path} does not exist or cannot be accessed")
     with open(sample_path, "rb") as f:
         taste = f.read(8)

@@ -9,7 +9,8 @@ import json
 import logging
 import datetime
 import contextlib
-from typing import Optional
+from typing import List, Optional
+from pathlib import Path

 import idc
 import idaapi

@@ -119,7 +120,7 @@ def get_file_sha256():
     return sha256


-def collect_metadata(rules):
+def collect_metadata(rules: List[Path]):
    """ """
    md5 = get_file_md5()
    sha256 = get_file_sha256()

@@ -156,7 +157,7 @@ def collect_metadata(rules):
            arch=arch,
            os=os,
            extractor="ida",
-           rules=rules,
+           rules=tuple(rules),
            base_address=capa.features.freeze.Address.from_capa(idaapi.get_imagebase()),
            layout=rdoc.Layout(
                functions=tuple()

@@ -11,6 +11,7 @@ import logging
 import itertools
 import collections
 from typing import Any, List, Optional
+from pathlib import Path

 import idaapi
 import ida_kernwin

@@ -629,7 +630,7 @@ class CapaExplorerForm(idaapi.PluginForm):
         if not self.ensure_capa_settings_rule_path():
             return False

-        rule_path: str = settings.user.get(CAPA_SETTINGS_RULE_PATH, "")
+        rule_path: Path = Path(settings.user.get(CAPA_SETTINGS_RULE_PATH, ""))
         try:

             def on_load_rule(_, i, total):

@@ -769,7 +770,7 @@ class CapaExplorerForm(idaapi.PluginForm):
         update_wait_box("extracting features")

         try:
-            meta = capa.ida.helpers.collect_metadata([settings.user[CAPA_SETTINGS_RULE_PATH]])
+            meta = capa.ida.helpers.collect_metadata([Path(settings.user[CAPA_SETTINGS_RULE_PATH])])
             capabilities, counts = capa.main.find_capabilities(
                 ruleset, self.feature_extractor, disable_progress=True
             )

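Note one behavioral nuance of the new rule_path line: pathlib normalizes the empty string, so the empty-string fallback no longer stays empty. A small sketch of the behavior:

    from pathlib import Path

    # Path("") normalizes to the current directory, "."
    assert Path("") == Path(".")
    assert str(Path("")) == "."
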
capa/main.py
@@ -21,6 +21,7 @@ import itertools
 import contextlib
 import collections
 from typing import Any, Dict, List, Tuple, Callable
+from pathlib import Path

 import halo
 import tqdm

@@ -428,40 +429,38 @@ def is_running_standalone() -> bool:
     return hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS")


-def get_default_root() -> str:
+def get_default_root() -> Path:
     """
-    get the file system path to the default resources directory.
-    under PyInstaller, this comes from _MEIPASS.
-    under source, this is the root directory of the project.
+    Get the file system path to the default resources directory.
+    Under PyInstaller, this comes from _MEIPASS.
+    Under source, this is the root directory of the project.
     """
-    if is_running_standalone():
-        # pylance/mypy don't like `sys._MEIPASS` because this isn't standard.
-        # its injected by pyinstaller.
-        # so we'll fetch this attribute dynamically.
-        return getattr(sys, "_MEIPASS")
-    else:
-        return os.path.join(os.path.dirname(__file__), "..")
+    try:
+        meipass = Path(getattr(sys, "_MEIPASS"))
+        return meipass
+    except AttributeError:
+        pass
+    # Return the root directory of the project when not running from a PyInstaller'd executable
+    return Path(__file__).resolve().parent.parent


-def get_default_signatures() -> List[str]:
+def get_default_signatures() -> List[Path]:
     """
-    compute a list of file system paths to the default FLIRT signatures.
+    Compute a list of file system paths to the default FLIRT signatures.
     """
-    sigs_path = os.path.join(get_default_root(), "sigs")
+    sigs_path = get_default_root() / "sigs"
     logger.debug("signatures path: %s", sigs_path)

     ret = []
-    for root, _, files in os.walk(sigs_path):
-        for file in files:
-            if not (file.endswith(".pat") or file.endswith(".pat.gz") or file.endswith(".sig")):
-                continue
-
-            ret.append(os.path.join(root, file))
+    for file in sigs_path.rglob("*"):
+        if file.is_file() and file.suffix.lower() in (".pat", ".pat.gz", ".sig"):
+            ret.append(file)

     return ret


-def get_workspace(path, format_, sigpaths):
+def get_workspace(path, format_, sigpaths: List[Path]):
     """
     load the program at the given path into a vivisect workspace using the given format.
     also apply the given FLIRT signatures.

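Note that Path.suffix only captures the final extension, so the rewritten check in get_default_signatures (and the similar one in get_signatures further down) cannot match a compound name like foo.pat.gz the way the old str.endswith chain did. A sketch of the pitfall, and one possible workaround that is not part of this commit:

    from pathlib import Path

    p = Path("sigs/foo.pat.gz")
    assert p.suffix == ".gz"                # only the last extension
    assert p.suffixes == [".pat", ".gz"]
    assert p.suffix.lower() not in (".pat", ".pat.gz", ".sig")  # ".pat.gz" files are skipped

    # matching on the full file name preserves the old behavior:
    assert p.name.lower().endswith((".pat", ".pat.gz", ".sig"))
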
@@ -499,7 +498,7 @@ def get_workspace(path, format_, sigpaths):
     else:
         raise ValueError("unexpected format: " + format_)

-    viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
+    viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])

     vw.analyze()

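viv_utils predates this pathlib migration and expects plain strings, so Path objects are converted back at the API boundary. The same pattern applies to any third-party call that is not pathlib-aware; a minimal sketch:

    from pathlib import PurePosixPath

    sigpaths = [PurePosixPath("sigs/1_flare_msvc_rtf_32_64.sig")]
    assert [str(s) for s in sigpaths] == ["sigs/1_flare_msvc_rtf_32_64.sig"]
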
@@ -513,7 +512,7 @@ def get_extractor(
     format_: str,
     os_: str,
     backend: str,
-    sigpaths: List[str],
+    sigpaths: List[Path],
     should_save_workspace=False,
     disable_progress=False,
 ) -> FeatureExtractor:

@@ -614,42 +613,34 @@ def is_nursery_rule_path(path: str) -> bool:
     return "nursery" in path


-def collect_rule_file_paths(rule_paths: List[str]) -> List[str]:
+def collect_rule_file_paths(rule_paths: List[Path]) -> List[Path]:
     """
-    collect all rule file paths, including those in subdirectories.
+    Collect all rule file paths, including those in subdirectories.
     """
     rule_file_paths = []
     for rule_path in rule_paths:
-        if not os.path.exists(rule_path):
+        if not rule_path.exists():
             raise IOError(f"rule path {rule_path} does not exist or cannot be accessed")

-        if os.path.isfile(rule_path):
+        if rule_path.is_file():
             rule_file_paths.append(rule_path)
-        elif os.path.isdir(rule_path):
+        elif rule_path.is_dir():
             logger.debug("reading rules from directory %s", rule_path)
             for root, _, files in os.walk(rule_path):
                 if ".git" in root:
-                    # the .github directory contains CI config in capa-rules
-                    # this includes some .yml files
-                    # these are not rules
-                    # additionally, .git has files that are not .yml and generate the warning
-                    # skip those too
+                    # Skip certain directories within the walk
                     continue
                 for file in files:
                     if not file.endswith(".yml"):
                         if not (file.startswith(".git") or file.endswith((".git", ".md", ".txt"))):
                             # expect to see .git* files, readme.md, format.md, and maybe a .git directory
                             # other things maybe are rules, but are mis-named.
                             logger.warning("skipping non-.yml file: %s", file)
                         continue
-                    rule_path = os.path.join(root, file)
-                    rule_file_paths.append(rule_path)
+                    rule_file_paths.append(Path(root) / file)

     return rule_file_paths


 # TypeAlias. note: using `foo: TypeAlias = bar` is Python 3.10+
-RulePath = str
+RulePath = Path


 def on_load_rule_default(_path: RulePath, i: int, _total: int) -> None:

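collect_rule_file_paths still walks with os.walk and re-wraps each hit as Path(root) / file. A pathlib-only formulation is also possible; the following is a hedged sketch with a hypothetical helper name, not the commit's code, and the startswith check mirrors the substring test on root above (which also catches .github):

    from pathlib import Path
    from typing import Iterator

    def iter_rule_files(rule_dir: Path) -> Iterator[Path]:
        for p in sorted(rule_dir.rglob("*.yml")):
            if any(part.startswith(".git") for part in p.parts):
                # skip .git metadata and .github CI config, which contain non-rule .yml files
                continue
            yield p
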
@@ -669,17 +660,13 @@ def get_rules(
     """
     if cache_dir is None:
         cache_dir = capa.rules.cache.get_default_cache_directory()

     # rule_paths may contain directory paths,
     # so search for file paths recursively.
     rule_file_paths = collect_rule_file_paths(rule_paths)

     # this list is parallel to `rule_file_paths`:
     # rule_file_paths[i] corresponds to rule_contents[i].
-    rule_contents = []
-    for file_path in rule_file_paths:
-        with open(file_path, "rb") as f:
-            rule_contents.append(f.read())
+    rule_contents = [file_path.read_bytes() for file_path in rule_file_paths]

     ruleset = capa.rules.cache.load_cached_ruleset(cache_dir, rule_contents)
     if ruleset is not None:

@@ -696,8 +683,8 @@ def get_rules(
         except capa.rules.InvalidRule:
             raise
         else:
-            rule.meta["capa/path"] = path
-            if is_nursery_rule_path(path):
+            rule.meta["capa/path"] = str(path)
+            if is_nursery_rule_path(str(path)):
                 rule.meta["capa/nursery"] = True

             rules.append(rule)

@@ -710,27 +697,24 @@ def get_rules(
     return ruleset


-def get_signatures(sigs_path):
-    if not os.path.exists(sigs_path):
+def get_signatures(sigs_path: Path) -> List[Path]:
+    if not sigs_path.exists():
         raise IOError(f"signatures path {sigs_path} does not exist or cannot be accessed")

-    paths = []
-    if os.path.isfile(sigs_path):
+    paths: List[Path] = []
+    if sigs_path.is_file():
         paths.append(sigs_path)
-    elif os.path.isdir(sigs_path):
-        logger.debug("reading signatures from directory %s", os.path.abspath(os.path.normpath(sigs_path)))
-        for root, _, files in os.walk(sigs_path):
-            for file in files:
-                if file.endswith((".pat", ".pat.gz", ".sig")):
-                    sig_path = os.path.join(root, file)
-                    paths.append(sig_path)
+    elif sigs_path.is_dir():
+        logger.debug("reading signatures from directory %s", sigs_path.resolve())
+        for file in sigs_path.rglob("*"):
+            if file.is_file() and file.suffix.lower() in (".pat", ".pat.gz", ".sig"):
+                paths.append(file)

-    # nicely normalize and format path so that debugging messages are clearer
-    paths = [os.path.abspath(os.path.normpath(path)) for path in paths]
+    # Convert paths to their absolute and normalized forms
+    paths = [path.resolve().absolute() for path in paths]

-    # load signatures in deterministic order: the alphabetic sorting of filename.
-    # this means that `0_sigs.pat` loads before `1_sigs.pat`.
-    paths = sorted(paths, key=os.path.basename)
+    # Sort paths in deterministic order based on filename
+    paths = sorted(paths, key=lambda path: path.name)

     for path in paths:
         logger.debug("found signature file: %s", path)

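The sort key changes from os.path.basename to Path.name, keeping signature load order deterministic by final path component regardless of directory. A tiny sketch:

    from pathlib import Path

    paths = [Path("b/1_sigs.pat"), Path("a/0_sigs.pat")]
    ordered = sorted(paths, key=lambda path: path.name)
    assert [p.name for p in ordered] == ["0_sigs.pat", "1_sigs.pat"]
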
@@ -743,23 +727,20 @@ def collect_metadata(
     sample_path: str,
     format_: str,
     os_: str,
-    rules_path: List[str],
+    rules_path: List[Path],
     extractor: capa.features.extractors.base_extractor.FeatureExtractor,
 ) -> rdoc.Metadata:
     md5 = hashlib.md5()
     sha1 = hashlib.sha1()
     sha256 = hashlib.sha256()

-    with open(sample_path, "rb") as f:
-        buf = f.read()
+    buf = Path(sample_path).read_bytes()

     md5.update(buf)
     sha1.update(buf)
     sha256.update(buf)

     if rules_path != [RULES_PATH_DEFAULT_STRING]:
-        rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path]
+        rules_path = [r.resolve().absolute() for r in rules_path]

     format_ = get_format(sample_path) if format_ == FORMAT_AUTO else format_
     arch = get_arch(sample_path)
     os_ = get_os(sample_path) if os_ == OS_AUTO else os_

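One thing to watch in collect_metadata: rules_path now holds Path objects while RULES_PATH_DEFAULT_STRING presumably remains a str, and a Path never compares equal to a str, so the != guard above is effectively always true after this change. A sketch (the constant's value is hypothetical):

    from pathlib import Path

    RULES_PATH_DEFAULT_STRING = "(default)"  # hypothetical value
    rules_path = [Path(RULES_PATH_DEFAULT_STRING)]
    assert rules_path != [RULES_PATH_DEFAULT_STRING]  # Path != str, always
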
@@ -1011,7 +992,7 @@ def handle_common_args(args):
         raise RuntimeError("unexpected --color value: " + args.color)

     if hasattr(args, "rules"):
-        rules_paths: List[str] = []
+        rules_paths: List[Path] = []

         if args.rules == [RULES_PATH_DEFAULT_STRING]:
             logger.debug("-" * 80)

@@ -1021,9 +1002,9 @@ def handle_common_args(args):
             logger.debug(" https://github.com/mandiant/capa-rules")
             logger.debug("-" * 80)

-            default_rule_path = os.path.join(get_default_root(), "rules")
+            default_rule_path = get_default_root() / "rules"

-            if not os.path.exists(default_rule_path):
+            if not default_rule_path.exists():
                 # when a user installs capa via pip,
                 # this pulls down just the source code - not the default rules.
                 # i'm not sure the default rules should even be written to the library directory,

@@ -1035,10 +1016,9 @@ def handle_common_args(args):
             rules_paths.append(default_rule_path)
             args.is_default_rules = True
         else:
-            rules_paths = args.rules
-
-            if RULES_PATH_DEFAULT_STRING in rules_paths:
-                rules_paths.remove(RULES_PATH_DEFAULT_STRING)
+            for rule in args.rules:
+                if RULES_PATH_DEFAULT_STRING != rule:
+                    rules_paths.append(Path(rule))

         for rule_path in rules_paths:
             logger.debug("using rules path: %s", rule_path)

@@ -1056,8 +1036,9 @@ def handle_common_args(args):
             )
             logger.debug("-" * 80)

-            sigs_path = os.path.join(get_default_root(), "sigs")
-            if not os.path.exists(sigs_path):
+            sigs_path = get_default_root() / "sigs"
+
+            if not sigs_path.exists():
                 logger.error(
                     "Using default signature path, but it doesn't exist. "
                     "Please install the signatures first: "

@@ -1065,7 +1046,7 @@ def handle_common_args(args):
                 )
                 raise IOError(f"signatures path {sigs_path} does not exist or cannot be accessed")
         else:
-            sigs_path = args.signatures
+            sigs_path = Path(args.signatures)
             logger.debug("using signatures path: %s", sigs_path)

         args.signatures = sigs_path

@@ -1118,7 +1099,7 @@ def main(argv=None):
         return ret

     try:
-        _ = get_file_taste(args.sample)
+        _ = get_file_taste(Path(args.sample))
     except IOError as e:
         # per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we
         # handle the IOError separately and reach into the args

@@ -1138,7 +1119,7 @@ def main(argv=None):

     try:
         if is_running_standalone() and args.is_default_rules:
-            cache_dir = os.path.join(get_default_root(), "cache")
+            cache_dir = get_default_root() / "cache"
         else:
             cache_dir = capa.rules.cache.get_default_cache_directory()

@@ -1223,8 +1204,7 @@ def main(argv=None):

     if format_ == FORMAT_FREEZE:
         # freeze format deserializes directly into an extractor
-        with open(args.sample, "rb") as f:
-            extractor = frz.load(f.read())
+        extractor = frz.load(Path(args.sample).read_bytes())
     else:
         # all other formats we must create an extractor,
         # such as viv, binary ninja, etc. workspaces

@@ -1312,7 +1292,7 @@ def ida_main():
     logger.debug(" https://github.com/mandiant/capa-rules")
     logger.debug("-" * 80)

-    rules_path = os.path.join(get_default_root(), "rules")
+    rules_path = get_default_root() / "rules"
     logger.debug("rule path: %s", rules_path)
     rules = get_rules([rules_path])

@@ -136,7 +136,8 @@ def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
         arch=meta.analysis.arch,
         os=meta.analysis.os,
         extractor=meta.analysis.extractor,
-        rules=meta.analysis.rules,
+        # TODO convert analysis.rule type to Path in capa_pb2.Metadata
+        rules=list(str(rule) for rule in meta.analysis.rules),
         base_address=addr_to_pb2(meta.analysis.base_address),
         layout=capa_pb2.Layout(
             functions=[

@@ -8,6 +8,7 @@
 import datetime
 import collections
 from typing import Any, Dict, List, Tuple, Union, Optional
+from pathlib import Path

 from pydantic import Field, BaseModel

@@ -73,7 +74,7 @@ class Analysis(Model):
     arch: str
     os: str
     extractor: str
-    rules: Tuple[str, ...]
+    rules: Tuple[Path, ...]
     base_address: frz.Address
     layout: Layout
     feature_counts: FeatureCounts

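Since Analysis is a pydantic model, switching the rules field to Tuple[Path, ...] means incoming strings are coerced to Path automatically on validation, while renderers (see the proto and default-render hunks) convert back to str at their output boundaries. A minimal sketch with an illustrative model, not capa's:

    from pathlib import Path
    from typing import Tuple

    from pydantic import BaseModel

    class Demo(BaseModel):
        rules: Tuple[Path, ...]

    d = Demo(rules=("rules/lib/allocate-memory.yml",))
    assert isinstance(d.rules[0], Path)
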
@@ -91,7 +91,7 @@ def render_meta(ostream, doc: rd.ResultDocument):
             ("arch", doc.meta.analysis.arch),
             ("extractor", doc.meta.analysis.extractor),
             ("base address", format_address(doc.meta.analysis.base_address)),
-            ("rules", "\n".join(doc.meta.analysis.rules)),
+            ("rules", "\n".join(tuple(str(rule) for rule in doc.meta.analysis.rules))),
             ("function count", len(doc.meta.analysis.feature_counts.functions)),
             ("library function count", len(doc.meta.analysis.library_functions)),
             (

@@ -3,8 +3,8 @@ import zlib
 import pickle
 import hashlib
 import logging
-import os.path
 from typing import List, Optional
+from pathlib import Path
 from dataclasses import dataclass

 import capa.rules

@@ -36,7 +36,7 @@ def compute_cache_identifier(rule_content: List[bytes]) -> CacheIdentifier:
     return hash.hexdigest()


-def get_default_cache_directory() -> str:
+def get_default_cache_directory() -> Path:
     # ref: https://github.com/mandiant/capa/issues/1212#issuecomment-1361259813
     #
     # Linux: $XDG_CACHE_HOME/capa/

@@ -45,22 +45,22 @@ def get_default_cache_directory() -> str:

     # ref: https://stackoverflow.com/a/8220141/87207
     if sys.platform == "linux" or sys.platform == "linux2":
-        directory = os.environ.get("XDG_CACHE_HOME", os.path.join(os.environ["HOME"], ".cache", "capa"))
+        directory = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache" / "capa"))
     elif sys.platform == "darwin":
-        directory = os.path.join(os.environ["HOME"], "Library", "Caches", "capa")
+        directory = Path.home() / "Library" / "Caches" / "capa"
     elif sys.platform == "win32":
-        directory = os.path.join(os.environ["LOCALAPPDATA"], "flare", "capa", "cache")
+        directory = Path(os.environ["LOCALAPPDATA"]) / "flare" / "capa" / "cache"
     else:
         raise NotImplementedError(f"unsupported platform: {sys.platform}")

-    os.makedirs(directory, exist_ok=True)
+    directory.mkdir(parents=True, exist_ok=True)

     return directory


-def get_cache_path(cache_dir: str, id: CacheIdentifier) -> str:
+def get_cache_path(cache_dir: Path, id: CacheIdentifier) -> Path:
     filename = "capa-" + id[:8] + ".cache"
-    return os.path.join(cache_dir, filename)
+    return cache_dir / filename


 MAGIC = b"capa"

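With get_default_cache_directory and get_cache_path both returning Path, cache file locations compose with the / operator. A small usage sketch mirroring the helper above (the identifier value is hypothetical):

    from pathlib import Path

    def get_cache_path(cache_dir: Path, id: str) -> Path:
        # mirrors the updated helper: join directory and derived filename
        filename = "capa-" + id[:8] + ".cache"
        return cache_dir / filename

    assert get_cache_path(Path("cache"), "deadbeefcafe").name == "capa-deadbeef.cache"
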
@@ -102,30 +102,29 @@ def compute_ruleset_cache_identifier(ruleset: capa.rules.RuleSet) -> CacheIdentifier:
     return compute_cache_identifier(rule_contents)


-def cache_ruleset(cache_dir: str, ruleset: capa.rules.RuleSet):
+def cache_ruleset(cache_dir: Path, ruleset: capa.rules.RuleSet):
     """
-    cache the given ruleset to disk, using the given cache directory.
-    this can subsequently be reloaded via `load_cached_ruleset`,
+    Cache the given ruleset to disk, using the given cache directory.
+    This can subsequently be reloaded via `load_cached_ruleset`,
     assuming the capa version and rule content does not change.

-    callers should use this function to avoid the performance overhead
+    Callers should use this function to avoid the performance overhead
     of validating rules on each run.
     """
     id = compute_ruleset_cache_identifier(ruleset)
     path = get_cache_path(cache_dir, id)
-    if os.path.exists(path):
-        logger.debug("rule set already cached to %s", path)
+    if path.exists():
+        logger.debug("Rule set already cached to %s", path)
         return

     cache = RuleCache(id, ruleset)
-    with open(path, "wb") as f:
-        f.write(cache.dump())
+    path.write_bytes(cache.dump())

-    logger.debug("rule set cached to %s", path)
+    logger.debug("Rule set cached to %s", path)
     return


-def load_cached_ruleset(cache_dir: str, rule_contents: List[bytes]) -> Optional[capa.rules.RuleSet]:
+def load_cached_ruleset(cache_dir: Path, rule_contents: List[bytes]) -> Optional[capa.rules.RuleSet]:
     """
     load a cached ruleset from disk, using the given cache directory.
     the raw rule contents are required here to prove that the rules haven't changed

@@ -136,20 +135,19 @@ def load_cached_ruleset(cache_dir: Path, rule_contents: List[bytes]) -> Optional[capa.rules.RuleSet]:
     """
     id = compute_cache_identifier(rule_contents)
     path = get_cache_path(cache_dir, id)
-    if not os.path.exists(path):
+    if not path.exists():
         logger.debug("rule set cache does not exist: %s", path)
         return None

     logger.debug("loading rule set from cache: %s", path)
-    with open(path, "rb") as f:
-        buf = f.read()
+    buf = path.read_bytes()

     try:
         cache = RuleCache.load(buf)
     except AssertionError:
         logger.debug("rule set cache is invalid: %s", path)
         # delete the cache that seems to be invalid.
-        os.remove(path)
+        path.unlink()
         return None
     else:
         return cache.ruleset

@@ -37,6 +37,7 @@ import logging
 import argparse
 import datetime
 import itertools
+from pathlib import Path

 import capa.main
 import capa.rules

@@ -711,7 +712,7 @@ def main(argv=None):
     logging.getLogger("capa2yara").setLevel(level)

     try:
-        rules = capa.main.get_rules([args.rules])
+        rules = capa.main.get_rules([Path(args.rules)])
         namespaces = capa.rules.index_rules_by_namespace(list(rules.rules.values()))
         logger.info("successfully loaded %s rules (including subscope rules which will be ignored)", len(rules))
         if args.tag:

@@ -3,6 +3,7 @@
 import json
 import collections
 from typing import Any, Dict
+from pathlib import Path

 import capa.main
 import capa.rules

@@ -171,7 +172,7 @@ def capa_details(rules_path, file_path, output_format="dictionary"):
     capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)

     # collect metadata (used only to make rendering more complete)
-    meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, rules_path, extractor)
+    meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, [rules_path], extractor)

     meta.analysis.feature_counts = counts["feature_counts"]
     meta.analysis.library_functions = counts["library_functions"]

@@ -199,11 +200,11 @@ if __name__ == "__main__":
     import os.path
     import argparse

-    RULES_PATH = os.path.join(os.path.dirname(__file__), "..", "rules")
+    RULES_PATH = capa.main.get_default_root() / "rules"

     parser = argparse.ArgumentParser(description="Extract capabilities from a file")
     parser.add_argument("file", help="file to extract capabilities from")
-    parser.add_argument("--rules", help="path to rules directory", default=os.path.abspath(RULES_PATH))
+    parser.add_argument("--rules", help="path to rules directory", default=RULES_PATH)
     parser.add_argument(
         "--output", help="output format", choices=["dictionary", "json", "texttable"], default="dictionary"
     )

@@ -1,6 +1,7 @@
 import sys
 import logging
 import argparse
+from pathlib import Path

 import capa.main
 import capa.rules

@@ -89,7 +90,7 @@ def main():
     args = parser.parse_args()

     new_rule_path = args.new_rule
-    rules_path = args.rules
+    rules_path = [Path(rule) for rule in args.rules]

     result = find_overlapping_rules(new_rule_path, rules_path)

@@ -34,6 +34,7 @@ import timeit
 import logging
 import argparse
 import subprocess
+from pathlib import Path

 import tqdm
 import tabulate

@@ -81,7 +82,7 @@ def main(argv=None):
     capa.main.handle_common_args(args)

     try:
-        taste = capa.helpers.get_file_taste(args.sample)
+        taste = capa.helpers.get_file_taste(Path(args.sample))
     except IOError as e:
         logger.error("%s", str(e))
         return -1

@@ -54,6 +54,7 @@ import logging
 import argparse
 import collections
 from typing import Dict
+from pathlib import Path

 import colorama

@@ -136,7 +137,7 @@ def main(argv=None):
     capa.main.handle_common_args(args)

     try:
-        taste = get_file_taste(args.sample)
+        taste = get_file_taste(Path(args.sample))
     except IOError as e:
         logger.error("%s", str(e))
         return -1

@@ -67,8 +67,8 @@ Example::
 import os
 import sys
 import logging
-import os.path
 import argparse
+from pathlib import Path

 import capa.main
 import capa.rules

@@ -102,7 +102,7 @@ def main(argv=None):
     capa.main.handle_common_args(args)

     try:
-        taste = capa.helpers.get_file_taste(args.sample)
+        taste = capa.helpers.get_file_taste(Path(args.sample))
     except IOError as e:
         logger.error("%s", str(e))
         return -1

setup.py
@@ -7,6 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import os
+from pathlib import Path

 import setuptools

@@ -34,14 +35,11 @@ requirements = [
 # this sets __version__
 # via: http://stackoverflow.com/a/7071358/87207
 # and: http://stackoverflow.com/a/2073599/87207
-with open(os.path.join("capa", "version.py"), "r") as f:
-    exec(f.read())
+exec(Path("capa/version.py").read_text())


 # via: https://packaging.python.org/guides/making-a-pypi-friendly-readme/
-this_directory = os.path.abspath(os.path.dirname(__file__))
-with open(os.path.join(this_directory, "README.md"), "r") as f:
-    long_description = f.read()
+long_description = (Path(__file__).resolve().parent / "README.md").read_text()


 setuptools.setup(

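Path.read_text replaces the open/read pair in setup.py. Note that Path("capa/version.py") resolves against the current working directory, exactly as os.path.join("capa", "version.py") did, so setup.py still assumes it runs from the repository root. A sketch of the exec pattern using a hypothetical stand-in file:

    from pathlib import Path

    Path("demo_version.py").write_text('__version__ = "1.0.0"')  # hypothetical stand-in
    exec(Path("demo_version.py").read_text())
    assert __version__ == "1.0.0"
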
@@ -14,6 +14,7 @@ import itertools
 import contextlib
 import collections
 from typing import Set, Dict
+from pathlib import Path
 from functools import lru_cache

 import pytest

@@ -44,9 +45,9 @@ from capa.features.address import Address
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
 from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor

-CD = os.path.dirname(__file__)
-DOTNET_DIR = os.path.join(CD, "data", "dotnet")
-DNFILE_TESTFILES = os.path.join(DOTNET_DIR, "dnfile-testfiles")
+CD = Path(__file__).resolve().parent
+DOTNET_DIR = CD / "data" / "dotnet"
+DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"


 @contextlib.contextmanager

@@ -94,11 +95,11 @@ def get_viv_extractor(path):
     import capa.features.extractors.viv.extractor

     sigpaths = [
-        os.path.join(CD, "data", "sigs", "test_aulldiv.pat"),
-        os.path.join(CD, "data", "sigs", "test_aullrem.pat.gz"),
-        os.path.join(CD, "..", "sigs", "1_flare_msvc_rtf_32_64.sig"),
-        os.path.join(CD, "..", "sigs", "2_flare_msvc_atlmfc_32_64.sig"),
-        os.path.join(CD, "..", "sigs", "3_flare_common_libs.sig"),
+        CD / "data" / "sigs" / "test_aulldiv.pat",
+        CD / "data" / "sigs" / "test_aullrem.pat.gz",
+        CD.parent / "sigs" / "1_flare_msvc_rtf_32_64.sig",
+        CD.parent / "sigs" / "2_flare_msvc_atlmfc_32_64.sig",
+        CD.parent / "sigs" / "3_flare_common_libs.sig",
     ]

     if "raw32" in path:

@@ -6,6 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
+from pathlib import Path

 import fixtures
 from fixtures import *

@@ -52,8 +53,8 @@ def test_binja_feature_counts(sample, scope, feature, expected):

 @pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
 def test_standalone_binja_backend():
-    CD = os.path.dirname(__file__)
-    test_path = os.path.join(CD, "..", "tests", "data", "Practical Malware Analysis Lab 01-01.exe_")
+    CD = Path(__file__).resolve().parent
+    test_path = CD / ".." / "tests" / "data" / "Practical Malware Analysis Lab 01-01.exe_"
     assert capa.main.main([test_path, "-b", capa.main.BACKEND_BINJA]) == 0

@@ -46,14 +46,14 @@ import io
 import sys
 import inspect
 import logging
-import os.path
 import binascii
 import traceback
+from pathlib import Path

 import pytest

 try:
-    sys.path.append(os.path.dirname(__file__))
+    sys.path.append(str(Path(__file__).parent))
     import fixtures
     from fixtures import *
 finally:

@@ -144,7 +144,7 @@ def assert_meta(meta: rd.Metadata, dst: capa_pb2.Metadata):
     assert meta.analysis.arch == dst.analysis.arch
     assert meta.analysis.os == dst.analysis.os
     assert meta.analysis.extractor == dst.analysis.extractor
-    assert list(meta.analysis.rules) == dst.analysis.rules
+    assert list(str(r) for r in meta.analysis.rules) == dst.analysis.rules
     assert capa.render.proto.addr_to_pb2(meta.analysis.base_address) == dst.analysis.base_address

     assert len(meta.analysis.layout.functions) == len(dst.analysis.layout.functions)

@@ -76,12 +76,12 @@ def test_ruleset_cache_save_load():

     path = capa.rules.cache.get_cache_path(cache_dir, id)
     try:
-        os.remove(path)
+        path.unlink()
     except OSError:
         pass

     capa.rules.cache.cache_ruleset(cache_dir, rs)
-    assert os.path.exists(path)
+    assert path.exists()

     assert capa.rules.cache.load_cached_ruleset(cache_dir, content) is not None

@@ -93,23 +93,23 @@ def test_ruleset_cache_invalid():
     cache_dir = capa.rules.cache.get_default_cache_directory()
     path = capa.rules.cache.get_cache_path(cache_dir, id)
     try:
-        os.remove(path)
+        path.unlink()
     except OSError:
         pass

     capa.rules.cache.cache_ruleset(cache_dir, rs)
-    assert os.path.exists(path)
+    assert path.exists()

-    with open(path, "rb") as f:
-        buf = f.read()
+    buf = path.read_bytes()

-    # corrupt the magic header
+    # Corrupt the magic header
     buf = b"x" + buf[1:]

-    with open(path, "wb") as f:
-        f.write(buf)
+    # Write the modified contents back to the file
+    path.write_bytes(buf)

-    assert os.path.exists(path)
+    # Check if the file still exists
+    assert path.exists()
     assert capa.rules.cache.load_cached_ruleset(cache_dir, content) is None
     # the invalid cache should be deleted
-    assert not os.path.exists(path)
+    assert not path.exists()

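Path.unlink raises FileNotFoundError, a subclass of OSError, just as os.remove does, so the existing try/except OSError in these tests keeps working unchanged. On Python 3.8+ the try/except could be dropped entirely; a sketch:

    from pathlib import Path

    p = Path("does-not-exist.cache")
    try:
        p.unlink()
    except OSError:
        pass

    p.unlink(missing_ok=True)  # Python 3.8+: equivalent, without the try/except
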
@@ -10,27 +10,28 @@ import os
 import sys
 import textwrap
 import subprocess
+from pathlib import Path

 import pytest
 from fixtures import *

-CD = os.path.dirname(__file__)
+CD = Path(__file__).resolve().parent


 def get_script_path(s):
-    return os.path.join(CD, "..", "scripts", s)
+    return CD / ".." / "scripts" / s


 def get_file_path():
-    return os.path.join(CD, "data", "9324d1a8ae37a36ae560c37448c9705a.exe_")
+    return CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_"


 def get_rules_path():
-    return os.path.join(CD, "..", "rules")
+    return CD / ".." / "rules"


 def get_rule_path():
-    return os.path.join(get_rules_path(), "lib", "allocate-memory.yml")
+    return get_rules_path() / "lib" / "allocate-memory.yml"


 @pytest.mark.parametrize(

@@ -52,13 +53,17 @@ def test_scripts(script, args):
     assert p.returncode == 0


-def test_bulk_process(tmpdir):
+def test_bulk_process(tmp_path):
     # create test directory to recursively analyze
-    t = tmpdir.mkdir("test")
-    with open(os.path.join(CD, "data", "ping_täst.exe_"), "rb") as f:
-        t.join("test.exe_").write_binary(f.read())
+    t = tmp_path / "test"
+    t.mkdir()
+
+    source_file = Path(__file__).resolve().parent / "data" / "ping_täst.exe_"
+    dest_file = t / "test.exe_"
+
+    dest_file.write_bytes(source_file.read_bytes())

-    p = run_program(get_script_path("bulk-process.py"), [t.dirname])
+    p = run_program(get_script_path("bulk-process.py"), [t.parent])
     assert p.returncode == 0

@@ -68,19 +73,18 @@ def run_program(script_path, args):
     return subprocess.run(args, stdout=subprocess.PIPE)


-def test_proto_conversion(tmpdir):
-    t = tmpdir.mkdir("proto-test")
-
-    json = os.path.join(CD, "data", "rd", "Practical Malware Analysis Lab 01-01.dll_.json")
-
-    p = run_program(get_script_path("proto-from-results.py"), [json])
+def test_proto_conversion(tmp_path):
+    t = tmp_path / "proto-test"
+    t.mkdir()
+    json_file = Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"
+
+    p = run_program(get_script_path("proto-from-results.py"), [json_file])
     assert p.returncode == 0

-    pb = os.path.join(t, "pma.pb")
-    with open(pb, "wb") as f:
-        f.write(p.stdout)
+    pb_file = t / "pma.pb"
+    pb_file.write_bytes(p.stdout)

-    p = run_program(get_script_path("proto-to-results.py"), [pb])
+    p = run_program(get_script_path("proto-to-results.py"), [pb_file])
     assert p.returncode == 0

     assert p.stdout.startswith(b'{\n  "meta": ') or p.stdout.startswith(b'{\r\n  "meta": ')

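These tests migrate from pytest's legacy tmpdir fixture (a py.path.local) to tmp_path (a pathlib.Path), swapping mkdir/join/write_binary for mkdir, the / operator, and write_bytes. A self-contained sketch of the pattern with a hypothetical test name:

    from pathlib import Path

    def test_example(tmp_path: Path):
        t = tmp_path / "test"
        t.mkdir()
        (t / "sample.exe_").write_bytes(b"MZ")
        assert (t / "sample.exe_").read_bytes().startswith(b"MZ")
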