""" Check the given capa rules for style issues. Usage: $ python scripts/lint.py rules/ """ import os import os.path import sys import string import hashlib import logging import os.path import itertools import posixpath import argparse import capa.main import capa.engine import capa.features import capa.features.insn logger = logging.getLogger("capa.lint") class Lint(object): name = "lint" recommendation = "" def check_rule(self, ctx, rule): return False class NameCasing(Lint): name = "rule name casing" recommendation = "Rename rule using to start with lower case letters" def check_rule(self, ctx, rule): return rule.name[0] in string.ascii_uppercase and rule.name[1] not in string.ascii_uppercase class FilenameDoesntMatchRuleName(Lint): name = "filename doesn't match the rule name" recommendation = 'Rename rule file to match the rule name, expected: "{:s}", found: "{:s}"' def check_rule(self, ctx, rule): expected = rule.name expected = expected.lower() expected = expected.replace(" ", "-") expected = expected.replace("(", "") expected = expected.replace(")", "") expected = expected.replace("+", "") expected = expected.replace("/", "") expected = expected + ".yml" found = os.path.basename(rule.meta["capa/path"]) self.recommendation = self.recommendation.format(expected, found) return expected != found class MissingNamespace(Lint): name = "missing rule namespace" recommendation = "Add meta.namespace so that the rule is emitted correctly" def check_rule(self, ctx, rule): return ( "namespace" not in rule.meta and not is_nursery_rule(rule) and "maec/malware-category" not in rule.meta and "lib" not in rule.meta ) class NamespaceDoesntMatchRulePath(Lint): name = "file path doesn't match rule namespace" recommendation = "Move rule to appropriate directory or update the namespace" def check_rule(self, ctx, rule): # let the other lints catch namespace issues if "namespace" not in rule.meta: return False if is_nursery_rule(rule): return False if "maec/malware-category" in rule.meta: return False if "lib" in rule.meta: return False return rule.meta["namespace"] not in posixpath.normpath(rule.meta["capa/path"]) class MissingScope(Lint): name = "missing scope" recommendation = "Add meta.scope so that the scope is explicit (defaults to `function`)" def check_rule(self, ctx, rule): return "scope" not in rule.meta class InvalidScope(Lint): name = "invalid scope" recommendation = "Use only file, function, or basic block rule scopes" def check_rule(self, ctx, rule): return rule.meta.get("scope") not in ("file", "function", "basic block") class MissingAuthor(Lint): name = "missing author" recommendation = "Add meta.author so that users know who to contact with questions" def check_rule(self, ctx, rule): return "author" not in rule.meta class MissingExamples(Lint): name = "missing examples" recommendation = "Add meta.examples so that the rule can be tested and verified" def check_rule(self, ctx, rule): return ( "examples" not in rule.meta or not isinstance(rule.meta["examples"], list) or len(rule.meta["examples"]) == 0 or rule.meta["examples"] == [None] ) class MissingExampleOffset(Lint): name = "missing example offset" recommendation = "Add offset of example function" def check_rule(self, ctx, rule): if rule.meta.get("scope") in ("function", "basic block"): for example in rule.meta.get("examples", []): if example and ":" not in example: logger.debug("example: %s", example) return True class ExampleFileDNE(Lint): name = "referenced example doesn't exist" recommendation = "Add the referenced example to samples directory ($capa-root/tests/data or supplied via --samples)" def check_rule(self, ctx, rule): if not rule.meta.get("examples"): # let the MissingExamples lint catch this case, don't double report. return False found = False for example in rule.meta.get("examples", []): if example: example_id = example.partition(":")[0] if example_id in ctx["samples"]: found = True break return not found class DoesntMatchExample(Lint): name = "doesn't match on referenced example" recommendation = "Fix the rule logic or provide a different example" def check_rule(self, ctx, rule): if not ctx["is_thorough"]: return False for example in rule.meta.get("examples", []): example_id = example.partition(":")[0] try: path = ctx["samples"][example_id] except KeyError: # lint ExampleFileDNE will catch this. # don't double report. continue try: extractor = capa.main.get_extractor(path, "auto") capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True) except Exception as e: logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e) return True if rule.name not in capabilities: return True class UnusualMetaField(Lint): name = "unusual meta field" recommendation = 'Remove the meta field: "{:s}"' def check_rule(self, ctx, rule): for key in rule.meta.keys(): if key in capa.rules.META_KEYS: continue if key in capa.rules.HIDDEN_META_KEYS: continue self.recommendation = self.recommendation.format(key) return True return False class LibRuleNotInLibDirectory(Lint): name = "lib rule not found in lib directory" recommendation = "Move the rule to the `lib` subdirectory of the rules path" def check_rule(self, ctx, rule): if is_nursery_rule(rule): return False if "lib" not in rule.meta: return False return "/lib/" not in posixpath.normpath(rule.meta["capa/path"]) class LibRuleHasNamespace(Lint): name = "lib rule has a namespace" recommendation = "Remove the namespace from the rule" def check_rule(self, ctx, rule): if "lib" not in rule.meta: return False return "namespace" in rule.meta class FeatureStringTooShort(Lint): name = "feature string too short" recommendation = 'capa only extracts strings with length >= 4; will not match on "{:s}"' def check_features(self, ctx, features): for feature in features: if isinstance(feature, capa.features.String): if len(feature.value) < 4: self.recommendation = self.recommendation.format(feature.value) return True return False class FeatureNegativeNumberOrOffset(Lint): name = "feature value is negative" recommendation = ( "capa treats all numbers as unsigned values; you may specify the number's two's complement " 'representation; will not match on "{:d}"' ) def check_features(self, ctx, features): for feature in features: if isinstance(feature, (capa.features.insn.Number, capa.features.insn.Offset)): if feature.value < 0: self.recommendation = self.recommendation.format(feature.value) return True return False def run_lints(lints, ctx, rule): for lint in lints: if lint.check_rule(ctx, rule): yield lint def run_feature_lints(lints, ctx, features): for lint in lints: if lint.check_features(ctx, features): yield lint NAME_LINTS = ( NameCasing(), FilenameDoesntMatchRuleName(), ) def lint_name(ctx, rule): return run_lints(NAME_LINTS, ctx, rule) SCOPE_LINTS = ( MissingScope(), InvalidScope(), ) def lint_scope(ctx, rule): return run_lints(SCOPE_LINTS, ctx, rule) META_LINTS = ( MissingNamespace(), NamespaceDoesntMatchRulePath(), MissingAuthor(), MissingExamples(), MissingExampleOffset(), ExampleFileDNE(), UnusualMetaField(), LibRuleNotInLibDirectory(), LibRuleHasNamespace(), ) def lint_meta(ctx, rule): return run_lints(META_LINTS, ctx, rule) FEATURE_LINTS = ( FeatureStringTooShort(), FeatureNegativeNumberOrOffset(), ) def lint_features(ctx, rule): features = get_features(ctx, rule) return run_feature_lints(FEATURE_LINTS, ctx, features) def get_features(ctx, rule): # get features from rule and all dependencies including subscopes and matched rules features = [] deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies()] for r in [rule] + deps: features.extend(get_rule_features(r)) return features def get_rule_features(rule): features = [] def rec(statement): if isinstance(statement, capa.engine.Statement): for child in statement.get_children(): rec(child) else: features.append(statement) rec(rule.statement) return features LOGIC_LINTS = (DoesntMatchExample(),) def lint_logic(ctx, rule): return run_lints(LOGIC_LINTS, ctx, rule) def is_nursery_rule(rule): """ The nursery is a spot for rules that have not yet been fully polished. For example, they may not have references to public example of a technique. Yet, we still want to capture and report on their matches. """ return rule.meta.get("capa/nursery") def lint_rule(ctx, rule): logger.debug(rule.name) violations = list( itertools.chain( lint_name(ctx, rule), lint_scope(ctx, rule), lint_meta(ctx, rule), lint_logic(ctx, rule), lint_features(ctx, rule), ) ) if len(violations) > 0: category = rule.meta.get("rule-category") print("") print( "%s%s %s" % (" (nursery) " if is_nursery_rule(rule) else "", rule.name, ("(%s)" % category) if category else "",) ) level = "WARN" if is_nursery_rule(rule) else "FAIL" for violation in violations: print( "%s %s: %s: %s" % (" " if is_nursery_rule(rule) else "", level, violation.name, violation.recommendation,) ) return len(violations) > 0 and not is_nursery_rule(rule) def lint(ctx, rules): """ Args: samples (Dict[string, string]): map from sample id to path. for each sample, record sample id of sha256, md5, and filename. see `collect_samples(path)`. rules (List[Rule]): the rules to lint. """ did_suggest_fix = False for rule in rules.rules.values(): if rule.meta.get("capa/subscope-rule", False): continue did_suggest_fix = lint_rule(ctx, rule) or did_suggest_fix return did_suggest_fix def collect_samples(path): """ recurse through the given path, collecting all file paths, indexed by their content sha256, md5, and filename. """ samples = {} for root, dirs, files in os.walk(path): for name in files: if name.endswith(".viv"): continue if name.endswith(".idb"): continue if name.endswith(".i64"): continue if name.endswith(".frz"): continue if name.endswith(".fnames"): continue path = os.path.join(root, name) try: with open(path, "rb") as f: buf = f.read() except IOError: continue sha256 = hashlib.sha256() sha256.update(buf) md5 = hashlib.md5() md5.update(buf) samples[sha256.hexdigest().lower()] = path samples[sha256.hexdigest().upper()] = path samples[md5.hexdigest().lower()] = path samples[md5.hexdigest().upper()] = path samples[name] = path return samples def main(argv=None): if argv is None: argv = sys.argv[1:] samples_path = os.path.join(os.path.dirname(__file__), "..", "tests", "data") parser = argparse.ArgumentParser(description="A program.") parser.add_argument("rules", type=str, help="Path to rules") parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples") parser.add_argument( "--thorough", action="store_true", help="Enable thorough linting - takes more time, but does a better job", ) parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging") parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") args = parser.parse_args(args=argv) if args.verbose: level = logging.DEBUG elif args.quiet: level = logging.ERROR else: level = logging.INFO logging.basicConfig(level=level) logging.getLogger("capa.lint").setLevel(level) capa.main.set_vivisect_log_level(logging.CRITICAL) logging.getLogger("capa").setLevel(logging.CRITICAL) try: rules = capa.main.get_rules(args.rules) rules = capa.rules.RuleSet(rules) logger.info("successfully loaded %s rules", len(rules)) except IOError as e: logger.error("%s", str(e)) return -1 except capa.rules.InvalidRule as e: logger.error("%s", str(e)) return -1 logger.info("collecting potentially referenced samples") if not os.path.exists(args.samples): logger.error("samples path %s does not exist", args.samples) return -1 samples = collect_samples(args.samples) ctx = { "samples": samples, "rules": rules, "is_thorough": args.thorough, } did_violate = lint(ctx, rules) if not did_violate: logger.info("no suggestions, nice!") return 0 else: return 1 if __name__ == "__main__": sys.exit(main())