From e3055bc7406fe7b0f397926919b54573e21c76c0 Mon Sep 17 00:00:00 2001
From: Moritz Raabe
Date: Tue, 26 Jan 2021 18:42:47 +0100
Subject: [PATCH] check rule format consistency

---
 capa/rules.py      | 19 +++++++++++------
 scripts/capafmt.py |  2 +-
 scripts/lint.py    | 53 ++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/capa/rules.py b/capa/rules.py
index 09929636..5a27a979 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -614,16 +614,20 @@ class Rule(object):
         return y
 
     @classmethod
-    def from_yaml(cls, s):
-        # use pyyaml because it can be much faster than ruamel (pure python)
-        doc = yaml.load(s, Loader=cls._get_yaml_loader())
+    def from_yaml(cls, s, use_ruamel=False):
+        if use_ruamel:
+            # ruamel enables nice formatting and doc roundtripping with comments
+            doc = cls._get_ruamel_yaml_parser().load(s)
+        else:
+            # use pyyaml because it can be much faster than ruamel (pure python)
+            doc = yaml.load(s, Loader=cls._get_yaml_loader())
         return cls.from_dict(doc, s)
 
     @classmethod
-    def from_yaml_file(cls, path):
+    def from_yaml_file(cls, path, use_ruamel=False):
         with open(path, "rb") as f:
             try:
-                return cls.from_yaml(f.read().decode("utf-8"))
+                return cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel)
             except InvalidRule as e:
                 raise InvalidRuleWithPath(path, str(e))
 
@@ -716,7 +720,10 @@ class Rule(object):
         # tweaking `ruamel.indent()` doesn't quite give us the control we want.
         # so, add the two extra spaces that we've determined we need through experimentation.
         # see #263
-        doc = doc.replace("  description:", "    description:")
+        # only do this for the features section, so the meta description doesn't get reformatted
+        # assumes features section always exists
+        features_offset = doc.find("features")
+        doc = doc[:features_offset] + doc[features_offset:].replace("  description:", "    description:")
         return doc
 
 
diff --git a/scripts/capafmt.py b/scripts/capafmt.py
index 236c464e..bc2ce72d 100644
--- a/scripts/capafmt.py
+++ b/scripts/capafmt.py
@@ -50,7 +50,7 @@ def main(argv=None):
     logging.basicConfig(level=level)
     logging.getLogger("capafmt").setLevel(level)
 
-    rule = capa.rules.Rule.from_yaml_file(args.path)
+    rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True)
     if args.in_place:
         with open(args.path, "wb") as f:
             f.write(rule.to_yaml().encode("utf-8"))
diff --git a/scripts/lint.py b/scripts/lint.py
index 97f5f562..139c57c5 100644
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -17,6 +17,7 @@ import os
 import sys
 import time
 import string
+import difflib
 import hashlib
 import logging
 import os.path
@@ -25,6 +26,7 @@ import itertools
 import posixpath
 
 import capa.main
+import capa.rules
 import capa.engine
 import capa.features
 import capa.features.insn
@@ -277,6 +279,38 @@ class FeatureNegativeNumber(Lint):
         return False
 
 
+class FormatSingleEmptyLineEOF(Lint):
+    name = "EOF format"
+    recommendation = "end file with a single empty line"
+
+    def check_rule(self, ctx, rule):
+        if rule.definition.endswith("\n") and not rule.definition.endswith("\n\n"):
+            return False
+        return True
+
+
+class FormatIncorrect(Lint):
+    name = "rule format incorrect"
+    recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}"
+
+    def check_rule(self, ctx, rule):
+        actual = rule.definition
+        expected = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml()
+
+        # ignore different quote characters
+        actual = actual.replace("'", '"')
+        expected = expected.replace("'", '"')
+
+        diff = list(difflib.ndiff(actual.splitlines(1), expected.splitlines(1)))
+        # deltas begin with two-letter code; "  " means common line
+        difflen = len(list(filter(lambda l: not l.startswith("  "), diff)))
+        if difflen > 0:
+            self.recommendation = self.recommendation_template.format("".join(diff))
+            return True
+
+        return False
+
+
 def run_lints(lints, ctx, rule):
     for lint in lints:
         if lint.check_rule(ctx, rule):
@@ -332,15 +366,25 @@ FEATURE_LINTS = (
 )
 
 
-def get_normpath(path):
-    return posixpath.normpath(path).replace(os.sep, "/")
-
-
 def lint_features(ctx, rule):
     features = get_features(ctx, rule)
     return run_feature_lints(FEATURE_LINTS, ctx, features)
 
 
+FORMAT_LINTS = (
+    FormatSingleEmptyLineEOF(),
+    FormatIncorrect(),
+)
+
+
+def lint_format(ctx, rule):
+    return run_lints(FORMAT_LINTS, ctx, rule)
+
+
+def get_normpath(path):
+    return posixpath.normpath(path).replace(os.sep, "/")
+
+
 def get_features(ctx, rule):
     # get features from rule and all dependencies including subscopes and matched rules
     features = []
@@ -391,6 +435,7 @@ def lint_rule(ctx, rule):
             lint_meta(ctx, rule),
             lint_logic(ctx, rule),
             lint_features(ctx, rule),
+            lint_format(ctx, rule),
         )
     )
 