check rule format consistency

2025-12-23 15:37:37 -08:00 · 2021-01-26 18:42:47 +01:00
parent 9406e3dbfb
commit e3055bc740
3 changed files with 63 additions and 11 deletions
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -614,16 +614,20 @@ class Rule(object):
        return y
    @classmethod
-    def from_yaml(cls, s):
+    def from_yaml(cls, s, use_ruamel=False):
-        # use pyyaml because it can be much faster than ruamel (pure python)
+        if use_ruamel:
-        doc = yaml.load(s, Loader=cls._get_yaml_loader())
+            # ruamel enables nice formatting and doc roundtripping with comments
            doc = cls._get_ruamel_yaml_parser().load(s)
        else:
            # use pyyaml because it can be much faster than ruamel (pure python)
            doc = yaml.load(s, Loader=cls._get_yaml_loader())
        return cls.from_dict(doc, s)
    @classmethod
-    def from_yaml_file(cls, path):
+    def from_yaml_file(cls, path, use_ruamel=False):
        with open(path, "rb") as f:
            try:
-                return cls.from_yaml(f.read().decode("utf-8"))
+                return cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel)
            except InvalidRule as e:
                raise InvalidRuleWithPath(path, str(e))
@@ -716,7 +720,10 @@ class Rule(object):
        # tweaking `ruamel.indent()` doesn't quite give us the control we want.
        # so, add the two extra spaces that we've determined we need through experimentation.
        # see #263
-        doc = doc.replace("  description:", "    description:")
+        # only do this for the features section, so the meta description doesn't get reformatted
        # assumes features section always exists
        features_offset = doc.find("features")
        doc = doc[:features_offset] + doc[features_offset:].replace("  description:", "    description:")
        return doc
--- a/scripts/capafmt.py
+++ b/scripts/capafmt.py
@@ -50,7 +50,7 @@ def main(argv=None):
    logging.basicConfig(level=level)
    logging.getLogger("capafmt").setLevel(level)
-    rule = capa.rules.Rule.from_yaml_file(args.path)
+    rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True)
    if args.in_place:
        with open(args.path, "wb") as f:
            f.write(rule.to_yaml().encode("utf-8"))
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -17,6 +17,7 @@ import os
 import sys
 import time
 import string
 import difflib
 import hashlib
 import logging
 import os.path
@@ -25,6 +26,7 @@ import itertools
 import posixpath
 import capa.main
 import capa.rules
 import capa.engine
 import capa.features
 import capa.features.insn
@@ -277,6 +279,38 @@ class FeatureNegativeNumber(Lint):
        return False
 class FormatSingleEmptyLineEOF(Lint):
    """Lint that flags a rule whose text does not end with exactly one newline."""

    name = "EOF format"
    recommendation = "end file with a single empty line"

    def check_rule(self, ctx, rule):
        """Return True (violation) unless `rule.definition` ends with a single newline."""
        text = rule.definition
        # exactly one trailing newline: ends with "\n" but not with "\n\n"
        ends_with_one_newline = text.endswith("\n") and not text.endswith("\n\n")
        return not ends_with_one_newline
 class FormatIncorrect(Lint):
    """Lint that flags a rule whose text differs from its re-serialized form."""

    name = "rule format incorrect"
    recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}"

    def check_rule(self, ctx, rule):
        """
        Compare the rule text against the output of parsing it with ruamel and
        dumping it again via `to_yaml()`.

        Returns True (violation) when the two differ; in that case the line diff
        is stored in `self.recommendation`. Returns False when they match.
        """
        reformatted = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml()

        # ignore different quote characters
        actual_lines = rule.definition.replace("'", '"').splitlines(True)
        expected_lines = reformatted.replace("'", '"').splitlines(True)

        delta = list(difflib.ndiff(actual_lines, expected_lines))

        # deltas begin with two-letter code; "  " means common line
        if all(entry.startswith("  ") for entry in delta):
            return False

        self.recommendation = self.recommendation_template.format("".join(delta))
        return True
 def run_lints(lints, ctx, rule):
    for lint in lints:
        if lint.check_rule(ctx, rule):
@@ -332,15 +366,25 @@ FEATURE_LINTS = (
 )
 def get_normpath(path):
    """Normalize `path` with posixpath rules, then turn any `os.sep` into "/"."""
    normalized = posixpath.normpath(path)
    return "/".join(normalized.split(os.sep))
 def lint_features(ctx, rule):
    """Run every lint in FEATURE_LINTS over the features gathered by `get_features`."""
    return run_feature_lints(FEATURE_LINTS, ctx, get_features(ctx, rule))
 # lints that check the textual formatting of a rule; run by lint_format()
 FORMAT_LINTS = (
    FormatSingleEmptyLineEOF(),
    FormatIncorrect(),
 )
 def lint_format(ctx, rule):
    """Run every lint in FORMAT_LINTS against `rule` and return the result of `run_lints`."""
    format_results = run_lints(FORMAT_LINTS, ctx, rule)
    return format_results
 def get_normpath(path):
    """Normalize `path` with posixpath rules, then turn any `os.sep` into "/"."""
    normalized = posixpath.normpath(path)
    return "/".join(normalized.split(os.sep))
 def get_features(ctx, rule):
    # get features from rule and all dependencies including subscopes and matched rules
    features = []
@@ -391,6 +435,7 @@ def lint_rule(ctx, rule):
            lint_meta(ctx, rule),
            lint_logic(ctx, rule),
            lint_features(ctx, rule),
            lint_format(ctx, rule),
        )
    )