rules: handle empty or invalid YAML documents in Rule.from_yaml (#2903)

* rules: handle empty or invalid YAML documents in Rule.from_yaml

Empty or whitespace-only .yml files caused a cryptic TypeError in
Rule.from_dict (NoneType not subscriptable) when yaml.load returned None.
This made lint.py abort with a stack trace instead of a clear message.

Add an early guard in Rule.from_yaml that raises InvalidRule with a
descriptive message when the parsed document is None or structurally
invalid (not a mapping, or missing the top-level "rule" key). get_rules()
now logs a warning and skips such files so that scripts/lint.py completes
cleanly even when placeholder .yml files exist in the rules/ or
rules/nursery/ directories.

Fixes #2900.

* changelog: add entry for #2900 empty YAML handling

* rules: fix exception check and add get_rules skip test

- Use e.args[0] instead of str(e) to check the error message.
  InvalidRule.__str__ prepends "invalid rule: " so str(e) never
  matched the bare message, causing every InvalidRule to be re-raised.
- Add test_get_rules_skips_empty_yaml to cover the get_rules skip path,
  confirming that an empty file is warned-and-skipped while a valid
  sibling rule is still loaded.

* fix: correct isort import ordering in tests/test_rules.py

Move capa.engine import before capa.rules.cache to satisfy
isort --length-sort ordering.
This commit is contained in:
Devyansh Somvanshi
2026-03-11 02:34:11 +05:30
committed by GitHub
parent 1f4a16cbcc
commit c03d833a84
3 changed files with 39 additions and 1 deletions

View File

@@ -35,6 +35,7 @@
### Bug Fixes ### Bug Fixes
- main: suggest --os flag in unsupported OS error message to help users override ELF OS detection @devs6186 #2577 - main: suggest --os flag in unsupported OS error message to help users override ELF OS detection @devs6186 #2577
- render: escape sample-controlled strings before passing to Rich to prevent MarkupError @devs6186 #2699 - render: escape sample-controlled strings before passing to Rich to prevent MarkupError @devs6186 #2699
- rules: handle empty or invalid YAML documents gracefully in `Rule.from_yaml` and `get_rules` @devs6186 #2900
- Fixed insecure deserialization vulnerability in YAML loading @0x1622 (#2770) - Fixed insecure deserialization vulnerability in YAML loading @0x1622 (#2770)
- loader: gracefully handle ELF files with unsupported architectures kamranulhaq2002@gmail.com #2800 - loader: gracefully handle ELF files with unsupported architectures kamranulhaq2002@gmail.com #2800
- loader: handle SegmentationViolation for malformed ELF files @kami922 #2799 - loader: handle SegmentationViolation for malformed ELF files @kami922 #2799

View File

@@ -1143,6 +1143,8 @@ class Rule:
else: else:
# use pyyaml because it can be much faster than ruamel (pure python) # use pyyaml because it can be much faster than ruamel (pure python)
doc = yaml.load(s, Loader=cls._get_yaml_loader()) doc = yaml.load(s, Loader=cls._get_yaml_loader())
if doc is None or not isinstance(doc, dict) or "rule" not in doc:
raise InvalidRule("empty or invalid YAML document")
return cls.from_dict(doc, s) return cls.from_dict(doc, s)
@classmethod @classmethod
@@ -2231,7 +2233,10 @@ def get_rules(
try: try:
rule = Rule.from_yaml(content.decode("utf-8")) rule = Rule.from_yaml(content.decode("utf-8"))
except InvalidRule: except InvalidRule as e:
if e.args and e.args[0] == "empty or invalid YAML document":
logger.warning("skipping %s: %s", path, e)
continue
raise raise
else: else:
rule.meta["capa/path"] = path.as_posix() rule.meta["capa/path"] = path.as_posix()

View File

@@ -19,6 +19,7 @@ import pytest
import capa.rules import capa.rules
import capa.engine import capa.engine
import capa.rules.cache
import capa.features.common import capa.features.common
import capa.features.address import capa.features.address
from capa.engine import Or from capa.engine import Or
@@ -173,6 +174,37 @@ def test_invalid_rule_statement_descriptions():
) )
def test_empty_yaml_raises_invalid_rule():
    """Rule.from_yaml rejects unusable documents with a clear InvalidRule (issue #2900).

    Exercises every arm of the guard in Rule.from_yaml: a document that
    parses to None (empty or whitespace-only input), a document that is not
    a mapping, and a mapping that lacks the top-level "rule" key.
    """
    unusable_documents = (
        "",  # empty input parses to None
        " \n \n",  # whitespace-only input also parses to None
        "- api: CreateFile\n",  # a sequence, not a mapping
        "meta:\n  name: test rule\n",  # a mapping, but without a "rule" key
    )
    for document in unusable_documents:
        # match= performs a regex search, so the "invalid rule: " prefix that
        # InvalidRule.__str__ prepends does not get in the way.
        with pytest.raises(capa.rules.InvalidRule, match="empty or invalid YAML document"):
            capa.rules.Rule.from_yaml(document)
def test_get_rules_skips_empty_yaml(tmp_path):
    """get_rules skips an empty rule file with a warning instead of raising (issue #2900).

    An empty file and a valid rule are placed side by side in the same
    directory; only the valid rule should end up in the result.
    """
    (tmp_path / "empty.yml").write_bytes(b"")
    # The nesting below is significant: meta and features belong under the
    # top-level "rule" key, and scopes belongs under meta.
    (tmp_path / "valid.yml").write_text(
        textwrap.dedent(
            """
            rule:
                meta:
                    name: test rule
                    scopes:
                        static: function
                        dynamic: process
                features:
                    - api: CreateFile
            """
        ),
        encoding="utf-8",
    )

    # empty.yml is skipped with a warning; valid.yml is loaded normally
    rules = capa.rules.get_rules([tmp_path], cache_dir=tmp_path, enable_cache=False)
    assert len(rules) == 1
def test_rule_yaml_not(): def test_rule_yaml_not():
rule = textwrap.dedent( rule = textwrap.dedent(
""" """