diff --git a/capa/main.py b/capa/main.py index 5600caab..63d13cfb 100644 --- a/capa/main.py +++ b/capa/main.py @@ -583,18 +583,9 @@ def get_rules(rule_path): if not os.path.exists(rule_path): raise IOError('%s does not exist or cannot be accessed' % rule_path) - rules = [] + rule_paths = [] if os.path.isfile(rule_path): - logger.info('reading rule file: %s', rule_path) - with open(rule_path, 'rb') as f: - rule = capa.rules.Rule.from_yaml(f.read().decode('utf-8')) - - if is_nursery_rule_path(rule_path): - rule.meta['nursery'] = True - - rules.append(rule) - logger.debug('rule: %s scope: %s', rule.name, rule.scope) - + rule_paths.append(rule_path) elif os.path.isdir(rule_path): logger.info('reading rules from directory %s', rule_path) for root, dirs, files in os.walk(rule_path): @@ -603,18 +594,24 @@ def get_rules(rule_path): logger.warning('skipping non-.yml file: %s', file) continue - path = os.path.join(root, file) - logger.debug('reading rule file: %s', path) - try: - rule = capa.rules.Rule.from_yaml_file(path) - except capa.rules.InvalidRule: - raise - else: - if is_nursery_rule_path(root): - rule.meta['nursery'] = True + rule_path = os.path.join(root, file) + rule_paths.append(rule_path) + + rules = [] + for rule_path in rule_paths: + logger.info('reading rule file: %s', rule_path) + try: + rule = capa.rules.Rule.from_yaml_file(rule_path) + except capa.rules.InvalidRule: + raise + else: + rule.meta['capa/path'] = rule_path + if is_nursery_rule_path(rule_path): + rule.meta['capa/nursery'] = True + + rules.append(rule) + logger.debug('rule: %s scope: %s', rule.name, rule.scope) - rules.append(rule) - logger.debug('rule: %s scope: %s', rule.name, rule.scope) return rules diff --git a/capa/rules.py b/capa/rules.py index a8e9b568..a742a40d 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -566,8 +566,27 @@ class Rule(object): continue move_to_end(meta, key) + # these are meta fields that are internal to capa, + # and added during rule reading/construction. 
+ # they may help us manipulate or index rules, + # but should not be exposed to clients. + hidden_meta_keys = ("capa/nursery", "capa/path") + hidden_meta = { + key: meta.get(key) + for key in hidden_meta_keys + } + + for key in hidden_meta.keys(): + del meta[key] + ostream = six.BytesIO() yaml.dump(definition, ostream) + + for key, value in hidden_meta.items(): + if value is None: + continue + meta[key] = value + return ostream.getvalue().decode('utf-8').rstrip("\n") + "\n" diff --git a/scripts/lint.py b/scripts/lint.py index ecb7d55f..29e30b97 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -45,7 +45,7 @@ class MissingNamespace(Lint): def check_rule(self, ctx, rule): return ('namespace' not in rule.meta and - 'nursery' not in rule.meta and + not is_nursery_rule(rule) and 'maec/malware-category' not in rule.meta and 'lib' not in rule.meta) @@ -250,7 +250,7 @@ def is_nursery_rule(rule): For example, they may not have references to public example of a technique. Yet, we still want to capture and report on their matches. ''' - return rule.meta.get('nursery') + return rule.meta.get('capa/nursery') def lint_rule(ctx, rule): diff --git a/scripts/migrate-rules.py b/scripts/migrate-rules.py index 968e7cce..c8de7d2c 100644 --- a/scripts/migrate-rules.py +++ b/scripts/migrate-rules.py @@ -49,7 +49,7 @@ def read_rules(rule_directory): rules[rule.name] = rule if "nursery" in path: - rule.meta["nursery"] = True + rule.meta["capa/nursery"] = True return rules @@ -132,10 +132,8 @@ def main(argv=None): filename = filename + ".yml" try: - if rule.meta.get("nursery"): + if rule.meta.get("capa/nursery"): directory = os.path.join(args.destination, "nursery") - # this isn't meant to be written into the rule - del rule.meta["nursery"] elif rule.meta.get("lib"): directory = os.path.join(args.destination, "lib") else: