From 39d2a70679b01f7a7cd0f08e22221cd0d5bd6511 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Mon, 8 May 2023 17:29:01 +0530 Subject: [PATCH] Update detect_duplicate_features.py Using get_rules method to get set of all existing rules. --- scripts/detect_duplicate_features.py | 68 +++++++++++++++------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/scripts/detect_duplicate_features.py b/scripts/detect_duplicate_features.py index 4f1c7198..b44a30ad 100644 --- a/scripts/detect_duplicate_features.py +++ b/scripts/detect_duplicate_features.py @@ -1,11 +1,16 @@ -import os import argparse +import capa.main import capa.rules import capa.engine as ceng -def get_child_features(feature): +def get_child_features(feature) -> list: + """ + args: + \tfeature : capa.rule.Rule.statement containing feature statements + returns a list containg all the features in the rule + """ children = [] if isinstance(feature, (ceng.And, ceng.Or, ceng.Some)): @@ -19,43 +24,45 @@ def get_child_features(feature): def get_features(rule_path): - error = "" + """ + args: + \tfeature : rule path + returns a list containg all the features in the rule + """ feature_list = [] with open(rule_path, "r") as f: try: new_rule = capa.rules.Rule.from_yaml(f.read()) feature_list = get_child_features(new_rule.statement) except Exception as e: - error = "rule :" + rule_path + " " + str(type(e)) + " " + str(e) - return feature_list, error + raise Warning("Error: " + rule_path + " " + str(type(e)) + " " + str(e)) + return feature_list def find_overlapping_rules(new_rule_path, rules_path): if not new_rule_path.endswith(".yml"): raise FileNotFoundError("FileNotFoundError ! New rule file name doesn't end with yml") - new_rule_features, error = get_features(new_rule_path) - if error: - raise Warning(error) + # Loads features of new rule in a list. 
+ new_rule_features = get_features(new_rule_path) - errors: list = [] count = 0 overlapping_rules = [] - for rules in rules_path: - for dirpath, dirnames, filenames in os.walk(rules): - for filename in filenames: - if filename.endswith(".yml"): - rule_path = os.path.join(dirpath, filename) - rule_features, error = get_features(rule_path) - if error: - errors.append(error) - if not len(rule_features): - continue - count += 1 - if any([feature in rule_features for feature in new_rule_features]): - overlapping_rules.append(rule_path) - result = {"overlapping_rules": overlapping_rules, "count": count, "errors": errors} + # capa.rules.RuleSet stores all rules in given paths + ruleset = capa.main.get_rules(rules_path) + + for rule_name, rule in ruleset.rules.items(): + rule_features = get_child_features(rule.statement) + + if not len(rule_features): + continue + count += 1 + # Checks if any features match between existing and new rule. + if any([feature in rule_features for feature in new_rule_features]): + overlapping_rules.append(rule_name) + + result = {"overlapping_rules": overlapping_rules, "count": count} return result @@ -73,15 +80,14 @@ def main(): result = find_overlapping_rules(new_rule_path, rules_path) print("\nNew rule path : %s" % new_rule_path) print("Number of rules checked : %s " % result["count"]) - print("Paths to overlapping rules : ") - for r in result["overlapping_rules"]: - print(r) + if result["overlapping_rules"]: + print("Paths to overlapping rules : ") + for r in result["overlapping_rules"]: + print("- %s" % r) + else: + print("Paths to overlapping rules : None") print("Number of rules containing same features : %s" % len(result["overlapping_rules"])) - if result["errors"]: - print("\nWhile checking following .yml files error occured:") - for error in result["errors"]: - print(error) - print("\n") + print("\n") except Exception as e: print(e)