From 9eacf72366a167267d28fe243a8fa0202e4099b7 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Sat, 6 May 2023 17:19:57 +0530 Subject: [PATCH] Update detect_duplicate_features.py loading yaml file using capa.rules.Rule.from_yaml. Returning any exception/errors occurring while checking the files. --- scripts/detect_duplicate_features.py | 81 ++++++++++++++-------------- 1 file changed, 39 insertions(+), 42 deletions(-) diff --git a/scripts/detect_duplicate_features.py b/scripts/detect_duplicate_features.py index 9aba66ad..b61d36f9 100644 --- a/scripts/detect_duplicate_features.py +++ b/scripts/detect_duplicate_features.py @@ -1,63 +1,61 @@ import os import argparse -import yaml +import capa.rules +import capa.engine as ceng -def findall_features(features): - feature_list = [] - for feature in features: - if "and" in feature: - and_list = findall_features(feature["and"]) - for x in and_list: - feature_list.append(x) - elif "or" in feature: - or_list = findall_features(feature["or"]) - for y in or_list: - feature_list.append(y) - else: - feature_list.append(feature) +def get_child_features(feature): + children = [] - return feature_list + if isinstance(feature, (ceng.And, ceng.Or, ceng.Some)): + for child in feature.children: + children.extend(get_child_features(child)) + elif isinstance(feature, (ceng.Subscope, ceng.Range, ceng.Not)): + children.extend(get_child_features(feature.child)) + else: + children.append(feature) + return children + + +def get_features(rule_path, errors): + with open(rule_path, "r") as f: + feature_list = [] + try: + new_rule = capa.rules.Rule.from_yaml(f.read()) + feature_list = get_child_features(new_rule.statement) + except Exception as e: + errors.append("rule :" + rule_path + " " + str(type(e)) + " " + str(e)) + return feature_list, errors def find_overlapping_rules(new_rule_path, rules_path): if not new_rule_path.endswith(".yml"): - raise ValueError("ERROR ! 
New rule path file name incorrect") + raise FileNotFoundError("FileNotFoundError ! New rule file name doesn't end with yml") + new_rule_features, error = get_features(new_rule_path, []) + if error: + raise Warning(error[0]) + + errors: list = [] count = 0 - - with open(new_rule_path, "r") as f: - new_rule = yaml.safe_load(f) - - if "rule" not in new_rule: - raise ValueError("ERROR ! given new rule path isn't a rule") - - new_rule_features = findall_features(new_rule["rule"]["features"]) - overlapping_rules = [] for rules in rules_path: for dirpath, dirnames, filenames in os.walk(rules): for filename in filenames: if filename.endswith(".yml"): rule_path = os.path.join(dirpath, filename) - with open(rule_path, "r") as f: - rule = yaml.safe_load(f) - if "rule" not in rule: - continue - rule_features = findall_features(rule["rule"]["features"]) - count += 1 + rule_features, errors = get_features(rule_path, errors) + if not len(rule_features): + continue + count += 1 if any([feature in rule_features for feature in new_rule_features]): overlapping_rules.append(rule_path) - result = {"overlapping_rules": overlapping_rules, "count": count} - + result = {"overlapping_rules": overlapping_rules, "count": count, "errors": errors} return result -# python script.py --base-dir /path/to/capa/rules rules/anti-analysis/reference-analysis-tools-strings.yml rules - - def main(): parser = argparse.ArgumentParser(description="Find overlapping features in Capa rules.") @@ -68,19 +66,18 @@ def main(): new_rule_path = args.new_rule rules_path = args.rules - try: result = find_overlapping_rules(new_rule_path, rules_path) - print("New rule path : %s" % new_rule_path) + print("\nNew rule path : %s" % new_rule_path) print("Number of rules checked : %s " % result["count"]) print("Paths to overlapping rules : ", result["overlapping_rules"]) print("Number of rules containing same features : %s" % len(result["overlapping_rules"])) + + print("\nWhile checking following .yml files error occured:") + 
for error in result["errors"]: + print(error) except Exception as e: print(e) - try: - print(result) - except: - pass if __name__ == "__main__":