diff --git a/CHANGELOG.md b/CHANGELOG.md index 97140ce4..d1779212 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,11 @@ - extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736 ### New Features +- Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04) ### Breaking Changes -### New Rules (6) +### New Rules (7) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -15,6 +16,7 @@ - linking/static/aplib/linked-against-aplib still@teamt5.org - communication/mailslot/read-from-mailslot nick.simonian@mandiant.com - nursery/hash-data-using-sha512managed-in-dotnet jonathanlepore@google.com +- nursery/compiled-with-exescript jonathanlepore@google.com - ### Bug Fixes diff --git a/README.md b/README.md index b6d3936f..16d561bc 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-798-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-799-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 0db65de1..188e6552 
import sys
import logging
import argparse

import capa.main
import capa.rules
import capa.engine as ceng

logger = logging.getLogger("detect_duplicate_features")


def get_child_features(feature: ceng.Statement) -> list:
    """
    Recursively collect the leaf nodes of a rule statement tree.

    Args:
        feature (capa.engine.Statement): The root of the (sub)tree to walk.

    Returns:
        list: Every node reached that is not one of the container statements
            handled below, in traversal order. Duplicates are kept.
    """
    # Containers holding several operands expose them through `.children`.
    if isinstance(feature, (ceng.And, ceng.Or, ceng.Some)):
        children = []
        for child in feature.children:
            children.extend(get_child_features(child))
        return children

    # Containers wrapping exactly one operand expose it through `.child`.
    if isinstance(feature, (ceng.Subscope, ceng.Range, ceng.Not)):
        return get_child_features(feature.child)

    # Anything else is treated as a leaf.
    return [feature]


def get_features(rule_path: str) -> list:
    """
    Extract all leaf features from a single rule file.

    Args:
        rule_path (str): The path to the rule file to extract features from.

    Returns:
        list: The leaves of the rule's statement tree, as produced by
            get_child_features().

    Raises:
        SystemExit: With status -1, after logging the error, when the file
            cannot be read or cannot be parsed as a rule.
    """
    try:
        # The open() call sits inside the try so that a missing or unreadable
        # file is reported the same way as a malformed rule, not as a traceback.
        # NOTE(review): rule files are assumed to be UTF-8 YAML; reading them
        # with an explicit encoding avoids depending on the platform default.
        with open(rule_path, "r", encoding="utf-8") as f:
            new_rule = capa.rules.Rule.from_yaml(f.read())
        return get_child_features(new_rule.statement)
    except Exception as e:
        # Top-level boundary of a command-line script: report and terminate.
        logger.error("Error: New rule %s %s %s", rule_path, type(e), e)
        sys.exit(-1)


def find_overlapping_rules(new_rule_path, rules_path):
    """
    Find the existing rules that share at least one feature with a new rule.

    Args:
        new_rule_path (str): Path to the new rule file; must end with ".yml".
        rules_path: Rule location(s), passed unchanged to capa.main.get_rules().
            main() supplies a list of path strings.

    Returns:
        dict: {"overlapping_rules": [...], "count": int}. "overlapping_rules"
            holds the keys of `ruleset.rules` whose rule shares a feature with
            the new rule; "count" is the number of rules that had at least one
            feature and were therefore compared.

    Raises:
        SystemExit: With status -1 when new_rule_path does not end with ".yml",
            or when the new rule cannot be loaded (see get_features()).
    """
    if not new_rule_path.endswith(".yml"):
        logger.error("FileNotFoundError ! New rule file name doesn't end with .yml")
        sys.exit(-1)

    # Loads features of new rule in a list.
    new_rule_features = get_features(new_rule_path)

    count = 0
    overlapping_rules = []

    # capa.rules.RuleSet stores all rules in given paths
    ruleset = capa.main.get_rules(rules_path)

    for rule_name, rule in ruleset.rules.items():
        rule_features = get_child_features(rule.statement)

        # Rules without any leaf feature are skipped and not counted.
        if not rule_features:
            continue
        count += 1

        # A generator lets any() stop at the first shared feature instead of
        # evaluating every membership test up front.
        if any(feature in rule_features for feature in new_rule_features):
            overlapping_rules.append(rule_name)

    return {"overlapping_rules": overlapping_rules, "count": count}


def main():
    """
    Command-line entry point: report rules overlapping with a new rule.

    Returns:
        int: The number of overlapping rules, capped at 255, intended to be
            used as the process exit status (0 means no overlap).
    """
    parser = argparse.ArgumentParser(description="Find overlapping features in Capa rules.")

    parser.add_argument("rules", type=str, action="append", help="Path to rules")
    parser.add_argument("new_rule", type=str, help="Path to new rule")

    args = parser.parse_args()

    new_rule_path = args.new_rule
    rules_path = args.rules

    result = find_overlapping_rules(new_rule_path, rules_path)
    overlapping_rules = result["overlapping_rules"]

    print("\nNew rule path : %s" % new_rule_path)
    print("Number of rules checked : %s " % result["count"])
    if overlapping_rules:
        print("Paths to overlapping rules : ")
        for r in overlapping_rules:
            print("- %s" % r)
    else:
        print("Paths to overlapping rules : None")
    print("Number of rules containing same features : %s" % len(overlapping_rules))
    print("\n")

    # Exit statuses are truncated to 8 bits on POSIX, so an uncapped count of
    # 256 would be reported as 0 ("no overlap"). Cap it to keep any overlap
    # non-zero; the exact count is still printed above.
    return min(len(overlapping_rules), 255)


if __name__ == "__main__":
    sys.exit(main())
def test_detect_duplicate_features(tmpdir):
    # A rule kept outside the ruleset directory; none of its features are
    # used by the rules written into that directory.
    standalone_rule = textwrap.dedent(
        """
        rule:
            meta:
                name: Test Rule 0
                scope: function
            features:
              - and:
                - number: 1
                - not:
                  - string: process
        """
    )

    # The rules written into the ruleset directory, keyed by file stem.
    ruleset_sources = {
        "rule_1": textwrap.dedent(
            """
            rule:
                meta:
                    name: Test Rule 1
                features:
                  - or:
                    - string: unique
                    - number: 2
                    - and:
                      - or:
                        - arch: i386
                        - number: 4
                      - not:
                        - count(mnemonic(xor)): 5
                    - not:
                      - os: linux
            """
        ),
        "rule_2": textwrap.dedent(
            """
            rule:
                meta:
                    name: Test Rule 2
                features:
                  - and:
                    - string: "sites.ini"
                    - basic block:
                      - and:
                        - api: CreateFile
                        - mnemonic: xor
            """
        ),
        "rule_3": textwrap.dedent(
            """
            rule:
                meta:
                    name: Test Rule 3
                features:
                  - or:
                    - not:
                      - number: 4
                    - basic block:
                      - and:
                        - api: bind
                        - number: 2
            """
        ),
        "rule_4": textwrap.dedent(
            """
            rule:
                meta:
                    name: Test Rule 4
                features:
                  - not:
                    - string: "expa"
            """
        ),
    }

    # Expected number of overlaps reported for rule_0 .. rule_4, in that order.
    # A rule that lives in the ruleset directory also overlaps with itself:
    #   - Rule 0 has zero overlaps in the ruleset
    #   - Rule 1 overlaps with itself and 3 other rules in the ruleset
    #   - Rule 4 overlaps only with itself
    # NOTE(review): presumably the "other rules" include entries the loader
    # derives from `basic block` subscopes - confirm against capa.main.get_rules.
    expected_counts = [0, 4, 3, 3, 1]

    ruleset_dir = tmpdir.mkdir("capa_rule_overlap_test")

    # rule_0 is written next to, not inside, the ruleset directory.
    standalone_file = tmpdir.join("rule_0.yml")
    standalone_file.write(standalone_rule)
    candidate_paths = [standalone_file.strpath]

    for stem, yaml_text in ruleset_sources.items():
        target = ruleset_dir.join("%s.yml" % stem)
        target.write(yaml_text)
        candidate_paths.append(target.strpath)

    # Run the script once per candidate rule against the same ruleset directory;
    # the overlap count is read from the process return code.
    script = get_script_path("detect_duplicate_features.py")
    for wanted, candidate in zip(expected_counts, candidate_paths):
        completed = run_program(script, [ruleset_dir.strpath, candidate])
        assert completed.returncode == wanted