From 3d0bd64e1b7f019f9b4ea1effd886d0e614ad298 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 29 Jun 2020 05:54:56 -0600 Subject: [PATCH 1/4] engine, rules: support matching namespaces, not just rule names closes #37 --- capa/engine.py | 11 ++++++++- capa/rules.py | 57 ++++++++++++++++++++++++++++++++++++++++---- tests/test_engine.py | 50 ++++++++++++++++++++++++++++++++++++++ tests/test_rules.py | 46 +++++++++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+), 5 deletions(-) diff --git a/capa/engine.py b/capa/engine.py index 6922a0f8..4be1e32c 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -221,6 +221,9 @@ def topologically_order_rules(rules): assumes that the rule dependency graph is a DAG. ''' + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. + rules = list(rules) + namespaces = capa.rules.index_rules_by_namespace(rules) rules = {rule.name: rule for rule in rules} seen = set([]) ret = [] @@ -229,7 +232,7 @@ def topologically_order_rules(rules): if rule.name in seen: return - for dep in rule.get_dependencies(): + for dep in rule.get_dependencies(namespaces): rec(rules[dep]) ret.append(rule) @@ -267,4 +270,10 @@ def match(rules, features, va): results[rule.name].append((va, res)) features[capa.features.MatchedRule(rule.name)].add(va) + namespace = rule.meta.get('namespace') + if namespace: + while namespace: + features[capa.features.MatchedRule(namespace)].add(va) + namespace, _, _ = namespace.rpartition('/') + return (features, results) diff --git a/capa/rules.py b/capa/rules.py index d60c17c9..920a2a50 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -424,12 +424,16 @@ class Rule(object): def __repr__(self): return 'Rule(scope=%s, name=%s)' % (self.scope, self.name) - def get_dependencies(self): + def get_dependencies(self, namespaces): ''' fetch the names of rules this rule relies upon. 
these are only the direct dependencies; a user must compute the transitive dependency graph themself, if they want it. + Args: + namespaces(Dict[str, List[Rule]]): mapping from namespace name to rules in it. + see `index_rules_by_namespace`. + Returns: List[str]: names of rules upon which this rule depends. ''' @@ -437,7 +441,12 @@ class Rule(object): def rec(statement): if isinstance(statement, capa.features.MatchedRule): - deps.add(statement.rule_name) + if statement.rule_name in namespaces: + # appears to be a namespace + deps.update(map(lambda r: r.name, namespaces[statement.rule_name])) + else: + # must be a rule name + deps.add(statement.rule_name) elif isinstance(statement, Statement): for child in statement.get_children(): @@ -634,12 +643,15 @@ def get_rules_and_dependencies(rules, rule_name): yields: Rule: ''' + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. + rules = list(rules) + namespaces = index_rules_by_namespace(rules) rules = {rule.name: rule for rule in rules} wanted = set([rule_name]) def rec(rule): wanted.add(rule.name) - for dep in rule.get_dependencies(): + for dep in rule.get_dependencies(namespaces): rec(rules[dep]) rec(rules[rule_name]) @@ -664,13 +676,50 @@ def ensure_rule_dependencies_are_met(rules): raises: InvalidRule: if a dependency is not met. ''' + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. + rules = list(rules) + namespaces = index_rules_by_namespace(rules) rules = {rule.name: rule for rule in rules} for rule in rules.values(): - for dep in rule.get_dependencies(): + for dep in rule.get_dependencies(namespaces): if dep not in rules: raise InvalidRule('rule "%s" depends on missing rule "%s"' % (rule.name, dep)) +def index_rules_by_namespace(rules): + ''' + compute the rules that fit into each namespace found within the given rules. 
+ + for example, given: + + - c2/shell :: create reverse shell + - c2/file-transfer :: download and write a file + + return the index: + + c2/shell: [create reverse shell] + c2/file-transfer: [download and write a file] + c2: [create reverse shell, download and write a file] + + Args: + rules (List[Rule]): + + Returns: Dict[str, List[Rule]] + ''' + namespaces = collections.defaultdict(list) + + for rule in rules: + namespace = rule.meta.get('namespace') + if not namespace: + continue + + while namespace: + namespaces[namespace].append(rule) + namespace, _, _ = namespace.rpartition('/') + + return dict(namespaces) + + class RuleSet(object): ''' a ruleset is initialized with a collection of rules, which it verifies and sorts into scopes. diff --git a/tests/test_engine.py b/tests/test_engine.py index 426ac742..3bebc737 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -126,6 +126,7 @@ def test_match_matched_rules(): - match: test rule1 ''')), ] + features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), {capa.features.insn.Number(100): {1}}, 0x0) assert capa.features.MatchedRule('test rule1') in features @@ -215,3 +216,52 @@ def test_regex_complex(): features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), {capa.features.String(r'Hardware\Key\key with spaces\some value'): {1}}, 0x0) assert capa.features.MatchedRule('test rule') in features + + +def test_match_namespace(): + rules = [ + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: CreateFile API + namespace: file/create + features: + - api: CreateFile + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: WriteFile API + namespace: file/write + features: + - api: WriteFile + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: file-create + features: + - match: file/create + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: filesystem-any + 
features: + - match: file + ''')), + ] + + features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), + {capa.features.insn.API('CreateFile'): {1}}, + 0x0) + assert 'CreateFile API' in matches + assert 'file-create' in matches + assert 'filesystem-any' in matches + + features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), + {capa.features.insn.API('WriteFile'): {1}}, + 0x0) + assert 'WriteFile API' in matches + assert 'file-create' not in matches + assert 'filesystem-any' in matches diff --git a/tests/test_rules.py b/tests/test_rules.py index e67ff780..3e98b5bf 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -452,3 +452,49 @@ def test_filter_rules_missing_dependency(): - match: rule 2 ''')), ]) + + +def test_rules_namespace_dependencies(): + rules = [ + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: rule 1 + namespace: ns1/nsA + features: + - api: CreateFile + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: rule 2 + namespace: ns1/nsB + features: + - api: CreateFile + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: rule 3 + features: + - match: ns1/nsA + ''')), + capa.rules.Rule.from_yaml(textwrap.dedent(''' + rule: + meta: + name: rule 4 + features: + - match: ns1 + ''')), + ] + + r3 = set(map(lambda r: r.name, capa.rules.get_rules_and_dependencies(rules, 'rule 3'))) + assert 'rule 1' in r3 + assert 'rule 2' not in r3 + assert 'rule 4' not in r3 + + r4 = set(map(lambda r: r.name, capa.rules.get_rules_and_dependencies(rules, 'rule 4'))) + assert 'rule 1' in r4 + assert 'rule 2' in r4 + assert 'rule 3' not in r4 + From 990c2010e971667455f5f3ed394a91a8b1e1ee2b Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 29 Jun 2020 05:57:46 -0600 Subject: [PATCH 2/4] pep8 --- tests/test_engine.py | 56 +++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git 
a/tests/test_engine.py b/tests/test_engine.py index 3bebc737..0af95518 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -43,8 +43,10 @@ def test_some(): assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}}) == False assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}}) == False assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}}) == True - assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}}) == True - assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}, Number(4): {1}}) == True + assert Some(2, Number(1), Number(2), Number(3)).evaluate( + {Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}}) == True + assert Some(2, Number(1), Number(2), Number(3)).evaluate( + {Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}, Number(4): {1}}) == True def test_complex(): @@ -118,7 +120,7 @@ def test_match_matched_rules(): features: - number: 100 ''')), - capa.rules.Rule.from_yaml(textwrap.dedent(''' + capa.rules.Rule.from_yaml(textwrap.dedent(''' rule: meta: name: test rule2 @@ -128,14 +130,14 @@ def test_match_matched_rules(): ] features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), - {capa.features.insn.Number(100): {1}}, 0x0) + {capa.features.insn.Number(100): {1}}, 0x0) assert capa.features.MatchedRule('test rule1') in features assert capa.features.MatchedRule('test rule2') in features # the ordering of the rules must not matter, # the engine should match rules in an appropriate order. 
features, matches = capa.engine.match(capa.engine.topologically_order_rules(reversed(rules)), - {capa.features.insn.Number(100): {1}}, 0x0) + {capa.features.insn.Number(100): {1}}, 0x0) assert capa.features.MatchedRule('test rule1') in features assert capa.features.MatchedRule('test rule2') in features @@ -168,11 +170,11 @@ def test_regex(): ''')), ] features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), - {capa.features.insn.Number(100): {1}}, 0x0) + {capa.features.insn.Number(100): {1}}, 0x0) assert capa.features.MatchedRule('test rule') not in features features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), - {capa.features.String('aaaa'): {1}}, 0x0) + {capa.features.String('aaaa'): {1}}, 0x0) assert capa.features.MatchedRule('test rule') not in features features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), @@ -180,7 +182,7 @@ def test_regex(): assert capa.features.MatchedRule('test rule') not in features features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), - {capa.features.String('abbbba'): {1}}, 0x0) + {capa.features.String('abbbba'): {1}}, 0x0) assert capa.features.MatchedRule('test rule') in features assert capa.features.MatchedRule('rule with implied wildcards') in features assert capa.features.MatchedRule('rule with anchor') not in features @@ -214,13 +216,13 @@ def test_regex_complex(): ''')), ] features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), - {capa.features.String(r'Hardware\Key\key with spaces\some value'): {1}}, 0x0) + {capa.features.String(r'Hardware\Key\key with spaces\some value'): {1}}, 0x0) assert capa.features.MatchedRule('test rule') in features def test_match_namespace(): - rules = [ - capa.rules.Rule.from_yaml(textwrap.dedent(''' + rules = [ + capa.rules.Rule.from_yaml(textwrap.dedent(''' rule: meta: name: CreateFile API @@ -228,7 +230,7 @@ def test_match_namespace(): features: - api: 
CreateFile ''')), - capa.rules.Rule.from_yaml(textwrap.dedent(''' + capa.rules.Rule.from_yaml(textwrap.dedent(''' rule: meta: name: WriteFile API @@ -236,32 +238,32 @@ def test_match_namespace(): features: - api: WriteFile ''')), - capa.rules.Rule.from_yaml(textwrap.dedent(''' + capa.rules.Rule.from_yaml(textwrap.dedent(''' rule: meta: name: file-create features: - match: file/create ''')), - capa.rules.Rule.from_yaml(textwrap.dedent(''' + capa.rules.Rule.from_yaml(textwrap.dedent(''' rule: meta: name: filesystem-any features: - match: file ''')), - ] + ] - features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), - {capa.features.insn.API('CreateFile'): {1}}, - 0x0) - assert 'CreateFile API' in matches - assert 'file-create' in matches - assert 'filesystem-any' in matches + features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), + {capa.features.insn.API('CreateFile'): {1}}, + 0x0) + assert 'CreateFile API' in matches + assert 'file-create' in matches + assert 'filesystem-any' in matches - features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), - {capa.features.insn.API('WriteFile'): {1}}, - 0x0) - assert 'WriteFile API' in matches - assert 'file-create' not in matches - assert 'filesystem-any' in matches + features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), + {capa.features.insn.API('WriteFile'): {1}}, + 0x0) + assert 'WriteFile API' in matches + assert 'file-create' not in matches + assert 'filesystem-any' in matches From 970977ade5883014ffafccf5a271f40f127014fe Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 30 Jun 2020 00:20:40 -0600 Subject: [PATCH 3/4] tests: demonstrate a bit more depth to namespace matching --- tests/test_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_engine.py b/tests/test_engine.py index 0af95518..5c7c9a3c 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ 
-226,7 +226,7 @@ def test_match_namespace(): rule: meta: name: CreateFile API - namespace: file/create + namespace: file/create/CreateFile features: - api: CreateFile ''')), @@ -260,6 +260,9 @@ def test_match_namespace(): assert 'CreateFile API' in matches assert 'file-create' in matches assert 'filesystem-any' in matches + assert capa.features.MatchedRule('file') in features + assert capa.features.MatchedRule('file/create') in features + assert capa.features.MatchedRule('file/create/CreateFile') in features features, matches = capa.engine.match(capa.engine.topologically_order_rules(rules), {capa.features.insn.API('WriteFile'): {1}}, From eee15b7658125d6a3698c5250e797affba9e03ae Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 30 Jun 2020 00:25:37 -0600 Subject: [PATCH 4/4] rules: add documentation about how namespace vs rule names are matched --- capa/rules.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index 9a7c823d..8e42d0fc 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -441,11 +441,19 @@ class Rule(object): def rec(statement): if isinstance(statement, capa.features.MatchedRule): + # we're not sure at this point if the `statement.rule_name` is + # really a rule name or a namespace name (we use `MatchedRule` for both cases). + # we'll give precedence to namespaces, and then assume if that doesn't work, + # that it must be a rule name. + # + # we don't expect any collisions between namespaces and rule names, but it's possible. + # most likely would be collision between top level namespace (e.g. `host-interaction`) and rule name. + # but, namespaces tend to use `-` while rule names use ` `. so, unlikely, but possible. if statement.rule_name in namespaces: - # appears to be a namespace + # matches a namespace, so take precedence and don't even check rule names. 
deps.update(map(lambda r: r.name, namespaces[statement.rule_name])) else: - # must be a rule name + # not a namespace, assume it's a rule name. deps.add(statement.rule_name) elif isinstance(statement, Statement):