diff --git a/CHANGELOG.md b/CHANGELOG.md
index 83a673a0..d2de4bd5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -93,7 +93,6 @@ It includes many new rules, including all new techniques introduced in MITRE ATT
 - nursery/list-udp-connections-and-listeners michael.hunhoff@fireeye.com
 - nursery/log-keystrokes-via-raw-input-data michael.hunhoff@fireeye.com
 - nursery/register-http-server-url michael.hunhoff@fireeye.com
--
 
 ### Bug Fixes
 
@@ -113,6 +112,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT
 - linter: check for `or` with always true child statement, e.g. `optional`, colors #348 @mr-tz
 - json: breaking change in results document; now contains parsed MBC fields instead of canonical representation #526 @mr-tz
 - json: breaking change: record all matching strings for regex #159 @williballenthin
+- main: implement file limitations via rules not code #390 @williballenthin
 
 ### Development
 
diff --git a/capa/main.py b/capa/main.py
index e25a533b..1f6174b8 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -189,57 +189,33 @@ def has_rule_with_namespace(rules, capabilities, rule_cat):
     return False
 
 
-def has_file_limitation(rules, capabilities, is_standalone=True):
-    file_limitations = {
-        # capa will likely detect installer specific functionality.
-        # this is probably not what the user wants.
-        "executable/installer": [
-            " This sample appears to be an installer.",
-            " ",
-            " capa cannot handle installers well. This means the results may be misleading or incomplete."
-            " You should try to understand the install mechanism and analyze created files with capa.",
-        ],
-        # capa won't detect much in .NET samples.
-        # it might match some file-level things.
-        # for consistency, bail on things that we don't support.
-        "runtime/dotnet": [
-            " This sample appears to be a .NET module.",
-            " ",
-            " .NET is a cross-platform framework for running managed applications.",
-            " capa cannot handle non-native files. This means that the results may be misleading or incomplete.",
-            " You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.",
-        ],
-        # capa will detect dozens of capabilities for AutoIt samples,
-        # but these are due to the AutoIt runtime, not the payload script.
-        # so, don't confuse the user with FP matches - bail instead
-        "compiler/autoit": [
-            " This sample appears to be compiled with AutoIt.",
-            " ",
-            " AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.",
-            " capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.",
-            " You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.",
-        ],
-        # capa won't detect much in packed samples
-        "anti-analysis/packer/": [
-            " This sample appears to be packed.",
-            " ",
-            " Packed samples have often been obfuscated to hide their logic.",
-            " capa cannot handle obfuscation well. This means the results may be misleading or incomplete.",
-            " If possible, you should try to unpack this input file before analyzing it with capa.",
-        ],
-    }
+def is_internal_rule(rule):
+    return rule.meta.get("namespace", "").startswith("internal/")
 
-    for category, dialogue in file_limitations.items():
-        if not has_rule_with_namespace(rules, capabilities, category):
+
+def is_file_limitation_rule(rule):
+    return rule.meta.get("namespace", "") == "internal/limitation/file"
+
+
+def has_file_limitation(rules, capabilities, is_standalone=True):
+    file_limitation_rules = list(filter(is_file_limitation_rule, rules.rules.values()))
+
+    for file_limitation_rule in file_limitation_rules:
+        if file_limitation_rule.name not in capabilities:
             continue
+
         logger.warning("-" * 80)
-        for line in dialogue:
-            logger.warning(line)
+        for line in file_limitation_rule.meta.get("description", "").split("\n"):
+            logger.warning(" " + line)
+        logger.warning(" Identified via rule: %s", file_limitation_rule.name)
         if is_standalone:
             logger.warning(" ")
             logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
         logger.warning("-" * 80)
+
+        # bail on first file limitation
         return True
+
     return False
 
 
diff --git a/capa/render/__init__.py b/capa/render/__init__.py
index 4ab153cc..bb199b50 100644
--- a/capa/render/__init__.py
+++ b/capa/render/__init__.py
@@ -123,7 +123,7 @@ def convert_match_to_result_document(rules, capabilities, result):
     if bool(result.success):
         doc["locations"] = result.locations
 
-    # if we have a `match` statement, then we're referencing another rule.
+    # if we have a `match` statement, then we're referencing another rule or namespace.
     # this could an external rule (written by a human), or
     # rule generated to support a subscope (basic block, etc.)
     # we still want to include the matching logic in this tree.
@@ -139,25 +139,68 @@ def convert_match_to_result_document(rules, capabilities, result):
         and doc["success"]
     ):
 
-        rule_name = doc["node"]["feature"]["match"]
-        rule = rules[rule_name]
-        rule_matches = {address: result for (address, result) in capabilities[rule_name]}
+        name = doc["node"]["feature"]["match"]
 
-        if rule.meta.get("capa/subscope-rule"):
-            # for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
+        if name in rules:
+            # this is a rule that we're matching
             #
-            # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
-            scope = rule.meta["scope"]
-            doc["node"] = {
-                "type": "statement",
-                "statement": {
-                    "type": "subscope",
-                    "subscope": scope,
-                },
-            }
+            # pull matches from the referenced rule into our tree here.
+            rule_name = doc["node"]["feature"]["match"]
+            rule = rules[rule_name]
+            rule_matches = {address: result for (address, result) in capabilities[rule_name]}
 
-        for location in doc["locations"]:
-            doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
+            if rule.meta.get("capa/subscope-rule"):
+                # for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
+                #
+                # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
+                scope = rule.meta["scope"]
+                doc["node"] = {
+                    "type": "statement",
+                    "statement": {
+                        "type": "subscope",
+                        "subscope": scope,
+                    },
+                }
+
+            for location in doc["locations"]:
+                doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
+        else:
+            # this is a namespace that we're matching
+            #
+            # check for all rules in the namespace,
+            # seeing if they matched.
+            # if so, pull their matches into our match tree here.
+            ns_name = doc["node"]["feature"]["match"]
+            ns_rules = rules.rules_by_namespace[ns_name]
+
+            for rule in ns_rules:
+                if rule.name in capabilities:
+                    # the rule matched, so splice results into our tree here.
+                    #
+                    # note, there's a shortcoming in our result document schema here:
+                    # we lose the name of the rule that matched in a namespace.
+                    # for example, if we have a statement: `match: runtime/dotnet`
+                    # and we get matches, we can say the following:
+                    #
+                    #     match: runtime/dotnet @ 0x0
+                    #       or:
+                    #         import: mscoree._CorExeMain @ 0x402000
+                    #
+                    # however, we lose the fact that it was rule
+                    #   "compiled to the .NET platform"
+                    # that contained this logic and did the match.
+                    #
+                    # we could introduce an intermediate node here.
+                    # this would be a breaking change and require updates to the renderers.
+                    # in the meantime, the above might be sufficient.
+                    rule_matches = {address: result for (address, result) in capabilities[rule.name]}
+                    for location in doc["locations"]:
+                        # other rules in the namespace may account for some of these locations,
+                        # so only splice in the ones that this rule actually matched at.
+                        if location in rule_matches:
+                            doc["children"].append(
+                                convert_match_to_result_document(rules, capabilities, rule_matches[location])
+                            )
 
     return doc
 
diff --git a/capa/rules.py b/capa/rules.py
index e19bd713..bc05e97f 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -884,6 +884,7 @@ class RuleSet(object):
         self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
         self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE)
         self.rules = {rule.name: rule for rule in rules}
+        self.rules_by_namespace = index_rules_by_namespace(rules)
 
     def __len__(self):
         return len(self.rules)
@@ -891,6 +892,9 @@ class RuleSet(object):
     def __getitem__(self, rulename):
         return self.rules[rulename]
 
+    def __contains__(self, rulename):
+        return rulename in self.rules
+
     @staticmethod
     def _get_rules_for_scope(rules, scope):
         """
diff --git a/scripts/lint.py b/scripts/lint.py
index e5c9ab28..9bd27c2e 100644
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -511,7 +511,7 @@ def get_normpath(path):
 def get_features(ctx, rule):
     # get features from rule and all dependencies including subscopes and matched rules
     features = []
-    namespaces = capa.rules.index_rules_by_namespace([rule])
+    namespaces = ctx["rules"].rules_by_namespace
     deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies(namespaces)]
     for r in [rule] + deps:
         features.extend(get_rule_features(r))