fix capa2yara.py

This commit is contained in:
Pratham Chauhan
2023-04-05 16:28:00 +05:30
parent 6eaa46ea9a
commit eef1548baa
2 changed files with 18 additions and 10 deletions
+1 -1
View File
@@ -439,7 +439,7 @@ def extract_insn_peb_access_characteristic_features(
return True
value = right.value.value
if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)):
if (reg, value) not in (("fsbase", 0x30), ("gsbase", 0x60)):
return True
results.append((Characteristic("peb access"), ih.address))
+17 -9
View File
@@ -59,7 +59,8 @@ unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"]
# -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml
# -- https://github.com/mandiant/capa-rules/blob/master/collection/browser/gather-firefox-profile-information.yml
# - count(string (1 rule: /executable/subfile/pe/contain-an-embedded-pe-file.yml)
# - count(match( could be done by creating the referenced rule a 2nd time with the condition, that it hits x times (only 1 rule: ./anti-analysis/anti-disasm/contain-anti-disasm-techniques.yml)
# - count(match( could be done by creating the referenced rule a 2nd time with the condition, that it hits x times
# (only 1 rule: ./anti-analysis/anti-disasm/contain-anti-disasm-techniques.yml)
# - it would be technically possible to get the "basic blocks" working, but the rules contain mostly other non supported statements in there => not worth the effort.
# collect all converted rules to be able to check if we have needed sub rules for match:
@@ -128,7 +129,8 @@ def convert_capa_number_to_yara_bytes(number):
def convert_rule_name(rule_name):
# yara rule names: "Identifiers must follow the same lexical conventions of the C programming language, they can contain any alphanumeric character and the underscore character, but the first character cannot be a digit. Rule identifiers are case sensitive and cannot exceed 128 characters." so we replace any non-alphanum with _
# yara rule names: "Identifiers must follow the same lexical conventions of the C programming language, they can contain any alphanumeric character and the underscore character,
# but the first character cannot be a digit. Rule identifiers are case sensitive and cannot exceed 128 characters." so we replace any non-alphanum with _
rule_name = re.sub(r"\W", "_", rule_name)
rule_name = "capa_" + rule_name
@@ -197,7 +199,8 @@ def convert_rule(rule, rulename, cround, depth):
# even looking for empty string in dll_regex doesn't work for some files (list below) with pe.imports so do just a string search
# yara_condition += '\tpe.imports(/.{0,30}/i, /' + api + '/) '
# 5fbbfeed28b258c42e0cfeb16718b31c, 2D3EDC218A90F03089CC01715A9F047F, 7EFF498DE13CC734262F87E6B3EF38AB, C91887D861D9BD4A5872249B641BC9F9, a70052c45e907820187c7e6bcdc7ecca, 0596C4EA5AA8DEF47F22C85D75AACA95
# 5fbbfeed28b258c42e0cfeb16718b31c, 2D3EDC218A90F03089CC01715A9F047F, 7EFF498DE13CC734262F87E6B3EF38AB,
# C91887D861D9BD4A5872249B641BC9F9, a70052c45e907820187c7e6bcdc7ecca, 0596C4EA5AA8DEF47F22C85D75AACA95
var_name = "api_" + var_names.pop(0)
# limit regex with word boundary \b but also search for appended A and W
@@ -286,7 +289,8 @@ def convert_rule(rule, rulename, cround, depth):
# all .* in the regexes of capa look like they should be maximum 100 chars so take 1000 to speed up rules and prevent yara warnings on poor performance
regex = regex.replace(".*", ".{,1000}")
# strange: capa accepts regexes with unescaped / like - string: /com/exe4j/runtime/exe4jcontroller/i in capa-rules/compiler/exe4j/compiled-with-exe4j.yml, needs a fix for yara:
# strange: capa accepts regexes with unescaped /
# like - string: /com/exe4j/runtime/exe4jcontroller/i in capa-rules/compiler/exe4j/compiled-with-exe4j.yml, needs a fix for yara:
# would assume that get_value_str() gives the raw string
regex = re.sub(r"(?<!\\)/", r"\/", regex)
@@ -294,7 +298,8 @@ def convert_rule(rule, rulename, cround, depth):
# /reg(|.exe)/ => /reg(.exe)?/
regex = re.sub(r"\(\|([^\)]+)\)", r"(\1)?", regex)
# change beginning of line to null byte, e.g. /^open => /\x00open (not word boundary because we're not looking for the beginning of a word in a text but usually a function name if there's ^ in a capa rule)
# change beginning of line to null byte, e.g. /^open => /\x00open
# (not word boundary because we're not looking for the beginning of a word in a text but usually a function name if there's ^ in a capa rule)
regex = re.sub(r"^\^", r"\\x00", regex)
# regex = re.sub(r"^\^", r"\\b", regex)
@@ -420,7 +425,8 @@ def convert_rule(rule, rulename, cround, depth):
)
# remove last 'or'
# yara_condition = re.sub(r'\sor $', ' ', yara_condition)
rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped => coverage is reduced compared to the original capa rule. "
rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped "
rule_comment += "=> coverage is reduced compared to the original capa rule. "
x += 1
incomplete = 1
continue
@@ -446,7 +452,8 @@ def convert_rule(rule, rulename, cround, depth):
+ str(depth)
)
rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped => coverage is reduced compared to the original capa rule. "
rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped"
rule_comment += "=> coverage is reduced compared to the original capa rule. "
x += 1
incomplete = 1
continue
@@ -669,7 +676,8 @@ def convert_rules(rules, namespaces, cround, make_priv):
yara += " condition:" + condition_header + yara_condition + "\n}"
# TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta (doing it for all of them using yara-ci upload at the moment)
# TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta
# (doing it for all of them using yara-ci upload at the moment)
output_yar(yara)
converted_rules.append(rule_name)
count_incomplete += incomplete
@@ -719,7 +727,7 @@ def main(argv=None):
"// Rules from Mandiant's https://github.com/mandiant/capa-rules converted to YARA using https://github.com/mandiant/capa/blob/master/scripts/capa2yara.py by Arnim Rupp"
)
output_yar(
"// Beware: These are less rules than capa (because not all fit into YARA, stats at EOF) and is less precise because e.g. capas function scopes are applied to the whole file"
"// Beware: These are less rules than capa (because not all fit into YARA, stats at EOF) and is less precise e.g. capas function scopes are applied to the whole file"
)
output_yar(
'// Beware: Some rules are incomplete because an optional branch was not supported by YARA. These rules are marked in a comment in meta: (search for "incomplete")'