fix capa2yara.py

2026-07-28 14:47:08 -07:00 · 2023-04-05 16:28:00 +05:30
parent 6eaa46ea9a
commit eef1548baa
2 changed files with 18 additions and 10 deletions
@@ -439,7 +439,7 @@ def extract_insn_peb_access_characteristic_features(
            return True

        value = right.value.value
-        if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)):
+        if (reg, value) not in (("fsbase", 0x30), ("gsbase", 0x60)):
            return True

        results.append((Characteristic("peb access"), ih.address))
@@ -59,7 +59,8 @@ unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"]
 # -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml
 # -- https://github.com/mandiant/capa-rules/blob/master/collection/browser/gather-firefox-profile-information.yml
 # - count(string    (1 rule: /executable/subfile/pe/contain-an-embedded-pe-file.yml)
-# - count(match( could be done by creating the referenced rule a 2nd time with the condition, that it hits x times (only 1 rule: ./anti-analysis/anti-disasm/contain-anti-disasm-techniques.yml)
+# - count(match( could be done by creating the referenced rule a 2nd time with the condition, that it hits x times
+# (only 1 rule: ./anti-analysis/anti-disasm/contain-anti-disasm-techniques.yml)
 # - it would be technically possible to get the "basic blocks" working, but the rules contain mostly other non supported statements in there => not worth the effort.

 # collect all converted rules to be able to check if we have needed sub rules for match:
@@ -128,7 +129,8 @@ def convert_capa_number_to_yara_bytes(number):


 def convert_rule_name(rule_name):
-    # yara rule names: "Identifiers must follow the same lexical conventions of the C programming language, they can contain any alphanumeric character and the underscore character, but the first character cannot be a digit. Rule identifiers are case sensitive and cannot exceed 128 characters." so we replace any non-alphanum with _
+    # yara rule names: "Identifiers must follow the same lexical conventions of the C programming language, they can contain any alphanumeric character and the underscore character,
+    # but the first character cannot be a digit. Rule identifiers are case sensitive and cannot exceed 128 characters." so we replace any non-alphanum with _
    rule_name = re.sub(r"\W", "_", rule_name)
    rule_name = "capa_" + rule_name

@@ -197,7 +199,8 @@ def convert_rule(rule, rulename, cround, depth):

                # even looking for empty string in dll_regex doesn't work for some files (list below) with pe.imports so do just a string search
                # yara_condition += '\tpe.imports(/.{0,30}/i, /' + api + '/) '
-                # 5fbbfeed28b258c42e0cfeb16718b31c, 2D3EDC218A90F03089CC01715A9F047F, 7EFF498DE13CC734262F87E6B3EF38AB, C91887D861D9BD4A5872249B641BC9F9, a70052c45e907820187c7e6bcdc7ecca, 0596C4EA5AA8DEF47F22C85D75AACA95
+                # 5fbbfeed28b258c42e0cfeb16718b31c, 2D3EDC218A90F03089CC01715A9F047F, 7EFF498DE13CC734262F87E6B3EF38AB,
+                # C91887D861D9BD4A5872249B641BC9F9, a70052c45e907820187c7e6bcdc7ecca, 0596C4EA5AA8DEF47F22C85D75AACA95
                var_name = "api_" + var_names.pop(0)

                # limit regex with word boundary \b but also search for appended A and W
@@ -286,7 +289,8 @@ def convert_rule(rule, rulename, cround, depth):

            # all .* in the regexes of capa look like they should be maximum 100 chars so take 1000 to speed up rules and prevent yara warnings on poor performance
            regex = regex.replace(".*", ".{,1000}")
-            # strange: capa accepts regexes with unescaped / like - string: /com/exe4j/runtime/exe4jcontroller/i in capa-rules/compiler/exe4j/compiled-with-exe4j.yml, needs a fix for yara:
+            # strange: capa accepts regexes with unescaped /
+            # like - string: /com/exe4j/runtime/exe4jcontroller/i in capa-rules/compiler/exe4j/compiled-with-exe4j.yml, needs a fix for yara:
            # would assume that get_value_str() gives the raw string
            regex = re.sub(r"(?<!\\)/", r"\/", regex)

@@ -294,7 +298,8 @@ def convert_rule(rule, rulename, cround, depth):
            # /reg(|.exe)/ => /reg(.exe)?/
            regex = re.sub(r"\(\|([^\)]+)\)", r"(\1)?", regex)

-            # change beginning of line to null byte, e.g. /^open => /\x00open (not word boundary because we're not looking for the beginning of a word in a text but usually a function name if there's ^ in a capa rule)
+            # change beginning of line to null byte, e.g. /^open => /\x00open
+            # (not word boundary because we're not looking for the beginning of a word in a text but usually a function name if there's ^ in a capa rule)
            regex = re.sub(r"^\^", r"\\x00", regex)

            # regex = re.sub(r"^\^", r"\\b", regex)
@@ -420,7 +425,8 @@ def convert_rule(rule, rulename, cround, depth):
                        )
                        # remove last 'or'
                        # yara_condition = re.sub(r'\sor $', ' ', yara_condition)
-                        rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped => coverage is reduced compared to the original capa rule. "
+                        rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped "
+                        rule_comment += "=> coverage is reduced compared to the original capa rule. "
                        x += 1
                        incomplete = 1
                        continue
@@ -446,7 +452,8 @@ def convert_rule(rule, rulename, cround, depth):
                        + str(depth)
                    )

-                    rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped => coverage is reduced compared to the original capa rule. "
+                    rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped "
+                    rule_comment += "=> coverage is reduced compared to the original capa rule. "
                    x += 1
                    incomplete = 1
                    continue
@@ -669,7 +676,8 @@ def convert_rules(rules, namespaces, cround, make_priv):

                yara += "  condition:" + condition_header + yara_condition + "\n}"

-                # TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta (doing it for all of them using yara-ci upload at the moment)
+                # TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta
+                # (doing it for all of them using yara-ci upload at the moment)
                output_yar(yara)
                converted_rules.append(rule_name)
                count_incomplete += incomplete
@@ -719,7 +727,7 @@ def main(argv=None):
        "// Rules from Mandiant's https://github.com/mandiant/capa-rules converted to YARA using https://github.com/mandiant/capa/blob/master/scripts/capa2yara.py by Arnim Rupp"
    )
    output_yar(
-        "// Beware: These are less rules than capa (because not all fit into YARA, stats at EOF) and is less precise because e.g. capas function scopes are applied to the whole file"
+        "// Beware: These are less rules than capa (because not all fit into YARA, stats at EOF) and is less precise because e.g. capas function scopes are applied to the whole file"
    )
    output_yar(
        '// Beware: Some rules are incomplete because an optional branch was not supported by YARA. These rules are marked in a comment in meta: (search for "incomplete")'