From 8a386b690949fe0f454b283a96935f8ff6299350 Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Tue, 18 May 2021 13:23:19 +0200 Subject: [PATCH 1/6] improve progress bar output --- CHANGELOG.md | 4 +++- capa/main.py | 24 +++++++++++++++++++----- capa/render/verbose.py | 1 + tests/test_main.py | 2 ++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3566bb00..a391db12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,11 @@ It includes many new rules, including all new techniques introduced in MITRE ATT ### New Features -- main: auto detect shellcode based on file extension #516 @mr-tz - main: use FLIRT signatures to identify and ignore library code #446 @williballenthin - explorer: IDA 7.6 support #497 @williballenthin - scripts: capa2yara.py convert capa rules to YARA rules #561 @ruppde +- main: auto detect shellcode based on file extension #516 @mr-tz +- main: more detailed progress bar output when loading rules and matching functions #562 @mr-tz ### New Rules (69) @@ -97,6 +98,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT - rules: update ATT&CK and MBC mappings https://github.com/fireeye/capa-rules/pull/317 @williballenthin - tests: update test cases and caching #545 @mr-tz - show-features: don't show features from library functions #569 @williballenthin +- meta: added `library_functions` field, `feature_counts.functions` does not include library functions any more #562 @mr-tz ### Development diff --git a/capa/main.py b/capa/main.py index 61a0eaa3..3c1bacd3 100644 --- a/capa/main.py +++ b/capa/main.py @@ -128,7 +128,8 @@ def find_capabilities(ruleset, extractor, disable_progress=None): "feature_counts": { "file": 0, "functions": {}, - } + }, + "library_functions": {}, } pbar = tqdm.tqdm @@ -138,13 +139,20 @@ def find_capabilities(ruleset, extractor, disable_progress=None): pbar = lambda s, *args, **kwargs: s functions = list(extractor.get_functions()) + n_funcs = 
len(functions) - for f in pbar(functions, desc="matching", unit=" functions"): + pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions") + for f in pb: function_address = int(f) if extractor.is_library_function(function_address): function_name = extractor.get_function_name(function_address) logger.debug("skipping library function 0x%x (%s)", function_address, function_name) + meta["library_functions"][function_address] = function_name + n_libs = len(meta["library_functions"]) + percentage = 100 * (n_libs / n_funcs) + if isinstance(pb, tqdm.tqdm): + pb.set_postfix_str("skipped %d library functions (%d%%)" % (n_libs, percentage)) continue function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f) @@ -494,7 +502,7 @@ def get_rules(rule_path, disable_progress=False): # to disable progress completely pbar = lambda s, *args, **kwargs: s - for rule_path in pbar(list(rule_paths), desc="loading ", unit=" rules"): + for rule_path in pbar(list(rule_paths), desc="loading ", unit=" rules"): try: rule = capa.rules.Rule.from_yaml_file(rule_path) except capa.rules.InvalidRule: @@ -778,7 +786,7 @@ def main(argv=None): logger.debug("using rules path: %s", rules_path) try: - rules = get_rules(rules_path, disable_progress=args.quiet) + rules = get_rules(rules_path, disable_progress=args.quiet or args.tag) rules = capa.rules.RuleSet(rules) logger.debug( "successfully loaded %s rules", @@ -788,9 +796,15 @@ def main(argv=None): len([i for i in filter(lambda r: "capa/subscope-rule" not in r.meta, rules.rules.values())]), ) if args.tag: + n_rules_all = len(rules) rules = rules.filter_rules_by_meta(args.tag) - logger.debug("selected %s rules", len(rules)) + n_rules = len(rules) + diff = n_rules_all - n_rules + if not args.quiet: + for _ in tqdm.trange(n_rules, desc="loading ", unit=" rules", postfix="skipped %d rules" % diff): + pass for i, r in enumerate(rules.rules, 1): + logger.debug("selected %d rules", 
len(rules)) # TODO don't display subscope rules? logger.debug(" %d. %s", i, r) except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e: diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 9f43d439..d07845b2 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -57,6 +57,7 @@ def render_meta(ostream, doc): ("base address", hex(doc["meta"]["analysis"]["base_address"])), ("rules", doc["meta"]["analysis"]["rules"]), ("function count", len(doc["meta"]["analysis"]["feature_counts"]["functions"])), + ("library function count", len(doc["meta"]["analysis"]["library_functions"])), ( "total feature count", doc["meta"]["analysis"]["feature_counts"]["file"] diff --git a/tests/test_main.py b/tests/test_main.py index cde291d1..03f755c4 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -25,6 +25,7 @@ def test_main(z9324d_extractor): assert capa.main.main([path, "-vv"]) == 0 assert capa.main.main([path, "-v"]) == 0 assert capa.main.main([path, "-j"]) == 0 + assert capa.main.main([path, "-q"]) == 0 assert capa.main.main([path]) == 0 @@ -79,6 +80,7 @@ def test_main_shellcode(z499c2_extractor): assert capa.main.main([path, "-vv", "-f", "sc32"]) == 0 assert capa.main.main([path, "-v", "-f", "sc32"]) == 0 assert capa.main.main([path, "-j", "-f", "sc32"]) == 0 + assert capa.main.main([path, "-q", "-f", "sc32"]) == 0 assert capa.main.main([path, "-f", "sc32"]) == 0 # auto detect shellcode based on file extension assert capa.main.main([path]) == 0 From a8e353fe3134f5751bd3dc4c07545cf6ef34c0ae Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Thu, 20 May 2021 14:00:01 +0200 Subject: [PATCH 2/6] revert rule loading pbar --- CHANGELOG.md | 2 +- capa/main.py | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a391db12..27964fc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT - explorer: 
IDA 7.6 support #497 @williballenthin - scripts: capa2yara.py convert capa rules to YARA rules #561 @ruppde - main: auto detect shellcode based on file extension #516 @mr-tz -- main: more detailed progress bar output when loading rules and matching functions #562 @mr-tz +- main: more detailed progress bar output when matching functions #562 @mr-tz ### New Rules (69) diff --git a/capa/main.py b/capa/main.py index 3c1bacd3..0da06ea6 100644 --- a/capa/main.py +++ b/capa/main.py @@ -786,7 +786,7 @@ def main(argv=None): logger.debug("using rules path: %s", rules_path) try: - rules = get_rules(rules_path, disable_progress=args.quiet or args.tag) + rules = get_rules(rules_path, disable_progress=args.quiet) rules = capa.rules.RuleSet(rules) logger.debug( "successfully loaded %s rules", @@ -796,13 +796,7 @@ def main(argv=None): len([i for i in filter(lambda r: "capa/subscope-rule" not in r.meta, rules.rules.values())]), ) if args.tag: - n_rules_all = len(rules) rules = rules.filter_rules_by_meta(args.tag) - n_rules = len(rules) - diff = n_rules_all - n_rules - if not args.quiet: - for _ in tqdm.trange(n_rules, desc="loading ", unit=" rules", postfix="skipped %d rules" % diff): - pass for i, r in enumerate(rules.rules, 1): logger.debug("selected %d rules", len(rules)) # TODO don't display subscope rules? 
From 72da8f3aed35261e9acd5807b9e3eac59ae51228 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Fri, 21 May 2021 07:12:57 +0000 Subject: [PATCH 3/6] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3566bb00..6daf8792 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT - explorer: IDA 7.6 support #497 @williballenthin - scripts: capa2yara.py convert capa rules to YARA rules #561 @ruppde -### New Rules (69) +### New Rules (70) - anti-analysis/packer/amber/packed-with-amber @gormaniac - collection/file-managers/gather-3d-ftp-information @re-fox @@ -82,6 +82,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT - nursery/create-bits-job @mr-tz - nursery/execute-syscall-instruction @kulinacs @mr-tz - nursery/connect-to-wmi-namespace-via-wbemlocator michael.hunhoff@fireeye.com +- anti-analysis/obfuscation/obfuscated-with-callobfuscator johnk3r - ### Bug Fixes diff --git a/README.md b/README.md index 3292927c..289a1f55 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/fireeye/capa)](https://github.com/fireeye/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-555-blue.svg)](https://github.com/fireeye/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-556-blue.svg)](https://github.com/fireeye/capa-rules) [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/fireeye/capa/total)](https://github.com/fireeye/capa/releases) 
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 021b7efd..2bdbda33 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 021b7efdf4fcf21dba96a0d4af0e3b6491bc7d13 +Subproject commit 2bdbda337172be877ab63e499dd627764f42a555 From af5613250f505825c25487120736acae87dc98f5 Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Fri, 21 May 2021 11:31:37 +0200 Subject: [PATCH 4/6] lazy import flirt closes #540 --- capa/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/capa/main.py b/capa/main.py index 61a0eaa3..c270e1ef 100644 --- a/capa/main.py +++ b/capa/main.py @@ -23,7 +23,6 @@ import collections import halo import tqdm -import flirt import colorama import capa.rules @@ -293,6 +292,8 @@ def get_meta_str(vw): def load_flirt_signature(path): + # lazy import enables us to only require flirt here and not in IDA, for example + import flirt if path.endswith(".sig"): with open(path, "rb") as f: @@ -321,6 +322,8 @@ def register_flirt_signature_analyzers(vw, sigpaths): vw (vivisect.VivWorkspace): sigpaths (List[str]): file system paths of .sig/.pat files """ + # lazy import enables us to only require flirt here and not in IDA, for example + import flirt import viv_utils.flirt for sigpath in sigpaths: From 73f121cf03c1958281e5b817d2c1164e4783b37b Mon Sep 17 00:00:00 2001 From: Arnim Rupp <46819580+ruppde@users.noreply.github.com> Date: Fri, 21 May 2021 16:51:14 +0200 Subject: [PATCH 5/6] Update capa2yara.py bugfix: https://github.com/fireeye/capa-rules/blob/master/collection/get-geographical-location.yml hit on far too many files with /\bcity, as opposed to the intention of the capa rule to just hit in function names. Changed to /\x00city. 
--- scripts/capa2yara.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/scripts/capa2yara.py b/scripts/capa2yara.py index 8c51a78d..ca694a54 100644 --- a/scripts/capa2yara.py +++ b/scripts/capa2yara.py @@ -199,6 +199,7 @@ def convert_rule(rule, rulename, cround, depth): var_name = "api_" + var_names.pop(0) # limit regex with word boundary \b but also search for appended A and W + # TODO: better use something like /(\\x00|\\x01|\\x02|\\x03|\\x04)' + api + '(A|W)?\\x00/ ??? yara_strings += "\t$" + var_name + " = /\\b" + api + "(A|W)?\\b/ ascii wide\n" yara_condition += "\t$" + var_name + " " @@ -291,8 +292,10 @@ def convert_rule(rule, rulename, cround, depth): # /reg(|.exe)/ => /reg(.exe)?/ regex = re.sub(r"\(\|([^\)]+)\)", r"(\1)?", regex) - # change begining of line to word boundary, e.g. /^open => /\bopen - regex = re.sub(r"^\^", r"\\b", regex) + # change begining of line to null byte, e.g. /^open => /\x00open (not word boundary because we're not looking for the begining of a word in a text but usually a function name if there's ^ in a capa rule) + regex = re.sub(r"^\^", r"\\x00", regex) + + #regex = re.sub(r"^\^", r"\\b", regex) regex = "/" + regex + "/" if count: @@ -587,30 +590,38 @@ def convert_rules(rules, namespaces, cround): meta_name = "attack" for attack in list(metas[meta]): logger.info("attack:" + attack) + # cut out tag in square brackets, e.g. Defense Evasion::Obfuscated Files or Information [T1027] => T1027 r = re.search(r"\[(T[^\]]*)", attack) if r: tag = r.group(1) logger.info("attack tag:" + tag) tag = re.sub(r"\W", "_", tag) rule_tags += tag + " " + # also add a line "attack = ..." to yaras 'meta:' to keep the long description: + yara_meta += '\tattack = "' + attack + '"\n' elif meta_name == "mbc": for mbc in list(metas[meta]): logger.info("mbc:" + mbc) + # cut out tag in square brackets, e.g. 
Cryptography::Encrypt Data::RC6 [C0027.010] => C0027.010 r = re.search(r"\[(.[^\]]*)", mbc) if r: tag = r.group(1) logger.info("mbc tag:" + tag) tag = re.sub(r"\W", "_", tag) rule_tags += tag + " " + + # also add a line "mbc = ..." to yaras 'meta:' to keep the long description: + yara_meta += '\tmbc = "' + mbc + '"\n' + for value in metas[meta]: if meta_name == "hash": value = re.sub(r"^([0-9a-f]{20,64}):0x[0-9a-f]{1,10}$", r"\1", value, flags=re.IGNORECASE) - # examples in capa can contain the same hash several times with different offset, so check if it's already there: - # (keeping the offset might be interessting for some but breaks yara-ci for checking of the final rules - if not value in seen_hashes: - yara_meta += "\t" + meta_name + ' = "' + value + '"\n' - seen_hashes.append(value) + # examples in capa can contain the same hash several times with different offset, so check if it's already there: + # (keeping the offset might be interessting for some but breaks yara-ci for checking of the final rules + if not value in seen_hashes: + yara_meta += "\t" + meta_name + ' = "' + value + '"\n' + seen_hashes.append(value) else: # no list: From 7759d2dd794e20ba4b2e36847087863b62330600 Mon Sep 17 00:00:00 2001 From: Arnim Rupp <46819580+ruppde@users.noreply.github.com> Date: Fri, 21 May 2021 17:04:16 +0200 Subject: [PATCH 6/6] Update capa2yara.py --- scripts/capa2yara.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/capa2yara.py b/scripts/capa2yara.py index ca694a54..50619ddb 100644 --- a/scripts/capa2yara.py +++ b/scripts/capa2yara.py @@ -295,7 +295,7 @@ def convert_rule(rule, rulename, cround, depth): # change begining of line to null byte, e.g. 
/^open => /\x00open (not word boundary because we're not looking for the begining of a word in a text but usually a function name if there's ^ in a capa rule) regex = re.sub(r"^\^", r"\\x00", regex) - #regex = re.sub(r"^\^", r"\\b", regex) + # regex = re.sub(r"^\^", r"\\b", regex) regex = "/" + regex + "/" if count: