From 56536792f8c51c929551b6833b715bd5d7e12335 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 11:37:33 -0600 Subject: [PATCH 01/18] capafmt: initial impl --- capa/rules.py | 107 +++++++++++++++++++++++++++++++++++++++++++++ scripts/capafmt.py | 56 ++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 scripts/capafmt.py diff --git a/capa/rules.py b/capa/rules.py index fe729838..fe631d20 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -508,6 +508,113 @@ class Rule(object): except InvalidRule as e: raise InvalidRuleWithPath(path, str(e)) + def to_yaml(self): + import six + from ruamel.yaml import YAML + + COMMON_KEYS = ("name", "namespace", "rule-category", "author", "att&ck", "mbc", "examples", "scope") + + yaml = YAML(typ='rt') + yaml.default_flow_style = False + + definition = yaml.load(self.definition) + # definition retains a reference to `meta`, + # so we're updating that in place. + meta = definition["rule"]["meta"] + + def move_to_end(m, k): + # ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap). + # here we refresh the insertion order of the given key. + # this will move it to the end of the sequence. 
+ v = m[k] + del m[k] + m[k] = v + + for key in COMMON_KEYS: + if key in meta: + move_to_end(meta, key) + + for key in sorted(meta.keys()): + if key in COMMON_KEYS: + continue + move_to_end(meta, key) + + ostream = six.BytesIO() + yaml.dump(definition, ostream) + print(ostream.getvalue().decode('utf-8')) + return '' + + + definition = yaml.safe_load(self.definition) + + formatted = DefaultOrderedDict(default_factory=lambda: DefaultOrderedDict(default_factory=DefaultOrderedDict)) + meta = definition["rule"]["meta"] + + + for key in COMMON_KEYS: + if key in meta: + formatted["rule"]["meta"][key] = meta[key] + + for key in sorted(meta.keys()): + if key in COMMON_KEYS: + continue + formatted["rule"]["meta"][key] = meta[key] + + formatted["rule"]["features"] = definition["rule"]["features"] + + return yaml.dump(formatted, Dumper=CapaDumper, default_flow_style=False) + + +class DefaultOrderedDict(collections.OrderedDict): + # Source: http://stackoverflow.com/a/6190500/562769 + def __init__(self, default_factory=None, *a, **kw): + if (default_factory is not None and not isinstance(default_factory, collections.Callable)): + raise TypeError('first argument must be callable') + super(DefaultOrderedDict, self).__init__(*a, **kw) + self.default_factory = default_factory + + def __getitem__(self, key): + try: + return super(DefaultOrderedDict, self).__getitem__(key) + except KeyError: + return self.__missing__(key) + + def __missing__(self, key): + if self.default_factory is None: + raise KeyError(key) + value = self.default_factory() + self[key] = value + return value + + +class CapaDumper(yaml.Dumper): + ''' + Tweak the yaml serializer to emit sequences/lists with additional indentation. 
+ ref: https://stackoverflow.com/a/39681672/87207 + + before: + + rule: + features: + - or: + - count(mnemonic(rdtsc)): 2 or more + - mnemonic: icebp + + after: + + rule: + features: + - or: + - count(mnemonic(rdtsc)): 2 or more + - mnemonic: icebp + ''' + def __init__(self, *args, **kwargs): + super(CapaDumper, self).__init__(*args, **kwargs) + self.add_representer(DefaultOrderedDict, lambda dumper, data: dumper.represent_dict(data.iteritems())) + + def increase_indent(self, flow=False, indentless=False): + return super(CapaDumper, self).increase_indent(flow, False) + def get_rules_with_scope(rules, scope): ''' diff --git a/scripts/capafmt.py b/scripts/capafmt.py new file mode 100644 index 00000000..0167aa21 --- /dev/null +++ b/scripts/capafmt.py @@ -0,0 +1,56 @@ +''' +Reformat the given capa rule into a consistent style. +Use the -i flag to update the rule in-place. + +Usage: + + $ python capafmt.py -i foo.yml +''' +import sys +import logging + +import argparse + +import capa.rules + + +logger = logging.getLogger('capafmt') + + +def main(argv=None): + if argv is None: + argv = sys.argv[1:] + + parser = argparse.ArgumentParser(description='Capa rule formatter.') + parser.add_argument('path', type=str, + help='Path to rule to format') + parser.add_argument('-i', '--in-place', action='store_true', dest='in_place', + help='Format the rule in place, otherwise, write formatted rule to STDOUT') + parser.add_argument('-v', '--verbose', action='store_true', + help='Enable debug logging') + parser.add_argument('-q', '--quiet', action='store_true', + help='Disable all output but errors') + args = parser.parse_args(args=argv) + + if args.verbose: + level = logging.DEBUG + elif args.quiet: + level = logging.ERROR + else: + level = logging.INFO + + logging.basicConfig(level=level) + logging.getLogger('capafmt').setLevel(level) + + rule = capa.rules.Rule.from_yaml_file(args.path) + if args.in_place: + with open(args.path, 'wb') as f: + f.write(rule.to_yaml().encode('utf-8')) + 
else: + print(rule.to_yaml()) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file From 3bc6c5805f8a68d11935322ee3e2ba8cdbd38cf3 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 11:53:15 -0600 Subject: [PATCH 02/18] capafmt: use yaml parser that supports comments to reformat --- capa/rules.py | 99 ++++++++++++--------------------------------------- setup.py | 1 + 2 files changed, 23 insertions(+), 77 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index fe631d20..57eae905 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -1,9 +1,12 @@ -import yaml import uuid import codecs import logging import binascii +import six +import yaml +import ruamel.yaml + import capa.engine from capa.engine import * import capa.features @@ -509,13 +512,27 @@ class Rule(object): raise InvalidRuleWithPath(path, str(e)) def to_yaml(self): - import six - from ruamel.yaml import YAML + # reformat the yaml document with a common style. + # this includes: + # - ordering the meta elements + # - indenting the nested items with two spaces + # + # we use the ruamel.yaml parser for this, because it supports roundtripping of documents with comments. + # order the meta elements in the following preferred order. + # any custom keys will come after this. 
COMMON_KEYS = ("name", "namespace", "rule-category", "author", "att&ck", "mbc", "examples", "scope") - yaml = YAML(typ='rt') + yaml = ruamel.yaml.YAML(typ='rt') + # use block mode, not inline json-like mode yaml.default_flow_style = False + # indent lists by two spaces below their parent + # + # features: + # - or: + # - mnemonic: aesdec + # - mnemonic: vaesdec + yaml.indent(sequence=2, offset=2) definition = yaml.load(self.definition) # definition retains a reference to `meta`, @@ -541,79 +558,7 @@ class Rule(object): ostream = six.BytesIO() yaml.dump(definition, ostream) - print(ostream.getvalue().decode('utf-8')) - return '' - - - definition = yaml.safe_load(self.definition) - - formatted = DefaultOrderedDict(default_factory=lambda: DefaultOrderedDict(default_factory=DefaultOrderedDict)) - meta = definition["rule"]["meta"] - - - for key in COMMON_KEYS: - if key in meta: - formatted["rule"]["meta"][key] = meta[key] - - for key in sorted(meta.keys()): - if key in COMMON_KEYS: - continue - formatted["rule"]["meta"][key] = meta[key] - - formatted["rule"]["features"] = definition["rule"]["features"] - - return yaml.dump(formatted, Dumper=CapaDumper, default_flow_style=False) - - -class DefaultOrderedDict(collections.OrderedDict): - # Source: http://stackoverflow.com/a/6190500/562769 - def __init__(self, default_factory=None, *a, **kw): - if (default_factory is not None and not isinstance(default_factory, collections.Callable)): - raise TypeError('first argument must be callable') - super(DefaultOrderedDict, self).__init__(*a, **kw) - self.default_factory = default_factory - - def __getitem__(self, key): - try: - return super(DefaultOrderedDict, self).__getitem__(key) - except KeyError: - return self.__missing__(key) - - def __missing__(self, key): - if self.default_factory is None: - raise KeyError(key) - value = self.default_factory() - self[key] = value - return value - - -class CapaDumper(yaml.Dumper): - ''' - Tweak the yaml serializer to emit sequences/lists with 
additional indentation. - ref: https://stackoverflow.com/a/39681672/87207 - - before: - - rule: - features: - - or: - - count(mnemonic(rdtsc)): 2 or more - - mnemonic: icebp - - after: - - rule: - features: - - or: - - count(mnemonic(rdtsc)): 2 or more - - mnemonic: icebp - ''' - def __init__(self, *args, **kwargs): - super(CapaDumper, self).__init__(*args, **kwargs) - self.add_representer(DefaultOrderedDict, lambda dumper, data: dumper.represent_dict(data.iteritems())) - - def increase_indent(self, flow=False, indentless=False): - return super(CapaDumper, self).increase_indent(flow, False) + return ostream.getvalue().decode('utf-8').rstrip("\n") def get_rules_with_scope(rules, scope): diff --git a/setup.py b/setup.py index ae665a6e..c5150ee6 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,7 @@ requirements = [ "tqdm", "pyyaml", "tabulate", + "ruamel.yaml" ] if sys.version_info >= (3, 0): From aa357855b85e62012adc2ee4d26a3c38bb61a053 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 12:01:12 -0600 Subject: [PATCH 03/18] capafmt: order meta/features consistently --- capa/rules.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/capa/rules.py b/capa/rules.py index 57eae905..6760bf73 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -547,6 +547,9 @@ class Rule(object): del m[k] m[k] = v + move_to_end(definition["rule"], "meta") + move_to_end(definition["rule"], "features") + for key in COMMON_KEYS: if key in meta: move_to_end(meta, key) From 09fa5a4227f311b773b807e1077a33d5b3b9fffb Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 12:24:32 -0600 Subject: [PATCH 04/18] rules: use ruamel to maintain comments --- capa/rules.py | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index 712f5033..0409ca31 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -4,7 +4,6 @@ import logging import binascii import six -import yaml import 
ruamel.yaml import capa.engine @@ -20,6 +19,11 @@ from capa.features import MAX_BYTES_FEATURE_SIZE logger = logging.getLogger(__name__) +# these are the standard metadata fields, in the preferred order. +# when reformatted, any custom keys will come after these. +META_KEYS = ("name", "namespace", "rule-category", "author", "scope", "att&ck", "mbc", "examples") + + FILE_SCOPE = 'file' FUNCTION_SCOPE = 'function' BASIC_BLOCK_SCOPE = 'basic block' @@ -364,6 +368,23 @@ def second(s): return s[1] +# we use the ruamel.yaml parser because it supports roundtripping of documents with comments. +yaml = ruamel.yaml.YAML(typ='rt') + + +# use block mode, not inline json-like mode +yaml.default_flow_style = False + + +# indent lists by two spaces below their parent +# +# features: +# - or: +# - mnemonic: aesdec +# - mnemonic: vaesdec +yaml.indent(sequence=2, offset=2) + + class Rule(object): def __init__(self, name, scope, statement, meta, definition=''): super(Rule, self).__init__() @@ -493,7 +514,7 @@ class Rule(object): @classmethod def from_yaml(cls, s): - return cls.from_dict(yaml.safe_load(s), s) + return cls.from_dict(yaml.load(s), s) @classmethod def from_yaml_file(cls, path): @@ -509,22 +530,6 @@ class Rule(object): # - ordering the meta elements # - indenting the nested items with two spaces # - # we use the ruamel.yaml parser for this, because it supports roundtripping of documents with comments. - - # order the meta elements in the following preferred order. - # any custom keys will come after this. 
- COMMON_KEYS = ("name", "namespace", "rule-category", "author", "att&ck", "mbc", "examples", "scope") - - yaml = ruamel.yaml.YAML(typ='rt') - # use block mode, not inline json-like mode - yaml.default_flow_style = False - # indent lists by two spaces below their parent - # - # features: - # - or: - # - mnemonic: aesdec - # - mnemonic: vaesdec - yaml.indent(sequence=2, offset=2) definition = yaml.load(self.definition) # definition retains a reference to `meta`, @@ -542,12 +547,12 @@ class Rule(object): move_to_end(definition["rule"], "meta") move_to_end(definition["rule"], "features") - for key in COMMON_KEYS: + for key in META_KEYS: if key in meta: move_to_end(meta, key) for key in sorted(meta.keys()): - if key in COMMON_KEYS: + if key in META_KEYS: continue move_to_end(meta, key) From fea1177c5e0884b1aa213da300b6cbf86ea40d5c Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 13:03:07 -0600 Subject: [PATCH 05/18] add tests for formatting --- tests/test_fmt.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 tests/test_fmt.py diff --git a/tests/test_fmt.py b/tests/test_fmt.py new file mode 100644 index 00000000..024c7598 --- /dev/null +++ b/tests/test_fmt.py @@ -0,0 +1,71 @@ +import textwrap + +import capa.rules + +EXPECTED = textwrap.dedent('''\ + rule: + meta: + name: test rule + author: user@domain.com + scope: function + examples: + - foo1234 + - bar5678 + features: + - and: + - number: 1 + - number: 2''') + + +def test_rule_reformat_top_level_elements(): + rule = textwrap.dedent('''\ + rule: + features: + - and: + - number: 1 + - number: 2 + meta: + name: test rule + author: user@domain.com + scope: function + examples: + - foo1234 + - bar5678''') + + assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED + + +def test_rule_reformat_indentation(): + rule = textwrap.dedent('''\ + rule: + meta: + name: test rule + author: user@domain.com + scope: function + examples: + - foo1234 
+ - bar5678 + features: + - and: + - number: 1 + - number: 2''') + + assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED + + +def test_rule_reformat_order(): + rule = textwrap.dedent('''\ + rule: + meta: + author: user@domain.com + examples: + - foo1234 + - bar5678 + scope: function + name: test rule + features: + - and: + - number: 1 + - number: 2''') + + assert capa.rules.Rule.from_yaml(rule).to_yaml() == EXPECTED From 72b6ee5cf3bb393f0b8a8e11c2b81f7b4307b4e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?= Date: Wed, 24 Jun 2020 12:23:51 +0200 Subject: [PATCH 06/18] Remove Build Status from README This should have been removed as part of the migration. The build status in the README will be reimplemented using GitHub Actions and https://shields.io --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 934ead30..94c9c71f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ # capa -[![Build Status](https://drone.oneteamed.net/api/badges/FLARE/capa/status.svg)](https://drone.oneteamed.net/FLARE/capa) capa detects capabilities in executable files. You run it against a .exe or .dll and it tells you what it thinks the program can do. From d2d1f26e7b8b795fa195a7a469b2b613b675e676 Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Wed, 24 Jun 2020 12:55:35 +0200 Subject: [PATCH 07/18] update documentation --- README.md | 13 +++++++++++-- capa/main.py | 8 ++++---- rules | 2 +- tests/test_viv_features.py | 1 + 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 934ead30..daf87eee 100644 --- a/README.md +++ b/README.md @@ -227,6 +227,11 @@ For this to match, the function must: If only one of these features is found in a function, the rule will not match. 
+## limitations +### circular rule dependencies +While capa supports [matching on prior rule matches](#matching-prior-rule-matches) users should ensure that their rules do not introduce circular dependencies between rules. + + # extracted features ## function features @@ -249,10 +254,14 @@ though possibly a local function (like `malloc`) extracted via FLIRT. The parameter is a string describing the function name, specified like `module.functionname` or `functionname`. +Windows API functions that take string arguments come in two API versions. For example `CreateProcessA` takes ANSI strings and `CreateProcessW` takes Unicode strings. capa extracts these API features both with and without the suffix character `A` or `W`. That means you can write a rule to match on both APIs using the base name. If you want to match a specific API version, you can include the suffix. + Example: - api: kernel32.CreateFileA - api: CreateFileA + api: kernel32.CreateFile # matches both Ansi (CreateFileA) and Unicode (CreateFileW) versions + api: CreateFile + api: GetEnvironmentVariableW # only matches on Unicode version + ### number A number used by the logic of the program. 
diff --git a/capa/main.py b/capa/main.py index f040e646..edaa9e9c 100644 --- a/capa/main.py +++ b/capa/main.py @@ -640,7 +640,7 @@ def main(argv=None): logger.error("-" * 80) logger.error(" Input file does not appear to be a PE file.") logger.error(" ") - logger.error(" Today, capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).") + logger.error(" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).") logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") logger.error("-" * 80) return -1 @@ -648,7 +648,7 @@ def main(argv=None): logger.error("-" * 80) logger.error(" Unsupported runtime or Python interpreter.") logger.error(" ") - logger.error(" Today, capa supports running under Python 2.7 using Vivisect for binary analysis.") + logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.") logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") logger.error(" ") logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.") @@ -679,7 +679,7 @@ def main(argv=None): logger.warning(" ") logger.warning(" .NET is a cross-platform framework for running managed applications.") logger.warning( - " Today, capa cannot handle non-native files. This means that the results may be misleading or incomplete.") + " capa cannot handle non-native files. 
This means that the results may be misleading or incomplete.") logger.warning(" You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.") logger.warning(" ") logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") @@ -698,7 +698,7 @@ def main(argv=None): logger.warning(" ") logger.warning(" AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.") logger.warning( - " Today, capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.") + " capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.") logger.warning(" You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.") logger.warning(" ") logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") diff --git a/rules b/rules index eaae0e89..d34acf7b 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit eaae0e89a75eae6d564873287ae55afb30f8970b +Subproject commit d34acf7bbd5a2132740bcce00703f657a39dfcdd diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index a76de67c..78314765 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -62,6 +62,7 @@ def test_api_features(mimikatz): def test_api_features_64_bit(sample_a198216798ca38f280dc413f8c57f2c2): features = extract_function_features(viv_utils.Function(sample_a198216798ca38f280dc413f8c57f2c2.vw, 0x4011B0)) assert capa.features.insn.API('kernel32.GetStringTypeA') in features + assert capa.features.insn.API('kernel32.GetStringTypeW') not in features assert capa.features.insn.API('kernel32.GetStringType') in features assert capa.features.insn.API('GetStringTypeA') in features assert capa.features.insn.API('GetStringType') in features From 730f0b21fe04d9babf2473b9dfc9b9d381d77b14 Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Wed, 24 Jun 2020 13:23:46 +0200 Subject: 
[PATCH 08/18] document -t (tag) option --- doc/usage.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/doc/usage.md b/doc/usage.md index cd187865..e48295d5 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -1,5 +1,7 @@ -# Usage -## Command line +# capa usage + + +# command line After you have downloaded the standalone version of capa or installed it via `pip` (see the [installation](installation.md) documentation) you can run capa directly from your terminal shell. - `$ capa -h` @@ -7,14 +9,21 @@ After you have downloaded the standalone version of capa or installed it via `pi In this mode capa relies on vivisect which only runs under Python 2. -## IDA Pro +## only run selected rules +Use the `-t` option to only run selected rules. This is the preferred method over specifying a rule path which fails if dependent rules reside in other directories. + +``` +$ capa -t communication malware.exe +``` + +# IDA Pro capa runs from within IDA Pro. Run `capa/main.py` via File - Script file... (ALT + F7). When running in IDA, capa uses IDA's disassembly and file analysis as its backend. These results may vary from the standalone version that uses vivisect. In IDA, capa supports Python 2 and Python 3. If you encounter issues with your specific setup please open a new [Issue](https://github.com/fireeye/capa/issues). -## IDA plugins +## IDA Pro plugins capa comes with two IDA Pro plugins located in the `capa/ida` directory. ### capa explorer @@ -22,5 +31,5 @@ The capa explorer allows you to interactively display and browse capabilities ca ![capa explorer](capa_explorer.png) -### Rule generator +### rule generator The rule generator helps you to easily write new rules based on the function you are currently analyzing in your IDA disassembly view. 
From 95b3b129ecd57297ad41b5e3dc5c7aa08abfe8ac Mon Sep 17 00:00:00 2001 From: Moritz Date: Wed, 24 Jun 2020 14:29:50 +0200 Subject: [PATCH 09/18] Update setup-hooks.sh --- scripts/setup-hooks.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/setup-hooks.sh b/scripts/setup-hooks.sh index 924a35c2..941e0e58 100755 --- a/scripts/setup-hooks.sh +++ b/scripts/setup-hooks.sh @@ -1,4 +1,8 @@ -#!/bin/sh +#!/usr/bin/env bash + +set -e +set -u +set -o pipefail GIT_DIR=`git rev-parse --show-toplevel` cd $GIT_DIR @@ -8,9 +12,9 @@ cd $GIT_DIR # After that append `scripts/hooks/$arg` and ensure they can be run create_hook() { if [[ ! -e .git/hooks/$1 ]]; then - echo '#!/bin/sh' > .git/hooks/$1 + echo "#!/bin/sh" > ".git/hooks/$1" fi - cat scripts/hooks/$1 >> .git/hooks/$1 + cat scripts/hooks/$1 >> ".git/hooks/$1" chmod +x .git/hooks/$1 } @@ -19,7 +23,6 @@ create_hook 'post-commit' create_hook 'pre-push' echo '\n#### Installing linter/test dependencies\n' -pip install pycodestyle -pytest-sugar +pip install pycodestyle pytest-sugar pip install https://github.com/williballenthin/vivisect/zipball/master python setup.py develop From beba3fb3c702165bba06c0cc2999273670733d6c Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Wed, 24 Jun 2020 15:00:35 +0200 Subject: [PATCH 10/18] double to single quotes --- capa/features/extractors/__init__.py | 2 +- capa/features/extractors/strings.py | 18 ++-- capa/features/extractors/viv/__init__.py | 2 +- capa/features/freeze.py | 24 ++--- capa/ida/ida_capa_explorer.py | 2 +- capa/main.py | 102 ++++++++++---------- scripts/show-features.py | 16 +-- scripts/testbed/_dump_fnames.py | 6 +- scripts/testbed/_export_fimages.py | 2 +- scripts/testbed/freeze_features.py | 20 ++-- scripts/testbed/run_rule_on_testbed.py | 36 +++---- scripts/testbed/start_ida_dump_fnames.py | 16 +-- scripts/testbed/start_ida_export_fimages.py | 24 ++--- setup.py | 34 +++---- tests/test_freeze.py | 4 +- tests/test_main.py | 19 +++- 
tests/test_viv_features.py | 4 +- 17 files changed, 173 insertions(+), 158 deletions(-) diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py index 71ad490a..0486a63a 100644 --- a/capa/features/extractors/__init__.py +++ b/capa/features/extractors/__init__.py @@ -10,7 +10,7 @@ try: except (ImportError, SyntaxError): pass -__all__ = ["ida", "viv"] +__all__ = ['ida', 'viv'] class FeatureExtractor(object): diff --git a/capa/features/extractors/strings.py b/capa/features/extractors/strings.py index 1ef33be8..de25b17c 100644 --- a/capa/features/extractors/strings.py +++ b/capa/features/extractors/strings.py @@ -7,13 +7,13 @@ import re from collections import namedtuple -ASCII_BYTE = r" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t".encode('ascii') -ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4)) -UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4)) -REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"] +ASCII_BYTE = r' !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t'.encode('ascii') +ASCII_RE_4 = re.compile(b'([%s]{%d,})' % (ASCII_BYTE, 4)) +UNICODE_RE_4 = re.compile(b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, 4)) +REPEATS = [b'A', b'\x00', b'\xfe', b'\xff'] SLICE_SIZE = 4096 -String = namedtuple("String", ["s", "offset"]) +String = namedtuple('String', ['s', 'offset']) def buf_filled_with(buf, character): @@ -46,10 +46,10 @@ def extract_ascii_strings(buf, n=4): if n == 4: r = ASCII_RE_4 else: - reg = b"([%s]{%d,})" % (ASCII_BYTE, n) + reg = b'([%s]{%d,})' % (ASCII_BYTE, n) r = re.compile(reg) for match in r.finditer(buf): - yield String(match.group().decode("ascii"), match.start()) + yield String(match.group().decode('ascii'), match.start()) def extract_unicode_strings(buf, n=4): @@ -72,11 +72,11 @@ def extract_unicode_strings(buf, n=4): if n == 4: r = UNICODE_RE_4 else: - reg = 
b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n) + reg = b'((?:[%s]\x00){%d,})' % (ASCII_BYTE, n) r = re.compile(reg) for match in r.finditer(buf): try: - yield String(match.group().decode("utf-16"), match.start()) + yield String(match.group().decode('utf-16'), match.start()) except UnicodeDecodeError: pass diff --git a/capa/features/extractors/viv/__init__.py b/capa/features/extractors/viv/__init__.py index b916000e..eb503ce0 100644 --- a/capa/features/extractors/viv/__init__.py +++ b/capa/features/extractors/viv/__init__.py @@ -13,7 +13,7 @@ import file import function import basicblock import insn -__all__ = ["file", "function", "basicblock", "insn"] +__all__ = ['file', 'function', 'basicblock', 'insn'] def get_va(self): diff --git a/capa/features/freeze.py b/capa/features/freeze.py index 393e4fb1..0499cf7a 100644 --- a/capa/features/freeze.py +++ b/capa/features/freeze.py @@ -237,17 +237,17 @@ def main(argv=None): ] format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats]) - parser = argparse.ArgumentParser(description="save capa features to a file") - parser.add_argument("sample", type=str, - help="Path to sample to analyze") - parser.add_argument("output", type=str, - help="Path to output file") - parser.add_argument("-v", "--verbose", action="store_true", - help="Enable verbose output") - parser.add_argument("-q", "--quiet", action="store_true", - help="Disable all output but errors") - parser.add_argument("-f", "--format", choices=[f[0] for f in formats], default="auto", - help="Select sample format, %s" % format_help) + parser = argparse.ArgumentParser(description='save capa features to a file') + parser.add_argument('sample', type=str, + help='Path to sample to analyze') + parser.add_argument('output', type=str, + help='Path to output file') + parser.add_argument('-v', '--verbose', action='store_true', + help='Enable verbose output') + parser.add_argument('-q', '--quiet', action='store_true', + help='Disable all output but errors') + 
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto', + help='Select sample format, %s' % format_help) args = parser.parse_args(args=argv) if args.quiet: @@ -271,6 +271,6 @@ def main(argv=None): return 0 -if __name__ == "__main__": +if __name__ == '__main__': import sys sys.exit(main()) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index 3c7f81a5..d40c7ab8 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -362,7 +362,7 @@ class CapaExplorerForm(idaapi.PluginForm): technique = parts[2].replace('-', ' ') techniques.add(technique) if len(parts) > 3: - raise capa.rules.InvalidRule(capa.main.RULE_CATEGORY + " tag must have at most three components") + raise capa.rules.InvalidRule(capa.main.RULE_CATEGORY + ' tag must have at most three components') # set row count to max set size self._view_summary.setRowCount(max(map(len, (rules, objectives, behaviors, techniques)))) diff --git a/capa/main.py b/capa/main.py index f040e646..bacb2741 100644 --- a/capa/main.py +++ b/capa/main.py @@ -221,7 +221,7 @@ def render_capabilities_default(ruleset, results): technique = parts[2].replace('-', ' ') techniques.add(technique) if len(parts) > 3: - raise capa.rules.InvalidRule(RULE_CATEGORY + " tag must have at most three components") + raise capa.rules.InvalidRule(RULE_CATEGORY + ' tag must have at most three components') if technique: o[objective][behavior][technique][rule.name] = rule @@ -349,7 +349,7 @@ def render_result(res, indent=''): if sum(map(lambda c: c.success, res.children)) > 0: print('%soptional:' % indent) else: - print("%s%d or more" % (indent, res.statement.count)) + print('%s%d or more' % (indent, res.statement.count)) elif not isinstance(res.statement, (capa.features.Feature, capa.engine.Element, capa.engine.Range, capa.engine.Regex)): # when rending a structural node (and/or/not), # then we only care about the node name. 
@@ -517,7 +517,7 @@ def get_rules(rule_path): with open(rule_path, 'rb') as f: rule = capa.rules.Rule.from_yaml(f.read().decode('utf-8')) - if is_nursery_rule_path(root): + if is_nursery_rule_path(rule_path): rule.meta['nursery'] = True rules.append(rule) @@ -637,35 +637,35 @@ def main(argv=None): try: extractor = get_extractor(args.sample, args.format) except UnsupportedFormatError: - logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE file.") - logger.error(" ") - logger.error(" Today, capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).") - logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") - logger.error("-" * 80) + logger.error('-' * 80) + logger.error(' Input file does not appear to be a PE file.') + logger.error(' ') + logger.error(' Today, capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).') + logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.') + logger.error('-' * 80) return -1 except UnsupportedRuntimeError: - logger.error("-" * 80) - logger.error(" Unsupported runtime or Python interpreter.") - logger.error(" ") - logger.error(" Today, capa supports running under Python 2.7 using Vivisect for binary analysis.") - logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") - logger.error(" ") - logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.") - logger.error("-" * 80) + logger.error('-' * 80) + logger.error(' Unsupported runtime or Python interpreter.') + logger.error(' ') + logger.error(' Today, capa supports running under Python 2.7 using Vivisect for binary analysis.') + logger.error(' It can also run within IDA Pro, using either Python 2.7 or 3.5+.') + logger.error(' ') + logger.error(' If you\'re seeing this message on the command line, please ensure 
you\'re running Python 2.7.') + logger.error('-' * 80) return -1 capabilities = find_capabilities(rules, extractor) if appears_rule_cat(rules, capabilities, 'other-features/installer/'): - logger.warning("-" * 80) - logger.warning(" This sample appears to be an installer.") - logger.warning(" ") - logger.warning(" capa cannot handle installers well. This means the results may be misleading or incomplete.") - logger.warning(" You should try to understand the install mechanism and analyze created files with capa.") - logger.warning(" ") - logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") - logger.warning("-" * 80) + logger.warning('-' * 80) + logger.warning(' This sample appears to be an installer.') + logger.warning(' ') + logger.warning(' capa cannot handle installers well. This means the results may be misleading or incomplete.') + logger.warning(' You should try to understand the install mechanism and analyze created files with capa.') + logger.warning(' ') + logger.warning(' Use -v or -vv if you really want to see the capabilities identified by capa.') + logger.warning('-' * 80) # capa will likely detect installer specific functionality. # this is probably not what the user wants. # @@ -674,16 +674,16 @@ def main(argv=None): return -1 if appears_rule_cat(rules, capabilities, 'other-features/compiled-to-dot-net'): - logger.warning("-" * 80) - logger.warning(" This sample appears to be a .NET module.") - logger.warning(" ") - logger.warning(" .NET is a cross-platform framework for running managed applications.") + logger.warning('-' * 80) + logger.warning(' This sample appears to be a .NET module.') + logger.warning(' ') + logger.warning(' .NET is a cross-platform framework for running managed applications.') logger.warning( - " Today, capa cannot handle non-native files. 
This means that the results may be misleading or incomplete.") - logger.warning(" You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.") - logger.warning(" ") - logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") - logger.warning("-" * 80) + ' Today, capa cannot handle non-native files. This means that the results may be misleading or incomplete.') + logger.warning(' You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.') + logger.warning(' ') + logger.warning(' Use -v or -vv if you really want to see the capabilities identified by capa.') + logger.warning('-' * 80) # capa won't detect much in .NET samples. # it might match some file-level things. # for consistency, bail on things that we don't support. @@ -693,16 +693,16 @@ def main(argv=None): return -1 if appears_rule_cat(rules, capabilities, 'other-features/compiled-with-autoit'): - logger.warning("-" * 80) - logger.warning(" This sample appears to be compiled with AutoIt.") - logger.warning(" ") - logger.warning(" AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.") + logger.warning('-' * 80) + logger.warning(' This sample appears to be compiled with AutoIt.') + logger.warning(' ') + logger.warning(' AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.') logger.warning( - " Today, capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.") - logger.warning(" You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.") - logger.warning(" ") - logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") - logger.warning("-" * 80) + ' Today, capa cannot handle AutoIt scripts. 
This means that the results will be misleading or incomplete.') + logger.warning(' You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.') + logger.warning(' ') + logger.warning(' Use -v or -vv if you really want to see the capabilities identified by capa.') + logger.warning('-' * 80) # capa will detect dozens of capabilities for AutoIt samples, # but these are due to the AutoIt runtime, not the payload script. # so, don't confuse the user with FP matches - bail instead @@ -712,13 +712,13 @@ def main(argv=None): return -1 if appears_rule_cat(rules, capabilities, 'anti-analysis/packing/'): - logger.warning("-" * 80) - logger.warning(" This sample appears packed.") - logger.warning(" ") - logger.warning(" Packed samples have often been obfuscated to hide their logic.") - logger.warning(" capa cannot handle obfuscation well. This means the results may be misleading or incomplete.") - logger.warning(" If possible, you should try to unpack this input file before analyzing it with capa.") - logger.warning("-" * 80) + logger.warning('-' * 80) + logger.warning(' This sample appears packed.') + logger.warning(' ') + logger.warning(' Packed samples have often been obfuscated to hide their logic.') + logger.warning(' capa cannot handle obfuscation well. 
This means the results may be misleading or incomplete.') + logger.warning(' If possible, you should try to unpack this input file before analyzing it with capa.') + logger.warning('-' * 80) if args.vverbose: render_capabilities_vverbose(capabilities) @@ -770,7 +770,7 @@ def is_runtime_ida(): return True -if __name__ == "__main__": +if __name__ == '__main__': if is_runtime_ida(): ida_main() else: diff --git a/scripts/show-features.py b/scripts/show-features.py index 73bcf33a..da88f87c 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -28,13 +28,13 @@ def main(argv=None): ] format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats]) - parser = argparse.ArgumentParser(description="detect capabilities in programs.") - parser.add_argument("sample", type=str, - help="Path to sample to analyze") - parser.add_argument("-f", "--format", choices=[f[0] for f in formats], default="auto", - help="Select sample format, %s" % format_help) - parser.add_argument("-F", "--function", type=lambda x: int(x, 0), - help="Show features for specific function") + parser = argparse.ArgumentParser(description='detect capabilities in programs.') + parser.add_argument('sample', type=str, + help='Path to sample to analyze') + parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto', + help='Select sample format, %s' % format_help) + parser.add_argument('-F', '--function', type=lambda x: int(x, 0), + help='Show features for specific function') args = parser.parse_args(args=argv) logging.basicConfig(level=logging.INFO) @@ -77,5 +77,5 @@ def main(argv=None): return 0 -if __name__ == "__main__": +if __name__ == '__main__': sys.exit(main()) diff --git a/scripts/testbed/_dump_fnames.py b/scripts/testbed/_dump_fnames.py index f52fd113..7210f2dc 100644 --- a/scripts/testbed/_dump_fnames.py +++ b/scripts/testbed/_dump_fnames.py @@ -26,7 +26,7 @@ def main(): fnames = {} for f in idautils.Functions(): fname = idc.get_name(f) - if 
fname.startswith("sub_"): + if fname.startswith('sub_'): continue name_demangled = idc.demangle_name(fname, INF_SHORT_DN_ATTR) @@ -35,12 +35,12 @@ def main(): fnames[f] = fname - with open(idc.ARGV[1], "w") as f: + with open(idc.ARGV[1], 'w') as f: json.dump(fnames, f) # exit IDA idc.qexit(0) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/scripts/testbed/_export_fimages.py b/scripts/testbed/_export_fimages.py index 2052a6b3..8c4a7d4e 100644 --- a/scripts/testbed/_export_fimages.py +++ b/scripts/testbed/_export_fimages.py @@ -40,5 +40,5 @@ def main(): idc.qexit(0) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/scripts/testbed/freeze_features.py b/scripts/testbed/freeze_features.py index 689e885f..8c160559 100644 --- a/scripts/testbed/freeze_features.py +++ b/scripts/testbed/freeze_features.py @@ -66,15 +66,15 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - parser = argparse.ArgumentParser(description="Freeze capa features of a file or of files in a directory") - parser.add_argument("file_path", type=str, - help="Path to file or directory to analyze") - parser.add_argument("-r", "--reprocess", action="store_true", default=False, - help="Overwrite existing analysis") - parser.add_argument("-v", "--verbose", action="store_true", - help="Enable verbose output") - parser.add_argument("-q", "--quiet", action="store_true", - help="Disable all output but errors") + parser = argparse.ArgumentParser(description='Freeze capa features of a file or of files in a directory') + parser.add_argument('file_path', type=str, + help='Path to file or directory to analyze') + parser.add_argument('-r', '--reprocess', action='store_true', default=False, + help='Overwrite existing analysis') + parser.add_argument('-v', '--verbose', action='store_true', + help='Enable verbose output') + parser.add_argument('-q', '--quiet', action='store_true', + help='Disable all output but errors') args = parser.parse_args(args=argv) if 
args.quiet: @@ -98,5 +98,5 @@ def main(argv=None): return 0 -if __name__ == "__main__": +if __name__ == '__main__': sys.exit(main()) diff --git a/scripts/testbed/run_rule_on_testbed.py b/scripts/testbed/run_rule_on_testbed.py index a8247719..e8be9c2a 100644 --- a/scripts/testbed/run_rule_on_testbed.py +++ b/scripts/testbed/run_rule_on_testbed.py @@ -217,23 +217,23 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - parser = argparse.ArgumentParser(description="Run capa rule file against frozen features in a directory") - parser.add_argument("rules", type=str, - help="Path to directory containing rules") - parser.add_argument("rule_name", type=str, - help="Name of rule to test") - parser.add_argument("frozen_path", type=str, - help="Path to frozen feature file or directory") - parser.add_argument("-f", "--fast", action="store_true", - help="Don't test slow files") - parser.add_argument("-o", "--only_matching", action="store_true", - help="Print only if rule matches") - parser.add_argument("-s", "--save_image", action="store", - help="Directory to save exported images of function graphs") - parser.add_argument("-v", "--verbose", action="count", default=0, - help="Increase output verbosity") - parser.add_argument("-q", "--quiet", action="store_true", - help="Disable all output but errors") + parser = argparse.ArgumentParser(description='Run capa rule file against frozen features in a directory') + parser.add_argument('rules', type=str, + help='Path to directory containing rules') + parser.add_argument('rule_name', type=str, + help='Name of rule to test') + parser.add_argument('frozen_path', type=str, + help='Path to frozen feature file or directory') + parser.add_argument('-f', '--fast', action='store_true', + help='Don\'t test slow files') + parser.add_argument('-o', '--only_matching', action='store_true', + help='Print only if rule matches') + parser.add_argument('-s', '--save_image', action='store', + help='Directory to save exported images of function
graphs') + parser.add_argument('-v', '--verbose', action='count', default=0, + help='Increase output verbosity') + parser.add_argument('-q', '--quiet', action='store_true', + help='Disable all output but errors') args = parser.parse_args(args=argv) if args.quiet: @@ -293,5 +293,5 @@ def main(argv=None): print_summary(args.verbose, time0) -if __name__ == "__main__": +if __name__ == '__main__': sys.exit(main()) diff --git a/scripts/testbed/start_ida_dump_fnames.py b/scripts/testbed/start_ida_dump_fnames.py index abeec75c..b812105c 100644 --- a/scripts/testbed/start_ida_dump_fnames.py +++ b/scripts/testbed/start_ida_dump_fnames.py @@ -85,13 +85,13 @@ def get_function_names(fnames_file): def main(): parser = argparse.ArgumentParser( - description="Launch IDA Pro in autonomous mode to dump function names of a file or of files in a directory") - parser.add_argument("file_path", type=str, - help="File or directory path to analyze") - parser.add_argument("-r", "--reprocess", action="store_true", default=False, - help="Overwrite existing analysis") - parser.add_argument("-v", "--verbose", action="store_true", - help="Enable verbose output") + description='Launch IDA Pro in autonomous mode to dump function names of a file or of files in a directory') + parser.add_argument('file_path', type=str, + help='File or directory path to analyze') + parser.add_argument('-r', '--reprocess', action='store_true', default=False, + help='Overwrite existing analysis') + parser.add_argument('-v', '--verbose', action='store_true', + help='Enable verbose output') args = parser.parse_args(args=sys.argv[1:]) if args.verbose: @@ -127,5 +127,5 @@ def main(): return 0 -if __name__ == "__main__": +if __name__ == '__main__': sys.exit(main()) diff --git a/scripts/testbed/start_ida_export_fimages.py b/scripts/testbed/start_ida_export_fimages.py index 53f48037..18b08bf6 100644 --- a/scripts/testbed/start_ida_export_fimages.py +++ b/scripts/testbed/start_ida_export_fimages.py @@ -101,17 +101,17 @@ def 
get_md5_hexdigest(sample_path): def main(): parser = argparse.ArgumentParser( - description="Launch IDA Pro in autonomous mode to export images of function graphs") - parser.add_argument("file_path", type=str, - help="File to export from") - parser.add_argument("out_dir", type=str, - help="Export target directory") - parser.add_argument("-f", "--functions", action="store", - help="Comma separated list of functions to export") - parser.add_argument("-m", "--manual", action="store_true", - help="Manual mode: show IDA dialog boxes") - parser.add_argument("-v", "--verbose", action="store_true", - help="Enable verbose output") + description='Launch IDA Pro in autonomous mode to export images of function graphs') + parser.add_argument('file_path', type=str, + help='File to export from') + parser.add_argument('out_dir', type=str, + help='Export target directory') + parser.add_argument('-f', '--functions', action='store', + help='Comma separated list of functions to export') + parser.add_argument('-m', '--manual', action='store_true', + help='Manual mode: show IDA dialog boxes') + parser.add_argument('-v', '--verbose', action='store_true', + help='Enable verbose output') args = parser.parse_args(args=sys.argv[1:]) if args.verbose: @@ -131,5 +131,5 @@ def main(): return 0 -if __name__ == "__main__": +if __name__ == '__main__': sys.exit(main()) diff --git a/setup.py b/setup.py index ae665a6e..d73a389a 100644 --- a/setup.py +++ b/setup.py @@ -5,26 +5,26 @@ import setuptools requirements = [ - "six", - "tqdm", - "pyyaml", - "tabulate", + 'six', + 'tqdm', + 'pyyaml', + 'tabulate', ] if sys.version_info >= (3, 0): # py3 - requirements.append("networkx") + requirements.append('networkx') else: # py2 - requirements.append("enum34") - requirements.append("vivisect") - requirements.append("viv-utils") - requirements.append("networkx==2.2") # v2.2 is last version supported by Python 2.7 + requirements.append('enum34') + requirements.append('vivisect') + 
requirements.append('viv-utils') + requirements.append('networkx==2.2') # v2.2 is last version supported by Python 2.7 # this sets __version__ # via: http://stackoverflow.com/a/7071358/87207 # and: http://stackoverflow.com/a/2073599/87207 -with open(os.path.join("capa", "version.py"), "rb") as f: +with open(os.path.join('capa', 'version.py'), 'rb') as f: exec(f.read()) @@ -35,17 +35,17 @@ def get_rule_paths(): setuptools.setup( name='capa', version=__version__, - description="", - long_description="", - author="Willi Ballenthin, Moritz Raabe", + description='', + long_description='', + author='Willi Ballenthin, Moritz Raabe', author_email='william.ballenthin@mandiant.com, moritz.raabe@mandiant.com', url='https://www.github.com/fireeye/capa', packages=setuptools.find_packages(exclude=['tests', 'testbed']), package_dir={'capa': 'capa'}, package_data={'capa': get_rule_paths()}, entry_points={ - "console_scripts": [ - "capa=capa.main:main", + 'console_scripts': [ + 'capa=capa.main:main', ] }, include_package_data=True, @@ -56,7 +56,7 @@ setuptools.setup( 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', 'Natural Language :: English', - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 3", + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 3', ], ) diff --git a/tests/test_freeze.py b/tests/test_freeze.py index f7aff936..29c05fde 100644 --- a/tests/test_freeze.py +++ b/tests/test_freeze.py @@ -159,12 +159,12 @@ def test_serialize_features(): def test_freeze_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a): # tmpdir fixture handles cleanup - o = tmpdir.mkdir("capa").join("test.frz").strpath + o = tmpdir.mkdir('capa').join('test.frz').strpath assert capa.features.freeze.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, o, '-v']) == 0 def test_freeze_load_sample(tmpdir, sample_9324d1a8ae37a36ae560c37448c9705a): - o = tmpdir.mkdir("capa").join("test.frz") + o = 
tmpdir.mkdir('capa').join('test.frz') viv_extractor = capa.features.extractors.viv.VivisectFeatureExtractor(sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path) with open(o.strpath, 'wb') as f: diff --git a/tests/test_main.py b/tests/test_main.py index 6e4524c6..5c5640bc 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -15,6 +15,21 @@ def test_main(sample_9324d1a8ae37a36ae560c37448c9705a): assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, '-v']) == 0 +def test_main_single_rule(sample_9324d1a8ae37a36ae560c37448c9705a, tmpdir): + # tests a single rule can be loaded successfully + RULE_CONTENT = textwrap.dedent(''' + rule: + meta: + name: test rule + scope: file + features: + - string: test + ''') + rule_file = tmpdir.mkdir('capa').join('rule.yml') + rule_file.write(RULE_CONTENT) + assert capa.main.main([sample_9324d1a8ae37a36ae560c37448c9705a.path, '-v', '-r', rule_file.strpath]) == 0 + + def test_main_shellcode(sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32): assert capa.main.main([sample_499c2a85f6e8142c3f48d4251c9c7cd6_raw32.path, '-v', '-f', 'sc32']) == 0 @@ -25,7 +40,7 @@ def test_ruleset(): rule: meta: name: file rule - scope: file + scope: file features: - characteristic(embedded pe): y ''')), @@ -33,7 +48,7 @@ def test_ruleset(): rule: meta: name: function rule - scope: function + scope: function features: - characteristic(switch): y ''')), diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index a76de67c..c773a7d2 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -82,14 +82,14 @@ def test_string_features(mimikatz): def test_byte_features(sample_9324d1a8ae37a36ae560c37448c9705a): features = extract_function_features(viv_utils.Function(sample_9324d1a8ae37a36ae560c37448c9705a.vw, 0x406F60)) - wanted = capa.features.Bytes(b"\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61") + wanted = 
capa.features.Bytes(b'\xED\x24\x9E\xF4\x52\xA9\x07\x47\x55\x8E\xE1\xAB\x30\x8E\x23\x61') # use `==` rather than `is` because the result is not `True` but a truthy value. assert wanted.evaluate(features) == True def test_byte_features64(sample_lab21_01): features = extract_function_features(viv_utils.Function(sample_lab21_01.vw, 0x1400010C0)) - wanted = capa.features.Bytes(b"\x32\xA2\xDF\x2D\x99\x2B\x00\x00") + wanted = capa.features.Bytes(b'\x32\xA2\xDF\x2D\x99\x2B\x00\x00') # use `==` rather than `is` because the result is not `True` but a truthy value. assert wanted.evaluate(features) == True From bd92ded34428f3f3e2bfeefe5c25cf8fbea64958 Mon Sep 17 00:00:00 2001 From: Moritz Date: Wed, 24 Jun 2020 16:24:34 +0200 Subject: [PATCH 11/18] remove six requirement --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index d73a389a..79c79ba0 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,6 @@ import setuptools requirements = [ - 'six', 'tqdm', 'pyyaml', 'tabulate', From 7e1e9e66188d9598023266a4510c08a8e776f06d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?= Date: Wed, 24 Jun 2020 12:22:32 +0200 Subject: [PATCH 12/18] Get rid of the Element class The `Element` class is just used for testing. By using `Element` we are not testing the actual code. Also, every time we implement a new feature for the `Feature` class, we need to implement it for `Element` as well. Replace `Element` by `Number`.
--- capa/engine.py | 16 ------ capa/ida/explorer/model.py | 2 +- capa/main.py | 2 +- capa/rules.py | 8 --- tests/test_engine.py | 109 ++++++++++++++++++------------------- tests/test_rules.py | 63 +++++++++++---------- 6 files changed, 87 insertions(+), 113 deletions(-) diff --git a/capa/engine.py b/capa/engine.py index 25da62ed..99512eb2 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -145,22 +145,6 @@ class Some(Statement): return Result(success, self, results) -class Element(Statement): - '''match if the child is contained in the ctx set.''' - def __init__(self, child): - super(Element, self).__init__() - self.child = child - - def __hash__(self): - return hash((self.name, self.child)) - - def __eq__(self, other): - return self.name == other.name and self.child == other.child - - def evaluate(self, ctx): - return Result(self.child in ctx, self, []) - - class Range(Statement): '''match if the child is contained in the ctx set with a count in the given range.''' def __init__(self, child, min=None, max=None): diff --git a/capa/ida/explorer/model.py b/capa/ida/explorer/model.py index 38dd6d96..ba472644 100644 --- a/capa/ida/explorer/model.py +++ b/capa/ida/explorer/model.py @@ -322,7 +322,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel): parent2 = parent else: parent2 = CapaExplorerDefaultItem(parent, '%d or more' % result.statement.count) - elif not isinstance(result.statement, (capa.features.Feature, capa.engine.Element, capa.engine.Range, capa.engine.Regex)): + elif not isinstance(result.statement, (capa.features.Feature, capa.engine.Range, capa.engine.Regex)): # when rending a structural node (and/or/not) then we only care about the node name. 
''' succs = list(filter(lambda c: bool(c), result.children)) diff --git a/capa/main.py b/capa/main.py index 840425a0..662eebd0 100644 --- a/capa/main.py +++ b/capa/main.py @@ -350,7 +350,7 @@ def render_result(res, indent=''): print('%soptional:' % indent) else: print('%s%d or more' % (indent, res.statement.count)) - elif not isinstance(res.statement, (capa.features.Feature, capa.engine.Element, capa.engine.Range, capa.engine.Regex)): + elif not isinstance(res.statement, (capa.features.Feature, capa.engine.Range, capa.engine.Regex)): # when rending a structural node (and/or/not), # then we only care about the node name. # diff --git a/capa/rules.py b/capa/rules.py index fe729838..b880d328 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -24,7 +24,6 @@ BASIC_BLOCK_SCOPE = 'basic block' SUPPORTED_FEATURES = { FILE_SCOPE: set([ - capa.engine.Element, capa.features.MatchedRule, capa.features.file.Export, capa.features.file.Import, @@ -33,7 +32,6 @@ SUPPORTED_FEATURES = { capa.features.String, ]), FUNCTION_SCOPE: set([ - capa.engine.Element, capa.features.MatchedRule, capa.features.insn.API, capa.features.insn.Number, @@ -56,7 +54,6 @@ SUPPORTED_FEATURES = { capa.features.Characteristic('recursive call') ]), BASIC_BLOCK_SCOPE: set([ - capa.engine.Element, capa.features.MatchedRule, capa.features.insn.API, capa.features.insn.Number, @@ -180,8 +177,6 @@ def parse_feature(key): return capa.features.insn.Mnemonic elif key == 'basic blocks': return capa.features.basicblock.BasicBlock - elif key == 'element': - return Element elif key.startswith('characteristic(') and key.endswith(')'): characteristic = key[len('characteristic('):-len(')')] return lambda v: capa.features.Characteristic(characteristic, v) @@ -311,9 +306,6 @@ def build_statements(d, scope): if term in ('number', 'offset', 'bytes'): value, symbol = parse_symbol(arg, term) feature = Feature(value, symbol) - elif term in ('element'): - arg = parse_int(arg) - feature = Feature(arg) else: # arg is string, like: # 
diff --git a/tests/test_engine.py b/tests/test_engine.py index d56e3da6..426ac742 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -3,96 +3,95 @@ import textwrap import capa.rules import capa.engine from capa.engine import * -import capa.features +from capa.features import * +from capa.features.insn import * -def test_element(): - assert Element(1).evaluate(set([0])) == False - assert Element(1).evaluate(set([1])) == True - assert Element(1).evaluate(set([None])) == False - assert Element(1).evaluate(set([''])) == False - assert Element(1).evaluate(set([False])) == False +def test_number(): + assert Number(1).evaluate({Number(0): {1}}) == False + assert Number(1).evaluate({Number(1): {1}}) == True + assert Number(1).evaluate({Number(2): {1, 2}}) == False def test_and(): - assert And(Element(1)).evaluate(set([0])) == False - assert And(Element(1)).evaluate(set([1])) == True - assert And(Element(1), Element(2)).evaluate(set([0])) == False - assert And(Element(1), Element(2)).evaluate(set([1])) == False - assert And(Element(1), Element(2)).evaluate(set([2])) == False - assert And(Element(1), Element(2)).evaluate(set([1, 2])) == True + assert And(Number(1)).evaluate({Number(0): {1}}) == False + assert And(Number(1)).evaluate({Number(1): {1}}) == True + assert And(Number(1), Number(2)).evaluate({Number(0): {1}}) == False + assert And(Number(1), Number(2)).evaluate({Number(1): {1}}) == False + assert And(Number(1), Number(2)).evaluate({Number(2): {1}}) == False + assert And(Number(1), Number(2)).evaluate({Number(1): {1}, Number(2): {2}}) == True def test_or(): - assert Or(Element(1)).evaluate(set([0])) == False - assert Or(Element(1)).evaluate(set([1])) == True - assert Or(Element(1), Element(2)).evaluate(set([0])) == False - assert Or(Element(1), Element(2)).evaluate(set([1])) == True - assert Or(Element(1), Element(2)).evaluate(set([2])) == True - assert Or(Element(1), Element(2)).evaluate(set([1, 2])) == True + assert Or(Number(1)).evaluate({Number(0): 
{1}}) == False + assert Or(Number(1)).evaluate({Number(1): {1}}) == True + assert Or(Number(1), Number(2)).evaluate({Number(0): {1}}) == False + assert Or(Number(1), Number(2)).evaluate({Number(1): {1}}) == True + assert Or(Number(1), Number(2)).evaluate({Number(2): {1}}) == True + assert Or(Number(1), Number(2)).evaluate({Number(1): {1}, Number(2): {2}}) == True def test_not(): - assert Not(Element(1)).evaluate(set([0])) == True - assert Not(Element(1)).evaluate(set([1])) == False + assert Not(Number(1)).evaluate({Number(0): {1}}) == True + assert Not(Number(1)).evaluate({Number(1): {1}}) == False def test_some(): - assert Some(0, Element(1)).evaluate(set([0])) == True - assert Some(1, Element(1)).evaluate(set([0])) == False + assert Some(0, Number(1)).evaluate({Number(0): {1}}) == True + assert Some(1, Number(1)).evaluate({Number(0): {1}}) == False - assert Some(2, Element(1), Element(2), Element(3)).evaluate(set([0])) == False - assert Some(2, Element(1), Element(2), Element(3)).evaluate(set([0, 1])) == False - assert Some(2, Element(1), Element(2), Element(3)).evaluate(set([0, 1, 2])) == True - assert Some(2, Element(1), Element(2), Element(3)).evaluate(set([0, 1, 2, 3])) == True - assert Some(2, Element(1), Element(2), Element(3)).evaluate(set([0, 1, 2, 3, 4])) == True + assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}}) == False + assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}}) == False + assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}}) == True + assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}}) == True + assert Some(2, Number(1), Number(2), Number(3)).evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}, Number(4): {1}}) == True def test_complex(): assert True == Or( - And(Element(1), Element(2)), - Or(Element(3), - Some(2, Element(4), 
Element(5), Element(6))) - ).evaluate(set([5, 6, 7, 8])) + And(Number(1), Number(2)), + Or(Number(3), + Some(2, Number(4), Number(5), Number(6))) + ).evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}}) assert False == Or( - And(Element(1), Element(2)), - Or(Element(3), - Some(2, Element(4), Element(5))) - ).evaluate(set([5, 6, 7, 8])) + And(Number(1), Number(2)), + Or(Number(3), + Some(2, Number(4), Number(5))) + ).evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}}) def test_range(): # unbounded range, but no matching feature - assert Range(Element(1)).evaluate({Element(2): {}}) == False + assert Range(Number(1)).evaluate({Number(2): {}}) == False # unbounded range with matching feature should always match - assert Range(Element(1)).evaluate({Element(1): {}}) == True - assert Range(Element(1)).evaluate({Element(1): {0}}) == True + assert Range(Number(1)).evaluate({Number(1): {}}) == True + assert Range(Number(1)).evaluate({Number(1): {0}}) == True # unbounded max - assert Range(Element(1), min=1).evaluate({Element(1): {0}}) == True - assert Range(Element(1), min=2).evaluate({Element(1): {0}}) == False - assert Range(Element(1), min=2).evaluate({Element(1): {0, 1}}) == True + assert Range(Number(1), min=1).evaluate({Number(1): {0}}) == True + assert Range(Number(1), min=2).evaluate({Number(1): {0}}) == False + assert Range(Number(1), min=2).evaluate({Number(1): {0, 1}}) == True # unbounded min - assert Range(Element(1), max=0).evaluate({Element(1): {0}}) == False - assert Range(Element(1), max=1).evaluate({Element(1): {0}}) == True - assert Range(Element(1), max=2).evaluate({Element(1): {0}}) == True - assert Range(Element(1), max=2).evaluate({Element(1): {0, 1}}) == True - assert Range(Element(1), max=2).evaluate({Element(1): {0, 1, 3}}) == False + assert Range(Number(1), max=0).evaluate({Number(1): {0}}) == False + assert Range(Number(1), max=1).evaluate({Number(1): {0}}) == True + assert Range(Number(1), 
max=2).evaluate({Number(1): {0}}) == True + assert Range(Number(1), max=2).evaluate({Number(1): {0, 1}}) == True + assert Range(Number(1), max=2).evaluate({Number(1): {0, 1, 3}}) == False # we can do an exact match by setting min==max - assert Range(Element(1), min=1, max=1).evaluate({Element(1): {}}) == False - assert Range(Element(1), min=1, max=1).evaluate({Element(1): {1}}) == True - assert Range(Element(1), min=1, max=1).evaluate({Element(1): {1, 2}}) == False + assert Range(Number(1), min=1, max=1).evaluate({Number(1): {}}) == False + assert Range(Number(1), min=1, max=1).evaluate({Number(1): {1}}) == True + assert Range(Number(1), min=1, max=1).evaluate({Number(1): {1, 2}}) == False # bounded range - assert Range(Element(1), min=1, max=3).evaluate({Element(1): {}}) == False - assert Range(Element(1), min=1, max=3).evaluate({Element(1): {1}}) == True - assert Range(Element(1), min=1, max=3).evaluate({Element(1): {1, 2}}) == True - assert Range(Element(1), min=1, max=3).evaluate({Element(1): {1, 2, 3}}) == True - assert Range(Element(1), min=1, max=3).evaluate({Element(1): {1, 2, 3, 4}}) == False + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {}}) == False + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1}}) == True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2}}) == True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3}}) == True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3, 4}}) == False def test_match_adds_matched_rule_feature(): diff --git a/tests/test_rules.py b/tests/test_rules.py index bac921f6..e67ff780 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -3,14 +3,13 @@ import textwrap import pytest import capa.rules -from capa.engine import Element from capa.features.insn import Number, Offset def test_rule_ctor(): - r = capa.rules.Rule('test rule', capa.rules.FUNCTION_SCOPE, Element(1), {}) - assert r.evaluate(set([0])) == False - assert 
r.evaluate(set([1])) == True + r = capa.rules.Rule('test rule', capa.rules.FUNCTION_SCOPE, Number(1), {}) + assert r.evaluate({Number(0): {1}}) == False + assert r.evaluate({Number(1): {1}}) == True def test_rule_yaml(): @@ -25,14 +24,14 @@ def test_rule_yaml(): - bar5678 features: - and: - - element: 1 - - element: 2 + - number: 1 + - number: 2 ''') r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate(set([0])) == False - assert r.evaluate(set([0, 1])) == False - assert r.evaluate(set([0, 1, 2])) == True - assert r.evaluate(set([0, 1, 2, 3])) == True + assert r.evaluate({Number(0): {1}}) == False + assert r.evaluate({Number(0): {1}, Number(1): {1}}) == False + assert r.evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}}) == True + assert r.evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}}) == True def test_rule_yaml_complex(): @@ -43,18 +42,18 @@ def test_rule_yaml_complex(): features: - or: - and: - - element: 1 - - element: 2 + - number: 1 + - number: 2 - or: - - element: 3 + - number: 3 - 2 or more: - - element: 4 - - element: 5 - - element: 6 + - number: 4 + - number: 5 + - number: 6 ''') r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate(set([5, 6, 7, 8])) == True - assert r.evaluate(set([6, 7, 8])) == False + assert r.evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}}) == True + assert r.evaluate({Number(6): {1}, Number(7): {1}, Number(8): {1}}) == False def test_rule_yaml_not(): @@ -64,13 +63,13 @@ def test_rule_yaml_not(): name: test rule features: - and: - - element: 1 + - number: 1 - not: - - element: 2 + - number: 2 ''') r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate(set([1])) == True - assert r.evaluate(set([1, 2])) == False + assert r.evaluate({Number(1): {1}}) == True + assert r.evaluate({Number(1): {1}, Number(2): {1}}) == False def test_rule_yaml_count(): @@ -79,12 +78,12 @@ def test_rule_yaml_count(): meta: name: test rule features: - - count(element(100)): 1 + - 
count(number(100)): 1 ''') r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Element(100): {}}) == False - assert r.evaluate({Element(100): {1}}) == True - assert r.evaluate({Element(100): {1, 2}}) == False + assert r.evaluate({Number(100): {}}) == False + assert r.evaluate({Number(100): {1}}) == True + assert r.evaluate({Number(100): {1, 2}}) == False def test_rule_yaml_count_range(): @@ -93,13 +92,13 @@ def test_rule_yaml_count_range(): meta: name: test rule features: - - count(element(100)): (1, 2) + - count(number(100)): (1, 2) ''') r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Element(100): {}}) == False - assert r.evaluate({Element(100): {1}}) == True - assert r.evaluate({Element(100): {1, 2}}) == True - assert r.evaluate({Element(100): {1, 2, 3}}) == False + assert r.evaluate({Number(100): {}}) == False + assert r.evaluate({Number(100): {1}}) == True + assert r.evaluate({Number(100): {1, 2}}) == True + assert r.evaluate({Number(100): {1, 2, 3}}) == False def test_invalid_rule_feature(): @@ -239,7 +238,7 @@ def test_invalid_rules(): meta: name: test rule features: - - characteristic(count(element(100))): True + - characteristic(count(number(100))): True ''')) From b505197af1f4ec0e5eef5842bf7ad615539ad448 Mon Sep 17 00:00:00 2001 From: Michael Hunhoff Date: Wed, 24 Jun 2020 15:03:21 -0600 Subject: [PATCH 13/18] documentation changes --- README.md | 12 ++++++------ doc/installation.md | 10 +++++----- doc/usage.md | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 3da7582a..f3fb3d16 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ Here's an example rule used by capa: Rules are yaml files that follow a certain schema. -The top level element is a dictionary named `rule` with two required children dictionaries: +The top-level element is a dictionary named `rule` with two required children dictionaries: `meta` and `features`. 
@@ -168,7 +168,7 @@ Here are the common fields: - `rule-category` is required when a rule describes a behavior (as opposed to matching a role or disposition). The rule category specifies an objective, behavior, and technique matched by this rule, using a format like `$objective/$behavior/$technique`. -An objective is a high level goal of a program, such as "communication". +An objective is a high-level goal of a program, such as "communication". A behavior is something that a program may do, such as "communication via socket". A technique is a way of implementing some behavior, such as "send-data". @@ -177,7 +177,7 @@ A technique is a way of implementing some behavior, such as "send-data". - `maec/analysis-conclusion` is required when the rule describes a disposition, such as `benign` or `malicious`. - `scope` indicates to which feature set this rule applies. - It can takes the following values: + It can take the following values: - **`basic block`:** limits matches to a basic block. It is used to achieve locality in rules (for example for parameters of a function). - **`function`:** identify functions. @@ -253,7 +253,7 @@ though possibly a local function (like `malloc`) extracted via FLIRT. The parameter is a string describing the function name, specified like `module.functionname` or `functionname`. -Windows API functions that take string arguments come in two API versions. For example `CreateProcessA` takes ANSI strings and `CreateProcessW` takes Unicode strings. capa extracts these API features both with and without the suffix character `A` or `W`. That means you can write a rule to match on both APIs using the base name. If you want to match a specific API version, you can include the suffix. +Windows API functions that take string arguments come in two API versions. For example, `CreateProcessA` takes ANSI strings and `CreateProcessW` takes Unicode strings. capa extracts these API features both with and without the suffix character `A` or `W`. 
That means you can write a rule to match on both APIs using the base name. If you want to match a specific API version, you can include the suffix. Example: @@ -350,7 +350,7 @@ Characteristics are features that are extracted by the analysis engine. They are one-off features that seem interesting to the authors. For example, the `characteristic(nzxor)` feature describes non-zeroing XOR instructions. -captdet does not support instruction pattern matching, +capa does not support instruction pattern matching, so a select set of interesting instructions are pulled out as characteristics. | characteristic | scope | description | @@ -449,7 +449,7 @@ You can specify a rule match expression like so: - match: process creation Rules are uniquely identified by their `rule.meta.name` property; -this is the value that should appear on the right hand side of the `match` expression. +this is the value that should appear on the right-hand side of the `match` expression. capa will refuse to run if a rule dependency is not present during matching. diff --git a/doc/installation.md b/doc/installation.md index e7fd76ff..403574d5 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -1,8 +1,8 @@ # Installation -You can install capa in a few different ways. First, if you simply want to use capa, just download the [standalone binary](https://github.com/fireeye/capa/releases). If you want to use capa as a Python library, you can install the package directly from Github using `pip`. If you'd like to contribute patches or features to capa, you can work with a local copy of the source code. +You can install capa in a few different ways. First, if you simply want to use capa, just download the [standalone binary](https://github.com/fireeye/capa/releases). If you want to use capa as a Python library, you can install the package directly from GitHub using `pip`. If you'd like to contribute patches or features to capa, you can work with a local copy of the source code. 
## Method 1: Standalone installation -If you simply want to use capa, use the standalone binaries we host on Github: https://github.com/fireeye/capa/releases. These binary executable files contain all the source code, Python interpreter, and associated resources needed to make capa run. This means you can run it without any installation! Just invoke the file using your terminal shell to see the help documentation. +If you simply want to use capa, use the standalone binaries we host on GitHub: https://github.com/fireeye/capa/releases. These binary executable files contain all the source code, Python interpreter, and associated resources needed to make capa run. This means you can run it without any installation! Just invoke the file using your terminal shell to see the help documentation. We used PyInstaller to create these packages. @@ -14,14 +14,14 @@ First, install the requirements. `$ pip install https://github.com/williballenthin/vivisect/zipball/master` ### 2. Install capa module -Second, use `pip` to install the capa module to your local Python environment. This fetches the library code to your computer, but does not keep editable source files around for you to hack on. If you'd like to edit the source files, see below. +Second, use `pip` to install the capa module to your local Python environment. This fetches the library code to your computer but does not keep editable source files around for you to hack on. If you'd like to edit the source files, see below. `$ pip install https://github.com/fireeye/capa/archive/master.zip` ### 3. Use capa You can now import the `capa` module from a Python script or use the IDA Pro plugins from the `capa/ida` directory. For more information please see the [usage](usage.md) documentation. ## Method 3: Inspecting the capa source code -If you'd like to review and modify the capa source code, you'll need to check it out from Github and install it locally. 
By following these instructions, you'll maintain a local directory of source code that you can modify and run easily. +If you'd like to review and modify the capa source code, you'll need to check it out from GitHub and install it locally. By following these instructions, you'll maintain a local directory of source code that you can modify and run easily. ### 1. Install requirements First, install the requirements. @@ -53,4 +53,4 @@ If you plan to contribute to capa, you may want to setup the hooks. Run `scripts/setup-hooks.sh` to set the following hooks up: - The `post-commit` hook runs the linter after every `git commit`, letting you know if there are code style or rule linter offenses you need to fix. - The `pre-push` hook runs the linter and the tests and block the `git push` if they do not succeed. - This way you realise if everything is alright without the need of sending a PR. + This way you realize if everything is alright without the need of sending a PR. diff --git a/doc/usage.md b/doc/usage.md index e48295d5..dfd6f06d 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -21,7 +21,7 @@ capa runs from within IDA Pro. Run `capa/main.py` via File - Script file... (ALT When running in IDA, capa uses IDA's disassembly and file analysis as its backend. These results may vary from the standalone version that uses vivisect. -In IDA, capa supports Python 2 and Python 3. If you encounter issues with your specific setup please open a new [Issue](https://github.com/fireeye/capa/issues). +In IDA, capa supports Python 2 and Python 3. If you encounter issues with your specific setup, please open a new [Issue](https://github.com/fireeye/capa/issues). ## IDA Pro plugins capa comes with two IDA Pro plugins located in the `capa/ida` directory. 
From ee3a688e1bdefc42e3d75ba3d16abfbadfca2d52 Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Thu, 25 Jun 2020 10:10:58 +0200 Subject: [PATCH 14/18] update rules --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index d34acf7b..4b8de632 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit d34acf7bbd5a2132740bcce00703f657a39dfcdd +Subproject commit 4b8de6324ecc4bf971e9c9ec20d183021140fce0 From 2db385dd8003271dc467664ecfff5b1e866640d1 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 15:47:29 -0600 Subject: [PATCH 15/18] rule: fmt: avoid word wrapping lines --- capa/rules.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/capa/rules.py b/capa/rules.py index 0409ca31..528961e6 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -384,6 +384,9 @@ yaml.default_flow_style = False # - mnemonic: vaesdec yaml.indent(sequence=2, offset=2) +# avoid word wrapping +yaml.width = 4096 + class Rule(object): def __init__(self, name, scope, statement, meta, definition=''): From b23ff0358b453664eb8dd8da9a612c6386c2343a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 15:48:06 -0600 Subject: [PATCH 16/18] rules: pep8 --- capa/rules.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index 528961e6..62b935e2 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -30,15 +30,15 @@ BASIC_BLOCK_SCOPE = 'basic block' SUPPORTED_FEATURES = { - FILE_SCOPE: set([ + FILE_SCOPE: { capa.features.MatchedRule, capa.features.file.Export, capa.features.file.Import, capa.features.file.Section, capa.features.Characteristic('embedded pe'), capa.features.String, - ]), - FUNCTION_SCOPE: set([ + }, + FUNCTION_SCOPE: { capa.features.MatchedRule, capa.features.insn.API, capa.features.insn.Number, @@ -59,8 +59,8 @@ SUPPORTED_FEATURES = { capa.features.Characteristic('indirect call'), capa.features.Characteristic('loop'), 
capa.features.Characteristic('recursive call') - ]), - BASIC_BLOCK_SCOPE: set([ + }, + BASIC_BLOCK_SCOPE: { capa.features.MatchedRule, capa.features.insn.API, capa.features.insn.Number, @@ -76,7 +76,7 @@ SUPPORTED_FEATURES = { capa.features.Characteristic('tight loop'), capa.features.Characteristic('stack string'), capa.features.Characteristic('indirect call') - ]), + }, } From b3ca9f0daff12fa4a3bef3d335dc49a787db84e4 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 15:51:11 -0600 Subject: [PATCH 17/18] rule: fmt: add trailing newline to capa files --- capa/rules.py | 2 +- scripts/capafmt.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index 62b935e2..bf21bf20 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -561,7 +561,7 @@ class Rule(object): ostream = six.BytesIO() yaml.dump(definition, ostream) - return ostream.getvalue().decode('utf-8').rstrip("\n") + return ostream.getvalue().decode('utf-8').rstrip("\n") + "\n" def get_rules_with_scope(rules, scope): diff --git a/scripts/capafmt.py b/scripts/capafmt.py index 0167aa21..a8d049eb 100644 --- a/scripts/capafmt.py +++ b/scripts/capafmt.py @@ -47,7 +47,7 @@ def main(argv=None): with open(args.path, 'wb') as f: f.write(rule.to_yaml().encode('utf-8')) else: - print(rule.to_yaml()) + print(rule.to_yaml().rstrip("\n")) return 0 From c8eee01f31bf332bd5129f915bac6622bc8a65ad Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 21 Jun 2020 16:43:24 -0600 Subject: [PATCH 18/18] rules: fmt: add additional meta field ordering --- capa/rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/rules.py b/capa/rules.py index bf21bf20..6b53b038 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) # these are the standard metadata fields, in the preferred order. # when reformatted, any custom keys will come after these. 
-META_KEYS = ("name", "namespace", "rule-category", "author", "scope", "att&ck", "mbc", "examples") +META_KEYS = ("name", "namespace", "rule-category", "author", "description", "lib", "scope", "att&ck", "mbc", "references", "examples") FILE_SCOPE = 'file'