diff --git a/scripts/capafmt.py b/scripts/capafmt.py index a8d049eb..b808145d 100644 --- a/scripts/capafmt.py +++ b/scripts/capafmt.py @@ -1,11 +1,11 @@ -''' +""" Reformat the given capa rule into a consistent style. Use the -i flag to update the rule in-place. Usage: $ python capafmt.py -i foo.yml -''' +""" import sys import logging @@ -14,22 +14,24 @@ import argparse import capa.rules -logger = logging.getLogger('capafmt') +logger = logging.getLogger("capafmt") def main(argv=None): if argv is None: argv = sys.argv[1:] - parser = argparse.ArgumentParser(description='Capa rule formatter.') - parser.add_argument('path', type=str, - help='Path to rule to format') - parser.add_argument('-i', '--in-place', action='store_true', dest='in_place', - help='Format the rule in place, otherwise, write formatted rule to STDOUT') - parser.add_argument('-v', '--verbose', action='store_true', - help='Enable debug logging') - parser.add_argument('-q', '--quiet', action='store_true', - help='Disable all output but errors') + parser = argparse.ArgumentParser(description="Capa rule formatter.") + parser.add_argument("path", type=str, help="Path to rule to format") + parser.add_argument( + "-i", + "--in-place", + action="store_true", + dest="in_place", + help="Format the rule in place, otherwise, write formatted rule to STDOUT", + ) + parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging") + parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") args = parser.parse_args(args=argv) if args.verbose: @@ -40,17 +42,17 @@ def main(argv=None): level = logging.INFO logging.basicConfig(level=level) - logging.getLogger('capafmt').setLevel(level) + logging.getLogger("capafmt").setLevel(level) rule = capa.rules.Rule.from_yaml_file(args.path) if args.in_place: - with open(args.path, 'wb') as f: - f.write(rule.to_yaml().encode('utf-8')) + with open(args.path, "wb") as f: + f.write(rule.to_yaml().encode("utf-8")) else: print(rule.to_yaml().rstrip("\n")) return 0 -if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/lint.py b/scripts/lint.py index cbbfcb71..1a1aee9f 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -1,10 +1,10 @@ -''' +""" Check the given capa rules for style issues. Usage: $ python scripts/lint.py rules/ -''' +""" import os import os.path import sys @@ -22,41 +22,40 @@ import capa.engine import capa.features import capa.features.insn -logger = logging.getLogger('capa.lint') +logger = logging.getLogger("capa.lint") class Lint(object): - name = 'lint' - recommendation = '' + name = "lint" + recommendation = "" def check_rule(self, ctx, rule): return False class NameCasing(Lint): - name = 'rule name casing' - recommendation = 'Rename rule using to start with lower case letters' + name = "rule name casing" + recommendation = "Rename rule using to start with lower case letters" def check_rule(self, ctx, rule): - return (rule.name[0] in string.ascii_uppercase and - rule.name[1] not in string.ascii_uppercase) + return rule.name[0] in string.ascii_uppercase and rule.name[1] not in string.ascii_uppercase class FilenameDoesntMatchRuleName(Lint): - name = 'filename doesn\'t match the rule name' + name = "filename doesn't match the rule name" recommendation = 'Rename rule file to match the rule name, expected: "{:s}", found: "{:s}"' def check_rule(self, ctx, rule): expected = rule.name expected = expected.lower() - expected = expected.replace(' ', '-') - expected = expected.replace('(', '') - expected = expected.replace(')', '') - expected = expected.replace('+', '') - expected = expected.replace('/', '') - expected = expected + '.yml' + expected = expected.replace(" ", "-") + expected = expected.replace("(", "") + expected = expected.replace(")", "") + expected = expected.replace("+", "") + expected = expected.replace("/", "") + expected = expected + ".yml" - found = os.path.basename(rule.meta['capa/path']) + found = os.path.basename(rule.meta["capa/path"]) self.recommendation = self.recommendation.format(expected, found) @@ -64,95 +63,99 @@ class FilenameDoesntMatchRuleName(Lint): class MissingNamespace(Lint): - name = 'missing rule namespace' - recommendation = 'Add meta.namespace so that the rule is emitted correctly' + name = "missing rule namespace" + recommendation = "Add meta.namespace so that the rule is emitted correctly" def check_rule(self, ctx, rule): - return ('namespace' not in rule.meta and - not is_nursery_rule(rule) and - 'maec/malware-category' not in rule.meta and - 'lib' not in rule.meta) + return ( + "namespace" not in rule.meta + and not is_nursery_rule(rule) + and "maec/malware-category" not in rule.meta + and "lib" not in rule.meta + ) class NamespaceDoesntMatchRulePath(Lint): - name = 'file path doesn\'t match rule namespace' - recommendation = 'Move rule to appropriate directory or update the namespace' + name = "file path doesn't match rule namespace" + recommendation = "Move rule to appropriate directory or update the namespace" def check_rule(self, ctx, rule): # let the other lints catch namespace issues - if 'namespace' not in rule.meta: + if "namespace" not in rule.meta: return False if is_nursery_rule(rule): return False - if 'maec/malware-category' in rule.meta: + if "maec/malware-category" in rule.meta: return False - if 'lib' in rule.meta: + if "lib" in rule.meta: return False - return rule.meta['namespace'] not in posixpath.normpath(rule.meta['capa/path']) + return rule.meta["namespace"] not in posixpath.normpath(rule.meta["capa/path"]) class MissingScope(Lint): - name = 'missing scope' - recommendation = 'Add meta.scope so that the scope is explicit (defaults to `function`)' + name = "missing scope" + recommendation = "Add meta.scope so that the scope is explicit (defaults to `function`)" def check_rule(self, ctx, rule): - return 'scope' not in rule.meta + return "scope" not in rule.meta class InvalidScope(Lint): - name = 'invalid scope' - recommendation = 'Use only file, function, or basic block rule scopes' + name = "invalid scope" + recommendation = "Use only file, function, or basic block rule scopes" def check_rule(self, ctx, rule): - return rule.meta.get('scope') not in ('file', 'function', 'basic block') + return rule.meta.get("scope") not in ("file", "function", "basic block") class MissingAuthor(Lint): - name = 'missing author' - recommendation = 'Add meta.author so that users know who to contact with questions' + name = "missing author" + recommendation = "Add meta.author so that users know who to contact with questions" def check_rule(self, ctx, rule): - return 'author' not in rule.meta + return "author" not in rule.meta class MissingExamples(Lint): - name = 'missing examples' - recommendation = 'Add meta.examples so that the rule can be tested and verified' + name = "missing examples" + recommendation = "Add meta.examples so that the rule can be tested and verified" def check_rule(self, ctx, rule): - return ('examples' not in rule.meta or - not isinstance(rule.meta['examples'], list) or - len(rule.meta['examples']) == 0 or - rule.meta['examples'] == [None]) + return ( + "examples" not in rule.meta + or not isinstance(rule.meta["examples"], list) + or len(rule.meta["examples"]) == 0 + or rule.meta["examples"] == [None] + ) class MissingExampleOffset(Lint): - name = 'missing example offset' - recommendation = 'Add offset of example function' + name = "missing example offset" + recommendation = "Add offset of example function" def check_rule(self, ctx, rule): - if rule.meta.get('scope') in ('function', 'basic block'): - for example in rule.meta.get('examples', []): - if example and ':' not in example: - logger.debug('example: %s', example) + if rule.meta.get("scope") in ("function", "basic block"): + for example in rule.meta.get("examples", []): + if example and ":" not in example: + logger.debug("example: %s", example) return True class ExampleFileDNE(Lint): - name = 'referenced example doesn\'t exist' - recommendation = 'Add the referenced example to samples directory ($capa-root/tests/data or supplied via --samples)' + name = "referenced example doesn't exist" + recommendation = "Add the referenced example to samples directory ($capa-root/tests/data or supplied via --samples)" def check_rule(self, ctx, rule): - if not rule.meta.get('examples'): + if not rule.meta.get("examples"): # let the MissingExamples lint catch this case, don't double report. return False found = False - for example in rule.meta.get('examples', []): + for example in rule.meta.get("examples", []): if example: - example_id = example.partition(':')[0] - if example_id in ctx['samples']: + example_id = example.partition(":")[0] + if example_id in ctx["samples"]: found = True break @@ -160,27 +163,27 @@ class ExampleFileDNE(Lint): class DoesntMatchExample(Lint): - name = 'doesn\'t match on referenced example' - recommendation = 'Fix the rule logic or provide a different example' + name = "doesn't match on referenced example" + recommendation = "Fix the rule logic or provide a different example" def check_rule(self, ctx, rule): - if not ctx['is_thorough']: + if not ctx["is_thorough"]: return False - for example in rule.meta.get('examples', []): - example_id = example.partition(':')[0] + for example in rule.meta.get("examples", []): + example_id = example.partition(":")[0] try: - path = ctx['samples'][example_id] + path = ctx["samples"][example_id] except KeyError: # lint ExampleFileDNE will catch this. # don't double report. continue try: - extractor = capa.main.get_extractor(path, 'auto') - capabilities = capa.main.find_capabilities(ctx['rules'], extractor, disable_progress=True) + extractor = capa.main.get_extractor(path, "auto") + capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True) except Exception as e: - logger.error('failed to extract capabilities: %s %s %s', rule.name, path, e) + logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e) return True if rule.name not in capabilities: @@ -188,7 +191,7 @@ class DoesntMatchExample(Lint): class UnusualMetaField(Lint): - name = 'unusual meta field' + name = "unusual meta field" recommendation = 'Remove the meta field: "{:s}"' def check_rule(self, ctx, rule): @@ -204,32 +207,32 @@ class UnusualMetaField(Lint): class LibRuleNotInLibDirectory(Lint): - name = 'lib rule not found in lib directory' - recommendation = 'Move the rule to the `lib` subdirectory of the rules path' + name = "lib rule not found in lib directory" + recommendation = "Move the rule to the `lib` subdirectory of the rules path" def check_rule(self, ctx, rule): if is_nursery_rule(rule): return False - if 'lib' not in rule.meta: + if "lib" not in rule.meta: return False - return '/lib/' not in posixpath.normpath(rule.meta['capa/path']) + return "/lib/" not in posixpath.normpath(rule.meta["capa/path"]) class LibRuleHasNamespace(Lint): - name = 'lib rule has a namespace' - recommendation = 'Remove the namespace from the rule' + name = "lib rule has a namespace" + recommendation = "Remove the namespace from the rule" def check_rule(self, ctx, rule): - if 'lib' not in rule.meta: + if "lib" not in rule.meta: return False - return 'namespace' in rule.meta + return "namespace" in rule.meta class FeatureStringTooShort(Lint): - name = 'feature string too short' + name = "feature string too short" recommendation = 'capa only extracts strings with length >= 4; will not match on "{:s}"' def check_features(self, ctx, features): @@ -242,9 +245,11 @@ class FeatureStringTooShort(Lint): class FeatureNegativeNumberOrOffset(Lint): - name = 'feature value is negative' - recommendation = 'capa treats all numbers as unsigned values; you may specify the number\'s two\'s complement ' \ - 'representation; will not match on "{:d}"' + name = "feature value is negative" + recommendation = ( + "capa treats all numbers as unsigned values; you may specify the number's two's complement " + 'representation; will not match on "{:d}"' + ) def check_features(self, ctx, features): for feature in features: @@ -318,7 +323,7 @@ def lint_features(ctx, rule): def get_features(ctx, rule): # get features from rule and all dependencies including subscopes and matched rules features = [] - deps = [ctx['rules'].rules[dep] for dep in rule.get_dependencies()] + deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies()] for r in [rule] + deps: features.extend(get_rule_features(r)) return features @@ -338,9 +343,7 @@ def get_rule_features(rule): return features -LOGIC_LINTS = ( - DoesntMatchExample(), -) +LOGIC_LINTS = (DoesntMatchExample(),) def lint_logic(ctx, rule): @@ -348,53 +351,58 @@ def lint_logic(ctx, rule): def is_nursery_rule(rule): - ''' + """ The nursery is a spot for rules that have not yet been fully polished. For example, they may not have references to public example of a technique. Yet, we still want to capture and report on their matches. - ''' - return rule.meta.get('capa/nursery') + """ + return rule.meta.get("capa/nursery") def lint_rule(ctx, rule): logger.debug(rule.name) - violations = list(itertools.chain( - lint_name(ctx, rule), - lint_scope(ctx, rule), - lint_meta(ctx, rule), - lint_logic(ctx, rule), - lint_features(ctx, rule), - )) + violations = list( + itertools.chain( + lint_name(ctx, rule), + lint_scope(ctx, rule), + lint_meta(ctx, rule), + lint_logic(ctx, rule), + lint_features(ctx, rule), + ) + ) if len(violations) > 0: - category = rule.meta.get('rule-category') + category = rule.meta.get("rule-category") - print('') - print('%s%s %s' % (' (nursery) ' if is_nursery_rule(rule) else '', - rule.name, - ('(%s)' % category) if category else '')) + print("") + print( + "%s%s %s" + % (" (nursery) " if is_nursery_rule(rule) else "", rule.name, ("(%s)" % category) if category else "",) + ) - level = 'WARN' if is_nursery_rule(rule) else 'FAIL' + level = "WARN" if is_nursery_rule(rule) else "FAIL" for violation in violations: - print('%s %s: %s: %s' % ( - ' ' if is_nursery_rule(rule) else '', level, violation.name, violation.recommendation)) + print( + "%s %s: %s: %s" + % (" " if is_nursery_rule(rule) else "", level, violation.name, violation.recommendation,) + ) return len(violations) > 0 and not is_nursery_rule(rule) def lint(ctx, rules): - ''' + """ Args: samples (Dict[string, string]): map from sample id to path. for each sample, record sample id of sha256, md5, and filename. see `collect_samples(path)`. rules (List[Rule]): the rules to lint. - ''' + """ did_suggest_fix = False for rule in rules.rules.values(): - if rule.meta.get('capa/subscope-rule', False): + if rule.meta.get("capa/subscope-rule", False): continue did_suggest_fix = lint_rule(ctx, rule) or did_suggest_fix @@ -403,27 +411,27 @@ def lint(ctx, rules): def collect_samples(path): - ''' + """ recurse through the given path, collecting all file paths, indexed by their content sha256, md5, and filename. - ''' + """ samples = {} for root, dirs, files in os.walk(path): for name in files: - if name.endswith('.viv'): + if name.endswith(".viv"): continue - if name.endswith('.idb'): + if name.endswith(".idb"): continue - if name.endswith('.i64'): + if name.endswith(".i64"): continue - if name.endswith('.frz'): + if name.endswith(".frz"): continue - if name.endswith('.fnames'): + if name.endswith(".fnames"): continue path = os.path.join(root, name) try: - with open(path, 'rb') as f: + with open(path, "rb") as f: buf = f.read() except IOError: continue @@ -447,19 +455,16 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - samples_path = os.path.join(os.path.dirname(__file__), '..', 'tests', 'data') + samples_path = os.path.join(os.path.dirname(__file__), "..", "tests", "data") - parser = argparse.ArgumentParser(description='A program.') - parser.add_argument('rules', type=str, - help='Path to rules') - parser.add_argument('--samples', type=str, default=samples_path, - help='Path to samples') - parser.add_argument('--thorough', action='store_true', - help='Enable thorough linting - takes more time, but does a better job') - parser.add_argument('-v', '--verbose', action='store_true', - help='Enable debug logging') - parser.add_argument('-q', '--quiet', action='store_true', - help='Disable all output but errors') + parser = argparse.ArgumentParser(description="A program.") + parser.add_argument("rules", type=str, help="Path to rules") + parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples") + parser.add_argument( + "--thorough", action="store_true", help="Enable thorough linting - takes more time, but does a better job", + ) + parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging") + parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") args = parser.parse_args(args=argv) if args.verbose: @@ -470,42 +475,42 @@ def main(argv=None): level = logging.INFO logging.basicConfig(level=level) - logging.getLogger('capa.lint').setLevel(level) + logging.getLogger("capa.lint").setLevel(level) capa.main.set_vivisect_log_level(logging.CRITICAL) - logging.getLogger('capa').setLevel(logging.CRITICAL) + logging.getLogger("capa").setLevel(logging.CRITICAL) try: rules = capa.main.get_rules(args.rules) rules = capa.rules.RuleSet(rules) - logger.info('successfully loaded %s rules', len(rules)) + logger.info("successfully loaded %s rules", len(rules)) except IOError as e: - logger.error('%s', str(e)) + logger.error("%s", str(e)) return -1 except capa.rules.InvalidRule as e: - logger.error('%s', str(e)) + logger.error("%s", str(e)) return -1 - logger.info('collecting potentially referenced samples') + logger.info("collecting potentially referenced samples") if not os.path.exists(args.samples): - logger.error('samples path %s does not exist', args.samples) + logger.error("samples path %s does not exist", args.samples) return -1 samples = collect_samples(args.samples) ctx = { - 'samples': samples, - 'rules': rules, - 'is_thorough': args.thorough, + "samples": samples, + "rules": rules, + "is_thorough": args.thorough, } did_violate = lint(ctx, rules) if not did_violate: - logger.info('no suggestions, nice!') + logger.info("no suggestions, nice!") return 0 else: return 1 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/scripts/show-features.py b/scripts/show-features.py index da88f87c..e8ea988b 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 -''' +""" show the features extracted by capa. -''' +""" import sys import logging @@ -20,28 +20,27 @@ def main(argv=None): argv = sys.argv[1:] formats = [ - ('auto', '(default) detect file type automatically'), - ('pe', 'Windows PE file'), - ('sc32', '32-bit shellcode'), - ('sc64', '64-bit shellcode'), - ('freeze', 'features previously frozen by capa'), + ("auto", "(default) detect file type automatically"), + ("pe", "Windows PE file"), + ("sc32", "32-bit shellcode"), + ("sc64", "64-bit shellcode"), + ("freeze", "features previously frozen by capa"), ] - format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats]) + format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats]) - parser = argparse.ArgumentParser(description='detect capabilities in programs.') - parser.add_argument('sample', type=str, - help='Path to sample to analyze') - parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto', - help='Select sample format, %s' % format_help) - parser.add_argument('-F', '--function', type=lambda x: int(x, 0), - help='Show features for specific function') + parser = argparse.ArgumentParser(description="detect capabilities in programs.") + parser.add_argument("sample", type=str, help="Path to sample to analyze") + parser.add_argument( + "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help + ) + parser.add_argument("-F", "--function", type=lambda x: int(x, 0), help="Show features for specific function") args = parser.parse_args(args=argv) logging.basicConfig(level=logging.INFO) logging.getLogger().setLevel(logging.INFO) - if args.format == 'freeze': - with open(args.sample, 'rb') as f: + if args.format == "freeze": + with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) else: vw = capa.main.get_workspace(args.sample, args.format) @@ -50,32 +49,32 @@ def main(argv=None): if not args.function: for feature, va in extractor.extract_file_features(): if va: - print('file: 0x%08x: %s' % (va, feature)) + print("file: 0x%08x: %s" % (va, feature)) else: - print('file: 0x00000000: %s' % (feature)) + print("file: 0x00000000: %s" % (feature)) functions = extractor.get_functions() if args.function: - if args.format == 'freeze': + if args.format == "freeze": functions = filter(lambda f: f == args.function, functions) else: functions = filter(lambda f: f.va == args.function, functions) for f in functions: for feature, va in extractor.extract_function_features(f): - print('func: 0x%08x: %s' % (va, feature)) + print("func: 0x%08x: %s" % (va, feature)) for bb in extractor.get_basic_blocks(f): for feature, va in extractor.extract_basic_block_features(f, bb): - print('bb : 0x%08x: %s' % (va, feature)) + print("bb : 0x%08x: %s" % (va, feature)) for insn in extractor.get_instructions(f, bb): for feature, va in extractor.extract_insn_features(f, bb, insn): - print('insn: 0x%08x: %s' % (va, feature)) + print("insn: 0x%08x: %s" % (va, feature)) return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main())