From 867de57062713aef2fc522fb2937560bcb81e82a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 10:12:03 -0600 Subject: [PATCH] main: find_capabilities: extract feature counts per item, too closes #95 closes #96 --- capa/ida/ida_capa_explorer.py | 7 ++++-- capa/main.py | 32 ++++++++++++++++++-------- scripts/lint.py | 2 +- scripts/testbed/run_rule_on_testbed.py | 3 ++- tests/test_freeze.py | 2 +- tests/test_main.py | 10 ++++---- 6 files changed, 37 insertions(+), 19 deletions(-) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index 970e8a62..801985bf 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -339,7 +339,11 @@ class CapaExplorerForm(idaapi.PluginForm): rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules") rules = capa.main.get_rules(rules_path) rules = capa.rules.RuleSet(rules) - capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True) + + meta = capa.ida.helpers.collect_metadata() + + capabilities, counts = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True) + meta.update(counts) # support binary files specifically for x86/AMD64 shellcode # warn user binary file is loaded but still allow capa to process it @@ -364,7 +368,6 @@ class CapaExplorerForm(idaapi.PluginForm): logger.info("analysis completed.") - meta = capa.ida.helpers.collect_metadata() doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) self.model_data.render_capa_doc(doc) diff --git a/capa/main.py b/capa/main.py index 52516a8f..9cbb1589 100644 --- a/capa/main.py +++ b/capa/main.py @@ -68,7 +68,7 @@ def find_function_capabilities(ruleset, extractor, f): function_features[capa.features.MatchedRule(rule_name)].add(va) _, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f)) - return function_matches, bb_matches + return function_matches, bb_matches, len(function_features) def find_file_capabilities(ruleset, extractor, function_features): @@ -84,20 +84,30 @@ def find_file_capabilities(ruleset, extractor, function_features): if feature not in file_features: file_features[feature] = set() - logger.info("analyzed file and extracted %d features", len(file_features)) + logger.debug("analyzed file and extracted %d features", len(file_features)) file_features.update(function_features) _, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0) - return matches + return matches, len(file_features) def find_capabilities(ruleset, extractor, disable_progress=None): all_function_matches = collections.defaultdict(list) all_bb_matches = collections.defaultdict(list) + meta = { + "counts": { + "file": 0, + "functions": {}, + } + } + for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"): - function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f) + function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f) + meta["counts"]["functions"][f.__int__()] = feature_count + logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count) + for rule_name, res in function_matches.items(): all_function_matches[rule_name].extend(res) for rule_name, res in bb_matches.items(): @@ -110,14 +120,15 @@ def find_capabilities(ruleset, extractor, disable_progress=None): for rule_name, results in all_function_matches.items() } - all_file_matches = find_file_capabilities(ruleset, extractor, function_features) + all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_features) + meta["counts"]["file"] = feature_count matches = {} matches.update(all_bb_matches) matches.update(all_function_matches) matches.update(all_file_matches) - return matches + return matches, meta def has_rule_with_namespace(rules, capabilities, rule_cat): @@ -485,7 +496,8 @@ def main(argv=None): meta = collect_metadata(argv, args.sample, format, extractor) - capabilities = find_capabilities(rules, extractor) + capabilities, counts = find_capabilities(rules, extractor) + meta.update(counts) if has_file_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. a packed binary @@ -542,12 +554,14 @@ def ida_main(): rules = get_rules(rules_path) rules = capa.rules.RuleSet(rules) - capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor()) + meta = capa.ida.helpers.collect_metadata() + + capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor()) + meta.update(counts) if has_file_limitation(rules, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis") - meta = capa.ida.helpers.collect_metadata() print(capa.render.render_default(meta, rules, capabilities)) diff --git a/scripts/lint.py b/scripts/lint.py index 69bf3bf4..7638e2c7 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -180,7 +180,7 @@ class DoesntMatchExample(Lint): try: extractor = capa.main.get_extractor(path, "auto") - capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True) + capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True) except Exception as e: logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e) return True diff --git a/scripts/testbed/run_rule_on_testbed.py b/scripts/testbed/run_rule_on_testbed.py index 2e6b9ce8..aa78a830 100644 --- a/scripts/testbed/run_rule_on_testbed.py +++ b/scripts/testbed/run_rule_on_testbed.py @@ -93,7 +93,8 @@ def get_capabilities(path, rules): logger.debug("matching rules in %s", path) with open(path, "rb") as f: extractor = capa.features.freeze.load(f.read()) - return capa.main.find_capabilities(rules, extractor, disable_progress=True) + capabilities, meta = capa.main.find_capabilities(rules, extractor, disable_progress=True) + return capabilities def get_function_hits(capabilities, rule_name): diff --git a/tests/test_freeze.py b/tests/test_freeze.py index f81114ca..ef9fe1bd 100644 --- a/tests/test_freeze.py +++ b/tests/test_freeze.py @@ -59,7 +59,7 @@ def test_null_feature_extractor(): ), ] ) - capabilities = capa.main.find_capabilities(rules, EXTRACTOR) + capabilities, meta = capa.main.find_capabilities(rules, EXTRACTOR) assert "xor loop" in capabilities diff --git a/tests/test_main.py b/tests/test_main.py index 3010b494..c35b4b4b 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -147,7 +147,7 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970 extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "install service" in capabilities assert ".text section" in capabilities assert ".text section and install service" in capabilities @@ -212,7 +212,7 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a): extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "tight loop" in capabilities assert "kill thread loop" in capabilities assert "kill thread program" in capabilities @@ -241,7 +241,7 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a): extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "test rule" in capabilities @@ -267,7 +267,7 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a): extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "byte match test" in capabilities @@ -294,5 +294,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a): extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "count bb" in capabilities