main: find_capabilities: also return feature counts for the file and for each function

closes #95
closes #96
This commit is contained in:
William Ballenthin
2020-07-03 10:12:03 -06:00
parent 5cd10bfc75
commit 867de57062
6 changed files with 37 additions and 19 deletions

View File

@@ -339,7 +339,11 @@ class CapaExplorerForm(idaapi.PluginForm):
rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules") rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
rules = capa.main.get_rules(rules_path) rules = capa.main.get_rules(rules_path)
rules = capa.rules.RuleSet(rules) rules = capa.rules.RuleSet(rules)
capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
meta = capa.ida.helpers.collect_metadata()
capabilities, counts = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
meta.update(counts)
# support binary files specifically for x86/AMD64 shellcode # support binary files specifically for x86/AMD64 shellcode
# warn user binary file is loaded but still allow capa to process it # warn user binary file is loaded but still allow capa to process it
@@ -364,7 +368,6 @@ class CapaExplorerForm(idaapi.PluginForm):
logger.info("analysis completed.") logger.info("analysis completed.")
meta = capa.ida.helpers.collect_metadata()
doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
self.model_data.render_capa_doc(doc) self.model_data.render_capa_doc(doc)

View File

@@ -68,7 +68,7 @@ def find_function_capabilities(ruleset, extractor, f):
function_features[capa.features.MatchedRule(rule_name)].add(va) function_features[capa.features.MatchedRule(rule_name)].add(va)
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f)) _, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
return function_matches, bb_matches return function_matches, bb_matches, len(function_features)
def find_file_capabilities(ruleset, extractor, function_features): def find_file_capabilities(ruleset, extractor, function_features):
@@ -84,20 +84,30 @@ def find_file_capabilities(ruleset, extractor, function_features):
if feature not in file_features: if feature not in file_features:
file_features[feature] = set() file_features[feature] = set()
logger.info("analyzed file and extracted %d features", len(file_features)) logger.debug("analyzed file and extracted %d features", len(file_features))
file_features.update(function_features) file_features.update(function_features)
_, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0) _, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
return matches return matches, len(file_features)
def find_capabilities(ruleset, extractor, disable_progress=None): def find_capabilities(ruleset, extractor, disable_progress=None):
all_function_matches = collections.defaultdict(list) all_function_matches = collections.defaultdict(list)
all_bb_matches = collections.defaultdict(list) all_bb_matches = collections.defaultdict(list)
meta = {
"counts": {
"file": 0,
"functions": {},
}
}
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"): for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f) function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
meta["counts"]["functions"][f.__int__()] = feature_count
logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
for rule_name, res in function_matches.items(): for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res) all_function_matches[rule_name].extend(res)
for rule_name, res in bb_matches.items(): for rule_name, res in bb_matches.items():
@@ -110,14 +120,15 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
for rule_name, results in all_function_matches.items() for rule_name, results in all_function_matches.items()
} }
all_file_matches = find_file_capabilities(ruleset, extractor, function_features) all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_features)
meta["counts"]["file"] = feature_count
matches = {} matches = {}
matches.update(all_bb_matches) matches.update(all_bb_matches)
matches.update(all_function_matches) matches.update(all_function_matches)
matches.update(all_file_matches) matches.update(all_file_matches)
return matches return matches, meta
def has_rule_with_namespace(rules, capabilities, rule_cat): def has_rule_with_namespace(rules, capabilities, rule_cat):
@@ -485,7 +496,8 @@ def main(argv=None):
meta = collect_metadata(argv, args.sample, format, extractor) meta = collect_metadata(argv, args.sample, format, extractor)
capabilities = find_capabilities(rules, extractor) capabilities, counts = find_capabilities(rules, extractor)
meta.update(counts)
if has_file_limitation(rules, capabilities): if has_file_limitation(rules, capabilities):
# bail if capa encountered file limitation e.g. a packed binary # bail if capa encountered file limitation e.g. a packed binary
@@ -542,12 +554,14 @@ def ida_main():
rules = get_rules(rules_path) rules = get_rules(rules_path)
rules = capa.rules.RuleSet(rules) rules = capa.rules.RuleSet(rules)
capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor()) meta = capa.ida.helpers.collect_metadata()
capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
meta.update(counts)
if has_file_limitation(rules, capabilities, is_standalone=False): if has_file_limitation(rules, capabilities, is_standalone=False):
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis") capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
meta = capa.ida.helpers.collect_metadata()
print(capa.render.render_default(meta, rules, capabilities)) print(capa.render.render_default(meta, rules, capabilities))

View File

@@ -180,7 +180,7 @@ class DoesntMatchExample(Lint):
try: try:
extractor = capa.main.get_extractor(path, "auto") extractor = capa.main.get_extractor(path, "auto")
capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True) capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
except Exception as e: except Exception as e:
logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e) logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
return True return True

View File

@@ -93,7 +93,8 @@ def get_capabilities(path, rules):
logger.debug("matching rules in %s", path) logger.debug("matching rules in %s", path)
with open(path, "rb") as f: with open(path, "rb") as f:
extractor = capa.features.freeze.load(f.read()) extractor = capa.features.freeze.load(f.read())
return capa.main.find_capabilities(rules, extractor, disable_progress=True) capabilities, meta = capa.main.find_capabilities(rules, extractor, disable_progress=True)
return capabilities
def get_function_hits(capabilities, rule_name): def get_function_hits(capabilities, rule_name):

View File

@@ -59,7 +59,7 @@ def test_null_feature_extractor():
), ),
] ]
) )
capabilities = capa.main.find_capabilities(rules, EXTRACTOR) capabilities, meta = capa.main.find_capabilities(rules, EXTRACTOR)
assert "xor loop" in capabilities assert "xor loop" in capabilities

View File

@@ -147,7 +147,7 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
extractor = capa.features.extractors.viv.VivisectFeatureExtractor( extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
) )
capabilities = capa.main.find_capabilities(rules, extractor) capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "install service" in capabilities assert "install service" in capabilities
assert ".text section" in capabilities assert ".text section" in capabilities
assert ".text section and install service" in capabilities assert ".text section and install service" in capabilities
@@ -212,7 +212,7 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
extractor = capa.features.extractors.viv.VivisectFeatureExtractor( extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
) )
capabilities = capa.main.find_capabilities(rules, extractor) capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "tight loop" in capabilities assert "tight loop" in capabilities
assert "kill thread loop" in capabilities assert "kill thread loop" in capabilities
assert "kill thread program" in capabilities assert "kill thread program" in capabilities
@@ -241,7 +241,7 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
extractor = capa.features.extractors.viv.VivisectFeatureExtractor( extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
) )
capabilities = capa.main.find_capabilities(rules, extractor) capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "test rule" in capabilities assert "test rule" in capabilities
@@ -267,7 +267,7 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
extractor = capa.features.extractors.viv.VivisectFeatureExtractor( extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
) )
capabilities = capa.main.find_capabilities(rules, extractor) capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "byte match test" in capabilities assert "byte match test" in capabilities
@@ -294,5 +294,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
extractor = capa.features.extractors.viv.VivisectFeatureExtractor( extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
) )
capabilities = capa.main.find_capabilities(rules, extractor) capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "count bb" in capabilities assert "count bb" in capabilities