main: find_capabilities: extract feature counts per item, too

closes #95
closes #96
This commit is contained in:
William Ballenthin
2020-07-03 10:12:03 -06:00
parent 5cd10bfc75
commit 867de57062
6 changed files with 37 additions and 19 deletions

View File

@@ -339,7 +339,11 @@ class CapaExplorerForm(idaapi.PluginForm):
rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
rules = capa.main.get_rules(rules_path)
rules = capa.rules.RuleSet(rules)
capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
meta = capa.ida.helpers.collect_metadata()
capabilities, counts = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
meta.update(counts)
# support binary files specifically for x86/AMD64 shellcode
# warn user binary file is loaded but still allow capa to process it
@@ -364,7 +368,6 @@ class CapaExplorerForm(idaapi.PluginForm):
logger.info("analysis completed.")
meta = capa.ida.helpers.collect_metadata()
doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
self.model_data.render_capa_doc(doc)

View File

@@ -68,7 +68,7 @@ def find_function_capabilities(ruleset, extractor, f):
function_features[capa.features.MatchedRule(rule_name)].add(va)
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
return function_matches, bb_matches
return function_matches, bb_matches, len(function_features)
def find_file_capabilities(ruleset, extractor, function_features):
@@ -84,20 +84,30 @@ def find_file_capabilities(ruleset, extractor, function_features):
if feature not in file_features:
file_features[feature] = set()
logger.info("analyzed file and extracted %d features", len(file_features))
logger.debug("analyzed file and extracted %d features", len(file_features))
file_features.update(function_features)
_, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
return matches
return matches, len(file_features)
def find_capabilities(ruleset, extractor, disable_progress=None):
all_function_matches = collections.defaultdict(list)
all_bb_matches = collections.defaultdict(list)
meta = {
"counts": {
"file": 0,
"functions": {},
}
}
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
meta["counts"]["functions"][f.__int__()] = feature_count
logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res)
for rule_name, res in bb_matches.items():
@@ -110,14 +120,15 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
for rule_name, results in all_function_matches.items()
}
all_file_matches = find_file_capabilities(ruleset, extractor, function_features)
all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_features)
meta["counts"]["file"] = feature_count
matches = {}
matches.update(all_bb_matches)
matches.update(all_function_matches)
matches.update(all_file_matches)
return matches
return matches, meta
def has_rule_with_namespace(rules, capabilities, rule_cat):
@@ -485,7 +496,8 @@ def main(argv=None):
meta = collect_metadata(argv, args.sample, format, extractor)
capabilities = find_capabilities(rules, extractor)
capabilities, counts = find_capabilities(rules, extractor)
meta.update(counts)
if has_file_limitation(rules, capabilities):
# bail if capa encountered file limitation e.g. a packed binary
@@ -542,12 +554,14 @@ def ida_main():
rules = get_rules(rules_path)
rules = capa.rules.RuleSet(rules)
capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
meta = capa.ida.helpers.collect_metadata()
capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
meta.update(counts)
if has_file_limitation(rules, capabilities, is_standalone=False):
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
meta = capa.ida.helpers.collect_metadata()
print(capa.render.render_default(meta, rules, capabilities))

View File

@@ -180,7 +180,7 @@ class DoesntMatchExample(Lint):
try:
extractor = capa.main.get_extractor(path, "auto")
capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
except Exception as e:
logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
return True

View File

@@ -93,7 +93,8 @@ def get_capabilities(path, rules):
logger.debug("matching rules in %s", path)
with open(path, "rb") as f:
extractor = capa.features.freeze.load(f.read())
return capa.main.find_capabilities(rules, extractor, disable_progress=True)
capabilities, meta = capa.main.find_capabilities(rules, extractor, disable_progress=True)
return capabilities
def get_function_hits(capabilities, rule_name):

View File

@@ -59,7 +59,7 @@ def test_null_feature_extractor():
),
]
)
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
capabilities, meta = capa.main.find_capabilities(rules, EXTRACTOR)
assert "xor loop" in capabilities

View File

@@ -147,7 +147,7 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
)
capabilities = capa.main.find_capabilities(rules, extractor)
capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "install service" in capabilities
assert ".text section" in capabilities
assert ".text section and install service" in capabilities
@@ -212,7 +212,7 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
)
capabilities = capa.main.find_capabilities(rules, extractor)
capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "tight loop" in capabilities
assert "kill thread loop" in capabilities
assert "kill thread program" in capabilities
@@ -241,7 +241,7 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
)
capabilities = capa.main.find_capabilities(rules, extractor)
capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "test rule" in capabilities
@@ -267,7 +267,7 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
)
capabilities = capa.main.find_capabilities(rules, extractor)
capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "byte match test" in capabilities
@@ -294,5 +294,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
)
capabilities = capa.main.find_capabilities(rules, extractor)
capabilities, meta = capa.main.find_capabilities(rules, extractor)
assert "count bb" in capabilities