mirror of
https://github.com/mandiant/capa.git
synced 2025-12-23 07:28:34 -08:00
main: find_capabilities: extract feature counts per item, too
closes #95 closes #96
This commit is contained in:
@@ -339,7 +339,11 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
|
rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
|
||||||
rules = capa.main.get_rules(rules_path)
|
rules = capa.main.get_rules(rules_path)
|
||||||
rules = capa.rules.RuleSet(rules)
|
rules = capa.rules.RuleSet(rules)
|
||||||
capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
|
|
||||||
|
meta = capa.ida.helpers.collect_metadata()
|
||||||
|
|
||||||
|
capabilities, counts = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
|
||||||
|
meta.update(counts)
|
||||||
|
|
||||||
# support binary files specifically for x86/AMD64 shellcode
|
# support binary files specifically for x86/AMD64 shellcode
|
||||||
# warn user binary file is loaded but still allow capa to process it
|
# warn user binary file is loaded but still allow capa to process it
|
||||||
@@ -364,7 +368,6 @@ class CapaExplorerForm(idaapi.PluginForm):
|
|||||||
|
|
||||||
logger.info("analysis completed.")
|
logger.info("analysis completed.")
|
||||||
|
|
||||||
meta = capa.ida.helpers.collect_metadata()
|
|
||||||
doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
|
doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
|
||||||
|
|
||||||
self.model_data.render_capa_doc(doc)
|
self.model_data.render_capa_doc(doc)
|
||||||
|
|||||||
32
capa/main.py
32
capa/main.py
@@ -68,7 +68,7 @@ def find_function_capabilities(ruleset, extractor, f):
|
|||||||
function_features[capa.features.MatchedRule(rule_name)].add(va)
|
function_features[capa.features.MatchedRule(rule_name)].add(va)
|
||||||
|
|
||||||
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
|
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
|
||||||
return function_matches, bb_matches
|
return function_matches, bb_matches, len(function_features)
|
||||||
|
|
||||||
|
|
||||||
def find_file_capabilities(ruleset, extractor, function_features):
|
def find_file_capabilities(ruleset, extractor, function_features):
|
||||||
@@ -84,20 +84,30 @@ def find_file_capabilities(ruleset, extractor, function_features):
|
|||||||
if feature not in file_features:
|
if feature not in file_features:
|
||||||
file_features[feature] = set()
|
file_features[feature] = set()
|
||||||
|
|
||||||
logger.info("analyzed file and extracted %d features", len(file_features))
|
logger.debug("analyzed file and extracted %d features", len(file_features))
|
||||||
|
|
||||||
file_features.update(function_features)
|
file_features.update(function_features)
|
||||||
|
|
||||||
_, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
|
_, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
|
||||||
return matches
|
return matches, len(file_features)
|
||||||
|
|
||||||
|
|
||||||
def find_capabilities(ruleset, extractor, disable_progress=None):
|
def find_capabilities(ruleset, extractor, disable_progress=None):
|
||||||
all_function_matches = collections.defaultdict(list)
|
all_function_matches = collections.defaultdict(list)
|
||||||
all_bb_matches = collections.defaultdict(list)
|
all_bb_matches = collections.defaultdict(list)
|
||||||
|
|
||||||
|
meta = {
|
||||||
|
"counts": {
|
||||||
|
"file": 0,
|
||||||
|
"functions": {},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
|
for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
|
||||||
function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
|
function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
|
||||||
|
meta["counts"]["functions"][f.__int__()] = feature_count
|
||||||
|
logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
|
||||||
|
|
||||||
for rule_name, res in function_matches.items():
|
for rule_name, res in function_matches.items():
|
||||||
all_function_matches[rule_name].extend(res)
|
all_function_matches[rule_name].extend(res)
|
||||||
for rule_name, res in bb_matches.items():
|
for rule_name, res in bb_matches.items():
|
||||||
@@ -110,14 +120,15 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
|
|||||||
for rule_name, results in all_function_matches.items()
|
for rule_name, results in all_function_matches.items()
|
||||||
}
|
}
|
||||||
|
|
||||||
all_file_matches = find_file_capabilities(ruleset, extractor, function_features)
|
all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_features)
|
||||||
|
meta["counts"]["file"] = feature_count
|
||||||
|
|
||||||
matches = {}
|
matches = {}
|
||||||
matches.update(all_bb_matches)
|
matches.update(all_bb_matches)
|
||||||
matches.update(all_function_matches)
|
matches.update(all_function_matches)
|
||||||
matches.update(all_file_matches)
|
matches.update(all_file_matches)
|
||||||
|
|
||||||
return matches
|
return matches, meta
|
||||||
|
|
||||||
|
|
||||||
def has_rule_with_namespace(rules, capabilities, rule_cat):
|
def has_rule_with_namespace(rules, capabilities, rule_cat):
|
||||||
@@ -485,7 +496,8 @@ def main(argv=None):
|
|||||||
|
|
||||||
meta = collect_metadata(argv, args.sample, format, extractor)
|
meta = collect_metadata(argv, args.sample, format, extractor)
|
||||||
|
|
||||||
capabilities = find_capabilities(rules, extractor)
|
capabilities, counts = find_capabilities(rules, extractor)
|
||||||
|
meta.update(counts)
|
||||||
|
|
||||||
if has_file_limitation(rules, capabilities):
|
if has_file_limitation(rules, capabilities):
|
||||||
# bail if capa encountered file limitation e.g. a packed binary
|
# bail if capa encountered file limitation e.g. a packed binary
|
||||||
@@ -542,12 +554,14 @@ def ida_main():
|
|||||||
rules = get_rules(rules_path)
|
rules = get_rules(rules_path)
|
||||||
rules = capa.rules.RuleSet(rules)
|
rules = capa.rules.RuleSet(rules)
|
||||||
|
|
||||||
capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
|
meta = capa.ida.helpers.collect_metadata()
|
||||||
|
|
||||||
|
capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
|
||||||
|
meta.update(counts)
|
||||||
|
|
||||||
if has_file_limitation(rules, capabilities, is_standalone=False):
|
if has_file_limitation(rules, capabilities, is_standalone=False):
|
||||||
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
|
capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
|
||||||
|
|
||||||
meta = capa.ida.helpers.collect_metadata()
|
|
||||||
print(capa.render.render_default(meta, rules, capabilities))
|
print(capa.render.render_default(meta, rules, capabilities))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -180,7 +180,7 @@ class DoesntMatchExample(Lint):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
extractor = capa.main.get_extractor(path, "auto")
|
extractor = capa.main.get_extractor(path, "auto")
|
||||||
capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
|
capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
|
logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -93,7 +93,8 @@ def get_capabilities(path, rules):
|
|||||||
logger.debug("matching rules in %s", path)
|
logger.debug("matching rules in %s", path)
|
||||||
with open(path, "rb") as f:
|
with open(path, "rb") as f:
|
||||||
extractor = capa.features.freeze.load(f.read())
|
extractor = capa.features.freeze.load(f.read())
|
||||||
return capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
capabilities, meta = capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
||||||
|
return capabilities
|
||||||
|
|
||||||
|
|
||||||
def get_function_hits(capabilities, rule_name):
|
def get_function_hits(capabilities, rule_name):
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ def test_null_feature_extractor():
|
|||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
|
capabilities, meta = capa.main.find_capabilities(rules, EXTRACTOR)
|
||||||
assert "xor loop" in capabilities
|
assert "xor loop" in capabilities
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -147,7 +147,7 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
|
|||||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||||
)
|
)
|
||||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||||
assert "install service" in capabilities
|
assert "install service" in capabilities
|
||||||
assert ".text section" in capabilities
|
assert ".text section" in capabilities
|
||||||
assert ".text section and install service" in capabilities
|
assert ".text section and install service" in capabilities
|
||||||
@@ -212,7 +212,7 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
|
|||||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
|
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
|
||||||
)
|
)
|
||||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||||
assert "tight loop" in capabilities
|
assert "tight loop" in capabilities
|
||||||
assert "kill thread loop" in capabilities
|
assert "kill thread loop" in capabilities
|
||||||
assert "kill thread program" in capabilities
|
assert "kill thread program" in capabilities
|
||||||
@@ -241,7 +241,7 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
|
|||||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||||
)
|
)
|
||||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||||
assert "test rule" in capabilities
|
assert "test rule" in capabilities
|
||||||
|
|
||||||
|
|
||||||
@@ -267,7 +267,7 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
|
|||||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||||
)
|
)
|
||||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||||
assert "byte match test" in capabilities
|
assert "byte match test" in capabilities
|
||||||
|
|
||||||
|
|
||||||
@@ -294,5 +294,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
|
|||||||
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
|
||||||
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
|
||||||
)
|
)
|
||||||
capabilities = capa.main.find_capabilities(rules, extractor)
|
capabilities, meta = capa.main.find_capabilities(rules, extractor)
|
||||||
assert "count bb" in capabilities
|
assert "count bb" in capabilities
|
||||||
|
|||||||
Reference in New Issue
Block a user