mirror of
https://github.com/mandiant/capa.git
synced 2026-01-07 19:01:04 -08:00
Merge branch 'dynamic-feature-extraction' into capabilities-module
This commit is contained in:
@@ -318,6 +318,8 @@ def get_data_path_by_name(name) -> Path:
|
||||
return CD / "data" / "499c2a85f6e8142c3f48d4251c9c7cd6.raw32"
|
||||
elif name.startswith("9324d"):
|
||||
return CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_"
|
||||
elif name.startswith("395eb"):
|
||||
return CD / "data" / "395eb0ddd99d2c9e37b6d0b73485ee9c.exe_"
|
||||
elif name.startswith("a1982"):
|
||||
return CD / "data" / "a198216798ca38f280dc413f8c57f2c2.exe_"
|
||||
elif name.startswith("a933a"):
|
||||
@@ -1346,6 +1348,11 @@ def z9324d_extractor():
|
||||
return get_extractor(get_data_path_by_name("9324d..."))
|
||||
|
||||
|
||||
@pytest.fixture
def z395eb_extractor():
    """Return the extractor built by `get_extractor` for the sample named ``395eb...``."""
    sample_path = get_data_path_by_name("395eb...")
    return get_extractor(sample_path)
|
||||
|
||||
|
||||
@pytest.fixture
def pma12_04_extractor():
    """Return the extractor built by `get_extractor` for the sample named ``pma12-04``."""
    sample_path = get_data_path_by_name("pma12-04")
    return get_extractor(sample_path)
|
||||
@@ -1432,29 +1439,42 @@ def get_result_doc(path: Path):
|
||||
|
||||
@pytest.fixture
def pma0101_rd():
    """Result document loaded from ``data/rd/Practical Malware Analysis Lab 01-01.dll_.json``."""
    # command that writes this file:
    # python -m capa.main tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ --json > tests/data/rd/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_.json
    doc_path = CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"
    return get_result_doc(doc_path)
|
||||
|
||||
|
||||
@pytest.fixture
def dotnet_1c444e_rd():
    """Result document loaded from ``data/rd/1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json`` (.NET sample)."""
    # .NET sample
    # command that writes this file:
    # python -m capa.main tests/data/dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_ --json > tests/data/rd/1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json
    doc_path = CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json"
    return get_result_doc(doc_path)
|
||||
|
||||
|
||||
@pytest.fixture
def a3f3bbc_rd():
    """Result document loaded from ``data/rd/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json``."""
    # command that writes this file:
    # python -m capa.main tests/data/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_ --json > tests/data/rd/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json
    doc_path = CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json"
    return get_result_doc(doc_path)
|
||||
|
||||
|
||||
@pytest.fixture
def al_khaserx86_rd():
    """Result document loaded from ``data/rd/al-khaser_x86.exe_.json``."""
    # command that writes this file:
    # python -m capa.main tests/data/al-khaser_x86.exe_ --json > tests/data/rd/al-khaser_x86.exe_.json
    doc_path = CD / "data" / "rd" / "al-khaser_x86.exe_.json"
    return get_result_doc(doc_path)
|
||||
|
||||
|
||||
@pytest.fixture
def al_khaserx64_rd():
    """Result document loaded from ``data/rd/al-khaser_x64.exe_.json``."""
    # command that writes this file:
    # python -m capa.main tests/data/al-khaser_x64.exe_ --json > tests/data/rd/al-khaser_x64.exe_.json
    doc_path = CD / "data" / "rd" / "al-khaser_x64.exe_.json"
    return get_result_doc(doc_path)
|
||||
|
||||
|
||||
@pytest.fixture
def a076114_rd():
    """Result document loaded from ``data/rd/0761142efbda6c4b1e801223de723578.dll_.json``."""
    # command that writes this file:
    # python -m capa.main tests/data/0761142efbda6c4b1e801223de723578.dll_ --json > tests/data/rd/0761142efbda6c4b1e801223de723578.dll_.json
    doc_path = CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json"
    return get_result_doc(doc_path)
|
||||
|
||||
|
||||
@pytest.fixture
def dynamic_a0000a6_rd():
    """Result document loaded from ``data/rd/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json``.

    Per the command below, the input is a report under ``tests/data/dynamic/cape/v2.2/``.
    """
    # command that writes this file:
    # python -m capa.main tests/data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json --json > tests/data/rd/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json
    doc_path = CD / "data" / "rd" / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json"
    return get_result_doc(doc_path)
|
||||
|
||||
@@ -46,7 +46,7 @@ def test_doc_to_pb2(request, rd_file):
|
||||
assert matches.meta.name == m.name
|
||||
assert cmp_optional(matches.meta.namespace, m.namespace)
|
||||
assert list(matches.meta.authors) == m.authors
|
||||
assert capa.render.proto.scope_to_pb2(matches.meta.scope) == m.scope
|
||||
assert capa.render.proto.scopes_to_pb2(matches.meta.scopes) == m.scopes
|
||||
|
||||
assert len(matches.meta.attack) == len(m.attack)
|
||||
for rd_attack, proto_attack in zip(matches.meta.attack, m.attack):
|
||||
@@ -116,10 +116,27 @@ def test_addr_to_pb2():
|
||||
|
||||
|
||||
def test_scope_to_pb2():
    """Check that `scope_to_pb2` maps each `capa.rules.Scope` to the matching `capa_pb2.SCOPE_*` value."""
    convert = capa.render.proto.scope_to_pb2
    scopes = capa.rules.Scope

    # members passed through the `Scope(...)` call first
    assert convert(scopes(scopes.FILE)) == capa_pb2.SCOPE_FILE
    assert convert(scopes(scopes.FUNCTION)) == capa_pb2.SCOPE_FUNCTION
    assert convert(scopes(scopes.BASIC_BLOCK)) == capa_pb2.SCOPE_BASIC_BLOCK
    assert convert(scopes(scopes.INSTRUCTION)) == capa_pb2.SCOPE_INSTRUCTION

    # members passed directly
    assert convert(scopes.FILE) == capa_pb2.SCOPE_FILE
    assert convert(scopes.FUNCTION) == capa_pb2.SCOPE_FUNCTION
    assert convert(scopes.BASIC_BLOCK) == capa_pb2.SCOPE_BASIC_BLOCK
    assert convert(scopes.INSTRUCTION) == capa_pb2.SCOPE_INSTRUCTION
    assert convert(scopes.PROCESS) == capa_pb2.SCOPE_PROCESS
    assert convert(scopes.THREAD) == capa_pb2.SCOPE_THREAD
    assert convert(scopes.CALL) == capa_pb2.SCOPE_CALL
|
||||
|
||||
|
||||
def test_scopes_to_pb2():
    """Check `scopes_to_pb2` for a static+dynamic pair and for a pair whose dynamic scope is "unsupported"."""
    # both flavors set to "file": both protobuf fields are expected to be populated
    both_file = capa.rules.Scopes.from_dict({"static": "file", "dynamic": "file"})
    expected_both = capa_pb2.Scopes(
        static=capa_pb2.SCOPE_FILE,
        dynamic=capa_pb2.SCOPE_FILE,
    )
    assert capa.render.proto.scopes_to_pb2(both_file) == expected_both

    # dynamic is "unsupported": the expected message only sets `static`
    static_only = capa.rules.Scopes.from_dict({"static": "file", "dynamic": "unsupported"})
    expected_static_only = capa_pb2.Scopes(
        static=capa_pb2.SCOPE_FILE,
    )
    assert capa.render.proto.scopes_to_pb2(static_only) == expected_static_only
|
||||
|
||||
|
||||
def cmp_optional(a: Any, b: Any) -> bool:
|
||||
@@ -128,8 +145,59 @@ def cmp_optional(a: Any, b: Any) -> bool:
|
||||
return a == b
|
||||
|
||||
|
||||
def assert_static_analyis(analysis: rd.StaticAnalysis, dst: capa_pb2.StaticAnalysis):
    """Assert that the protobuf static analysis `dst` mirrors the result-document `analysis`.

    Compares, in order: format, arch, os, extractor, the rule list, the base
    address, the function/basic-block layout, the feature counts, and the
    library functions. Addresses taken from `analysis` are converted with
    `capa.render.proto.addr_to_pb2` before comparison.
    """
    to_pb2 = capa.render.proto.addr_to_pb2

    assert analysis.format == dst.format
    assert analysis.arch == dst.arch
    assert analysis.os == dst.os
    assert analysis.extractor == dst.extractor
    assert list(analysis.rules) == dst.rules

    assert to_pb2(analysis.base_address) == dst.base_address

    # each sequence is length-checked before zip() so truncation cannot hide a mismatch
    rd_functions = analysis.layout.functions
    pb_functions = dst.layout.functions
    assert len(rd_functions) == len(pb_functions)
    for rd_func, pb_func in zip(rd_functions, pb_functions):
        assert to_pb2(rd_func.address) == pb_func.address

        assert len(rd_func.matched_basic_blocks) == len(pb_func.matched_basic_blocks)
        for rd_block, pb_block in zip(rd_func.matched_basic_blocks, pb_func.matched_basic_blocks):
            assert to_pb2(rd_block.address) == pb_block.address

    assert analysis.feature_counts.file == dst.feature_counts.file
    rd_counts = analysis.feature_counts.functions
    pb_counts = dst.feature_counts.functions
    assert len(rd_counts) == len(pb_counts)
    for rd_count, pb_count in zip(rd_counts, pb_counts):
        assert to_pb2(rd_count.address) == pb_count.address
        assert rd_count.count == pb_count.count

    rd_libs = analysis.library_functions
    pb_libs = dst.library_functions
    assert len(rd_libs) == len(pb_libs)
    for rd_lib, pb_lib in zip(rd_libs, pb_libs):
        assert to_pb2(rd_lib.address) == pb_lib.address
        assert rd_lib.name == pb_lib.name
|
||||
|
||||
|
||||
def assert_dynamic_analyis(analysis: rd.DynamicAnalysis, dst: capa_pb2.DynamicAnalysis):
    """Assert that the protobuf dynamic analysis `dst` mirrors the result-document `analysis`.

    Compares, in order: format, arch, os, extractor, the rule list, the
    process/thread layout, and the per-process feature counts. Addresses taken
    from `analysis` are converted with `capa.render.proto.addr_to_pb2` before
    comparison.
    """
    to_pb2 = capa.render.proto.addr_to_pb2

    assert analysis.format == dst.format
    assert analysis.arch == dst.arch
    assert analysis.os == dst.os
    assert analysis.extractor == dst.extractor
    assert list(analysis.rules) == dst.rules

    # each sequence is length-checked before zip() so truncation cannot hide a mismatch
    rd_processes = analysis.layout.processes
    pb_processes = dst.layout.processes
    assert len(rd_processes) == len(pb_processes)
    for rd_proc, pb_proc in zip(rd_processes, pb_processes):
        assert to_pb2(rd_proc.address) == pb_proc.address

        assert len(rd_proc.matched_threads) == len(pb_proc.matched_threads)
        for rd_thread, pb_thread in zip(rd_proc.matched_threads, pb_proc.matched_threads):
            assert to_pb2(rd_thread.address) == pb_thread.address

    rd_counts = analysis.feature_counts.processes
    pb_counts = dst.feature_counts.processes
    # NOTE(review): this compares the two `processes` sequences as a whole, and the
    # loop below compares them again item by item; `assert_static_analyis` compares
    # `feature_counts.file` at the equivalent point -- confirm which field was intended.
    assert rd_counts == pb_counts
    assert len(rd_counts) == len(pb_counts)
    for rd_count, pb_count in zip(rd_counts, pb_counts):
        assert to_pb2(rd_count.address) == pb_count.address
        assert rd_count.count == pb_count.count
|
||||
|
||||
|
||||
def assert_meta(meta: rd.Metadata, dst: capa_pb2.Metadata):
|
||||
assert isinstance(rd.Metadata.analysis, rd.StaticAnalysis)
|
||||
assert isinstance(meta.analysis, rd.StaticAnalysis)
|
||||
assert str(meta.timestamp) == dst.timestamp
|
||||
assert meta.version == dst.version
|
||||
if meta.argv is None:
|
||||
@@ -142,32 +210,18 @@ def assert_meta(meta: rd.Metadata, dst: capa_pb2.Metadata):
|
||||
assert meta.sample.sha256 == dst.sample.sha256
|
||||
assert meta.sample.path == dst.sample.path
|
||||
|
||||
assert meta.analysis.format == dst.analysis.format
|
||||
assert meta.analysis.arch == dst.analysis.arch
|
||||
assert meta.analysis.os == dst.analysis.os
|
||||
assert meta.analysis.extractor == dst.analysis.extractor
|
||||
assert list(meta.analysis.rules) == dst.analysis.rules
|
||||
assert capa.render.proto.addr_to_pb2(meta.analysis.base_address) == dst.analysis.base_address
|
||||
|
||||
assert isinstance(rd.Metadata.analysis.layout, rd.StaticLayout)
|
||||
assert len(meta.analysis.layout.functions) == len(dst.analysis.layout.functions)
|
||||
for rd_f, proto_f in zip(meta.analysis.layout.functions, dst.analysis.layout.functions):
|
||||
assert capa.render.proto.addr_to_pb2(rd_f.address) == proto_f.address
|
||||
|
||||
assert len(rd_f.matched_basic_blocks) == len(proto_f.matched_basic_blocks)
|
||||
for rd_bb, proto_bb in zip(rd_f.matched_basic_blocks, proto_f.matched_basic_blocks):
|
||||
assert capa.render.proto.addr_to_pb2(rd_bb.address) == proto_bb.address
|
||||
|
||||
assert meta.analysis.feature_counts.file == dst.analysis.feature_counts.file
|
||||
assert len(meta.analysis.feature_counts.functions) == len(dst.analysis.feature_counts.functions)
|
||||
for rd_cf, proto_cf in zip(meta.analysis.feature_counts.functions, dst.analysis.feature_counts.functions):
|
||||
assert capa.render.proto.addr_to_pb2(rd_cf.address) == proto_cf.address
|
||||
assert rd_cf.count == proto_cf.count
|
||||
|
||||
assert len(meta.analysis.library_functions) == len(dst.analysis.library_functions)
|
||||
for rd_lf, proto_lf in zip(meta.analysis.library_functions, dst.analysis.library_functions):
|
||||
assert capa.render.proto.addr_to_pb2(rd_lf.address) == proto_lf.address
|
||||
assert rd_lf.name == proto_lf.name
|
||||
if meta.flavor == rd.Flavor.STATIC:
|
||||
assert dst.flavor == capa_pb2.FLAVOR_STATIC
|
||||
assert dst.WhichOneof("analysis2") == "static_analysis"
|
||||
assert isinstance(meta.analysis, rd.StaticAnalysis)
|
||||
assert_static_analyis(meta.analysis, dst.static_analysis)
|
||||
elif meta.flavor == rd.Flavor.DYNAMIC:
|
||||
assert dst.flavor == capa_pb2.FLAVOR_DYNAMIC
|
||||
assert dst.WhichOneof("analysis2") == "dynamic_analysis"
|
||||
assert isinstance(meta.analysis, rd.DynamicAnalysis)
|
||||
assert_dynamic_analyis(meta.analysis, dst.dynamic_analysis)
|
||||
else:
|
||||
assert_never(dst.flavor)
|
||||
|
||||
|
||||
def assert_match(ma: rd.Match, mb: capa_pb2.Match):
|
||||
@@ -320,20 +374,22 @@ def assert_round_trip(doc: rd.ResultDocument):
|
||||
# show the round trip works
|
||||
# first by comparing the objects directly,
|
||||
# which works thanks to pydantic model equality.
|
||||
assert one.meta == two.meta
|
||||
assert one.rules == two.rules
|
||||
assert one == two
|
||||
|
||||
# second by showing their protobuf representations are the same.
|
||||
assert capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True) == capa.render.proto.doc_to_pb2(
|
||||
two
|
||||
).SerializeToString(deterministic=True)
|
||||
one_bytes = capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True)
|
||||
two_bytes = capa.render.proto.doc_to_pb2(two).SerializeToString(deterministic=True)
|
||||
assert one_bytes == two_bytes
|
||||
|
||||
# now show that two different versions are not equal.
|
||||
three = copy.deepcopy(two)
|
||||
three.meta.__dict__.update({"version": "0.0.0"})
|
||||
assert one.meta.version != three.meta.version
|
||||
assert one != three
|
||||
assert capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True) != capa.render.proto.doc_to_pb2(
|
||||
three
|
||||
).SerializeToString(deterministic=True)
|
||||
three_bytes = capa.render.proto.doc_to_pb2(three).SerializeToString(deterministic=True)
|
||||
assert one_bytes != three_bytes
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@@ -345,6 +401,7 @@ def assert_round_trip(doc: rd.ResultDocument):
|
||||
pytest.param("a076114_rd"),
|
||||
pytest.param("pma0101_rd"),
|
||||
pytest.param("dotnet_1c444e_rd"),
|
||||
pytest.param("dynamic_a0000a6_rd"),
|
||||
],
|
||||
)
|
||||
def test_round_trip(request, rd_file):
|
||||
@@ -1531,3 +1531,75 @@ def test_property_access_symbol():
|
||||
)
|
||||
is True
|
||||
)
|
||||
|
||||
|
||||
def test_translate_com_features():
    """Check that a `com/class` feature becomes the bytes and GUID-string features asserted below."""
    rule_yaml = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: basic block
                    dynamic: call
            features:
                - com/class: WICPngDecoder
                # 389ea17b-5078-4cde-b6ef-25c15175c751 WICPngDecoder
                # e018945b-aa86-4008-9bd4-6777a1e40c11 WICPngDecoder
        """
    )
    rule = capa.rules.Rule.from_yaml(rule_yaml)

    com_name = "WICPngDecoder"
    # expected children: the CLSID and the IID, each as raw bytes and as a GUID string
    clsid_bytes = capa.features.common.Bytes(
        b"{\xa1\x9e8xP\xdeL\xb6\xef%\xc1Qu\xc7Q", f"CLSID_{com_name} as bytes"
    )
    clsid_string = capa.features.common.StringFactory(
        "389ea17b-5078-4cde-b6ef-25c15175c751", f"CLSID_{com_name} as GUID string"
    )
    iid_bytes = capa.features.common.Bytes(
        b"[\x94\x18\xe0\x86\xaa\x08@\x9b\xd4gw\xa1\xe4\x0c\x11", f"IID_{com_name} as bytes"
    )
    iid_string = capa.features.common.StringFactory(
        "e018945b-aa86-4008-9bd4-6777a1e40c11", f"IID_{com_name} as GUID string"
    )
    expected = [clsid_bytes, clsid_string, iid_bytes, iid_string]

    # order does not matter, so compare as sets
    assert set(expected) == set(rule.statement.get_children())
|
||||
|
||||
|
||||
def test_invalid_com_features():
    """Check that each invalid `com/...` feature makes `Rule.from_yaml` raise `InvalidRule`."""
    invalid_features = (
        # test for unknown COM class
        "com/class: invalid_com",
        # test for unknown COM interface
        "com/interface: invalid_com",
        # test for invalid COM type
        # valid_com_types = "class", "interface"
        "com/invalid_COM_type: WICPngDecoder",
    )

    for feature in invalid_features:
        rule_yaml = textwrap.dedent(
            f"""
            rule:
                meta:
                    name: test rule
                features:
                    - {feature}
            """
        )
        with pytest.raises(capa.rules.InvalidRule):
            capa.rules.Rule.from_yaml(rule_yaml)
|
||||
|
||||
Reference in New Issue
Block a user