Merge branch 'dynamic-feature-extraction' into capabilities-module

This commit is contained in:
Yacine
2023-10-20 08:55:49 +02:00
committed by GitHub
22 changed files with 1245 additions and 3919 deletions

View File

@@ -318,6 +318,8 @@ def get_data_path_by_name(name) -> Path:
return CD / "data" / "499c2a85f6e8142c3f48d4251c9c7cd6.raw32"
elif name.startswith("9324d"):
return CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_"
elif name.startswith("395eb"):
return CD / "data" / "395eb0ddd99d2c9e37b6d0b73485ee9c.exe_"
elif name.startswith("a1982"):
return CD / "data" / "a198216798ca38f280dc413f8c57f2c2.exe_"
elif name.startswith("a933a"):
@@ -1346,6 +1348,11 @@ def z9324d_extractor():
return get_extractor(get_data_path_by_name("9324d..."))
@pytest.fixture
def z395eb_extractor():
return get_extractor(get_data_path_by_name("395eb..."))
@pytest.fixture
def pma12_04_extractor():
return get_extractor(get_data_path_by_name("pma12-04"))
@@ -1432,29 +1439,42 @@ def get_result_doc(path: Path):
@pytest.fixture
def pma0101_rd():
# python -m capa.main tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ --json > tests/data/rd/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_.json
return get_result_doc(CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json")
@pytest.fixture
def dotnet_1c444e_rd():
# .NET sample
# python -m capa.main tests/data/dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_ --json > tests/data/rd/1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json
return get_result_doc(CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json")
@pytest.fixture
def a3f3bbc_rd():
# python -m capa.main tests/data/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_ --json > tests/data/rd/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json
return get_result_doc(CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json")
@pytest.fixture
def al_khaserx86_rd():
# python -m capa.main tests/data/al-khaser_x86.exe_ --json > tests/data/rd/al-khaser_x86.exe_.json
return get_result_doc(CD / "data" / "rd" / "al-khaser_x86.exe_.json")
@pytest.fixture
def al_khaserx64_rd():
# python -m capa.main tests/data/al-khaser_x64.exe_ --json > tests/data/rd/al-khaser_x64.exe_.json
return get_result_doc(CD / "data" / "rd" / "al-khaser_x64.exe_.json")
@pytest.fixture
def a076114_rd():
# python -m capa.main tests/data/0761142efbda6c4b1e801223de723578.dll_ --json > tests/data/rd/0761142efbda6c4b1e801223de723578.dll_.json
return get_result_doc(CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json")
@pytest.fixture
def dynamic_a0000a6_rd():
# python -m capa.main tests/data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json --json > tests/data/rd/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json
return get_result_doc(CD / "data" / "rd" / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json")

View File

@@ -46,7 +46,7 @@ def test_doc_to_pb2(request, rd_file):
assert matches.meta.name == m.name
assert cmp_optional(matches.meta.namespace, m.namespace)
assert list(matches.meta.authors) == m.authors
assert capa.render.proto.scope_to_pb2(matches.meta.scope) == m.scope
assert capa.render.proto.scopes_to_pb2(matches.meta.scopes) == m.scopes
assert len(matches.meta.attack) == len(m.attack)
for rd_attack, proto_attack in zip(matches.meta.attack, m.attack):
@@ -116,10 +116,27 @@ def test_addr_to_pb2():
def test_scope_to_pb2():
assert capa.render.proto.scope_to_pb2(capa.rules.Scope(capa.rules.Scope.FILE)) == capa_pb2.SCOPE_FILE
assert capa.render.proto.scope_to_pb2(capa.rules.Scope(capa.rules.Scope.FUNCTION)) == capa_pb2.SCOPE_FUNCTION
assert capa.render.proto.scope_to_pb2(capa.rules.Scope(capa.rules.Scope.BASIC_BLOCK)) == capa_pb2.SCOPE_BASIC_BLOCK
assert capa.render.proto.scope_to_pb2(capa.rules.Scope(capa.rules.Scope.INSTRUCTION)) == capa_pb2.SCOPE_INSTRUCTION
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.FILE) == capa_pb2.SCOPE_FILE
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.FUNCTION) == capa_pb2.SCOPE_FUNCTION
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.BASIC_BLOCK) == capa_pb2.SCOPE_BASIC_BLOCK
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.INSTRUCTION) == capa_pb2.SCOPE_INSTRUCTION
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.PROCESS) == capa_pb2.SCOPE_PROCESS
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.THREAD) == capa_pb2.SCOPE_THREAD
assert capa.render.proto.scope_to_pb2(capa.rules.Scope.CALL) == capa_pb2.SCOPE_CALL
def test_scopes_to_pb2():
assert capa.render.proto.scopes_to_pb2(
capa.rules.Scopes.from_dict({"static": "file", "dynamic": "file"})
) == capa_pb2.Scopes(
static=capa_pb2.SCOPE_FILE,
dynamic=capa_pb2.SCOPE_FILE,
)
assert capa.render.proto.scopes_to_pb2(
capa.rules.Scopes.from_dict({"static": "file", "dynamic": "unsupported"})
) == capa_pb2.Scopes(
static=capa_pb2.SCOPE_FILE,
)
def cmp_optional(a: Any, b: Any) -> bool:
@@ -128,8 +145,59 @@ def cmp_optional(a: Any, b: Any) -> bool:
return a == b
def assert_static_analyis(analysis: rd.StaticAnalysis, dst: capa_pb2.StaticAnalysis):
assert analysis.format == dst.format
assert analysis.arch == dst.arch
assert analysis.os == dst.os
assert analysis.extractor == dst.extractor
assert list(analysis.rules) == dst.rules
assert capa.render.proto.addr_to_pb2(analysis.base_address) == dst.base_address
assert len(analysis.layout.functions) == len(dst.layout.functions)
for rd_f, proto_f in zip(analysis.layout.functions, dst.layout.functions):
assert capa.render.proto.addr_to_pb2(rd_f.address) == proto_f.address
assert len(rd_f.matched_basic_blocks) == len(proto_f.matched_basic_blocks)
for rd_bb, proto_bb in zip(rd_f.matched_basic_blocks, proto_f.matched_basic_blocks):
assert capa.render.proto.addr_to_pb2(rd_bb.address) == proto_bb.address
assert analysis.feature_counts.file == dst.feature_counts.file
assert len(analysis.feature_counts.functions) == len(dst.feature_counts.functions)
for rd_cf, proto_cf in zip(analysis.feature_counts.functions, dst.feature_counts.functions):
assert capa.render.proto.addr_to_pb2(rd_cf.address) == proto_cf.address
assert rd_cf.count == proto_cf.count
assert len(analysis.library_functions) == len(dst.library_functions)
for rd_lf, proto_lf in zip(analysis.library_functions, dst.library_functions):
assert capa.render.proto.addr_to_pb2(rd_lf.address) == proto_lf.address
assert rd_lf.name == proto_lf.name
def assert_dynamic_analyis(analysis: rd.DynamicAnalysis, dst: capa_pb2.DynamicAnalysis):
assert analysis.format == dst.format
assert analysis.arch == dst.arch
assert analysis.os == dst.os
assert analysis.extractor == dst.extractor
assert list(analysis.rules) == dst.rules
assert len(analysis.layout.processes) == len(dst.layout.processes)
for rd_p, proto_p in zip(analysis.layout.processes, dst.layout.processes):
assert capa.render.proto.addr_to_pb2(rd_p.address) == proto_p.address
assert len(rd_p.matched_threads) == len(proto_p.matched_threads)
for rd_t, proto_t in zip(rd_p.matched_threads, proto_p.matched_threads):
assert capa.render.proto.addr_to_pb2(rd_t.address) == proto_t.address
assert analysis.feature_counts.processes == dst.feature_counts.processes
assert len(analysis.feature_counts.processes) == len(dst.feature_counts.processes)
for rd_cp, proto_cp in zip(analysis.feature_counts.processes, dst.feature_counts.processes):
assert capa.render.proto.addr_to_pb2(rd_cp.address) == proto_cp.address
assert rd_cp.count == proto_cp.count
def assert_meta(meta: rd.Metadata, dst: capa_pb2.Metadata):
assert isinstance(rd.Metadata.analysis, rd.StaticAnalysis)
assert isinstance(meta.analysis, rd.StaticAnalysis)
assert str(meta.timestamp) == dst.timestamp
assert meta.version == dst.version
if meta.argv is None:
@@ -142,32 +210,18 @@ def assert_meta(meta: rd.Metadata, dst: capa_pb2.Metadata):
assert meta.sample.sha256 == dst.sample.sha256
assert meta.sample.path == dst.sample.path
assert meta.analysis.format == dst.analysis.format
assert meta.analysis.arch == dst.analysis.arch
assert meta.analysis.os == dst.analysis.os
assert meta.analysis.extractor == dst.analysis.extractor
assert list(meta.analysis.rules) == dst.analysis.rules
assert capa.render.proto.addr_to_pb2(meta.analysis.base_address) == dst.analysis.base_address
assert isinstance(rd.Metadata.analysis.layout, rd.StaticLayout)
assert len(meta.analysis.layout.functions) == len(dst.analysis.layout.functions)
for rd_f, proto_f in zip(meta.analysis.layout.functions, dst.analysis.layout.functions):
assert capa.render.proto.addr_to_pb2(rd_f.address) == proto_f.address
assert len(rd_f.matched_basic_blocks) == len(proto_f.matched_basic_blocks)
for rd_bb, proto_bb in zip(rd_f.matched_basic_blocks, proto_f.matched_basic_blocks):
assert capa.render.proto.addr_to_pb2(rd_bb.address) == proto_bb.address
assert meta.analysis.feature_counts.file == dst.analysis.feature_counts.file
assert len(meta.analysis.feature_counts.functions) == len(dst.analysis.feature_counts.functions)
for rd_cf, proto_cf in zip(meta.analysis.feature_counts.functions, dst.analysis.feature_counts.functions):
assert capa.render.proto.addr_to_pb2(rd_cf.address) == proto_cf.address
assert rd_cf.count == proto_cf.count
assert len(meta.analysis.library_functions) == len(dst.analysis.library_functions)
for rd_lf, proto_lf in zip(meta.analysis.library_functions, dst.analysis.library_functions):
assert capa.render.proto.addr_to_pb2(rd_lf.address) == proto_lf.address
assert rd_lf.name == proto_lf.name
if meta.flavor == rd.Flavor.STATIC:
assert dst.flavor == capa_pb2.FLAVOR_STATIC
assert dst.WhichOneof("analysis2") == "static_analysis"
assert isinstance(meta.analysis, rd.StaticAnalysis)
assert_static_analyis(meta.analysis, dst.static_analysis)
elif meta.flavor == rd.Flavor.DYNAMIC:
assert dst.flavor == capa_pb2.FLAVOR_DYNAMIC
assert dst.WhichOneof("analysis2") == "dynamic_analysis"
assert isinstance(meta.analysis, rd.DynamicAnalysis)
assert_dynamic_analyis(meta.analysis, dst.dynamic_analysis)
else:
assert_never(dst.flavor)
def assert_match(ma: rd.Match, mb: capa_pb2.Match):
@@ -320,20 +374,22 @@ def assert_round_trip(doc: rd.ResultDocument):
# show the round trip works
# first by comparing the objects directly,
# which works thanks to pydantic model equality.
assert one.meta == two.meta
assert one.rules == two.rules
assert one == two
# second by showing their protobuf representations are the same.
assert capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True) == capa.render.proto.doc_to_pb2(
two
).SerializeToString(deterministic=True)
one_bytes = capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True)
two_bytes = capa.render.proto.doc_to_pb2(two).SerializeToString(deterministic=True)
assert one_bytes == two_bytes
# now show that two different versions are not equal.
three = copy.deepcopy(two)
three.meta.__dict__.update({"version": "0.0.0"})
assert one.meta.version != three.meta.version
assert one != three
assert capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True) != capa.render.proto.doc_to_pb2(
three
).SerializeToString(deterministic=True)
three_bytes = capa.render.proto.doc_to_pb2(three).SerializeToString(deterministic=True)
assert one_bytes != three_bytes
@pytest.mark.parametrize(
@@ -345,6 +401,7 @@ def assert_round_trip(doc: rd.ResultDocument):
pytest.param("a076114_rd"),
pytest.param("pma0101_rd"),
pytest.param("dotnet_1c444e_rd"),
pytest.param("dynamic_a0000a6_rd"),
],
)
def test_round_trip(request, rd_file):

View File

@@ -1531,3 +1531,75 @@ def test_property_access_symbol():
)
is True
)
def test_translate_com_features():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: basic block
dynamic: call
features:
- com/class: WICPngDecoder
# 389ea17b-5078-4cde-b6ef-25c15175c751 WICPngDecoder
# e018945b-aa86-4008-9bd4-6777a1e40c11 WICPngDecoder
"""
)
)
com_name = "WICPngDecoder"
com_features = [
capa.features.common.Bytes(b"{\xa1\x9e8xP\xdeL\xb6\xef%\xc1Qu\xc7Q", f"CLSID_{com_name} as bytes"),
capa.features.common.StringFactory("389ea17b-5078-4cde-b6ef-25c15175c751", f"CLSID_{com_name} as GUID string"),
capa.features.common.Bytes(b"[\x94\x18\xe0\x86\xaa\x08@\x9b\xd4gw\xa1\xe4\x0c\x11", f"IID_{com_name} as bytes"),
capa.features.common.StringFactory("e018945b-aa86-4008-9bd4-6777a1e40c11", f"IID_{com_name} as GUID string"),
]
assert set(com_features) == set(r.statement.get_children())
def test_invalid_com_features():
# test for unknown COM class
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- com/class: invalid_com
"""
)
)
# test for unknown COM interface
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- com/interface: invalid_com
"""
)
)
# test for invalid COM type
# valid_com_types = "class", "interface"
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- com/invalid_COM_type: WICPngDecoder
"""
)
)