mirror of https://github.com/mandiant/capa.git
synced 2026-03-14 14:07:17 -07:00

Compare commits: arm-suppor...dex-suppor (699 commits)
.github/pyinstaller/pyinstaller.spec (vendored, 3 changed lines)

```diff
@@ -17,6 +17,7 @@ a = Analysis(
         # when invoking pyinstaller from the project root,
         # this gets invoked from the directory of the spec file,
         # i.e. ./.github/pyinstaller
+        ("../../assets", "assets"),
         ("../../rules", "rules"),
         ("../../sigs", "sigs"),
         ("../../cache", "cache"),
@@ -79,7 +80,7 @@ exe = EXE(
     name="capa",
     icon="logo.ico",
     debug=False,
-    strip=None,
+    strip=False,
     upx=True,
     console=True,
 )
```
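The tuples above are PyInstaller `datas` entries of the form (source, destination): the new `("../../assets", "assets")` line is what ships the `assets` directory (the `classes.json.gz` and `interfaces.json.gz` files added later in this diff) inside the frozen binary. As a minimal sketch of how a frozen program typically resolves such bundled data (illustrative only, not capa's actual helper code):

```python
# hypothetical helper: locate data directories bundled via PyInstaller `datas`.
import sys
from pathlib import Path


def bundle_root() -> Path:
    # PyInstaller sets sys.frozen and unpacks bundled data under sys._MEIPASS;
    # when running from source, fall back to the directory of this file.
    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
        return Path(getattr(sys, "_MEIPASS"))
    return Path(__file__).resolve().parent


# the destination names mirror the spec entries above.
assets_dir = bundle_root() / "assets"
rules_dir = bundle_root() / "rules"
```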
.github/workflows/build.yml (vendored, 26 changed lines)

```diff
@@ -11,34 +11,41 @@ permissions:

 jobs:
   build:
-    name: PyInstaller for ${{ matrix.os }}
+    name: PyInstaller for ${{ matrix.os }} / Py ${{ matrix.python_version }}
     runs-on: ${{ matrix.os }}
     strategy:
       # set to false for debugging
       fail-fast: true
       matrix:
+        # using Python 3.8 to support running across multiple operating systems including Windows 7
        include:
          - os: ubuntu-20.04
            # use old linux so that the shared library versioning is more portable
            artifact_name: capa
            asset_name: linux
+           python_version: 3.8
+         - os: ubuntu-20.04
+           artifact_name: capa
+           asset_name: linux-py311
+           python_version: 3.11
          - os: windows-2019
            artifact_name: capa.exe
            asset_name: windows
+           python_version: 3.8
          - os: macos-11
            # use older macOS for assumed better portability
            artifact_name: capa
            asset_name: macos
+           python_version: 3.8
     steps:
       - name: Checkout capa
         uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
         with:
           submodules: true
-      # using Python 3.8 to support running across multiple operating systems including Windows 7
-      - name: Set up Python 3.8
+      - name: Set up Python ${{ matrix.python_version }}
         uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
         with:
-          python-version: 3.8
+          python-version: ${{ matrix.python_version }}
       - if: matrix.os == 'ubuntu-20.04'
         run: sudo apt-get install -y libyaml-dev
       - name: Upgrade pip, setuptools
@@ -55,13 +62,17 @@ jobs:
         run: dist/capa "tests/data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32"
       - name: Does it run (ELF)?
         run: dist/capa "tests/data/7351f8a40c5450557b24622417fc478d.elf_"
+      - name: Does it run (CAPE)?
+        run: |
+          7z e "tests/data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"
+          dist/capa "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json"
       - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
         with:
           name: ${{ matrix.asset_name }}
           path: dist/${{ matrix.artifact_name }}

   test_run:
-    name: Test run on ${{ matrix.os }}
+    name: Test run on ${{ matrix.os }} / ${{ matrix.asset_name }}
     runs-on: ${{ matrix.os }}
     needs: [build]
     strategy:
@@ -71,6 +82,9 @@ jobs:
         - os: ubuntu-22.04
           artifact_name: capa
           asset_name: linux
+        - os: ubuntu-22.04
+          artifact_name: capa
+          asset_name: linux-py311
        - os: windows-2022
          artifact_name: capa.exe
          asset_name: windows
@@ -96,6 +110,8 @@ jobs:
       include:
         - asset_name: linux
           artifact_name: capa
+        - asset_name: linux-py311
+          artifact_name: capa
         - asset_name: windows
           artifact_name: capa.exe
         - asset_name: macos
```
.github/workflows/pip-audit.yml (vendored, new file, 21 lines)

```yaml
name: PIP audit

on:
  schedule:
    - cron: '0 8 * * 1'

jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    strategy:
      matrix:
        python-version: ["3.11"]

    steps:
      - name: Check out repository code
        uses: actions/checkout@v4

      - uses: pypa/gh-action-pip-audit@v1.0.8
        with:
          inputs: .
```
.github/workflows/tests.yml (vendored, 18 changed lines)

```diff
@@ -39,13 +39,13 @@ jobs:
       - name: Lint with ruff
         run: pre-commit run ruff
       - name: Lint with isort
-        run: pre-commit run isort
+        run: pre-commit run isort --show-diff-on-failure
       - name: Lint with black
-        run: pre-commit run black
+        run: pre-commit run black --show-diff-on-failure
       - name: Lint with flake8
-        run: pre-commit run flake8
+        run: pre-commit run flake8 --hook-stage manual
       - name: Check types with mypy
-        run: pre-commit run mypy
+        run: pre-commit run mypy --hook-stage manual

   rule_linter:
     runs-on: ubuntu-20.04
@@ -95,6 +95,10 @@ jobs:
         run: sudo apt-get install -y libyaml-dev
       - name: Install capa
         run: pip install -e .[dev]
+      - name: Run tests (fast)
+        # this set of tests runs about 80% of the cases in 20% of the time,
+        # and should catch most errors quickly.
+        run: pre-commit run pytest-fast --all-files --hook-stage manual
       - name: Run tests
         run: pytest -v tests/

@@ -103,7 +107,7 @@ jobs:
     env:
       BN_SERIAL: ${{ secrets.BN_SERIAL }}
     runs-on: ubuntu-20.04
-    needs: [code_style, rule_linter]
+    needs: [tests]
     strategy:
       fail-fast: false
       matrix:
@@ -143,7 +147,7 @@ jobs:
   ghidra-tests:
     name: Ghidra tests for ${{ matrix.python-version }}
     runs-on: ubuntu-20.04
-    needs: [code_style, rule_linter]
+    needs: [tests]
     strategy:
       fail-fast: false
       matrix:
@@ -197,4 +201,4 @@ jobs:
           cat ../output.log
           exit_code=$(cat ../output.log | grep exit | awk '{print $NF}')
           exit $exit_code
```
.gitmodules (vendored, 2 changed lines)

```diff
@@ -1,6 +1,8 @@
 [submodule "rules"]
 	path = rules
 	url = ../capa-rules.git
+	branch = dynamic-syntax
 [submodule "tests/data"]
 	path = tests/data
 	url = ../capa-testfiles.git
+	branch = dynamic-feature-extractor
```
.pre-commit-config.yaml (filename inferred from the hook definitions; the file header is missing from the capture)

```diff
@@ -25,7 +25,7 @@ repos:
     hooks:
       - id: isort
         name: isort
-        stages: [commit, push]
+        stages: [commit, push, manual]
         language: system
         entry: isort
         args:
@@ -45,7 +45,7 @@ repos:
     hooks:
       - id: black
         name: black
-        stages: [commit, push]
+        stages: [commit, push, manual]
         language: system
         entry: black
         args:
@@ -62,7 +62,7 @@ repos:
     hooks:
      - id: ruff
        name: ruff
-       stages: [commit, push]
+       stages: [commit, push, manual]
       language: system
       entry: ruff
       args:
@@ -79,7 +79,7 @@ repos:
     hooks:
       - id: flake8
         name: flake8
-        stages: [commit, push]
+        stages: [push, manual]
         language: system
         entry: flake8
         args:
@@ -97,7 +97,7 @@ repos:
     hooks:
       - id: mypy
         name: mypy
-        stages: [commit, push]
+        stages: [push, manual]
         language: system
         entry: mypy
         args:
@@ -109,3 +109,21 @@ repos:
           - "tests/"
         always_run: true
         pass_filenames: false
+
+  - repo: local
+    hooks:
+      - id: pytest-fast
+        name: pytest (fast)
+        stages: [manual]
+        language: system
+        entry: pytest
+        args:
+          - "tests/"
+          - "--ignore=tests/test_binja_features.py"
+          - "--ignore=tests/test_ghidra_features.py"
+          - "--ignore=tests/test_ida_features.py"
+          - "--ignore=tests/test_viv_features.py"
+          - "--ignore=tests/test_main.py"
+          - "--ignore=tests/test_scripts.py"
+        always_run: true
+        pass_filenames: false
```
CHANGELOG.md (56 changed lines)

```diff
@@ -3,22 +3,66 @@
 ## master (unreleased)

 ### New Features
-- ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan
-- ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff
+- add Ghidra backend #1770 #1767 @colton-gabertan @mike-hunhoff
+- add dynamic analysis via CAPE sandbox reports #48 #1535 @yelhamer
+- add call scope #771 @yelhamer
+- add thread scope #1517 @yelhamer
+- add process scope #1517 @yelhamer
+- rules: change `meta.scope` to `meta.scopes` @yelhamer
+- protobuf: add `Metadata.flavor` @williballenthin
 - binja: add support for forwarded exports #1646 @xusheng6
 - binja: add support for symtab names #1504 @xusheng6
+- add com class/interface features #322 @Aayush-goel-04

 ### Breaking Changes
+- remove the `SCOPE_*` constants in favor of the `Scope` enum #1764 @williballenthin
+- protobuf: deprecate `RuleMetadata.scope` in favor of `RuleMetadata.scopes` @williballenthin
+- protobuf: deprecate `Metadata.analysis` in favor of `Metadata.analysis2` that is dynamic analysis aware @williballenthin
+- update freeze format to v3, adding support for dynamic analysis @williballenthin
+- extractor: ignore DLL name for api features #1815 @mr-tz

-### New Rules (1)
+### New Rules (34)

 - nursery/get-ntoskrnl-base-address @mr-tz
+- host-interaction/network/connectivity/set-tcp-connection-state @johnk3r
+- nursery/capture-process-snapshot-data @mr-tz
+- collection/network/capture-packets-using-sharppcap jakub.jozwiak@mandiant.com
+- nursery/communicate-with-kernel-module-via-netlink-socket-on-linux michael.hunhoff@mandiant.com
+- nursery/get-current-pid-on-linux michael.hunhoff@mandiant.com
+- nursery/get-file-system-information-on-linux michael.hunhoff@mandiant.com
+- nursery/get-password-database-entry-on-linux michael.hunhoff@mandiant.com
+- nursery/mark-thread-detached-on-linux michael.hunhoff@mandiant.com
+- nursery/persist-via-gnome-autostart-on-linux michael.hunhoff@mandiant.com
+- nursery/set-thread-name-on-linux michael.hunhoff@mandiant.com
+- load-code/dotnet/load-windows-common-language-runtime michael.hunhoff@mandiant.com blas.kojusner@mandiant.com jakub.jozwiak@mandiant.com
+- nursery/log-keystrokes-via-input-method-manager @mr-tz
+- nursery/encrypt-data-using-rc4-via-systemfunction032 richard.weiss@mandiant.com
+- nursery/add-value-to-global-atom-table @mr-tz
+- nursery/enumerate-processes-that-use-resource @Ana06
+- host-interaction/process/inject/allocate-or-change-rwx-memory @mr-tz
+- lib/allocate-or-change-rw-memory 0x534a@mailbox.org @mr-tz
+- lib/change-memory-protection @mr-tz
+- anti-analysis/anti-av/patch-antimalware-scan-interface-function jakub.jozwiak@mandiant.com
+- executable/dotnet-singlefile/bundled-with-dotnet-single-file-deployment sara.rincon@mandiant.com
+- internal/limitation/file/internal-dotnet-single-file-deployment-limitation sara.rincon@mandiant.com
+- data-manipulation/encoding/encode-data-using-add-xor-sub-operations jakub.jozwiak@mandiant.com
+- nursery/access-camera-in-dotnet-on-android michael.hunhoff@mandiant.com
+- nursery/capture-microphone-audio-in-dotnet-on-android michael.hunhoff@mandiant.com
+- nursery/capture-screenshot-in-dotnet-on-android michael.hunhoff@mandiant.com
+- nursery/check-for-incoming-call-in-dotnet-on-android michael.hunhoff@mandiant.com
+- nursery/check-for-outgoing-call-in-dotnet-on-android michael.hunhoff@mandiant.com
+- nursery/compiled-with-xamarin michael.hunhoff@mandiant.com
+- nursery/get-os-version-in-dotnet-on-android michael.hunhoff@mandiant.com
+- data-manipulation/compression/create-cabinet-on-windows michael.hunhoff@mandiant.com jakub.jozwiak@mandiant.com
+- data-manipulation/compression/extract-cabinet-on-windows jakub.jozwiak@mandiant.com
+- lib/create-file-decompression-interface-context-on-windows jakub.jozwiak@mandiant.com
--

 ### Bug Fixes
-- ghidra: fix ints_to_bytes performance #1761 @mike-hunhoff
+- ghidra: fix `ints_to_bytes` performance #1761 @mike-hunhoff
 - binja: improve function call site detection @xusheng6
-- binja: use binaryninja.load to open files @xusheng6
+- binja: use `binaryninja.load` to open files @xusheng6
 - binja: bump binja version to 3.5 #1789 @xusheng6

 ### capa explorer IDA Pro plugin
@@ -1582,4 +1626,4 @@ Download a standalone binary below and checkout the readme [here on GitHub](http
 ### Raw diffs

 - [capa v1.0.0...v1.1.0](https://github.com/mandiant/capa/compare/v1.0.0...v1.1.0)
-- [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0)
+- [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0)
```
README.md (129 changed lines; the badge image markup was stripped by extraction, leaving only the link targets)

````diff
@@ -2,13 +2,13 @@
 [](https://pypi.org/project/flare-capa)
 [](https://github.com/mandiant/capa/releases)
-[](https://github.com/mandiant/capa-rules)
+[](https://github.com/mandiant/capa-rules)
 [](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
 [](https://github.com/mandiant/capa/releases)
 [](LICENSE.txt)

 capa detects capabilities in executable files.
-You run it against a PE, ELF, .NET module, or shellcode file and it tells you what it thinks the program can do.
+You run it against a PE, ELF, .NET module, shellcode file, or a sandbox report and it tells you what it thinks the program can do.
 For example, it might suggest that the file is a backdoor, is capable of installing services, or relies on HTTP to communicate.

 Check out:
@@ -125,6 +125,96 @@ function @ 0x4011C0
   ...
 ```

+Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capability extraction.
+In order to use this, you first submit your sample to CAPE for analysis, and then run capa against the generated report (JSON).
+
+Here's an example of running capa against a packed binary, and then running capa against the CAPE report of that binary:
+
+```yaml
+$ capa 05be49819139a3fdcdbddbdefd298398779521f3d68daa25275cc77508e42310.exe
+WARNING:capa.capabilities.common:--------------------------------------------------------------------------------
+WARNING:capa.capabilities.common: This sample appears to be packed.
+WARNING:capa.capabilities.common:
+WARNING:capa.capabilities.common: Packed samples have often been obfuscated to hide their logic.
+WARNING:capa.capabilities.common: capa cannot handle obfuscation well using static analysis. This means the results may be misleading or incomplete.
+WARNING:capa.capabilities.common: If possible, you should try to unpack this input file before analyzing it with capa.
+WARNING:capa.capabilities.common: Alternatively, run the sample in a supported sandbox and invoke capa against the report to obtain dynamic analysis results.
+WARNING:capa.capabilities.common:
+WARNING:capa.capabilities.common: Identified via rule: (internal) packer file limitation
+WARNING:capa.capabilities.common:
+WARNING:capa.capabilities.common: Use -v or -vv if you really want to see the capabilities identified by capa.
+WARNING:capa.capabilities.common:--------------------------------------------------------------------------------
+
+$ capa 05be49819139a3fdcdbddbdefd298398779521f3d68daa25275cc77508e42310.json
+
+┍━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
+│ ATT&CK Tactic          │ ATT&CK Technique                                                                   │
+┝━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
+│ CREDENTIAL ACCESS      │ Credentials from Password Stores T1555                                            │
+├────────────────────────┼────────────────────────────────────────────────────────────────────────────────────┤
+│ DEFENSE EVASION        │ File and Directory Permissions Modification T1222                                  │
+│                        │ Modify Registry T1112                                                              │
+│                        │ Obfuscated Files or Information T1027                                              │
+│                        │ Virtualization/Sandbox Evasion::User Activity Based Checks T1497.002               │
+├────────────────────────┼────────────────────────────────────────────────────────────────────────────────────┤
+│ DISCOVERY              │ Account Discovery T1087                                                            │
+│                        │ Application Window Discovery T1010                                                 │
+│                        │ File and Directory Discovery T1083                                                 │
+│                        │ Query Registry T1012                                                               │
+│                        │ System Information Discovery T1082                                                 │
+│                        │ System Location Discovery::System Language Discovery T1614.001                     │
+│                        │ System Owner/User Discovery T1033                                                  │
+├────────────────────────┼────────────────────────────────────────────────────────────────────────────────────┤
+│ EXECUTION              │ System Services::Service Execution T1569.002                                       │
+├────────────────────────┼────────────────────────────────────────────────────────────────────────────────────┤
+│ PERSISTENCE            │ Boot or Logon Autostart Execution::Registry Run Keys / Startup Folder T1547.001    │
+│                        │ Boot or Logon Autostart Execution::Winlogon Helper DLL T1547.004                   │
+│                        │ Create or Modify System Process::Windows Service T1543.003                         │
+┕━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙
+
+┍━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
+│ Capability                                           │ Namespace                                            │
+┝━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
+│ check for unmoving mouse cursor (3 matches)          │ anti-analysis/anti-vm/vm-detection                   │
+│ gather bitkinex information                          │ collection/file-managers                             │
+│ gather classicftp information                        │ collection/file-managers                             │
+│ gather filezilla information                         │ collection/file-managers                             │
+│ gather total-commander information                   │ collection/file-managers                             │
+│ gather ultrafxp information                          │ collection/file-managers                             │
+│ resolve DNS (23 matches)                             │ communication/dns                                    │
+│ initialize Winsock library (7 matches)               │ communication/socket                                 │
+│ act as TCP client (3 matches)                        │ communication/tcp/client                             │
+│ create new key via CryptAcquireContext               │ data-manipulation/encryption                         │
+│ encrypt or decrypt via WinCrypt                      │ data-manipulation/encryption                         │
+│ hash data via WinCrypt                               │ data-manipulation/hashing                            │
+│ initialize hashing via WinCrypt                      │ data-manipulation/hashing                            │
+│ hash data with MD5                                   │ data-manipulation/hashing/md5                        │
+│ generate random numbers via WinAPI                   │ data-manipulation/prng                               │
+│ extract resource via kernel32 functions (2 matches)  │ executable/resource                                  │
+│ interact with driver via control codes (2 matches)   │ host-interaction/driver                              │
+│ get Program Files directory (18 matches)             │ host-interaction/file-system                         │
+│ get common file path (575 matches)                   │ host-interaction/file-system                         │
+│ create directory (2 matches)                         │ host-interaction/file-system/create                  │
+│ delete file                                          │ host-interaction/file-system/delete                  │
+│ get file attributes (122 matches)                    │ host-interaction/file-system/meta                    │
+│ set file attributes (8 matches)                      │ host-interaction/file-system/meta                    │
+│ move file                                            │ host-interaction/file-system/move                    │
+│ find taskbar (3 matches)                             │ host-interaction/gui/taskbar/find                    │
+│ get keyboard layout (12 matches)                     │ host-interaction/hardware/keyboard                   │
+│ get disk size                                        │ host-interaction/hardware/storage                    │
+│ get hostname (4 matches)                             │ host-interaction/os/hostname                         │
+│ allocate or change RWX memory (3 matches)            │ host-interaction/process/inject                      │
+│ query or enumerate registry key (3 matches)          │ host-interaction/registry                            │
+│ query or enumerate registry value (8 matches)        │ host-interaction/registry                            │
+│ delete registry key                                  │ host-interaction/registry/delete                     │
+│ start service                                        │ host-interaction/service/start                       │
+│ get session user name                                │ host-interaction/session                             │
+│ persist via Run registry key                         │ persistence/registry/run                             │
+│ persist via Winlogon Helper DLL registry key         │ persistence/registry/winlogon-helper                 │
+│ persist via Windows service (2 matches)              │ persistence/service                                  │
+┕━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙
+```
+
 capa uses a collection of rules to identify capabilities within a program.
 These rules are easy to write, even for those new to reverse engineering.
 By authoring rules, you can extend the capabilities that capa recognizes.
@@ -135,31 +225,30 @@ Here's an example rule used by capa:
 ```yaml
 rule:
   meta:
-    name: hash data with CRC32
-    namespace: data-manipulation/checksum/crc32
+    name: create TCP socket
+    namespace: communication/socket/tcp
     authors:
-      - moritz.raabe@mandiant.com
-    scope: function
+      - william.ballenthin@mandiant.com
+      - joakim@intezer.com
+      - anushka.virgaonkar@mandiant.com
+    scopes:
+      static: basic block
+      dynamic: call
     mbc:
-      - Data::Checksum::CRC32 [C0032.001]
+      - Communication::Socket Communication::Create TCP Socket [C0001.011]
     examples:
-      - 2D3EDC218A90F03089CC01715A9F047F:0x403CBD
-      - 7D28CB106CB54876B2A5C111724A07CD:0x402350 # RtlComputeCrc32
-      - 7EFF498DE13CC734262F87E6B3EF38AB:0x100084A6
+      - Practical Malware Analysis Lab 01-01.dll_:0x10001010
   features:
     - or:
       - and:
-        - mnemonic: shr
-        - or:
-          - number: 0xEDB88320
-          - bytes: 00 00 00 00 96 30 07 77 2C 61 0E EE BA 51 09 99 19 C4 6D 07 8F F4 6A 70 35 A5 63 E9 A3 95 64 9E = crc32_tab
-        - number: 8
-        - characteristic: nzxor
-      - and:
-        - number: 0x8320
-        - number: 0xEDB8
-        - characteristic: nzxor
-      - api: RtlComputeCrc32
+        - number: 6 = IPPROTO_TCP
+        - number: 1 = SOCK_STREAM
+        - number: 2 = AF_INET
+      - api: ws2_32.socket
+      - api: ws2_32.WSASocket
+      - api: socket
+      - property/read: System.Net.Sockets.TcpClient::Client
 ```

 The [github.com/mandiant/capa-rules](https://github.com/mandiant/capa-rules) repository contains hundreds of standard library rules that are distributed with capa.
````
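The `number` features in the new rule example are the literal arguments of a TCP socket creation call: `AF_INET == 2`, `SOCK_STREAM == 1`, and `IPPROTO_TCP == 6`. A quick check with Python's standard library (illustrative only, not part of capa) confirms why those three constants identify the call:

```python
import socket

# the constant values the rule's `and` clause looks for among call operands.
assert socket.AF_INET == 2
assert socket.SOCK_STREAM == 1
assert socket.IPPROTO_TCP == 6

# at the API level this call boils down to socket(2, 1, 6),
# which is exactly the operand triple the rule matches.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
s.close()
```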
assets/classes.json.gz (new binary file, not shown)
assets/interfaces.json.gz (new binary file, not shown)
capa/capabilities/__init__.py (new empty file)
capa/capabilities/common.py (new file, 79 lines)

```python
# -*- coding: utf-8 -*-
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
import itertools
import collections
from typing import Any, Tuple

from capa.rules import Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.features.address import NO_ADDRESS
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor, DynamicFeatureExtractor

logger = logging.getLogger(__name__)


def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, function_features: FeatureSet):
    file_features: FeatureSet = collections.defaultdict(set)

    for feature, va in itertools.chain(extractor.extract_file_features(), extractor.extract_global_features()):
        # not all file features may have virtual addresses.
        # if not, then at least ensure the feature shows up in the index.
        # the set of addresses will still be empty.
        if va:
            file_features[feature].add(va)
        else:
            if feature not in file_features:
                file_features[feature] = set()

    logger.debug("analyzed file and extracted %d features", len(file_features))

    file_features.update(function_features)

    _, matches = ruleset.match(Scope.FILE, file_features, NO_ADDRESS)
    return matches, len(file_features)


def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalone=True) -> bool:
    file_limitation_rules = list(filter(lambda r: r.is_file_limitation_rule(), rules.rules.values()))

    for file_limitation_rule in file_limitation_rules:
        if file_limitation_rule.name not in capabilities:
            continue

        logger.warning("-" * 80)
        for line in file_limitation_rule.meta.get("description", "").split("\n"):
            logger.warning(" %s", line)
        logger.warning(" Identified via rule: %s", file_limitation_rule.name)
        if is_standalone:
            logger.warning(" ")
            logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
        logger.warning("-" * 80)

        # bail on first file limitation
        return True

    return False


def find_capabilities(
    ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
) -> Tuple[MatchResults, Any]:
    from capa.capabilities.static import find_static_capabilities
    from capa.capabilities.dynamic import find_dynamic_capabilities

    if isinstance(extractor, StaticFeatureExtractor):
        # for the time being, extractors are either static or dynamic.
        # remove this assertion once that has changed.
        assert not isinstance(extractor, DynamicFeatureExtractor)
        return find_static_capabilities(ruleset, extractor, disable_progress=disable_progress, **kwargs)
    if isinstance(extractor, DynamicFeatureExtractor):
        return find_dynamic_capabilities(ruleset, extractor, disable_progress=disable_progress, **kwargs)

    raise ValueError(f"unexpected extractor type: {extractor.__class__.__name__}")
```
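The key design in this module is `find_capabilities` dispatching on the extractor type. A self-contained toy with invented names (not capa's API) shows the same shape, including the loud failure for unsupported extractors:

```python
# toy dispatch in the style of find_capabilities; all names here are invented.
class StaticExtractor:
    pass


class DynamicExtractor:
    pass


def find_static(extractor: StaticExtractor) -> dict:
    return {"flavor": "static"}


def find_dynamic(extractor: DynamicExtractor) -> dict:
    return {"flavor": "dynamic"}


def find(extractor) -> dict:
    # route to the right matcher; unknown extractor types fail loudly
    # instead of silently producing no matches.
    if isinstance(extractor, StaticExtractor):
        return find_static(extractor)
    if isinstance(extractor, DynamicExtractor):
        return find_dynamic(extractor)
    raise ValueError(f"unexpected extractor type: {extractor.__class__.__name__}")


print(find(StaticExtractor()))   # {'flavor': 'static'}
print(find(DynamicExtractor()))  # {'flavor': 'dynamic'}
```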
capa/capabilities/dynamic.py (new file, 198 lines)

```python
# -*- coding: utf-8 -*-
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
import itertools
import collections
from typing import Any, Tuple

import tqdm.contrib.logging

import capa.perf
import capa.features.freeze as frz
import capa.render.result_document as rdoc
from capa.rules import Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import redirecting_print_to_tqdm
from capa.capabilities.common import find_file_capabilities
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle, DynamicFeatureExtractor

logger = logging.getLogger(__name__)


def find_call_capabilities(
    ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
) -> Tuple[FeatureSet, MatchResults]:
    """
    find matches for the given rules for the given call.

    returns: tuple containing (features for call, match results for call)
    """
    # all features found for the call.
    features: FeatureSet = collections.defaultdict(set)

    for feature, addr in itertools.chain(
        extractor.extract_call_features(ph, th, ch), extractor.extract_global_features()
    ):
        features[feature].add(addr)

    # matches found at this call.
    _, matches = ruleset.match(Scope.CALL, features, ch.address)

    for rule_name, res in matches.items():
        rule = ruleset[rule_name]
        for addr, _ in res:
            capa.engine.index_rule_matches(features, rule, [addr])

    return features, matches


def find_thread_capabilities(
    ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
    """
    find matches for the given rules within the given thread.

    returns: tuple containing (features for thread, match results for thread, match results for calls)
    """
    # all features found within this thread,
    # includes features found within calls.
    features: FeatureSet = collections.defaultdict(set)

    # matches found at the call scope.
    # might be found at different calls, that's ok.
    call_matches: MatchResults = collections.defaultdict(list)

    for ch in extractor.get_calls(ph, th):
        ifeatures, imatches = find_call_capabilities(ruleset, extractor, ph, th, ch)
        for feature, vas in ifeatures.items():
            features[feature].update(vas)

        for rule_name, res in imatches.items():
            call_matches[rule_name].extend(res)

    for feature, va in itertools.chain(extractor.extract_thread_features(ph, th), extractor.extract_global_features()):
        features[feature].add(va)

    # matches found within this thread.
    _, matches = ruleset.match(Scope.THREAD, features, th.address)

    for rule_name, res in matches.items():
        rule = ruleset[rule_name]
        for va, _ in res:
            capa.engine.index_rule_matches(features, rule, [va])

    return features, matches, call_matches


def find_process_capabilities(
    ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
    """
    find matches for the given rules within the given process.

    returns: tuple containing (match results for process, match results for threads, match results for calls, number of features)
    """
    # all features found within this process,
    # includes features found within threads (and calls).
    process_features: FeatureSet = collections.defaultdict(set)

    # matches found at the thread scope.
    # might be found at different threads, that's ok.
    thread_matches: MatchResults = collections.defaultdict(list)

    # matches found at the call scope.
    # might be found at different calls, that's ok.
    call_matches: MatchResults = collections.defaultdict(list)

    for th in extractor.get_threads(ph):
        features, tmatches, cmatches = find_thread_capabilities(ruleset, extractor, ph, th)
        for feature, vas in features.items():
            process_features[feature].update(vas)

        for rule_name, res in tmatches.items():
            thread_matches[rule_name].extend(res)

        for rule_name, res in cmatches.items():
            call_matches[rule_name].extend(res)

    for feature, va in itertools.chain(extractor.extract_process_features(ph), extractor.extract_global_features()):
        process_features[feature].add(va)

    _, process_matches = ruleset.match(Scope.PROCESS, process_features, ph.address)
    return process_matches, thread_matches, call_matches, len(process_features)


def find_dynamic_capabilities(
    ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
) -> Tuple[MatchResults, Any]:
    all_process_matches: MatchResults = collections.defaultdict(list)
    all_thread_matches: MatchResults = collections.defaultdict(list)
    all_call_matches: MatchResults = collections.defaultdict(list)

    feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())

    assert isinstance(extractor, DynamicFeatureExtractor)
    with redirecting_print_to_tqdm(disable_progress):
        with tqdm.contrib.logging.logging_redirect_tqdm():
            pbar = tqdm.tqdm
            if disable_progress:
                # do not use tqdm to avoid unnecessary side effects when caller intends
                # to disable progress completely
                def pbar(s, *args, **kwargs):
                    return s

            processes = list(extractor.get_processes())

            pb = pbar(processes, desc="matching", unit=" processes", leave=False)
            for p in pb:
                process_matches, thread_matches, call_matches, feature_count = find_process_capabilities(
                    ruleset, extractor, p
                )
                feature_counts.processes += (
                    rdoc.ProcessFeatureCount(address=frz.Address.from_capa(p.address), count=feature_count),
                )
                logger.debug("analyzed %s and extracted %d features", p.address, feature_count)

                for rule_name, res in process_matches.items():
                    all_process_matches[rule_name].extend(res)
                for rule_name, res in thread_matches.items():
                    all_thread_matches[rule_name].extend(res)
                for rule_name, res in call_matches.items():
                    all_call_matches[rule_name].extend(res)

    # collection of features that captures the rule matches within process and thread scopes.
    # mapping from feature (matched rule) to set of addresses at which it matched.
    process_and_lower_features: FeatureSet = collections.defaultdict(set)
    for rule_name, results in itertools.chain(
        all_process_matches.items(), all_thread_matches.items(), all_call_matches.items()
    ):
        locations = {p[0] for p in results}
        rule = ruleset[rule_name]
        capa.engine.index_rule_matches(process_and_lower_features, rule, locations)

    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, process_and_lower_features)
    feature_counts.file = feature_count

    matches = dict(
        itertools.chain(
            # each rule exists in exactly one scope,
            # so there won't be any overlap among these following MatchResults,
            # and we can merge the dictionaries naively.
            all_thread_matches.items(),
            all_process_matches.items(),
            all_call_matches.items(),
            all_file_matches.items(),
        )
    )

    meta = {
        "feature_counts": feature_counts,
    }

    return matches, meta
```
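The pattern throughout this module is a bottom-up roll-up: features observed at each call are merged into the owning thread's feature set, thread features into the process, and matches at every scope are re-indexed as features visible to the scopes above. A minimal self-contained sketch of the roll-up step (feature names and locations invented for illustration):

```python
import collections

# features observed at two separate calls within one thread:
# feature -> set of locations where it was seen.
call_feature_sets = [
    {"api:socket": {0x1}},
    {"api:connect": {0x2}},
]

# merge call-level features into the thread's feature set,
# mirroring the update loop in find_thread_capabilities.
thread_features = collections.defaultdict(set)
for call_features in call_feature_sets:
    for feature, locations in call_features.items():
        thread_features[feature].update(locations)

# a thread-scope rule can now require features that occurred in different calls.
if {"api:socket", "api:connect"} <= thread_features.keys():
    print("thread-scope match:", dict(thread_features))
```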
233
capa/capabilities/static.py
Normal file
233
capa/capabilities/static.py
Normal file
@@ -0,0 +1,233 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import time
|
||||
import logging
|
||||
import itertools
|
||||
import collections
|
||||
from typing import Any, Tuple
|
||||
|
||||
import tqdm.contrib.logging
|
||||
|
||||
import capa.perf
|
||||
import capa.features.freeze as frz
|
||||
import capa.render.result_document as rdoc
|
||||
from capa.rules import Scope, RuleSet
|
||||
from capa.engine import FeatureSet, MatchResults
|
||||
from capa.helpers import redirecting_print_to_tqdm
|
||||
from capa.capabilities.common import find_file_capabilities
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def find_instruction_capabilities(
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||
) -> Tuple[FeatureSet, MatchResults]:
|
||||
"""
|
||||
find matches for the given rules for the given instruction.
|
||||
|
||||
returns: tuple containing (features for instruction, match results for instruction)
|
||||
"""
|
||||
# all features found for the instruction.
|
||||
features: FeatureSet = collections.defaultdict(set)
|
||||
|
||||
for feature, addr in itertools.chain(
|
||||
extractor.extract_insn_features(f, bb, insn), extractor.extract_global_features()
|
||||
):
|
||||
features[feature].add(addr)
|
||||
|
||||
# matches found at this instruction.
|
||||
_, matches = ruleset.match(Scope.INSTRUCTION, features, insn.address)
|
||||
|
||||
for rule_name, res in matches.items():
|
||||
rule = ruleset[rule_name]
|
||||
for addr, _ in res:
|
||||
capa.engine.index_rule_matches(features, rule, [addr])
|
||||
|
||||
return features, matches
|
||||
|
||||
|
||||
def find_basic_block_capabilities(
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
||||
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
||||
"""
|
||||
find matches for the given rules within the given basic block.
|
||||
|
||||
returns: tuple containing (features for basic block, match results for basic block, match results for instructions)
|
||||
"""
|
||||
# all features found within this basic block,
|
||||
# includes features found within instructions.
|
||||
features: FeatureSet = collections.defaultdict(set)
|
||||
|
||||
# matches found at the instruction scope.
|
||||
# might be found at different instructions, thats ok.
|
||||
insn_matches: MatchResults = collections.defaultdict(list)
|
||||
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
ifeatures, imatches = find_instruction_capabilities(ruleset, extractor, f, bb, insn)
|
||||
for feature, vas in ifeatures.items():
|
||||
features[feature].update(vas)
|
||||
|
||||
for rule_name, res in imatches.items():
|
||||
insn_matches[rule_name].extend(res)
|
||||
|
||||
for feature, va in itertools.chain(
|
||||
extractor.extract_basic_block_features(f, bb), extractor.extract_global_features()
|
||||
):
|
||||
features[feature].add(va)
|
||||
|
||||
# matches found within this basic block.
|
||||
_, matches = ruleset.match(Scope.BASIC_BLOCK, features, bb.address)
|
||||
|
||||
for rule_name, res in matches.items():
|
||||
rule = ruleset[rule_name]
|
||||
for va, _ in res:
|
||||
capa.engine.index_rule_matches(features, rule, [va])
|
||||
|
||||
return features, matches, insn_matches
|
||||
|
||||
|
||||
def find_code_capabilities(
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||
"""
|
||||
find matches for the given rules within the given function.
|
||||
|
||||
returns: tuple containing (match results for function, match results for basic blocks, match results for instructions, number of features)
|
||||
"""
|
||||
# all features found within this function,
|
||||
# includes features found within basic blocks (and instructions).
|
||||
function_features: FeatureSet = collections.defaultdict(set)
|
||||
|
||||
# matches found at the basic block scope.
|
||||
# might be found at different basic blocks, thats ok.
|
||||
bb_matches: MatchResults = collections.defaultdict(list)
|
||||
|
||||
# matches found at the instruction scope.
|
||||
# might be found at different instructions, thats ok.
|
||||
insn_matches: MatchResults = collections.defaultdict(list)
|
||||
|
||||
for bb in extractor.get_basic_blocks(fh):
|
||||
features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, fh, bb)
|
||||
for feature, vas in features.items():
|
||||
function_features[feature].update(vas)
|
||||
|
||||
for rule_name, res in bmatches.items():
|
||||
bb_matches[rule_name].extend(res)
|
||||
|
||||
for rule_name, res in imatches.items():
|
||||
insn_matches[rule_name].extend(res)
|
||||
|
||||
for feature, va in itertools.chain(extractor.extract_function_features(fh), extractor.extract_global_features()):
|
||||
function_features[feature].add(va)
|
||||
|
||||
_, function_matches = ruleset.match(Scope.FUNCTION, function_features, fh.address)
|
||||
return function_matches, bb_matches, insn_matches, len(function_features)
|
||||
|
||||
|
||||
def find_static_capabilities(
    ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
) -> Tuple[MatchResults, Any]:
    all_function_matches: MatchResults = collections.defaultdict(list)
    all_bb_matches: MatchResults = collections.defaultdict(list)
    all_insn_matches: MatchResults = collections.defaultdict(list)

    feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
    library_functions: Tuple[rdoc.LibraryFunction, ...] = ()

    assert isinstance(extractor, StaticFeatureExtractor)
    with redirecting_print_to_tqdm(disable_progress):
        with tqdm.contrib.logging.logging_redirect_tqdm():
            pbar = tqdm.tqdm
            if capa.helpers.is_runtime_ghidra():
                # the Ghidrathon interpreter cannot properly handle the TMonitor
                # thread that is created via a monitor_interval > 0
                pbar.monitor_interval = 0
            if disable_progress:
                # do not use tqdm to avoid unnecessary side effects when caller intends
                # to disable progress completely
                def pbar(s, *args, **kwargs):
                    return s

            functions = list(extractor.get_functions())
            n_funcs = len(functions)

            pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions", leave=False)
            for f in pb:
                t0 = time.time()
                if extractor.is_library_function(f.address):
                    function_name = extractor.get_function_name(f.address)
                    logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
                    library_functions += (
                        rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
                    )
                    n_libs = len(library_functions)
                    percentage = round(100 * (n_libs / n_funcs))
                    if isinstance(pb, tqdm.tqdm):
                        pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
                    continue

                function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(
                    ruleset, extractor, f
                )
                feature_counts.functions += (
                    rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
                )
                t1 = time.time()

                match_count = sum(len(res) for res in function_matches.values())
                match_count += sum(len(res) for res in bb_matches.values())
                match_count += sum(len(res) for res in insn_matches.values())
                logger.debug(
                    "analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
                    f.address,
                    feature_count,
                    match_count,
                    t1 - t0,
                )

                for rule_name, res in function_matches.items():
                    all_function_matches[rule_name].extend(res)
                for rule_name, res in bb_matches.items():
                    all_bb_matches[rule_name].extend(res)
                for rule_name, res in insn_matches.items():
                    all_insn_matches[rule_name].extend(res)

    # collection of features that captures the rule matches within function, BB, and instruction scopes.
    # mapping from feature (matched rule) to set of addresses at which it matched.
    function_and_lower_features: FeatureSet = collections.defaultdict(set)
    for rule_name, results in itertools.chain(
        all_function_matches.items(), all_bb_matches.items(), all_insn_matches.items()
    ):
        locations = {p[0] for p in results}
        rule = ruleset[rule_name]
        capa.engine.index_rule_matches(function_and_lower_features, rule, locations)

    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
    feature_counts.file = feature_count

    matches = dict(
        itertools.chain(
            # each rule exists in exactly one scope,
            # so there won't be any overlap among the following MatchResults,
            # and we can merge the dictionaries naively.
            all_insn_matches.items(),
            all_bb_matches.items(),
            all_function_matches.items(),
            all_file_matches.items(),
        )
    )

    meta = {
        "feature_counts": feature_counts,
        "library_functions": library_functions,
    }

    return matches, meta

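
The naive dict() merge above is safe only because each rule is evaluated in exactly one scope, so the four MatchResults mappings can never share a key. A toy illustration with hypothetical rule names and placeholder result objects:

    import itertools

    insn_results = {"encode data using XOR": [(0x401000, None)]}
    func_results = {"self-delete": [(0x402000, None)]}
    merged = dict(itertools.chain(insn_results.items(), func_results.items()))
    assert len(merged) == 2  # keys never collide across scopes
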
@@ -304,7 +304,7 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -
    other strategies can be imagined that match differently; implement these elsewhere.
    specifically, this routine does "top down" matching of the given rules against the feature set.
    """
    results = collections.defaultdict(list)  # type: MatchResults
    results: MatchResults = collections.defaultdict(list)

    # copy features so that we can modify it
    # without affecting the caller (keep this function pure)

@@ -19,3 +19,7 @@ class UnsupportedArchError(ValueError):

class UnsupportedOSError(ValueError):
    pass


class EmptyReportError(ValueError):
    pass

@@ -43,6 +43,79 @@ class AbsoluteVirtualAddress(int, Address):
        return int.__hash__(self)


class ProcessAddress(Address):
    """an address of a process in a dynamic execution trace"""

    def __init__(self, pid: int, ppid: int = 0):
        assert ppid >= 0
        assert pid > 0
        self.ppid = ppid
        self.pid = pid

    def __repr__(self):
        return "process(%s%s)" % (
            f"ppid: {self.ppid}, " if self.ppid > 0 else "",
            f"pid: {self.pid}",
        )

    def __hash__(self):
        return hash((self.ppid, self.pid))

    def __eq__(self, other):
        assert isinstance(other, ProcessAddress)
        return (self.ppid, self.pid) == (other.ppid, other.pid)

    def __lt__(self, other):
        assert isinstance(other, ProcessAddress)
        return (self.ppid, self.pid) < (other.ppid, other.pid)


class ThreadAddress(Address):
    """addresses a thread in a dynamic execution trace"""

    def __init__(self, process: ProcessAddress, tid: int):
        assert tid >= 0
        self.process = process
        self.tid = tid

    def __repr__(self):
        return f"{self.process}, thread(tid: {self.tid})"

    def __hash__(self):
        return hash((self.process, self.tid))

    def __eq__(self, other):
        assert isinstance(other, ThreadAddress)
        return (self.process, self.tid) == (other.process, other.tid)

    def __lt__(self, other):
        assert isinstance(other, ThreadAddress)
        return (self.process, self.tid) < (other.process, other.tid)


class DynamicCallAddress(Address):
    """addresses a call in a dynamic execution trace"""

    def __init__(self, thread: ThreadAddress, id: int):
        assert id >= 0
        self.thread = thread
        self.id = id

    def __repr__(self):
        return f"{self.thread}, call(id: {self.id})"

    def __hash__(self):
        return hash((self.thread, self.id))

    def __eq__(self, other):
        assert isinstance(other, DynamicCallAddress)
        return (self.thread, self.id) == (other.thread, other.id)

    def __lt__(self, other):
        assert isinstance(other, DynamicCallAddress)
        return (self.thread, self.id) < (other.thread, other.id)

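
A quick sketch of how these dynamic addresses nest; the pid/tid values are made up:

    p = ProcessAddress(pid=1234, ppid=1)
    t = ThreadAddress(process=p, tid=5678)
    c = DynamicCallAddress(thread=t, id=0)
    print(c)  # process(ppid: 1, pid: 1234), thread(tid: 5678), call(id: 0)
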
class RelativeVirtualAddress(int, Address):
    """a memory address relative to a base address"""

@@ -104,6 +177,34 @@ class DNTokenOffsetAddress(Address):
        return self.token + self.offset


class DexMethodAddress(int, Address):
    def __new__(cls, offset: int):
        return int.__new__(cls, offset)

    def __repr__(self):
        return f"DexMethodAddress(offset={hex(self)})"

    def __str__(self) -> str:
        return repr(self)

    def __hash__(self):
        return int.__hash__(self)


class DexClassAddress(int, Address):
    def __new__(cls, offset: int):
        return int.__new__(cls, offset)

    def __repr__(self):
        return f"DexClassAddress(offset={hex(self)})"

    def __str__(self) -> str:
        return repr(self)

    def __hash__(self):
        return int.__hash__(self)

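
Because DexMethodAddress and DexClassAddress subclass int, they behave like plain file offsets while rendering with their type name; a small illustration (offsets made up):

    m = DexMethodAddress(0x1C40)
    print(m)           # DexMethodAddress(offset=0x1c40)
    print(hex(m + 4))  # 0x1c44 -- ordinary integer arithmetic still works
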
class _NoAddress(Address):
    def __eq__(self, other):
        return True

@@ -409,7 +409,9 @@ ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"
# dotnet
ARCH_ANY = "any"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
# dex
ARCH_DALVIK = "dalvik"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY, ARCH_DALVIK)


class Arch(Feature):
@@ -421,10 +423,11 @@ class Arch(Feature):
OS_WINDOWS = "windows"
OS_LINUX = "linux"
OS_MACOS = "macos"
OS_ANDROID = "android"
# dotnet
OS_ANY = "any"
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY, OS_ANDROID})
# internal only, not to be used in rules
OS_AUTO = "auto"

@@ -452,11 +455,24 @@ class OS(Feature):
FORMAT_PE = "pe"
FORMAT_ELF = "elf"
FORMAT_DOTNET = "dotnet"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
FORMAT_DEX = "dex"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET, FORMAT_DEX)
# internal only, not to be used in rules
FORMAT_AUTO = "auto"
FORMAT_SC32 = "sc32"
FORMAT_SC64 = "sc64"
FORMAT_CAPE = "cape"
STATIC_FORMATS = {
    FORMAT_SC32,
    FORMAT_SC64,
    FORMAT_PE,
    FORMAT_ELF,
    FORMAT_DOTNET,
    FORMAT_DEX,
}
DYNAMIC_FORMATS = {
    FORMAT_CAPE,
}
FORMAT_FREEZE = "freeze"
FORMAT_RESULT = "result"
FORMAT_UNKNOWN = "unknown"

@@ -7,13 +7,18 @@
# See the License for the specific language governing permissions and limitations under the License.

import abc
import hashlib
import dataclasses
from typing import Any, Dict, Tuple, Union, Iterator
from dataclasses import dataclass

# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
# https://github.com/mandiant/capa/issues/1699
from typing_extensions import TypeAlias

import capa.features.address
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress

# feature extractors may reference functions, BBs, insns by opaque handle values.
# you can use the `.address` property to get and render the address of the feature.
@@ -22,6 +27,24 @@ from capa.features.address import Address, AbsoluteVirtualAddress
# the feature extractor from which they were created.


@dataclass
class SampleHashes:
    md5: str
    sha1: str
    sha256: str

    @classmethod
    def from_bytes(cls, buf: bytes) -> "SampleHashes":
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()
        md5.update(buf)
        sha1.update(buf)
        sha256.update(buf)

        return cls(md5=md5.hexdigest(), sha1=sha1.hexdigest(), sha256=sha256.hexdigest())

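
A minimal illustration of SampleHashes; the buffer contents are arbitrary:

    hashes = SampleHashes.from_bytes(b"MZ\x90\x00")
    print(hashes.md5, hashes.sha1, hashes.sha256)
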
@dataclass
class FunctionHandle:
    """reference to a function recognized by a feature extractor.
@@ -63,16 +86,18 @@ class InsnHandle:
    inner: Any


class FeatureExtractor:
class StaticFeatureExtractor:
    """
    FeatureExtractor defines the interface for fetching features from a sample.
    StaticFeatureExtractor defines the interface for fetching features from a
    sample without running it; extractors that rely on the execution trace of
    a sample must implement the other sibling class, DynamicFeatureExtractor.

    There may be multiple backends that support fetching features for capa.
    For example, we use vivisect by default, but also want to support saving
    and restoring features from a JSON file.
    When we restore the features, we'd like to use exactly the same matching logic
    to find matching rules.
    Therefore, we can define a FeatureExtractor that provides features from the
    Therefore, we can define a StaticFeatureExtractor that provides features from the
    serialized JSON file and do matching without a binary analysis pass.
    Also, this provides a way to hook in an IDA backend.

@@ -81,13 +106,14 @@ class FeatureExtractor:

    __metaclass__ = abc.ABCMeta

    def __init__(self):
    def __init__(self, hashes: SampleHashes):
        #
        # note: a subclass should define ctor parameters for its own use.
        # for example, the vivisect feature extractor might require the vw and/or path.
        # this base class doesn't know what to do with that info, though.
        #
        super().__init__()
        self._sample_hashes = hashes

    @abc.abstractmethod
    def get_base_address(self) -> Union[AbsoluteVirtualAddress, capa.features.address._NoAddress]:
@@ -100,6 +126,12 @@ class FeatureExtractor:
        """
        raise NotImplementedError()

    def get_sample_hashes(self) -> SampleHashes:
        """
        fetch the hashes for the sample contained within the extractor.
        """
        return self._sample_hashes

    @abc.abstractmethod
    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        """
@@ -262,3 +294,177 @@ class FeatureExtractor:
            Tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()


@dataclass
class ProcessHandle:
    """
    reference to a process extracted by the sandbox.

    Attributes:
        address: process's address (pid)
        inner: sandbox-specific data
    """

    address: ProcessAddress
    inner: Any


@dataclass
class ThreadHandle:
    """
    reference to a thread extracted by the sandbox.

    Attributes:
        address: thread's address (tid)
        inner: sandbox-specific data
    """

    address: ThreadAddress
    inner: Any


@dataclass
class CallHandle:
    """
    reference to an API call extracted by the sandbox.

    Attributes:
        address: call's address, such as event index or id
        inner: sandbox-specific data
    """

    address: DynamicCallAddress
    inner: Any


class DynamicFeatureExtractor:
    """
    DynamicFeatureExtractor defines the interface for fetching features from a
    sandbox's analysis of a sample; extractors that rely on statically analyzing
    a sample must implement the sibling extractor, StaticFeatureExtractor.

    Features are grouped mainly into threads, which, together with their meta-features,
    are grouped into processes (which also have their own features). Other scopes
    (such as function and file) may also apply for a specific sandbox.

    This class is not instantiated directly; it is the base class for other implementations.
    """

    __metaclass__ = abc.ABCMeta

    def __init__(self, hashes: SampleHashes):
        #
        # note: a subclass should define ctor parameters for its own use.
        # for example, the vivisect feature extractor might require the vw and/or path.
        # this base class doesn't know what to do with that info, though.
        #
        super().__init__()
        self._sample_hashes = hashes

    def get_sample_hashes(self) -> SampleHashes:
        """
        fetch the hashes for the sample contained within the extractor.
        """
        return self._sample_hashes

    @abc.abstractmethod
    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        """
        extract features found at every scope ("global").

        example::

            extractor = CapeFeatureExtractor.from_report(json.loads(buf))
            for feature, addr in extractor.get_global_features():
                print(addr, feature)

        yields:
            Tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        """
        extract file-scope features.

        example::

            extractor = CapeFeatureExtractor.from_report(json.loads(buf))
            for feature, addr in extractor.get_file_features():
                print(addr, feature)

        yields:
            Tuple[Feature, Address]: feature and its location
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_processes(self) -> Iterator[ProcessHandle]:
        """
        Enumerate processes in the trace.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
        """
        Yields all the features of a process. These include:
        - file features of the process' image
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_process_name(self, ph: ProcessHandle) -> str:
        """
        Returns the human-readable name for the given process,
        such as the filename.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
        """
        Enumerate threads in the given process.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
        """
        Yields all the features of a thread. These include:
        - sequenced API traces
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
        """
        Enumerate calls in the given thread.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def extract_call_features(
        self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
    ) -> Iterator[Tuple[Feature, Address]]:
        """
        Yields all features of a call. These include:
        - API name
        - bytes/strings/numbers extracted from arguments
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> str:
        """
        Returns the human-readable name for the given call,
        such as a rendered API log entry, like:

            Foo(1, "two", b"\x00\x11") -> -1
        """
        raise NotImplementedError()


FeatureExtractor: TypeAlias = Union[StaticFeatureExtractor, DynamicFeatureExtractor]

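
With FeatureExtractor now a union alias, callers can branch on the concrete flavor; a hedged sketch of the pattern (the function name is illustrative, not part of this diff):

    def describe(extractor: FeatureExtractor) -> str:
        if isinstance(extractor, StaticFeatureExtractor):
            return "static: " + type(extractor).__name__
        else:
            return "dynamic: " + type(extractor).__name__
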
@@ -17,12 +17,18 @@ import capa.features.extractors.binja.function
import capa.features.extractors.binja.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.extractors.base_extractor import (
    BBHandle,
    InsnHandle,
    SampleHashes,
    FunctionHandle,
    StaticFeatureExtractor,
)


class BinjaFeatureExtractor(FeatureExtractor):
class BinjaFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, bv: binja.BinaryView):
        super().__init__()
        super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, len(bv.file.raw))))
        self.bv = bv
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))

@@ -115,13 +115,13 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
    for sym in bv.get_symbols_of_type(SymbolType.ImportAddressSymbol):
        lib_name = str(sym.namespace)
        addr = AbsoluteVirtualAddress(sym.address)
        for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name):
        for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name, include_dll=True):
            yield Import(name), addr

        ordinal = sym.ordinal
        if ordinal != 0 and (lib_name != ""):
            ordinal_name = f"#{ordinal}"
            for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name):
            for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name, include_dll=True):
                yield Import(name), addr

0  capa/features/extractors/cape/__init__.py  Normal file

62  capa/features/extractors/cape/call.py  Normal file
@@ -0,0 +1,62 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging
from typing import Tuple, Iterator

from capa.helpers import assert_never
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address
from capa.features.extractors.cape.models import Call
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle

logger = logging.getLogger(__name__)


def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    extract the given call's features (such as API name and arguments)
    and yield them as API, Number, and String features.

    args:
        ph: process handle (for defining the extraction scope)
        th: thread handle (for defining the extraction scope)
        ch: call handle (for defining the extraction scope)

    yields:
        Feature, address; where Feature is either: API, Number, or String.
    """
    call: Call = ch.inner

    # list similar to disassembly: arguments right-to-left, then the call
    for arg in reversed(call.arguments):
        value = arg.value
        if isinstance(value, list) and len(value) == 0:
            # unsure why CAPE captures arguments as empty lists?
            continue

        elif isinstance(value, str):
            yield String(value), ch.address

        elif isinstance(value, int):
            yield Number(value), ch.address

        else:
            assert_never(value)

    yield API(call.api), ch.address


def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
    for handler in CALL_HANDLERS:
        for feature, addr in handler(ph, th, ch):
            yield feature, addr


CALL_HANDLERS = (extract_call_features,)

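
To make the argument handling above concrete, here is a hedged sketch of a CAPE-style call record round-tripped through the Call model (defined later in this diff, in models.py); every field value below is made up:

    raw = {
        "timestamp": "2023-06-01 12:00:00,000",
        "thread_id": 1608,
        "category": "system",
        "api": "NtClose",
        "arguments": [{"name": "Handle", "value": "0x1c"}],
        "status": True,
        "return": "0x0",
        "repeated": 0,
        "caller": "0x401000",
        "parentcaller": "0x401100",
        "id": 0,
    }
    call = Call.model_validate(raw)
    assert call.api == "NtClose"
    assert call.return_ == 0  # "return" is aliased to return_, and HexInt coerces "0x0"
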
145  capa/features/extractors/cape/extractor.py  Normal file
@@ -0,0 +1,145 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging
from typing import Dict, Tuple, Union, Iterator

import capa.helpers
import capa.features.extractors.cape.call
import capa.features.extractors.cape.file
import capa.features.extractors.cape.thread
import capa.features.extractors.cape.global_
import capa.features.extractors.cape.process
from capa.exceptions import EmptyReportError, UnsupportedFormatError
from capa.features.common import Feature, Characteristic
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress, _NoAddress
from capa.features.extractors.cape.models import Call, Static, Process, CapeReport
from capa.features.extractors.base_extractor import (
    CallHandle,
    SampleHashes,
    ThreadHandle,
    ProcessHandle,
    DynamicFeatureExtractor,
)

logger = logging.getLogger(__name__)

TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE"}


class CapeExtractor(DynamicFeatureExtractor):
    def __init__(self, report: CapeReport):
        super().__init__(
            hashes=SampleHashes(
                md5=report.target.file.md5.lower(),
                sha1=report.target.file.sha1.lower(),
                sha256=report.target.file.sha256.lower(),
            )
        )
        self.report: CapeReport = report

        # pre-compute these because we'll yield them at *every* scope.
        self.global_features = list(capa.features.extractors.cape.global_.extract_features(self.report))

    def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]:
        # value according to the PE header; the actual trace may use a different imagebase
        assert self.report.static is not None and self.report.static.pe is not None
        return AbsoluteVirtualAddress(self.report.static.pe.imagebase)

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        yield from self.global_features

    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        yield from capa.features.extractors.cape.file.extract_features(self.report)

    def get_processes(self) -> Iterator[ProcessHandle]:
        yield from capa.features.extractors.cape.file.get_processes(self.report)

    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
        yield from capa.features.extractors.cape.process.extract_features(ph)

    def get_process_name(self, ph) -> str:
        process: Process = ph.inner
        return process.process_name

    def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
        yield from capa.features.extractors.cape.process.get_threads(ph)

    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
        if False:
            # force this routine to be a generator,
            # but we don't actually have any elements to generate.
            yield Characteristic("never"), NO_ADDRESS
        return

    def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
        yield from capa.features.extractors.cape.thread.get_calls(ph, th)

    def extract_call_features(
        self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
    ) -> Iterator[Tuple[Feature, Address]]:
        yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)

    def get_call_name(self, ph, th, ch) -> str:
        call: Call = ch.inner

        parts = []
        parts.append(call.api)
        parts.append("(")
        for argument in call.arguments:
            parts.append(argument.name)
            parts.append("=")

            if argument.pretty_value:
                parts.append(argument.pretty_value)
            else:
                if isinstance(argument.value, int):
                    parts.append(hex(argument.value))
                elif isinstance(argument.value, str):
                    parts.append('"')
                    parts.append(argument.value)
                    parts.append('"')
                elif isinstance(argument.value, list):
                    pass
                else:
                    capa.helpers.assert_never(argument.value)

            parts.append(", ")
        if call.arguments:
            # remove the trailing comma
            parts.pop()
        parts.append(")")
        parts.append(" -> ")
        if call.pretty_return:
            parts.append(call.pretty_return)
        else:
            parts.append(hex(call.return_))

        return "".join(parts)

    @classmethod
    def from_report(cls, report: Dict) -> "CapeExtractor":
        cr = CapeReport.model_validate(report)

        if cr.info.version not in TESTED_VERSIONS:
            logger.warning("CAPE version '%s' not tested/supported yet", cr.info.version)

        # observed in 2.4-CAPE reports from capesandbox.com
        if cr.static is None and cr.target.file.pe is not None:
            cr.static = Static()
            cr.static.pe = cr.target.file.pe

        if cr.static is None:
            raise UnsupportedFormatError("CAPE report missing static analysis")

        if cr.static.pe is None:
            raise UnsupportedFormatError("CAPE report missing PE analysis")

        if len(cr.behavior.processes) == 0:
            raise EmptyReportError("CAPE did not capture any processes")

        return cls(cr)

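
An end-to-end sketch of loading a report into this extractor; the path is hypothetical:

    import json
    from pathlib import Path

    buf = Path("analyses/1/reports/report.json").read_bytes()
    extractor = CapeExtractor.from_report(json.loads(buf))
    for ph in extractor.get_processes():
        print(extractor.get_process_name(ph), ph.address)
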
132  capa/features/extractors/cape/file.py  Normal file
@@ -0,0 +1,132 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging
from typing import Tuple, Iterator

from capa.features.file import Export, Import, Section
from capa.features.common import String, Feature
from capa.features.address import NO_ADDRESS, Address, ProcessAddress, AbsoluteVirtualAddress
from capa.features.extractors.helpers import generate_symbols
from capa.features.extractors.cape.models import CapeReport
from capa.features.extractors.base_extractor import ProcessHandle

logger = logging.getLogger(__name__)


def get_processes(report: CapeReport) -> Iterator[ProcessHandle]:
    """
    get all the created processes for a sample
    """
    seen_processes = {}
    for process in report.behavior.processes:
        addr = ProcessAddress(pid=process.process_id, ppid=process.parent_id)
        yield ProcessHandle(address=addr, inner=process)

        # check for pid and ppid reuse
        if addr not in seen_processes:
            seen_processes[addr] = [process]
        else:
            logger.warning(
                "pid and ppid reuse detected between process %s and process%s: %s",
                process,
                "es" if len(seen_processes[addr]) > 1 else "",
                seen_processes[addr],
            )
            seen_processes[addr].append(process)


def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    """
    extract imported function names
    """
    assert report.static is not None and report.static.pe is not None
    imports = report.static.pe.imports

    if isinstance(imports, dict):
        imports = list(imports.values())

    assert isinstance(imports, list)

    for library in imports:
        for function in library.imports:
            if not function.name:
                continue

            for name in generate_symbols(library.dll, function.name, include_dll=True):
                yield Import(name), AbsoluteVirtualAddress(function.address)


def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    assert report.static is not None and report.static.pe is not None
    for function in report.static.pe.exports:
        yield Export(function.name), AbsoluteVirtualAddress(function.address)


def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    assert report.static is not None and report.static.pe is not None
    for section in report.static.pe.sections:
        yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)


def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    if report.strings is not None:
        for string in report.strings:
            yield String(string), NO_ADDRESS


def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    for regkey in report.behavior.summary.keys:
        yield String(regkey), NO_ADDRESS


def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    for file in report.behavior.summary.files:
        yield String(file), NO_ADDRESS


def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    for mutex in report.behavior.summary.mutexes:
        yield String(mutex), NO_ADDRESS


def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    for cmd in report.behavior.summary.executed_commands:
        yield String(cmd), NO_ADDRESS


def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    for symbol in report.behavior.summary.resolved_apis:
        yield String(symbol), NO_ADDRESS


def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    for svc in report.behavior.summary.created_services:
        yield String(svc), NO_ADDRESS
    for svc in report.behavior.summary.started_services:
        yield String(svc), NO_ADDRESS


def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    for handler in FILE_HANDLERS:
        for feature, addr in handler(report):
            yield feature, addr


FILE_HANDLERS = (
    extract_import_names,
    extract_export_names,
    extract_section_names,
    extract_file_strings,
    extract_used_regkeys,
    extract_used_files,
    extract_used_mutexes,
    extract_used_commands,
    extract_used_apis,
    extract_used_services,
)

93  capa/features/extractors/cape/global_.py  Normal file
@@ -0,0 +1,93 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging
from typing import Tuple, Iterator

from capa.features.common import (
    OS,
    OS_ANY,
    OS_LINUX,
    ARCH_I386,
    FORMAT_PE,
    ARCH_AMD64,
    FORMAT_ELF,
    OS_WINDOWS,
    Arch,
    Format,
    Feature,
)
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.cape.models import CapeReport

logger = logging.getLogger(__name__)


def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    if "Intel 80386" in report.target.file.type:
        yield Arch(ARCH_I386), NO_ADDRESS
    elif "x86-64" in report.target.file.type:
        yield Arch(ARCH_AMD64), NO_ADDRESS
    else:
        logger.warning("unrecognized architecture: %s", report.target.file.type)
        raise ValueError(
            f"unrecognized architecture from the CAPE report; output of file command: {report.target.file.type}"
        )


def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    if "PE" in report.target.file.type:
        yield Format(FORMAT_PE), NO_ADDRESS
    elif "ELF" in report.target.file.type:
        yield Format(FORMAT_ELF), NO_ADDRESS
    else:
        logger.warning("unknown file format, file command output: %s", report.target.file.type)
        raise ValueError(
            f"unrecognized file format from the CAPE report; output of file command: {report.target.file.type}"
        )


def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    # this variable contains the output of the file command
    file_output = report.target.file.type

    if "windows" in file_output.lower():
        yield OS(OS_WINDOWS), NO_ADDRESS
    elif "elf" in file_output.lower():
        # operating systems recognized by the file command: https://github.com/file/file/blob/master/src/readelf.c#L609
        if "Linux" in file_output:
            yield OS(OS_LINUX), NO_ADDRESS
        elif "Hurd" in file_output:
            yield OS("hurd"), NO_ADDRESS
        elif "Solaris" in file_output:
            yield OS("solaris"), NO_ADDRESS
        elif "kFreeBSD" in file_output:
            yield OS("freebsd"), NO_ADDRESS
        elif "kNetBSD" in file_output:
            yield OS("netbsd"), NO_ADDRESS
        else:
            # if the operating system information is missing from the CAPE report, it's likely a bug
            logger.warning("unrecognized OS: %s", file_output)
            raise ValueError(f"unrecognized OS from the CAPE report; output of file command: {file_output}")
    else:
        # the sample is shellcode
        logger.debug("unsupported file format, file command output: %s", file_output)
        yield OS(OS_ANY), NO_ADDRESS


def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    for global_handler in GLOBAL_HANDLER:
        for feature, addr in global_handler(report):
            yield feature, addr


GLOBAL_HANDLER = (
    extract_format,
    extract_os,
    extract_arch,
)

29  capa/features/extractors/cape/helpers.py  Normal file
@@ -0,0 +1,29 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

from typing import Any, Dict, List

from capa.features.extractors.base_extractor import ProcessHandle


def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]:
    """
    find a specific process identified by a process handle.

    args:
        processes: a list of processes extracted by CAPE
        ph: handle of the sought process

    return:
        a CAPE-defined dictionary for the sought process' information
    """

    for process in processes:
        if ph.address.ppid == process["parent_id"] and ph.address.pid == process["process_id"]:
            return process
    return {}

446  capa/features/extractors/cape/models.py  Normal file
@@ -0,0 +1,446 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import binascii
from typing import Any, Dict, List, Union, Literal, Optional

from pydantic import Field, BaseModel, ConfigDict
from typing_extensions import Annotated, TypeAlias
from pydantic.functional_validators import BeforeValidator


def validate_hex_int(value):
    if isinstance(value, str):
        return int(value, 16) if value.startswith("0x") else int(value, 10)
    else:
        return value


def validate_hex_bytes(value):
    return binascii.unhexlify(value) if isinstance(value, str) else value


HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
HexBytes = Annotated[bytes, BeforeValidator(validate_hex_bytes)]

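
A few illustrative cases of the coercion these validators perform:

    assert validate_hex_int("0x10") == 16  # hex string
    assert validate_hex_int("16") == 16    # decimal string
    assert validate_hex_int(16) == 16      # already an int: passed through
    assert validate_hex_bytes("4d5a") == b"MZ"
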
# a model that *cannot* have extra fields.
# if extra fields are present, pydantic raises an exception.
# use this for models we rely upon and cannot change.
#
# for things that may be extended and we don't care,
# use FlexibleModel.
class ExactModel(BaseModel):
    model_config = ConfigDict(extra="forbid")


# a model that can have extra fields that we ignore.
# use this if we don't want to raise an exception for extra
# data fields that we didn't expect.
class FlexibleModel(BaseModel):
    pass


# use this type to indicate that we won't model this data,
# because it's not relevant to our use in capa.
#
# while it's nice to have full coverage of the data shape,
# it can easily change and break our parsing.
# so we really only want to describe what we'll use.
Skip: TypeAlias = Optional[Any]


# mark fields that we haven't seen yet and need to model.
# pydantic should raise an error when encountering data
# in a field with this type.
# then we can update the model with the discovered shape.
TODO: TypeAlias = None
ListTODO: TypeAlias = List[None]
DictTODO: TypeAlias = ExactModel

EmptyDict: TypeAlias = BaseModel
EmptyList: TypeAlias = List[Any]


class Info(FlexibleModel):
    version: str


class ImportedSymbol(ExactModel):
    address: HexInt
    name: Optional[str] = None


class ImportedDll(ExactModel):
    dll: str
    imports: List[ImportedSymbol]


class DirectoryEntry(ExactModel):
    name: str
    virtual_address: HexInt
    size: HexInt


class Section(ExactModel):
    name: str
    raw_address: HexInt
    virtual_address: HexInt
    virtual_size: HexInt
    size_of_data: HexInt
    characteristics: str
    characteristics_raw: HexInt
    entropy: float


class Resource(ExactModel):
    name: str
    language: Optional[str] = None
    sublanguage: str
    filetype: Optional[str]
    offset: HexInt
    size: HexInt
    entropy: float


class DigitalSigner(FlexibleModel):
    md5_fingerprint: str
    not_after: str
    not_before: str
    serial_number: str
    sha1_fingerprint: str
    sha256_fingerprint: str

    issuer_commonName: Optional[str] = None
    issuer_countryName: Optional[str] = None
    issuer_localityName: Optional[str] = None
    issuer_organizationName: Optional[str] = None
    issuer_stateOrProvinceName: Optional[str] = None

    subject_commonName: Optional[str] = None
    subject_countryName: Optional[str] = None
    subject_localityName: Optional[str] = None
    subject_organizationName: Optional[str] = None
    subject_stateOrProvinceName: Optional[str] = None

    extensions_authorityInfoAccess_caIssuers: Optional[str] = None
    extensions_authorityKeyIdentifier: Optional[str] = None
    extensions_cRLDistributionPoints_0: Optional[str] = None
    extensions_certificatePolicies_0: Optional[str] = None
    extensions_subjectAltName_0: Optional[str] = None
    extensions_subjectKeyIdentifier: Optional[str] = None


class AuxSigner(ExactModel):
    name: str
    issued_to: str = Field(alias="Issued to")
    issued_by: str = Field(alias="Issued by")
    expires: str = Field(alias="Expires")
    sha1_hash: str = Field(alias="SHA1 hash")


class Signer(ExactModel):
    aux_sha1: Optional[str] = None
    aux_timestamp: Optional[str] = None
    aux_valid: Optional[bool] = None
    aux_error: Optional[bool] = None
    aux_error_desc: Optional[str] = None
    aux_signers: Optional[List[AuxSigner]] = None


class Overlay(ExactModel):
    offset: HexInt
    size: HexInt


class KV(ExactModel):
    name: str
    value: str


class ExportedSymbol(ExactModel):
    address: HexInt
    name: str
    ordinal: int


class PE(ExactModel):
    peid_signatures: TODO
    imagebase: HexInt
    entrypoint: HexInt
    reported_checksum: HexInt
    actual_checksum: HexInt
    osversion: str
    pdbpath: Optional[str] = None
    timestamp: str

    # List[ImportedDll], or Dict[basename(dll), ImportedDll]
    imports: Union[List[ImportedDll], Dict[str, ImportedDll]]
    imported_dll_count: Optional[int] = None
    imphash: str

    exported_dll_name: Optional[str] = None
    exports: List[ExportedSymbol]

    dirents: List[DirectoryEntry]
    sections: List[Section]

    ep_bytes: Optional[HexBytes] = None

    overlay: Optional[Overlay] = None
    resources: List[Resource]
    versioninfo: List[KV]

    # base64 encoded data
    icon: Optional[str] = None
    # MD5-like hash
    icon_hash: Optional[str] = None
    # MD5-like hash
    icon_fuzzy: Optional[str] = None
    # short hex string
    icon_dhash: Optional[str] = None

    digital_signers: List[DigitalSigner]
    guest_signers: Signer


# TODO(mr-tz): target.file.dotnet, target.file.extracted_files, target.file.extracted_files_tool,
# target.file.extracted_files_time
# https://github.com/mandiant/capa/issues/1814
class File(FlexibleModel):
    type: str
    cape_type_code: Optional[int] = None
    cape_type: Optional[str] = None

    pid: Optional[Union[int, Literal[""]]] = None
    name: Union[List[str], str]
    path: str
    guest_paths: Union[List[str], str, None]
    timestamp: Optional[str] = None

    #
    # hashes
    #
    crc32: str
    md5: str
    sha1: str
    sha256: str
    sha512: str
    sha3_384: str
    ssdeep: str
    # unsure why this would ever be "False"
    tlsh: Optional[Union[str, bool]] = None
    rh_hash: Optional[str] = None

    #
    # other metadata, static analysis
    #
    size: int
    pe: Optional[PE] = None
    ep_bytes: Optional[HexBytes] = None
    entrypoint: Optional[int] = None
    data: Optional[str] = None
    strings: Optional[List[str]] = None

    #
    # detections (skip)
    #
    yara: Skip = None
    cape_yara: Skip = None
    clamav: Skip = None
    virustotal: Skip = None


class ProcessFile(File):
    #
    # like a File, but also has dynamic analysis results
    #
    pid: Optional[int] = None
    process_path: Optional[str] = None
    process_name: Optional[str] = None
    module_path: Optional[str] = None
    virtual_address: Optional[HexInt] = None
    target_pid: Optional[Union[int, str]] = None
    target_path: Optional[str] = None
    target_process: Optional[str] = None


class Argument(ExactModel):
    name: str
    # unsure why an empty list is provided here
    value: Union[HexInt, int, str, EmptyList]
    pretty_value: Optional[str] = None


class Call(ExactModel):
    timestamp: str
    thread_id: int
    category: str

    api: str

    arguments: List[Argument]
    status: bool
    return_: HexInt = Field(alias="return")
    pretty_return: Optional[str] = None

    repeated: int

    # virtual addresses
    caller: HexInt
    parentcaller: HexInt

    # index into calls array
    id: int


class Process(ExactModel):
    process_id: int
    process_name: str
    parent_id: int
    module_path: str
    first_seen: str
    calls: List[Call]
    threads: List[int]
    environ: Dict[str, str]


class ProcessTree(ExactModel):
    name: str
    pid: int
    parent_id: int
    module_path: str
    threads: List[int]
    environ: Dict[str, str]
    children: List["ProcessTree"]


class Summary(ExactModel):
    files: List[str]
    read_files: List[str]
    write_files: List[str]
    delete_files: List[str]
    keys: List[str]
    read_keys: List[str]
    write_keys: List[str]
    delete_keys: List[str]
    executed_commands: List[str]
    resolved_apis: List[str]
    mutexes: List[str]
    created_services: List[str]
    started_services: List[str]


class EncryptedBuffer(ExactModel):
    process_name: str
    pid: int

    api_call: str
    buffer: str
    buffer_size: Optional[int] = None
    crypt_key: Optional[Union[HexInt, str]] = None


class Behavior(ExactModel):
    summary: Summary

    # list of processes, of threads, of calls
    processes: List[Process]
    # tree of processes
    processtree: List[ProcessTree]

    anomaly: List[str]
    encryptedbuffers: List[EncryptedBuffer]
    # these are small objects that describe atomic events,
    # like file move, registry access.
    # we'll detect the same with our API call analysis.
    enhanced: Skip = None


class Target(ExactModel):
    category: str
    file: File
    pe: Optional[PE] = None


class Static(ExactModel):
    pe: Optional[PE] = None
    flare_capa: Skip = None


class Cape(ExactModel):
    payloads: List[ProcessFile]
    configs: Skip = None


# flexible because there may be more sorts of analysis,
# but we only care about the ones described here.
class CapeReport(FlexibleModel):
    # the input file, I think
    target: Target
    # info about the processing job, like machine and distributed metadata.
    info: Info

    #
    # static analysis results
    #
    static: Optional[Static] = None
    strings: Optional[List[str]] = None

    #
    # dynamic analysis results
    #
    # post-processed results: process tree, anomalies, etc
    behavior: Behavior

    # post-processed results: payloads and extracted configs
    CAPE: Optional[Cape] = None
    dropped: Optional[List[File]] = None
    procdump: Optional[List[ProcessFile]] = None
    procmemory: ListTODO

    # =========================================================================
    # information we won't use in capa
    #

    #
    # NBIs and HBIs
    # these are super interesting, but they don't enable us to detect behaviors.
    # they take a lot of code to model and details to maintain.
    #
    # if we come up with a future use for this, go ahead and re-enable!
    #
    network: Skip = None
    suricata: Skip = None
    curtain: Skip = None
    sysmon: Skip = None
    url_analysis: Skip = None

    # screenshot hash values
    deduplicated_shots: Skip = None
    # k-v pairs describing the time it took to run each stage.
    statistics: Skip = None
    # k-v pairs of ATT&CK ID to signature name or similar.
    ttps: Skip = None
    # debug log messages
    debug: Skip = None

    # various signature matches
    # we could potentially extend capa to use this info one day,
    # though it would be quite sandbox-specific,
    # and more detection-oriented than capability detection.
    signatures: Skip = None
    malfamily_tag: Optional[str] = None
    malscore: float
    detections: Skip = None
    detections2pid: Optional[Dict[int, List[str]]] = None
    # AV detections for the sample.
    virustotal: Skip = None

    @classmethod
    def from_buf(cls, buf: bytes) -> "CapeReport":
        return cls.model_validate_json(buf)

48  capa/features/extractors/cape/process.py  Normal file
@@ -0,0 +1,48 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging
from typing import List, Tuple, Iterator

from capa.features.common import String, Feature
from capa.features.address import Address, ThreadAddress
from capa.features.extractors.cape.models import Process
from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle

logger = logging.getLogger(__name__)


def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]:
    """
    get the threads associated with a given process
    """
    process: Process = ph.inner
    threads: List[int] = process.threads

    for thread in threads:
        address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
        yield ThreadHandle(address=address, inner={})


def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    extract strings from a process' provided environment variables.
    """
    process: Process = ph.inner

    for value in (value for value in process.environ.values() if value):
        yield String(value), ph.address


def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
    for handler in PROCESS_HANDLERS:
        for feature, addr in handler(ph):
            yield feature, addr


PROCESS_HANDLERS = (extract_environ_strings,)

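
The handler-tuple pattern keeps each scope's features pluggable; a hedged sketch of what adding a hypothetical process-scope handler would look like (extract_process_path is not part of this diff):

    def extract_process_path(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
        # hypothetical: yield the process image path as a string feature.
        process: Process = ph.inner
        if process.module_path:
            yield String(process.module_path), ph.address

    PROCESS_HANDLERS = (extract_environ_strings, extract_process_path)
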
32  capa/features/extractors/cape/thread.py  Normal file
@@ -0,0 +1,32 @@
|
||||
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging
from typing import Iterator

from capa.features.address import DynamicCallAddress
from capa.features.extractors.helpers import generate_symbols
from capa.features.extractors.cape.models import Process
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle

logger = logging.getLogger(__name__)


def get_calls(ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
    process: Process = ph.inner

    tid = th.address.tid
    for call_index, call in enumerate(process.calls):
        if call.thread_id != tid:
            continue

        for symbol in generate_symbols("", call.api):
            call.api = symbol

        addr = DynamicCallAddress(thread=th.address, id=call_index)
        yield CallHandle(address=addr, inner=call)
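Note the normalization idiom in get_calls: iterating generate_symbols and re-assigning call.api leaves call.api set to the last variant generated. A sketch of the effect, assuming the rewritten generate_symbols shown in the helpers.py hunk further below (include_dll defaults to False, so no dll-qualified variants are produced for an empty dll):

# "CreateFileA" -> generate_symbols("", "CreateFileA") yields
# "CreateFileA" then "CreateFile"; the loop keeps the final one.
api = "CreateFileA"
for symbol in ("CreateFileA", "CreateFile"):  # what the helper yields here
    api = symbol
assert api == "CreateFile"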
@@ -6,6 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import re
import logging
import binascii
import contextlib
@@ -23,8 +24,11 @@ from capa.features.common import (
    OS_AUTO,
    ARCH_ANY,
    FORMAT_PE,
    FORMAT_DEX,
    FORMAT_ELF,
    OS_ANDROID,
    OS_WINDOWS,
    ARCH_DALVIK,
    FORMAT_FREEZE,
    FORMAT_RESULT,
    Arch,
@@ -40,7 +44,9 @@ logger = logging.getLogger(__name__)
# match strings for formats
MATCH_PE = b"MZ"
MATCH_ELF = b"\x7fELF"
MATCH_DEX = b"dex\n"
MATCH_RESULT = b'{"meta":'
MATCH_JSON_OBJECT = b'{"'


def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:
@@ -59,10 +65,17 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
        yield Format(FORMAT_PE), NO_ADDRESS
    elif buf.startswith(MATCH_ELF):
        yield Format(FORMAT_ELF), NO_ADDRESS
    elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
        yield Format(FORMAT_DEX), NO_ADDRESS
    elif is_freeze(buf):
        yield Format(FORMAT_FREEZE), NO_ADDRESS
    elif buf.startswith(MATCH_RESULT):
        yield Format(FORMAT_RESULT), NO_ADDRESS
    elif re.sub(rb"\s", b"", buf[:20]).startswith(MATCH_JSON_OBJECT):
        # potential start of JSON object data without whitespace
        # we don't know what it is exactly, but may support it (e.g. a dynamic CAPE sandbox report)
        # skip verdict here and let subsequent code analyze this further
        return
    else:
        # we likely end up here:
        # 1. handling a file format (e.g. macho)
@@ -89,6 +102,9 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:

        yield Arch(arch), NO_ADDRESS

    elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
        yield Arch(ARCH_DALVIK), NO_ADDRESS

    else:
        # we likely end up here:
        # 1. handling shellcode, or
@@ -122,6 +138,9 @@ def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:

        yield OS(os), NO_ADDRESS

    elif len(buf) > 8 and buf.startswith(MATCH_DEX) and buf[7] == 0x00:
        yield OS(OS_ANDROID), NO_ADDRESS

    else:
        # we likely end up here:
        # 1. handling shellcode, or
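The new DEX sniff keys on the dex header magic: b"dex\n" followed by a three-digit format version string (e.g. "035") and a NUL at offset 7. A self-contained version of the same check:

def looks_like_dex(buf: bytes) -> bool:
    # b"dex\n035\x00" for format version 035; byte 7 is always NUL
    return len(buf) > 8 and buf.startswith(b"dex\n") and buf[7] == 0x00

assert looks_like_dex(b"dex\n035\x00" + b"\x00" * 100)
assert not looks_like_dex(b"MZ\x90\x00")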
capa/features/extractors/dexfile.py (new file, 421 lines)
@@ -0,0 +1,421 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import struct
import logging
from typing import Set, Dict, List, Tuple, Iterator, Optional, TypedDict
from pathlib import Path
from dataclasses import dataclass

import dexparser.disassembler as disassembler
from dexparser import DEXParser, uleb128_value

from capa.features.file import Import, FunctionName
from capa.features.common import (
    OS,
    FORMAT_DEX,
    OS_ANDROID,
    ARCH_DALVIK,
    Arch,
    Class,
    Format,
    String,
    Feature,
    Namespace,
)
from capa.features.address import NO_ADDRESS, Address, DexClassAddress, DexMethodAddress, FileOffsetAddress
from capa.features.extractors.base_extractor import (
    BBHandle,
    InsnHandle,
    SampleHashes,
    FunctionHandle,
    StaticFeatureExtractor,
)

logger = logging.getLogger(__name__)


# Reference: https://source.android.com/docs/core/runtime/dex-format


class DexProtoId(TypedDict):
    shorty_idx: int
    return_type_idx: int
    param_off: int


class DexMethodId(TypedDict):
    class_idx: int
    proto_idx: int
    name_idx: int


@dataclass
class DexAnalyzedMethod:
    class_type: str
    name: str
    shorty_descriptor: str
    return_type: str
    parameters: List[str]
    id_offset: int = 0
    code_offset: int = 0
    access_flags: Optional[int] = None

    @property
    def address(self):
        # NOTE: Some methods do not have code, in that case we use the method_id offset
        if self.has_code:
            return self.code_offset
        else:
            return self.id_offset

    @property
    def has_code(self):
        # NOTE: code_offset is zero if the method is abstract/native or not defined in a class
        return self.code_offset != 0

    @property
    def has_definition(self):
        # NOTE: access_flags is only known if the method is defined in a class
        return self.access_flags is not None

    @property
    def qualified_name(self):
        return f"{self.class_type}::{self.name}"


class DexFieldId(TypedDict):
    class_idx: int
    type_idx: int
    name_idx: int


class DexClassDef(TypedDict):
    class_idx: int
    access_flags: int
    superclass_idx: int
    interfaces_off: int
    source_file_idx: int
    annotations_off: int
    class_data_off: int
    static_values_off: int


class DexFieldDef(TypedDict):
    diff: int
    access_flags: int


class DexMethodDef(TypedDict):
    diff: int
    access_flags: int
    code_off: int


class DexClassData(TypedDict):
    static_fields: List[DexFieldDef]
    instance_fields: List[DexFieldDef]
    direct_methods: List[DexMethodDef]
    virtual_methods: List[DexMethodDef]


@dataclass
class DexAnalyzedClass:
    offset: int
    class_type: str
    superclass_type: str
    interfaces: List[str]
    source_file: str
    data: Optional[DexClassData]


class DexAnnotation(TypedDict):
    visibility: int
    type_idx_diff: int
    size_diff: int
    name_idx_diff: int
    value_type: int
    encoded_value: int


class DexAnalysis:
    def get_strings(self):
        # NOTE: Copied from dexparser, upstream later

        strings: List[Tuple[int, bytes]] = []
        string_ids_off = self.dex.header_data["string_ids_off"]

        for i in range(self.dex.header_data["string_ids_size"]):
            offset = struct.unpack("<L", self.dex.data[string_ids_off + (i * 4) : string_ids_off + (i * 4) + 4])[0]
            c_size, size_offset = uleb128_value(self.dex.data, offset)
            c_char = self.dex.data[offset + size_offset : offset + size_offset + c_size]
            strings.append((offset, c_char))

        return strings

    def __init__(self, dex: DEXParser):
        self.dex = dex

        self.strings = self.get_strings()
        self.strings_utf8: List[str] = []
        for _, data in self.strings:
            # NOTE: This is technically incorrect
            # Reference: https://source.android.com/devices/tech/dalvik/dex-format#mutf-8
            self.strings_utf8.append(data.decode("utf-8", errors="backslashreplace"))

        self.type_ids: List[int] = dex.get_typeids()
        self.method_ids: List[DexMethodId] = dex.get_methods()
        self.proto_ids: List[DexProtoId] = dex.get_protoids()
        self.field_ids: List[DexFieldId] = dex.get_fieldids()
        self.class_defs: List[DexClassDef] = dex.get_classdef_data()

        self._is_analyzing = True
        self.used_classes: Set[str] = set()
        self.classes = self._analyze_classes()
        self.methods = self._analyze_methods()
        self.methods_by_address: Dict[int, DexAnalyzedMethod] = {m.address: m for m in self.methods}

        self.namespaces: Set[str] = set()
        for class_type in self.used_classes:
            idx = class_type.rfind(".")
            if idx != -1:
                self.namespaces.add(class_type[:idx])

        for class_type in self.classes:
            self.used_classes.remove(class_type)

        # Only available after code analysis
        self._is_analyzing = False

    def analyze_code(self):
        # Loop over the classes and analyze them
        # self.classes: List[DexClass] = self.dex.get_class_data(offset=-1)
        # self.annotations: List[DexAnnotation] = dex.get_annotations(offset=-1)
        # self.static_values: List[int] = dex.get_static_values(offset=-1)
        pass

    def get_string(self, index: int) -> str:
        return self.strings_utf8[index]

    def _decode_descriptor(self, descriptor: str) -> str:
        first = descriptor[0]
        if first == "L":
            pretty = descriptor[1:-1].replace("/", ".")
            if self._is_analyzing:
                self.used_classes.add(pretty)
        elif first == "[":
            pretty = self._decode_descriptor(descriptor[1:]) + "[]"
        else:
            pretty = disassembler.type_descriptor[first]
        return pretty

    def get_pretty_type(self, index: int) -> str:
        if index == 0xFFFFFFFF:
            return "<NO_INDEX>"
        descriptor = self.get_string(self.type_ids[index])
        return self._decode_descriptor(descriptor)

    def _analyze_classes(self):
        classes: Dict[str, DexAnalyzedClass] = {}
        offset = self.dex.header_data["class_defs_off"]
        for index, clazz in enumerate(self.class_defs):
            class_type = self.get_pretty_type(clazz["class_idx"])

            # Superclass
            superclass_idx = clazz["superclass_idx"]
            if superclass_idx != 0xFFFFFFFF:
                superclass_type = self.get_pretty_type(superclass_idx)
            else:
                superclass_type = ""

            # Interfaces
            interfaces = []
            interfaces_offset = clazz["interfaces_off"]
            if interfaces_offset != 0:
                size = struct.unpack("<L", self.dex.data[interfaces_offset : interfaces_offset + 4])[0]
                for i in range(size):
                    type_idx = struct.unpack(
                        "<H", self.dex.data[interfaces_offset + 4 + i * 2 : interfaces_offset + 6 + i * 2]
                    )[0]
                    interface_type = self.get_pretty_type(type_idx)
                    interfaces.append(interface_type)

            # Source file
            source_file_idx = clazz["source_file_idx"]
            if source_file_idx != 0xFFFFFFFF:
                source_file = self.get_string(source_file_idx)
            else:
                source_file = ""

            # Data
            data_offset = clazz["class_data_off"]
            if data_offset != 0:
                data = self.dex.get_class_data(data_offset)
            else:
                data = None

            classes[class_type] = DexAnalyzedClass(
                offset=offset + index * 32,
                class_type=class_type,
                superclass_type=superclass_type,
                interfaces=interfaces,
                source_file=source_file,
                data=data,
            )
        return classes

    def _analyze_methods(self):
        methods: List[DexAnalyzedMethod] = []
        for method_id in self.method_ids:
            proto = self.proto_ids[method_id["proto_idx"]]
            parameters = []

            param_off = proto["param_off"]
            if param_off != 0:
                size = struct.unpack("<L", self.dex.data[param_off : param_off + 4])[0]
                for i in range(size):
                    type_idx = struct.unpack("<H", self.dex.data[param_off + 4 + i * 2 : param_off + 6 + i * 2])[0]
                    param_type = self.get_pretty_type(type_idx)
                    parameters.append(param_type)

            methods.append(
                DexAnalyzedMethod(
                    class_type=self.get_pretty_type(method_id["class_idx"]),
                    name=self.get_string(method_id["name_idx"]),
                    shorty_descriptor=self.get_string(proto["shorty_idx"]),
                    return_type=self.get_pretty_type(proto["return_type_idx"]),
                    parameters=parameters,
                )
            )

        # Fill in the missing method data
        for clazz in self.classes.values():
            if clazz.data is None:
                continue

            for method_def in clazz.data["direct_methods"]:
                diff = method_def["diff"]
                methods[diff].access_flags = method_def["access_flags"]
                methods[diff].code_offset = method_def["code_off"]

            for method_def in clazz.data["virtual_methods"]:
                diff = method_def["diff"]
                methods[diff].access_flags = method_def["access_flags"]
                methods[diff].code_offset = method_def["code_off"]

        # Fill in the missing code offsets with fake data
        offset = self.dex.header_data["method_ids_off"]
        for index, method in enumerate(methods):
            method.id_offset = offset + index * 8

        return methods

    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        yield Format(FORMAT_DEX), NO_ADDRESS

        for i in range(len(self.strings)):
            yield String(self.strings_utf8[i]), FileOffsetAddress(self.strings[i][0])

        for method in self.methods:
            if method.has_definition:
                yield FunctionName(method.qualified_name), DexMethodAddress(method.address)
            else:
                yield Import(method.qualified_name), DexMethodAddress(method.address)

        for namespace in self.namespaces:
            yield Namespace(namespace), NO_ADDRESS

        for clazz in self.classes.values():
            yield Class(clazz.class_type), DexClassAddress(clazz.offset)

        for class_type in self.used_classes:
            yield Class(class_type), NO_ADDRESS


class DexFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, path: Path, *, code_analysis: bool):
        super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
        self.path: Path = path
        self.code_analysis = code_analysis
        self.dex = DEXParser(filedir=str(path))
        self.analysis = DexAnalysis(self.dex)

        # Perform more expensive code analysis only when requested
        if self.code_analysis:
            self.analysis.analyze_code()

    def todo(self):
        import inspect

        message = "[DexparserFeatureExtractor:TODO] " + inspect.stack()[1].function
        logger.debug(message)

    def get_base_address(self):
        return NO_ADDRESS

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        # These are hardcoded global features
        yield Format(FORMAT_DEX), NO_ADDRESS
        yield OS(OS_ANDROID), NO_ADDRESS
        yield Arch(ARCH_DALVIK), NO_ADDRESS

    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        yield from self.analysis.extract_file_features()

    def is_library_function(self, addr: Address) -> bool:
        assert isinstance(addr, DexMethodAddress)
        method = self.analysis.methods_by_address[addr]
        # exclude androidx/kotlin stuff?
        return not method.has_definition

    def get_function_name(self, addr: Address) -> str:
        assert isinstance(addr, DexMethodAddress)
        method = self.analysis.methods_by_address[addr]
        return method.qualified_name

    def get_functions(self) -> Iterator[FunctionHandle]:
        if not self.code_analysis:
            raise Exception("code analysis is disabled")

        for method in self.analysis.methods:
            yield FunctionHandle(DexMethodAddress(method.address), method)

    def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
        if not self.code_analysis:
            raise Exception("code analysis is disabled")
        method: DexAnalyzedMethod = f.inner
        if method.has_code:
            return self.todo()
        yield

    def get_basic_blocks(self, f: FunctionHandle) -> Iterator[BBHandle]:
        if not self.code_analysis:
            raise Exception("code analysis is disabled")
        method: DexAnalyzedMethod = f.inner
        if method.has_code:
            return self.todo()
        yield

    def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
        if not self.code_analysis:
            raise Exception("code analysis is disabled")
        return self.todo()
        yield

    def get_instructions(self, f: FunctionHandle, bb: BBHandle) -> Iterator[InsnHandle]:
        if not self.code_analysis:
            raise Exception("code analysis is disabled")
        return self.todo()
        yield

    def extract_insn_features(
        self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
    ) -> Iterator[Tuple[Feature, Address]]:
        if not self.code_analysis:
            raise Exception("code analysis is disabled")
        return self.todo()
        yield
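get_strings above walks the DEX string_ids table: each entry is a 4-byte offset to a ULEB128 length followed by the (M)UTF-8 bytes. A minimal sketch of ULEB128 decoding, mirroring the (value, bytes-consumed) shape of dexparser's uleb128_value:

def decode_uleb128(data: bytes, offset: int):
    # little-endian base-128: 7 payload bits per byte, high bit is continuation
    value = 0
    shift = 0
    consumed = 0
    while True:
        byte = data[offset + consumed]
        consumed += 1
        value |= (byte & 0x7F) << shift
        if byte & 0x80 == 0:
            break
        shift += 7
    return value, consumed

# 0x80 0x01 encodes 128: first byte carries 0 with the continuation bit set
assert decode_uleb128(b"\x80\x01", 0) == (128, 2)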
@@ -22,7 +22,13 @@ import capa.features.extractors.dnfile.function
from capa.features.common import Feature
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress
from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
-from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
+from capa.features.extractors.base_extractor import (
+    BBHandle,
+    InsnHandle,
+    SampleHashes,
+    FunctionHandle,
+    StaticFeatureExtractor,
+)
from capa.features.extractors.dnfile.helpers import (
    get_dotnet_types,
    get_dotnet_fields,
@@ -68,10 +74,10 @@ class DnFileFeatureExtractorCache:
        return self.types.get(token)


-class DnfileFeatureExtractor(FeatureExtractor):
+class DnfileFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, path: Path):
-        super().__init__()
        self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
+        super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))

        # pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
        # most relevant at instruction scope
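SampleHashes.from_bytes, now threaded through each extractor constructor, presumably boils down to hashing the raw sample bytes; a sketch under that assumption (the real dataclass lives in capa.features.extractors.base_extractor):

import hashlib
from typing import Tuple

def sample_hashes(buf: bytes) -> Tuple[str, str, str]:
    # md5/sha1/sha256 of the file contents, as hex strings
    return (
        hashlib.md5(buf).hexdigest(),
        hashlib.sha1(buf).hexdigest(),
        hashlib.sha256(buf).hexdigest(),
    )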
@@ -1,158 +0,0 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Tuple, Iterator
from pathlib import Path

import dnfile
import pefile

from capa.features.common import (
    OS,
    OS_ANY,
    ARCH_ANY,
    ARCH_I386,
    FORMAT_PE,
    ARCH_AMD64,
    FORMAT_DOTNET,
    Arch,
    Format,
    Feature,
)
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FeatureExtractor

logger = logging.getLogger(__name__)


def extract_file_format(**kwargs) -> Iterator[Tuple[Feature, Address]]:
    yield Format(FORMAT_PE), NO_ADDRESS
    yield Format(FORMAT_DOTNET), NO_ADDRESS


def extract_file_os(**kwargs) -> Iterator[Tuple[Feature, Address]]:
    yield OS(OS_ANY), NO_ADDRESS


def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Feature, Address]]:
    # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
    # .NET 4.5 added option: any CPU, 32-bit preferred
    assert pe.net is not None
    assert pe.net.Flags is not None

    if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE:
        yield Arch(ARCH_I386), NO_ADDRESS
    elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS:
        yield Arch(ARCH_AMD64), NO_ADDRESS
    else:
        yield Arch(ARCH_ANY), NO_ADDRESS


def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
    for file_handler in FILE_HANDLERS:
        for feature, address in file_handler(pe=pe):  # type: ignore
            yield feature, address


FILE_HANDLERS = (
    # extract_file_export_names,
    # extract_file_import_names,
    # extract_file_section_names,
    # extract_file_strings,
    # extract_file_function_names,
    extract_file_format,
)


def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
    for handler in GLOBAL_HANDLERS:
        for feature, addr in handler(pe=pe):  # type: ignore
            yield feature, addr


GLOBAL_HANDLERS = (
    extract_file_os,
    extract_file_arch,
)


class DnfileFeatureExtractor(FeatureExtractor):
    def __init__(self, path: Path):
        super().__init__()
        self.path: Path = path
        self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

    def get_base_address(self) -> AbsoluteVirtualAddress:
        return AbsoluteVirtualAddress(0x0)

    def get_entry_point(self) -> int:
        # self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT
        # True: native EP: Token
        # False: managed EP: RVA
        assert self.pe.net is not None
        assert self.pe.net.struct is not None

        return self.pe.net.struct.EntryPointTokenOrRva

    def extract_global_features(self):
        yield from extract_global_features(self.pe)

    def extract_file_features(self):
        yield from extract_file_features(self.pe)

    def is_dotnet_file(self) -> bool:
        return bool(self.pe.net)

    def is_mixed_mode(self) -> bool:
        assert self.pe is not None
        assert self.pe.net is not None
        assert self.pe.net.Flags is not None

        return not bool(self.pe.net.Flags.CLR_ILONLY)

    def get_runtime_version(self) -> Tuple[int, int]:
        assert self.pe is not None
        assert self.pe.net is not None
        assert self.pe.net.struct is not None

        return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion

    def get_meta_version_string(self) -> str:
        assert self.pe.net is not None
        assert self.pe.net.metadata is not None
        assert self.pe.net.metadata.struct is not None
        assert self.pe.net.metadata.struct.Version is not None

        vbuf = self.pe.net.metadata.struct.Version
        assert isinstance(vbuf, bytes)

        return vbuf.rstrip(b"\x00").decode("utf-8")

    def get_functions(self):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_function_features(self, f):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_basic_blocks(self, f):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_basic_block_features(self, f, bb):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_instructions(self, f, bb):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_insn_features(self, f, bb, insn):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def is_library_function(self, va):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_function_name(self, va):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
@@ -31,9 +31,9 @@ from capa.features.common import (
    Characteristic,
)
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress
-from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.dnfile.types import DnType
+from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
from capa.features.extractors.dnfile.helpers import (
    DnType,
    iter_dotnet_table,
    is_dotnet_mixed_mode,
    get_dotnet_managed_imports,
@@ -57,7 +57,7 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor

    for imp in get_dotnet_unmanaged_imports(pe):
        # like kernel32.CreateFileA
-        for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method):
+        for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method, include_dll=True):
            yield Import(name), DNTokenAddress(imp.token)


@@ -165,9 +165,9 @@ GLOBAL_HANDLERS = (
)


-class DotnetFileFeatureExtractor(FeatureExtractor):
+class DotnetFileFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, path: Path):
-        super().__init__()
+        super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
        self.path: Path = path
        self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

@@ -17,7 +17,7 @@ import capa.features.extractors.common
from capa.features.file import Export, Import, Section
from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
-from capa.features.extractors.base_extractor import FeatureExtractor
+from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor

logger = logging.getLogger(__name__)

@@ -154,9 +154,9 @@ GLOBAL_HANDLERS = (
)


-class ElfFeatureExtractor(FeatureExtractor):
+class ElfFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, path: Path):
-        super().__init__()
+        super().__init__(SampleHashes.from_bytes(path.read_bytes()))
        self.path: Path = path
        self.elf = ELFFile(io.BytesIO(path.read_bytes()))

@@ -14,14 +14,32 @@ import capa.features.extractors.ghidra.function
import capa.features.extractors.ghidra.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
-from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
+from capa.features.extractors.base_extractor import (
+    BBHandle,
+    InsnHandle,
+    SampleHashes,
+    FunctionHandle,
+    StaticFeatureExtractor,
+)


-class GhidraFeatureExtractor(FeatureExtractor):
+class GhidraFeatureExtractor(StaticFeatureExtractor):
    def __init__(self):
-        super().__init__()
        import capa.features.extractors.ghidra.helpers as ghidra_helpers

+        super().__init__(
+            SampleHashes(
+                md5=capa.ghidra.helpers.get_file_md5(),
+                # ghidra doesn't expose this hash.
+                # https://ghidra.re/ghidra_docs/api/ghidra/program/model/listing/Program.html
+                #
+                # the hashes are stored in the database, not computed on the fly,
+                # so it's probably not trivial to add SHA1.
+                sha1="",
+                sha256=capa.ghidra.helpers.get_file_sha256(),
+            )
+        )
+
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
        self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
@@ -34,7 +34,7 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]])
    for match in re.finditer(re.escape(mzx), block_bytez):
        todo.append((match.start(), mzx, pex, i))

-    seg_max = len(block_bytez)  # type: ignore [name-defined] # noqa: F821
+    seg_max = len(block_bytez)  # noqa: F821
    while len(todo):
        off, mzx, pex, i = todo.pop()

@@ -112,7 +112,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
        if "Ordinal_" in fstr[1]:
            fstr[1] = f"#{fstr[1].split('_')[1]}"

-        for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1]):
+        for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1], include_dll=True):
            yield Import(name), AbsoluteVirtualAddress(addr)

@@ -41,38 +41,50 @@ def is_ordinal(symbol: str) -> bool:
    return False


-def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
+def generate_symbols(dll: str, symbol: str, include_dll=False) -> Iterator[str]:
    """
    for a given dll and symbol name, generate variants.
    we over-generate features to make matching easier.
    these include:
      - kernel32.CreateFileA
      - kernel32.CreateFile
      - CreateFileA
      - CreateFile
      - ws2_32.#1
+
+    note that since capa v7 only `import` features and APIs called via ordinal include DLL names:
+      - kernel32.CreateFileA
+      - kernel32.CreateFile
+      - ws2_32.#1
+
+    for `api` features dll names are good for documentation but not used during matching
    """
    # normalize dll name
    dll = dll.lower()

-    # kernel32.CreateFileA
-    yield f"{dll}.{symbol}"
+    # trim extensions observed in dynamic traces
+    dll = dll[0:-4] if dll.endswith(".dll") else dll
+    dll = dll[0:-4] if dll.endswith(".drv") else dll
+
+    if include_dll or is_ordinal(symbol):
+        # ws2_32.#1
+        # kernel32.CreateFileA
+        yield f"{dll}.{symbol}"

    if not is_ordinal(symbol):
        # CreateFileA
        yield symbol

-        if is_aw_function(symbol):
-            # kernel32.CreateFile
-            yield f"{dll}.{symbol[:-1]}"
+    if is_aw_function(symbol):
+        if include_dll:
+            # kernel32.CreateFile
+            yield f"{dll}.{symbol[:-1]}"
+
+        if not is_ordinal(symbol):
            # CreateFile
            yield symbol[:-1]


def reformat_forwarded_export_name(forwarded_name: str) -> str:
    """
-    a forwarded export has a DLL name/path an symbol name.
+    a forwarded export has a DLL name/path and symbol name.
    we want the former to be lowercase, and the latter to be verbatim.
    """

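A standalone sketch of the rewritten variant generation (simplified: ordinal handling dropped, A/W detection approximated by a suffix check):

from typing import Iterator

def generate_symbols(dll: str, symbol: str, include_dll: bool = False) -> Iterator[str]:
    dll = dll.lower()
    dll = dll[:-4] if dll.endswith((".dll", ".drv")) else dll
    if include_dll:
        yield f"{dll}.{symbol}"           # kernel32.CreateFileA
    yield symbol                          # CreateFileA
    if symbol.endswith(("A", "W")):
        if include_dll:
            yield f"{dll}.{symbol[:-1]}"  # kernel32.CreateFile
        yield symbol[:-1]                 # CreateFile

print(list(generate_symbols("KERNEL32.dll", "CreateFileA", include_dll=True)))
# ['kernel32.CreateFileA', 'CreateFileA', 'kernel32.CreateFile', 'CreateFile']
print(list(generate_symbols("KERNEL32.dll", "CreateFileA")))
# ['CreateFileA', 'CreateFile']

This is why every static import-extraction call site in this changeset passes include_dll=True: import features keep their dll-qualified variants, while api features matched against dynamic traces do not.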
@@ -8,6 +8,7 @@
from typing import List, Tuple, Iterator

import idaapi
+import ida_nalt

import capa.ida.helpers
import capa.features.extractors.elf
@@ -18,12 +19,22 @@ import capa.features.extractors.ida.function
import capa.features.extractors.ida.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
-from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
+from capa.features.extractors.base_extractor import (
+    BBHandle,
+    InsnHandle,
+    SampleHashes,
+    FunctionHandle,
+    StaticFeatureExtractor,
+)


-class IdaFeatureExtractor(FeatureExtractor):
+class IdaFeatureExtractor(StaticFeatureExtractor):
    def __init__(self):
-        super().__init__()
+        super().__init__(
+            hashes=SampleHashes(
+                md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
+            )
+        )
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
        self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
@@ -110,7 +110,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
        if info[1] and info[2]:
            # e.g. in mimikatz: ('cabinet', 'FCIAddFile', 11L)
            # extract by name here and by ordinal below
-            for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]):
+            for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1], include_dll=True):
                yield Import(name), addr
            dll = info[0]
            symbol = f"#{info[2]}"
@@ -123,7 +123,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
        else:
            continue

-        for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
+        for name in capa.features.extractors.helpers.generate_symbols(dll, symbol, include_dll=True):
            yield Import(name), addr

    for ea, info in capa.features.extractors.ida.helpers.get_file_externs().items():
@@ -5,12 +5,24 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
-from typing import Dict, List, Tuple
+from typing import Dict, List, Tuple, Union
from dataclasses import dataclass

+from typing_extensions import TypeAlias
+
from capa.features.common import Feature
-from capa.features.address import NO_ADDRESS, Address
-from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
+from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, DynamicCallAddress
+from capa.features.extractors.base_extractor import (
+    BBHandle,
+    CallHandle,
+    InsnHandle,
+    SampleHashes,
+    ThreadHandle,
+    ProcessHandle,
+    FunctionHandle,
+    StaticFeatureExtractor,
+    DynamicFeatureExtractor,
+)


@dataclass
@@ -31,7 +43,7 @@ class FunctionFeatures:


@dataclass
-class NullFeatureExtractor(FeatureExtractor):
+class NullStaticFeatureExtractor(StaticFeatureExtractor):
    """
    An extractor that extracts some user-provided features.

@@ -39,6 +51,7 @@ class NullFeatureExtractor(FeatureExtractor):
    """

    base_address: Address
+    sample_hashes: SampleHashes
    global_features: List[Feature]
    file_features: List[Tuple[Address, Feature]]
    functions: Dict[Address, FunctionFeatures]
@@ -46,6 +59,9 @@ class NullFeatureExtractor(FeatureExtractor):
    def get_base_address(self):
        return self.base_address

+    def get_sample_hashes(self) -> SampleHashes:
+        return self.sample_hashes
+
    def extract_global_features(self):
        for feature in self.global_features:
            yield feature, NO_ADDRESS
@@ -77,3 +93,78 @@ class NullFeatureExtractor(FeatureExtractor):
    def extract_insn_features(self, f, bb, insn):
        for address, feature in self.functions[f.address].basic_blocks[bb.address].instructions[insn.address].features:
            yield feature, address


@dataclass
class CallFeatures:
    name: str
    features: List[Tuple[Address, Feature]]


@dataclass
class ThreadFeatures:
    features: List[Tuple[Address, Feature]]
    calls: Dict[Address, CallFeatures]


@dataclass
class ProcessFeatures:
    features: List[Tuple[Address, Feature]]
    threads: Dict[Address, ThreadFeatures]
    name: str


@dataclass
class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
    base_address: Address
    sample_hashes: SampleHashes
    global_features: List[Feature]
    file_features: List[Tuple[Address, Feature]]
    processes: Dict[Address, ProcessFeatures]

    def extract_global_features(self):
        for feature in self.global_features:
            yield feature, NO_ADDRESS

    def get_sample_hashes(self) -> SampleHashes:
        return self.sample_hashes

    def extract_file_features(self):
        for address, feature in self.file_features:
            yield feature, address

    def get_processes(self):
        for address in sorted(self.processes.keys()):
            assert isinstance(address, ProcessAddress)
            yield ProcessHandle(address=address, inner={})

    def extract_process_features(self, ph):
        for addr, feature in self.processes[ph.address].features:
            yield feature, addr

    def get_process_name(self, ph) -> str:
        return self.processes[ph.address].name

    def get_threads(self, ph):
        for address in sorted(self.processes[ph.address].threads.keys()):
            assert isinstance(address, ThreadAddress)
            yield ThreadHandle(address=address, inner={})

    def extract_thread_features(self, ph, th):
        for addr, feature in self.processes[ph.address].threads[th.address].features:
            yield feature, addr

    def get_calls(self, ph, th):
        for address in sorted(self.processes[ph.address].threads[th.address].calls.keys()):
            assert isinstance(address, DynamicCallAddress)
            yield CallHandle(address=address, inner={})

    def extract_call_features(self, ph, th, ch):
        for address, feature in self.processes[ph.address].threads[th.address].calls[ch.address].features:
            yield feature, address

    def get_call_name(self, ph, th, ch) -> str:
        return self.processes[ph.address].threads[th.address].calls[ch.address].name


NullFeatureExtractor: TypeAlias = Union[NullStaticFeatureExtractor, NullDynamicFeatureExtractor]
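A minimal sketch of building a NullStaticFeatureExtractor by hand, assuming the dataclass fields shown above; this is the shape loads_static (further below) constructs when thawing a freeze document:

from capa.features.common import OS, OS_WINDOWS, String
from capa.features.address import AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import SampleHashes
import capa.features.extractors.null as null

extractor = null.NullStaticFeatureExtractor(
    base_address=AbsoluteVirtualAddress(0x400000),
    sample_hashes=SampleHashes(md5="", sha1="", sha256=""),
    global_features=[OS(OS_WINDOWS)],
    file_features=[(AbsoluteVirtualAddress(0x401000), String("hello"))],
    functions={},
)

for feature, addr in extractor.extract_global_features():
    print(feature, addr)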
@@ -19,7 +19,7 @@ import capa.features.extractors.strings
from capa.features.file import Export, Import, Section
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Characteristic
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
-from capa.features.extractors.base_extractor import FeatureExtractor
+from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor

logger = logging.getLogger(__name__)

@@ -84,7 +84,7 @@ def extract_file_import_names(pe, **kwargs):
        except UnicodeDecodeError:
            continue

-        for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
+        for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True):
            yield Import(name), AbsoluteVirtualAddress(imp.address)


@@ -185,9 +185,9 @@ GLOBAL_HANDLERS = (
)


-class PefileFeatureExtractor(FeatureExtractor):
+class PefileFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, path: Path):
-        super().__init__()
+        super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
        self.path: Path = path
        self.pe = pefile.PE(str(path))

@@ -140,7 +140,7 @@ def is_printable_ascii(chars: bytes) -> bool:


def is_printable_utf16le(chars: bytes) -> bool:
-    if all(c == b"\x00" for c in chars[1::2]):
+    if all(c == 0x0 for c in chars[1::2]):
        return is_printable_ascii(chars[::2])
    return False

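The one-character change above fixes a real bug: iterating a bytes object yields ints, so comparing elements against b"\x00" never matched. An illustration:

chars = "AB".encode("utf-16le")                 # b'A\x00B\x00'
print([c for c in chars[1::2]])                 # [0, 0] -- ints, not bytes
print(all(c == b"\x00" for c in chars[1::2]))   # False: int != bytes, always
print(all(c == 0x0 for c in chars[1::2]))       # True: int == int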
@@ -20,17 +20,23 @@ import capa.features.extractors.viv.function
import capa.features.extractors.viv.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
-from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
+from capa.features.extractors.base_extractor import (
+    BBHandle,
+    InsnHandle,
+    SampleHashes,
+    FunctionHandle,
+    StaticFeatureExtractor,
+)

logger = logging.getLogger(__name__)


-class VivisectFeatureExtractor(FeatureExtractor):
+class VivisectFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, vw, path: Path, os):
-        super().__init__()
        self.vw = vw
        self.path = path
        self.buf = path.read_bytes()
+        super().__init__(hashes=SampleHashes.from_bytes(self.buf))

        # pre-compute these because we'll yield them at *every* scope.
        self.global_features: List[Tuple[Feature, Address]] = []
@@ -73,7 +73,7 @@ def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]
        impname = "#" + impname[len("ord") :]

    addr = AbsoluteVirtualAddress(va)
-    for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
+    for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True):
        yield Import(name), addr

@@ -9,13 +9,18 @@ Unless required by applicable law or agreed to in writing, software distributed
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""
+import json
import zlib
import logging
from enum import Enum
-from typing import List, Tuple, Union
+from typing import List, Tuple, Union, Literal

from pydantic import Field, BaseModel, ConfigDict

+# TODO(williballenthin): use typing.TypeAlias directly in Python 3.10+
+# https://github.com/mandiant/capa/issues/1699
+from typing_extensions import TypeAlias
+
import capa.helpers
import capa.version
import capa.features.file
@@ -23,12 +28,20 @@ import capa.features.insn
import capa.features.common
import capa.features.address
import capa.features.basicblock
import capa.features.extractors.base_extractor
+import capa.features.extractors.null as null
from capa.helpers import assert_never
from capa.features.freeze.features import Feature, feature_from_capa
+from capa.features.extractors.base_extractor import (
+    SampleHashes,
+    FeatureExtractor,
+    StaticFeatureExtractor,
+    DynamicFeatureExtractor,
+)

logger = logging.getLogger(__name__)

+CURRENT_VERSION = 3
+
class HashableModel(BaseModel):
    model_config = ConfigDict(frozen=True)
@@ -40,12 +53,17 @@ class AddressType(str, Enum):
    FILE = "file"
    DN_TOKEN = "dn token"
    DN_TOKEN_OFFSET = "dn token offset"
+    DEX_METHOD_INDEX = "dex method index"
+    DEX_CLASS_INDEX = "dex class index"
+    PROCESS = "process"
+    THREAD = "thread"
+    CALL = "call"
    NO_ADDRESS = "no address"


class Address(HashableModel):
    type: AddressType
-    value: Union[int, Tuple[int, int], None] = None  # None default value to support deserialization of NO_ADDRESS
+    value: Union[int, Tuple[int, ...], None] = None  # None default value to support deserialization of NO_ADDRESS

    @classmethod
    def from_capa(cls, a: capa.features.address.Address) -> "Address":
@@ -64,6 +82,21 @@ class Address(HashableModel):
        elif isinstance(a, capa.features.address.DNTokenOffsetAddress):
            return cls(type=AddressType.DN_TOKEN_OFFSET, value=(a.token, a.offset))

+        elif isinstance(a, capa.features.address.DexMethodAddress):
+            return cls(type=AddressType.DEX_METHOD_INDEX, value=int(a))
+
+        elif isinstance(a, capa.features.address.DexClassAddress):
+            return cls(type=AddressType.DEX_CLASS_INDEX, value=int(a))
+
+        elif isinstance(a, capa.features.address.ProcessAddress):
+            return cls(type=AddressType.PROCESS, value=(a.ppid, a.pid))
+
+        elif isinstance(a, capa.features.address.ThreadAddress):
+            return cls(type=AddressType.THREAD, value=(a.process.ppid, a.process.pid, a.tid))
+
+        elif isinstance(a, capa.features.address.DynamicCallAddress):
+            return cls(type=AddressType.CALL, value=(a.thread.process.ppid, a.thread.process.pid, a.thread.tid, a.id))
+
        elif a == capa.features.address.NO_ADDRESS or isinstance(a, capa.features.address._NoAddress):
            return cls(type=AddressType.NO_ADDRESS, value=None)

@@ -100,6 +133,41 @@ class Address(HashableModel):
            assert isinstance(offset, int)
            return capa.features.address.DNTokenOffsetAddress(token, offset)

+        elif self.type is AddressType.DEX_METHOD_INDEX:
+            assert isinstance(self.value, int)
+            return capa.features.address.DexMethodAddress(self.value)
+
+        elif self.type is AddressType.DEX_CLASS_INDEX:
+            assert isinstance(self.value, int)
+            return capa.features.address.DexClassAddress(self.value)
+
+        elif self.type is AddressType.PROCESS:
+            assert isinstance(self.value, tuple)
+            ppid, pid = self.value
+            assert isinstance(ppid, int)
+            assert isinstance(pid, int)
+            return capa.features.address.ProcessAddress(ppid=ppid, pid=pid)
+
+        elif self.type is AddressType.THREAD:
+            assert isinstance(self.value, tuple)
+            ppid, pid, tid = self.value
+            assert isinstance(ppid, int)
+            assert isinstance(pid, int)
+            assert isinstance(tid, int)
+            return capa.features.address.ThreadAddress(
+                process=capa.features.address.ProcessAddress(ppid=ppid, pid=pid), tid=tid
+            )
+
+        elif self.type is AddressType.CALL:
+            assert isinstance(self.value, tuple)
+            ppid, pid, tid, id_ = self.value
+            return capa.features.address.DynamicCallAddress(
+                thread=capa.features.address.ThreadAddress(
+                    process=capa.features.address.ProcessAddress(ppid=ppid, pid=pid), tid=tid
+                ),
+                id=id_,
+            )
+
        elif self.type is AddressType.NO_ADDRESS:
            return capa.features.address.NO_ADDRESS

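A small sketch of the new dynamic-address serialization round trip implied by the from_capa/to_capa branches above (module paths follow this changeset):

import capa.features.address as ca
from capa.features.freeze import Address as FrozenAddress  # the pydantic model above

ta = ca.ThreadAddress(process=ca.ProcessAddress(ppid=1, pid=1234), tid=5678)
frozen = FrozenAddress.from_capa(ta)  # Address(type=THREAD, value=(1, 1234, 5678))
assert frozen.to_capa() == ta         # to_capa() rebuilds the nested process/thread addresses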
@@ -130,6 +198,48 @@ class FileFeature(HashableModel):
    feature: Feature


+class ProcessFeature(HashableModel):
+    """
+    args:
+        process: the address of the process to which this feature belongs.
+        address: the address at which this feature is found.
+
+    process != address because, e.g., the feature may be found *within* the scope (process).
+    """
+
+    process: Address
+    address: Address
+    feature: Feature
+
+
+class ThreadFeature(HashableModel):
+    """
+    args:
+        thread: the address of the thread to which this feature belongs.
+        address: the address at which this feature is found.
+
+    thread != address because, e.g., the feature may be found *within* the scope (thread).
+    """
+
+    thread: Address
+    address: Address
+    feature: Feature
+
+
+class CallFeature(HashableModel):
+    """
+    args:
+        call: the address of the call to which this feature belongs.
+        address: the address at which this feature is found.
+
+    call != address for consistency with Process and Thread.
+    """
+
+    call: Address
+    address: Address
+    feature: Feature
+
+
class FunctionFeature(HashableModel):
    """
    args:
@@ -167,8 +277,7 @@ class InstructionFeature(HashableModel):
        instruction: the address of the instruction to which this feature belongs.
        address: the address at which this feature is found.

-    instruction != address because, e.g., the feature may be found *within* the scope (basic block),
-    versus right at its starting address.
+    instruction != address, for consistency with Function and BasicBlock.
    """

    instruction: Address
@@ -194,13 +303,42 @@ class FunctionFeatures(BaseModel):
    model_config = ConfigDict(populate_by_name=True)


-class Features(BaseModel):
+class CallFeatures(BaseModel):
+    address: Address
+    name: str
+    features: Tuple[CallFeature, ...]
+
+
+class ThreadFeatures(BaseModel):
+    address: Address
+    features: Tuple[ThreadFeature, ...]
+    calls: Tuple[CallFeatures, ...]
+
+
+class ProcessFeatures(BaseModel):
+    address: Address
+    name: str
+    features: Tuple[ProcessFeature, ...]
+    threads: Tuple[ThreadFeatures, ...]
+
+
+class StaticFeatures(BaseModel):
    global_: Tuple[GlobalFeature, ...] = Field(alias="global")
    file: Tuple[FileFeature, ...]
    functions: Tuple[FunctionFeatures, ...]
    model_config = ConfigDict(populate_by_name=True)


+class DynamicFeatures(BaseModel):
+    global_: Tuple[GlobalFeature, ...] = Field(alias="global")
+    file: Tuple[FileFeature, ...]
+    processes: Tuple[ProcessFeatures, ...]
+    model_config = ConfigDict(populate_by_name=True)
+
+
+Features: TypeAlias = Union[StaticFeatures, DynamicFeatures]
+
+
class Extractor(BaseModel):
    name: str
    version: str = capa.version.__version__
@@ -208,18 +346,19 @@ class Extractor(BaseModel):


class Freeze(BaseModel):
-    version: int = 2
+    version: int = CURRENT_VERSION
    base_address: Address = Field(alias="base address")
+    sample_hashes: SampleHashes
+    flavor: Literal["static", "dynamic"]
    extractor: Extractor
    features: Features
    model_config = ConfigDict(populate_by_name=True)


-def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> str:
+def dumps_static(extractor: StaticFeatureExtractor) -> str:
    """
    serialize the given extractor to a string
    """
    global_features: List[GlobalFeature] = []
    for feature, _ in extractor.extract_global_features():
        global_features.append(
@@ -298,7 +437,7 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
        # Mypy is unable to recognise `basic_blocks` as an argument due to alias
    )

-    features = Features(
+    features = StaticFeatures(
        global_=global_features,
        file=tuple(file_features),
        functions=tuple(function_features),
@@ -306,8 +445,10 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
    # Mypy is unable to recognise `global_` as an argument due to alias

    freeze = Freeze(
-        version=2,
+        version=CURRENT_VERSION,
        base_address=Address.from_capa(extractor.get_base_address()),
+        sample_hashes=extractor.get_sample_hashes(),
+        flavor="static",
        extractor=Extractor(name=extractor.__class__.__name__),
        features=features,
    )  # type: ignore
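A sketch of the (uncompressed) JSON envelope dumps_static produces, per the Freeze model above; the "absolute" address-type spelling and all field values here are illustrative assumptions, abbreviated for space:

{
  "version": 3,
  "base address": {"type": "absolute", "value": 4194304},
  "sample_hashes": {"md5": "...", "sha1": "...", "sha256": "..."},
  "flavor": "static",
  "extractor": {"name": "VivisectFeatureExtractor", "version": "..."},
  "features": {"global": [], "file": [], "functions": []}
}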
@@ -316,16 +457,127 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
    return freeze.model_dump_json()


-def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
-    """deserialize a set of features (as a NullFeatureExtractor) from a string."""
-    import capa.features.extractors.null as null
+def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
+    """
+    serialize the given extractor to a string
+    """
+    global_features: List[GlobalFeature] = []
+    for feature, _ in extractor.extract_global_features():
+        global_features.append(
+            GlobalFeature(
+                feature=feature_from_capa(feature),
+            )
+        )
+
+    file_features: List[FileFeature] = []
+    for feature, address in extractor.extract_file_features():
+        file_features.append(
+            FileFeature(
+                feature=feature_from_capa(feature),
+                address=Address.from_capa(address),
+            )
+        )
+
+    process_features: List[ProcessFeatures] = []
+    for p in extractor.get_processes():
+        paddr = Address.from_capa(p.address)
+        pname = extractor.get_process_name(p)
+        pfeatures = [
+            ProcessFeature(
+                process=paddr,
+                address=Address.from_capa(addr),
+                feature=feature_from_capa(feature),
+            )
+            for feature, addr in extractor.extract_process_features(p)
+        ]
+
+        threads = []
+        for t in extractor.get_threads(p):
+            taddr = Address.from_capa(t.address)
+            tfeatures = [
+                ThreadFeature(
+                    thread=taddr,
+                    address=Address.from_capa(addr),
+                    feature=feature_from_capa(feature),
+                )
+                for feature, addr in extractor.extract_thread_features(p, t)
+            ]
+
+            calls = []
+            for call in extractor.get_calls(p, t):
+                caddr = Address.from_capa(call.address)
+                cname = extractor.get_call_name(p, t, call)
+                cfeatures = [
+                    CallFeature(
+                        call=caddr,
+                        address=Address.from_capa(addr),
+                        feature=feature_from_capa(feature),
+                    )
+                    for feature, addr in extractor.extract_call_features(p, t, call)
+                ]
+
+                calls.append(
+                    CallFeatures(
+                        address=caddr,
+                        name=cname,
+                        features=tuple(cfeatures),
+                    )
+                )
+
+            threads.append(
+                ThreadFeatures(
+                    address=taddr,
+                    features=tuple(tfeatures),
+                    calls=tuple(calls),
+                )
+            )
+
+        process_features.append(
+            ProcessFeatures(
+                address=paddr,
+                name=pname,
+                features=tuple(pfeatures),
+                threads=tuple(threads),
+            )
+        )
+
+    features = DynamicFeatures(
+        global_=global_features,
+        file=tuple(file_features),
+        processes=tuple(process_features),
+    )  # type: ignore
+    # Mypy is unable to recognise `global_` as an argument due to alias
+
+    # workaround around mypy issue: https://github.com/python/mypy/issues/1424
+    get_base_addr = getattr(extractor, "get_base_addr", None)
+    base_addr = get_base_addr() if get_base_addr else capa.features.address.NO_ADDRESS
+
+    freeze = Freeze(
+        version=CURRENT_VERSION,
+        base_address=Address.from_capa(base_addr),
+        sample_hashes=extractor.get_sample_hashes(),
+        flavor="dynamic",
+        extractor=Extractor(name=extractor.__class__.__name__),
+        features=features,
+    )  # type: ignore
+    # Mypy is unable to recognise `base_address` as an argument due to alias
+
+    return freeze.model_dump_json()
+
+
+def loads_static(s: str) -> StaticFeatureExtractor:
+    """deserialize a set of features (as a NullStaticFeatureExtractor) from a string."""
    freeze = Freeze.model_validate_json(s)
-    if freeze.version != 2:
+    if freeze.version != CURRENT_VERSION:
        raise ValueError(f"unsupported freeze format version: {freeze.version}")

-    return null.NullFeatureExtractor(
+    assert freeze.flavor == "static"
+    assert isinstance(freeze.features, StaticFeatures)
+
+    return null.NullStaticFeatureExtractor(
        base_address=freeze.base_address.to_capa(),
+        sample_hashes=freeze.sample_hashes,
        global_features=[f.feature.to_capa() for f in freeze.features.global_],
        file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
        functions={
@@ -349,10 +601,59 @@ def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
|
||||
)
|
||||
|
||||
|
||||
def loads_dynamic(s: str) -> DynamicFeatureExtractor:
    """deserialize a set of features (as a NullDynamicFeatureExtractor) from a string."""
    freeze = Freeze.model_validate_json(s)
    if freeze.version != CURRENT_VERSION:
        raise ValueError(f"unsupported freeze format version: {freeze.version}")

    assert freeze.flavor == "dynamic"
    assert isinstance(freeze.features, DynamicFeatures)

    return null.NullDynamicFeatureExtractor(
        base_address=freeze.base_address.to_capa(),
        sample_hashes=freeze.sample_hashes,
        global_features=[f.feature.to_capa() for f in freeze.features.global_],
        file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
        processes={
            p.address.to_capa(): null.ProcessFeatures(
                name=p.name,
                features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in p.features],
                threads={
                    t.address.to_capa(): null.ThreadFeatures(
                        features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in t.features],
                        calls={
                            c.address.to_capa(): null.CallFeatures(
                                name=c.name,
                                features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in c.features],
                            )
                            for c in t.calls
                        },
                    )
                    for t in p.threads
                },
            )
            for p in freeze.features.processes
        },
    )


MAGIC = "capa0000".encode("ascii")
|
||||
|
||||
|
||||
def dump(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> bytes:
|
||||
def dumps(extractor: FeatureExtractor) -> str:
|
||||
"""serialize the given extractor to a string."""
|
||||
if isinstance(extractor, StaticFeatureExtractor):
|
||||
doc = dumps_static(extractor)
|
||||
elif isinstance(extractor, DynamicFeatureExtractor):
|
||||
doc = dumps_dynamic(extractor)
|
||||
else:
|
||||
raise ValueError("Invalid feature extractor")
|
||||
|
||||
return doc
|
||||
|
||||
|
||||
def dump(extractor: FeatureExtractor) -> bytes:
|
||||
"""serialize the given extractor to a byte array."""
|
||||
return MAGIC + zlib.compress(dumps(extractor).encode("utf-8"))
|
||||
|
||||
@@ -361,11 +662,28 @@ def is_freeze(buf: bytes) -> bool:
|
||||
return buf[: len(MAGIC)] == MAGIC
|
||||
|
||||
|
||||
def load(buf: bytes) -> capa.features.extractors.base_extractor.FeatureExtractor:
|
||||
def loads(s: str):
|
||||
doc = json.loads(s)
|
||||
|
||||
if doc["version"] != CURRENT_VERSION:
|
||||
raise ValueError(f"unsupported freeze format version: {doc['version']}")
|
||||
|
||||
if doc["flavor"] == "static":
|
||||
return loads_static(s)
|
||||
elif doc["flavor"] == "dynamic":
|
||||
return loads_dynamic(s)
|
||||
else:
|
||||
raise ValueError(f"unsupported freeze format flavor: {doc['flavor']}")
|
||||
|
||||
|
||||
def load(buf: bytes):
|
||||
"""deserialize a set of features (as a NullFeatureExtractor) from a byte array."""
|
||||
if not is_freeze(buf):
|
||||
raise ValueError("missing magic header")
|
||||
return loads(zlib.decompress(buf[len(MAGIC) :]).decode("utf-8"))
|
||||
|
||||
s = zlib.decompress(buf[len(MAGIC) :]).decode("utf-8")
|
||||
|
||||
return loads(s)
|
||||
|
||||
|
||||
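Taken together, dumps()/dump() and loads()/load() form a symmetric API: dump() prefixes the zlib-compressed JSON document with the "capa0000" magic, and load() checks that magic before inflating and dispatching on the "flavor" field. A minimal round-trip sketch (not part of the diff; `extractor` stands for any static or dynamic feature extractor):

# illustrative usage of the freeze API defined above
buf = dump(extractor)    # b"capa0000" + zlib-compressed JSON
assert is_freeze(buf)    # cheap magic-header check
extractor2 = load(buf)   # a NullStaticFeatureExtractor or NullDynamicFeatureExtractor
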
def main(argv=None):

@@ -19,6 +19,7 @@ import capa.main
import capa.rules
import capa.ghidra.helpers
import capa.render.default
import capa.capabilities.common
import capa.features.extractors.ghidra.extractor

logger = logging.getLogger("capa_ghidra")
@@ -73,13 +74,13 @@ def run_headless():
    meta = capa.ghidra.helpers.collect_metadata([rules_path])
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()

    capabilities, counts = capa.main.find_capabilities(rules, extractor, False)
    capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor, False)

    meta.analysis.feature_counts = counts["feature_counts"]
    meta.analysis.library_functions = counts["library_functions"]
    meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)

    if capa.main.has_file_limitation(rules, capabilities, is_standalone=True):
    if capa.capabilities.common.has_file_limitation(rules, capabilities, is_standalone=True):
        logger.info("capa encountered warnings during analysis")

    if args.json:
@@ -123,13 +124,13 @@ def run_ui():
    meta = capa.ghidra.helpers.collect_metadata([rules_path])
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()

    capabilities, counts = capa.main.find_capabilities(rules, extractor, True)
    capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor, True)

    meta.analysis.feature_counts = counts["feature_counts"]
    meta.analysis.library_functions = counts["library_functions"]
    meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)

    if capa.main.has_file_limitation(rules, capabilities, is_standalone=False):
    if capa.capabilities.common.has_file_limitation(rules, capabilities, is_standalone=False):
        logger.info("capa encountered warnings during analysis")

    if verbose == "vverbose":

@@ -143,17 +143,18 @@ def collect_metadata(rules: List[Path]):
            sha256=sha256,
            path=currentProgram().getExecutablePath(),  # type: ignore [name-defined] # noqa: F821
        ),
        analysis=rdoc.Analysis(
        flavor=rdoc.Flavor.STATIC,
        analysis=rdoc.StaticAnalysis(
            format=currentProgram().getExecutableFormat(),  # type: ignore [name-defined] # noqa: F821
            arch=arch,
            os=os,
            extractor="ghidra",
            rules=tuple(r.resolve().absolute().as_posix() for r in rules),
            base_address=capa.features.freeze.Address.from_capa(currentProgram().getImageBase().getOffset()),  # type: ignore [name-defined] # noqa: F821
            layout=rdoc.Layout(
            layout=rdoc.StaticLayout(
                functions=(),
            ),
            feature_counts=rdoc.FeatureCounts(file=0, functions=()),
            feature_counts=rdoc.StaticFeatureCounts(file=0, functions=()),
            library_functions=(),
        ),
    )

@@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import json
import inspect
import logging
import contextlib
@@ -15,10 +16,11 @@ from pathlib import Path
import tqdm

from capa.exceptions import UnsupportedFormatError
from capa.features.common import FORMAT_PE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format
from capa.features.common import FORMAT_PE, FORMAT_CAPE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format

EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
EXTENSIONS_DYNAMIC = ("json", "json_")
EXTENSIONS_ELF = "elf_"

logger = logging.getLogger("capa")
@@ -57,14 +59,31 @@ def assert_never(value) -> NoReturn:
    assert False, f"Unhandled value: {value} ({type(value).__name__})"  # noqa: B011


def get_format_from_extension(sample: Path) -> str:
    if sample.name.endswith(EXTENSIONS_SHELLCODE_32):
        return FORMAT_SC32
    elif sample.name.endswith(EXTENSIONS_SHELLCODE_64):
        return FORMAT_SC64
def get_format_from_report(sample: Path) -> str:
    report = json.load(sample.open(encoding="utf-8"))

    if "CAPE" in report:
        return FORMAT_CAPE

    if "target" in report and "info" in report and "behavior" in report:
        # CAPE report that's missing the "CAPE" key,
        # which is not going to be much use, but it's correct.
        return FORMAT_CAPE

    return FORMAT_UNKNOWN


def get_format_from_extension(sample: Path) -> str:
    format_ = FORMAT_UNKNOWN
    if sample.name.endswith(EXTENSIONS_SHELLCODE_32):
        format_ = FORMAT_SC32
    elif sample.name.endswith(EXTENSIONS_SHELLCODE_64):
        format_ = FORMAT_SC64
    elif sample.name.endswith(EXTENSIONS_DYNAMIC):
        format_ = get_format_from_report(sample)
    return format_


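With EXTENSIONS_DYNAMIC in place, extension sniffing now falls through to report sniffing for JSON inputs. A hedged sketch of the expected behavior (the file names are made up for illustration):

from pathlib import Path

get_format_from_extension(Path("payload.sc32"))   # FORMAT_SC32
get_format_from_extension(Path("payload.raw64"))  # FORMAT_SC64
get_format_from_extension(Path("run1.json"))      # defers to get_format_from_report, which
                                                  # returns FORMAT_CAPE when a "CAPE" key (or the
                                                  # target/info/behavior trio) is present
get_format_from_extension(Path("sample.bin"))     # FORMAT_UNKNOWN
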
def get_auto_format(path: Path) -> str:
    format_ = get_format(path)
    if format_ == FORMAT_UNKNOWN:
@@ -77,13 +96,13 @@ def get_auto_format(path: Path) -> str:
def get_format(sample: Path) -> str:
    # imported locally to avoid import cycle
    from capa.features.extractors.common import extract_format
    from capa.features.extractors.dnfile_ import DnfileFeatureExtractor
    from capa.features.extractors.dotnetfile import DotnetFileFeatureExtractor

    buf = sample.read_bytes()

    for feature, _ in extract_format(buf):
        if feature == Format(FORMAT_PE):
            dnfile_extractor = DnfileFeatureExtractor(sample)
            dnfile_extractor = DotnetFileFeatureExtractor(sample)
            if dnfile_extractor.is_dotnet_file():
                feature = Format(FORMAT_DOTNET)

@@ -128,15 +147,32 @@ def redirecting_print_to_tqdm(disable_progress):

def log_unsupported_format_error():
    logger.error("-" * 80)
    logger.error(" Input file does not appear to be a PE or ELF file.")
    logger.error(" Input file does not appear to be a supported file.")
    logger.error(" ")
    logger.error(
        " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)."
    )
    logger.error(" See all supported file formats via capa's help output (-h).")
    logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
    logger.error("-" * 80)


def log_unsupported_cape_report_error(error: str):
    logger.error("-" * 80)
    logger.error("Input file is not a valid CAPE report: %s", error)
    logger.error(" ")
    logger.error(" capa currently only supports analyzing standard CAPE reports in JSON format.")
    logger.error(
        " Please make sure your report file is in the standard format and contains both the static and dynamic sections."
    )
    logger.error("-" * 80)


def log_empty_cape_report_error(error: str):
    logger.error("-" * 80)
    logger.error(" CAPE report is empty or only contains little useful data: %s", error)
    logger.error(" ")
    logger.error(" Please make sure the sandbox run captures useful behaviour of your sample.")
    logger.error("-" * 80)


def log_unsupported_os_error():
    logger.error("-" * 80)
    logger.error(" Input file does not appear to target a supported OS.")

@@ -152,14 +152,15 @@ def collect_metadata(rules: List[Path]):
            sha256=sha256,
            path=idaapi.get_input_file_path(),
        ),
        analysis=rdoc.Analysis(
        flavor=rdoc.Flavor.STATIC,
        analysis=rdoc.StaticAnalysis(
            format=idaapi.get_file_type_name(),
            arch=arch,
            os=os,
            extractor="ida",
            rules=tuple(r.resolve().absolute().as_posix() for r in rules),
            base_address=capa.features.freeze.Address.from_capa(idaapi.get_imagebase()),
            layout=rdoc.Layout(
            layout=rdoc.StaticLayout(
                functions=(),
                # this is updated after capabilities have been collected.
                # will look like:
@@ -167,7 +168,7 @@ def collect_metadata(rules: List[Path]):
                # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... }
            ),
            # ignore these for now - not used by IDA plugin.
            feature_counts=rdoc.FeatureCounts(file=0, functions=()),
            feature_counts=rdoc.StaticFeatureCounts(file=0, functions=()),
            library_functions=(),
        ),
    )

@@ -25,6 +25,7 @@ import capa.version
import capa.ida.helpers
import capa.render.json
import capa.features.common
import capa.capabilities.common
import capa.render.result_document
import capa.features.extractors.ida.extractor
from capa.rules import Rule
@@ -768,7 +769,7 @@ class CapaExplorerForm(idaapi.PluginForm):

        try:
            meta = capa.ida.helpers.collect_metadata([Path(settings.user[CAPA_SETTINGS_RULE_PATH])])
            capabilities, counts = capa.main.find_capabilities(
            capabilities, counts = capa.capabilities.common.find_capabilities(
                ruleset, self.feature_extractor, disable_progress=True
            )

@@ -810,7 +811,7 @@ class CapaExplorerForm(idaapi.PluginForm):

            capa.ida.helpers.inform_user_ida_ui("capa encountered file type warnings during analysis")

        if capa.main.has_file_limitation(ruleset, capabilities, is_standalone=False):
        if capa.capabilities.common.has_file_limitation(ruleset, capabilities, is_standalone=False):
            capa.ida.helpers.inform_user_ida_ui("capa encountered file limitation warnings during analysis")
        except Exception as e:
            logger.exception("Failed to check for file limitations (error: %s)", e)
@@ -1192,10 +1193,13 @@ class CapaExplorerForm(idaapi.PluginForm):
            return

        is_match: bool = False
        if self.rulegen_current_function is not None and rule.scope in (
            capa.rules.Scope.FUNCTION,
            capa.rules.Scope.BASIC_BLOCK,
            capa.rules.Scope.INSTRUCTION,
        if self.rulegen_current_function is not None and any(
            s in rule.scopes
            for s in (
                capa.rules.Scope.FUNCTION,
                capa.rules.Scope.BASIC_BLOCK,
                capa.rules.Scope.INSTRUCTION,
            )
        ):
            try:
                _, func_matches, bb_matches, insn_matches = self.rulegen_feature_cache.find_code_capabilities(
@@ -1205,13 +1209,13 @@ class CapaExplorerForm(idaapi.PluginForm):
                self.set_rulegen_status(f"Failed to create function rule matches from rule set ({e})")
                return

            if rule.scope == capa.rules.Scope.FUNCTION and rule.name in func_matches:
            if capa.rules.Scope.FUNCTION in rule.scopes and rule.name in func_matches:
                is_match = True
            elif rule.scope == capa.rules.Scope.BASIC_BLOCK and rule.name in bb_matches:
            elif capa.rules.Scope.BASIC_BLOCK in rule.scopes and rule.name in bb_matches:
                is_match = True
            elif rule.scope == capa.rules.Scope.INSTRUCTION and rule.name in insn_matches:
            elif capa.rules.Scope.INSTRUCTION in rule.scopes and rule.name in insn_matches:
                is_match = True
            elif rule.scope == capa.rules.Scope.FILE:
            elif capa.rules.Scope.FILE in rule.scopes:
                try:
                    _, file_matches = self.rulegen_feature_cache.find_file_capabilities(ruleset)
                except Exception as e:

@@ -500,16 +500,16 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                location = location_.to_capa()

                parent2: CapaExplorerDataItem
                if rule.meta.scope == capa.rules.FILE_SCOPE:
                if capa.rules.Scope.FILE in rule.meta.scopes:
                    parent2 = parent
                elif rule.meta.scope == capa.rules.FUNCTION_SCOPE:
                elif capa.rules.Scope.FUNCTION in rule.meta.scopes:
                    parent2 = CapaExplorerFunctionItem(parent, location)
                elif rule.meta.scope == capa.rules.BASIC_BLOCK_SCOPE:
                elif capa.rules.Scope.BASIC_BLOCK in rule.meta.scopes:
                    parent2 = CapaExplorerBlockItem(parent, location)
                elif rule.meta.scope == capa.rules.INSTRUCTION_SCOPE:
                elif capa.rules.Scope.INSTRUCTION in rule.meta.scopes:
                    parent2 = CapaExplorerInstructionItem(parent, location)
                else:
                    raise RuntimeError("unexpected rule scope: " + str(rule.meta.scope))
                    raise RuntimeError("unexpected rule scope: " + str(rule.meta.scopes.static))

                self.render_capa_doc_match(parent2, match, doc)


613  capa/main.py
@@ -11,23 +11,21 @@ See the License for the specific language governing permissions and limitations
import io
import os
import sys
import json
import time
import hashlib
import logging
import argparse
import datetime
import textwrap
import itertools
import contextlib
import collections
from typing import Any, Dict, List, Tuple, Callable, Optional
from types import TracebackType
from typing import Any, Set, Dict, List, Callable, Optional
from pathlib import Path

import halo
import tqdm
import colorama
import tqdm.contrib.logging
from pefile import PEFormatError
from typing_extensions import assert_never
from elftools.common.exceptions import ELFError

import capa.perf
@@ -47,38 +45,53 @@ import capa.render.result_document
import capa.render.result_document as rdoc
import capa.features.extractors.common
import capa.features.extractors.pefile
import capa.features.extractors.dnfile_
import capa.features.extractors.dexfile
import capa.features.extractors.elffile
import capa.features.extractors.dotnetfile
import capa.features.extractors.base_extractor
from capa.rules import Rule, Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
import capa.features.extractors.cape.extractor
from capa.rules import Rule, RuleSet
from capa.engine import MatchResults
from capa.helpers import (
    get_format,
    get_file_taste,
    get_auto_format,
    log_unsupported_os_error,
    redirecting_print_to_tqdm,
    log_unsupported_arch_error,
    log_empty_cape_report_error,
    log_unsupported_format_error,
    log_unsupported_cape_report_error,
)
from capa.exceptions import (
    EmptyReportError,
    UnsupportedOSError,
    UnsupportedArchError,
    UnsupportedFormatError,
    UnsupportedRuntimeError,
)
from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
from capa.features.common import (
    OS_AUTO,
    OS_LINUX,
    OS_MACOS,
    FORMAT_PE,
    FORMAT_DEX,
    FORMAT_ELF,
    OS_WINDOWS,
    FORMAT_AUTO,
    FORMAT_CAPE,
    FORMAT_SC32,
    FORMAT_SC64,
    FORMAT_DOTNET,
    FORMAT_FREEZE,
    FORMAT_RESULT,
)
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.features.address import Address
from capa.capabilities.common import find_capabilities, has_file_limitation, find_file_capabilities
from capa.features.extractors.base_extractor import (
    SampleHashes,
    FeatureExtractor,
    StaticFeatureExtractor,
    DynamicFeatureExtractor,
)

RULES_PATH_DEFAULT_STRING = "(embedded rules)"
SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
@@ -98,6 +111,9 @@ E_INVALID_FILE_ARCH = 17
E_INVALID_FILE_OS = 18
E_UNSUPPORTED_IDA_VERSION = 19
E_UNSUPPORTED_GHIDRA_VERSION = 20
E_MISSING_CAPE_STATIC_ANALYSIS = 21
E_MISSING_CAPE_DYNAMIC_ANALYSIS = 22
E_EMPTY_REPORT = 23

logger = logging.getLogger("capa")

@@ -120,267 +136,6 @@ def set_vivisect_log_level(level):
    logging.getLogger("Elf").setLevel(level)


def find_instruction_capabilities(
    ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
) -> Tuple[FeatureSet, MatchResults]:
    """
    find matches for the given rules for the given instruction.

    returns: tuple containing (features for instruction, match results for instruction)
    """
    # all features found for the instruction.
    features = collections.defaultdict(set)  # type: FeatureSet

    for feature, addr in itertools.chain(
        extractor.extract_insn_features(f, bb, insn), extractor.extract_global_features()
    ):
        features[feature].add(addr)

    # matches found at this instruction.
    _, matches = ruleset.match(Scope.INSTRUCTION, features, insn.address)

    for rule_name, res in matches.items():
        rule = ruleset[rule_name]
        for addr, _ in res:
            capa.engine.index_rule_matches(features, rule, [addr])

    return features, matches


def find_basic_block_capabilities(
    ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
    """
    find matches for the given rules within the given basic block.

    returns: tuple containing (features for basic block, match results for basic block, match results for instructions)
    """
    # all features found within this basic block,
    # includes features found within instructions.
    features = collections.defaultdict(set)  # type: FeatureSet

    # matches found at the instruction scope.
    # might be found at different instructions, that's ok.
    insn_matches = collections.defaultdict(list)  # type: MatchResults

    for insn in extractor.get_instructions(f, bb):
        ifeatures, imatches = find_instruction_capabilities(ruleset, extractor, f, bb, insn)
        for feature, vas in ifeatures.items():
            features[feature].update(vas)

        for rule_name, res in imatches.items():
            insn_matches[rule_name].extend(res)

    for feature, va in itertools.chain(
        extractor.extract_basic_block_features(f, bb), extractor.extract_global_features()
    ):
        features[feature].add(va)

    # matches found within this basic block.
    _, matches = ruleset.match(Scope.BASIC_BLOCK, features, bb.address)

    for rule_name, res in matches.items():
        rule = ruleset[rule_name]
        for va, _ in res:
            capa.engine.index_rule_matches(features, rule, [va])

    return features, matches, insn_matches


def find_code_capabilities(
    ruleset: RuleSet, extractor: FeatureExtractor, fh: FunctionHandle
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
    """
    find matches for the given rules within the given function.

    returns: tuple containing (match results for function, match results for basic blocks, match results for instructions, number of features)
    """
    # all features found within this function,
    # includes features found within basic blocks (and instructions).
    function_features = collections.defaultdict(set)  # type: FeatureSet

    # matches found at the basic block scope.
    # might be found at different basic blocks, that's ok.
    bb_matches = collections.defaultdict(list)  # type: MatchResults

    # matches found at the instruction scope.
    # might be found at different instructions, that's ok.
    insn_matches = collections.defaultdict(list)  # type: MatchResults

    for bb in extractor.get_basic_blocks(fh):
        features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, fh, bb)
        for feature, vas in features.items():
            function_features[feature].update(vas)

        for rule_name, res in bmatches.items():
            bb_matches[rule_name].extend(res)

        for rule_name, res in imatches.items():
            insn_matches[rule_name].extend(res)

    for feature, va in itertools.chain(extractor.extract_function_features(fh), extractor.extract_global_features()):
        function_features[feature].add(va)

    _, function_matches = ruleset.match(Scope.FUNCTION, function_features, fh.address)
    return function_matches, bb_matches, insn_matches, len(function_features)


def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, function_features: FeatureSet):
    file_features = collections.defaultdict(set)  # type: FeatureSet

    for feature, va in itertools.chain(extractor.extract_file_features(), extractor.extract_global_features()):
        # not all file features may have virtual addresses.
        # if not, then at least ensure the feature shows up in the index.
        # the set of addresses will still be empty.
        if va:
            file_features[feature].add(va)
        else:
            if feature not in file_features:
                file_features[feature] = set()

    logger.debug("analyzed file and extracted %d features", len(file_features))

    file_features.update(function_features)

    _, matches = ruleset.match(Scope.FILE, file_features, NO_ADDRESS)
    return matches, len(file_features)


def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None) -> Tuple[MatchResults, Any]:
    all_function_matches = collections.defaultdict(list)  # type: MatchResults
    all_bb_matches = collections.defaultdict(list)  # type: MatchResults
    all_insn_matches = collections.defaultdict(list)  # type: MatchResults

    feature_counts = rdoc.FeatureCounts(file=0, functions=())
    library_functions: Tuple[rdoc.LibraryFunction, ...] = ()

    with redirecting_print_to_tqdm(disable_progress):
        with tqdm.contrib.logging.logging_redirect_tqdm():
            pbar = tqdm.tqdm
            if capa.helpers.is_runtime_ghidra():
                # Ghidrathon interpreter cannot properly handle
                # the TMonitor thread that is created via a monitor_interval
                # > 0
                pbar.monitor_interval = 0
            if disable_progress:
                # do not use tqdm to avoid unnecessary side effects when caller intends
                # to disable progress completely
                def pbar(s, *args, **kwargs):
                    return s

            functions = list(extractor.get_functions())
            n_funcs = len(functions)

            pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions", leave=False)
            for f in pb:
                t0 = time.time()
                if extractor.is_library_function(f.address):
                    function_name = extractor.get_function_name(f.address)
                    logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
                    library_functions += (
                        rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
                    )
                    n_libs = len(library_functions)
                    percentage = round(100 * (n_libs / n_funcs))
                    if isinstance(pb, tqdm.tqdm):
                        pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
                    continue

                function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(
                    ruleset, extractor, f
                )
                feature_counts.functions += (
                    rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
                )
                t1 = time.time()

                match_count = sum(len(res) for res in function_matches.values())
                match_count += sum(len(res) for res in bb_matches.values())
                match_count += sum(len(res) for res in insn_matches.values())
                logger.debug(
                    "analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
                    f.address,
                    feature_count,
                    match_count,
                    t1 - t0,
                )

                for rule_name, res in function_matches.items():
                    all_function_matches[rule_name].extend(res)
                for rule_name, res in bb_matches.items():
                    all_bb_matches[rule_name].extend(res)
                for rule_name, res in insn_matches.items():
                    all_insn_matches[rule_name].extend(res)

    # collection of features that captures the rule matches within function, BB, and instruction scopes.
    # mapping from feature (matched rule) to set of addresses at which it matched.
    function_and_lower_features: FeatureSet = collections.defaultdict(set)
    for rule_name, results in itertools.chain(
        all_function_matches.items(), all_bb_matches.items(), all_insn_matches.items()
    ):
        locations = {p[0] for p in results}
        rule = ruleset[rule_name]
        capa.engine.index_rule_matches(function_and_lower_features, rule, locations)

    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
    feature_counts.file = feature_count

    matches = dict(
        itertools.chain(
            # each rule exists in exactly one scope,
            # so there won't be any overlap among these following MatchResults,
            # and we can merge the dictionaries naively.
            all_insn_matches.items(),
            all_bb_matches.items(),
            all_function_matches.items(),
            all_file_matches.items(),
        )
    )

    meta = {
        "feature_counts": feature_counts,
        "library_functions": library_functions,
    }

    return matches, meta


def has_rule_with_namespace(rules: RuleSet, capabilities: MatchResults, namespace: str) -> bool:
    return any(
        rules.rules[rule_name].meta.get("namespace", "").startswith(namespace) for rule_name in capabilities.keys()
    )


def is_internal_rule(rule: Rule) -> bool:
    return rule.meta.get("namespace", "").startswith("internal/")


def is_file_limitation_rule(rule: Rule) -> bool:
    return rule.meta.get("namespace", "") == "internal/limitation/file"


def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalone=True) -> bool:
    file_limitation_rules = list(filter(is_file_limitation_rule, rules.rules.values()))

    for file_limitation_rule in file_limitation_rules:
        if file_limitation_rule.name not in capabilities:
            continue

        logger.warning("-" * 80)
        for line in file_limitation_rule.meta.get("description", "").split("\n"):
            logger.warning(" %s", line)
        logger.warning(" Identified via rule: %s", file_limitation_rule.name)
        if is_standalone:
            logger.warning(" ")
            logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
        logger.warning("-" * 80)

        # bail on first file limitation
        return True

    return False


def is_supported_format(sample: Path) -> bool:
    """
    Return if this is a supported file based on magic header values
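These matching helpers move wholesale into capa.capabilities.common in this change; the calling convention is unchanged. A minimal sketch of the pipeline, assuming a loaded RuleSet and a ready extractor:

# sketch only: `rules` is a capa.rules.RuleSet, `extractor` a FeatureExtractor
capabilities, counts = find_capabilities(rules, extractor, disable_progress=True)
# `capabilities` maps rule names to (address, result) pairs;
# counts["feature_counts"] and counts["library_functions"] feed the metadata
if has_file_limitation(rules, capabilities, is_standalone=False):
    print("sample may be packed; results are likely unreliable")
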
@@ -532,7 +287,8 @@ def get_extractor(
    UnsupportedArchError
    UnsupportedOSError
    """
    if format_ not in (FORMAT_SC32, FORMAT_SC64):

    if format_ not in (FORMAT_SC32, FORMAT_SC64, FORMAT_CAPE):
        if not is_supported_format(path):
            raise UnsupportedFormatError()

@@ -542,11 +298,22 @@ def get_extractor(
    if os_ == OS_AUTO and not is_supported_os(path):
        raise UnsupportedOSError()

    if format_ == FORMAT_DOTNET:
    if format_ == FORMAT_CAPE:
        import capa.features.extractors.cape.extractor

        report = json.load(Path(path).open(encoding="utf-8"))
        return capa.features.extractors.cape.extractor.CapeExtractor.from_report(report)

    elif format_ == FORMAT_DOTNET:
        import capa.features.extractors.dnfile.extractor

        return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)

    elif format_ == FORMAT_DEX:
        import capa.features.extractors.dexfile

        return capa.features.extractors.dexfile.DexFeatureExtractor(path, code_analysis=True)

    elif backend == BACKEND_BINJA:
        from capa.features.extractors.binja.find_binja_api import find_binja_path

@@ -610,11 +377,18 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:

    elif format_ == FORMAT_DOTNET:
        file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
        file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample))
        file_extractors.append(capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(sample))

    elif format_ == capa.features.extractors.common.FORMAT_ELF:
    elif format_ == capa.features.common.FORMAT_ELF:
        file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))

    elif format_ == capa.features.common.FORMAT_DEX:
        file_extractors.append(capa.features.extractors.dexfile.DexFeatureExtractor(sample, code_analysis=False))

    elif format_ == FORMAT_CAPE:
        report = json.load(Path(sample).open(encoding="utf-8"))
        file_extractors.append(capa.features.extractors.cape.extractor.CapeExtractor.from_report(report))

    return file_extractors


@@ -696,7 +470,7 @@ def get_rules(
    if ruleset is not None:
        return ruleset

    rules = []  # type: List[Rule]
    rules: List[Rule] = []

    total_rule_count = len(rule_file_paths)
    for i, (path, content) in enumerate(zip(rule_file_paths, rule_contents)):
@@ -711,7 +485,7 @@ def get_rules(
        rule.meta["capa/nursery"] = is_nursery_rule_path(path)

        rules.append(rule)
        logger.debug("loaded rule: '%s' with scope: %s", rule.name, rule.scope)
        logger.debug("loaded rule: '%s' with scope: %s", rule.name, rule.scopes)

    ruleset = capa.rules.RuleSet(rules)

@@ -746,60 +520,177 @@ def get_signatures(sigs_path: Path) -> List[Path]:
    return paths


def collect_metadata(
    argv: List[str],
    sample_path: Path,
    format_: str,
    os_: str,
    rules_path: List[Path],
    extractor: capa.features.extractors.base_extractor.FeatureExtractor,
) -> rdoc.Metadata:
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()

    buf = sample_path.read_bytes()

    md5.update(buf)
    sha1.update(buf)
    sha256.update(buf)

    rules = tuple(r.resolve().absolute().as_posix() for r in rules_path)
    format_ = get_format(sample_path) if format_ == FORMAT_AUTO else format_
    arch = get_arch(sample_path)
    os_ = get_os(sample_path) if os_ == OS_AUTO else os_

    return rdoc.Metadata(
        timestamp=datetime.datetime.now(),
        version=capa.version.__version__,
        argv=tuple(argv) if argv else None,
        sample=rdoc.Sample(
            md5=md5.hexdigest(),
            sha1=sha1.hexdigest(),
            sha256=sha256.hexdigest(),
            path=sample_path.resolve().absolute().as_posix(),
        ),
        analysis=rdoc.Analysis(
def get_sample_analysis(format_, arch, os_, extractor, rules_path, counts):
    if isinstance(extractor, StaticFeatureExtractor):
        return rdoc.StaticAnalysis(
            format=format_,
            arch=arch,
            os=os_,
            extractor=extractor.__class__.__name__,
            rules=rules,
            rules=tuple(rules_path),
            base_address=frz.Address.from_capa(extractor.get_base_address()),
            layout=rdoc.Layout(
            layout=rdoc.StaticLayout(
                functions=(),
                # this is updated after capabilities have been collected.
                # will look like:
                #
                # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... }
            ),
            feature_counts=rdoc.FeatureCounts(file=0, functions=()),
            library_functions=(),
            feature_counts=counts["feature_counts"],
            library_functions=counts["library_functions"],
        )
    elif isinstance(extractor, DynamicFeatureExtractor):
        return rdoc.DynamicAnalysis(
            format=format_,
            arch=arch,
            os=os_,
            extractor=extractor.__class__.__name__,
            rules=tuple(rules_path),
            layout=rdoc.DynamicLayout(
                processes=(),
            ),
            feature_counts=counts["feature_counts"],
        )
    else:
        raise ValueError("invalid extractor type")


def collect_metadata(
    argv: List[str],
    sample_path: Path,
    format_: str,
    os_: str,
    rules_path: List[Path],
    extractor: FeatureExtractor,
    counts: dict,
) -> rdoc.Metadata:
    # if it's a binary sample we hash it, if it's a report
    # we fetch the hashes from the report
    sample_hashes: SampleHashes = extractor.get_sample_hashes()
    md5, sha1, sha256 = sample_hashes.md5, sample_hashes.sha1, sample_hashes.sha256

    global_feats = list(extractor.extract_global_features())
    extractor_format = [f.value for (f, _) in global_feats if isinstance(f, capa.features.common.Format)]
    extractor_arch = [f.value for (f, _) in global_feats if isinstance(f, capa.features.common.Arch)]
    extractor_os = [f.value for (f, _) in global_feats if isinstance(f, capa.features.common.OS)]

    format_ = str(extractor_format[0]) if extractor_format else "unknown" if format_ == FORMAT_AUTO else format_
    arch = str(extractor_arch[0]) if extractor_arch else "unknown"
    os_ = str(extractor_os[0]) if extractor_os else "unknown" if os_ == OS_AUTO else os_

    if isinstance(extractor, StaticFeatureExtractor):
        meta_class: type = rdoc.StaticMetadata
    elif isinstance(extractor, DynamicFeatureExtractor):
        meta_class = rdoc.DynamicMetadata
    else:
        assert_never(extractor)

    rules = tuple(r.resolve().absolute().as_posix() for r in rules_path)

    return meta_class(
        timestamp=datetime.datetime.now(),
        version=capa.version.__version__,
        argv=tuple(argv) if argv else None,
        sample=rdoc.Sample(
            md5=md5,
            sha1=sha1,
            sha256=sha256,
            path=Path(sample_path).resolve().as_posix(),
        ),
        analysis=get_sample_analysis(
            format_,
            arch,
            os_,
            extractor,
            rules,
            counts,
        ),
    )


def compute_layout(rules, extractor, capabilities) -> rdoc.Layout:
def compute_dynamic_layout(rules, extractor: DynamicFeatureExtractor, capabilities: MatchResults) -> rdoc.DynamicLayout:
    """
    compute a metadata structure that links threads
    to the processes in which they're found.

    only collect the threads at which some rule matched.
    otherwise, we may pollute the json document with
    a large amount of un-referenced data.
    """
    assert isinstance(extractor, DynamicFeatureExtractor)

    matched_calls: Set[Address] = set()

    def result_rec(result: capa.features.common.Result):
        for loc in result.locations:
            if isinstance(loc, capa.features.address.DynamicCallAddress):
                matched_calls.add(loc)
        for child in result.children:
            result_rec(child)

    for matches in capabilities.values():
        for _, result in matches:
            result_rec(result)

    names_by_process: Dict[Address, str] = {}
    names_by_call: Dict[Address, str] = {}

    matched_processes: Set[Address] = set()
    matched_threads: Set[Address] = set()

    threads_by_process: Dict[Address, List[Address]] = {}
    calls_by_thread: Dict[Address, List[Address]] = {}

    for p in extractor.get_processes():
        threads_by_process[p.address] = []

        for t in extractor.get_threads(p):
            calls_by_thread[t.address] = []

            for c in extractor.get_calls(p, t):
                if c.address in matched_calls:
                    names_by_call[c.address] = extractor.get_call_name(p, t, c)
                    calls_by_thread[t.address].append(c.address)

            if calls_by_thread[t.address]:
                matched_threads.add(t.address)
                threads_by_process[p.address].append(t.address)

        if threads_by_process[p.address]:
            matched_processes.add(p.address)
            names_by_process[p.address] = extractor.get_process_name(p)

    layout = rdoc.DynamicLayout(
        processes=tuple(
            rdoc.ProcessLayout(
                address=frz.Address.from_capa(p),
                name=names_by_process[p],
                matched_threads=tuple(
                    rdoc.ThreadLayout(
                        address=frz.Address.from_capa(t),
                        matched_calls=tuple(
                            rdoc.CallLayout(
                                address=frz.Address.from_capa(c),
                                name=names_by_call[c],
                            )
                            for c in calls_by_thread[t]
                            if c in matched_calls
                        ),
                    )
                    for t in threads
                    if t in matched_threads
                )  # this object is open to extension in the future,
                # such as with the function name, etc.
            )
            for p, threads in threads_by_process.items()
            if p in matched_processes
        )
    )

    return layout


def compute_static_layout(rules, extractor: StaticFeatureExtractor, capabilities) -> rdoc.StaticLayout:
    """
    compute a metadata structure that links basic blocks
    to the functions in which they're found.
@@ -819,12 +710,12 @@ def compute_layout(rules, extractor, capabilities) -> rdoc.Layout:
    matched_bbs = set()
    for rule_name, matches in capabilities.items():
        rule = rules[rule_name]
        if rule.meta.get("scope") == capa.rules.BASIC_BLOCK_SCOPE:
        if capa.rules.Scope.BASIC_BLOCK in rule.scopes:
            for addr, _ in matches:
                assert addr in functions_by_bb
                matched_bbs.add(addr)

    layout = rdoc.Layout(
    layout = rdoc.StaticLayout(
        functions=tuple(
            rdoc.FunctionLayout(
                address=frz.Address.from_capa(f),
@@ -841,6 +732,15 @@ def compute_layout(rules, extractor, capabilities) -> rdoc.Layout:
    return layout


def compute_layout(rules, extractor, capabilities) -> rdoc.Layout:
    if isinstance(extractor, StaticFeatureExtractor):
        return compute_static_layout(rules, extractor, capabilities)
    elif isinstance(extractor, DynamicFeatureExtractor):
        return compute_dynamic_layout(rules, extractor, capabilities)
    else:
        raise ValueError("extractor must be either a static or dynamic extractor")


def install_common_args(parser, wanted=None):
    """
    register a common set of command line arguments for re-use by main & scripts.
@@ -907,8 +807,10 @@ def install_common_args(parser, wanted=None):
        (FORMAT_PE, "Windows PE file"),
        (FORMAT_DOTNET, ".NET PE file"),
        (FORMAT_ELF, "Executable and Linkable Format"),
        (FORMAT_DEX, "Android DEX file"),
        (FORMAT_SC32, "32-bit shellcode"),
        (FORMAT_SC64, "64-bit shellcode"),
        (FORMAT_CAPE, "CAPE sandbox report"),
        (FORMAT_FREEZE, "features previously frozen by capa"),
    ]
    format_help = ", ".join([f"{f[0]}: {f[1]}" for f in formats])
@@ -1087,6 +989,27 @@ def handle_common_args(args):
        args.signatures = sigs_path


def simple_message_exception_handler(exctype, value: BaseException, traceback: TracebackType):
    """
    prints a friendly message on unexpected exceptions to regular users (debug mode shows the regular stack trace)

    args:
        # TODO(aaronatp): Once capa drops support for Python 3.8, move the exctype type annotation to
        # the function parameters and remove the "# type: ignore[assignment]" from the relevant place
        # in the main function, see (https://github.com/mandiant/capa/issues/1896)
        exctype (type[BaseException]): exception class
    """

    if exctype is KeyboardInterrupt:
        print("KeyboardInterrupt detected, program terminated")
    else:
        print(
            f"Unexpected exception raised: {exctype}. Please run capa in debug mode (-d/--debug) "
            + "to see the stack trace. Please also report your issue on the capa GitHub page so we "
            + "can improve the code! (https://github.com/mandiant/capa/issues)"
        )


def main(argv: Optional[List[str]] = None):
    if sys.version_info < (3, 8):
        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
@@ -1129,6 +1052,8 @@ def main(argv: Optional[List[str]] = None):
    install_common_args(parser, {"sample", "format", "backend", "os", "signatures", "rules", "tag"})
    parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
    args = parser.parse_args(args=argv)
    if not args.debug:
        sys.excepthook = simple_message_exception_handler  # type: ignore[assignment]
    ret = handle_common_args(args)
    if ret is not None and ret != 0:
        return ret
@@ -1165,7 +1090,7 @@ def main(argv: Optional[List[str]] = None):
        # during the load of the RuleSet, we extract subscope statements into their own rules
        # that are subsequently `match`ed upon. this inflates the total rule count.
        # so, filter out the subscope rules when reporting total number of loaded rules.
        len(list(filter(lambda r: not r.is_subscope_rule(), rules.rules.values()))),
        len(list(filter(lambda r: not (r.is_subscope_rule()), rules.rules.values()))),
    )
    if args.tag:
        rules = rules.filter_rules_by_meta(args.tag)
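Note that the excepthook is only installed when --debug is absent, so a debug run still produces the full traceback. The same pattern in isolation (a sketch, not from the diff):

import sys

def quiet_hook(exctype, value, tb):
    # hypothetical reduced handler: one line instead of a stack trace
    print(f"error: {value} (re-run with -d/--debug for details)")

debug = False  # stand-in for args.debug
if not debug:
    sys.excepthook = quiet_hook
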
@@ -1204,8 +1129,26 @@ def main(argv: Optional[List[str]] = None):
    except (ELFError, OverflowError) as e:
        logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
        return E_CORRUPT_FILE
    except UnsupportedFormatError as e:
        if format_ == FORMAT_CAPE:
            log_unsupported_cape_report_error(str(e))
        else:
            log_unsupported_format_error()
        return E_INVALID_FILE_TYPE
    except EmptyReportError as e:
        if format_ == FORMAT_CAPE:
            log_empty_cape_report_error(str(e))
            return E_EMPTY_REPORT
        else:
            log_unsupported_format_error()
            return E_INVALID_FILE_TYPE

    found_file_limitation = False
    for file_extractor in file_extractors:
        if isinstance(file_extractor, DynamicFeatureExtractor):
            # Dynamic feature extractors can handle packed samples
            continue

        try:
            pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
        except PEFormatError as e:
@@ -1217,7 +1160,8 @@ def main(argv: Optional[List[str]] = None):

        # file limitations that rely on non-file scope won't be detected here.
        # nor on FunctionName features, because pefile doesn't support this.
        if has_file_limitation(rules, pure_file_capabilities):
        found_file_limitation = has_file_limitation(rules, pure_file_capabilities)
        if found_file_limitation:
            # bail if capa encountered file limitation e.g. a packed binary
            # do show the output in verbose mode, though.
            if not (args.verbose or args.vverbose or args.json):
@@ -1239,7 +1183,7 @@ def main(argv: Optional[List[str]] = None):

    if format_ == FORMAT_FREEZE:
        # freeze format deserializes directly into an extractor
        extractor = frz.load(Path(args.sample).read_bytes())
        extractor: FeatureExtractor = frz.load(Path(args.sample).read_bytes())
    else:
        # all other formats we must create an extractor,
        # such as viv, binary ninja, etc. workspaces
@@ -1257,6 +1201,9 @@ def main(argv: Optional[List[str]] = None):

        should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)

        # TODO(mr-tz): this should be wrapped and refactored as it's tedious to update everywhere
        # see same code and show-features above examples
        # https://github.com/mandiant/capa/issues/1813
        try:
            extractor = get_extractor(
                args.sample,
@@ -1267,8 +1214,11 @@ def main(argv: Optional[List[str]] = None):
                should_save_workspace,
                disable_progress=args.quiet or args.debug,
            )
        except UnsupportedFormatError:
            log_unsupported_format_error()
        except UnsupportedFormatError as e:
            if format_ == FORMAT_CAPE:
                log_unsupported_cape_report_error(str(e))
            else:
                log_unsupported_format_error()
            return E_INVALID_FILE_TYPE
        except UnsupportedArchError:
            log_unsupported_arch_error()
@@ -1277,16 +1227,13 @@ def main(argv: Optional[List[str]] = None):
            log_unsupported_os_error()
            return E_INVALID_FILE_OS

    meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor)

    capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)

    meta.analysis.feature_counts = counts["feature_counts"]
    meta.analysis.library_functions = counts["library_functions"]
    meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor, counts)
    meta.analysis.layout = compute_layout(rules, extractor, capabilities)

    if has_file_limitation(rules, capabilities):
        # bail if capa encountered file limitation e.g. a packed binary
    if isinstance(extractor, StaticFeatureExtractor) and found_file_limitation:
        # bail if capa's static feature extractor encountered file limitation e.g. a packed binary
        # do show the output in verbose mode, though.
        if not (args.verbose or args.vverbose or args.json):
            return E_FILE_LIMITATION

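The net effect on main(): the counts produced by find_capabilities() now flow into collect_metadata() as an argument instead of being patched onto meta afterwards. The resulting order of operations, condensed from the lines above:

capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor, counts)
meta.analysis.layout = compute_layout(rules, extractor, capabilities)
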
@@ -33,6 +33,7 @@ def render_meta(doc: rd.ResultDocument, ostream: StringIO):
        (width("md5", 22), width(doc.meta.sample.md5, 82)),
        ("sha1", doc.meta.sample.sha1),
        ("sha256", doc.meta.sample.sha256),
        ("analysis", doc.meta.flavor),
        ("os", doc.meta.analysis.os),
        ("format", doc.meta.analysis.format),
        ("arch", doc.meta.analysis.arch),

@@ -38,16 +38,6 @@ from capa.helpers import assert_never
from capa.features.freeze import AddressType


def dict_tuple_to_list_values(d: Dict) -> Dict:
    o = {}
    for k, v in d.items():
        if isinstance(v, tuple):
            o[k] = list(v)
        else:
            o[k] = v
    return o


def int_to_pb2(v: int) -> capa_pb2.Integer:
    if v < -2_147_483_648:
        raise ValueError(f"value underflow: {v}")
@@ -100,6 +90,51 @@ def addr_to_pb2(addr: frz.Address) -> capa_pb2.Address:
            token_offset=capa_pb2.Token_Offset(token=int_to_pb2(token), offset=offset),
        )

    elif addr.type is AddressType.PROCESS:
        assert isinstance(addr.value, tuple)
        ppid, pid = addr.value
        assert isinstance(ppid, int)
        assert isinstance(pid, int)
        return capa_pb2.Address(
            type=capa_pb2.AddressType.ADDRESSTYPE_PROCESS,
            ppid_pid=capa_pb2.Ppid_Pid(
                ppid=int_to_pb2(ppid),
                pid=int_to_pb2(pid),
            ),
        )

    elif addr.type is AddressType.THREAD:
        assert isinstance(addr.value, tuple)
        ppid, pid, tid = addr.value
        assert isinstance(ppid, int)
        assert isinstance(pid, int)
        assert isinstance(tid, int)
        return capa_pb2.Address(
            type=capa_pb2.AddressType.ADDRESSTYPE_THREAD,
            ppid_pid_tid=capa_pb2.Ppid_Pid_Tid(
                ppid=int_to_pb2(ppid),
                pid=int_to_pb2(pid),
                tid=int_to_pb2(tid),
            ),
        )

    elif addr.type is AddressType.CALL:
        assert isinstance(addr.value, tuple)
        ppid, pid, tid, id_ = addr.value
        assert isinstance(ppid, int)
        assert isinstance(pid, int)
        assert isinstance(tid, int)
        assert isinstance(id_, int)
        return capa_pb2.Address(
            type=capa_pb2.AddressType.ADDRESSTYPE_CALL,
            ppid_pid_tid_id=capa_pb2.Ppid_Pid_Tid_Id(
                ppid=int_to_pb2(ppid),
                pid=int_to_pb2(pid),
                tid=int_to_pb2(tid),
                id=int_to_pb2(id_),
            ),
        )

    elif addr.type is AddressType.NO_ADDRESS:
        # value == None, so only set type
        return capa_pb2.Address(type=capa_pb2.AddressType.ADDRESSTYPE_NO_ADDRESS)
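Dynamic addresses are tuples that grow with nesting depth: (ppid, pid) identifies a process, adding a tid identifies a thread, and adding a call id identifies a single API call. An illustrative conversion — the ids are made up, and constructing frz.Address(type=..., value=...) directly is an assumption based on the fields addr_to_pb2 reads:

# hypothetical ids: parent pid 4280, pid 5204, tid 2212, call id 103
addr = frz.Address(type=AddressType.CALL, value=(4280, 5204, 2212, 103))
pb = addr_to_pb2(addr)
assert pb.type == capa_pb2.AddressType.ADDRESSTYPE_CALL
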
@@ -117,49 +152,129 @@ def scope_to_pb2(scope: capa.rules.Scope) -> capa_pb2.Scope.ValueType:
        return capa_pb2.Scope.SCOPE_BASIC_BLOCK
    elif scope == capa.rules.Scope.INSTRUCTION:
        return capa_pb2.Scope.SCOPE_INSTRUCTION
    elif scope == capa.rules.Scope.PROCESS:
        return capa_pb2.Scope.SCOPE_PROCESS
    elif scope == capa.rules.Scope.THREAD:
        return capa_pb2.Scope.SCOPE_THREAD
    elif scope == capa.rules.Scope.CALL:
        return capa_pb2.Scope.SCOPE_CALL
    else:
        assert_never(scope)


def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
    return capa_pb2.Metadata(
        timestamp=str(meta.timestamp),
        version=meta.version,
        argv=meta.argv,
        sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()),
        analysis=capa_pb2.Analysis(
            format=meta.analysis.format,
            arch=meta.analysis.arch,
            os=meta.analysis.os,
            extractor=meta.analysis.extractor,
            rules=list(meta.analysis.rules),
            base_address=addr_to_pb2(meta.analysis.base_address),
            layout=capa_pb2.Layout(
                functions=[
                    capa_pb2.FunctionLayout(
                        address=addr_to_pb2(f.address),
                        matched_basic_blocks=[
                            capa_pb2.BasicBlockLayout(address=addr_to_pb2(bb.address)) for bb in f.matched_basic_blocks
                        ],
                    )
                    for f in meta.analysis.layout.functions
                ]
            ),
            feature_counts=capa_pb2.FeatureCounts(
                file=meta.analysis.feature_counts.file,
                functions=[
                    capa_pb2.FunctionFeatureCount(address=addr_to_pb2(f.address), count=f.count)
                    for f in meta.analysis.feature_counts.functions
                ],
            ),
            library_functions=[
                capa_pb2.LibraryFunction(address=addr_to_pb2(lf.address), name=lf.name)
                for lf in meta.analysis.library_functions
def scopes_to_pb2(scopes: capa.rules.Scopes) -> capa_pb2.Scopes:
    doc = {}
    if scopes.static:
        doc["static"] = scope_to_pb2(scopes.static)
    if scopes.dynamic:
        doc["dynamic"] = scope_to_pb2(scopes.dynamic)

    return google.protobuf.json_format.ParseDict(doc, capa_pb2.Scopes())


def flavor_to_pb2(flavor: rd.Flavor) -> capa_pb2.Flavor.ValueType:
    if flavor == rd.Flavor.STATIC:
        return capa_pb2.Flavor.FLAVOR_STATIC
    elif flavor == rd.Flavor.DYNAMIC:
        return capa_pb2.Flavor.FLAVOR_DYNAMIC
    else:
        assert_never(flavor)


def static_analysis_to_pb2(analysis: rd.StaticAnalysis) -> capa_pb2.StaticAnalysis:
    return capa_pb2.StaticAnalysis(
        format=analysis.format,
        arch=analysis.arch,
        os=analysis.os,
        extractor=analysis.extractor,
        rules=list(analysis.rules),
        base_address=addr_to_pb2(analysis.base_address),
        layout=capa_pb2.StaticLayout(
            functions=[
                capa_pb2.FunctionLayout(
                    address=addr_to_pb2(f.address),
                    matched_basic_blocks=[
                        capa_pb2.BasicBlockLayout(address=addr_to_pb2(bb.address)) for bb in f.matched_basic_blocks
                    ],
                )
                for f in analysis.layout.functions
            ]
        ),
        feature_counts=capa_pb2.StaticFeatureCounts(
            file=analysis.feature_counts.file,
            functions=[
                capa_pb2.FunctionFeatureCount(address=addr_to_pb2(f.address), count=f.count)
                for f in analysis.feature_counts.functions
            ],
        ),
        library_functions=[
            capa_pb2.LibraryFunction(address=addr_to_pb2(lf.address), name=lf.name) for lf in analysis.library_functions
        ],
    )


def dynamic_analysis_to_pb2(analysis: rd.DynamicAnalysis) -> capa_pb2.DynamicAnalysis:
    return capa_pb2.DynamicAnalysis(
        format=analysis.format,
        arch=analysis.arch,
        os=analysis.os,
        extractor=analysis.extractor,
        rules=list(analysis.rules),
        layout=capa_pb2.DynamicLayout(
            processes=[
                capa_pb2.ProcessLayout(
                    address=addr_to_pb2(p.address),
                    name=p.name,
                    matched_threads=[
                        capa_pb2.ThreadLayout(
                            address=addr_to_pb2(t.address),
                            matched_calls=[
                                capa_pb2.CallLayout(
                                    address=addr_to_pb2(c.address),
                                    name=c.name,
                                )
                                for c in t.matched_calls
                            ],
                        )
                        for t in p.matched_threads
                    ],
                )
                for p in analysis.layout.processes
            ]
        ),
        feature_counts=capa_pb2.DynamicFeatureCounts(
            file=analysis.feature_counts.file,
            processes=[
                capa_pb2.ProcessFeatureCount(address=addr_to_pb2(p.address), count=p.count)
                for p in analysis.feature_counts.processes
            ],
        ),
    )


def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
    if isinstance(meta.analysis, rd.StaticAnalysis):
        return capa_pb2.Metadata(
            timestamp=str(meta.timestamp),
            version=meta.version,
            argv=meta.argv,
            sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()),
            flavor=flavor_to_pb2(meta.flavor),
            static_analysis=static_analysis_to_pb2(meta.analysis),
        )
    elif isinstance(meta.analysis, rd.DynamicAnalysis):
        return capa_pb2.Metadata(
            timestamp=str(meta.timestamp),
            version=meta.version,
            argv=meta.argv,
            sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()),
            flavor=flavor_to_pb2(meta.flavor),
            dynamic_analysis=dynamic_analysis_to_pb2(meta.analysis),
        )
    else:
        assert_never(meta.analysis)

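A quick end-to-end sketch of how these converters can be driven, assuming a result document already saved as JSON; `doc_to_pb2` appears later in this diff, and the file paths here are hypothetical:

    from pathlib import Path

    import capa.render.proto as proto
    import capa.render.result_document as rd

    # parse a capa JSON result document (pydantic v2 API, matching the
    # model_dump() calls above), then re-encode it as protobuf bytes
    doc = rd.ResultDocument.model_validate_json(Path("result.json").read_text())
    pb = proto.doc_to_pb2(doc)
    Path("result.pb").write_bytes(pb.SerializeToString())
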
def statement_to_pb2(statement: rd.Statement) -> capa_pb2.StatementNode:
    if isinstance(statement, rd.RangeStatement):
        return capa_pb2.StatementNode(

@@ -390,15 +505,51 @@ def match_to_pb2(match: rd.Match) -> capa_pb2.Match:
        assert_never(match)

def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadata:
    # after manual type conversions to the RuleMetadata, we can rely on the protobuf json parser
    # conversions include tuple -> list and rd.Enum -> proto.enum
    meta = dict_tuple_to_list_values(rule_metadata.model_dump())
    meta["scope"] = scope_to_pb2(meta["scope"])
    meta["attack"] = list(map(dict_tuple_to_list_values, meta.get("attack", [])))
    meta["mbc"] = list(map(dict_tuple_to_list_values, meta.get("mbc", [])))


def attack_to_pb2(attack: rd.AttackSpec) -> capa_pb2.AttackSpec:
    return capa_pb2.AttackSpec(
        parts=list(attack.parts),
        tactic=attack.tactic,
        technique=attack.technique,
        subtechnique=attack.subtechnique,
        id=attack.id,
    )

    return google.protobuf.json_format.ParseDict(meta, capa_pb2.RuleMetadata())


def mbc_to_pb2(mbc: rd.MBCSpec) -> capa_pb2.MBCSpec:
    return capa_pb2.MBCSpec(
        parts=list(mbc.parts),
        objective=mbc.objective,
        behavior=mbc.behavior,
        method=mbc.method,
        id=mbc.id,
    )


def maec_to_pb2(maec: rd.MaecMetadata) -> capa_pb2.MaecMetadata:
    return capa_pb2.MaecMetadata(
        analysis_conclusion=maec.analysis_conclusion or "",
        analysis_conclusion_ov=maec.analysis_conclusion_ov or "",
        malware_family=maec.malware_family or "",
        malware_category=maec.malware_category or "",
        malware_category_ov=maec.malware_category_ov or "",
    )


def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadata:
    return capa_pb2.RuleMetadata(
        name=rule_metadata.name,
        namespace=rule_metadata.namespace or "",
        authors=rule_metadata.authors,
        attack=[attack_to_pb2(m) for m in rule_metadata.attack],
        mbc=[mbc_to_pb2(m) for m in rule_metadata.mbc],
        references=rule_metadata.references,
        examples=rule_metadata.examples,
        description=rule_metadata.description,
        lib=rule_metadata.lib,
        maec=maec_to_pb2(rule_metadata.maec),
        is_subscope_rule=rule_metadata.is_subscope_rule,
        scopes=scopes_to_pb2(rule_metadata.scopes),
    )

def doc_to_pb2(doc: rd.ResultDocument) -> capa_pb2.ResultDocument:

@@ -459,6 +610,24 @@ def addr_from_pb2(addr: capa_pb2.Address) -> frz.Address:
        offset = addr.token_offset.offset
        return frz.Address(type=frz.AddressType.DN_TOKEN_OFFSET, value=(token, offset))

    elif addr.type == capa_pb2.AddressType.ADDRESSTYPE_PROCESS:
        ppid = int_from_pb2(addr.ppid_pid.ppid)
        pid = int_from_pb2(addr.ppid_pid.pid)
        return frz.Address(type=frz.AddressType.PROCESS, value=(ppid, pid))

    elif addr.type == capa_pb2.AddressType.ADDRESSTYPE_THREAD:
        ppid = int_from_pb2(addr.ppid_pid_tid.ppid)
        pid = int_from_pb2(addr.ppid_pid_tid.pid)
        tid = int_from_pb2(addr.ppid_pid_tid.tid)
        return frz.Address(type=frz.AddressType.THREAD, value=(ppid, pid, tid))

    elif addr.type == capa_pb2.AddressType.ADDRESSTYPE_CALL:
        ppid = int_from_pb2(addr.ppid_pid_tid_id.ppid)
        pid = int_from_pb2(addr.ppid_pid_tid_id.pid)
        tid = int_from_pb2(addr.ppid_pid_tid_id.tid)
        id_ = int_from_pb2(addr.ppid_pid_tid_id.id)
        return frz.Address(type=frz.AddressType.CALL, value=(ppid, pid, tid, id_))

    elif addr.type == capa_pb2.AddressType.ADDRESSTYPE_NO_ADDRESS:
        return frz.Address(type=frz.AddressType.NO_ADDRESS, value=None)

@@ -475,63 +644,146 @@ def scope_from_pb2(scope: capa_pb2.Scope.ValueType) -> capa.rules.Scope:
        return capa.rules.Scope.BASIC_BLOCK
    elif scope == capa_pb2.Scope.SCOPE_INSTRUCTION:
        return capa.rules.Scope.INSTRUCTION
    elif scope == capa_pb2.Scope.SCOPE_PROCESS:
        return capa.rules.Scope.PROCESS
    elif scope == capa_pb2.Scope.SCOPE_THREAD:
        return capa.rules.Scope.THREAD
    elif scope == capa_pb2.Scope.SCOPE_CALL:
        return capa.rules.Scope.CALL
    else:
        assert_never(scope)

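The `int_from_pb2` helper used above is not shown in this hunk; a plausible sketch of the pair, written against the `Integer` oneof defined later in this diff (negative values ride the signed member):

    def int_to_pb2(v: int) -> capa_pb2.Integer:
        # Integer is a oneof: non-negative values in `u`, negative values in `i`
        if v < 0:
            return capa_pb2.Integer(i=v)
        return capa_pb2.Integer(u=v)


    def int_from_pb2(v: capa_pb2.Integer) -> int:
        # mirror of int_to_pb2: read whichever member of the oneof is set
        if v.WhichOneof("value") == "u":
            return v.u
        return v.i
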
def metadata_from_pb2(meta: capa_pb2.Metadata) -> rd.Metadata:
    return rd.Metadata(
        timestamp=datetime.datetime.fromisoformat(meta.timestamp),
        version=meta.version,
        argv=tuple(meta.argv) if meta.argv else None,
        sample=rd.Sample(
            md5=meta.sample.md5,
            sha1=meta.sample.sha1,
            sha256=meta.sample.sha256,
            path=meta.sample.path,
        ),
        analysis=rd.Analysis(
            format=meta.analysis.format,
            arch=meta.analysis.arch,
            os=meta.analysis.os,
            extractor=meta.analysis.extractor,
            rules=tuple(meta.analysis.rules),
            base_address=addr_from_pb2(meta.analysis.base_address),
            layout=rd.Layout(
                functions=tuple(
                    [
                        rd.FunctionLayout(
                            address=addr_from_pb2(f.address),
                            matched_basic_blocks=tuple(
                                [
                                    rd.BasicBlockLayout(address=addr_from_pb2(bb.address))
                                    for bb in f.matched_basic_blocks
                                ]
                            ),
                        )
                        for f in meta.analysis.layout.functions
                    ]
                )
            ),
            feature_counts=rd.FeatureCounts(
                file=meta.analysis.feature_counts.file,
                functions=tuple(
                    [
                        rd.FunctionFeatureCount(address=addr_from_pb2(f.address), count=f.count)
                        for f in meta.analysis.feature_counts.functions
                    ]
                ),
            ),
            library_functions=tuple(
def scopes_from_pb2(scopes: capa_pb2.Scopes) -> capa.rules.Scopes:
    return capa.rules.Scopes(
        static=scope_from_pb2(scopes.static) if scopes.static else None,
        dynamic=scope_from_pb2(scopes.dynamic) if scopes.dynamic else None,
    )


def flavor_from_pb2(flavor: capa_pb2.Flavor.ValueType) -> rd.Flavor:
    if flavor == capa_pb2.Flavor.FLAVOR_STATIC:
        return rd.Flavor.STATIC
    elif flavor == capa_pb2.Flavor.FLAVOR_DYNAMIC:
        return rd.Flavor.DYNAMIC
    else:
        assert_never(flavor)

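With `flavor_to_pb2` earlier in this diff and `flavor_from_pb2` here, the pair should round-trip; a one-line sanity check:

    for f in (rd.Flavor.STATIC, rd.Flavor.DYNAMIC):
        assert flavor_from_pb2(flavor_to_pb2(f)) == f
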
def static_analysis_from_pb2(analysis: capa_pb2.StaticAnalysis) -> rd.StaticAnalysis:
    return rd.StaticAnalysis(
        format=analysis.format,
        arch=analysis.arch,
        os=analysis.os,
        extractor=analysis.extractor,
        rules=tuple(analysis.rules),
        base_address=addr_from_pb2(analysis.base_address),
        layout=rd.StaticLayout(
            functions=tuple(
                [
                    rd.LibraryFunction(address=addr_from_pb2(lf.address), name=lf.name)
                    for lf in meta.analysis.library_functions
                    rd.FunctionLayout(
                        address=addr_from_pb2(f.address),
                        matched_basic_blocks=tuple(
                            [rd.BasicBlockLayout(address=addr_from_pb2(bb.address)) for bb in f.matched_basic_blocks]
                        ),
                    )
                    for f in analysis.layout.functions
                ]
            )
        ),
        feature_counts=rd.StaticFeatureCounts(
            file=analysis.feature_counts.file,
            functions=tuple(
                [
                    rd.FunctionFeatureCount(address=addr_from_pb2(f.address), count=f.count)
                    for f in analysis.feature_counts.functions
                ]
            ),
        ),
        library_functions=tuple(
            [rd.LibraryFunction(address=addr_from_pb2(lf.address), name=lf.name) for lf in analysis.library_functions]
        ),
    )

def dynamic_analysis_from_pb2(analysis: capa_pb2.DynamicAnalysis) -> rd.DynamicAnalysis:
    return rd.DynamicAnalysis(
        format=analysis.format,
        arch=analysis.arch,
        os=analysis.os,
        extractor=analysis.extractor,
        rules=tuple(analysis.rules),
        layout=rd.DynamicLayout(
            processes=tuple(
                [
                    rd.ProcessLayout(
                        address=addr_from_pb2(p.address),
                        name=p.name,
                        matched_threads=tuple(
                            [
                                rd.ThreadLayout(
                                    address=addr_from_pb2(t.address),
                                    matched_calls=tuple(
                                        [
                                            rd.CallLayout(address=addr_from_pb2(c.address), name=c.name)
                                            for c in t.matched_calls
                                        ]
                                    ),
                                )
                                for t in p.matched_threads
                            ]
                        ),
                    )
                    for p in analysis.layout.processes
                ]
            )
        ),
        feature_counts=rd.DynamicFeatureCounts(
            file=analysis.feature_counts.file,
            processes=tuple(
                [
                    rd.ProcessFeatureCount(address=addr_from_pb2(p.address), count=p.count)
                    for p in analysis.feature_counts.processes
                ]
            ),
        ),
    )

def metadata_from_pb2(meta: capa_pb2.Metadata) -> rd.Metadata:
    analysis_type = meta.WhichOneof("analysis2")
    if analysis_type == "static_analysis":
        return rd.Metadata(
            timestamp=datetime.datetime.fromisoformat(meta.timestamp),
            version=meta.version,
            argv=tuple(meta.argv) if meta.argv else None,
            sample=rd.Sample(
                md5=meta.sample.md5,
                sha1=meta.sample.sha1,
                sha256=meta.sample.sha256,
                path=meta.sample.path,
            ),
            flavor=flavor_from_pb2(meta.flavor),
            analysis=static_analysis_from_pb2(meta.static_analysis),
        )
    elif analysis_type == "dynamic_analysis":
        return rd.Metadata(
            timestamp=datetime.datetime.fromisoformat(meta.timestamp),
            version=meta.version,
            argv=tuple(meta.argv) if meta.argv else None,
            sample=rd.Sample(
                md5=meta.sample.md5,
                sha1=meta.sample.sha1,
                sha256=meta.sample.sha256,
                path=meta.sample.path,
            ),
            flavor=flavor_from_pb2(meta.flavor),
            analysis=dynamic_analysis_from_pb2(meta.dynamic_analysis),
        )
    else:
        assert_never(analysis_type)

def statement_from_pb2(statement: capa_pb2.StatementNode) -> rd.Statement:
    type_ = statement.WhichOneof("statement")

@@ -711,7 +963,7 @@ def rule_metadata_from_pb2(pb: capa_pb2.RuleMetadata) -> rd.RuleMetadata:
        name=pb.name,
        namespace=pb.namespace or None,
        authors=tuple(pb.authors),
        scope=scope_from_pb2(pb.scope),
        scopes=scopes_from_pb2(pb.scopes),
        attack=tuple([attack_from_pb2(attack) for attack in pb.attack]),
        mbc=tuple([mbc_from_pb2(mbc) for mbc in pb.mbc]),
        references=tuple(pb.references),

@@ -11,6 +11,9 @@ message Address {
    oneof value {
        Integer v = 2;
        Token_Offset token_offset = 3;
        Ppid_Pid ppid_pid = 4;
        Ppid_Pid_Tid ppid_pid_tid = 5;
        Ppid_Pid_Tid_Id ppid_pid_tid_id = 6;
    };
}

@@ -22,6 +25,9 @@ enum AddressType {
    ADDRESSTYPE_DN_TOKEN = 4;
    ADDRESSTYPE_DN_TOKEN_OFFSET = 5;
    ADDRESSTYPE_NO_ADDRESS = 6;
    ADDRESSTYPE_PROCESS = 7;
    ADDRESSTYPE_THREAD = 8;
    ADDRESSTYPE_CALL = 9;
}

message Analysis {

@@ -82,6 +88,25 @@ message CompoundStatement {
    optional string description = 2;
}

message DynamicAnalysis {
    string format = 1;
    string arch = 2;
    string os = 3;
    string extractor = 4;
    repeated string rules = 5;
    DynamicLayout layout = 6;
    DynamicFeatureCounts feature_counts = 7;
}

message DynamicFeatureCounts {
    uint64 file = 1;
    repeated ProcessFeatureCount processes = 2;
}

message DynamicLayout {
    repeated ProcessLayout processes = 1;
}

message ExportFeature {
    string type = 1;
    string export = 2;

@@ -192,12 +217,26 @@ message MatchFeature {
    optional string description = 3;
}

enum Flavor {
    FLAVOR_UNSPECIFIED = 0;
    FLAVOR_STATIC = 1;
    FLAVOR_DYNAMIC = 2;
}

message Metadata {
    string timestamp = 1;  // iso8601 format, like: 2019-01-01T00:00:00Z
    string version = 2;
    repeated string argv = 3;
    Sample sample = 4;
    Analysis analysis = 5;
    // deprecated in v7.0.
    // use analysis2 instead.
    Analysis analysis = 5 [deprecated = true];
    Flavor flavor = 6;
    oneof analysis2 {
        // use analysis2 instead of analysis (deprecated in v7.0).
        StaticAnalysis static_analysis = 7;
        DynamicAnalysis dynamic_analysis = 8;
    };
}

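Note the oneof semantics here: setting one member of analysis2 clears the other, which is what guarantees a serialized result is either static or dynamic but never both. A small sketch (the field values are hypothetical):

    m = capa_pb2.Metadata()
    m.static_analysis.format = "pe"      # selects static_analysis in the oneof
    m.dynamic_analysis.format = "cape"   # implicitly clears static_analysis
    assert m.WhichOneof("analysis2") == "dynamic_analysis"
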
message MnemonicFeature {

@@ -244,6 +283,17 @@ message OperandOffsetFeature {
    optional string description = 4;
}

message ProcessFeatureCount {
    Address address = 1;
    uint64 count = 2;
}

message ProcessLayout {
    Address address = 1;
    repeated ThreadLayout matched_threads = 2;
    string name = 3;
}

message PropertyFeature {
    string type = 1;
    string property_ = 2;  // property is a Python top-level decorator name

@@ -281,7 +331,9 @@ message RuleMetadata {
    string name = 1;
    string namespace = 2;
    repeated string authors = 3;
    Scope scope = 4;
    // deprecated in v7.0.
    // use scopes instead.
    Scope scope = 4 [deprecated = true];
    repeated AttackSpec attack = 5;
    repeated MBCSpec mbc = 6;
    repeated string references = 7;

@@ -290,6 +342,8 @@ message RuleMetadata {
    bool lib = 10;
    MaecMetadata maec = 11;
    bool is_subscope_rule = 12;
    // use scopes over scope (deprecated in v7.0).
    Scopes scopes = 13;
}

message Sample {

@@ -305,6 +359,14 @@ enum Scope {
    SCOPE_FUNCTION = 2;
    SCOPE_BASIC_BLOCK = 3;
    SCOPE_INSTRUCTION = 4;
    SCOPE_PROCESS = 5;
    SCOPE_THREAD = 6;
    SCOPE_CALL = 7;
}

message Scopes {
    optional Scope static = 1;
    optional Scope dynamic = 2;
}

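The optional qualifier gives both Scope fields explicit presence, so a rule that applies to only one flavor can leave the other side unset; a sketch:

    s = capa_pb2.Scopes(static=capa_pb2.Scope.SCOPE_FUNCTION)
    assert s.HasField("static")
    assert not s.HasField("dynamic")
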
message SectionFeature {

@@ -329,6 +391,27 @@ message StatementNode {
    };
}

message StaticAnalysis {
    string format = 1;
    string arch = 2;
    string os = 3;
    string extractor = 4;
    repeated string rules = 5;
    Address base_address = 6;
    StaticLayout layout = 7;
    StaticFeatureCounts feature_counts = 8;
    repeated LibraryFunction library_functions = 9;
}

message StaticFeatureCounts {
    uint64 file = 1;
    repeated FunctionFeatureCount functions = 2;
}

message StaticLayout {
    repeated FunctionLayout functions = 1;
}

message StringFeature {
    string type = 1;
    string string = 2;

@@ -347,6 +430,16 @@ message SubstringFeature {
    optional string description = 3;
}

message CallLayout {
    Address address = 1;
    string name = 2;
}

message ThreadLayout {
    Address address = 1;
    repeated CallLayout matched_calls = 2;
}

message Addresses { repeated Address address = 1; }

message Pair_Address_Match {

@@ -359,6 +452,24 @@ message Token_Offset {
    uint64 offset = 2;  // offset is always >= 0
}

message Ppid_Pid {
    Integer ppid = 1;
    Integer pid = 2;
}

message Ppid_Pid_Tid {
    Integer ppid = 1;
    Integer pid = 2;
    Integer tid = 3;
}

message Ppid_Pid_Tid_Id {
    Integer ppid = 1;
    Integer pid = 2;
    Integer tid = 3;
    Integer id = 4;
}

message Integer { oneof value { uint64 u = 1; sint64 i = 2; } }  // unsigned or signed int

message Number { oneof value { uint64 u = 1; sint64 i = 2; double f = 3; } }

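Integer keeps separate unsigned and signed members because uint64 covers values that sint64 cannot represent (and negatives need the signed member); a quick check of the convention sketched earlier:

    big = capa_pb2.Integer(u=2**64 - 1)  # beyond sint64's range
    neg = capa_pb2.Integer(i=-1)         # needs the signed member
    assert big.WhichOneof("value") == "u"
    assert neg.WhichOneof("value") == "i"
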
File diff suppressed because one or more lines are too long
@@ -31,6 +31,9 @@ class _AddressTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._En
    ADDRESSTYPE_DN_TOKEN: _AddressType.ValueType  # 4
    ADDRESSTYPE_DN_TOKEN_OFFSET: _AddressType.ValueType  # 5
    ADDRESSTYPE_NO_ADDRESS: _AddressType.ValueType  # 6
    ADDRESSTYPE_PROCESS: _AddressType.ValueType  # 7
    ADDRESSTYPE_THREAD: _AddressType.ValueType  # 8
    ADDRESSTYPE_CALL: _AddressType.ValueType  # 9

class AddressType(_AddressType, metaclass=_AddressTypeEnumTypeWrapper): ...

@@ -41,8 +44,28 @@ ADDRESSTYPE_FILE: AddressType.ValueType  # 3
ADDRESSTYPE_DN_TOKEN: AddressType.ValueType  # 4
ADDRESSTYPE_DN_TOKEN_OFFSET: AddressType.ValueType  # 5
ADDRESSTYPE_NO_ADDRESS: AddressType.ValueType  # 6
ADDRESSTYPE_PROCESS: AddressType.ValueType  # 7
ADDRESSTYPE_THREAD: AddressType.ValueType  # 8
ADDRESSTYPE_CALL: AddressType.ValueType  # 9
global___AddressType = AddressType

class _Flavor:
    ValueType = typing.NewType("ValueType", builtins.int)
    V: typing_extensions.TypeAlias = ValueType

class _FlavorEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_Flavor.ValueType], builtins.type):
    DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
    FLAVOR_UNSPECIFIED: _Flavor.ValueType  # 0
    FLAVOR_STATIC: _Flavor.ValueType  # 1
    FLAVOR_DYNAMIC: _Flavor.ValueType  # 2

class Flavor(_Flavor, metaclass=_FlavorEnumTypeWrapper): ...

FLAVOR_UNSPECIFIED: Flavor.ValueType  # 0
FLAVOR_STATIC: Flavor.ValueType  # 1
FLAVOR_DYNAMIC: Flavor.ValueType  # 2
global___Flavor = Flavor

class _Scope:
    ValueType = typing.NewType("ValueType", builtins.int)
    V: typing_extensions.TypeAlias = ValueType

@@ -54,6 +77,9 @@ class _ScopeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumType
    SCOPE_FUNCTION: _Scope.ValueType  # 2
    SCOPE_BASIC_BLOCK: _Scope.ValueType  # 3
    SCOPE_INSTRUCTION: _Scope.ValueType  # 4
    SCOPE_PROCESS: _Scope.ValueType  # 5
    SCOPE_THREAD: _Scope.ValueType  # 6
    SCOPE_CALL: _Scope.ValueType  # 7

class Scope(_Scope, metaclass=_ScopeEnumTypeWrapper): ...

@@ -62,6 +88,9 @@ SCOPE_FILE: Scope.ValueType  # 1
SCOPE_FUNCTION: Scope.ValueType  # 2
SCOPE_BASIC_BLOCK: Scope.ValueType  # 3
SCOPE_INSTRUCTION: Scope.ValueType  # 4
SCOPE_PROCESS: Scope.ValueType  # 5
SCOPE_THREAD: Scope.ValueType  # 6
SCOPE_CALL: Scope.ValueType  # 7
global___Scope = Scope

@typing_extensions.final
@@ -94,21 +123,33 @@ class Address(google.protobuf.message.Message):
    TYPE_FIELD_NUMBER: builtins.int
    V_FIELD_NUMBER: builtins.int
    TOKEN_OFFSET_FIELD_NUMBER: builtins.int
    PPID_PID_FIELD_NUMBER: builtins.int
    PPID_PID_TID_FIELD_NUMBER: builtins.int
    PPID_PID_TID_ID_FIELD_NUMBER: builtins.int
    type: global___AddressType.ValueType
    @property
    def v(self) -> global___Integer: ...
    @property
    def token_offset(self) -> global___Token_Offset: ...
    @property
    def ppid_pid(self) -> global___Ppid_Pid: ...
    @property
    def ppid_pid_tid(self) -> global___Ppid_Pid_Tid: ...
    @property
    def ppid_pid_tid_id(self) -> global___Ppid_Pid_Tid_Id: ...
    def __init__(
        self,
        *,
        type: global___AddressType.ValueType = ...,
        v: global___Integer | None = ...,
        token_offset: global___Token_Offset | None = ...,
        ppid_pid: global___Ppid_Pid | None = ...,
        ppid_pid_tid: global___Ppid_Pid_Tid | None = ...,
        ppid_pid_tid_id: global___Ppid_Pid_Tid_Id | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["token_offset", b"token_offset", "v", b"v", "value", b"value"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["token_offset", b"token_offset", "type", b"type", "v", b"v", "value", b"value"]) -> None: ...
    def WhichOneof(self, oneof_group: typing_extensions.Literal["value", b"value"]) -> typing_extensions.Literal["v", "token_offset"] | None: ...
    def HasField(self, field_name: typing_extensions.Literal["ppid_pid", b"ppid_pid", "ppid_pid_tid", b"ppid_pid_tid", "ppid_pid_tid_id", b"ppid_pid_tid_id", "token_offset", b"token_offset", "v", b"v", "value", b"value"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["ppid_pid", b"ppid_pid", "ppid_pid_tid", b"ppid_pid_tid", "ppid_pid_tid_id", b"ppid_pid_tid_id", "token_offset", b"token_offset", "type", b"type", "v", b"v", "value", b"value"]) -> None: ...
    def WhichOneof(self, oneof_group: typing_extensions.Literal["value", b"value"]) -> typing_extensions.Literal["v", "token_offset", "ppid_pid", "ppid_pid_tid", "ppid_pid_tid_id"] | None: ...

global___Address = Address

@@ -335,6 +376,78 @@ class CompoundStatement(google.protobuf.message.Message):

global___CompoundStatement = CompoundStatement

@typing_extensions.final
class DynamicAnalysis(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    FORMAT_FIELD_NUMBER: builtins.int
    ARCH_FIELD_NUMBER: builtins.int
    OS_FIELD_NUMBER: builtins.int
    EXTRACTOR_FIELD_NUMBER: builtins.int
    RULES_FIELD_NUMBER: builtins.int
    LAYOUT_FIELD_NUMBER: builtins.int
    FEATURE_COUNTS_FIELD_NUMBER: builtins.int
    format: builtins.str
    arch: builtins.str
    os: builtins.str
    extractor: builtins.str
    @property
    def rules(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: ...
    @property
    def layout(self) -> global___DynamicLayout: ...
    @property
    def feature_counts(self) -> global___DynamicFeatureCounts: ...
    def __init__(
        self,
        *,
        format: builtins.str = ...,
        arch: builtins.str = ...,
        os: builtins.str = ...,
        extractor: builtins.str = ...,
        rules: collections.abc.Iterable[builtins.str] | None = ...,
        layout: global___DynamicLayout | None = ...,
        feature_counts: global___DynamicFeatureCounts | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["feature_counts", b"feature_counts", "layout", b"layout"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["arch", b"arch", "extractor", b"extractor", "feature_counts", b"feature_counts", "format", b"format", "layout", b"layout", "os", b"os", "rules", b"rules"]) -> None: ...

global___DynamicAnalysis = DynamicAnalysis

@typing_extensions.final
class DynamicFeatureCounts(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    FILE_FIELD_NUMBER: builtins.int
    PROCESSES_FIELD_NUMBER: builtins.int
    file: builtins.int
    @property
    def processes(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___ProcessFeatureCount]: ...
    def __init__(
        self,
        *,
        file: builtins.int = ...,
        processes: collections.abc.Iterable[global___ProcessFeatureCount] | None = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing_extensions.Literal["file", b"file", "processes", b"processes"]) -> None: ...

global___DynamicFeatureCounts = DynamicFeatureCounts

@typing_extensions.final
class DynamicLayout(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    PROCESSES_FIELD_NUMBER: builtins.int
    @property
    def processes(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___ProcessLayout]: ...
    def __init__(
        self,
        *,
        processes: collections.abc.Iterable[global___ProcessLayout] | None = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing_extensions.Literal["processes", b"processes"]) -> None: ...

global___DynamicLayout = DynamicLayout

@typing_extensions.final
class ExportFeature(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

@@ -776,6 +889,9 @@ class Metadata(google.protobuf.message.Message):
    ARGV_FIELD_NUMBER: builtins.int
    SAMPLE_FIELD_NUMBER: builtins.int
    ANALYSIS_FIELD_NUMBER: builtins.int
    FLAVOR_FIELD_NUMBER: builtins.int
    STATIC_ANALYSIS_FIELD_NUMBER: builtins.int
    DYNAMIC_ANALYSIS_FIELD_NUMBER: builtins.int
    timestamp: builtins.str
    """iso8601 format, like: 2019-01-01T00:00:00Z"""
    version: builtins.str

@@ -784,7 +900,16 @@ class Metadata(google.protobuf.message.Message):
    @property
    def sample(self) -> global___Sample: ...
    @property
    def analysis(self) -> global___Analysis: ...
    def analysis(self) -> global___Analysis:
        """deprecated in v7.0.
        use analysis2 instead.
        """
    flavor: global___Flavor.ValueType
    @property
    def static_analysis(self) -> global___StaticAnalysis:
        """use analysis2 instead of analysis (deprecated in v7.0)."""
    @property
    def dynamic_analysis(self) -> global___DynamicAnalysis: ...
    def __init__(
        self,
        *,

@@ -793,9 +918,13 @@ class Metadata(google.protobuf.message.Message):
        argv: collections.abc.Iterable[builtins.str] | None = ...,
        sample: global___Sample | None = ...,
        analysis: global___Analysis | None = ...,
        flavor: global___Flavor.ValueType = ...,
        static_analysis: global___StaticAnalysis | None = ...,
        dynamic_analysis: global___DynamicAnalysis | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "sample", b"sample"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "argv", b"argv", "sample", b"sample", "timestamp", b"timestamp", "version", b"version"]) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "analysis2", b"analysis2", "dynamic_analysis", b"dynamic_analysis", "sample", b"sample", "static_analysis", b"static_analysis"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "analysis2", b"analysis2", "argv", b"argv", "dynamic_analysis", b"dynamic_analysis", "flavor", b"flavor", "sample", b"sample", "static_analysis", b"static_analysis", "timestamp", b"timestamp", "version", b"version"]) -> None: ...
    def WhichOneof(self, oneof_group: typing_extensions.Literal["analysis2", b"analysis2"]) -> typing_extensions.Literal["static_analysis", "dynamic_analysis"] | None: ...

global___Metadata = Metadata

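The regenerated stub narrows WhichOneof for the new group to a literal union, so a type checker can flag unhandled flavors; a sketch of what a checker now sees:

    which = meta.WhichOneof("analysis2")
    # inferred: Literal["static_analysis", "dynamic_analysis"] | None
    if which == "static_analysis":
        analysis = meta.static_analysis
    elif which == "dynamic_analysis":
        analysis = meta.dynamic_analysis
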
@@ -973,6 +1102,50 @@ class OperandOffsetFeature(google.protobuf.message.Message):

global___OperandOffsetFeature = OperandOffsetFeature

@typing_extensions.final
class ProcessFeatureCount(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    ADDRESS_FIELD_NUMBER: builtins.int
    COUNT_FIELD_NUMBER: builtins.int
    @property
    def address(self) -> global___Address: ...
    count: builtins.int
    def __init__(
        self,
        *,
        address: global___Address | None = ...,
        count: builtins.int = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["address", b"address"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["address", b"address", "count", b"count"]) -> None: ...

global___ProcessFeatureCount = ProcessFeatureCount

@typing_extensions.final
class ProcessLayout(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    ADDRESS_FIELD_NUMBER: builtins.int
    MATCHED_THREADS_FIELD_NUMBER: builtins.int
    NAME_FIELD_NUMBER: builtins.int
    @property
    def address(self) -> global___Address: ...
    @property
    def matched_threads(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___ThreadLayout]: ...
    name: builtins.str
    def __init__(
        self,
        *,
        address: global___Address | None = ...,
        matched_threads: collections.abc.Iterable[global___ThreadLayout] | None = ...,
        name: builtins.str = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["address", b"address"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["address", b"address", "matched_threads", b"matched_threads", "name", b"name"]) -> None: ...

global___ProcessLayout = ProcessLayout

@typing_extensions.final
class PropertyFeature(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

@@ -1136,11 +1309,15 @@ class RuleMetadata(google.protobuf.message.Message):
    LIB_FIELD_NUMBER: builtins.int
    MAEC_FIELD_NUMBER: builtins.int
    IS_SUBSCOPE_RULE_FIELD_NUMBER: builtins.int
    SCOPES_FIELD_NUMBER: builtins.int
    name: builtins.str
    namespace: builtins.str
    @property
    def authors(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: ...
    scope: global___Scope.ValueType
    """deprecated in v7.0.
    use scopes instead.
    """
    @property
    def attack(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___AttackSpec]: ...
    @property

@@ -1154,6 +1331,9 @@ class RuleMetadata(google.protobuf.message.Message):
    @property
    def maec(self) -> global___MaecMetadata: ...
    is_subscope_rule: builtins.bool
    @property
    def scopes(self) -> global___Scopes:
        """use scopes over scope (deprecated in v7.0)."""
    def __init__(
        self,
        *,

@@ -1169,9 +1349,10 @@ class RuleMetadata(google.protobuf.message.Message):
        lib: builtins.bool = ...,
        maec: global___MaecMetadata | None = ...,
        is_subscope_rule: builtins.bool = ...,
        scopes: global___Scopes | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["maec", b"maec"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["attack", b"attack", "authors", b"authors", "description", b"description", "examples", b"examples", "is_subscope_rule", b"is_subscope_rule", "lib", b"lib", "maec", b"maec", "mbc", b"mbc", "name", b"name", "namespace", b"namespace", "references", b"references", "scope", b"scope"]) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["maec", b"maec", "scopes", b"scopes"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["attack", b"attack", "authors", b"authors", "description", b"description", "examples", b"examples", "is_subscope_rule", b"is_subscope_rule", "lib", b"lib", "maec", b"maec", "mbc", b"mbc", "name", b"name", "namespace", b"namespace", "references", b"references", "scope", b"scope", "scopes", b"scopes"]) -> None: ...

global___RuleMetadata = RuleMetadata

@@ -1199,6 +1380,29 @@ class Sample(google.protobuf.message.Message):

global___Sample = Sample

@typing_extensions.final
class Scopes(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    STATIC_FIELD_NUMBER: builtins.int
    DYNAMIC_FIELD_NUMBER: builtins.int
    static: global___Scope.ValueType
    dynamic: global___Scope.ValueType
    def __init__(
        self,
        *,
        static: global___Scope.ValueType | None = ...,
        dynamic: global___Scope.ValueType | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["_dynamic", b"_dynamic", "_static", b"_static", "dynamic", b"dynamic", "static", b"static"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["_dynamic", b"_dynamic", "_static", b"_static", "dynamic", b"dynamic", "static", b"static"]) -> None: ...
    @typing.overload
    def WhichOneof(self, oneof_group: typing_extensions.Literal["_dynamic", b"_dynamic"]) -> typing_extensions.Literal["dynamic"] | None: ...
    @typing.overload
    def WhichOneof(self, oneof_group: typing_extensions.Literal["_static", b"_static"]) -> typing_extensions.Literal["static"] | None: ...

global___Scopes = Scopes

@typing_extensions.final
class SectionFeature(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

@@ -1278,6 +1482,86 @@ class StatementNode(google.protobuf.message.Message):

global___StatementNode = StatementNode

@typing_extensions.final
class StaticAnalysis(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    FORMAT_FIELD_NUMBER: builtins.int
    ARCH_FIELD_NUMBER: builtins.int
    OS_FIELD_NUMBER: builtins.int
    EXTRACTOR_FIELD_NUMBER: builtins.int
    RULES_FIELD_NUMBER: builtins.int
    BASE_ADDRESS_FIELD_NUMBER: builtins.int
    LAYOUT_FIELD_NUMBER: builtins.int
    FEATURE_COUNTS_FIELD_NUMBER: builtins.int
    LIBRARY_FUNCTIONS_FIELD_NUMBER: builtins.int
    format: builtins.str
    arch: builtins.str
    os: builtins.str
    extractor: builtins.str
    @property
    def rules(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: ...
    @property
    def base_address(self) -> global___Address: ...
    @property
    def layout(self) -> global___StaticLayout: ...
    @property
    def feature_counts(self) -> global___StaticFeatureCounts: ...
    @property
    def library_functions(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___LibraryFunction]: ...
    def __init__(
        self,
        *,
        format: builtins.str = ...,
        arch: builtins.str = ...,
        os: builtins.str = ...,
        extractor: builtins.str = ...,
        rules: collections.abc.Iterable[builtins.str] | None = ...,
        base_address: global___Address | None = ...,
        layout: global___StaticLayout | None = ...,
        feature_counts: global___StaticFeatureCounts | None = ...,
        library_functions: collections.abc.Iterable[global___LibraryFunction] | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["base_address", b"base_address", "feature_counts", b"feature_counts", "layout", b"layout"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["arch", b"arch", "base_address", b"base_address", "extractor", b"extractor", "feature_counts", b"feature_counts", "format", b"format", "layout", b"layout", "library_functions", b"library_functions", "os", b"os", "rules", b"rules"]) -> None: ...

global___StaticAnalysis = StaticAnalysis

@typing_extensions.final
class StaticFeatureCounts(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    FILE_FIELD_NUMBER: builtins.int
    FUNCTIONS_FIELD_NUMBER: builtins.int
    file: builtins.int
    @property
    def functions(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___FunctionFeatureCount]: ...
    def __init__(
        self,
        *,
        file: builtins.int = ...,
        functions: collections.abc.Iterable[global___FunctionFeatureCount] | None = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing_extensions.Literal["file", b"file", "functions", b"functions"]) -> None: ...

global___StaticFeatureCounts = StaticFeatureCounts

@typing_extensions.final
class StaticLayout(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    FUNCTIONS_FIELD_NUMBER: builtins.int
    @property
    def functions(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___FunctionLayout]: ...
    def __init__(
        self,
        *,
        functions: collections.abc.Iterable[global___FunctionLayout] | None = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing_extensions.Literal["functions", b"functions"]) -> None: ...

global___StaticLayout = StaticLayout

@typing_extensions.final
class StringFeature(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

@@ -1347,6 +1631,47 @@ class SubstringFeature(google.protobuf.message.Message):

global___SubstringFeature = SubstringFeature

@typing_extensions.final
class CallLayout(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    ADDRESS_FIELD_NUMBER: builtins.int
    NAME_FIELD_NUMBER: builtins.int
    @property
    def address(self) -> global___Address: ...
    name: builtins.str
    def __init__(
        self,
        *,
        address: global___Address | None = ...,
        name: builtins.str = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["address", b"address"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["address", b"address", "name", b"name"]) -> None: ...

global___CallLayout = CallLayout

@typing_extensions.final
class ThreadLayout(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    ADDRESS_FIELD_NUMBER: builtins.int
    MATCHED_CALLS_FIELD_NUMBER: builtins.int
    @property
    def address(self) -> global___Address: ...
    @property
    def matched_calls(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___CallLayout]: ...
    def __init__(
        self,
        *,
        address: global___Address | None = ...,
        matched_calls: collections.abc.Iterable[global___CallLayout] | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["address", b"address"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["address", b"address", "matched_calls", b"matched_calls"]) -> None: ...

global___ThreadLayout = ThreadLayout

@typing_extensions.final
class Addresses(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

@@ -1405,6 +1730,81 @@ class Token_Offset(google.protobuf.message.Message):

global___Token_Offset = Token_Offset

@typing_extensions.final
class Ppid_Pid(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    PPID_FIELD_NUMBER: builtins.int
    PID_FIELD_NUMBER: builtins.int
    @property
    def ppid(self) -> global___Integer: ...
    @property
    def pid(self) -> global___Integer: ...
    def __init__(
        self,
        *,
        ppid: global___Integer | None = ...,
        pid: global___Integer | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["pid", b"pid", "ppid", b"ppid"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["pid", b"pid", "ppid", b"ppid"]) -> None: ...

global___Ppid_Pid = Ppid_Pid

@typing_extensions.final
class Ppid_Pid_Tid(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    PPID_FIELD_NUMBER: builtins.int
    PID_FIELD_NUMBER: builtins.int
    TID_FIELD_NUMBER: builtins.int
    @property
    def ppid(self) -> global___Integer: ...
    @property
    def pid(self) -> global___Integer: ...
    @property
    def tid(self) -> global___Integer: ...
    def __init__(
        self,
        *,
        ppid: global___Integer | None = ...,
        pid: global___Integer | None = ...,
        tid: global___Integer | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["pid", b"pid", "ppid", b"ppid", "tid", b"tid"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["pid", b"pid", "ppid", b"ppid", "tid", b"tid"]) -> None: ...

global___Ppid_Pid_Tid = Ppid_Pid_Tid

@typing_extensions.final
class Ppid_Pid_Tid_Id(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    PPID_FIELD_NUMBER: builtins.int
    PID_FIELD_NUMBER: builtins.int
    TID_FIELD_NUMBER: builtins.int
    ID_FIELD_NUMBER: builtins.int
    @property
    def ppid(self) -> global___Integer: ...
    @property
    def pid(self) -> global___Integer: ...
    @property
    def tid(self) -> global___Integer: ...
    @property
    def id(self) -> global___Integer: ...
    def __init__(
        self,
        *,
        ppid: global___Integer | None = ...,
        pid: global___Integer | None = ...,
        tid: global___Integer | None = ...,
        id: global___Integer | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing_extensions.Literal["id", b"id", "pid", b"pid", "ppid", b"ppid", "tid", b"tid"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing_extensions.Literal["id", b"id", "pid", b"pid", "ppid", b"ppid", "tid", b"tid"]) -> None: ...

global___Ppid_Pid_Tid_Id = Ppid_Pid_Tid_Id

@typing_extensions.final
class Integer(google.protobuf.message.Message):
    DESCRIPTOR: google.protobuf.descriptor.Descriptor

@@ -7,10 +7,12 @@
# See the License for the specific language governing permissions and limitations under the License.
import datetime
import collections
from enum import Enum
from typing import Dict, List, Tuple, Union, Literal, Optional
from pathlib import Path

from pydantic import Field, BaseModel, ConfigDict
from typing_extensions import TypeAlias

import capa.rules
import capa.engine

@@ -47,10 +49,33 @@ class FunctionLayout(Model):
    matched_basic_blocks: Tuple[BasicBlockLayout, ...]


class Layout(Model):
class CallLayout(Model):
    address: frz.Address
    name: str


class ThreadLayout(Model):
    address: frz.Address
    matched_calls: Tuple[CallLayout, ...]


class ProcessLayout(Model):
    address: frz.Address
    name: str
    matched_threads: Tuple[ThreadLayout, ...]


class StaticLayout(Model):
    functions: Tuple[FunctionLayout, ...]


class DynamicLayout(Model):
    processes: Tuple[ProcessLayout, ...]


Layout: TypeAlias = Union[StaticLayout, DynamicLayout]


class LibraryFunction(Model):
    address: frz.Address
    name: str

@@ -61,31 +86,73 @@ class FunctionFeatureCount(Model):
    count: int


class FeatureCounts(Model):
class ProcessFeatureCount(Model):
    address: frz.Address
    count: int


class StaticFeatureCounts(Model):
    file: int
    functions: Tuple[FunctionFeatureCount, ...]


class Analysis(Model):
class DynamicFeatureCounts(Model):
    file: int
    processes: Tuple[ProcessFeatureCount, ...]


FeatureCounts: TypeAlias = Union[StaticFeatureCounts, DynamicFeatureCounts]


class StaticAnalysis(Model):
    format: str
    arch: str
    os: str
    extractor: str
    rules: Tuple[str, ...]
    base_address: frz.Address
    layout: Layout
    feature_counts: FeatureCounts
    layout: StaticLayout
    feature_counts: StaticFeatureCounts
    library_functions: Tuple[LibraryFunction, ...]


class DynamicAnalysis(Model):
    format: str
    arch: str
    os: str
    extractor: str
    rules: Tuple[str, ...]
    layout: DynamicLayout
    feature_counts: DynamicFeatureCounts


Analysis: TypeAlias = Union[StaticAnalysis, DynamicAnalysis]


class Flavor(str, Enum):
    STATIC = "static"
    DYNAMIC = "dynamic"


class Metadata(Model):
    timestamp: datetime.datetime
    version: str
    argv: Optional[Tuple[str, ...]]
    sample: Sample
    flavor: Flavor
    analysis: Analysis


class StaticMetadata(Metadata):
    flavor: Flavor = Flavor.STATIC
    analysis: StaticAnalysis


class DynamicMetadata(Metadata):
    flavor: Flavor = Flavor.DYNAMIC
    analysis: DynamicAnalysis


class CompoundStatementType:
    AND = "and"
    OR = "or"

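The Metadata subclasses pin the flavor, so a dynamic analysis can't be paired with a static flavor by accident; a construction sketch (the sample and analysis values are hypothetical, built elsewhere):

    meta = rd.DynamicMetadata(
        timestamp=datetime.datetime.now(),
        version="7.0.0",
        argv=None,
        sample=sample,              # an rd.Sample built elsewhere (hypothetical)
        analysis=dynamic_analysis,  # an rd.DynamicAnalysis built elsewhere (hypothetical)
    )
    assert meta.flavor == rd.Flavor.DYNAMIC
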
@@ -155,7 +222,7 @@ def statement_from_capa(node: capa.engine.Statement) -> Statement:
            description=node.description,
            min=node.min,
            max=node.max,
            child=frz.feature_from_capa(node.child),
            child=frzf.feature_from_capa(node.child),
        )

    elif isinstance(node, capa.engine.Subscope):

@@ -181,7 +248,7 @@ def node_from_capa(node: Union[capa.engine.Statement, capa.engine.Feature]) -> N
        return StatementNode(statement=statement_from_capa(node))

    elif isinstance(node, capa.engine.Feature):
        return FeatureNode(feature=frz.feature_from_capa(node))
        return FeatureNode(feature=frzf.feature_from_capa(node))

    else:
        assert_never(node)

@@ -308,9 +375,11 @@ class Match(FrozenModel):
            # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
            #
            # note! replace `node`
            # subscopes cannot have both a static and dynamic scope set
            assert None in (rule.scopes.static, rule.scopes.dynamic)
            node = StatementNode(
                statement=SubscopeStatement(
                    scope=rule.meta["scope"],
                    scope=rule.scopes.static or rule.scopes.dynamic,
                )
            )

@@ -505,7 +574,7 @@ class RuleMetadata(FrozenModel):
    name: str
    namespace: Optional[str] = None
    authors: Tuple[str, ...]
    scope: capa.rules.Scope
    scopes: capa.rules.Scopes
    attack: Tuple[AttackSpec, ...] = Field(alias="att&ck")
    mbc: Tuple[MBCSpec, ...]
    references: Tuple[str, ...]

@@ -522,7 +591,7 @@ class RuleMetadata(FrozenModel):
            name=rule.meta.get("name"),
            namespace=rule.meta.get("namespace"),
            authors=rule.meta.get("authors"),
            scope=capa.rules.Scope(rule.meta.get("scope")),
            scopes=capa.rules.Scopes.from_dict(rule.meta.get("scopes")),
            attack=tuple(map(AttackSpec.from_str, rule.meta.get("att&ck", []))),
            mbc=tuple(map(MBCSpec.from_str, rule.meta.get("mbc", []))),
            references=rule.meta.get("references", []),

@@ -24,6 +24,11 @@ def bold2(s: str) -> str:
    return termcolor.colored(s, "green")


def mute(s: str) -> str:
    """draw attention away from the given string"""
    return termcolor.colored(s, "dark_grey")


def warn(s: str) -> str:
    return termcolor.colored(s, "yellow")

@@ -22,7 +22,7 @@ Unless required by applicable law or agreed to in writing, software distributed
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""
import enum
from typing import cast

import tabulate

@@ -54,13 +54,98 @@ def format_address(address: frz.Address) -> str:
        assert isinstance(token, int)
        assert isinstance(offset, int)
        return f"token({capa.helpers.hex(token)})+{capa.helpers.hex(offset)}"
    elif address.type == frz.AddressType.DEX_METHOD_INDEX:
        assert isinstance(address.value, int)
        return f"method({capa.helpers.hex(address.value)})"
    elif address.type == frz.AddressType.DEX_CLASS_INDEX:
        assert isinstance(address.value, int)
        return f"class({capa.helpers.hex(address.value)})"
    elif address.type == frz.AddressType.PROCESS:
        assert isinstance(address.value, tuple)
        ppid, pid = address.value
        assert isinstance(ppid, int)
        assert isinstance(pid, int)
        return f"process{{pid:{pid}}}"
    elif address.type == frz.AddressType.THREAD:
        assert isinstance(address.value, tuple)
        ppid, pid, tid = address.value
        assert isinstance(ppid, int)
        assert isinstance(pid, int)
        assert isinstance(tid, int)
        return f"process{{pid:{pid},tid:{tid}}}"
    elif address.type == frz.AddressType.CALL:
        assert isinstance(address.value, tuple)
        ppid, pid, tid, id_ = address.value
        return f"process{{pid:{pid},tid:{tid},call:{id_}}}"
    elif address.type == frz.AddressType.NO_ADDRESS:
        return "global"
    else:
        raise ValueError("unexpected address type")

def render_meta(ostream, doc: rd.ResultDocument):
def _get_process_name(layout: rd.DynamicLayout, addr: frz.Address) -> str:
    for p in layout.processes:
        if p.address == addr:
            return p.name

    raise ValueError("name not found for process", addr)


def _get_call_name(layout: rd.DynamicLayout, addr: frz.Address) -> str:
    call = addr.to_capa()
    assert isinstance(call, capa.features.address.DynamicCallAddress)

    thread = frz.Address.from_capa(call.thread)
    process = frz.Address.from_capa(call.thread.process)

    # danger: O(n**3)
    for p in layout.processes:
        if p.address == process:
            for t in p.matched_threads:
                if t.address == thread:
                    for c in t.matched_calls:
                        if c.address == addr:
                            return c.name
    raise ValueError("name not found for call", addr)

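If the O(n**3) scan flagged above ever becomes a hotspot, the layout could be flattened into a lookup table once per document; a sketch of that alternative (not what this patch does):

    def build_call_index(layout: rd.DynamicLayout) -> dict:
        # one pass over the layout; every lookup afterwards is O(1)
        index = {}
        for p in layout.processes:
            for t in p.matched_threads:
                for c in t.matched_calls:
                    index[c.address] = c.name
        return index
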
def render_process(layout: rd.DynamicLayout, addr: frz.Address) -> str:
    process = addr.to_capa()
    assert isinstance(process, capa.features.address.ProcessAddress)
    name = _get_process_name(layout, addr)
    return f"{name}{{pid:{process.pid}}}"


def render_thread(layout: rd.DynamicLayout, addr: frz.Address) -> str:
    thread = addr.to_capa()
    assert isinstance(thread, capa.features.address.ThreadAddress)
    name = _get_process_name(layout, frz.Address.from_capa(thread.process))
    return f"{name}{{pid:{thread.process.pid},tid:{thread.tid}}}"


def render_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
    call = addr.to_capa()
    assert isinstance(call, capa.features.address.DynamicCallAddress)

    pname = _get_process_name(layout, frz.Address.from_capa(call.thread.process))
    cname = _get_call_name(layout, addr)

    fname, _, rest = cname.partition("(")
    args, _, rest = rest.rpartition(")")

    s = []
    s.append(f"{fname}(")
    for arg in args.split(", "):
        s.append(f"  {arg},")
    s.append(f"){rest}")

    newline = "\n"
    return (
        f"{pname}{{pid:{call.thread.process.pid},tid:{call.thread.tid},call:{call.id}}}\n{rutils.mute(newline.join(s))}"
    )

def render_static_meta(ostream, meta: rd.StaticMetadata):
    """
    like:

@@ -73,36 +158,90 @@ def render_meta(ostream, doc: rd.ResultDocument):
        os                      windows
        format                  pe
        arch                    amd64
        analysis                static
        extractor               VivisectFeatureExtractor
        base address            0x10000000
        rules                   (embedded rules)
        function count          42
        total feature count     1918
    """

    rows = [
        ("md5", doc.meta.sample.md5),
        ("sha1", doc.meta.sample.sha1),
        ("sha256", doc.meta.sample.sha256),
        ("path", doc.meta.sample.path),
        ("timestamp", doc.meta.timestamp),
        ("capa version", doc.meta.version),
        ("os", doc.meta.analysis.os),
        ("format", doc.meta.analysis.format),
        ("arch", doc.meta.analysis.arch),
        ("extractor", doc.meta.analysis.extractor),
        ("base address", format_address(doc.meta.analysis.base_address)),
        ("rules", "\n".join(doc.meta.analysis.rules)),
        ("function count", len(doc.meta.analysis.feature_counts.functions)),
        ("library function count", len(doc.meta.analysis.library_functions)),
        ("md5", meta.sample.md5),
        ("sha1", meta.sample.sha1),
        ("sha256", meta.sample.sha256),
        ("path", meta.sample.path),
        ("timestamp", meta.timestamp),
        ("capa version", meta.version),
        ("os", meta.analysis.os),
        ("format", meta.analysis.format),
        ("arch", meta.analysis.arch),
        ("analysis", meta.flavor.value),
        ("extractor", meta.analysis.extractor),
        ("base address", format_address(meta.analysis.base_address)),
        ("rules", "\n".join(meta.analysis.rules)),
        ("function count", len(meta.analysis.feature_counts.functions)),
        ("library function count", len(meta.analysis.library_functions)),
        (
            "total feature count",
            doc.meta.analysis.feature_counts.file + sum(f.count for f in doc.meta.analysis.feature_counts.functions),
            meta.analysis.feature_counts.file + sum(f.count for f in meta.analysis.feature_counts.functions),
        ),
    ]

    ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))


def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
    """
    like:

        md5                     84882c9d43e23d63b82004fae74ebb61
        sha1                    c6fb3b50d946bec6f391aefa4e54478cf8607211
        sha256                  5eced7367ed63354b4ed5c556e2363514293f614c2c2eb187273381b2ef5f0f9
        path                    /tmp/packed-report.json
        timestamp               2023-07-17T10:17:05.796933
        capa version            0.0.0
        os                      windows
        format                  pe
        arch                    amd64
        extractor               CAPEFeatureExtractor
        rules                   (embedded rules)
        process count           42
        total feature count     1918
    """

    rows = [
        ("md5", meta.sample.md5),
        ("sha1", meta.sample.sha1),
        ("sha256", meta.sample.sha256),
        ("path", meta.sample.path),
        ("timestamp", meta.timestamp),
        ("capa version", meta.version),
        ("os", meta.analysis.os),
        ("format", meta.analysis.format),
        ("arch", meta.analysis.arch),
        ("analysis", meta.flavor.value),
        ("extractor", meta.analysis.extractor),
        ("rules", "\n".join(meta.analysis.rules)),
        ("process count", len(meta.analysis.feature_counts.processes)),
        (
            "total feature count",
            meta.analysis.feature_counts.file + sum(p.count for p in meta.analysis.feature_counts.processes),
        ),
    ]

    ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))

def render_meta(osstream, doc: rd.ResultDocument):
|
||||
if doc.meta.flavor == rd.Flavor.STATIC:
|
||||
render_static_meta(osstream, cast(rd.StaticMetadata, doc.meta))
|
||||
elif doc.meta.flavor == rd.Flavor.DYNAMIC:
|
||||
render_dynamic_meta(osstream, cast(rd.DynamicMetadata, doc.meta))
|
||||
else:
|
||||
raise ValueError("invalid meta analysis")
|
||||
|
||||
|
||||
def render_rules(ostream, doc: rd.ResultDocument):
|
||||
"""
|
||||
like:
|
||||
@@ -126,22 +265,55 @@ def render_rules(ostream, doc: rd.ResultDocument):
         had_match = True

         rows = []
-        for key in ("namespace", "description", "scope"):
-            v = getattr(rule.meta, key)
-            if not v:
-                continue
-
-            if isinstance(v, list) and len(v) == 1:
-                v = v[0]
+        ns = rule.meta.namespace
+        if ns:
+            rows.append(("namespace", ns))

-            if isinstance(v, enum.Enum):
-                v = v.value
+        desc = rule.meta.description
+        if desc:
+            rows.append(("description", desc))

-            rows.append((key, v))
+        if doc.meta.flavor == rd.Flavor.STATIC:
+            scope = rule.meta.scopes.static
+        elif doc.meta.flavor == rd.Flavor.DYNAMIC:
+            scope = rule.meta.scopes.dynamic
+        else:
+            raise ValueError("invalid meta analysis")
+        if scope:
+            rows.append(("scope", scope.value))

-        if rule.meta.scope != capa.rules.FILE_SCOPE:
+        if capa.rules.Scope.FILE not in rule.meta.scopes:
             locations = [m[0] for m in doc.rules[rule.meta.name].matches]
-            rows.append(("matches", "\n".join(map(format_address, locations))))
+            lines = []
+
+            if doc.meta.flavor == rd.Flavor.STATIC:
+                lines = [format_address(loc) for loc in locations]
+            elif doc.meta.flavor == rd.Flavor.DYNAMIC:
+                assert rule.meta.scopes.dynamic is not None
+                assert isinstance(doc.meta.analysis.layout, rd.DynamicLayout)
+
+                if rule.meta.scopes.dynamic == capa.rules.Scope.PROCESS:
+                    lines = [render_process(doc.meta.analysis.layout, loc) for loc in locations]
+                elif rule.meta.scopes.dynamic == capa.rules.Scope.THREAD:
+                    lines = [render_thread(doc.meta.analysis.layout, loc) for loc in locations]
+                elif rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
+                    # because we're only in verbose mode, we won't show the full call details (name, args, retval)
+                    # we'll only show the details of the thread in which the calls are found.
+                    # so select the thread locations and render those.
+                    thread_locations = set()
+                    for loc in locations:
+                        cloc = loc.to_capa()
+                        assert isinstance(cloc, capa.features.address.DynamicCallAddress)
+                        thread_locations.add(frz.Address.from_capa(cloc.thread))
+
+                    lines = [render_thread(doc.meta.analysis.layout, loc) for loc in thread_locations]
+                else:
+                    capa.helpers.assert_never(rule.meta.scopes.dynamic)
+            else:
+                capa.helpers.assert_never(doc.meta.flavor)
+
+            rows.append(("matches", "\n".join(lines)))

         ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
         ostream.write("\n")

@@ -5,7 +5,8 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
+import textwrap
 from typing import Dict, Iterable, Optional

 import tabulate

@@ -22,8 +23,29 @@ import capa.features.freeze.features as frzf
 from capa.rules import RuleSet
 from capa.engine import MatchResults

 logger = logging.getLogger(__name__)


-def render_locations(ostream, locations: Iterable[frz.Address]):
+def hanging_indent(s: str, indent: int) -> str:
+    """
+    indent the given string, except the first line,
+    such as if the string finishes an existing line.
+
+    e.g.,
+
+        EXISTINGSTUFFHERE + hanging_indent("xxxx...", 1)
+
+    becomes:
+
+        EXISTINGSTUFFHERExxxxx
+          xxxxxx
+          xxxxxx
+    """
+    prefix = " " * indent
+    return textwrap.indent(s, prefix=prefix)[len(prefix) :]
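# Editor's note: a quick usage sketch of hanging_indent; not part of the diff.
text = "evil.exe\npid: 31337\nppid: 4"
print("process: " + hanging_indent(text, indent=4))
# process: evil.exe
#     pid: 31337
#     ppid: 4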


+def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address], indent: int):
+    import capa.render.verbose as v

     # its possible to have an empty locations array here,
@@ -35,9 +57,23 @@ def render_locations(ostream, locations: Iterable[frz.Address]):
         return

     ostream.write(" @ ")
+    location0 = locations[0]

     if len(locations) == 1:
-        ostream.write(v.format_address(locations[0]))
+        location = locations[0]
+
+        if location.type == frz.AddressType.CALL:
+            assert isinstance(layout, rd.DynamicLayout)
+            ostream.write(hanging_indent(v.render_call(layout, location), indent + 1))
+        else:
+            ostream.write(v.format_address(locations[0]))
+
+    elif location0.type == frz.AddressType.CALL and len(locations) > 1:
+        location = locations[0]
+
+        assert isinstance(layout, rd.DynamicLayout)
+        s = f"{v.render_call(layout, location)}\nand {(len(locations) - 1)} more..."
+        ostream.write(hanging_indent(s, indent + 1))

     elif len(locations) > 4:
         # don't display too many locations, because it becomes very noisy.
@@ -52,7 +88,7 @@ def render_locations(ostream, locations: Iterable[frz.Address]):
     raise RuntimeError("unreachable")


-def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0):
+def render_statement(ostream, layout: rd.Layout, match: rd.Match, statement: rd.Statement, indent: int):
     ostream.write(" " * indent)

     if isinstance(statement, rd.SubscopeStatement):
@@ -114,7 +150,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0

         if statement.description:
             ostream.write(f" = {statement.description}")
-        render_locations(ostream, match.locations)
+        render_locations(ostream, layout, match.locations, indent)
         ostream.writeln("")

     else:
@@ -125,7 +161,9 @@ def render_string_value(s: str) -> str:
     return f'"{capa.features.common.escape_string(s)}"'


-def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0):
+def render_feature(
+    ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, feature: frzf.Feature, indent: int
+):
     ostream.write(" " * indent)

     key = feature.type
@@ -176,8 +214,17 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0):
             ostream.write(capa.rules.DESCRIPTION_SEPARATOR)
             ostream.write(feature.description)

-        if not isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
-            render_locations(ostream, match.locations)
+        if isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
+            # don't show the location of these global features
+            pass
+        elif isinstance(layout, rd.DynamicLayout) and rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
+            # if we're in call scope, then the call will have been rendered at the top
+            # of the output, so don't re-render it again for each feature.
+            pass
+        elif isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
+            pass
+        else:
+            render_locations(ostream, layout, match.locations, indent)
         ostream.write("\n")
     else:
         # like:
@@ -193,15 +240,19 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0):
             ostream.write(" " * (indent + 1))
             ostream.write("- ")
             ostream.write(rutils.bold2(render_string_value(capture)))
-            render_locations(ostream, locations)
+            if isinstance(layout, rd.DynamicLayout) and rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
+                # like above, don't re-render calls when in call scope.
+                pass
+            else:
+                render_locations(ostream, layout, locations, indent=indent)
             ostream.write("\n")


-def render_node(ostream, match: rd.Match, node: rd.Node, indent=0):
+def render_node(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, node: rd.Node, indent: int):
     if isinstance(node, rd.StatementNode):
-        render_statement(ostream, match, node.statement, indent=indent)
+        render_statement(ostream, layout, match, node.statement, indent=indent)
     elif isinstance(node, rd.FeatureNode):
-        render_feature(ostream, match, node.feature, indent=indent)
+        render_feature(ostream, layout, rule, match, node.feature, indent=indent)
     else:
         raise RuntimeError("unexpected node type: " + str(node))

@@ -214,7 +265,7 @@ MODE_SUCCESS = "success"
 MODE_FAILURE = "failure"


-def render_match(ostream, match: rd.Match, indent=0, mode=MODE_SUCCESS):
+def render_match(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, indent=0, mode=MODE_SUCCESS):
     child_mode = mode
     if mode == MODE_SUCCESS:
         # display only nodes that evaluated successfully.
@@ -246,10 +297,10 @@ def render_match(ostream, match: rd.Match, indent=0, mode=MODE_SUCCESS):
     else:
         raise RuntimeError("unexpected mode: " + mode)

-    render_node(ostream, match, match.node, indent=indent)
+    render_node(ostream, layout, rule, match, match.node, indent=indent)

     for child in match.children:
-        render_match(ostream, child, indent=indent + 1, mode=child_mode)
+        render_match(ostream, layout, rule, child, indent=indent + 1, mode=child_mode)


 def render_rules(ostream, doc: rd.ResultDocument):
@@ -260,7 +311,8 @@ def render_rules(ostream, doc: rd.ResultDocument):
        check for OutputDebugString error
        namespace  anti-analysis/anti-debugging/debugger-detection
        author     michael.hunhoff@mandiant.com
-       scope      function
+       static scope: function
+       dynamic scope: process
        mbc        Anti-Behavioral Analysis::Detect Debugger::OutputDebugString
        function @ 0x10004706
          and:
@@ -268,13 +320,20 @@ def render_rules(ostream, doc: rd.ResultDocument):
            api: kernel32.GetLastError @ 0x10004A87
            api: kernel32.OutputDebugString @ 0x10004767, 0x10004787, 0x10004816, 0x10004895
     """
-    functions_by_bb: Dict[capa.features.address.Address, capa.features.address.Address] = {}
-    for finfo in doc.meta.analysis.layout.functions:
-        faddress = finfo.address.to_capa()
+    import capa.render.verbose as v

-        for bb in finfo.matched_basic_blocks:
-            bbaddress = bb.address.to_capa()
-            functions_by_bb[bbaddress] = faddress
+    functions_by_bb: Dict[capa.features.address.Address, capa.features.address.Address] = {}
+    if isinstance(doc.meta.analysis, rd.StaticAnalysis):
+        for finfo in doc.meta.analysis.layout.functions:
+            faddress = finfo.address.to_capa()
+
+            for bb in finfo.matched_basic_blocks:
+                bbaddress = bb.address.to_capa()
+                functions_by_bb[bbaddress] = faddress
+    elif isinstance(doc.meta.analysis, rd.DynamicAnalysis):
+        pass
+    else:
+        raise ValueError("invalid analysis field in the document's meta")

     had_match = False

@@ -323,7 +382,13 @@ def render_rules(ostream, doc: rd.ResultDocument):

         rows.append(("author", ", ".join(rule.meta.authors)))

-        rows.append(("scope", rule.meta.scope.value))
+        if doc.meta.flavor == rd.Flavor.STATIC:
+            assert rule.meta.scopes.static is not None
+            rows.append(("scope", rule.meta.scopes.static.value))
+
+        if doc.meta.flavor == rd.Flavor.DYNAMIC:
+            assert rule.meta.scopes.dynamic is not None
+            rows.append(("scope", rule.meta.scopes.dynamic.value))

         if rule.meta.attack:
             rows.append(("att&ck", ", ".join([rutils.format_parts_id(v) for v in rule.meta.attack])))
@@ -339,7 +404,7 @@ def render_rules(ostream, doc: rd.ResultDocument):

         ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))

-        if rule.meta.scope == capa.rules.FILE_SCOPE:
+        if capa.rules.Scope.FILE in rule.meta.scopes:
             matches = doc.rules[rule.meta.name].matches
             if len(matches) != 1:
                 # i think there should only ever be one match per file-scope rule,
@@ -347,22 +412,42 @@ def render_rules(ostream, doc: rd.ResultDocument):
                 # but i'm not 100% sure if this is/will always be true.
                 # so, lets be explicit about our assumptions and raise an exception if they fail.
                 raise RuntimeError(f"unexpected file scope match count: {len(matches)}")
-            first_address, first_match = matches[0]
-            render_match(ostream, first_match, indent=0)
+            _, first_match = matches[0]
+            render_match(ostream, doc.meta.analysis.layout, rule, first_match, indent=0)
         else:
             for location, match in sorted(doc.rules[rule.meta.name].matches):
-                ostream.write(rule.meta.scope)
-                ostream.write(" @ ")
-                ostream.write(capa.render.verbose.format_address(location))
+                if doc.meta.flavor == rd.Flavor.STATIC:
+                    assert rule.meta.scopes.static is not None
+                    ostream.write(rule.meta.scopes.static.value)
+                    ostream.write(" @ ")
+                    ostream.write(capa.render.verbose.format_address(location))

-                if rule.meta.scope == capa.rules.BASIC_BLOCK_SCOPE:
-                    ostream.write(
-                        " in function "
-                        + capa.render.verbose.format_address(frz.Address.from_capa(functions_by_bb[location.to_capa()]))
-                    )
+                    if rule.meta.scopes.static == capa.rules.Scope.BASIC_BLOCK:
+                        func = frz.Address.from_capa(functions_by_bb[location.to_capa()])
+                        ostream.write(f" in function {capa.render.verbose.format_address(func)}")
+
+                elif doc.meta.flavor == rd.Flavor.DYNAMIC:
+                    assert rule.meta.scopes.dynamic is not None
+                    assert isinstance(doc.meta.analysis.layout, rd.DynamicLayout)
+
+                    ostream.write(rule.meta.scopes.dynamic.value)
+
+                    ostream.write(" @ ")
+
+                    if rule.meta.scopes.dynamic == capa.rules.Scope.PROCESS:
+                        ostream.write(v.render_process(doc.meta.analysis.layout, location))
+                    elif rule.meta.scopes.dynamic == capa.rules.Scope.THREAD:
+                        ostream.write(v.render_thread(doc.meta.analysis.layout, location))
+                    elif rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
+                        ostream.write(hanging_indent(v.render_call(doc.meta.analysis.layout, location), indent=1))
+                    else:
+                        capa.helpers.assert_never(rule.meta.scopes.dynamic)
+
+                else:
+                    capa.helpers.assert_never(doc.meta.flavor)

                 ostream.write("\n")
-                render_match(ostream, match, indent=1)
+                render_match(ostream, doc.meta.analysis.layout, rule, match, indent=1)
                 if rule.meta.lib:
                     # only show first match
                     break
@@ -8,6 +8,8 @@

 import io
 import re
+import gzip
+import json
 import uuid
 import codecs
 import logging

@@ -25,7 +27,8 @@ except ImportError:
     # https://github.com/python/mypy/issues/1153
     from backports.functools_lru_cache import lru_cache  # type: ignore

-from typing import Any, Set, Dict, List, Tuple, Union, Iterator
+from typing import Any, Set, Dict, List, Tuple, Union, Iterator, Optional
+from dataclasses import asdict, dataclass

 import yaml
 import pydantic

@@ -59,7 +62,7 @@ META_KEYS = (
     "authors",
     "description",
     "lib",
-    "scope",
+    "scopes",
     "att&ck",
     "mbc",
     "references",

@@ -74,28 +77,113 @@ HIDDEN_META_KEYS = ("capa/nursery", "capa/path")

 class Scope(str, Enum):
     FILE = "file"
+    PROCESS = "process"
+    THREAD = "thread"
+    CALL = "call"
     FUNCTION = "function"
     BASIC_BLOCK = "basic block"
     INSTRUCTION = "instruction"

+    # used only to specify supported features per scope.
+    # not used to validate rules.
+    GLOBAL = "global"

-FILE_SCOPE = Scope.FILE.value
-FUNCTION_SCOPE = Scope.FUNCTION.value
-BASIC_BLOCK_SCOPE = Scope.BASIC_BLOCK.value
-INSTRUCTION_SCOPE = Scope.INSTRUCTION.value
-# used only to specify supported features per scope.
-# not used to validate rules.
-GLOBAL_SCOPE = "global"
+    @classmethod
+    def to_yaml(cls, representer, node):
+        return representer.represent_str(f"{node.value}")
+
+
+# these literals are used to check if the flavor
+# of a rule is correct.
+STATIC_SCOPES = {
+    Scope.FILE,
+    Scope.GLOBAL,
+    Scope.FUNCTION,
+    Scope.BASIC_BLOCK,
+    Scope.INSTRUCTION,
+}
+DYNAMIC_SCOPES = {
+    Scope.FILE,
+    Scope.GLOBAL,
+    Scope.PROCESS,
+    Scope.THREAD,
+    Scope.CALL,
+}
+
+
+@dataclass
+class Scopes:
+    # when None, the scope is not supported by a rule
+    static: Optional[Scope] = None
+    # when None, the scope is not supported by a rule
+    dynamic: Optional[Scope] = None
+
+    def __contains__(self, scope: Scope) -> bool:
+        return (scope == self.static) or (scope == self.dynamic)
+
+    def __repr__(self) -> str:
+        if self.static and self.dynamic:
+            return f"static-scope: {self.static}, dynamic-scope: {self.dynamic}"
+        elif self.static:
+            return f"static-scope: {self.static}"
+        elif self.dynamic:
+            return f"dynamic-scope: {self.dynamic}"
+        else:
+            raise ValueError("invalid rules class. at least one scope must be specified")
+
+    @classmethod
+    def from_dict(self, scopes: Dict[str, str]) -> "Scopes":
+        # make local copy so we don't make changes outside of this routine.
+        # we'll use the value None to indicate the scope is not supported.
+        scopes_: Dict[str, Optional[str]] = dict(scopes)
+
+        # mark non-specified scopes as invalid
+        if "static" not in scopes_:
+            raise InvalidRule("static scope must be provided")
+        if "dynamic" not in scopes_:
+            raise InvalidRule("dynamic scope must be provided")
+
+        # check the syntax of the meta `scopes` field
+        if sorted(scopes_) != ["dynamic", "static"]:
+            raise InvalidRule("scope flavors can be either static or dynamic")
+
+        if scopes_["static"] == "unsupported":
+            scopes_["static"] = None
+        if scopes_["dynamic"] == "unsupported":
+            scopes_["dynamic"] = None
+
+        # unspecified is used to indicate a rule is yet to be migrated.
+        # TODO(williballenthin): this scope term should be removed once all rules have been migrated.
+        # https://github.com/mandiant/capa/issues/1747
+        if scopes_["static"] == "unspecified":
+            scopes_["static"] = None
+        if scopes_["dynamic"] == "unspecified":
+            scopes_["dynamic"] = None
+
+        if (not scopes_["static"]) and (not scopes_["dynamic"]):
+            raise InvalidRule("invalid scopes value. At least one scope must be specified")
+
+        # check that all the specified scopes are valid
+        if scopes_["static"] and scopes_["static"] not in STATIC_SCOPES:
+            raise InvalidRule(f"{scopes_['static']} is not a valid static scope")
+
+        if scopes_["dynamic"] and scopes_["dynamic"] not in DYNAMIC_SCOPES:
+            raise InvalidRule(f"{scopes_['dynamic']} is not a valid dynamic scope")
+
+        return Scopes(
+            static=Scope(scopes_["static"]) if scopes_["static"] else None,
+            dynamic=Scope(scopes_["dynamic"]) if scopes_["dynamic"] else None,
+        )
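# Editor's note: an illustrative sketch of Scopes.from_dict, derived from the
# code above; it is not part of the diff.
scopes = Scopes.from_dict({"static": "unsupported", "dynamic": "process"})
assert scopes.static is None
assert scopes.dynamic is Scope.PROCESS
assert Scope.PROCESS in scopes  # via Scopes.__contains__
# omitting a flavor, or naming an unknown scope, raises InvalidRule:
#   Scopes.from_dict({"static": "function"})                          -> InvalidRule
#   Scopes.from_dict({"static": "thread", "dynamic": "unsupported"})  -> InvalidRule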


 SUPPORTED_FEATURES: Dict[str, Set] = {
-    GLOBAL_SCOPE: {
+    Scope.GLOBAL: {
         # these will be added to other scopes, see below.
         capa.features.common.OS,
         capa.features.common.Arch,
         capa.features.common.Format,
     },
-    FILE_SCOPE: {
+    Scope.FILE: {
         capa.features.common.MatchedRule,
         capa.features.file.Export,
         capa.features.file.Import,
@@ -108,7 +196,19 @@ SUPPORTED_FEATURES: Dict[str, Set] = {
         capa.features.common.Characteristic("mixed mode"),
         capa.features.common.Characteristic("forwarded export"),
     },
-    FUNCTION_SCOPE: {
+    Scope.PROCESS: {
+        capa.features.common.MatchedRule,
+    },
+    Scope.THREAD: set(),
+    Scope.CALL: {
+        capa.features.common.MatchedRule,
+        capa.features.common.Regex,
+        capa.features.common.String,
+        capa.features.common.Substring,
+        capa.features.insn.API,
+        capa.features.insn.Number,
+    },
+    Scope.FUNCTION: {
         capa.features.common.MatchedRule,
         capa.features.basicblock.BasicBlock,
         capa.features.common.Characteristic("calls from"),
@@ -117,13 +217,13 @@ SUPPORTED_FEATURES: Dict[str, Set] = {
         capa.features.common.Characteristic("recursive call"),
         # plus basic block scope features, see below
     },
-    BASIC_BLOCK_SCOPE: {
+    Scope.BASIC_BLOCK: {
         capa.features.common.MatchedRule,
         capa.features.common.Characteristic("tight loop"),
         capa.features.common.Characteristic("stack string"),
         # plus instruction scope features, see below
     },
-    INSTRUCTION_SCOPE: {
+    Scope.INSTRUCTION: {
         capa.features.common.MatchedRule,
         capa.features.insn.API,
         capa.features.insn.Property,
@@ -148,15 +248,24 @@ SUPPORTED_FEATURES: Dict[str, Set] = {
 }

 # global scope features are available in all other scopes
-SUPPORTED_FEATURES[INSTRUCTION_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
-SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
-SUPPORTED_FEATURES[FUNCTION_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
-SUPPORTED_FEATURES[FILE_SCOPE].update(SUPPORTED_FEATURES[GLOBAL_SCOPE])
+SUPPORTED_FEATURES[Scope.INSTRUCTION].update(SUPPORTED_FEATURES[Scope.GLOBAL])
+SUPPORTED_FEATURES[Scope.BASIC_BLOCK].update(SUPPORTED_FEATURES[Scope.GLOBAL])
+SUPPORTED_FEATURES[Scope.FUNCTION].update(SUPPORTED_FEATURES[Scope.GLOBAL])
+SUPPORTED_FEATURES[Scope.FILE].update(SUPPORTED_FEATURES[Scope.GLOBAL])
+SUPPORTED_FEATURES[Scope.PROCESS].update(SUPPORTED_FEATURES[Scope.GLOBAL])
+SUPPORTED_FEATURES[Scope.THREAD].update(SUPPORTED_FEATURES[Scope.GLOBAL])
+SUPPORTED_FEATURES[Scope.CALL].update(SUPPORTED_FEATURES[Scope.GLOBAL])

+# all call scope features are also thread features
+SUPPORTED_FEATURES[Scope.THREAD].update(SUPPORTED_FEATURES[Scope.CALL])
+# all thread scope features are also process features
+SUPPORTED_FEATURES[Scope.PROCESS].update(SUPPORTED_FEATURES[Scope.THREAD])

 # all instruction scope features are also basic block features
-SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE].update(SUPPORTED_FEATURES[INSTRUCTION_SCOPE])
+SUPPORTED_FEATURES[Scope.BASIC_BLOCK].update(SUPPORTED_FEATURES[Scope.INSTRUCTION])
 # all basic block scope features are also function scope features
-SUPPORTED_FEATURES[FUNCTION_SCOPE].update(SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE])
+SUPPORTED_FEATURES[Scope.FUNCTION].update(SUPPORTED_FEATURES[Scope.BASIC_BLOCK])
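# Editor's note: an illustrative check, not part of the diff, of the
# containment the updates above establish: call features flow into thread and
# process scopes, and instruction features into basic block and function scopes.
assert SUPPORTED_FEATURES[Scope.CALL] <= SUPPORTED_FEATURES[Scope.THREAD] <= SUPPORTED_FEATURES[Scope.PROCESS]
assert SUPPORTED_FEATURES[Scope.INSTRUCTION] <= SUPPORTED_FEATURES[Scope.BASIC_BLOCK] <= SUPPORTED_FEATURES[Scope.FUNCTION]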


 class InvalidRule(ValueError):
@@ -194,22 +303,91 @@ class InvalidRuleSet(ValueError):
         return str(self)


-def ensure_feature_valid_for_scope(scope: str, feature: Union[Feature, Statement]):
+def ensure_feature_valid_for_scopes(scopes: Scopes, feature: Union[Feature, Statement]):
+    # construct a dict of all supported features
+    supported_features: Set = set()
+    if scopes.static:
+        supported_features.update(SUPPORTED_FEATURES[scopes.static])
+    if scopes.dynamic:
+        supported_features.update(SUPPORTED_FEATURES[scopes.dynamic])
+
     # if the given feature is a characteristic,
     # check that is a valid characteristic for the given scope.
     if (
         isinstance(feature, capa.features.common.Characteristic)
         and isinstance(feature.value, str)
-        and capa.features.common.Characteristic(feature.value) not in SUPPORTED_FEATURES[scope]
+        and capa.features.common.Characteristic(feature.value) not in supported_features
     ):
-        raise InvalidRule(f"feature {feature} not supported for scope {scope}")
+        raise InvalidRule(f"feature {feature} not supported for scopes {scopes}")

     if not isinstance(feature, capa.features.common.Characteristic):
         # features of this scope that are not Characteristics will be Type instances.
         # check that the given feature is one of these types.
-        types_for_scope = filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope])
-        if not isinstance(feature, tuple(types_for_scope)):  # type: ignore
-            raise InvalidRule(f"feature {feature} not supported for scope {scope}")
+        types_for_scope = filter(lambda t: isinstance(t, type), supported_features)
+        if not isinstance(feature, tuple(types_for_scope)):
+            raise InvalidRule(f"feature {feature} not supported for scopes {scopes}")
+
+
+class ComType(Enum):
+    CLASS = "class"
+    INTERFACE = "interface"
+
+
+# COM data source https://github.com/stevemk14ebr/COM-Code-Helper/tree/master
+VALID_COM_TYPES = {
+    ComType.CLASS: {"db_path": "assets/classes.json.gz", "prefix": "CLSID_"},
+    ComType.INTERFACE: {"db_path": "assets/interfaces.json.gz", "prefix": "IID_"},
+}
+
+
+@lru_cache(maxsize=None)
+def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
+    com_db_path: Path = capa.main.get_default_root() / VALID_COM_TYPES[com_type]["db_path"]
+
+    if not com_db_path.exists():
+        raise IOError(f"COM database path '{com_db_path}' does not exist or cannot be accessed")
+
+    try:
+        with gzip.open(com_db_path, "rb") as gzfile:
+            return json.loads(gzfile.read().decode("utf-8"))
+    except Exception as e:
+        raise IOError(f"Error loading COM database from '{com_db_path}'") from e
+
+
+def translate_com_feature(com_name: str, com_type: ComType) -> ceng.Or:
+    com_db = load_com_database(com_type)
+    guid_strings: Optional[List[str]] = com_db.get(com_name)
+    if guid_strings is None or len(guid_strings) == 0:
+        logger.error(" %s doesn't exist in COM %s database", com_name, com_type)
+        raise InvalidRule(f"'{com_name}' doesn't exist in COM {com_type} database")
+
+    com_features: List = []
+    for guid_string in guid_strings:
+        hex_chars = guid_string.replace("-", "")
+        h = [hex_chars[i : i + 2] for i in range(0, len(hex_chars), 2)]
+        reordered_hex_pairs = [
+            h[3], h[2], h[1], h[0],
+            h[5], h[4],
+            h[7], h[6],
+            h[8], h[9], h[10], h[11], h[12], h[13], h[14], h[15],
+        ]
+        guid_bytes = bytes.fromhex("".join(reordered_hex_pairs))
+        prefix = VALID_COM_TYPES[com_type]["prefix"]
+        com_features.append(capa.features.common.StringFactory(guid_string, f"{prefix+com_name} as GUID string"))
+        com_features.append(capa.features.common.Bytes(guid_bytes, f"{prefix+com_name} as bytes"))
+    return ceng.Or(com_features)
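# Editor's note: a standalone sketch, not part of the diff, checking the byte
# reordering above. The first three GUID fields are stored little-endian in a
# binary while the last two are stored as-is, which is exactly what Python's
# uuid module calls the "bytes_le" layout.
import uuid
guid_string = "00021401-0000-0000-C000-000000000046"  # CLSID_ShellLink
assert uuid.UUID(guid_string).bytes_le.hex() == "0114020000000000c000000000000046"
# the h[3], h[2], h[1], h[0], h[5], h[4], h[7], h[6], h[8:] shuffle produces
# the same bytes as uuid.UUID(...).bytes_le.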


 def parse_int(s: str) -> int:
@@ -417,53 +595,101 @@ def pop_statement_description_entry(d):
     return description["description"]


-def build_statements(d, scope: str):
+def trim_dll_part(api: str) -> str:
+    # ordinal imports, like ws2_32.#1, keep dll
+    if ".#" in api:
+        return api
+
+    # kernel32.CreateFileA
+    if api.count(".") == 1:
+        api = api.split(".")[1]
+    return api
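# Editor's note: illustrative behavior of trim_dll_part, derived from the code
# above; not part of the diff.
assert trim_dll_part("kernel32.CreateFileA") == "CreateFileA"  # dll prefix dropped
assert trim_dll_part("ws2_32.#1") == "ws2_32.#1"               # ordinal imports keep the dll
assert trim_dll_part("System.Linq.Enumerable::Any") == "System.Linq.Enumerable::Any"  # two dots: unchanged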


+def build_statements(d, scopes: Scopes):
     if len(d.keys()) > 2:
         raise InvalidRule("too many statements")

     key = list(d.keys())[0]
     description = pop_statement_description_entry(d[key])
     if key == "and":
-        return ceng.And([build_statements(dd, scope) for dd in d[key]], description=description)
+        return ceng.And([build_statements(dd, scopes) for dd in d[key]], description=description)
     elif key == "or":
-        return ceng.Or([build_statements(dd, scope) for dd in d[key]], description=description)
+        return ceng.Or([build_statements(dd, scopes) for dd in d[key]], description=description)
     elif key == "not":
         if len(d[key]) != 1:
             raise InvalidRule("not statement must have exactly one child statement")
-        return ceng.Not(build_statements(d[key][0], scope), description=description)
+        return ceng.Not(build_statements(d[key][0], scopes), description=description)
     elif key.endswith(" or more"):
         count = int(key[: -len("or more")])
-        return ceng.Some(count, [build_statements(dd, scope) for dd in d[key]], description=description)
+        return ceng.Some(count, [build_statements(dd, scopes) for dd in d[key]], description=description)
     elif key == "optional":
         # `optional` is an alias for `0 or more`
         # which is useful for documenting behaviors,
         # like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
-        return ceng.Some(0, [build_statements(dd, scope) for dd in d[key]], description=description)
+        return ceng.Some(0, [build_statements(dd, scopes) for dd in d[key]], description=description)
+
+    elif key == "process":
+        if Scope.FILE not in scopes:
+            raise InvalidRule("process subscope supported only for file scope")
+
+        if len(d[key]) != 1:
+            raise InvalidRule("subscope must have exactly one child statement")
+
+        return ceng.Subscope(
+            Scope.PROCESS, build_statements(d[key][0], Scopes(dynamic=Scope.PROCESS)), description=description
+        )
+
+    elif key == "thread":
+        if all(s not in scopes for s in (Scope.FILE, Scope.PROCESS)):
+            raise InvalidRule("thread subscope supported only for the process scope")
+
+        if len(d[key]) != 1:
+            raise InvalidRule("subscope must have exactly one child statement")
+
+        return ceng.Subscope(
+            Scope.THREAD, build_statements(d[key][0], Scopes(dynamic=Scope.THREAD)), description=description
+        )
+
+    elif key == "call":
+        if all(s not in scopes for s in (Scope.FILE, Scope.PROCESS, Scope.THREAD)):
+            raise InvalidRule("call subscope supported only for the process and thread scopes")
+
+        if len(d[key]) != 1:
+            raise InvalidRule("subscope must have exactly one child statement")
+
+        return ceng.Subscope(
+            Scope.CALL, build_statements(d[key][0], Scopes(dynamic=Scope.CALL)), description=description
+        )

     elif key == "function":
-        if scope != FILE_SCOPE:
+        if Scope.FILE not in scopes:
             raise InvalidRule("function subscope supported only for file scope")

         if len(d[key]) != 1:
             raise InvalidRule("subscope must have exactly one child statement")

-        return ceng.Subscope(FUNCTION_SCOPE, build_statements(d[key][0], FUNCTION_SCOPE), description=description)
+        return ceng.Subscope(
+            Scope.FUNCTION, build_statements(d[key][0], Scopes(static=Scope.FUNCTION)), description=description
+        )

     elif key == "basic block":
-        if scope != FUNCTION_SCOPE:
+        if Scope.FUNCTION not in scopes:
             raise InvalidRule("basic block subscope supported only for function scope")

         if len(d[key]) != 1:
             raise InvalidRule("subscope must have exactly one child statement")

-        return ceng.Subscope(BASIC_BLOCK_SCOPE, build_statements(d[key][0], BASIC_BLOCK_SCOPE), description=description)
+        return ceng.Subscope(
+            Scope.BASIC_BLOCK, build_statements(d[key][0], Scopes(static=Scope.BASIC_BLOCK)), description=description
+        )

     elif key == "instruction":
-        if scope not in (FUNCTION_SCOPE, BASIC_BLOCK_SCOPE):
+        if all(s not in scopes for s in (Scope.FUNCTION, Scope.BASIC_BLOCK)):
             raise InvalidRule("instruction subscope supported only for function and basic block scope")

         if len(d[key]) == 1:
-            statements = build_statements(d[key][0], INSTRUCTION_SCOPE)
+            statements = build_statements(d[key][0], Scopes(static=Scope.INSTRUCTION))
         else:
             # for instruction subscopes, we support a shorthand in which the top level AND is implied.
             # the following are equivalent:
@@ -477,9 +703,9 @@ def build_statements(d, scope: str):
             #     - arch: i386
             #     - mnemonic: cmp
             #
-            statements = ceng.And([build_statements(dd, INSTRUCTION_SCOPE) for dd in d[key]])
+            statements = ceng.And([build_statements(dd, Scopes(static=Scope.INSTRUCTION)) for dd in d[key]])

-        return ceng.Subscope(INSTRUCTION_SCOPE, statements, description=description)
+        return ceng.Subscope(Scope.INSTRUCTION, statements, description=description)

     elif key.startswith("count(") and key.endswith(")"):
         # e.g.:
@@ -507,6 +733,10 @@ def build_statements(d, scope: str):
            #     count(number(0x100 = description))
            if term != "string":
                value, description = parse_description(arg, term)
+
+                if term == "api":
+                    value = trim_dll_part(value)
+
                feature = Feature(value, description=description)
            else:
                # arg is string (which doesn't support inline descriptions), like:
@@ -518,7 +748,7 @@ def build_statements(d, scope: str):
                feature = Feature(arg)
        else:
            feature = Feature()
-        ensure_feature_valid_for_scope(scope, feature)
+        ensure_feature_valid_for_scopes(scopes, feature)

         count = d[key]
         if isinstance(count, int):
@@ -552,7 +782,7 @@ def build_statements(d, scope: str):
             feature = capa.features.insn.OperandNumber(index, value, description=description)
         except ValueError as e:
             raise InvalidRule(str(e)) from e
-        ensure_feature_valid_for_scope(scope, feature)
+        ensure_feature_valid_for_scopes(scopes, feature)
         return feature

     elif key.startswith("operand[") and key.endswith("].offset"):
@@ -568,7 +798,7 @@ def build_statements(d, scope: str):
             feature = capa.features.insn.OperandOffset(index, value, description=description)
         except ValueError as e:
             raise InvalidRule(str(e)) from e
-        ensure_feature_valid_for_scope(scope, feature)
+        ensure_feature_valid_for_scopes(scopes, feature)
         return feature

     elif (
@@ -588,17 +818,28 @@ def build_statements(d, scope: str):
             feature = capa.features.insn.Property(value, access=access, description=description)
         except ValueError as e:
             raise InvalidRule(str(e)) from e
-        ensure_feature_valid_for_scope(scope, feature)
+        ensure_feature_valid_for_scopes(scopes, feature)
         return feature

+    elif key.startswith("com/"):
+        com_type = str(key[len("com/") :]).upper()
+        if com_type not in [item.name for item in ComType]:
+            raise InvalidRule(f"unexpected COM type: {com_type}")
+        value, description = parse_description(d[key], key, d.get("description"))
+        return translate_com_feature(value, ComType[com_type])
+
     else:
         Feature = parse_feature(key)
         value, description = parse_description(d[key], key, d.get("description"))
+
+        if key == "api":
+            value = trim_dll_part(value)
+
         try:
             feature = Feature(value, description=description)
         except ValueError as e:
             raise InvalidRule(str(e)) from e
-        ensure_feature_valid_for_scope(scope, feature)
+        ensure_feature_valid_for_scopes(scopes, feature)
         return feature
@@ -611,10 +852,10 @@ def second(s: List[Any]) -> Any:


 class Rule:
-    def __init__(self, name: str, scope: str, statement: Statement, meta, definition=""):
+    def __init__(self, name: str, scopes: Scopes, statement: Statement, meta, definition=""):
         super().__init__()
         self.name = name
-        self.scope = scope
+        self.scopes = scopes
         self.statement = statement
         self.meta = meta
         self.definition = definition
@@ -623,7 +864,7 @@ class Rule:
         return f"Rule(name={self.name})"

     def __repr__(self):
-        return f"Rule(scope={self.scope}, name={self.name})"
+        return f"Rule(scope={self.scopes}, name={self.name})"

     def get_dependencies(self, namespaces):
         """
@@ -681,13 +922,19 @@ class Rule:
             # the name is a randomly generated, hopefully unique value.
             # ideally, this won't ever be rendered to a user.
             name = self.name + "/" + uuid.uuid4().hex
+            if subscope.scope in STATIC_SCOPES:
+                scopes = Scopes(static=subscope.scope)
+            elif subscope.scope in DYNAMIC_SCOPES:
+                scopes = Scopes(dynamic=subscope.scope)
+            else:
+                raise InvalidRule(f"scope {subscope.scope} is not a valid subscope")
             new_rule = Rule(
                 name,
-                subscope.scope,
+                scopes,
                 subscope.child,
                 {
                     "name": name,
-                    "scope": subscope.scope,
+                    "scopes": asdict(scopes),
                     # these derived rules are never meant to be inspected separately,
                     # they are dependencies for the parent rule,
                     # so mark it as such.
@@ -712,6 +959,9 @@ class Rule:
         for child in statement.get_children():
             yield from self._extract_subscope_rules_rec(child)

+    def is_file_limitation_rule(self) -> bool:
+        return self.meta.get("namespace", "") == "internal/limitation/file"
+
     def is_subscope_rule(self):
         return bool(self.meta.get("capa/subscope-rule", False))

@@ -774,9 +1024,21 @@ class Rule:
     def from_dict(cls, d: Dict[str, Any], definition: str) -> "Rule":
         meta = d["rule"]["meta"]
         name = meta["name"]

-        # if scope is not specified, default to function scope.
-        # this is probably the mode that rule authors will start with.
-        scope = meta.get("scope", FUNCTION_SCOPE)
+        # each rule has two scopes, a static-flavor scope, and a
+        # dynamic-flavor one. which one is used depends on the analysis type.
+        if "scope" in meta:
+            raise InvalidRule(f"legacy rule detected (rule.meta.scope), please update to the new syntax: {name}")
+        elif "scopes" in meta:
+            scopes_ = meta.get("scopes")
+        else:
+            raise InvalidRule("please specify at least one of this rule's (static/dynamic) scopes")
+        if not isinstance(scopes_, dict):
+            raise InvalidRule("the scopes field must contain a dictionary specifying the scopes")
+
+        scopes: Scopes = Scopes.from_dict(scopes_)
         statements = d["rule"]["features"]

         # the rule must start with a single logic node.
@@ -787,16 +1049,13 @@ class Rule:
         if isinstance(statements[0], ceng.Subscope):
             raise InvalidRule("top level statement may not be a subscope")

-        if scope not in SUPPORTED_FEATURES.keys():
-            raise InvalidRule("{:s} is not a supported scope".format(scope))
-
         meta = d["rule"]["meta"]
         if not isinstance(meta.get("att&ck", []), list):
             raise InvalidRule("ATT&CK mapping must be a list")
         if not isinstance(meta.get("mbc", []), list):
             raise InvalidRule("MBC mapping must be a list")

-        return cls(name, scope, build_statements(statements[0], scope), meta, definition)
+        return cls(name, scopes, build_statements(statements[0], scopes), meta, definition)

     @staticmethod
     @lru_cache()
@@ -824,7 +1083,7 @@ class Rule:

         # leave quotes unchanged.
         # manually verified this property exists, even if mypy complains.
-        y.preserve_quotes = True  # type: ignore
+        y.preserve_quotes = True

         # indent lists by two spaces below their parent
         #
@@ -836,7 +1095,7 @@ class Rule:

         # avoid word wrapping
         # manually verified this property exists, even if mypy complains.
-        y.width = 4096  # type: ignore
+        y.width = 4096

         return y

@@ -895,10 +1154,8 @@ class Rule:
             del meta[k]
         for k, v in self.meta.items():
             meta[k] = v

         # the name and scope of the rule instance overrides anything in meta.
         meta["name"] = self.name
-        meta["scope"] = self.scope

         def move_to_end(m, k):
             # ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap).
@@ -919,7 +1176,6 @@ class Rule:
             if key in META_KEYS:
                 continue
             move_to_end(meta, key)
-
         # save off the existing hidden meta values,
         # emit the document,
         # and re-add the hidden meta.
@@ -974,12 +1230,11 @@ class Rule:
         return doc


-def get_rules_with_scope(rules, scope) -> List[Rule]:
+def get_rules_with_scope(rules, scope: Scope) -> List[Rule]:
     """
     from the given collection of rules, select those with the given scope.
-    `scope` is one of the capa.rules.*_SCOPE constants.
     """
-    return [rule for rule in rules if rule.scope == scope]
+    return [rule for rule in rules if scope in rule.scopes]
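# Editor's note: a small usage sketch, not part of the diff; `rules` is a
# hypothetical list of Rule objects. Membership goes through
# Scopes.__contains__, so a rule is selected when either flavor matches:
thread_rules = get_rules_with_scope(rules, Scope.THREAD)
file_rules = get_rules_with_scope(rules, Scope.FILE)  # matches via either the static or dynamic flavor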


 def get_rules_and_dependencies(rules: List[Rule], rule_name: str) -> Iterator[Rule]:
@@ -1104,7 +1359,10 @@ class RuleSet:
         capa.engine.match(ruleset.file_rules, ...)
     """

-    def __init__(self, rules: List[Rule]):
+    def __init__(
+        self,
+        rules: List[Rule],
+    ):
         super().__init__()

         ensure_rules_are_unique(rules)
@@ -1126,15 +1384,23 @@ class RuleSet:

         rules = capa.optimizer.optimize_rules(rules)

-        self.file_rules = self._get_rules_for_scope(rules, FILE_SCOPE)
-        self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
-        self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE)
-        self.instruction_rules = self._get_rules_for_scope(rules, INSTRUCTION_SCOPE)
+        self.file_rules = self._get_rules_for_scope(rules, Scope.FILE)
+        self.process_rules = self._get_rules_for_scope(rules, Scope.PROCESS)
+        self.thread_rules = self._get_rules_for_scope(rules, Scope.THREAD)
+        self.call_rules = self._get_rules_for_scope(rules, Scope.CALL)
+        self.function_rules = self._get_rules_for_scope(rules, Scope.FUNCTION)
+        self.basic_block_rules = self._get_rules_for_scope(rules, Scope.BASIC_BLOCK)
+        self.instruction_rules = self._get_rules_for_scope(rules, Scope.INSTRUCTION)
         self.rules = {rule.name: rule for rule in rules}
         self.rules_by_namespace = index_rules_by_namespace(rules)

         # unstable
         (self._easy_file_rules_by_feature, self._hard_file_rules) = self._index_rules_by_feature(self.file_rules)
+        (self._easy_process_rules_by_feature, self._hard_process_rules) = self._index_rules_by_feature(
+            self.process_rules
+        )
+        (self._easy_thread_rules_by_feature, self._hard_thread_rules) = self._index_rules_by_feature(self.thread_rules)
+        (self._easy_call_rules_by_feature, self._hard_call_rules) = self._index_rules_by_feature(self.call_rules)
         (self._easy_function_rules_by_feature, self._hard_function_rules) = self._index_rules_by_feature(
             self.function_rules
         )
@@ -1380,16 +1646,25 @@ class RuleSet:
         except that it may be more performant.
         """
         easy_rules_by_feature = {}
-        if scope is Scope.FILE:
+        if scope == Scope.FILE:
             easy_rules_by_feature = self._easy_file_rules_by_feature
             hard_rule_names = self._hard_file_rules
-        elif scope is Scope.FUNCTION:
+        elif scope == Scope.PROCESS:
+            easy_rules_by_feature = self._easy_process_rules_by_feature
+            hard_rule_names = self._hard_process_rules
+        elif scope == Scope.THREAD:
+            easy_rules_by_feature = self._easy_thread_rules_by_feature
+            hard_rule_names = self._hard_thread_rules
+        elif scope == Scope.CALL:
+            easy_rules_by_feature = self._easy_call_rules_by_feature
+            hard_rule_names = self._hard_call_rules
+        elif scope == Scope.FUNCTION:
             easy_rules_by_feature = self._easy_function_rules_by_feature
             hard_rule_names = self._hard_function_rules
-        elif scope is Scope.BASIC_BLOCK:
+        elif scope == Scope.BASIC_BLOCK:
             easy_rules_by_feature = self._easy_basic_block_rules_by_feature
             hard_rule_names = self._hard_basic_block_rules
-        elif scope is Scope.INSTRUCTION:
+        elif scope == Scope.INSTRUCTION:
             easy_rules_by_feature = self._easy_instruction_rules_by_feature
             hard_rule_names = self._hard_instruction_rules
         else:
@@ -105,27 +105,28 @@ To install these development dependencies, run:

 We use [pre-commit](https://pre-commit.com/) so that its trivial to run the same linters & configuration locally as in CI.

-Run all linters liks:
+Run all linters like:

-    ❯ pre-commit run --all-files
+    ❯ pre-commit run --hook-stage=manual --all-files
     isort....................................................................Passed
     black....................................................................Passed
     ruff.....................................................................Passed
     flake8...................................................................Passed
     mypy.....................................................................Passed
+    pytest (fast)............................................................Passed

 Or run a single linter like:

-    ❯ pre-commit run --all-files isort
+    ❯ pre-commit run --all-files --hook-stage=manual isort
     isort....................................................................Passed

 Importantly, you can configure pre-commit to run automatically before every commit by running:

-    ❯ pre-commit install --hook-type pre-commit
+    ❯ pre-commit install --hook-type=pre-commit
     pre-commit installed at .git/hooks/pre-commit

-    ❯ pre-commit install --hook-type pre-push
+    ❯ pre-commit install --hook-type=pre-push
     pre-commit installed at .git/hooks/pre-push

 This way you can ensure that you don't commit code style or formatting offenses.
@@ -37,19 +37,20 @@ dependencies = [
     "tabulate==0.9.0",
     "colorama==0.4.6",
     "termcolor==2.3.0",
-    "wcwidth==0.2.6",
+    "wcwidth==0.2.12",
     "ida-settings==2.1.0",
     "viv-utils[flirt]==0.7.9",
     "halo==0.0.31",
     "networkx==3.1",
-    "ruamel.yaml==0.17.32",
+    "ruamel.yaml==0.18.5",
     "vivisect==1.1.1",
     "pefile==2023.2.7",
     "pyelftools==0.30",
-    "dnfile==0.13.0",
+    "dnfile==0.14.1",
     "dncil==1.0.2",
-    "pydantic==2.1.1",
+    "pydantic==2.4.0",
     "protobuf==4.23.4",
+    "dexparser==1.2.0",
 ]
 dynamic = ["version"]

@@ -61,26 +62,26 @@ packages = ["capa"]

 [project.optional-dependencies]
 dev = [
-    "pre-commit==3.4.0",
-    "pytest==7.4.2",
+    "pre-commit==3.5.0",
+    "pytest==7.4.3",
     "pytest-sugar==0.9.7",
     "pytest-instafail==0.5.0",
     "pytest-cov==4.1.0",
     "flake8==6.1.0",
-    "flake8-bugbear==23.7.10",
-    "flake8-encodings==0.5.0.post1",
+    "flake8-bugbear==23.11.26",
+    "flake8-encodings==0.5.1",
     "flake8-comprehensions==3.14.0",
     "flake8-logging-format==0.9.0",
-    "flake8-no-implicit-concat==0.3.4",
+    "flake8-no-implicit-concat==0.3.5",
     "flake8-print==5.0.0",
     "flake8-todos==0.3.0",
-    "flake8-simplify==0.20.0",
+    "flake8-simplify==0.21.0",
     "flake8-use-pathlib==0.3.0",
     "flake8-copyright==0.2.4",
-    "ruff==0.0.291",
-    "black==23.7.0",
+    "ruff==0.1.6",
+    "black==23.11.0",
     "isort==5.11.4",
-    "mypy==1.5.1",
+    "mypy==1.7.1",
     "psutil==5.9.2",
     "stix2==3.0.1",
     "requests==2.31.0",
@@ -92,12 +93,12 @@ dev = [
     "types-tabulate==0.9.0.3",
     "types-termcolor==1.1.4",
     "types-psutil==5.8.23",
-    "types_requests==2.31.0.2",
+    "types_requests==2.31.0.10",
     "types-protobuf==4.23.0.3",
 ]
 build = [
-    "pyinstaller==5.10.1",
-    "setuptools==68.0.0",
+    "pyinstaller==6.2.0",
+    "setuptools==69.0.2",
     "build==1.0.3"
 ]
Submodule rules updated: 2d615e2386...57b3911a72
@@ -75,6 +75,7 @@ import capa
 import capa.main
 import capa.rules
 import capa.render.json
+import capa.capabilities.common
 import capa.render.result_document as rd
 from capa.features.common import OS_AUTO

@@ -112,7 +113,7 @@ def get_capa_results(args):
         extractor = capa.main.get_extractor(
             path, format, os_, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True
         )
-    except capa.main.UnsupportedFormatError:
+    except capa.exceptions.UnsupportedFormatError:
         # i'm not 100% sure if multiprocessing will reliably raise exceptions across process boundaries.
         # so instead, return an object with explicit success/failure status.
         #
@@ -123,7 +124,7 @@ def get_capa_results(args):
             "status": "error",
             "error": f"input file does not appear to be a PE file: {path}",
         }
-    except capa.main.UnsupportedRuntimeError:
+    except capa.exceptions.UnsupportedRuntimeError:
         return {
             "path": path,
             "status": "error",
@@ -136,11 +137,9 @@ def get_capa_results(args):
             "error": f"unexpected error: {e}",
         }

-    meta = capa.main.collect_metadata([], path, format, os_, [], extractor)
-    capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
+    capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)

-    meta.analysis.feature_counts = counts["feature_counts"]
-    meta.analysis.library_functions = counts["library_functions"]
+    meta = capa.main.collect_metadata([], path, format, os_, [], extractor, counts)
     meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)

     doc = rd.ResultDocument.from_capa(meta, rules, capabilities)

@@ -566,7 +566,7 @@ def convert_rules(rules, namespaces, cround, make_priv):
             logger.info("skipping already converted rule capa: %s - yara rule: %s", rule.name, rule_name)
             continue

-        logger.info("-------------------------- DOING RULE CAPA: %s - yara rule: ", rule.name, rule_name)
+        logger.info("-------------------------- DOING RULE CAPA: %s - yara rule: %s", rule.name, rule_name)
         if "capa/path" in rule.meta:
             url = get_rule_url(rule.meta["capa/path"])
         else:
@@ -603,7 +603,12 @@ def convert_rules(rules, namespaces, cround, make_priv):
         meta_name = meta
         # e.g. 'examples:' can be a list
         seen_hashes = []
-        if isinstance(metas[meta], list):
+        if isinstance(metas[meta], dict):
+            if meta_name == "scopes":
+                yara_meta += "\t" + "static scope" + ' = "' + metas[meta]["static"] + '"\n'
+                yara_meta += "\t" + "dynamic scope" + ' = "' + metas[meta]["dynamic"] + '"\n'
+
+        elif isinstance(metas[meta], list):
             if meta_name == "examples":
                 meta_name = "hash"
             if meta_name == "att&ck":
@@ -19,6 +19,7 @@ import capa.features
|
||||
import capa.render.json
|
||||
import capa.render.utils as rutils
|
||||
import capa.render.default
|
||||
import capa.capabilities.common
|
||||
import capa.render.result_document as rd
|
||||
import capa.features.freeze.features as frzf
|
||||
from capa.features.common import OS_AUTO, FORMAT_AUTO
|
||||
@@ -175,13 +176,10 @@ def capa_details(rules_path: Path, file_path: Path, output_format="dictionary"):
|
||||
extractor = capa.main.get_extractor(
|
||||
file_path, FORMAT_AUTO, OS_AUTO, capa.main.BACKEND_VIV, [], False, disable_progress=True
|
||||
)
|
||||
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
||||
capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
|
||||
|
||||
# collect metadata (used only to make rendering more complete)
|
||||
meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, [rules_path], extractor)
|
||||
|
||||
meta.analysis.feature_counts = counts["feature_counts"]
|
||||
meta.analysis.library_functions = counts["library_functions"]
|
||||
meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, [rules_path], extractor, counts)
|
||||
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
|
||||
|
||||
capa_output: Any = False
|
||||
|
||||
@@ -90,7 +90,7 @@ def main():
|
||||
continue
|
||||
if rule.meta.is_subscope_rule:
|
||||
continue
|
||||
if rule.meta.scope != capa.rules.Scope.FUNCTION:
|
||||
if rule.meta.scopes.static == capa.rules.Scope.FUNCTION:
|
||||
continue
|
||||
|
||||
ns = rule.meta.namespace
|
||||
|
||||
@@ -41,6 +41,7 @@ import capa.rules
|
||||
import capa.engine
|
||||
import capa.helpers
|
||||
import capa.features.insn
|
||||
import capa.capabilities.common
|
||||
from capa.rules import Rule, RuleSet
|
||||
from capa.features.common import OS_AUTO, String, Feature, Substring
|
||||
from capa.render.result_document import RuleMetadata
|
||||
@@ -151,20 +152,74 @@ class NamespaceDoesntMatchRulePath(Lint):
        return rule.meta["namespace"] not in get_normpath(rule.meta["capa/path"])


class MissingScope(Lint):
    name = "missing scope"
    recommendation = "Add meta.scope so that the scope is explicit (defaults to `function`)"
class MissingScopes(Lint):
    name = "missing scopes"
    recommendation = (
        "Add meta.scopes with both the static (meta.scopes.static) and dynamic (meta.scopes.dynamic) scopes"
    )

    def check_rule(self, ctx: Context, rule: Rule):
        return "scope" not in rule.meta
        return "scopes" not in rule.meta


class InvalidScope(Lint):
    name = "invalid scope"
    recommendation = "Use only file, function, basic block, or instruction rule scopes"
class MissingStaticScope(Lint):
    name = "missing static scope"
    recommendation = (
        "Add a static scope for the rule (file, function, basic block, instruction, or unspecified/unsupported)"
    )

    def check_rule(self, ctx: Context, rule: Rule):
        return rule.meta.get("scope") not in ("file", "function", "basic block", "instruction")
        return "static" not in rule.meta.get("scopes")


class MissingDynamicScope(Lint):
    name = "missing dynamic scope"
    recommendation = "Add a dynamic scope for the rule (file, process, thread, call, or unspecified/unsupported)"

    def check_rule(self, ctx: Context, rule: Rule):
        return "dynamic" not in rule.meta.get("scopes")


class InvalidStaticScope(Lint):
    name = "invalid static scope"
    recommendation = (
        "For the static scope, use either: file, function, basic block, instruction, or unspecified/unsupported"
    )

    def check_rule(self, ctx: Context, rule: Rule):
        return rule.meta.get("scopes").get("static") not in (
            "file",
            "function",
            "basic block",
            "instruction",
            "unspecified",
            "unsupported",
        )


class InvalidDynamicScope(Lint):
    name = "invalid dynamic scope"
    recommendation = "For the dynamic scope, use either: file, process, thread, call, or unspecified/unsupported"

    def check_rule(self, ctx: Context, rule: Rule):
        return rule.meta.get("scopes").get("dynamic") not in (
            "file",
            "process",
            "thread",
            "call",
            "unspecified",
            "unsupported",
        )


class InvalidScopes(Lint):
    name = "invalid scopes"
    recommendation = "At least one scope (static or dynamic) must be specified"

    def check_rule(self, ctx: Context, rule: Rule):
        return (rule.meta.get("scopes").get("static") in ("unspecified", "unsupported")) and (
            rule.meta.get("scopes").get("dynamic") in ("unspecified", "unsupported")
        )


class MissingAuthors(Lint):

@@ -305,14 +360,14 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
    elif nice_path.name.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64):
        format_ = "sc64"
    else:
        format_ = capa.main.get_auto_format(nice_path)
        format_ = capa.helpers.get_auto_format(nice_path)

    logger.debug("analyzing sample: %s", nice_path)
    extractor = capa.main.get_extractor(
        nice_path, format_, OS_AUTO, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
    )

    capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)
    capabilities, _ = capa.capabilities.common.find_capabilities(ctx.rules, extractor, disable_progress=True)
    # mypy doesn't seem to be happy with the MatchResults type alias & set(...keys())?
    # so we ignore a few types here.
    capabilities = set(capabilities.keys())  # type: ignore
@@ -700,14 +755,18 @@ def lint_name(ctx: Context, rule: Rule):
    return run_lints(NAME_LINTS, ctx, rule)


SCOPE_LINTS = (
    MissingScope(),
    InvalidScope(),
SCOPES_LINTS = (
    MissingScopes(),
    MissingStaticScope(),
    MissingDynamicScope(),
    InvalidStaticScope(),
    InvalidDynamicScope(),
    InvalidScopes(),
)


def lint_scope(ctx: Context, rule: Rule):
    return run_lints(SCOPE_LINTS, ctx, rule)
    return run_lints(SCOPES_LINTS, ctx, rule)


META_LINTS = (

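Taken together, the new lints encode this contract: meta.scopes must exist, must name both a static and a dynamic scope, each value must come from its own vocabulary, and at most one of the two may be unspecified/unsupported. A condensed sketch of the same checks outside the Lint framework (dict-shaped meta assumed purely for illustration):

    # sketch: the scopes contract enforced by the lints above
    STATIC_SCOPES = {"file", "function", "basic block", "instruction", "unspecified", "unsupported"}
    DYNAMIC_SCOPES = {"file", "process", "thread", "call", "unspecified", "unsupported"}

    def validate_scopes(meta: dict):
        scopes = meta.get("scopes")
        assert scopes is not None, "missing scopes"
        assert "static" in scopes, "missing static scope"
        assert "dynamic" in scopes, "missing dynamic scope"
        assert scopes["static"] in STATIC_SCOPES, "invalid static scope"
        assert scopes["dynamic"] in DYNAMIC_SCOPES, "invalid dynamic scope"
        unknown = ("unspecified", "unsupported")
        assert not (scopes["static"] in unknown and scopes["dynamic"] in unknown), "invalid scopes"
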
@@ -54,6 +54,7 @@ import capa.helpers
import capa.features
import capa.features.common
import capa.features.freeze
import capa.capabilities.common

logger = logging.getLogger("capa.profile")

@@ -114,7 +115,7 @@ def main(argv=None):

    def do_iteration():
        capa.perf.reset()
        capa.main.find_capabilities(rules, extractor, disable_progress=True)
        capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
        pbar.update(1)

    samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)

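Note that timeit.repeat returns one aggregate wall-clock figure per repeat, each covering `number` calls, so the per-iteration cost is min(samples) / number. A freestanding sketch of the same measurement pattern:

    # sketch: the stdlib timing pattern used by the profile script above
    import timeit

    def do_iteration():
        pass  # stand-in for one full find_capabilities pass

    samples = timeit.repeat(do_iteration, number=10, repeat=3)
    print("best per-iteration time:", min(samples) / 10)
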
@@ -47,7 +47,7 @@ from typing import Dict, List
from pathlib import Path

import requests
from stix2 import Filter, MemoryStore, AttackPattern  # type: ignore
from stix2 import Filter, MemoryStore, AttackPattern

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

@@ -74,10 +74,12 @@ import capa.exceptions
import capa.render.utils as rutils
import capa.render.verbose
import capa.features.freeze
import capa.capabilities.common
import capa.render.result_document as rd
from capa.helpers import get_file_taste
from capa.features.common import FORMAT_AUTO
from capa.features.freeze import Address
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor

logger = logging.getLogger("capa.show-capabilities-by-function")

@@ -101,6 +103,7 @@ def render_matches_by_function(doc: rd.ResultDocument):
      - send HTTP request
      - connect to HTTP server
    """
    assert isinstance(doc.meta.analysis, rd.StaticAnalysis)
    functions_by_bb: Dict[Address, Address] = {}
    for finfo in doc.meta.analysis.layout.functions:
        faddress = finfo.address
@@ -113,10 +116,10 @@ def render_matches_by_function(doc: rd.ResultDocument):

    matches_by_function = collections.defaultdict(set)
    for rule in rutils.capability_rules(doc):
        if rule.meta.scope == capa.rules.FUNCTION_SCOPE:
        if capa.rules.Scope.FUNCTION in rule.meta.scopes:
            for addr, _ in rule.matches:
                matches_by_function[addr].add(rule.meta.name)
        elif rule.meta.scope == capa.rules.BASIC_BLOCK_SCOPE:
        elif capa.rules.Scope.BASIC_BLOCK in rule.meta.scopes:
            for addr, _ in rule.matches:
                function = functions_by_bb[addr]
                matches_by_function[function].add(rule.meta.name)
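The scope checks above change from equality against a single scope constant to membership in rule.meta.scopes, since a rule now carries both a static and a dynamic scope. A simplified sketch of the grouping logic, with plain tuples standing in for the result document:

    # sketch: group rule matches by function, per the hunk above
    import collections

    def group_matches_by_function(rules, functions_by_bb):
        # rules: iterable of (name, scopes, match_addresses) triples
        matches_by_function = collections.defaultdict(set)
        for name, scopes, addresses in rules:
            if "function" in scopes:
                for addr in addresses:
                    matches_by_function[addr].add(name)
            elif "basic block" in scopes:
                for addr in addresses:
                    # map each basic block back to its containing function
                    matches_by_function[functions_by_bb[addr]].add(name)
        return matches_by_function
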
@@ -167,7 +170,7 @@ def main(argv=None):

    if (args.format == "freeze") or (args.format == FORMAT_AUTO and capa.features.freeze.is_freeze(taste)):
        format_ = "freeze"
        extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
        extractor: FeatureExtractor = capa.features.freeze.load(Path(args.sample).read_bytes())
    else:
        format_ = args.format
        should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
@@ -176,6 +179,7 @@ def main(argv=None):
            extractor = capa.main.get_extractor(
                args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace
            )
            assert isinstance(extractor, StaticFeatureExtractor)
        except capa.exceptions.UnsupportedFormatError:
            capa.helpers.log_unsupported_format_error()
            return -1
@@ -183,14 +187,12 @@ def main(argv=None):
            capa.helpers.log_unsupported_runtime_error()
            return -1

    meta = capa.main.collect_metadata(argv, args.sample, format_, args.os, args.rules, extractor)
    capabilities, counts = capa.main.find_capabilities(rules, extractor)
    capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor)

    meta.analysis.feature_counts = counts["feature_counts"]
    meta.analysis.library_functions = counts["library_functions"]
    meta = capa.main.collect_metadata(argv, args.sample, format_, args.os, args.rules, extractor, counts)
    meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)

    if capa.main.has_file_limitation(rules, capabilities):
    if capa.capabilities.common.has_file_limitation(rules, capabilities):
        # bail if capa encountered file limitation e.g. a packed binary
        # do show the output in verbose mode, though.
        if not (args.verbose or args.vverbose or args.json):

@@ -78,13 +78,21 @@ import capa.helpers
import capa.features
import capa.exceptions
import capa.render.verbose as v
import capa.features.common
import capa.features.freeze
import capa.features.address
import capa.features.extractors.pefile
import capa.features.extractors.base_extractor
from capa.helpers import log_unsupported_runtime_error
from capa.features.extractors.base_extractor import FunctionHandle
from capa.helpers import get_auto_format, log_unsupported_runtime_error
from capa.features.insn import API, Number
from capa.features.common import (
    FORMAT_AUTO,
    FORMAT_CAPE,
    FORMAT_FREEZE,
    DYNAMIC_FORMATS,
    String,
    Feature,
    is_global_feature,
)
from capa.features.extractors.base_extractor import FunctionHandle, StaticFeatureExtractor, DynamicFeatureExtractor

logger = logging.getLogger("capa.show-features")

@@ -101,6 +109,7 @@ def main(argv=None):
    capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend"})

    parser.add_argument("-F", "--function", type=str, help="Show features for specific function")
    parser.add_argument("-P", "--process", type=str, help="Show features for specific process name")
    args = parser.parse_args(args=argv)
    capa.main.handle_common_args(args)

@@ -109,7 +118,7 @@ def main(argv=None):
        return -1

    try:
        taste = capa.helpers.get_file_taste(Path(args.sample))
        _ = capa.helpers.get_file_taste(Path(args.sample))
    except IOError as e:
        logger.error("%s", str(e))
        return -1

@@ -120,23 +129,38 @@ def main(argv=None):
        logger.error("%s", str(e))
        return -1

    if (args.format == "freeze") or (
        args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste)
    ):
    format_ = args.format if args.format != FORMAT_AUTO else get_auto_format(args.sample)
    if format_ == FORMAT_FREEZE:
        # this should be moved above the previous if clause after implementing
        # feature freeze for the dynamic analysis flavor
        extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
    else:
        should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
        try:
            extractor = capa.main.get_extractor(
                args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace
                args.sample, format_, args.os, args.backend, sig_paths, should_save_workspace
            )
        except capa.exceptions.UnsupportedFormatError:
            capa.helpers.log_unsupported_format_error()
        except capa.exceptions.UnsupportedFormatError as e:
            if format_ == FORMAT_CAPE:
                capa.helpers.log_unsupported_cape_report_error(str(e))
            else:
                capa.helpers.log_unsupported_format_error()
            return -1
        except capa.exceptions.UnsupportedRuntimeError:
            log_unsupported_runtime_error()
            return -1

    if format_ in DYNAMIC_FORMATS:
        assert isinstance(extractor, DynamicFeatureExtractor)
        print_dynamic_analysis(extractor, args)
    else:
        assert isinstance(extractor, StaticFeatureExtractor)
        print_static_analysis(extractor, args)

    return 0


def print_static_analysis(extractor: StaticFeatureExtractor, args):
    for feature, addr in extractor.extract_global_features():
        print(f"global: {format_address(addr)}: {feature}")

@@ -165,9 +189,112 @@ def main(argv=None):
            print(f"{args.function} not a function")
            return -1

    print_features(function_handles, extractor)
    print_static_features(function_handles, extractor)

    return 0


def print_dynamic_analysis(extractor: DynamicFeatureExtractor, args):
    for feature, addr in extractor.extract_global_features():
        print(f"global: {format_address(addr)}: {feature}")

    if not args.process:
        for feature, addr in extractor.extract_file_features():
            print(f"file: {format_address(addr)}: {feature}")

    process_handles = tuple(extractor.get_processes())

    if args.process:
        process_handles = tuple(filter(lambda ph: ph.inner["name"] == args.process, process_handles))
        if args.process not in [ph.inner["name"] for ph in process_handles]:
            print(f"{args.process} not a process")
            return -1

    print_dynamic_features(process_handles, extractor)


def print_static_features(functions, extractor: StaticFeatureExtractor):
    for f in functions:
        if extractor.is_library_function(f.address):
            function_name = extractor.get_function_name(f.address)
            logger.debug("skipping library function %s (%s)", format_address(f.address), function_name)
            continue

        print(f"func: {format_address(f.address)}")

        for feature, addr in extractor.extract_function_features(f):
            if is_global_feature(feature):
                continue

            if f.address != addr:
                print(f" func: {format_address(f.address)}: {feature} -> {format_address(addr)}")
            else:
                print(f" func: {format_address(f.address)}: {feature}")

        for bb in extractor.get_basic_blocks(f):
            for feature, addr in extractor.extract_basic_block_features(f, bb):
                if is_global_feature(feature):
                    continue

                if bb.address != addr:
                    print(f" bb: {format_address(bb.address)}: {feature} -> {format_address(addr)}")
                else:
                    print(f" bb: {format_address(bb.address)}: {feature}")

            for insn in extractor.get_instructions(f, bb):
                for feature, addr in extractor.extract_insn_features(f, bb, insn):
                    if is_global_feature(feature):
                        continue

                    try:
                        if insn.address != addr:
                            print(
                                f" insn: {format_address(f.address)}: {format_address(insn.address)}: {feature} -> {format_address(addr)}"
                            )
                        else:
                            print(f" insn: {format_address(insn.address)}: {feature}")

                    except UnicodeEncodeError:
                        # may be an issue while piping to less and encountering non-ascii characters
                        continue


def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
    for p in processes:
        print(f"proc: {p.inner.process_name} (ppid={p.address.ppid}, pid={p.address.pid})")

        for feature, addr in extractor.extract_process_features(p):
            if is_global_feature(feature):
                continue

            print(f"  proc: {p.inner.process_name}: {feature}")

        for t in extractor.get_threads(p):
            print(f"  thread: {t.address.tid}")
            for feature, addr in extractor.extract_thread_features(p, t):
                if is_global_feature(feature):
                    continue

                if feature != Feature(0):
                    print(f"    {format_address(addr)}: {feature}")

            for call in extractor.get_calls(p, t):
                apis = []
                arguments = []
                for feature, addr in extractor.extract_call_features(p, t, call):
                    if is_global_feature(feature):
                        continue

                    if isinstance(feature, API):
                        assert isinstance(addr, capa.features.address.DynamicCallAddress)
                        apis.append((addr.id, str(feature.value)))

                    if isinstance(feature, (Number, String)):
                        arguments.append(str(feature.value))

                if not apis:
                    print(f"    arguments=[{', '.join(arguments)}]")

                for cid, api in apis:
                    print(f"    call {cid}: {api}({', '.join(arguments)})")

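print_dynamic_features walks the full hierarchy a DynamicFeatureExtractor exposes: processes, then threads, then calls. A stripped-down traversal sketch using the same method names as this diff:

    # sketch: tally every dynamic feature across the process/thread/call hierarchy
    import collections

    def count_dynamic_features(extractor):
        counts = collections.Counter()
        for ph in extractor.get_processes():
            counts.update(f for f, _ in extractor.extract_process_features(ph))
            for th in extractor.get_threads(ph):
                counts.update(f for f, _ in extractor.extract_thread_features(ph, th))
                for ch in extractor.get_calls(ph, th):
                    counts.update(f for f, _ in extractor.extract_call_features(ph, th, ch))
        return counts
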
def ida_main():
@@ -194,7 +321,7 @@ def ida_main():
        print(f"{hex(function)} not a function")
        return -1

    print_features(function_handles, extractor)
    print_static_features(function_handles, extractor)

    return 0

@@ -209,57 +336,11 @@ def ghidra_main():

    function_handles = tuple(extractor.get_functions())

    print_features(function_handles, extractor)
    print_static_features(function_handles, extractor)

    return 0


def print_features(functions, extractor: capa.features.extractors.base_extractor.FeatureExtractor):
    for f in functions:
        if extractor.is_library_function(f.address):
            function_name = extractor.get_function_name(f.address)
            logger.debug("skipping library function %s (%s)", format_address(f.address), function_name)
            continue

        print(f"func: {format_address(f.address)}")

        for feature, addr in extractor.extract_function_features(f):
            if capa.features.common.is_global_feature(feature):
                continue

            if f.address != addr:
                print(f" func: {format_address(f.address)}: {feature} -> {format_address(addr)}")
            else:
                print(f" func: {format_address(f.address)}: {feature}")

        for bb in extractor.get_basic_blocks(f):
            for feature, addr in extractor.extract_basic_block_features(f, bb):
                if capa.features.common.is_global_feature(feature):
                    continue

                if bb.address != addr:
                    print(f" bb: {format_address(bb.address)}: {feature} -> {format_address(addr)}")
                else:
                    print(f" bb: {format_address(bb.address)}: {feature}")

            for insn in extractor.get_instructions(f, bb):
                for feature, addr in extractor.extract_insn_features(f, bb, insn):
                    if capa.features.common.is_global_feature(feature):
                        continue

                    try:
                        if insn.address != addr:
                            print(
                                f" insn: {format_address(f.address)}: {format_address(insn.address)}: {feature} -> {format_address(addr)}"
                            )
                        else:
                            print(f" insn: {format_address(insn.address)}: {feature}")

                    except UnicodeEncodeError:
                        # may be an issue while piping to less and encountering non-ascii characters
                        continue


if __name__ == "__main__":
    if capa.helpers.is_runtime_ida():
        ida_main()

@@ -33,7 +33,7 @@ import capa.features.extractors.pefile
import capa.features.extractors.base_extractor
from capa.helpers import log_unsupported_runtime_error
from capa.features.common import Feature
from capa.features.extractors.base_extractor import FunctionHandle
from capa.features.extractors.base_extractor import FunctionHandle, StaticFeatureExtractor

logger = logging.getLogger("show-unused-features")

@@ -52,7 +52,7 @@ def get_rules_feature_set(rules_path) -> Set[Feature]:


def get_file_features(
    functions: Tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.FeatureExtractor
    functions: Tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.StaticFeatureExtractor
) -> typing.Counter[Feature]:
    feature_map: typing.Counter[Feature] = Counter()

@@ -145,6 +145,8 @@ def main(argv=None):
        log_unsupported_runtime_error()
        return -1

    assert isinstance(extractor, StaticFeatureExtractor), "only static analysis supported today"

    feature_map: typing.Counter[Feature] = Counter()

    feature_map.update([feature for feature, _ in extractor.extract_global_features()])

Submodule tests/data updated: 87bd888e19...5c4886b2b7

@@ -38,7 +38,14 @@ from capa.features.common import (
    FeatureAccess,
)
from capa.features.address import Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
from capa.features.extractors.base_extractor import (
    BBHandle,
    CallHandle,
    InsnHandle,
    ThreadHandle,
    ProcessHandle,
    FunctionHandle,
)
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor

CD = Path(__file__).resolve().parent
@@ -134,10 +141,11 @@ def get_pefile_extractor(path: Path):
    return extractor


def get_dotnetfile_extractor(path: Path):
    import capa.features.extractors.dotnetfile
@lru_cache(maxsize=1)
def get_dnfile_extractor(path: Path):
    import capa.features.extractors.dnfile.extractor

    extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)
    extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)

    # overload the extractor so that the fixture exposes `extractor.path`
    setattr(extractor, "path", path.as_posix())
@@ -146,10 +154,10 @@ def get_dotnetfile_extractor(path: Path):


@lru_cache(maxsize=1)
def get_dnfile_extractor(path: Path):
    import capa.features.extractors.dnfile.extractor
def get_dotnetfile_extractor(path: Path):
    import capa.features.extractors.dotnetfile

    extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
    extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)

    # overload the extractor so that the fixture exposes `extractor.path`
    setattr(extractor, "path", path.as_posix())
@@ -181,6 +189,20 @@ def get_binja_extractor(path: Path):
    return extractor


@lru_cache(maxsize=1)
def get_cape_extractor(path):
    import gzip
    import json

    from capa.features.extractors.cape.extractor import CapeExtractor

    with gzip.open(path, "r") as compressed_report:
        report_json = compressed_report.read()
    report = json.loads(report_json)

    return CapeExtractor.from_report(report)


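The CAPE fixture decompresses a gzipped report and hands the parsed JSON to CapeExtractor.from_report. Typical use inside a test then looks like this (sample name taken from the fixtures below):

    # sketch: load a CAPE report fixture and enumerate its processes
    extractor = get_cape_extractor(get_data_path_by_name("0000a657"))
    for ph in extractor.get_processes():
        print(ph.address.ppid, ph.address.pid)
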
@lru_cache(maxsize=1)
def get_ghidra_extractor(path: Path):
    import capa.features.extractors.ghidra.extractor
@@ -206,6 +228,36 @@ def extract_file_features(extractor):
    return features


def extract_process_features(extractor, ph):
    features = collections.defaultdict(set)
    for th in extractor.get_threads(ph):
        for ch in extractor.get_calls(ph, th):
            for feature, va in extractor.extract_call_features(ph, th, ch):
                features[feature].add(va)
        for feature, va in extractor.extract_thread_features(ph, th):
            features[feature].add(va)
    for feature, va in extractor.extract_process_features(ph):
        features[feature].add(va)
    return features


def extract_thread_features(extractor, ph, th):
    features = collections.defaultdict(set)
    for ch in extractor.get_calls(ph, th):
        for feature, va in extractor.extract_call_features(ph, th, ch):
            features[feature].add(va)
    for feature, va in extractor.extract_thread_features(ph, th):
        features[feature].add(va)
    return features


def extract_call_features(extractor, ph, th, ch):
    features = collections.defaultdict(set)
    for feature, addr in extractor.extract_call_features(ph, th, ch):
        features[feature].add(addr)
    return features


# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
def extract_function_features(extractor, fh):
    features = collections.defaultdict(set)
@@ -267,6 +319,8 @@ def get_data_path_by_name(name) -> Path:
        return CD / "data" / "499c2a85f6e8142c3f48d4251c9c7cd6.raw32"
    elif name.startswith("9324d"):
        return CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_"
    elif name.startswith("395eb"):
        return CD / "data" / "395eb0ddd99d2c9e37b6d0b73485ee9c.exe_"
    elif name.startswith("a1982"):
        return CD / "data" / "a198216798ca38f280dc413f8c57f2c2.exe_"
    elif name.startswith("a933a"):
@@ -317,6 +371,24 @@ def get_data_path_by_name(name) -> Path:
        return CD / "data" / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_"
    elif name.startswith("2bf18d"):
        return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_"
    elif name.startswith("0000a657"):
        return (
            CD
            / "data"
            / "dynamic"
            / "cape"
            / "v2.2"
            / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
        )
    elif name.startswith("d46900"):
        return (
            CD
            / "data"
            / "dynamic"
            / "cape"
            / "v2.2"
            / "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"
        )
    elif name.startswith("ea2876"):
        return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
    elif name.startswith("1038a2"):
@@ -396,6 +468,27 @@ def sample(request):
    return resolve_sample(request.param)


def get_process(extractor, ppid: int, pid: int) -> ProcessHandle:
    for ph in extractor.get_processes():
        if ph.address.ppid == ppid and ph.address.pid == pid:
            return ph
    raise ValueError("process not found")


def get_thread(extractor, ph: ProcessHandle, tid: int) -> ThreadHandle:
    for th in extractor.get_threads(ph):
        if th.address.tid == tid:
            return th
    raise ValueError("thread not found")


def get_call(extractor, ph: ProcessHandle, th: ThreadHandle, cid: int) -> CallHandle:
    for ch in extractor.get_calls(ph, th):
        if ch.address.id == cid:
            return ch
    raise ValueError("call not found")


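These lookups back the dynamic scope strings used by the fixtures, of the form process=(pid:ppid),thread=tid,call=id (parsed in resolve_scope below). Resolving one such scope by hand, using values from the test data, looks like:

    # sketch: resolve "process=(2852:3052),thread=2804,call=56" manually
    ph = get_process(extractor, ppid=3052, pid=2852)
    th = get_thread(extractor, ph, tid=2804)
    ch = get_call(extractor, ph, th, cid=56)
    features = extract_call_features(extractor, ph, th, ch)
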
def get_function(extractor, fva: int) -> FunctionHandle:
    for fh in extractor.get_functions():
        if isinstance(extractor, DnfileFeatureExtractor):
@@ -503,6 +596,63 @@ def resolve_scope(scope):

        inner_function.__name__ = scope
        return inner_function
    elif "call=" in scope:
        # like `process=(pid:ppid),thread=tid,call=id`
        assert "process=" in scope
        assert "thread=" in scope
        pspec, _, spec = scope.partition(",")
        tspec, _, cspec = spec.partition(",")
        pspec = pspec.partition("=")[2][1:-1].split(":")
        assert len(pspec) == 2
        pid, ppid = map(int, pspec)
        tid = int(tspec.partition("=")[2])
        cid = int(cspec.partition("=")[2])

        def inner_call(extractor):
            ph = get_process(extractor, ppid, pid)
            th = get_thread(extractor, ph, tid)
            ch = get_call(extractor, ph, th, cid)
            features = extract_call_features(extractor, ph, th, ch)
            for k, vs in extract_global_features(extractor).items():
                features[k].update(vs)
            return features

        inner_call.__name__ = scope
        return inner_call
    elif "thread=" in scope:
        # like `process=(pid:ppid),thread=tid`
        assert "process=" in scope
        pspec, _, tspec = scope.partition(",")
        pspec = pspec.partition("=")[2][1:-1].split(":")
        assert len(pspec) == 2
        pid, ppid = map(int, pspec)
        tid = int(tspec.partition("=")[2])

        def inner_thread(extractor):
            ph = get_process(extractor, ppid, pid)
            th = get_thread(extractor, ph, tid)
            features = extract_thread_features(extractor, ph, th)
            for k, vs in extract_global_features(extractor).items():
                features[k].update(vs)
            return features

        inner_thread.__name__ = scope
        return inner_thread
    elif "process=" in scope:
        # like `process=(pid:ppid)`
        pspec = scope.partition("=")[2][1:-1].split(":")
        assert len(pspec) == 2
        pid, ppid = map(int, pspec)

        def inner_process(extractor):
            ph = get_process(extractor, ppid, pid)
            features = extract_process_features(extractor, ph)
            for k, vs in extract_global_features(extractor).items():
                features[k].update(vs)
            return features

        inner_process.__name__ = scope
        return inner_process
    else:
        raise ValueError("unexpected scope fixture")

@@ -528,6 +678,84 @@ def parametrize(params, values, **kwargs):
    return pytest.mark.parametrize(params, values, ids=ids, **kwargs)


DYNAMIC_FEATURE_PRESENCE_TESTS = sorted(
    [
        # file/string
        ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), True),
        ("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), True),
        ("0000a657", "file", capa.features.common.String("nope"), False),
        # file/sections
        ("0000a657", "file", capa.features.file.Section(".rdata"), True),
        ("0000a657", "file", capa.features.file.Section(".nope"), False),
        # file/imports
        ("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), True),
        ("0000a657", "file", capa.features.file.Import("Nope"), False),
        # file/exports
        ("0000a657", "file", capa.features.file.Export("Nope"), False),
        # process/environment variables
        (
            "0000a657",
            "process=(1180:3052)",
            capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"),
            True,
        ),
        ("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False),
        # thread/api calls
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True),
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False),
        # thread/number call argument
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), True),
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), False),
        # thread/string call argument
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), True),
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), False),
        ("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), True),
        ("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), False),
    ],
    # order tests by (file, item)
    # so that our LRU cache is most effective.
    key=lambda t: (t[0], t[1]),
)

DYNAMIC_FEATURE_COUNT_TESTS = sorted(
    [
        # file/string
        ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1),
        ("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), 1),
        ("0000a657", "file", capa.features.common.String("nope"), 0),
        # file/sections
        ("0000a657", "file", capa.features.file.Section(".rdata"), 1),
        ("0000a657", "file", capa.features.file.Section(".nope"), 0),
        # file/imports
        ("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), 1),
        ("0000a657", "file", capa.features.file.Import("Nope"), 0),
        # file/exports
        ("0000a657", "file", capa.features.file.Export("Nope"), 0),
        # process/environment variables
        (
            "0000a657",
            "process=(1180:3052)",
            capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"),
            2,
        ),
        ("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), 0),
        # thread/api calls
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), 7),
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), 0),
        # thread/number call argument
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), 1),
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), 0),
        # thread/string call argument
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), 1),
        ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), 0),
        ("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), 1),
        ("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), 0),
    ],
    # order tests by (file, item)
    # so that our LRU cache is most effective.
    key=lambda t: (t[0], t[1]),
)

FEATURE_PRESENCE_TESTS = sorted(
    [
        # file/characteristic("embedded pe")
@@ -552,6 +780,7 @@ FEATURE_PRESENCE_TESTS = sorted(
        ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True),
        ("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True),
        ("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True),
        ("mimikatz", "file", capa.features.file.Import("IsWow64Process"), True),
        ("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True),
        ("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True),
        ("mimikatz", "file", capa.features.file.Import("#11"), False),
@@ -632,11 +861,12 @@ FEATURE_PRESENCE_TESTS = sorted(
        # .text:004018C0 8D 4B 02 lea ecx, [ebx+2]
        ("mimikatz", "function=0x401873,bb=0x4018B2,insn=0x4018C0", capa.features.insn.Number(0x2), True),
        # insn/api
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True),
        # not extracting dll anymore
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), False),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), False),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), False),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), False),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), False),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True),
@@ -645,7 +875,8 @@ FEATURE_PRESENCE_TESTS = sorted(
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False),
        ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False),
        # insn/api: thunk
        ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True),
        # not extracting dll anymore
        ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
        ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
        # insn/api: x64
        (
@@ -669,10 +900,15 @@ FEATURE_PRESENCE_TESTS = sorted(
        ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
        ("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
        # insn/api: resolve indirect calls
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True),
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True),
        # not extracting dll anymore
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), False),
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), False),
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), False),
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), False),
        ("c91887...", "function=0x401A77", capa.features.insn.API("CreatePipe"), True),
        ("c91887...", "function=0x401A77", capa.features.insn.API("SetHandleInformation"), True),
        ("c91887...", "function=0x401A77", capa.features.insn.API("CloseHandle"), True),
        ("c91887...", "function=0x401A77", capa.features.insn.API("WriteFile"), True),
        # insn/string
        ("mimikatz", "function=0x40105D", capa.features.common.String("SCardControl"), True),
        ("mimikatz", "function=0x40105D", capa.features.common.String("SCardTransmit"), True),
@@ -847,7 +1083,8 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
        ("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True),
        ("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False),
        ("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True),
        ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True),
        # not extracting dll anymore
        ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), False),
        ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True),
        ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), True),
        ("_1c444", "token=0x6000018", capa.features.common.Characteristic("calls to"), False),
@@ -1121,6 +1358,11 @@ def z9324d_extractor():
    return get_extractor(get_data_path_by_name("9324d..."))


@pytest.fixture
def z395eb_extractor():
    return get_extractor(get_data_path_by_name("395eb..."))


@pytest.fixture
def pma12_04_extractor():
    return get_extractor(get_data_path_by_name("pma12-04"))
@@ -1207,29 +1449,42 @@ def get_result_doc(path: Path):

@pytest.fixture
def pma0101_rd():
    # python -m capa.main tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ --json > tests/data/rd/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_.json
    return get_result_doc(CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json")


@pytest.fixture
def dotnet_1c444e_rd():
    # .NET sample
    # python -m capa.main tests/data/dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_ --json > tests/data/rd/1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json
    return get_result_doc(CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json")


@pytest.fixture
def a3f3bbc_rd():
    # python -m capa.main tests/data/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_ --json > tests/data/rd/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json
    return get_result_doc(CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json")


@pytest.fixture
def al_khaserx86_rd():
    # python -m capa.main tests/data/al-khaser_x86.exe_ --json > tests/data/rd/al-khaser_x86.exe_.json
    return get_result_doc(CD / "data" / "rd" / "al-khaser_x86.exe_.json")


@pytest.fixture
def al_khaserx64_rd():
    # python -m capa.main tests/data/al-khaser_x64.exe_ --json > tests/data/rd/al-khaser_x64.exe_.json
    return get_result_doc(CD / "data" / "rd" / "al-khaser_x64.exe_.json")


@pytest.fixture
def a076114_rd():
    # python -m capa.main tests/data/0761142efbda6c4b1e801223de723578.dll_ --json > tests/data/rd/0761142efbda6c4b1e801223de723578.dll_.json
    return get_result_doc(CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json")


@pytest.fixture
def dynamic_a0000a6_rd():
    # python -m capa.main tests/data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json --json > tests/data/rd/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json
    return get_result_doc(CD / "data" / "rd" / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json")

309  tests/test_capabilities.py  Normal file
@@ -0,0 +1,309 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import textwrap

import capa.rules
import capa.capabilities.common


def test_match_across_scopes_file_function(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a function (0x4073F0)
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: install service
                            scopes:
                                static: function
                                dynamic: process
                            examples:
                                - 9324d1a8ae37a36ae560c37448c9705a:0x4073F0
                        features:
                            - and:
                                - api: advapi32.OpenSCManagerA
                                - api: advapi32.CreateServiceA
                                - api: advapi32.StartServiceA
                    """
                )
            ),
            # this rule should match on a file feature
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: .text section
                            scopes:
                                static: file
                                dynamic: process
                            examples:
                                - 9324d1a8ae37a36ae560c37448c9705a
                        features:
                            - section: .text
                    """
                )
            ),
            # this rule should match on earlier rule matches:
            #   - install service, with function scope
            #   - .text section, with file scope
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: .text section and install service
                            scopes:
                                static: file
                                dynamic: process
                            examples:
                                - 9324d1a8ae37a36ae560c37448c9705a
                        features:
                            - and:
                                - match: install service
                                - match: .text section
                    """
                )
            ),
        ]
    )
    capabilities, meta = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "install service" in capabilities
    assert ".text section" in capabilities
    assert ".text section and install service" in capabilities


def test_match_across_scopes(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a basic block (including at least 0x403685)
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: tight loop
                            scopes:
                                static: basic block
                                dynamic: process
                            examples:
                                - 9324d1a8ae37a36ae560c37448c9705a:0x403685
                        features:
                            - characteristic: tight loop
                    """
                )
            ),
            # this rule should match on a function (0x403660)
            # based on API, as well as prior basic block rule match
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: kill thread loop
                            scopes:
                                static: function
                                dynamic: process
                            examples:
                                - 9324d1a8ae37a36ae560c37448c9705a:0x403660
                        features:
                            - and:
                                - api: kernel32.TerminateThread
                                - api: kernel32.CloseHandle
                                - match: tight loop
                    """
                )
            ),
            # this rule should match on a file feature and a prior function rule match
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: kill thread program
                            scopes:
                                static: file
                                dynamic: process
                            examples:
                                - 9324d1a8ae37a36ae560c37448c9705a
                        features:
                            - and:
                                - section: .text
                                - match: kill thread loop
                    """
                )
            ),
        ]
    )
    capabilities, meta = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "tight loop" in capabilities
    assert "kill thread loop" in capabilities
    assert "kill thread program" in capabilities


def test_subscope_bb_rules(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: test rule
                            scopes:
                                static: function
                                dynamic: process
                        features:
                            - and:
                                - basic block:
                                    - characteristic: tight loop
                    """
                )
            )
        ]
    )
    # tight loop at 0x403685
    capabilities, meta = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "test rule" in capabilities


def test_byte_matching(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: byte match test
                            scopes:
                                static: function
                                dynamic: process
                        features:
                            - and:
                                - bytes: ED 24 9E F4 52 A9 07 47 55 8E E1 AB 30 8E 23 61
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "byte match test" in capabilities


def test_com_feature_matching(z395eb_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: initialize IWebBrowser2
                            scopes:
                                static: basic block
                                dynamic: unsupported
                        features:
                            - and:
                                - api: ole32.CoCreateInstance
                                - com/class: InternetExplorer #bytes: 01 DF 02 00 00 00 00 00 C0 00 00 00 00 00 00 46 = CLSID_InternetExplorer
                                - com/interface: IWebBrowser2 #bytes: 61 16 0C D3 AF CD D0 11 8A 3E 00 C0 4F C9 E2 6E = IID_IWebBrowser2
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.capabilities.common.find_capabilities(rules, z395eb_extractor)
    assert "initialize IWebBrowser2" in capabilities


def test_count_bb(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: count bb
                            namespace: test
                            scopes:
                                static: function
                                dynamic: process
                        features:
                            - and:
                                - count(basic blocks): 1 or more
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "count bb" in capabilities


def test_instruction_scope(z9324d_extractor):
    # .text:004071A4 68 E8 03 00 00 push 3E8h
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: push 1000
                            namespace: test
                            scopes:
                                static: instruction
                                dynamic: process
                        features:
                            - and:
                                - mnemonic: push
                                - number: 1000
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "push 1000" in capabilities
    assert 0x4071A4 in {result[0] for result in capabilities["push 1000"]}

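As the asserts above show, find_capabilities returns a mapping from rule name to a list of (address, result) match pairs. A small helper illustrating that shape:

    # sketch: collect just the match locations for one rule
    def matched_addresses(capabilities, rule_name):
        return {address for address, _ in capabilities.get(rule_name, [])}
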

def test_instruction_subscope(z9324d_extractor):
    # .text:00406F60 sub_406F60 proc near
    # [...]
    # .text:004071A4 68 E8 03 00 00 push 3E8h
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: push 1000 on i386
                            namespace: test
                            scopes:
                                static: function
                                dynamic: process
                        features:
                            - and:
                                - arch: i386
                                - instruction:
                                    - mnemonic: push
                                    - number: 1000
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
    assert "push 1000 on i386" in capabilities
    assert 0x406F60 in {result[0] for result in capabilities["push 1000 on i386"]}

27  tests/test_cape_features.py  Normal file
@@ -0,0 +1,27 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import fixtures


@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.DYNAMIC_FEATURE_PRESENCE_TESTS,
    indirect=["sample", "scope"],
)
def test_cape_features(sample, scope, feature, expected):
    fixtures.do_test_feature_presence(fixtures.get_cape_extractor, sample, scope, feature, expected)


@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.DYNAMIC_FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_cape_feature_counts(sample, scope, feature, expected):
    fixtures.do_test_feature_count(fixtures.get_cape_extractor, sample, scope, feature, expected)

72  tests/test_cape_model.py  Normal file
@@ -0,0 +1,72 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import gzip
from pathlib import Path

import fixtures

from capa.features.extractors.cape.models import Call, CapeReport

CD = Path(__file__).resolve().parent
CAPE_DIR = CD / "data" / "dynamic" / "cape"


@fixtures.parametrize(
    "version,filename",
    [
        ("v2.2", "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"),
        ("v2.2", "55dcd38773f4104b95589acc87d93bf8b4a264b4a6d823b73fb6a7ab8144c08b.json.gz"),
        ("v2.2", "77c961050aa252d6d595ec5120981abf02068c968f4a5be5958d10e87aa6f0e8.json.gz"),
        ("v2.2", "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"),
        ("v2.4", "36d218f384010cce9f58b8193b7d8cc855d1dff23f80d16e13a883e152d07921.json.gz"),
        ("v2.4", "41ce492f04accef7931b84b8548a6ca717ffabb9bedc4f624de2d37a5345036c.json.gz"),
        ("v2.4", "515a6269965ccdf1005008e017ec87fafb97fd2464af1c393ad93b438f6f33fe.json.gz"),
        ("v2.4", "5d61700feabba201e1ba98df3c8210a3090c8c9f9adbf16cb3d1da3aaa2a9d96.json.gz"),
        ("v2.4", "5effaf6795932d8b36755f89f99ce7436421ea2bd1ed5bc55476530c1a22009f.json.gz"),
        ("v2.4", "873275144af88e9b95ea2c59ece39b8ce5a9d7fe09774b683050098ac965054d.json.gz"),
        ("v2.4", "8b9aaf4fad227cde7a7dabce7ba187b0b923301718d9d40de04bdd15c9b22905.json.gz"),
        ("v2.4", "b1c4aa078880c579961dc5ec899b2c2e08ae5db80b4263e4ca9607a68e2faef9.json.gz"),
        ("v2.4", "fb7ade52dc5a1d6128b9c217114a46d0089147610f99f5122face29e429a1e74.json.gz"),
    ],
)
def test_cape_model_can_load(version: str, filename: str):
    path = CAPE_DIR / version / filename
    buf = gzip.decompress(path.read_bytes())
    report = CapeReport.from_buf(buf)
    assert report is not None


def test_cape_model_argument():
    call = Call.model_validate_json(
        """
        {
            "timestamp": "2023-10-20 12:30:14,015",
            "thread_id": "2380",
            "caller": "0x7797dff8",
            "parentcaller": "0x77973486",
            "category": "system",
            "api": "TestApiCall",
            "status": true,
            "return": "0x00000000",
            "arguments": [
                {
                    "name": "Value Base 10",
                    "value": "30"
                },
                {
                    "name": "Value Base 16",
                    "value": "0x30"
                }
            ],
            "repeated": 19,
            "id": 0
        }
        """
    )
    assert call.arguments[0].value == 30
    assert call.arguments[1].value == 0x30

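The final two asserts show the CAPE model coercing string-encoded argument values to ints, honoring the 0x prefix ("30" -> 30, "0x30" -> 48). A hedged sketch of such a validator in pydantic v2 style; the actual field and validator names in capa.features.extractors.cape.models may differ:

    # sketch: int coercion for CAPE call arguments (illustrative, not the real model)
    from typing import Union
    from pydantic import BaseModel, field_validator

    class Argument(BaseModel):
        name: str
        value: Union[int, str]

        @field_validator("value", mode="before")
        @classmethod
        def coerce_int(cls, v):
            if isinstance(v, str):
                try:
                    return int(v, 0)  # base 0 honors 0x/0o/0b prefixes
                except ValueError:
                    return v  # genuine strings pass through unchanged
            return v
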
@@ -1,33 +0,0 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import fixtures


@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
    indirect=["sample", "scope"],
)
def test_dnfile_features(sample, scope, feature, expected):
    fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected)


@fixtures.parametrize(
    "extractor,function,expected",
    [
        ("b9f5b_dotnetfile_extractor", "is_dotnet_file", True),
        ("b9f5b_dotnetfile_extractor", "is_mixed_mode", False),
        ("mixed_mode_64_dotnetfile_extractor", "is_mixed_mode", True),
        ("b9f5b_dotnetfile_extractor", "get_entry_point", 0x6000007),
        ("b9f5b_dotnetfile_extractor", "get_runtime_version", (2, 5)),
        ("b9f5b_dotnetfile_extractor", "get_meta_version_string", "v2.0.50727"),
    ],
)
def test_dnfile_extractor(request, extractor, function, expected):
    extractor_function = getattr(request.getfixturevalue(extractor), function)
    assert extractor_function() == expected

86  tests/test_extractor_hashing.py  Normal file
@@ -0,0 +1,86 @@
|
||||
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging

import pytest
import fixtures

from capa.features.extractors.base_extractor import SampleHashes

logger = logging.getLogger(__name__)


def test_viv_hash_extraction():
    assert fixtures.get_viv_extractor(fixtures.get_data_path_by_name("mimikatz")).get_sample_hashes() == SampleHashes(
        md5="5f66b82558ca92e54e77f216ef4c066c",
        sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
        sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
    )


def test_pefile_hash_extraction():
    assert fixtures.get_pefile_extractor(
        fixtures.get_data_path_by_name("mimikatz")
    ).get_sample_hashes() == SampleHashes(
        md5="5f66b82558ca92e54e77f216ef4c066c",
        sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
        sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
    )


def test_dnfile_hash_extraction():
    assert fixtures.get_dnfile_extractor(fixtures.get_data_path_by_name("b9f5b")).get_sample_hashes() == SampleHashes(
        md5="b9f5bd514485fb06da39beff051b9fdc",
        sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
        sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
    )


def test_dotnetfile_hash_extraction():
    assert fixtures.get_dotnetfile_extractor(
        fixtures.get_data_path_by_name("b9f5b")
    ).get_sample_hashes() == SampleHashes(
        md5="b9f5bd514485fb06da39beff051b9fdc",
        sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
        sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
    )


def test_cape_hash_extraction():
    assert fixtures.get_cape_extractor(fixtures.get_data_path_by_name("0000a657")).get_sample_hashes() == SampleHashes(
        md5="e2147b5333879f98d515cd9aa905d489",
        sha1="ad4d520fb7792b4a5701df973d6bd8a6cbfbb57f",
        sha256="0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82",
    )


# We need to skip the binja test if we cannot import binaryninja, e.g., in GitHub CI.
binja_present: bool = False
try:
    import binaryninja

    try:
        binaryninja.load(source=b"\x90")
    except RuntimeError:
        logger.warning("Binary Ninja license is not valid, provide via $BN_LICENSE or license.dat")
    else:
        binja_present = True
except ImportError:
    pass


@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
def test_binja_hash_extraction():
    extractor = fixtures.get_binja_extractor(fixtures.get_data_path_by_name("mimikatz"))
    hashes = SampleHashes(
        md5="5f66b82558ca92e54e77f216ef4c066c",
        sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
        sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
    )
    assert extractor.get_sample_hashes() == hashes
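SampleHashes is a plain value object over the three digests, so expected values like those above can be regenerated straight from a sample on disk. A minimal sketch, assuming only that the extractors digest the raw file bytes (hash_sample is a hypothetical helper, not part of capa):

import hashlib
from pathlib import Path

from capa.features.extractors.base_extractor import SampleHashes


def hash_sample(path: Path) -> SampleHashes:
    # compute the same three digests that get_sample_hashes() reports
    buf = path.read_bytes()
    return SampleHashes(
        md5=hashlib.md5(buf).hexdigest(),
        sha1=hashlib.sha1(buf).hexdigest(),
        sha256=hashlib.sha256(buf).hexdigest(),
    )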
@@ -17,7 +17,9 @@ EXPECTED = textwrap.dedent(
    name: test rule
    authors:
      - user@domain.com
    scope: function
    scopes:
      static: function
      dynamic: process
    examples:
      - foo1234
      - bar5678

@@ -41,7 +43,9 @@ def test_rule_reformat_top_level_elements():
    name: test rule
    authors:
      - user@domain.com
    scope: function
    scopes:
      static: function
      dynamic: process
    examples:
      - foo1234
      - bar5678

@@ -59,7 +63,9 @@ def test_rule_reformat_indentation():
    name: test rule
    authors:
      - user@domain.com
    scope: function
    scopes:
      static: function
      dynamic: process
    examples:
      - foo1234
      - bar5678

@@ -83,7 +89,9 @@ def test_rule_reformat_order():
    examples:
      - foo1234
      - bar5678
    scope: function
    scopes:
      static: function
      dynamic: process
    name: test rule
    features:
      - and:

@@ -107,7 +115,9 @@ def test_rule_reformat_meta_update():
    examples:
      - foo1234
      - bar5678
    scope: function
    scopes:
      static: function
      dynamic: process
    name: AAAA
    features:
      - and:

@@ -131,7 +141,9 @@ def test_rule_reformat_string_description():
    name: test rule
    authors:
      - user@domain.com
    scope: function
    scopes:
      static: function
      dynamic: process
    features:
      - and:
        - string: foo
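The hunks above track capa's rule-format migration: the single scope: field is replaced by a scopes: mapping that names a static and a dynamic scope per rule. A minimal sketch of a rule in the new format, parsed the same way these tests do (the rule name and feature are illustrative only):

import textwrap

import capa.rules

rule = capa.rules.Rule.from_yaml(
    textwrap.dedent(
        """
        rule:
          meta:
            name: example rule
            scopes:
              static: function
              dynamic: process
          features:
            - or:
              - api: CreateFile
        """
    )
)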
165
tests/test_freeze_dynamic.py
Normal file
@@ -0,0 +1,165 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import textwrap
from typing import List
from pathlib import Path

import fixtures

import capa.main
import capa.rules
import capa.helpers
import capa.features.file
import capa.features.insn
import capa.features.common
import capa.features.freeze
import capa.features.basicblock
import capa.features.extractors.null
import capa.features.extractors.base_extractor
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import (
    SampleHashes,
    ThreadHandle,
    ProcessHandle,
    ThreadAddress,
    ProcessAddress,
    DynamicCallAddress,
    DynamicFeatureExtractor,
)

EXTRACTOR = capa.features.extractors.null.NullDynamicFeatureExtractor(
    base_address=AbsoluteVirtualAddress(0x401000),
    sample_hashes=SampleHashes(
        md5="6eb7ee7babf913d75df3f86c229df9e7",
        sha1="2a082494519acd5130d5120fa48786df7275fdd7",
        sha256="0c7d1a34eb9fd55bedbf37ba16e3d5dd8c1dd1d002479cc4af27ef0f82bb4792",
    ),
    global_features=[],
    file_features=[
        (AbsoluteVirtualAddress(0x402345), capa.features.common.Characteristic("embedded pe")),
    ],
    processes={
        ProcessAddress(pid=1): capa.features.extractors.null.ProcessFeatures(
            name="explorer.exe",
            features=[],
            threads={
                ThreadAddress(ProcessAddress(pid=1), tid=1): capa.features.extractors.null.ThreadFeatures(
                    features=[],
                    calls={
                        DynamicCallAddress(
                            thread=ThreadAddress(ProcessAddress(pid=1), tid=1), id=1
                        ): capa.features.extractors.null.CallFeatures(
                            name="CreateFile(12)",
                            features=[
                                (
                                    DynamicCallAddress(thread=ThreadAddress(ProcessAddress(pid=1), tid=1), id=1),
                                    capa.features.insn.API("CreateFile"),
                                ),
                                (
                                    DynamicCallAddress(thread=ThreadAddress(ProcessAddress(pid=1), tid=1), id=1),
                                    capa.features.insn.Number(12),
                                ),
                            ],
                        ),
                        DynamicCallAddress(
                            thread=ThreadAddress(ProcessAddress(pid=1), tid=1), id=2
                        ): capa.features.extractors.null.CallFeatures(
                            name="WriteFile()",
                            features=[
                                (
                                    DynamicCallAddress(thread=ThreadAddress(ProcessAddress(pid=1), tid=1), id=2),
                                    capa.features.insn.API("WriteFile"),
                                ),
                            ],
                        ),
                    },
                ),
            },
        ),
    },
)


def addresses(s) -> List[Address]:
    return sorted(i.address for i in s)


def test_null_feature_extractor():
    ph = ProcessHandle(ProcessAddress(pid=1), None)
    th = ThreadHandle(ThreadAddress(ProcessAddress(pid=1), tid=1), None)

    assert addresses(EXTRACTOR.get_processes()) == [ProcessAddress(pid=1)]
    assert addresses(EXTRACTOR.get_threads(ph)) == [ThreadAddress(ProcessAddress(pid=1), tid=1)]
    assert addresses(EXTRACTOR.get_calls(ph, th)) == [
        DynamicCallAddress(thread=ThreadAddress(ProcessAddress(pid=1), tid=1), id=1),
        DynamicCallAddress(thread=ThreadAddress(ProcessAddress(pid=1), tid=1), id=2),
    ]

    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: create file
                        scopes:
                          static: basic block
                          dynamic: call
                      features:
                        - and:
                          - api: CreateFile
                    """
                )
            ),
        ]
    )
    capabilities, _ = capa.main.find_capabilities(rules, EXTRACTOR)
    assert "create file" in capabilities


def compare_extractors(a: DynamicFeatureExtractor, b: DynamicFeatureExtractor):
    assert list(a.extract_file_features()) == list(b.extract_file_features())

    assert addresses(a.get_processes()) == addresses(b.get_processes())
    for p in a.get_processes():
        assert addresses(a.get_threads(p)) == addresses(b.get_threads(p))
        assert sorted(set(a.extract_process_features(p))) == sorted(set(b.extract_process_features(p)))

        for t in a.get_threads(p):
            assert addresses(a.get_calls(p, t)) == addresses(b.get_calls(p, t))
            assert sorted(set(a.extract_thread_features(p, t))) == sorted(set(b.extract_thread_features(p, t)))

            for c in a.get_calls(p, t):
                assert sorted(set(a.extract_call_features(p, t, c))) == sorted(set(b.extract_call_features(p, t, c)))


def test_freeze_str_roundtrip():
    load = capa.features.freeze.loads
    dump = capa.features.freeze.dumps
    reanimated = load(dump(EXTRACTOR))
    compare_extractors(EXTRACTOR, reanimated)


def test_freeze_bytes_roundtrip():
    load = capa.features.freeze.load
    dump = capa.features.freeze.dump
    reanimated = load(dump(EXTRACTOR))
    compare_extractors(EXTRACTOR, reanimated)


def test_freeze_load_sample(tmpdir):
    o = tmpdir.mkdir("capa").join("test.frz")

    extractor = fixtures.get_cape_extractor(fixtures.get_data_path_by_name("d46900"))

    Path(o.strpath).write_bytes(capa.features.freeze.dump(extractor))

    null_extractor = capa.features.freeze.load(Path(o.strpath).read_bytes())

    compare_extractors(extractor, null_extractor)
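The null extractor above doubles as a worked example of the dynamic extractor API: processes own threads, threads own calls, and features hang off each level. A short sketch walking that hierarchy with the EXTRACTOR defined in this file (printing each item rather than asserting):

# walk process -> thread -> call and dump the features recorded at each call
for ph in EXTRACTOR.get_processes():
    for th in EXTRACTOR.get_threads(ph):
        for ch in EXTRACTOR.get_calls(ph, th):
            for item in EXTRACTOR.extract_call_features(ph, th, ch):
                print(ph.address, th.address, ch.address, item)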
@@ -22,10 +22,15 @@ import capa.features.basicblock
import capa.features.extractors.null
import capa.features.extractors.base_extractor
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
from capa.features.extractors.base_extractor import BBHandle, SampleHashes, FunctionHandle

EXTRACTOR = capa.features.extractors.null.NullFeatureExtractor(
EXTRACTOR = capa.features.extractors.null.NullStaticFeatureExtractor(
    base_address=AbsoluteVirtualAddress(0x401000),
    sample_hashes=SampleHashes(
        md5="6eb7ee7babf913d75df3f86c229df9e7",
        sha1="2a082494519acd5130d5120fa48786df7275fdd7",
        sha256="0c7d1a34eb9fd55bedbf37ba16e3d5dd8c1dd1d002479cc4af27ef0f82bb4792",
    ),
    global_features=[],
    file_features=[
        (AbsoluteVirtualAddress(0x402345), capa.features.common.Characteristic("embedded pe")),

@@ -83,7 +88,9 @@ def test_null_feature_extractor():
                    rule:
                      meta:
                        name: xor loop
                        scope: basic block
                        scopes:
                          static: basic block
                          dynamic: process
                      features:
                        - and:
                          - characteristic: tight loop

@@ -119,8 +126,8 @@ def compare_extractors(a, b):


def test_freeze_str_roundtrip():
    load = capa.features.freeze.loads
    dump = capa.features.freeze.dumps
    load = capa.features.freeze.loads_static
    dump = capa.features.freeze.dumps_static
    reanimated = load(dump(EXTRACTOR))
    compare_extractors(EXTRACTOR, reanimated)


@@ -133,7 +140,7 @@ def test_freeze_bytes_roundtrip():


def roundtrip_feature(feature):
    assert feature == capa.features.freeze.feature_from_capa(feature).to_capa()
    assert feature == capa.features.freeze.features.feature_from_capa(feature).to_capa()


def test_serialize_features():
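The static counterpart of the freeze API was split in the same change: where the dynamic tests use loads/dumps, the static round trip now goes through loads_static/dumps_static. A minimal sketch of the string round trip under the new names, using the EXTRACTOR defined in this file:

import capa.features.freeze

# serialize the static extractor to its string form and revive it
buf = capa.features.freeze.dumps_static(EXTRACTOR)
reanimated = capa.features.freeze.loads_static(buf)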
@@ -20,3 +20,47 @@ def test_all_zeros():
    assert helpers.all_zeros(b) is True
    assert helpers.all_zeros(c) is False
    assert helpers.all_zeros(d) is False


def test_generate_symbols():
    assert list(helpers.generate_symbols("name.dll", "api", include_dll=True)) == list(
        helpers.generate_symbols("name", "api", include_dll=True)
    )
    assert list(helpers.generate_symbols("name.dll", "api", include_dll=False)) == list(
        helpers.generate_symbols("name", "api", include_dll=False)
    )

    # A/W import
    symbols = list(helpers.generate_symbols("kernel32", "CreateFileA", include_dll=True))
    assert len(symbols) == 4
    assert "kernel32.CreateFileA" in symbols
    assert "kernel32.CreateFile" in symbols
    assert "CreateFileA" in symbols
    assert "CreateFile" in symbols

    # import
    symbols = list(helpers.generate_symbols("kernel32", "WriteFile", include_dll=True))
    assert len(symbols) == 2
    assert "kernel32.WriteFile" in symbols
    assert "WriteFile" in symbols

    # ordinal import
    symbols = list(helpers.generate_symbols("ws2_32", "#1", include_dll=True))
    assert len(symbols) == 1
    assert "ws2_32.#1" in symbols

    # A/W api
    symbols = list(helpers.generate_symbols("kernel32", "CreateFileA", include_dll=False))
    assert len(symbols) == 2
    assert "CreateFileA" in symbols
    assert "CreateFile" in symbols

    # api
    symbols = list(helpers.generate_symbols("kernel32", "WriteFile", include_dll=False))
    assert len(symbols) == 1
    assert "WriteFile" in symbols

    # ordinal api
    symbols = list(helpers.generate_symbols("ws2_32", "#1", include_dll=False))
    assert len(symbols) == 1
    assert "ws2_32.#1" in symbols
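In short, generate_symbols expands one import into every spelling a capa rule might use for it: with and without the DLL prefix, and with any trailing A/W variant suffix stripped, while ordinal imports pass through untouched. A quick sketch, assuming the helper lives at capa.features.extractors.helpers (consistent with the bare helpers import in this test file):

import capa.features.extractors.helpers as helpers

# an ANSI API plus its DLL yields all four rule-visible spellings
assert set(helpers.generate_symbols("kernel32", "CreateFileA", include_dll=True)) == {
    "kernel32.CreateFileA",
    "kernel32.CreateFile",
    "CreateFileA",
    "CreateFile",
}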
@@ -6,8 +6,10 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import gzip
import json
import textwrap
from pathlib import Path

import fixtures

@@ -34,7 +36,9 @@ def test_main_single_rule(z9324d_extractor, tmpdir):
rule:
  meta:
    name: test rule
    scope: file
    scopes:
      static: file
      dynamic: file
    authors:
      - test
  features:

@@ -95,7 +99,9 @@ def test_ruleset():
rule:
  meta:
    name: file rule
    scope: file
    scopes:
      static: file
      dynamic: process
  features:
    - characteristic: embedded pe
"""

@@ -107,7 +113,9 @@ def test_ruleset():
rule:
  meta:
    name: function rule
    scope: function
    scopes:
      static: function
      dynamic: process
  features:
    - characteristic: tight loop
"""

@@ -119,267 +127,91 @@ def test_ruleset():
                    rule:
                      meta:
                        name: basic block rule
                        scope: basic block
                        scopes:
                          static: basic block
                          dynamic: process
                      features:
                        - characteristic: nzxor
                    """
                )
            ),
        ]
    )
    assert len(rules.file_rules) == 1
    assert len(rules.function_rules) == 1
    assert len(rules.basic_block_rules) == 1


def test_match_across_scopes_file_function(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a function (0x4073F0)
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: install service
                        scope: function
                        examples:
                          - 9324d1a8ae37a36ae560c37448c9705a:0x4073F0
                        name: process rule
                        scopes:
                          static: file
                          dynamic: process
                      features:
                        - and:
                          - api: advapi32.OpenSCManagerA
                          - api: advapi32.CreateServiceA
                          - api: advapi32.StartServiceA
                          - string: "explorer.exe"
                    """
                )
            ),
            # this rule should match on a file feature
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: .text section
                        scope: file
                        examples:
                          - 9324d1a8ae37a36ae560c37448c9705a
                      features:
                        - section: .text
                    """
                    rule:
                      meta:
                        name: thread rule
                        scopes:
                          static: function
                          dynamic: thread
                      features:
                        - api: RegDeleteKey
                    """
                )
            ),
            # this rule should match on earlier rule matches:
            #  - install service, with function scope
            #  - .text section, with file scope
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: .text section and install service
                        scope: file
                        examples:
                          - 9324d1a8ae37a36ae560c37448c9705a
                      features:
                        - and:
                          - match: install service
                          - match: .text section
                    """
                )
            ),
        ]
    )
    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "install service" in capabilities
    assert ".text section" in capabilities
    assert ".text section and install service" in capabilities


def test_match_across_scopes(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            # this rule should match on a basic block (including at least 0x403685)
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: tight loop
                        scope: basic block
                        examples:
                          - 9324d1a8ae37a36ae560c37448c9705a:0x403685
                      features:
                        - characteristic: tight loop
                    """
                )
            ),
            # this rule should match on a function (0x403660)
            # based on API, as well as prior basic block rule match
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: kill thread loop
                        scope: function
                        examples:
                          - 9324d1a8ae37a36ae560c37448c9705a:0x403660
                        name: test call subscope
                        scopes:
                          static: basic block
                          dynamic: thread
                      features:
                        - and:
                          - api: kernel32.TerminateThread
                          - api: kernel32.CloseHandle
                          - match: tight loop
                          - string: "explorer.exe"
                          - call:
                            - api: HttpOpenRequestW
                    """
                )
            ),
            # this rule should match on a file feature and a prior function rule match
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: kill thread program
                        scope: file
                        examples:
                          - 9324d1a8ae37a36ae560c37448c9705a
                      features:
                        - and:
                          - section: .text
                          - match: kill thread loop
                    """
                )
            ),
        ]
    )
    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "tight loop" in capabilities
    assert "kill thread loop" in capabilities
    assert "kill thread program" in capabilities


def test_subscope_bb_rules(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: test rule
                        scope: function
                        scopes:
                          static: instruction
                          dynamic: call
                      features:
                        - and:
                          - basic block:
                            - characteristic: tight loop
                          - and:
                            - or:
                              - api: socket
                              - and:
                                - os: linux
                                - mnemonic: syscall
                                - number: 41 = socket()
                            - number: 6 = IPPROTO_TCP
                            - number: 1 = SOCK_STREAM
                            - number: 2 = AF_INET
                    """
                )
            )
            ),
        ]
    )
    # tight loop at 0x403685
    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "test rule" in capabilities


def test_byte_matching(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: byte match test
                        scope: function
                      features:
                        - and:
                          - bytes: ED 24 9E F4 52 A9 07 47 55 8E E1 AB 30 8E 23 61
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "byte match test" in capabilities


def test_count_bb(z9324d_extractor):
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: count bb
                        namespace: test
                        scope: function
                      features:
                        - and:
                          - count(basic blocks): 1 or more
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "count bb" in capabilities


def test_instruction_scope(z9324d_extractor):
    # .text:004071A4 68 E8 03 00 00    push    3E8h
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: push 1000
                        namespace: test
                        scope: instruction
                      features:
                        - and:
                          - mnemonic: push
                          - number: 1000
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "push 1000" in capabilities
    assert 0x4071A4 in {result[0] for result in capabilities["push 1000"]}


def test_instruction_subscope(z9324d_extractor):
    # .text:00406F60 sub_406F60 proc near
    # [...]
    # .text:004071A4 68 E8 03 00 00    push    3E8h
    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                      meta:
                        name: push 1000 on i386
                        namespace: test
                        scope: function
                      features:
                        - and:
                          - arch: i386
                          - instruction:
                            - mnemonic: push
                            - number: 1000
                    """
                )
            )
        ]
    )
    capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor)
    assert "push 1000 on i386" in capabilities
    assert 0x406F60 in {result[0] for result in capabilities["push 1000 on i386"]}
    assert len(rules.file_rules) == 2
    assert len(rules.function_rules) == 2
    assert len(rules.basic_block_rules) == 2
    assert len(rules.instruction_rules) == 1
    assert len(rules.process_rules) == 4
    assert len(rules.thread_rules) == 2
    assert len(rules.call_rules) == 2


def test_fix262(pma16_01_extractor, capsys):

@@ -468,3 +300,59 @@ def test_main_rd():
    assert capa.main.main([path, "-j"]) == 0
    assert capa.main.main([path, "-q"]) == 0
    assert capa.main.main([path]) == 0


def extract_cape_report(tmp_path: Path, gz: Path) -> Path:
    report = tmp_path / "report.json"
    report.write_bytes(gzip.decompress(gz.read_bytes()))
    return report


def test_main_cape1(tmp_path):
    path = extract_cape_report(tmp_path, fixtures.get_data_path_by_name("0000a657"))

    # TODO(williballenthin): use default rules set
    # https://github.com/mandiant/capa/pull/1696
    rules = tmp_path / "rules"
    rules.mkdir()
    (rules / "create-or-open-registry-key.yml").write_text(
        textwrap.dedent(
            """
            rule:
              meta:
                name: create or open registry key
                authors:
                  - testing
                scopes:
                  static: instruction
                  dynamic: call
              features:
                - or:
                  - api: advapi32.RegOpenKey
                  - api: advapi32.RegOpenKeyEx
                  - api: advapi32.RegCreateKey
                  - api: advapi32.RegCreateKeyEx
                  - api: advapi32.RegOpenCurrentUser
                  - api: advapi32.RegOpenKeyTransacted
                  - api: advapi32.RegOpenUserClassesRoot
                  - api: advapi32.RegCreateKeyTransacted
                  - api: ZwOpenKey
                  - api: ZwOpenKeyEx
                  - api: ZwCreateKey
                  - api: ZwOpenKeyTransacted
                  - api: ZwOpenKeyTransactedEx
                  - api: ZwCreateKeyTransacted
                  - api: NtOpenKey
                  - api: NtCreateKey
                  - api: SHRegOpenUSKey
                  - api: SHRegCreateUSKey
                  - api: RtlCreateRegistryKey
            """
        )
    )

    assert capa.main.main([str(path), "-r", str(rules)]) == 0
    assert capa.main.main([str(path), "-q", "-r", str(rules)]) == 0
    assert capa.main.main([str(path), "-j", "-r", str(rules)]) == 0
    assert capa.main.main([str(path), "-v", "-r", str(rules)]) == 0
    assert capa.main.main([str(path), "-vv", "-r", str(rules)]) == 0
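As these tests show, capa.main.main accepts an argv-style list and returns a process exit code, so end-to-end runs are easy to script. A hedged sketch (the sample and rules paths below are placeholders):

import capa.main

# run capa programmatically; 0 means success, and flags like -j/-q/-v/-vv
# select the output format while -r points at a custom rules directory
rc = capa.main.main(["/path/to/sample", "-j", "-r", "/path/to/rules"])
assert rc == 0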
@@ -43,6 +43,9 @@ def test_match_simple():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
    namespace: testns1/testns2
  features:
    - number: 100

@@ -63,6 +66,9 @@ def test_match_range_exact():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - count(number(100)): 2
"""

@@ -87,7 +93,10 @@ def test_match_range_range():
"""
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - count(number(100)): (2, 3)
"""

@@ -117,6 +126,9 @@ def test_match_range_exact_zero():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - count(number(100)): 0
"""

@@ -142,7 +154,10 @@ def test_match_range_with_zero():
"""
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - count(number(100)): (0, 1)
"""

@@ -169,6 +184,9 @@ def test_match_adds_matched_rule_feature():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - number: 100
"""

@@ -187,6 +205,9 @@ def test_match_matched_rules():
rule:
  meta:
    name: test rule1
    scopes:
      static: function
      dynamic: process
  features:
    - number: 100
"""

@@ -198,6 +219,9 @@ def test_match_matched_rules():
rule:
  meta:
    name: test rule2
    scopes:
      static: function
      dynamic: process
  features:
    - match: test rule1
"""

@@ -232,6 +256,9 @@ def test_match_namespace():
rule:
  meta:
    name: CreateFile API
    scopes:
      static: function
      dynamic: process
    namespace: file/create/CreateFile
  features:
    - api: CreateFile

@@ -244,6 +271,9 @@ def test_match_namespace():
rule:
  meta:
    name: WriteFile API
    scopes:
      static: function
      dynamic: process
    namespace: file/write
  features:
    - api: WriteFile

@@ -256,6 +286,9 @@ def test_match_namespace():
rule:
  meta:
    name: file-create
    scopes:
      static: function
      dynamic: process
  features:
    - match: file/create
"""

@@ -267,6 +300,9 @@ def test_match_namespace():
rule:
  meta:
    name: filesystem-any
    scopes:
      static: function
      dynamic: process
  features:
    - match: file
"""

@@ -304,6 +340,9 @@ def test_match_substring():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - substring: abc

@@ -355,6 +394,9 @@ def test_match_regex():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - string: /.*bbbb.*/

@@ -367,6 +409,9 @@ def test_match_regex():
rule:
  meta:
    name: rule with implied wildcards
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - string: /bbbb/

@@ -379,6 +424,9 @@ def test_match_regex():
rule:
  meta:
    name: rule with anchor
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - string: /^bbbb/

@@ -425,6 +473,9 @@ def test_match_regex_ignorecase():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - string: /.*bbbb.*/i

@@ -448,6 +499,9 @@ def test_match_regex_complex():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - or:
      - string: /.*HARDWARE\\Key\\key with spaces\\.*/i

@@ -471,6 +525,9 @@ def test_match_regex_values_always_string():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - or:
      - string: /123/

@@ -500,6 +557,9 @@ def test_match_not():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
    namespace: testns1/testns2
  features:
    - not:

@@ -518,6 +578,9 @@ def test_match_not_not():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
    namespace: testns1/testns2
  features:
    - not:

@@ -537,6 +600,9 @@ def test_match_operand_number():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - operand[0].number: 0x10

@@ -564,6 +630,9 @@ def test_match_operand_offset():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - operand[0].offset: 0x10

@@ -591,6 +660,9 @@ def test_match_property_access():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - property/read: System.IO.FileInfo::Length

@@ -632,6 +704,9 @@ def test_match_os_any():
rule:
  meta:
    name: test rule
    scopes:
      static: function
      dynamic: process
  features:
    - or:
      - and:
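Several hunks above exercise counted features, where a rule matches only if a feature occurs exactly N times or within an inclusive range. A minimal sketch of the syntax, combining forms that appear in these tests (the rule name is illustrative):

import textwrap

import capa.rules

rule = capa.rules.Rule.from_yaml(
    textwrap.dedent(
        """
        rule:
          meta:
            name: count example
            scopes:
              static: function
              dynamic: process
          features:
            - count(number(100)): (2, 3)
        """
    )
)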
@@ -23,7 +23,9 @@ def test_optimizer_order():
rule:
  meta:
    name: test rule
    scope: function
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - substring: "foo"
@@ -46,7 +46,7 @@ def test_doc_to_pb2(request, rd_file):
        assert matches.meta.name == m.name
        assert cmp_optional(matches.meta.namespace, m.namespace)
        assert list(matches.meta.authors) == m.authors
        assert capa.render.proto.scope_to_pb2(matches.meta.scope) == m.scope
        assert capa.render.proto.scopes_to_pb2(matches.meta.scopes) == m.scopes

        assert len(matches.meta.attack) == len(m.attack)
        for rd_attack, proto_attack in zip(matches.meta.attack, m.attack):

@@ -116,10 +116,27 @@ def test_addr_to_pb2():


def test_scope_to_pb2():
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope(capa.rules.FILE_SCOPE)) == capa_pb2.SCOPE_FILE
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope(capa.rules.FUNCTION_SCOPE)) == capa_pb2.SCOPE_FUNCTION
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope(capa.rules.BASIC_BLOCK_SCOPE)) == capa_pb2.SCOPE_BASIC_BLOCK
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope(capa.rules.INSTRUCTION_SCOPE)) == capa_pb2.SCOPE_INSTRUCTION
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope.FILE) == capa_pb2.SCOPE_FILE
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope.FUNCTION) == capa_pb2.SCOPE_FUNCTION
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope.BASIC_BLOCK) == capa_pb2.SCOPE_BASIC_BLOCK
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope.INSTRUCTION) == capa_pb2.SCOPE_INSTRUCTION
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope.PROCESS) == capa_pb2.SCOPE_PROCESS
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope.THREAD) == capa_pb2.SCOPE_THREAD
    assert capa.render.proto.scope_to_pb2(capa.rules.Scope.CALL) == capa_pb2.SCOPE_CALL


def test_scopes_to_pb2():
    assert capa.render.proto.scopes_to_pb2(
        capa.rules.Scopes.from_dict({"static": "file", "dynamic": "file"})
    ) == capa_pb2.Scopes(
        static=capa_pb2.SCOPE_FILE,
        dynamic=capa_pb2.SCOPE_FILE,
    )
    assert capa.render.proto.scopes_to_pb2(
        capa.rules.Scopes.from_dict({"static": "file", "dynamic": "unsupported"})
    ) == capa_pb2.Scopes(
        static=capa_pb2.SCOPE_FILE,
    )


def cmp_optional(a: Any, b: Any) -> bool:

@@ -128,7 +145,59 @@ def cmp_optional(a: Any, b: Any) -> bool:
    return a == b


def assert_static_analyis(analysis: rd.StaticAnalysis, dst: capa_pb2.StaticAnalysis):
    assert analysis.format == dst.format
    assert analysis.arch == dst.arch
    assert analysis.os == dst.os
    assert analysis.extractor == dst.extractor
    assert list(analysis.rules) == dst.rules

    assert capa.render.proto.addr_to_pb2(analysis.base_address) == dst.base_address

    assert len(analysis.layout.functions) == len(dst.layout.functions)
    for rd_f, proto_f in zip(analysis.layout.functions, dst.layout.functions):
        assert capa.render.proto.addr_to_pb2(rd_f.address) == proto_f.address

        assert len(rd_f.matched_basic_blocks) == len(proto_f.matched_basic_blocks)
        for rd_bb, proto_bb in zip(rd_f.matched_basic_blocks, proto_f.matched_basic_blocks):
            assert capa.render.proto.addr_to_pb2(rd_bb.address) == proto_bb.address

    assert analysis.feature_counts.file == dst.feature_counts.file
    assert len(analysis.feature_counts.functions) == len(dst.feature_counts.functions)
    for rd_cf, proto_cf in zip(analysis.feature_counts.functions, dst.feature_counts.functions):
        assert capa.render.proto.addr_to_pb2(rd_cf.address) == proto_cf.address
        assert rd_cf.count == proto_cf.count

    assert len(analysis.library_functions) == len(dst.library_functions)
    for rd_lf, proto_lf in zip(analysis.library_functions, dst.library_functions):
        assert capa.render.proto.addr_to_pb2(rd_lf.address) == proto_lf.address
        assert rd_lf.name == proto_lf.name


def assert_dynamic_analyis(analysis: rd.DynamicAnalysis, dst: capa_pb2.DynamicAnalysis):
    assert analysis.format == dst.format
    assert analysis.arch == dst.arch
    assert analysis.os == dst.os
    assert analysis.extractor == dst.extractor
    assert list(analysis.rules) == dst.rules

    assert len(analysis.layout.processes) == len(dst.layout.processes)
    for rd_p, proto_p in zip(analysis.layout.processes, dst.layout.processes):
        assert capa.render.proto.addr_to_pb2(rd_p.address) == proto_p.address

        assert len(rd_p.matched_threads) == len(proto_p.matched_threads)
        for rd_t, proto_t in zip(rd_p.matched_threads, proto_p.matched_threads):
            assert capa.render.proto.addr_to_pb2(rd_t.address) == proto_t.address

    assert analysis.feature_counts.processes == dst.feature_counts.processes
    assert len(analysis.feature_counts.processes) == len(dst.feature_counts.processes)
    for rd_cp, proto_cp in zip(analysis.feature_counts.processes, dst.feature_counts.processes):
        assert capa.render.proto.addr_to_pb2(rd_cp.address) == proto_cp.address
        assert rd_cp.count == proto_cp.count


def assert_meta(meta: rd.Metadata, dst: capa_pb2.Metadata):
    assert isinstance(meta.analysis, rd.StaticAnalysis)
    assert str(meta.timestamp) == dst.timestamp
    assert meta.version == dst.version
    if meta.argv is None:

@@ -141,31 +210,18 @@ def assert_meta(meta: rd.Metadata, dst: capa_pb2.Metadata):
    assert meta.sample.sha256 == dst.sample.sha256
    assert meta.sample.path == dst.sample.path

    assert meta.analysis.format == dst.analysis.format
    assert meta.analysis.arch == dst.analysis.arch
    assert meta.analysis.os == dst.analysis.os
    assert meta.analysis.extractor == dst.analysis.extractor
    assert list(meta.analysis.rules) == dst.analysis.rules
    assert capa.render.proto.addr_to_pb2(meta.analysis.base_address) == dst.analysis.base_address

    assert len(meta.analysis.layout.functions) == len(dst.analysis.layout.functions)
    for rd_f, proto_f in zip(meta.analysis.layout.functions, dst.analysis.layout.functions):
        assert capa.render.proto.addr_to_pb2(rd_f.address) == proto_f.address

        assert len(rd_f.matched_basic_blocks) == len(proto_f.matched_basic_blocks)
        for rd_bb, proto_bb in zip(rd_f.matched_basic_blocks, proto_f.matched_basic_blocks):
            assert capa.render.proto.addr_to_pb2(rd_bb.address) == proto_bb.address

    assert meta.analysis.feature_counts.file == dst.analysis.feature_counts.file
    assert len(meta.analysis.feature_counts.functions) == len(dst.analysis.feature_counts.functions)
    for rd_cf, proto_cf in zip(meta.analysis.feature_counts.functions, dst.analysis.feature_counts.functions):
        assert capa.render.proto.addr_to_pb2(rd_cf.address) == proto_cf.address
        assert rd_cf.count == proto_cf.count

    assert len(meta.analysis.library_functions) == len(dst.analysis.library_functions)
    for rd_lf, proto_lf in zip(meta.analysis.library_functions, dst.analysis.library_functions):
        assert capa.render.proto.addr_to_pb2(rd_lf.address) == proto_lf.address
        assert rd_lf.name == proto_lf.name
    if meta.flavor == rd.Flavor.STATIC:
        assert dst.flavor == capa_pb2.FLAVOR_STATIC
        assert dst.WhichOneof("analysis2") == "static_analysis"
        assert isinstance(meta.analysis, rd.StaticAnalysis)
        assert_static_analyis(meta.analysis, dst.static_analysis)
    elif meta.flavor == rd.Flavor.DYNAMIC:
        assert dst.flavor == capa_pb2.FLAVOR_DYNAMIC
        assert dst.WhichOneof("analysis2") == "dynamic_analysis"
        assert isinstance(meta.analysis, rd.DynamicAnalysis)
        assert_dynamic_analyis(meta.analysis, dst.dynamic_analysis)
    else:
        assert_never(dst.flavor)


def assert_match(ma: rd.Match, mb: capa_pb2.Match):

@@ -318,20 +374,22 @@ def assert_round_trip(doc: rd.ResultDocument):
    # show the round trip works
    # first by comparing the objects directly,
    # which works thanks to pydantic model equality.
    assert one.meta == two.meta
    assert one.rules == two.rules
    assert one == two

    # second by showing their protobuf representations are the same.
    assert capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True) == capa.render.proto.doc_to_pb2(
        two
    ).SerializeToString(deterministic=True)
    one_bytes = capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True)
    two_bytes = capa.render.proto.doc_to_pb2(two).SerializeToString(deterministic=True)
    assert one_bytes == two_bytes

    # now show that two different versions are not equal.
    three = copy.deepcopy(two)
    three.meta.__dict__.update({"version": "0.0.0"})
    assert one.meta.version != three.meta.version
    assert one != three
    assert capa.render.proto.doc_to_pb2(one).SerializeToString(deterministic=True) != capa.render.proto.doc_to_pb2(
        three
    ).SerializeToString(deterministic=True)
    three_bytes = capa.render.proto.doc_to_pb2(three).SerializeToString(deterministic=True)
    assert one_bytes != three_bytes


@pytest.mark.parametrize(

@@ -343,6 +401,7 @@ def assert_round_trip(doc: rd.ResultDocument):
        pytest.param("a076114_rd"),
        pytest.param("pma0101_rd"),
        pytest.param("dotnet_1c444e_rd"),
        pytest.param("dynamic_a0000a6_rd"),
    ],
)
def test_round_trip(request, rd_file):
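The round-trip refactor above also names the intermediate byte strings, which makes a failing comparison far easier to inspect. The underlying idea: protobuf byte output is only stable when deterministic serialization is requested. A sketch, where doc stands for any ResultDocument:

import capa.render.proto

# deterministic=True fixes field and map ordering, so equal documents
# serialize to byte-identical strings that can be compared directly
one_bytes = capa.render.proto.doc_to_pb2(doc).SerializeToString(deterministic=True)
two_bytes = capa.render.proto.doc_to_pb2(doc).SerializeToString(deterministic=True)
assert one_bytes == two_bytes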
@@ -50,7 +50,9 @@ def test_render_meta_attack():
rule:
  meta:
    name: test rule
    scope: function
    scopes:
      static: function
      dynamic: process
    authors:
      - foo
    att&ck:

@@ -86,7 +88,9 @@ def test_render_meta_mbc():
rule:
  meta:
    name: test rule
    scope: function
    scopes:
      static: function
      dynamic: process
    authors:
      - foo
    mbc:

@@ -154,6 +158,35 @@ def test_render_vverbose_feature(feature, expected):
        captures={},
    )

    capa.render.vverbose.render_feature(ostream, matches, feature, indent=0)
    layout = capa.render.result_document.StaticLayout(functions=())

    src = textwrap.dedent(
        """
        rule:
          meta:
            name: test rule
            authors:
              - user@domain.com
            scopes:
              static: function
              dynamic: process
            examples:
              - foo1234
              - bar5678
          features:
            - and:
              - number: 1
              - number: 2
        """
    )
    rule = capa.rules.Rule.from_yaml(src)

    rm = capa.render.result_document.RuleMatches(
        meta=capa.render.result_document.RuleMetadata.from_capa(rule),
        source=src,
        matches=(),
    )

    capa.render.vverbose.render_feature(ostream, layout, rm, matches, feature, indent=0)

    assert ostream.getvalue().strip() == expected
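Note the signature change in the last hunk: render_feature now receives the layout and the matched-rule record alongside the match tree, so vverbose rendering can consult rule metadata. A sketch of building that record from a parsed rule, mirroring the test above (src is the rule YAML string shown there):

import capa.rules
import capa.render.result_document

rule = capa.rules.Rule.from_yaml(src)
rm = capa.render.result_document.RuleMatches(
    meta=capa.render.result_document.RuleMetadata.from_capa(rule),
    source=src,
    matches=(),
)
assert rm.meta.name == "test rule"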
@@ -20,7 +20,9 @@ R1 = capa.rules.Rule.from_yaml(
    name: test rule
    authors:
      - user@domain.com
    scope: function
    scopes:
      static: function
      dynamic: process
    examples:
      - foo1234
      - bar5678

@@ -40,7 +42,9 @@ R2 = capa.rules.Rule.from_yaml(
    name: test rule 2
    authors:
      - user@domain.com
    scope: function
    scopes:
      static: function
      dynamic: process
    examples:
      - foo1234
      - bar5678
File diff suppressed because it is too large
@@ -20,9 +20,11 @@ def test_rule_scope_instruction():
rule:
  meta:
    name: test rule
    scope: instruction
    scopes:
      static: instruction
      dynamic: unsupported
  features:
    - and:
      - and:
        - mnemonic: mov
        - arch: i386
        - os: windows

@@ -37,7 +39,9 @@ def test_rule_scope_instruction():
rule:
  meta:
    name: test rule
    scope: instruction
    scopes:
      static: instruction
      dynamic: unsupported
  features:
    - characteristic: embedded pe
"""

@@ -54,7 +58,9 @@ def test_rule_subscope_instruction():
rule:
  meta:
    name: test rule
    scope: function
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - instruction:

@@ -83,7 +89,9 @@ def test_scope_instruction_implied_and():
rule:
  meta:
    name: test rule
    scope: function
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - instruction:

@@ -102,7 +110,9 @@ def test_scope_instruction_description():
rule:
  meta:
    name: test rule
    scope: function
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - instruction:

@@ -120,7 +130,9 @@ def test_scope_instruction_description():
rule:
  meta:
    name: test rule
    scope: function
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - instruction:
@@ -75,6 +75,7 @@ def run_program(script_path, args):
    return subprocess.run(args, stdout=subprocess.PIPE)


@pytest.mark.xfail(reason="result document test files haven't been updated yet")
def test_proto_conversion(tmp_path):
    t = tmp_path / "proto-test"
    t.mkdir()

@@ -98,7 +99,9 @@ def test_detect_duplicate_features(tmpdir):
rule:
  meta:
    name: Test Rule 0
    scope: function
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - number: 1

@@ -113,6 +116,9 @@ def test_detect_duplicate_features(tmpdir):
rule:
  meta:
    name: Test Rule 1
    scopes:
      static: function
      dynamic: process
  features:
    - or:
      - string: unique

@@ -132,6 +138,9 @@ def test_detect_duplicate_features(tmpdir):
rule:
  meta:
    name: Test Rule 2
    scopes:
      static: function
      dynamic: process
  features:
    - and:
      - string: "sites.ini"

@@ -146,6 +155,9 @@ def test_detect_duplicate_features(tmpdir):
rule:
  meta:
    name: Test Rule 3
    scopes:
      static: function
      dynamic: process
  features:
    - or:
      - not:

@@ -161,6 +173,9 @@ def test_detect_duplicate_features(tmpdir):
rule:
  meta:
    name: Test Rule 4
    scopes:
      static: function
      dynamic: process
  features:
    - not:
      - string: "expa"