Compare commits

...

182 Commits

Author SHA1 Message Date
Willi Ballenthin
8121f291c3 version: bump to v1.5.1 2021-02-09 09:20:03 -07:00
Moritz
b721b5fcff Merge pull request #420 from fireeye/williballenthin-patch-2
setup: pin viv-utils version
2021-02-09 16:49:11 +01:00
Willi Ballenthin
521dfe0337 setup: bump viv-utils to 0.3.19 2021-02-09 08:18:17 -07:00
Capa Bot
7dc78b7837 Sync capa rules submodule 2021-02-09 15:17:09 +00:00
Capa Bot
6636b9d56c Sync capa-testfiles submodule 2021-02-09 12:56:48 +00:00
Capa Bot
325c6cc805 Sync capa rules submodule 2021-02-09 09:58:41 +00:00
Capa Bot
6a6e205973 Sync capa-testfiles submodule 2021-02-08 19:07:40 +00:00
Capa Bot
46ec25d286 Sync capa rules submodule 2021-02-08 17:49:32 +00:00
Capa Bot
6e33a22676 Sync capa rules submodule 2021-02-08 17:48:52 +00:00
Capa Bot
6e81de9e44 Sync capa rules submodule 2021-02-08 17:45:01 +00:00
Willi Ballenthin
03f7bbc3a5 setup: pin viv-utils version 2021-02-08 10:30:31 -07:00
Willi Ballenthin
4354bc9108 Merge pull request #415 from fireeye/williballenthin-patch-2
v1.5.0
2021-02-08 09:55:43 -07:00
Willi Ballenthin
b8fcc2ff0c Merge pull request #417 from fireeye/smda/calls-from-no-api
remove apirefs from calls from
2021-02-08 09:54:04 -07:00
Moritz Raabe
55b7ae10a7 remove apirefs from calls from
closes #416
2021-02-08 11:56:01 +01:00
Willi Ballenthin
6d2a6c98d1 changelog: v1.5.0 2021-02-05 10:59:30 -07:00
Capa Bot
05998b5d05 Sync capa-testfiles submodule 2021-02-04 08:19:32 +00:00
Capa Bot
1063f3fcda Sync capa rules submodule 2021-02-03 18:13:29 +00:00
Capa Bot
93c5e4637b Sync capa rules submodule 2021-02-03 15:15:51 +00:00
Moritz
073c2b5754 Merge pull request #412 from fireeye/ida/meta-add-baseaddr
add imagebase to IDA meta data
2021-02-02 16:48:22 +01:00
mike-hunhoff
ef41d74b82 Merge pull request #411 from fireeye/fix/410
fixes #410
2021-02-02 08:38:23 -07:00
Moritz Raabe
84b3f38810 add imagebase to IDA meta data 2021-02-02 13:54:46 +01:00
mike-hunhoff
2288f38a11 Update capa/main.py
Co-authored-by: Willi Ballenthin <willi.ballenthin@gmail.com>
2021-02-01 12:45:36 -07:00
mike-hunhoff
dbc4e06657 Update capa/main.py
Co-authored-by: Willi Ballenthin <willi.ballenthin@gmail.com>
2021-02-01 12:45:29 -07:00
Michael Hunhoff
2433777a76 fixes #410 2021-02-01 11:43:24 -07:00
Moritz
bb7001f5f2 Merge pull request #409 from fireeye/fix/extract-bytes
improve bytes feature extraction
2021-02-01 17:38:40 +01:00
Moritz Raabe
9b5aaa40de improve bytes feature extraction 2021-02-01 17:17:22 +01:00
Capa Bot
96d74f48f4 Sync capa rules submodule 2021-02-01 11:55:33 +00:00
Capa Bot
f07af25a6a Sync capa rules submodule 2021-01-28 16:52:21 +00:00
Willi Ballenthin
14e65c4601 Merge pull request #401 from fireeye/linter-format
Lint rule formatting and improved rule dump
2021-01-28 09:18:20 -07:00
Capa Bot
b5c2fb0259 Sync capa rules submodule 2021-01-28 16:06:09 +00:00
Capa Bot
92d98db7bb Sync capa-testfiles submodule 2021-01-28 15:25:17 +00:00
Moritz
e6f7ef604a Merge pull request #404 from fireeye/bugfix/403
fixing #403
2021-01-28 11:17:39 +01:00
Moritz Raabe
0eb8d3e47c fix time debug output 2021-01-28 11:09:25 +01:00
Moritz Raabe
072e30498b adjust negative hex numbers in to_yaml 2021-01-28 10:54:17 +01:00
Moritz Raabe
d6e73577af dont change quotes when dumping 2021-01-28 10:54:17 +01:00
Moritz Raabe
a81f98be8e manual adjust negative numbers 2021-01-28 10:54:17 +01:00
Moritz Raabe
0980e35c29 simplify string comparison 2021-01-28 10:54:17 +01:00
Moritz Raabe
336c2a3aff add option to only check reformat status 2021-01-28 10:54:17 +01:00
Moritz Raabe
e3055bc740 check rule format consistency 2021-01-28 10:54:17 +01:00
Capa Bot
9406e3dbfb Sync capa rules submodule 2021-01-28 09:52:43 +00:00
Moritz
5307b7e1b1 Merge pull request #408 from fireeye/fix/lint-lib-path
adjust expected lib path and log time
2021-01-28 10:28:30 +01:00
Moritz Raabe
f18a8f5b31 adjust expected lib path and log time 2021-01-28 10:18:03 +01:00
Moritz
cfe99c4b72 Merge pull request #407 from fireeye/fix/lint-logging
disable extractor progress
2021-01-28 09:25:07 +01:00
Moritz Raabe
0d439c0f55 disable extractor progress 2021-01-28 09:22:15 +01:00
Moritz
6288a96a8b Merge pull request #406 from fireeye/ci/disable-python36
Disable Python 3.6 tests
2021-01-28 08:35:42 +01:00
Moritz
819b6f6ccf Merge pull request #402 from fireeye/lib-rules-subscoped
potential fix for #398
2021-01-28 08:35:28 +01:00
Moritz Raabe
4bc06aa8cd closes #405 2021-01-28 08:23:15 +01:00
Moritz Raabe
7b64425c24 update doc and test case 2021-01-28 08:18:23 +01:00
Michael Hunhoff
44c9d6a22b fixing #403 2021-01-27 18:29:53 -07:00
Moritz Raabe
c750447d62 potential fix for #398 2021-01-27 17:59:56 +01:00
Willi Ballenthin
059ec8f3f2 Merge pull request #400 from fireeye/ci/enable-py39-2
bump smda, enable Python 3.9
2021-01-22 07:18:54 -07:00
Moritz Raabe
2c5508febd bump smda, enable Python 3.9 2021-01-22 10:00:25 +01:00
Capa Bot
905fff041b Sync capa rules submodule 2021-01-21 21:32:42 +00:00
Willi Ballenthin
20ce29b033 Merge pull request #396 from fireeye/dependabot/pip/smda-1.5.11
Bump smda from 1.5.10 to 1.5.11
2021-01-19 08:21:00 -07:00
Capa Bot
4bd93a680e Sync capa-testfiles submodule 2021-01-18 08:02:29 +00:00
dependabot[bot]
c9bf7f424d Bump smda from 1.5.10 to 1.5.11
Bumps [smda](https://github.com/danielplohmann/smda) from 1.5.10 to 1.5.11.
- [Release notes](https://github.com/danielplohmann/smda/releases)
- [Commits](https://github.com/danielplohmann/smda/commits)

Signed-off-by: dependabot[bot] <support@github.com>
2021-01-18 06:44:33 +00:00
Capa Bot
4cde2e1a78 Sync capa rules submodule 2021-01-16 15:39:09 +00:00
Capa Bot
48c045d381 Sync capa rules submodule 2021-01-12 18:30:44 +00:00
Capa Bot
2b385ead7f Sync capa rules submodule 2021-01-12 18:30:11 +00:00
Capa Bot
0fcc9f3df6 Sync capa-testfiles submodule 2021-01-12 18:27:32 +00:00
Capa Bot
b251202804 Sync capa-testfiles submodule 2021-01-12 18:27:11 +00:00
Capa Bot
6967010281 Sync capa-testfiles submodule 2021-01-12 18:26:12 +00:00
Capa Bot
7e0846e66a Sync capa rules submodule 2021-01-12 17:55:13 +00:00
Moritz
4e3daad96d Merge pull request #391 from fireeye/fix/freeze-base-addr
add base address to freeze
2021-01-11 11:30:29 +01:00
Capa Bot
37fb3da5db Sync capa rules submodule 2021-01-08 16:36:36 +00:00
Capa Bot
762f48957c Sync capa rules submodule 2021-01-08 15:16:32 +00:00
Capa Bot
c1af7b8783 Sync capa-testfiles submodule 2021-01-08 15:14:26 +00:00
Moritz Raabe
f89084677d add base address to freeze 2021-01-08 14:48:26 +01:00
Capa Bot
0716084bbb Sync capa-testfiles submodule 2021-01-08 08:46:53 +00:00
Capa Bot
a6c946e6c9 Sync capa rules submodule 2021-01-07 13:59:20 +00:00
Capa Bot
3f6e088faa Sync capa-testfiles submodule 2021-01-07 11:53:24 +00:00
Capa Bot
9abdd5813b Sync capa rules submodule 2021-01-07 07:47:28 +00:00
Capa Bot
f33ea36e6f Sync capa rules submodule 2021-01-05 15:49:04 +00:00
Moritz
8788e0a9c9 Merge pull request #388 from fireeye/ci/linter-update
lint with tags
2021-01-05 16:37:21 +01:00
Moritz Raabe
b1c1cb4b9b lint with --tag 2021-01-05 16:16:35 +01:00
Capa Bot
982d4ac472 Sync capa-testfiles submodule 2021-01-04 14:42:43 +00:00
Capa Bot
b7a8d667b9 Sync capa rules submodule 2021-01-04 12:51:43 +00:00
Capa Bot
8f8729df05 Sync capa-testfiles submodule 2020-12-30 19:06:28 +00:00
Capa Bot
e928d281dd Sync capa-testfiles submodule 2020-12-30 15:21:36 +00:00
Capa Bot
625583f5ab Sync capa rules submodule 2020-12-23 12:44:25 +00:00
Capa Bot
ab54553dd2 Sync capa rules submodule 2020-12-22 17:16:54 +00:00
Moritz
47bf7b1325 Merge pull request #375 from doomedraven/return_dict
add render to dict, is the same as default but just in dictionary so …
2020-12-22 15:52:50 +01:00
Moritz
145d75f579 Merge pull request #381 from fireeye/fix/viv-set-logger-levels
set level of more viv loggers explicitly
2020-12-22 15:52:05 +01:00
Capa Bot
01d976d7f7 Sync capa rules submodule 2020-12-22 13:17:37 +00:00
Capa Bot
095e3720ab Sync capa-testfiles submodule 2020-12-22 12:00:35 +00:00
Capa Bot
d62a37fe1f Sync capa-testfiles submodule 2020-12-21 16:17:33 +00:00
Capa Bot
5323f2fc31 Sync capa rules submodule 2020-12-17 17:14:43 +00:00
Capa Bot
5539cb0d08 Sync capa rules submodule 2020-12-17 17:12:21 +00:00
Capa Bot
76e80106d6 Sync capa-testfiles submodule 2020-12-17 09:29:56 +00:00
Capa Bot
9ab7b9a033 Sync capa rules submodule 2020-12-16 20:47:34 +00:00
Capa Bot
fe97d6a349 Sync capa-testfiles submodule 2020-12-15 19:23:15 +00:00
Capa Bot
2242c2afe8 Sync capa-testfiles submodule 2020-12-15 19:19:09 +00:00
Willi Ballenthin
ec25fb5c36 Merge pull request #384 from fireeye/dependabot/pip/smda-1.5.10
Bump smda from 1.5.9 to 1.5.10
2020-12-14 10:32:31 -07:00
dependabot[bot]
ce25f5cadd Bump smda from 1.5.9 to 1.5.10
Bumps [smda](https://github.com/danielplohmann/smda) from 1.5.9 to 1.5.10.
- [Release notes](https://github.com/danielplohmann/smda/releases)
- [Commits](https://github.com/danielplohmann/smda/commits)

Signed-off-by: dependabot[bot] <support@github.com>
2020-12-14 07:15:58 +00:00
Capa Bot
1099f40f19 Sync capa rules submodule 2020-12-12 05:43:31 +00:00
Capa Bot
70368b3f1e Sync capa rules submodule 2020-12-11 10:42:16 +00:00
Capa Bot
0181ebad45 Sync capa-testfiles submodule 2020-12-10 17:38:00 +00:00
DoomedRaven
e158e3f13c remove type hint to make CI happy 2020-12-08 21:46:39 +01:00
DoomedRaven
b1bbded23c black -l 120 . 2020-12-08 21:39:50 +01:00
DoomedRaven
b77d9d3738 isort --profile black --length-sort --line-width 120 capa_as_library.py 2020-12-08 21:34:42 +01:00
DoomedRaven
d0b2421752 isort capa_as_library.py 2020-12-08 20:53:26 +01:00
DoomedRaven
96b65a7c60 add example how to render it as library
```
>>> from capa_as_library import capa_details
>>> details = capa_details("/opt/CAPEv2/storage/analyses/83/binary", "dictionary")
>>> from pprint import pprint as pp
>>> pp(details)
{'ATTCK': {'DEFENSE EVASION': ['Obfuscated Files or Information [T1027]',
                               'Virtualization/Sandbox Evasion::System Checks '
                               '[T1497.001]'],
           'EXECUTION': ['Shared Modules [T1129]']},
 'CAPABILITY': {'anti-analysis/anti-vm/vm-detection': ['execute anti-VM '
                                                       'instructions (3 '
                                                       'matches)'],
                'anti-analysis/obfuscation/string/stackstring': ['contain '
                                                                 'obfuscated '
                                                                 'stackstrings'],
                'data-manipulation/encryption/rc4': ['encrypt data using RC4 '
                                                     'PRGA'],
                'executable/pe/section/rsrc': ['contain a resource (.rsrc) '
                                               'section'],
                'host-interaction/cli': ['accept command line arguments'],
                'host-interaction/environment-variable': ['query environment '
                                                          'variable'],
                'host-interaction/file-system/read': ['read .ini file',
                                                      'read file'],
                'host-interaction/file-system/write': ['write file (3 '
                                                       'matches)'],
                'host-interaction/process': ['get thread local storage value '
                                             '(3 matches)',
                                             'set thread local storage value '
                                             '(2 matches)'],
                'host-interaction/process/terminate': ['terminate process (3 '
                                                       'matches)'],
                'host-interaction/thread/terminate': ['terminate thread'],
                'linking/runtime-linking': ['link function at runtime (7 '
                                            'matches)',
                                            'link many functions at runtime'],
                'load-code/pe': ['parse PE header (3 matches)']},
 'MBC': {'ANTI-BEHAVIORAL ANALYSIS': ['Virtual Machine Detection::Instruction '
                                      'Testing [B0009.029]'],
         'ANTI-STATIC ANALYSIS': ['Disassembler Evasion::Argument Obfuscation '
                                  '[B0012.001]'],
         'CRYPTOGRAPHY': ['Encrypt Data::RC4 [C0027.009]',
                          'Generate Pseudo-random Sequence::RC4 PRGA '
                          '[C0021.004]']},
 'md5': 'ad56c384476a81faef9aebd60b2f4623',
 'path': '/opt/CAPEv2/storage/analyses/83/binary',
 'sha1': 'aa027d89f5d3f991ad3e14ffb681616a77621836',
 'sha256': '16995e059eb47de0b58a95ce2c3d863d964a7a16064d4298cee9db1de266e68d'}
>>>
```
2020-12-08 20:00:24 +01:00
Willi Ballenthin
177c90093e Merge pull request #380 from doomedraven/patch-1
fix is_ordinal IndexError
2020-12-08 09:21:53 -07:00
Moritz Raabe
28ee091107 set level of more viv loggers explicitly 2020-12-08 16:30:23 +01:00
doomedraven
64c71d8e6d fix is_ordinal IndexError
```
 Traceback (most recent call last):
   File "/opt/CAPE/utils/../lib/cuckoo/common/cape_utils.py", line 223, in flare_capa_details
     capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
   File "/usr/local/lib/python2.7/dist-packages/capa/main.py", line 116, in find_capabilities
     function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
   File "/usr/local/lib/python2.7/dist-packages/capa/main.py", line 68, in find_function_capabilities
     for feature, va in extractor.extract_insn_features(f, bb, insn):
   File "/usr/local/lib/python2.7/dist-packages/capa/features/extractors/viv/__init__.py", line 84, in extract_insn_features
     for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
   File "/usr/local/lib/python2.7/dist-packages/capa/features/extractors/viv/insn.py", line 599, in extract_features
     for feature, va in insn_handler(f, bb, insn):
   File "/usr/local/lib/python2.7/dist-packages/capa/features/extractors/viv/insn.py", line 93, in extract_insn_api_features
     for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
   File "/usr/local/lib/python2.7/dist-packages/capa/features/extractors/helpers.py", line 61, in generate_symbols
     if not is_ordinal(symbol):
   File "/usr/local/lib/python2.7/dist-packages/capa/features/extractors/helpers.py", line 45, in is_ordinal
     return symbol[0] == "#"
 IndexError: string index out of range
```
2020-12-08 09:50:00 +01:00
Moritz
9ce0c94e17 Merge pull request #379 from fireeye/fix/nzxor-xor-instructions
add more xor instructions
2020-12-08 09:37:35 +01:00
Moritz Raabe
08c3372635 add more xor instructions 2020-12-08 09:21:50 +01:00
Capa Bot
2fafc70b69 Sync capa-testfiles submodule 2020-12-07 18:06:53 +00:00
Capa Bot
0e62ebe3a2 Sync capa-testfiles submodule 2020-12-07 17:16:01 +00:00
Moritz
1cc4d20b89 Merge pull request #373 from fireeye/ci/setup-dependabot
add dependabot config
2020-12-07 18:03:57 +01:00
Capa Bot
af4889894a Sync capa rules submodule 2020-12-04 08:31:42 +00:00
Moritz
429a5e1ea3 Merge pull request #378 from fireeye/fix/viv-string-extractor
fix: add viv extract strings for i386ImmMemOper operands
2020-12-04 08:55:23 +01:00
Moritz Raabe
4ef860eb07 fix: add viv extract strings for i386ImmMemOper operands 2020-12-03 20:24:29 +01:00
Capa Bot
b59ebf30c6 Sync capa-testfiles submodule 2020-12-03 18:57:45 +00:00
Capa Bot
a1ae8d54a6 Sync capa rules submodule 2020-12-02 15:24:15 +00:00
Capa Bot
8155207bea Sync capa rules submodule 2020-12-02 15:13:30 +00:00
Capa Bot
337d2cfa6d Sync capa rules submodule 2020-12-02 15:12:27 +00:00
Capa Bot
df2229782b Sync capa rules submodule 2020-12-02 15:08:55 +00:00
doomedraven
5920552649 small improvements 2020-12-01 20:31:56 +01:00
doomedraven
b4827fcb00 add render to dict, is the same as default but just in dictionary so simplifies the integrations 2020-12-01 19:43:54 +01:00
Willi Ballenthin
63983ccb65 Merge pull request #372 from doomedraven/patch-1
Simple example how to use capa as library
2020-12-01 06:56:44 -07:00
Willi Ballenthin
eac7e2b749 capa_as_library: style and comments 2020-12-01 06:54:55 -07:00
Moritz Raabe
65a365bca1 update halo requirements py2/3 2020-12-01 11:46:53 +01:00
Moritz Raabe
fecd0e11eb add dependabot config 2020-12-01 11:46:14 +01:00
doomedraven
51ad526cfc Simple example how to use capa as library
Just quick example how to use capa as library, to save time to someone, reading code and scripts
2020-12-01 11:20:49 +01:00
Moritz
10a062017d Merge pull request #370 from fireeye/pin-smda
pin smda
2020-12-01 11:10:23 +01:00
Moritz Raabe
0d351794db pin smda
addresses #369
2020-12-01 11:02:36 +01:00
Capa Bot
067e3ffced Sync capa-testfiles submodule 2020-11-30 19:36:59 +00:00
Capa Bot
50d55fae56 Sync capa-testfiles submodule 2020-11-23 17:55:56 +00:00
Capa Bot
ce63628d3d Sync capa rules submodule 2020-11-19 15:43:59 +00:00
Capa Bot
13df7f90f6 Sync capa rules submodule 2020-11-19 15:09:24 +00:00
Capa Bot
f5099b873d Sync capa rules submodule 2020-11-19 11:40:38 +00:00
Capa Bot
70eb38895d Sync capa-testfiles submodule 2020-11-18 16:28:34 +00:00
Capa Bot
7aea9fa1d2 Sync capa rules submodule 2020-11-16 19:38:02 +00:00
Capa Bot
5d30be31e0 Sync capa rules submodule 2020-11-16 09:44:08 +00:00
Capa Bot
7abe66e3de Sync capa rules submodule 2020-11-16 06:40:23 +00:00
mike-hunhoff
49ef5e5e64 Merge pull request #364 from fireeye/viv/fix-353
improve viv extractor unicode string detection
2020-11-10 17:56:47 -07:00
Michael Hunhoff
c2266bc105 improve viv extractor unicode string detection with supporting unit test 2020-11-10 12:23:07 -07:00
Moritz
a813e219e6 Merge pull request #363 from fireeye/williballenthin-patch-1
ci: disable py3.9 testing
2020-11-09 21:14:36 +01:00
Moritz
1c1fb20546 Merge pull request #355 from danielplohmann/backend-smda
initial commit for backend-smda
2020-11-09 21:13:51 +01:00
Willi Ballenthin
65feb60bb8 ci: disable py3.9 testing 2020-11-09 13:06:37 -07:00
Daniel Plohmann (jupiter)
f7492c7dc7 throw UnsupportedRuntimeError if SmdaFeatureExtractor is used with a Python version < 3.0 2020-11-09 16:20:08 +01:00
Moritz Raabe
dfc805b89b improvements for PR #355 2020-11-09 13:39:19 +01:00
Moritz Raabe
75defc13a0 disable fail-fast for tests job 2020-11-09 13:22:23 +01:00
Daniel Plohmann (jupiter)
7d4888bb77 addressing the comments in the PR discussion 2020-11-06 10:09:06 +01:00
Daniel Plohmann (jupiter)
1a34029171 Merge branch 'master' of github.com:fireeye/capa into backend-smda 2020-11-06 09:50:09 +01:00
Willi Ballenthin
f6ad4652e4 Merge pull request #358 from fireeye/doc/pyinstaller
document PyInstaller build process
2020-11-05 09:19:51 -07:00
pnx@pyrite
1e25604b0b replacement test for nested x64 thunks - still needs to be verified for vivisect 2020-11-05 16:31:47 +01:00
pnx@pyrite
3a43ffa641 adjusted identification of thunks via SMDA. 2020-11-05 12:58:07 +01:00
Capa Bot
8f6bcf3d98 Sync capa rules submodule 2020-11-03 14:23:36 +00:00
Moritz Raabe
0fd9753681 document PyInstaller build process
closes #357
2020-11-03 15:03:32 +01:00
Capa Bot
76a04dfe25 Sync capa rules submodule 2020-11-03 13:20:30 +00:00
Capa Bot
16317182e3 Sync capa-testfiles submodule 2020-11-03 13:14:45 +00:00
Daniel Plohmann (jupiter)
6bcdf64f67 formatting 2020-10-30 15:34:02 +01:00
Daniel Plohmann (jupiter)
d276a07a71 comments on a test where disassembly differs among backends 2020-10-30 15:29:38 +01:00
Daniel Plohmann (jupiter)
f3b59b342a Merge branch 'backend-smda' of github.com:danielplohmann/capa into backend-smda 2020-10-30 15:25:45 +01:00
Daniel Plohmann (jupiter)
4a0f1f22ba test fixes 2020-10-30 15:25:42 +01:00
Jon Crussell
0c85e7604c use magical derefs
Found derefs in viv/insn.py, does exactly what we need!
2020-10-30 07:23:24 -07:00
Jon Crussell
8f6a46e2d8 add check for pointer to string
Check if memory referenced is a pointer to a string. Fixes mimikatz
string test.
2020-10-30 07:01:07 -07:00
Daniel Plohmann (jupiter)
74b2c18296 down to 14 failed 2020-10-29 20:05:50 +01:00
Jon Crussell
b12d0b6424 tests: add smda backend test
40 failed, 73 passed.
2020-10-29 09:56:28 -07:00
Daniel Plohmann (jupiter)
60ddf0400e addressing review 2020-10-29 17:47:10 +01:00
Daniel Plohmann (jupiter)
669d3484c0 Merge remote-tracking branch 'origin/master' into backend-smda 2020-10-29 17:38:21 +01:00
William Ballenthin
5420ad97a3 sync submodules 2020-10-29 09:42:56 -06:00
Daniel Plohmann (jupiter)
36822926af initial commit for backend-smda 2020-10-29 11:28:22 +01:00
Capa Bot
eef8f2e781 Sync capa rules submodule 2020-10-29 03:50:40 +00:00
Capa Bot
31ac667623 Sync capa rules submodule 2020-10-27 15:16:07 +00:00
Capa Bot
868ceb25bf Sync capa rules submodule 2020-10-27 15:15:30 +00:00
Capa Bot
ee3ab94774 Sync capa rules submodule 2020-10-27 15:15:04 +00:00
Capa Bot
1c47877a8c Sync capa rules submodule 2020-10-27 15:14:22 +00:00
Capa Bot
84698462f3 Sync capa rules submodule 2020-10-27 15:13:25 +00:00
Capa Bot
da7dc793e7 Sync capa rules submodule 2020-10-27 15:12:51 +00:00
Capa Bot
044ee83fbc Sync capa-testfiles submodule 2020-10-26 16:48:15 +00:00
Capa Bot
aea324c4a8 Sync capa rules submodule 2020-10-26 16:47:44 +00:00
Capa Bot
4d05b20830 Sync capa rules submodule 2020-10-26 16:46:53 +00:00
Willi Ballenthin
276928951c build: event published/edited, not created 2020-10-23 15:17:32 -06:00
Willi Ballenthin
9486654e77 changelog: v1.4.1 2020-10-23 15:13:22 -06:00
Willi Ballenthin
2a2b4cbb06 Merge pull request #351 from fireeye/ci-build-windows-vcpython27
fix build on windows-latest
2020-10-23 15:10:56 -06:00
Willi Ballenthin
3ba4a8cdd8 Update build.yml 2020-10-23 15:07:13 -06:00
Willi Ballenthin
8820dabab9 Update build.yml 2020-10-23 14:59:34 -06:00
Willi Ballenthin
f9d89301df Update build.yml 2020-10-23 14:58:44 -06:00
Willi Ballenthin
7edb93d3ad Update build.yml 2020-10-23 14:57:14 -06:00
31 changed files with 1388 additions and 73 deletions

.github/dependabot.yml (new file)
View File

@@ -0,0 +1,6 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"

View File

@@ -2,7 +2,7 @@ name: build
on:
release:
types: [created, edited]
types: [edited, published]
jobs:
build:
@@ -32,6 +32,10 @@ jobs:
python-version: 2.7
- if: matrix.os == 'ubuntu-latest'
run: sudo apt-get install -y libyaml-dev
- if: matrix.os == 'windows-latest'
run: |
choco install vcredist2008
choco install --ignore-dependencies vcpython27
- name: Install PyInstaller
# pyinstaller 4 doesn't support Python 2.7
run: pip install 'pyinstaller==3.*'

View File

@@ -45,13 +45,13 @@ jobs:
runs-on: ubuntu-latest
needs: [code_style, rule_linter]
strategy:
fail-fast: false
matrix:
include:
- python: 2.7
- python: 3.6
- python: 3.7
- python: 3.8
- python: '3.9.0-rc.1' # Python latest
- python: 3.9.1
steps:
- name: Checkout capa with submodules
uses: actions/checkout@v2

View File

@@ -1,5 +1,116 @@
# Change Log
## v1.5.0 (2021-02-05)
This release brings support for running capa under Python 3 via [SMDA](https://github.com/danielplohmann/smda), more thorough CI testing and linting, better extraction of strings and byte features, and 50 (!) new rules. We appreciate everyone who opened issues, provided feedback, and contributed code and rules. A special shout out to the following new project contributors:
- @johnk3r
- @doomedraven
- @stvemillertime
- @itreallynick
- @0x534a
@dzbeck also added [Malware Behavior Catalog](https://github.com/MBCProject/mbc-markdown) (MBC) and ATT&CK mappings for many rules.
Download a standalone binary below and checkout the readme [here on GitHub](https://github.com/fireeye/capa/). Report issues on our [issue tracker](https://github.com/fireeye/capa/issues) and contribute new rules at [capa-rules](https://github.com/fireeye/capa-rules/).
### New Features
- py3 support via SMDA #355 @danielplohmann @jcrussell
- scripts: example of using capa as a library #372, #380 @doomedraven
- ci: enable dependabot #373 @mr-tz
- ci: lint rules @mr-tz
- ci: lint rule format #401 @mr-tz
- freeze: add base address #391 @mr-tz
- json: meta: add base address #412 @mr-tz
### New Rules (50)
- 64-bit execution via heavens gate @recvfrom
- contain anti-disasm techniques @mr-tz
- check for microsoft office emulation @re-fox
- check for windows sandbox via device @re-fox
- check for windows sandbox via dns suffix @re-fox
- check for windows sandbox via genuine state @re-fox
- check for windows sandbox via process name @re-fox
- check for windows sandbox via registry @re-fox
- capture microphone audio @re-fox
- capture public ip @re-fox
- get domain trust relationships @johnk3r
- check HTTP status code @mr-tz
- compiled with perl2exe @re-fox
- compiled with ps2exe @re-fox
- compiled with pyarmor @stvemillertime, @itreallynick
- validate payment card number using luhn algorithm @re-fox
- hash data using fnv @re-fox @mr-tz
- generate random numbers via WinAPI @mike-hunhoff @johnk3r
- enumerate files recursively @re-fox
- get file system object information @mike-hunhoff
- read virtual disk @re-fox
- register minifilter driver @mike-hunhoff
- start minifilter driver @mike-hunhoff
- enumerate gui resources @johnk3r
- simulate CTRL ALT DEL @mike-hunhoff
- hijack thread execution @0x534a
- inject dll @0x534a
- inject pe @0x534a
- create or open registry key @mike-hunhoff
- delete registry value @mike-hunhoff
- query or enumerate registry key @mike-hunhoff
- query or enumerate registry value @mike-hunhoff
- resume thread @0x534a
- suspend thread @0x534a
- allocate memory @0x534a
- allocate RW memory @0x534a
- contain pusha popa sequence @mr-tz
- create or open file @mike-hunhoff
- open process @0x534a
- open thread @0x534a
- get kernel32 base address @mr-tz
- get ntdll base address @mr-tz
- encrypt or decrypt data via BCrypt @mike-hunhoff
- generate random numbers using the Delphi LCG @williballenthin
- hash data via BCrypt @mike-hunhoff
- migrate process to active window station @williballenthin
- patch process command line @williballenthin
- resolve function by hash @williballenthin
- persist via Winlogon Helper DLL registry key @0x534a
- schedule task via command line @0x534a
### Bug Fixes
- doc: pyinstaller build process @mr-tz
- ida: better bytes extraction #409 @mike-hunhoff
- viv: better unicode string extraction #364 @mike-hunhoff
- viv: better unicode string extraction #378 @mr-tz
- viv: more xor instructions #379 @mr-tz
- viv: decrease logging verbosity #381 @mr-tz
- rules: fix api description syntax #403 @mike-hunhoff
- main: disable progress background thread #410 @mike-hunhoff
### Changes
- rules: return lib rules for scopes #398 @mr-tz
### Raw diffs
- [capa v1.4.1...v1.5.0](https://github.com/fireeye/capa/compare/v1.4.1...v1.5.0)
- [capa-rules v1.4.0...v1.5.0](https://github.com/fireeye/capa-rules/compare/v1.4.0...v1.5.0)
## v1.4.1 (2020-10-23)
This release fixes an issue building capa on our CI server, which prevented us from building standalone binaries for v1.4.1.
### Bug Fixes
- install VC dependencies for Python 2.7 during Windows build
### Raw diffs
- [capa v1.4.0...v1.4.1](https://github.com/fireeye/capa/compare/v1.4.0...v1.4.1)
- [capa-rules v1.4.0...v1.4.1](https://github.com/fireeye/capa-rules/compare/v1.4.0...v1.4.1)
## v1.4.0 (2020-10-23)
This capa release includes changes to the rule parsing, enhanced feature extraction, various bug fixes, and improved capa scripts. Everyone should benefit from the improved functionality and performance. The community helped to add 69 new rules. We appreciate everyone who opened issues, provided feedback, and contributed code and rules. A special shout out to the following new project contributors:

View File

@@ -1,7 +1,7 @@
![capa](.github/logo.png)
[![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Number of rules](https://img.shields.io/badge/rules-414-blue.svg)](https://github.com/fireeye/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-458-blue.svg)](https://github.com/fireeye/capa-rules)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)
capa detects capabilities in executable files.

View File

@@ -42,7 +42,9 @@ def is_ordinal(symbol):
"""
is the given symbol an ordinal that is prefixed by "#"?
"""
return symbol[0] == "#"
if symbol:
return symbol[0] == "#"
return False
def generate_symbols(dll, symbol):
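For illustration, a minimal standalone sketch of the guard added above: with the bounds check, an empty symbol string no longer triggers the `IndexError` shown in the traceback from #380.

```python
# standalone copy of the hardened helper, for illustration only
def is_ordinal(symbol):
    if symbol:
        return symbol[0] == "#"
    return False

assert is_ordinal("#57") is True          # ordinal import, prefixed with "#"
assert is_ordinal("CreateFileA") is False # named import
assert is_ordinal("") is False            # previously raised IndexError: string index out of range
```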

View File

@@ -166,6 +166,10 @@ def basic_block_size(bb):
def read_bytes_at(ea, count):
""" """
# check if byte has a value, see get_wide_byte doc
if not idc.is_loaded(ea):
return b""
segm_end = idc.get_segm_end(ea)
if ea + count > segm_end:
return idc.get_bytes(ea, segm_end - ea)

View File

@@ -148,6 +148,9 @@ def extract_insn_bytes_features(f, bb, insn):
example:
push offset iid_004118d4_IShellLinkA ; riid
"""
if idaapi.is_call_insn(insn):
return
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
if ref != insn.ea:
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
@@ -302,7 +305,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
bb (IDA BasicBlock)
insn (IDA insn_t)
"""
if insn.itype != idaapi.NN_xor:
if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor):
return
if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2):
return

View File

@@ -0,0 +1,52 @@
import sys
import types
from smda.common.SmdaReport import SmdaReport
from smda.common.SmdaInstruction import SmdaInstruction
import capa.features.extractors.smda.file
import capa.features.extractors.smda.insn
import capa.features.extractors.smda.function
import capa.features.extractors.smda.basicblock
from capa.main import UnsupportedRuntimeError
from capa.features.extractors import FeatureExtractor
class SmdaFeatureExtractor(FeatureExtractor):
def __init__(self, smda_report: SmdaReport, path):
super(SmdaFeatureExtractor, self).__init__()
if sys.version_info < (3, 0):
raise UnsupportedRuntimeError("SMDA should only be used with Python 3.")
self.smda_report = smda_report
self.path = path
def get_base_address(self):
return self.smda_report.base_addr
def extract_file_features(self):
for feature, va in capa.features.extractors.smda.file.extract_features(self.smda_report, self.path):
yield feature, va
def get_functions(self):
for function in self.smda_report.getFunctions():
yield function
def extract_function_features(self, f):
for feature, va in capa.features.extractors.smda.function.extract_features(f):
yield feature, va
def get_basic_blocks(self, f):
for bb in f.getBlocks():
yield bb
def extract_basic_block_features(self, f, bb):
for feature, va in capa.features.extractors.smda.basicblock.extract_features(f, bb):
yield feature, va
def get_instructions(self, f, bb):
for smda_ins in bb.getInstructions():
yield smda_ins
def extract_insn_features(self, f, bb, insn):
for feature, va in capa.features.extractors.smda.insn.extract_features(f, bb, insn):
yield feature, va
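A minimal usage sketch of this extractor, assuming SMDA is installed; it mirrors how `capa.main.get_extractor_py3` (further down in this diff) builds the object. The sample path is hypothetical.

```python
from smda.SmdaConfig import SmdaConfig
from smda.Disassembler import Disassembler

from capa.features.extractors.smda import SmdaFeatureExtractor

path = "/path/to/sample.exe"  # hypothetical input

config = SmdaConfig()
config.STORE_BUFFER = True  # keep the file buffer so byte/string features can be read
report = Disassembler(config).disassembleFile(path)

extractor = SmdaFeatureExtractor(report, path)
print(hex(extractor.get_base_address()))
for f in extractor.get_functions():
    for feature, va in extractor.extract_function_features(f):
        print(hex(va), feature)
```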

View File

@@ -0,0 +1,131 @@
import sys
import string
import struct
from capa.features import Characteristic
from capa.features.basicblock import BasicBlock
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
def _bb_has_tight_loop(f, bb):
"""
parse tight loops, true if last instruction in basic block branches to bb start
"""
return bb.offset in f.blockrefs[bb.offset] if bb.offset in f.blockrefs else False
def extract_bb_tight_loop(f, bb):
""" check basic block for tight loop indicators """
if _bb_has_tight_loop(f, bb):
yield Characteristic("tight loop"), bb.offset
def _bb_has_stackstring(f, bb):
"""
extract potential stackstring creation, using the following heuristics:
- basic block contains enough moves of constant bytes to the stack
"""
count = 0
for instr in bb.getInstructions():
if is_mov_imm_to_stack(instr):
count += get_printable_len(instr.getDetailed())
if count > MIN_STACKSTRING_LEN:
return True
return False
def get_operands(smda_ins):
return [o.strip() for o in smda_ins.operands.split(",")]
def extract_stackstring(f, bb):
""" check basic block for stackstring indicators """
if _bb_has_stackstring(f, bb):
yield Characteristic("stack string"), bb.offset
def is_mov_imm_to_stack(smda_ins):
"""
Return if instruction moves immediate onto stack
"""
if not smda_ins.mnemonic.startswith("mov"):
return False
try:
dst, src = get_operands(smda_ins)
except ValueError:
# not two operands
return False
try:
int(src, 16)
except ValueError:
return False
if not any(regname in dst for regname in ["ebp", "rbp", "esp", "rsp"]):
return False
return True
def is_printable_ascii(chars):
return all(c < 127 and chr(c) in string.printable for c in chars)
def is_printable_utf16le(chars):
if all(c == 0x00 for c in chars[1::2]):
return is_printable_ascii(chars[::2])
def get_printable_len(instr):
"""
Return string length if all operand bytes are ascii or utf16-le printable
Works on a capstone instruction
"""
# should have exactly two operands for mov immediate
if len(instr.operands) != 2:
return 0
op_value = instr.operands[1].value.imm
if instr.imm_size == 1:
chars = struct.pack("<B", op_value & 0xFF)
elif instr.imm_size == 2:
chars = struct.pack("<H", op_value & 0xFFFF)
elif instr.imm_size == 4:
chars = struct.pack("<I", op_value & 0xFFFFFFFF)
elif instr.imm_size == 8:
chars = struct.pack("<Q", op_value & 0xFFFFFFFFFFFFFFFF)
else:
raise ValueError("Unhandled operand data type 0x%x." % instr.imm_size)
if is_printable_ascii(chars):
return instr.imm_size
if is_printable_utf16le(chars):
return instr.imm_size // 2
return 0
def extract_features(f, bb):
"""
extract features from the given basic block.
args:
f (smda.common.SmdaFunction): the function from which to extract features
bb (smda.common.SmdaBasicBlock): the basic block to process.
yields:
Feature, set[VA]: the features and their location found in this basic block.
"""
yield BasicBlock(), bb.offset
for bb_handler in BASIC_BLOCK_HANDLERS:
for feature, va in bb_handler(f, bb):
yield feature, va
BASIC_BLOCK_HANDLERS = (
extract_bb_tight_loop,
extract_stackstring,
)
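To make the mov-immediate heuristic above concrete, here is a small illustrative check using a stand-in object that carries only the two attributes `is_mov_imm_to_stack` actually reads (`mnemonic` and `operands`); the operand strings are invented for the example.

```python
from types import SimpleNamespace

from capa.features.extractors.smda.basicblock import is_mov_imm_to_stack

# "Hell" written as an immediate onto the stack frame: counts toward a stackstring
ins = SimpleNamespace(mnemonic="mov", operands="dword ptr [ebp - 0x10], 0x6c6c6548")
assert is_mov_imm_to_stack(ins) is True

# register-to-stack move: not an immediate, so it does not count
ins = SimpleNamespace(mnemonic="mov", operands="dword ptr [ebp - 0x10], eax")
assert is_mov_imm_to_stack(ins) is False
```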

View File

@@ -0,0 +1,139 @@
import struct
# if we have SMDA we definitely have lief
import lief
import capa.features.extractors.helpers
import capa.features.extractors.strings
from capa.features import String, Characteristic
from capa.features.file import Export, Import, Section
def carve(pbytes, offset=0):
"""
Return a list of (offset, size, xor) tuples of embedded PEs
Based on the version from vivisect:
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
And its IDA adaptation:
capa/features/extractors/ida/file.py
"""
mz_xor = [
(
capa.features.extractors.helpers.xor_static(b"MZ", i),
capa.features.extractors.helpers.xor_static(b"PE", i),
i,
)
for i in range(256)
]
pblen = len(pbytes)
todo = [(pbytes.find(mzx, offset), mzx, pex, i) for mzx, pex, i in mz_xor]
todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != -1]
while len(todo):
off, mzx, pex, i = todo.pop()
# The MZ header has one field we will check
# e_lfanew is at 0x3c
e_lfanew = off + 0x3C
if pblen < (e_lfanew + 4):
continue
newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(pbytes[e_lfanew : e_lfanew + 4], i))[0]
nextres = pbytes.find(mzx, off + 1)
if nextres != -1:
todo.append((nextres, mzx, pex, i))
peoff = off + newoff
if pblen < (peoff + 2):
continue
if pbytes[peoff : peoff + 2] == pex:
yield (off, i)
def extract_file_embedded_pe(smda_report, file_path):
with open(file_path, "rb") as f:
fbytes = f.read()
for offset, i in carve(fbytes, 1):
yield Characteristic("embedded pe"), offset
def extract_file_export_names(smda_report, file_path):
lief_binary = lief.parse(file_path)
if lief_binary is not None:
for function in lief_binary.exported_functions:
yield Export(function.name), function.address
def extract_file_import_names(smda_report, file_path):
# extract import table info via LIEF
lief_binary = lief.parse(file_path)
if not isinstance(lief_binary, lief.PE.Binary):
return
for imported_library in lief_binary.imports:
library_name = imported_library.name.lower()
library_name = library_name[:-4] if library_name.endswith(".dll") else library_name
for func in imported_library.entries:
if func.name:
va = func.iat_address + smda_report.base_addr
for name in capa.features.extractors.helpers.generate_symbols(library_name, func.name):
yield Import(name), va
elif func.is_ordinal:
for name in capa.features.extractors.helpers.generate_symbols(library_name, "#%s" % func.ordinal):
yield Import(name), va
def extract_file_section_names(smda_report, file_path):
lief_binary = lief.parse(file_path)
if not isinstance(lief_binary, lief.PE.Binary):
return
if lief_binary and lief_binary.sections:
base_address = lief_binary.optional_header.imagebase
for section in lief_binary.sections:
yield Section(section.name), base_address + section.virtual_address
def extract_file_strings(smda_report, file_path):
"""
extract ASCII and UTF-16 LE strings from file
"""
with open(file_path, "rb") as f:
b = f.read()
for s in capa.features.extractors.strings.extract_ascii_strings(b):
yield String(s.s), s.offset
for s in capa.features.extractors.strings.extract_unicode_strings(b):
yield String(s.s), s.offset
def extract_features(smda_report, file_path):
"""
extract file features from given workspace
args:
smda_report (smda.common.SmdaReport): a SmdaReport
file_path: path to the input file
yields:
Tuple[Feature, VA]: a feature and its location.
"""
for file_handler in FILE_HANDLERS:
result = file_handler(smda_report, file_path)
for feature, va in file_handler(smda_report, file_path):
yield feature, va
FILE_HANDLERS = (
extract_file_embedded_pe,
extract_file_export_names,
extract_file_import_names,
extract_file_section_names,
extract_file_strings,
)
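As a quick illustration, `carve()` can also be driven directly over a file on disk, exactly as `extract_file_embedded_pe` does above (the start offset of 1 skips the container's own MZ header); the sample path is hypothetical.

```python
from capa.features.extractors.smda.file import carve

with open("/path/to/sample.exe", "rb") as f:  # hypothetical input
    data = f.read()

for offset, xor_key in carve(data, 1):
    print("embedded PE at 0x%x (xor key 0x%02x)" % (offset, xor_key))
```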

View File

@@ -0,0 +1,38 @@
from capa.features import Characteristic
from capa.features.extractors import loops
def extract_function_calls_to(f):
for inref in f.inrefs:
yield Characteristic("calls to"), inref
def extract_function_loop(f):
"""
parse if a function has a loop
"""
edges = []
for bb_from, bb_tos in f.blockrefs.items():
for bb_to in bb_tos:
edges.append((bb_from, bb_to))
if edges and loops.has_loop(edges):
yield Characteristic("loop"), f.offset
def extract_features(f):
"""
extract features from the given function.
args:
f (smda.common.SmdaFunction): the function from which to extract features
yields:
Feature, set[VA]: the features and their location found in this function.
"""
for func_handler in FUNCTION_HANDLERS:
for feature, va in func_handler(f):
yield feature, va
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)

View File

@@ -0,0 +1,393 @@
import re
import string
import struct
from smda.common.SmdaReport import SmdaReport
import capa.features.extractors.helpers
from capa.features import (
ARCH_X32,
ARCH_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
from capa.features.insn import API, Number, Offset, Mnemonic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features
SECURITY_COOKIE_BYTES_DELTA = 0x40
PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
def get_arch(smda_report):
if smda_report.architecture == "intel":
if smda_report.bitness == 32:
return ARCH_X32
elif smda_report.bitness == 64:
return ARCH_X64
else:
raise NotImplementedError
def extract_insn_api_features(f, bb, insn):
"""parse API features from the given instruction."""
if insn.offset in f.apirefs:
api_entry = f.apirefs[insn.offset]
# reformat
dll_name, api_name = api_entry.split("!")
dll_name = dll_name.split(".")[0]
dll_name = dll_name.lower()
for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
yield API(name), insn.offset
elif insn.offset in f.outrefs:
current_function = f
current_instruction = insn
for index in range(THUNK_CHAIN_DEPTH_DELTA):
if current_function and len(current_function.outrefs[current_instruction.offset]) == 1:
target = current_function.outrefs[current_instruction.offset][0]
referenced_function = current_function.smda_report.getFunction(target)
if referenced_function:
# TODO SMDA: implement this function for both jmp and call, checking if function has 1 instruction which refs an API
if referenced_function.isApiThunk():
api_entry = (
referenced_function.apirefs[target] if target in referenced_function.apirefs else None
)
if api_entry:
# reformat
dll_name, api_name = api_entry.split("!")
dll_name = dll_name.split(".")[0]
dll_name = dll_name.lower()
for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
yield API(name), insn.offset
elif referenced_function.num_instructions == 1 and referenced_function.num_outrefs == 1:
current_function = referenced_function
current_instruction = [i for i in referenced_function.getInstructions()][0]
else:
return
def extract_insn_number_features(f, bb, insn):
"""parse number features from the given instruction."""
# example:
#
# push 3136B0h ; dwControlCode
operands = [o.strip() for o in insn.operands.split(",")]
if insn.mnemonic == "add" and operands[0] in ["esp", "rsp"]:
# skip things like:
#
# .text:00401140 call sub_407E2B
# .text:00401145 add esp, 0Ch
return
for operand in operands:
try:
yield Number(int(operand, 16)), insn.offset
yield Number(int(operand, 16), arch=get_arch(f.smda_report)), insn.offset
except:
continue
def read_bytes(smda_report, va, num_bytes=None):
"""
read up to MAX_BYTES_FEATURE_SIZE from the given address.
"""
rva = va - smda_report.base_addr
if smda_report.buffer is None:
return
buffer_end = len(smda_report.buffer)
max_bytes = num_bytes if num_bytes is not None else MAX_BYTES_FEATURE_SIZE
if rva + max_bytes > buffer_end:
return smda_report.buffer[rva:]
else:
return smda_report.buffer[rva : rva + max_bytes]
def derefs(smda_report, p):
"""
recursively follow the given pointer, yielding the valid memory addresses along the way.
useful when you may have a pointer to string, or pointer to pointer to string, etc.
this is a "do what i mean" type of helper function.
based on the implementation in viv/insn.py
"""
depth = 0
while True:
if not smda_report.isAddrWithinMemoryImage(p):
return
yield p
bytes_ = read_bytes(smda_report, p, num_bytes=4)
val = struct.unpack("I", bytes_)[0]
# sanity: pointer points to self
if val == p:
return
# sanity: avoid chains of pointers that are unreasonably deep
depth += 1
if depth > 10:
return
p = val
def extract_insn_bytes_features(f, bb, insn):
"""
parse byte sequence features from the given instruction.
example:
# push offset iid_004118d4_IShellLinkA ; riid
"""
for data_ref in insn.getDataRefs():
for v in derefs(f.smda_report, data_ref):
bytes_read = read_bytes(f.smda_report, v)
if bytes_read is None:
continue
if capa.features.extractors.helpers.all_zeros(bytes_read):
continue
yield Bytes(bytes_read), insn.offset
def detect_ascii_len(smda_report, offset):
if smda_report.buffer is None:
return 0
ascii_len = 0
rva = offset - smda_report.base_addr
char = smda_report.buffer[rva]
while char < 127 and chr(char) in string.printable:
ascii_len += 1
rva += 1
char = smda_report.buffer[rva]
if char == 0:
return ascii_len
return 0
def detect_unicode_len(smda_report, offset):
if smda_report.buffer is None:
return 0
unicode_len = 0
rva = offset - smda_report.base_addr
char = smda_report.buffer[rva]
second_char = smda_report.buffer[rva + 1]
while char < 127 and chr(char) in string.printable and second_char == 0:
unicode_len += 2
rva += 2
char = smda_report.buffer[rva]
second_char = smda_report.buffer[rva + 1]
if char == 0 and second_char == 0:
return unicode_len
return 0
def read_string(smda_report, offset):
alen = detect_ascii_len(smda_report, offset)
if alen > 1:
return read_bytes(smda_report, offset, alen).decode("utf-8")
ulen = detect_unicode_len(smda_report, offset)
if ulen > 2:
return read_bytes(smda_report, offset, ulen).decode("utf-16")
def extract_insn_string_features(f, bb, insn):
"""parse string features from the given instruction."""
# example:
#
# push offset aAcr ; "ACR > "
for data_ref in insn.getDataRefs():
for v in derefs(f.smda_report, data_ref):
string_read = read_string(f.smda_report, v)
if string_read:
yield String(string_read.rstrip("\x00")), insn.offset
def extract_insn_offset_features(f, bb, insn):
"""parse structure offset features from the given instruction."""
# examples:
#
# mov eax, [esi + 4]
# mov eax, [esi + ecx + 16384]
operands = [o.strip() for o in insn.operands.split(",")]
for operand in operands:
if not "ptr" in operand:
continue
if "esp" in operand or "ebp" in operand or "rbp" in operand:
continue
number = 0
number_hex = re.search(PATTERN_HEXNUM, operand)
number_int = re.search(PATTERN_SINGLENUM, operand)
if number_hex:
number = int(number_hex.group("num"), 16)
number = -1 * number if number_hex.group().startswith("-") else number
elif number_int:
number = int(number_int.group("num"))
number = -1 * number if number_int.group().startswith("-") else number
yield Offset(number), insn.offset
yield Offset(number, arch=get_arch(f.smda_report)), insn.offset
def is_security_cookie(f, bb, insn):
"""
check if an instruction is related to security cookie checks
"""
# security cookie check should use SP or BP
operands = [o.strip() for o in insn.operands.split(",")]
if operands[1] not in ["esp", "ebp", "rsp", "rbp"]:
return False
for index, block in enumerate(f.getBlocks()):
# expect security cookie init in first basic block within first bytes (instructions)
block_instructions = [i for i in block.getInstructions()]
if index == 0 and insn.offset < (block_instructions[0].offset + SECURITY_COOKIE_BYTES_DELTA):
return True
# ... or within last bytes (instructions) before a return
if block_instructions[-1].mnemonic.startswith("ret") and insn.offset > (
block_instructions[-1].offset - SECURITY_COOKIE_BYTES_DELTA
):
return True
return False
def extract_insn_nzxor_characteristic_features(f, bb, insn):
"""
parse non-zeroing XOR instruction from the given instruction.
ignore expected non-zeroing XORs, e.g. security cookies.
"""
if insn.mnemonic not in ("xor", "xorpd", "xorps", "pxor"):
return
operands = [o.strip() for o in insn.operands.split(",")]
if operands[0] == operands[1]:
return
if is_security_cookie(f, bb, insn):
return
yield Characteristic("nzxor"), insn.offset
def extract_insn_mnemonic_features(f, bb, insn):
"""parse mnemonic features from the given instruction."""
yield Mnemonic(insn.mnemonic), insn.offset
def extract_insn_peb_access_characteristic_features(f, bb, insn):
"""
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
"""
if insn.mnemonic not in ["push", "mov"]:
return
operands = [o.strip() for o in insn.operands.split(",")]
for operand in operands:
if "fs:" in operand and "0x30" in operand:
yield Characteristic("peb access"), insn.offset
elif "gs:" in operand and "0x60" in operand:
yield Characteristic("peb access"), insn.offset
def extract_insn_segment_access_features(f, bb, insn):
""" parse the instruction for access to fs or gs """
operands = [o.strip() for o in insn.operands.split(",")]
for operand in operands:
if "fs:" in operand:
yield Characteristic("fs access"), insn.offset
elif "gs:" in operand:
yield Characteristic("gs access"), insn.offset
def extract_insn_cross_section_cflow(f, bb, insn):
"""
inspect the instruction for a CALL or JMP that crosses section boundaries.
"""
if insn.mnemonic in ["call", "jmp"]:
if insn.offset in f.apirefs:
return
smda_report = insn.smda_function.smda_report
if insn.offset in f.outrefs:
for target in f.outrefs[insn.offset]:
if smda_report.getSection(insn.offset) != smda_report.getSection(target):
yield Characteristic("cross section flow"), insn.offset
elif insn.operands.startswith("0x"):
target = int(insn.operands, 16)
if smda_report.getSection(insn.offset) != smda_report.getSection(target):
yield Characteristic("cross section flow"), insn.offset
# this is a feature that's most relevant at the function scope,
# however, its most efficient to extract at the instruction scope.
def extract_function_calls_from(f, bb, insn):
if insn.mnemonic != "call":
return
if insn.offset in f.outrefs:
for outref in f.outrefs[insn.offset]:
yield Characteristic("calls from"), outref
if outref == f.offset:
# if we found a jump target and it's the function address
# mark as recursive
yield Characteristic("recursive call"), outref
if insn.offset in f.apirefs:
yield Characteristic("calls from"), insn.offset
# this is a feature that's most relevant at the function or basic block scope,
# however, its most efficient to extract at the instruction scope.
def extract_function_indirect_call_characteristic_features(f, bb, insn):
"""
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
does not include calls like => call ds:dword_ABD4974
"""
if insn.mnemonic != "call":
return
if insn.operands.startswith("0x"):
return False
if "qword ptr" in insn.operands and "rip" in insn.operands:
return False
if insn.operands.startswith("dword ptr [0x"):
return False
# call edx
# call dword ptr [eax+50h]
# call qword ptr [rsp+78h]
yield Characteristic("indirect call"), insn.offset
def extract_features(f, bb, insn):
"""
extract features from the given insn.
args:
f (smda.common.SmdaFunction): the function to process.
bb (smda.common.SmdaBasicBlock): the basic block to process.
insn (smda.common.SmdaInstruction): the instruction to process.
yields:
Feature, set[VA]: the features and their location found in this insn.
"""
for insn_handler in INSTRUCTION_HANDLERS:
for feature, va in insn_handler(f, bb, insn):
yield feature, va
INSTRUCTION_HANDLERS = (
extract_insn_api_features,
extract_insn_number_features,
extract_insn_string_features,
extract_insn_bytes_features,
extract_insn_offset_features,
extract_insn_nzxor_characteristic_features,
extract_insn_mnemonic_features,
extract_insn_peb_access_characteristic_features,
extract_insn_cross_section_cflow,
extract_insn_segment_access_features,
extract_function_calls_from,
extract_function_indirect_call_characteristic_features,
)

View File

@@ -258,10 +258,10 @@ def extract_insn_bytes_features(f, bb, insn):
example:
# push offset iid_004118d4_IShellLinkA ; riid
"""
for oper in insn.opers:
if insn.mnem == "call":
continue
if insn.mnem == "call":
return
for oper in insn.opers:
if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
v = oper.getOperValue(oper)
elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
@@ -311,6 +311,10 @@ def read_string(vw, offset):
# vivisect seems to mis-detect the end unicode strings
# off by one, too short
ulen += 1
else:
# vivisect seems to mis-detect the end unicode strings
# off by two, too short
ulen += 2
return read_memory(vw, offset, ulen).decode("utf-16")
raise ValueError("not a string", offset)
@@ -325,6 +329,9 @@ def extract_insn_string_features(f, bb, insn):
for oper in insn.opers:
if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
v = oper.getOperValue(oper)
elif isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper):
# like 0x10056CB4 in `lea eax, dword [0x10056CB4]`
v = oper.imm
elif isinstance(oper, envi.archs.i386.disasm.i386SibOper):
# like 0x401000 in `mov eax, 0x401000[2 * ebx]`
v = oper.imm
@@ -415,7 +422,7 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
parse non-zeroing XOR instruction from the given instruction.
ignore expected non-zeroing XORs, e.g. security cookies.
"""
if insn.mnem != "xor":
if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"):
return
if insn.opers[0] == insn.opers[1]:

View File

@@ -5,6 +5,7 @@ json format:
{
'version': 1,
'base address': int(base address),
'functions': {
int(function va): {
'basic blocks': {
@@ -86,6 +87,7 @@ def dumps(extractor):
"""
ret = {
"version": 1,
"base address": extractor.get_base_address(),
"functions": {},
"scopes": {
"file": [],
@@ -147,6 +149,7 @@ def loads(s):
raise ValueError("unsupported freeze format version: %d" % (doc.get("version")))
features = {
"base address": doc.get("base address"),
"file features": [],
"functions": {},
}
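For orientation, a hand-written and heavily truncated fragment of the frozen document described by the docstring above, showing where the new "base address" field sits; all values are illustrative.

```python
frozen = {
    "version": 1,
    "base address": 0x400000,  # newly added in this change
    "functions": {
        # int(function va) -> {"basic blocks": {...}, ...}, elided here
    },
    "scopes": {
        "file": [],            # file-scope features, elided here
    },
}
```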

View File

@@ -16,7 +16,7 @@ class API(Feature):
modname, _, impname = name.rpartition(".")
name = modname.lower() + "." + impname
super(API, self).__init__(name, description)
super(API, self).__init__(name, description=description)
class Number(Feature):

View File

@@ -103,6 +103,7 @@ def collect_metadata():
"analysis": {
"format": idaapi.get_file_type_name(),
"extractor": "ida",
"base_address": idaapi.get_imagebase(),
},
"version": capa.version.__version__,
}

View File

@@ -40,8 +40,11 @@ logger = logging.getLogger("capa")
def set_vivisect_log_level(level):
logging.getLogger("vivisect").setLevel(level)
logging.getLogger("vivisect.base").setLevel(level)
logging.getLogger("vivisect.impemu").setLevel(level)
logging.getLogger("vtrace").setLevel(level)
logging.getLogger("envi").setLevel(level)
logging.getLogger("envi.codeflow").setLevel(level)
def find_function_capabilities(ruleset, extractor, f):
@@ -112,7 +115,13 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
}
}
for f in tqdm.tqdm(list(extractor.get_functions()), disable=disable_progress, desc="matching", unit=" functions"):
pbar = tqdm.tqdm
if disable_progress:
# do not use tqdm to avoid unnecessary side effects when caller intends
# to disable progress completely
pbar = lambda s, *args, **kwargs: s
for f in pbar(list(extractor.get_functions()), desc="matching", unit=" functions"):
function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
meta["feature_counts"]["functions"][f.__int__()] = feature_count
logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
@@ -295,7 +304,19 @@ class UnsupportedRuntimeError(RuntimeError):
def get_extractor_py3(path, format, disable_progress=False):
raise UnsupportedRuntimeError()
from smda.SmdaConfig import SmdaConfig
from smda.Disassembler import Disassembler
import capa.features.extractors.smda
smda_report = None
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
config = SmdaConfig()
config.STORE_BUFFER = True
smda_disasm = Disassembler(config)
smda_report = smda_disasm.disassembleFile(path)
return capa.features.extractors.smda.SmdaFeatureExtractor(smda_report, path)
def get_extractor(path, format, disable_progress=False):
@@ -351,7 +372,13 @@ def get_rules(rule_path, disable_progress=False):
rules = []
for rule_path in tqdm.tqdm(list(rule_paths), disable=disable_progress, desc="loading ", unit=" rules"):
pbar = tqdm.tqdm
if disable_progress:
# do not use tqdm to avoid unnecessary side effects when caller intends
# to disable progress completely
pbar = lambda s, *args, **kwargs: s
for rule_path in pbar(list(rule_paths), desc="loading ", unit=" rules"):
try:
rule = capa.rules.Rule.from_yaml_file(rule_path)
except capa.rules.InvalidRule:
@@ -446,14 +473,23 @@ def main(argv=None):
parser = argparse.ArgumentParser(
description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument(
# in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters
# https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works
# in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/)
"sample",
type=lambda s: s.decode(sys.getfilesystemencoding()),
help="path to sample to analyze",
)
if sys.version_info >= (3, 0):
parser.add_argument(
# Python 3 str handles non-ASCII arguments correctly
"sample",
type=str,
help="path to sample to analyze",
)
else:
parser.add_argument(
# in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters
# https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works
# in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/)
"sample",
type=lambda s: s.decode(sys.getfilesystemencoding()),
help="path to sample to analyze",
)
parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__))
parser.add_argument(
"-r",
@@ -550,7 +586,7 @@ def main(argv=None):
# during the load of the RuleSet, we extract subscope statements into their own rules
# that are subsequently `match`ed upon. this inflates the total rule count.
# so, filter out the subscope rules when reporting total number of loaded rules.
len(filter(lambda r: "capa/subscope-rule" not in r.meta, rules.rules.values())),
len([i for i in filter(lambda r: "capa/subscope-rule" not in r.meta, rules.rules.values())]),
)
if args.tag:
rules = rules.filter_rules_by_meta(args.tag)

View File

@@ -6,6 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import re
import uuid
import codecs
import logging
@@ -600,6 +601,9 @@ class Rule(object):
# use block mode, not inline json-like mode
y.default_flow_style = False
# leave quotes unchanged
y.preserve_quotes = True
# indent lists by two spaces below their parent
#
# features:
@@ -614,16 +618,20 @@ class Rule(object):
return y
@classmethod
def from_yaml(cls, s):
# use pyyaml because it can be much faster than ruamel (pure python)
doc = yaml.load(s, Loader=cls._get_yaml_loader())
def from_yaml(cls, s, use_ruamel=False):
if use_ruamel:
# ruamel enables nice formatting and doc roundtripping with comments
doc = cls._get_ruamel_yaml_parser().load(s)
else:
# use pyyaml because it can be much faster than ruamel (pure python)
doc = yaml.load(s, Loader=cls._get_yaml_loader())
return cls.from_dict(doc, s)
@classmethod
def from_yaml_file(cls, path):
def from_yaml_file(cls, path, use_ruamel=False):
with open(path, "rb") as f:
try:
return cls.from_yaml(f.read().decode("utf-8"))
return cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel)
except InvalidRule as e:
raise InvalidRuleWithPath(path, str(e))
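For reference, a round-trip parser configured along the lines of the comments above can be built like this (a minimal sketch; the exact indent values are illustrative, not necessarily capa's):

```python
import ruamel.yaml

def get_ruamel_yaml_parser():
    # round-trip mode preserves comments and key order when re-emitting
    y = ruamel.yaml.YAML(typ="rt")
    # use block mode, not inline json-like flow style
    y.default_flow_style = False
    # leave quotes unchanged
    y.preserve_quotes = True
    # indent sequence items relative to their parent mapping key
    y.indent(mapping=2, sequence=4, offset=2)
    return y

parser = get_ruamel_yaml_parser()
doc = parser.load("rule:\n  meta:\n    name: 'demo'\n")
```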
@@ -716,7 +724,18 @@ class Rule(object):
# tweaking `ruamel.indent()` doesn't quite give us the control we want.
# so, add the two extra spaces that we've determined we need through experimentation.
# see #263
doc = doc.replace("  description:", "    description:")
# only do this for the features section, so the meta description doesn't get reformatted
# assumes features section always exists
features_offset = doc.find("features")
doc = doc[:features_offset] + doc[features_offset:].replace("  description:", "    description:")
# for negative hex numbers, yaml dump outputs:
# - offset: !!int '0x-30'
# we prefer:
# - offset: -0x30
# the regex below makes these adjustments; while ugly, it saves us from digging into ruamel.yaml internals
doc = re.sub(r"!!int '0x-([0-9a-fA-F]+)'", r"-0x\1", doc)
return doc
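For example, the substitution behaves like this (a standalone illustration of the regex above):

```python
import re

line = "    - offset: !!int '0x-30'"
print(re.sub(r"!!int '0x-([0-9a-fA-F]+)'", r"-0x\1", line))
# prints:     - offset: -0x30
```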
@@ -866,7 +885,8 @@ class RuleSet(object):
given a collection of rules, collect the rules that are needed at the given scope.
these rules are ordered topologically.
don't include "lib" rules, unless they are dependencies of other rules.
don't include auto-generated "subscope" rules.
we want to include general "lib" rules here - even if they are not dependencies of other rules, see #398
"""
scope_rules = set([])
@@ -875,7 +895,7 @@ class RuleSet(object):
# at lower scope, e.g. function scope.
# so, we find all dependencies of all rules, and later will filter them down.
for rule in rules:
if rule.meta.get("lib", False):
if rule.meta.get("capa/subscope-rule", False):
continue
scope_rules.update(get_rules_and_dependencies(rules, rule.name))


@@ -1 +1 @@
__version__ = "1.4.0"
__version__ = "1.5.1"


@@ -74,8 +74,20 @@ Note that some development dependencies (including the black code formatter) req
To check the code style and formatting and run the tests, you can run the script `scripts/ci.sh`.
You can run it with the argument `no_tests` to skip the tests and only run the code style and formatting: `scripts/ci.sh no_tests`
### 3. Setup hooks [optional]
### 3. Compile binary using PyInstaller
We compile capa standalone binaries using PyInstaller. To reproduce the build process, check out the source code as described above and follow these steps.
#### Install PyInstaller:
For Python 2.7: `$ pip install 'pyinstaller==3.*'` (PyInstaller 4 doesn't support Python 2.7)
For Python 3: `$ pip install pyinstaller`
#### Run PyInstaller
`$ pyinstaller .github/pyinstaller/pyinstaller.spec`
You can find the compiled binary in the created directory `dist/`.
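If you prefer to drive the build from Python rather than the shell, PyInstaller also exposes a programmatic entry point; a minimal sketch (assumes PyInstaller is installed in the active environment):

```python
import PyInstaller.__main__

# equivalent to running: pyinstaller .github/pyinstaller/pyinstaller.spec
PyInstaller.__main__.run([".github/pyinstaller/pyinstaller.spec"])
```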
### 4. Setup hooks [optional]
If you plan to contribute to capa, you may want to set up the hooks.
Run `scripts/setup-hooks.sh` to set the following hooks up:
- The `pre-commit` hook runs checks before every `git commit`.
@@ -84,4 +96,3 @@ Run `scripts/setup-hooks.sh` to set the following hooks up:
- The `pre-push` hook runs checks before every `git push`.
It runs `scripts/ci.sh`, aborting the push if there are code style or rule linter offenses or if the tests fail.
This way you can ensure everything is alright before sending a pull request.


Submodule rules updated: 6830d707c7...a3274122c5

scripts/capa_as_library.py (new file, 214 lines)

@@ -0,0 +1,214 @@
#!/usr/bin/env python3
import json
import collections
import capa.main
import capa.rules
import capa.engine
import capa.features
import capa.render.utils as rutils
from capa.engine import *
from capa.render import convert_capabilities_to_result_document
# edit this to set the rule directory path; the file to analyze is passed to capa_details() below
RULES_PATH = "/tmp/capa/rules/"
# load rules from disk
rules = capa.main.get_rules(RULES_PATH, disable_progress=True)
rules = capa.rules.RuleSet(rules)
# == render dictionary helpers
def render_meta(doc, ostream):
ostream["md5"] = doc["meta"]["sample"]["md5"]
ostream["sha1"] = doc["meta"]["sample"]["sha1"]
ostream["sha256"] = doc["meta"]["sample"]["sha256"]
ostream["path"] = doc["meta"]["sample"]["path"]
def find_subrule_matches(doc):
"""
collect the rule names that have been matched as a subrule match.
this way we can avoid displaying entries for things that are too specific.
"""
matches = set([])
def rec(node):
if not node["success"]:
# there's probably a bug here for rules that do `not: match: ...`
# but we don't have any examples of this yet
return
elif node["node"]["type"] == "statement":
for child in node["children"]:
rec(child)
elif node["node"]["type"] == "feature":
if node["node"]["feature"]["type"] == "match":
matches.add(node["node"]["feature"]["match"])
for rule in rutils.capability_rules(doc):
for node in rule["matches"].values():
rec(node)
return matches
def render_capabilities(doc, ostream):
"""
example::
{'CAPABILITY': {'accept command line arguments': 'host-interaction/cli',
'allocate thread local storage (2 matches)': 'host-interaction/process',
'check for time delay via GetTickCount': 'anti-analysis/anti-debugging/debugger-detection',
'check if process is running under wine': 'anti-analysis/anti-emulation/wine',
'contain a resource (.rsrc) section': 'executable/pe/section/rsrc',
'write file (3 matches)': 'host-interaction/file-system/write'}
}
"""
subrule_matches = find_subrule_matches(doc)
ostream["CAPABILITY"] = dict()
for rule in rutils.capability_rules(doc):
if rule["meta"]["name"] in subrule_matches:
# rules that are also matched by other rules should not get rendered by default.
# this cuts down on the amount of output while giving approx the same detail.
# see #224
continue
count = len(rule["matches"])
if count == 1:
capability = rule["meta"]["name"]
else:
capability = "%s (%d matches)" % (rule["meta"]["name"], count)
ostream["CAPABILITY"].setdefault(rule["meta"]["namespace"], list())
ostream["CAPABILITY"][rule["meta"]["namespace"]].append(capability)
def render_attack(doc, ostream):
"""
example::
{'ATT&CK': {'COLLECTION': ['Input Capture::Keylogging [T1056.001]'],
'DEFENSE EVASION': ['Obfuscated Files or Information [T1027]',
'Virtualization/Sandbox Evasion::System Checks '
'[T1497.001]'],
'DISCOVERY': ['File and Directory Discovery [T1083]',
'Query Registry [T1012]',
'System Information Discovery [T1082]'],
'EXECUTION': ['Shared Modules [T1129]']}
}
"""
ostream["ATTCK"] = dict()
tactics = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
if not rule["meta"].get("att&ck"):
continue
for attack in rule["meta"]["att&ck"]:
tactic, _, rest = attack.partition("::")
if "::" in rest:
technique, _, rest = rest.partition("::")
subtechnique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, subtechnique, id))
else:
technique, _, id = rest.rpartition(" ")
tactics[tactic].add((technique, id))
for tactic, techniques in sorted(tactics.items()):
inner_rows = []
for spec in sorted(techniques):
if len(spec) == 2:
technique, id = spec
inner_rows.append("%s %s" % (technique, id))
elif len(spec) == 3:
technique, subtechnique, id = spec
inner_rows.append("%s::%s %s" % (technique, subtechnique, id))
else:
raise RuntimeError("unexpected ATT&CK spec format")
ostream["ATTCK"].setdefault(tactic.upper(), inner_rows)
def render_mbc(doc, ostream):
"""
example::
{'MBC': {'ANTI-BEHAVIORAL ANALYSIS': ['Debugger Detection::Timing/Delay Check '
'GetTickCount [B0001.032]',
'Emulator Detection [B0004]',
'Virtual Machine Detection::Instruction '
'Testing [B0009.029]',
'Virtual Machine Detection [B0009]'],
'COLLECTION': ['Keylogging::Polling [F0002.002]'],
'CRYPTOGRAPHY': ['Encrypt Data::RC4 [C0027.009]',
'Generate Pseudo-random Sequence::RC4 PRGA '
'[C0021.004]']}
}
"""
ostream["MBC"] = dict()
objectives = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
if not rule["meta"].get("mbc"):
continue
mbcs = rule["meta"]["mbc"]
if not isinstance(mbcs, list):
raise ValueError("invalid rule: MBC mapping is not a list")
for mbc in mbcs:
objective, _, rest = mbc.partition("::")
if "::" in rest:
behavior, _, rest = rest.partition("::")
method, _, id = rest.rpartition(" ")
objectives[objective].add((behavior, method, id))
else:
behavior, _, id = rest.rpartition(" ")
objectives[objective].add((behavior, id))
for objective, behaviors in sorted(objectives.items()):
inner_rows = []
for spec in sorted(behaviors):
if len(spec) == 2:
behavior, id = spec
inner_rows.append("%s %s" % (behavior, id))
elif len(spec) == 3:
behavior, method, id = spec
inner_rows.append("%s::%s %s" % (behavior, method, id))
else:
raise RuntimeError("unexpected MBC spec format")
ostream["MBC"].setdefault(objective.upper(), inner_rows)
def render_dictionary(doc):
ostream = dict()
render_meta(doc, ostream)
render_attack(doc, ostream)
render_mbc(doc, ostream)
render_capabilities(doc, ostream)
return ostream
# ==== render dictionary helpers
def capa_details(file_path, output_format="dictionary"):
# extract features and find capabilities
extractor = capa.main.get_extractor(file_path, "auto", disable_progress=True)
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
# collect metadata (used only to make rendering more complete)
meta = capa.main.collect_metadata("", file_path, RULES_PATH, "auto", extractor)
meta["analysis"].update(counts)
capa_output = False
if output_format == "dictionary":
# ...as a python dictionary, simplified to the same content as the texttable output
doc = convert_capabilities_to_result_document(meta, rules, capabilities)
capa_output = render_dictionary(doc)
elif output_format == "json":
# render results
# ...as json
capa_output = json.loads(capa.render.render_json(meta, rules, capabilities))
elif output_format == "texttable":
# ...as human readable text table
capa_output = capa.render.render_default(meta, rules, capabilities)
return capa_output
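Used as a library, the helper might be invoked like this (the sample path is hypothetical; RULES_PATH above must point at a capa rules checkout):

```python
if __name__ == "__main__":
    # analyze a single file and print the simplified dictionary output
    results = capa_details("/tmp/samples/suspicious.exe_", output_format="dictionary")
    print(json.dumps(results, indent=2))
```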


@@ -38,6 +38,12 @@ def main(argv=None):
)
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
parser.add_argument(
"-c",
"--check",
action="store_true",
help="Don't output (reformatted) rule, only return status. 0 = no changes, 1 = would reformat",
)
args = parser.parse_args(args=argv)
if args.verbose:
@@ -50,12 +56,22 @@ def main(argv=None):
logging.basicConfig(level=level)
logging.getLogger("capafmt").setLevel(level)
rule = capa.rules.Rule.from_yaml_file(args.path)
rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True)
reformatted_rule = rule.to_yaml()
if args.check:
if rule.definition == reformatted_rule:
logger.info("rule is formatted correctly, nice! (%s)", rule.name)
return 0
else:
logger.info("rule requires reformatting (%s)", rule.name)
return 1
if args.in_place:
with open(args.path, "wb") as f:
f.write(rule.to_yaml().encode("utf-8"))
f.write(reformatted_rule.encode("utf-8"))
else:
print(rule.to_yaml().rstrip("\n"))
print(reformatted_rule)
return 0
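The check mode boils down to comparing a rule's original definition against its reformatted form; a minimal sketch (the rule path is hypothetical):

```python
import capa.rules

rule = capa.rules.Rule.from_yaml_file("rules/example-rule.yml", use_ruamel=True)
if rule.definition == rule.to_yaml():
    print("rule is formatted correctly")
else:
    print("rule requires reformatting")
```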


@@ -15,7 +15,9 @@ See the License for the specific language governing permissions and limitations
"""
import os
import sys
import time
import string
import difflib
import hashlib
import logging
import os.path
@@ -24,6 +26,7 @@ import itertools
import posixpath
import capa.main
import capa.rules
import capa.engine
import capa.features
import capa.features.insn
@@ -194,7 +197,7 @@ class DoesntMatchExample(Lint):
continue
try:
extractor = capa.main.get_extractor(path, "auto")
extractor = capa.main.get_extractor(path, "auto", disable_progress=True)
capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
except Exception as e:
logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
@@ -232,7 +235,7 @@ class LibRuleNotInLibDirectory(Lint):
if "lib" not in rule.meta:
return False
return "/lib/" not in get_normpath(rule.meta["capa/path"])
return "lib/" not in get_normpath(rule.meta["capa/path"])
class LibRuleHasNamespace(Lint):
@@ -276,6 +279,32 @@ class FeatureNegativeNumber(Lint):
return False
class FormatSingleEmptyLineEOF(Lint):
name = "EOF format"
recommendation = "end file with a single empty line"
def check_rule(self, ctx, rule):
if rule.definition.endswith("\n") and not rule.definition.endswith("\n\n"):
return False
return True
class FormatIncorrect(Lint):
name = "rule format incorrect"
recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}"
def check_rule(self, ctx, rule):
actual = rule.definition
expected = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml()
if actual != expected:
diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(1))
self.recommendation = self.recommendation_template.format("".join(diff))
return True
return False
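The recommendation embeds plain `difflib.ndiff` output; for instance (strings are hypothetical):

```python
import difflib

actual = "rule:\n  meta:\n    name: demo rule\n"
expected = actual + "\n"  # e.g. the reformatted copy ends with an extra empty line
print("".join(difflib.ndiff(actual.splitlines(1), expected.splitlines(1))))
```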
def run_lints(lints, ctx, rule):
for lint in lints:
if lint.check_rule(ctx, rule):
@@ -331,15 +360,25 @@ FEATURE_LINTS = (
)
def get_normpath(path):
return posixpath.normpath(path).replace(os.sep, "/")
def lint_features(ctx, rule):
features = get_features(ctx, rule)
return run_feature_lints(FEATURE_LINTS, ctx, features)
FORMAT_LINTS = (
FormatSingleEmptyLineEOF(),
FormatIncorrect(),
)
def lint_format(ctx, rule):
return run_lints(FORMAT_LINTS, ctx, rule)
def get_normpath(path):
return posixpath.normpath(path).replace(os.sep, "/")
def get_features(ctx, rule):
# get features from rule and all dependencies including subscopes and matched rules
features = []
@@ -390,6 +429,7 @@ def lint_rule(ctx, rule):
lint_meta(ctx, rule),
lint_logic(ctx, rule),
lint_features(ctx, rule),
lint_format(ctx, rule),
)
)
@@ -500,6 +540,7 @@ def main(argv=None):
action="store_true",
help="Enable thorough linting - takes more time, but does a better job",
)
parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values")
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
args = parser.parse_args(args=argv)
@@ -516,15 +557,20 @@ def main(argv=None):
capa.main.set_vivisect_log_level(logging.CRITICAL)
logging.getLogger("capa").setLevel(logging.CRITICAL)
logging.getLogger("viv_utils").setLevel(logging.CRITICAL)
time0 = time.time()
try:
rules = capa.main.get_rules(args.rules)
rules = capa.main.get_rules(args.rules, disable_progress=True)
rules = capa.rules.RuleSet(rules)
logger.info("successfully loaded %s rules", len(rules))
except IOError as e:
logger.error("%s", str(e))
return -1
except capa.rules.InvalidRule as e:
if args.tag:
rules = rules.filter_rules_by_meta(args.tag)
logger.debug("selected %s rules", len(rules))
for i, r in enumerate(rules.rules, 1):
logger.debug(" %d. %s", i, r)
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))
return -1
@@ -542,6 +588,10 @@ def main(argv=None):
}
did_violate = lint(ctx, rules)
min, sec = divmod(time.time() - time0, 60)
logger.debug("lints ran for ~ %02d:%02dm", min, sec)
if not did_violate:
logger.info("no suggestions, nice!")
return 0


@@ -11,7 +11,6 @@ import sys
import setuptools
# halo==0.0.30 is the last version to support py2.7
requirements = [
"six",
"tqdm",
@@ -21,18 +20,20 @@ requirements = [
"termcolor",
"ruamel.yaml",
"wcwidth",
"halo==0.0.30",
"ida-settings==2.1.0",
]
if sys.version_info >= (3, 0):
# py3
requirements.append("halo")
requirements.append("networkx")
requirements.append("smda==1.5.13")
else:
# py2
requirements.append("enum34==1.1.6") # v1.1.6 is needed by halo 0.0.30 / spinners 0.0.24
requirements.append("halo==0.0.30") # halo==0.0.30 is the last version to support py2.7
requirements.append("vivisect==0.1.0")
requirements.append("viv-utils")
requirements.append("viv-utils==0.3.19")
requirements.append("networkx==2.2") # v2.2 is last version supported by Python 2.7
requirements.append("backports.functools-lru-cache")


@@ -10,6 +10,7 @@
import os
import sys
import os.path
import binascii
import contextlib
import collections
@@ -78,7 +79,33 @@ def get_viv_extractor(path):
vw = capa.main.get_workspace(path, "sc64", should_save=False)
else:
vw = capa.main.get_workspace(path, "auto", should_save=True)
return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
extractor = capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)
fixup_viv(path, extractor)
return extractor
def fixup_viv(path, extractor):
"""
vivisect fixups to overcome differences between backends
"""
if "3b13b" in path:
# vivisect only recognizes calling thunk function at 0x10001573
extractor.vw.makeFunction(0x10006860)
@lru_cache()
def get_smda_extractor(path):
from smda.SmdaConfig import SmdaConfig
from smda.Disassembler import Disassembler
import capa.features.extractors.smda
config = SmdaConfig()
config.STORE_BUFFER = True
disasm = Disassembler(config)
report = disasm.disassembleFile(path)
return capa.features.extractors.smda.SmdaFeatureExtractor(report, path)
@lru_cache()
@@ -129,6 +156,8 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_")
elif name == "al-khaser x86":
return os.path.join(CD, "data", "al-khaser_x86.exe_")
elif name == "al-khaser x64":
return os.path.join(CD, "data", "al-khaser_x64.exe_")
elif name.startswith("39c05"):
return os.path.join(CD, "data", "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_")
elif name.startswith("499c2"):
@@ -149,8 +178,12 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "82BF6347ACF15E5D883715DC289D8A2B.exe_")
elif name.startswith("pingtaest"):
return os.path.join(CD, "data", "ping_täst.exe_")
elif name.startswith("77329"):
return os.path.join(CD, "data", "773290480d5445f11d3dc1b800728966.exe_")
elif name.startswith("3b13b"):
return os.path.join(CD, "data", "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_")
else:
raise ValueError("unexpected sample fixture")
raise ValueError("unexpected sample fixture: %s" % name)
def get_sample_md5_by_name(name):
@@ -169,6 +202,8 @@ def get_sample_md5_by_name(name):
return "c8403fb05244e23a7931c766409b5e22"
elif name == "al-khaser x86":
return "db648cd247281954344f1d810c6fd590"
elif name == "al-khaser x64":
return "3cb21ae76ff3da4b7e02d77ff76e82be"
elif name.startswith("39c05"):
return "b7841b9d5dc1f511a93cc7576672ec0c"
elif name.startswith("499c2"):
@@ -187,8 +222,13 @@ def get_sample_md5_by_name(name):
return "64d9f7d96b99467f36e22fada623c3bb"
elif name.startswith("82bf6"):
return "82bf6347acf15e5d883715dc289d8a2b"
elif name.startswith("77329"):
return "773290480d5445f11d3dc1b800728966"
elif name.startswith("3b13b"):
# file name is SHA256 hash
return "56a6ffe6a02941028cc8235204eef31d"
else:
raise ValueError("unexpected sample fixture")
raise ValueError("unexpected sample fixture: %s" % name)
def resolve_sample(sample):
@@ -377,7 +417,7 @@ FEATURE_PRESENCE_TESTS = [
),
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
# insn/api: x64 nested thunk
("82bf6", "function=0x140059342", capa.features.insn.API("ElfClearEventLogFile"), True),
("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
# insn/api: call via jmp
("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
@@ -392,16 +432,21 @@ FEATURE_PRESENCE_TESTS = [
("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True),
("mimikatz", "function=0x40105D", capa.features.String("ACR > "), True),
("mimikatz", "function=0x40105D", capa.features.String("nope"), False),
("773290...", "function=0x140001140", capa.features.String(r"%s:\\OfficePackagesForWDAG"), True),
# insn/regex, issue #262
("pma16-01", "function=0x4021B0", capa.features.Regex("HTTP/1.0"), True),
("pma16-01", "function=0x4021B0", capa.features.Regex("www.practicalmalwareanalysis.com"), False),
# insn/string, pointer to string
("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True),
# insn/string, direct memory reference
("mimikatz", "function=0x46D6CE", capa.features.String("(null)"), True),
# insn/bytes
("mimikatz", "function=0x40105D", capa.features.Bytes("SCardControl".encode("utf-16le")), True),
("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True),
("mimikatz", "function=0x40105D", capa.features.Bytes("ACR > ".encode("utf-16le")), True),
("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False),
# IDA features included byte sequences read from invalid memory, fixed in #409
("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False),
# insn/bytes, pointer to bytes
("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True),
# insn/characteristic(nzxor)
@@ -409,6 +454,9 @@ FEATURE_PRESENCE_TESTS = [
("mimikatz", "function=0x40105D", capa.features.Characteristic("nzxor"), False),
# insn/characteristic(nzxor): no security cookies
("mimikatz", "function=0x46D534", capa.features.Characteristic("nzxor"), False),
# insn/characteristic(nzxor): xorps
# viv needs fixup to recognize function, see above
("3b13b...", "function=0x10006860", capa.features.Characteristic("nzxor"), True),
# insn/characteristic(peb access)
("kernel32-64", "function=0x1800017D0", capa.features.Characteristic("peb access"), True),
("mimikatz", "function=0x4556E5", capa.features.Characteristic("peb access"), False),
@@ -473,7 +521,7 @@ def do_test_feature_count(get_extractor, sample, scope, feature, expected):
def get_extractor(path):
if sys.version_info >= (3, 0):
raise RuntimeError("no supported py3 backends yet")
extractor = get_smda_extractor(path)
else:
extractor = get_viv_extractor(path)


@@ -19,7 +19,6 @@ import capa.features
from capa.engine import *
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_main(z9324d_extractor):
# tests rules can be loaded successfully and all output modes
path = z9324d_extractor.path
@@ -29,7 +28,6 @@ def test_main(z9324d_extractor):
assert capa.main.main([path]) == 0
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_main_single_rule(z9324d_extractor, tmpdir):
# tests a single rule can be loaded successfully
RULE_CONTENT = textwrap.dedent(
@@ -58,7 +56,6 @@ def test_main_single_rule(z9324d_extractor, tmpdir):
)
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_main_non_ascii_filename(pingtaest_extractor, tmpdir, capsys):
# on py2.7, need to be careful about str (which can hold bytes)
# vs unicode (which is only unicode characters).
@@ -71,18 +68,22 @@ def test_main_non_ascii_filename(pingtaest_extractor, tmpdir, capsys):
std = capsys.readouterr()
# but here, we have to use a unicode instance,
# because capsys has decoded the output for us.
assert pingtaest_extractor.path.decode("utf-8") in std.out
if sys.version_info >= (3, 0):
assert pingtaest_extractor.path in std.out
else:
assert pingtaest_extractor.path.decode("utf-8") in std.out
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_main_non_ascii_filename_nonexistent(tmpdir, caplog):
NON_ASCII_FILENAME = "täst_not_there.exe"
assert capa.main.main(["-q", NON_ASCII_FILENAME]) == -1
assert NON_ASCII_FILENAME.decode("utf-8") in caplog.text
if sys.version_info >= (3, 0):
assert NON_ASCII_FILENAME in caplog.text
else:
assert NON_ASCII_FILENAME.decode("utf-8") in caplog.text
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_main_shellcode(z499c2_extractor):
path = z499c2_extractor.path
assert capa.main.main([path, "-vv", "-f", "sc32"]) == 0
@@ -137,7 +138,6 @@ def test_ruleset():
assert len(rules.basic_block_rules) == 1
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_match_across_scopes_file_function(z9324d_extractor):
rules = capa.rules.RuleSet(
[
@@ -201,7 +201,6 @@ def test_match_across_scopes_file_function(z9324d_extractor):
assert ".text section and install service" in capabilities
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_match_across_scopes(z9324d_extractor):
rules = capa.rules.RuleSet(
[
@@ -264,7 +263,6 @@ def test_match_across_scopes(z9324d_extractor):
assert "kill thread program" in capabilities
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_subscope_bb_rules(z9324d_extractor):
rules = capa.rules.RuleSet(
[
@@ -289,7 +287,6 @@ def test_subscope_bb_rules(z9324d_extractor):
assert "test rule" in capabilities
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_byte_matching(z9324d_extractor):
rules = capa.rules.RuleSet(
[
@@ -312,7 +309,6 @@ def test_byte_matching(z9324d_extractor):
assert "byte match test" in capabilities
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_count_bb(z9324d_extractor):
rules = capa.rules.RuleSet(
[
@@ -336,7 +332,6 @@ def test_count_bb(z9324d_extractor):
assert "count bb" in capabilities
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_fix262(pma16_01_extractor, capsys):
# tests rules can be loaded successfully and all output modes
path = pma16_01_extractor.path
@@ -347,7 +342,6 @@ def test_fix262(pma16_01_extractor, capsys):
assert "www.practicalmalwareanalysis.com" not in std.out
@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivisect only works on py2")
def test_not_render_rules_also_matched(z9324d_extractor, capsys):
# rules that are also matched by other rules should not get rendered by default.
# this cuts down on the amount of output while giving approx the same detail.


@@ -282,7 +282,8 @@ def test_lib_rules():
),
]
)
assert len(rules.function_rules) == 1
# lib rules are added to the rule set
assert len(rules.function_rules) == 2
def test_subscope_rules():


@@ -0,0 +1,30 @@
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
from fixtures import *
@parametrize(
"sample,scope,feature,expected",
FEATURE_PRESENCE_TESTS,
indirect=["sample", "scope"],
)
def test_smda_features(sample, scope, feature, expected):
with xfail(sys.version_info < (3, 0), reason="SMDA only works on py3"):
do_test_feature_presence(get_smda_extractor, sample, scope, feature, expected)
@parametrize(
"sample,scope,feature,expected",
FEATURE_COUNT_TESTS,
indirect=["sample", "scope"],
)
def test_smda_feature_counts(sample, scope, feature, expected):
with xfail(sys.version_info < (3, 0), reason="SMDA only works on py3"):
do_test_feature_count(get_smda_extractor, sample, scope, feature, expected)