diff --git a/CHANGELOG.md b/CHANGELOG.md index e8700fa4..42cfd75f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,103 @@ # Change Log +## v1.5.0 (2021-02-05) + +This release brings support for running capa under Python 3 via [SMDA](https://github.com/danielplohmann/smda), more thorough CI testing and linting, better extraction of strings and byte features, and 50 (!) new rules. We appreciate everyone who opened issues, provided feedback, and contributed code and rules. A special shout-out to the following new project contributors: + + - @johnk3r + - @doomedraven + - @stvemillertime + - @itreallynick + - @0x534a + +@dzbeck also added [Malware Behavior Catalog](https://github.com/MBCProject/mbc-markdown) (MBC) and ATT&CK mappings for many rules. + +Download a standalone binary below and check out the readme [here on GitHub](https://github.com/fireeye/capa/). Report issues on our [issue tracker](https://github.com/fireeye/capa/issues) and contribute new rules at [capa-rules](https://github.com/fireeye/capa-rules/).
+ + +### New Features + + - py3 support via SMDA #355 @danielplohmann @jcrussell + - scripts: example of using capa as a library #372, #380 @doomedraven + - ci: enable dependabot #373 @mr-tz + - ci: lint rules @mr-tz + - ci: lint rule format #401 @mr-tz + - freeze: add base address #391 @mr-tz + - json: meta: add base address #412 @mr-tz + +### New Rules (50) + + - 64-bit execution via heavens gate @recvfrom + - contain anti-disasm techniques @mr-tz + - check for microsoft office emulation @re-fox + - check for windows sandbox via device @re-fox + - check for windows sandbox via dns suffix @re-fox + - check for windows sandbox via genuine state @re-fox + - check for windows sandbox via process name @re-fox + - check for windows sandbox via registry @re-fox + - capture microphone audio @re-fox + - capture public ip @re-fox + - get domain trust relationships @johnk3r + - check HTTP status code @mr-tz + - compiled with perl2exe @re-fox + - compiled with ps2exe @re-fox + - compiled with pyarmor @stvemillertime, @itreallynick + - validate payment card number using luhn algorithm @re-fox + - hash data using fnv @re-fox @mr-tz + - generate random numbers via WinAPI @mike-hunhoff @johnk3r + - enumerate files recursively @re-fox + - get file system object information @mike-hunhoff + - read virtual disk @re-fox + - register minifilter driver @mike-hunhoff + - start minifilter driver @mike-hunhoff + - enumerate gui resources @johnk3r + - simulate CTRL ALT DEL @mike-hunhoff + - hijack thread execution @0x534a + - inject dll @0x534a + - inject pe @0x534a + - create or open registry key @mike-hunhoff + - delete registry value @mike-hunhoff + - query or enumerate registry key @mike-hunhoff + - query or enumerate registry value @mike-hunhoff + - resume thread @0x534a + - suspend thread @0x534a + - allocate memory @0x534a + - allocate RW memory @0x534a + - contain pusha popa sequence @mr-tz + - create or open file @mike-hunhoff + - open process @0x534a + - open thread @0x534a + - 
get kernel32 base address @mr-tz + - get ntdll base address @mr-tz + - encrypt or decrypt data via BCrypt @mike-hunhoff + - generate random numbers using the Delphi LCG @williballenthin + - hash data via BCrypt @mike-hunhoff + - migrate process to active window station @williballenthin + - patch process command line @williballenthin + - resolve function by hash @williballenthin + - persist via Winlogon Helper DLL registry key @0x534a + - schedule task via command line @0x534a + +### Bug Fixes + + - doc: pyinstaller build process @mr-tz + - ida: better bytes extraction #409 @mike-hunhoff + - viv: better unicode string extraction #364 @mike-hunhoff + - viv: better unicode string extraction #378 @mr-tz + - viv: more xor instructions #379 @mr-tz + - viv: decrease logging verbosity #381 @mr-tz + - rules: fix api description syntax #403 @mike-hunhoff + - main: disable progress background thread #410 @mike-hunhoff + +### Changes + + - rules: return lib rules for scopes #398 @mr-tz + +### Raw diffs + + - [capa v1.4.1...v1.5.0](https://github.com/fireeye/capa/compare/v1.4.1...v1.5.0) + - [capa-rules v1.4.0...v1.5.0](https://github.com/fireeye/capa-rules/compare/v1.4.0...v1.5.0) + ## v1.4.1 (2020-10-23) This release fixes an issue building capa on our CI server, which prevented us from building standalone binaries for v1.4.1. diff --git a/README.md b/README.md index 664eebbd..eef892cf 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ![capa](.github/logo.png) [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) -[![Number of rules](https://img.shields.io/badge/rules-446-blue.svg)](https://github.com/fireeye/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-458-blue.svg)](https://github.com/fireeye/capa-rules) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) capa detects capabilities in executable files. 
diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 1d996311..487e6686 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -166,6 +166,10 @@ def basic_block_size(bb): def read_bytes_at(ea, count): """ """ + # check if byte has a value, see get_wide_byte doc + if not idc.is_loaded(ea): + return b"" + segm_end = idc.get_segm_end(ea) if ea + count > segm_end: return idc.get_bytes(ea, segm_end - ea) diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 7513cde2..3f5aef52 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -148,6 +148,9 @@ def extract_insn_bytes_features(f, bb, insn): example: push offset iid_004118d4_IShellLinkA ; riid """ + if idaapi.is_call_insn(insn): + return + ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn) if ref != insn.ea: extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE) diff --git a/capa/features/extractors/smda/insn.py b/capa/features/extractors/smda/insn.py index f674461a..bf23f607 100644 --- a/capa/features/extractors/smda/insn.py +++ b/capa/features/extractors/smda/insn.py @@ -336,7 +336,7 @@ def extract_function_calls_from(f, bb, insn): # mark as recursive yield Characteristic("recursive call"), outref if insn.offset in f.apirefs: - yield Characteristic("calls from"), f.apirefs[insn.offset] + yield Characteristic("calls from"), insn.offset # this is a feature that's most relevant at the function or basic block scope, diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 0aadc512..e708ee47 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -258,10 +258,10 @@ def extract_insn_bytes_features(f, bb, insn): example: # push offset iid_004118d4_IShellLinkA ; riid """ - for oper in insn.opers: - if 
insn.mnem == "call": - continue + if insn.mnem == "call": + return + for oper in insn.opers: if isinstance(oper, envi.archs.i386.disasm.i386ImmOper): v = oper.getOperValue(oper) elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper): diff --git a/capa/ida/helpers/__init__.py b/capa/ida/helpers/__init__.py index adaf8981..020483f9 100644 --- a/capa/ida/helpers/__init__.py +++ b/capa/ida/helpers/__init__.py @@ -115,6 +115,7 @@ def collect_metadata(): "analysis": { "format": idaapi.get_file_type_name(), "extractor": "ida", + "base_address": idaapi.get_imagebase(), }, "version": capa.version.__version__, } diff --git a/capa/main.py b/capa/main.py index 0e60d6c0..27d5a9c3 100644 --- a/capa/main.py +++ b/capa/main.py @@ -115,7 +115,13 @@ def find_capabilities(ruleset, extractor, disable_progress=None): } } - for f in tqdm.tqdm(list(extractor.get_functions()), disable=disable_progress, desc="matching", unit=" functions"): + pbar = tqdm.tqdm + if disable_progress: + # do not use tqdm to avoid unnecessary side effects when caller intends + # to disable progress completely + pbar = lambda s, *args, **kwargs: s + + for f in pbar(list(extractor.get_functions()), desc="matching", unit=" functions"): function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f) meta["feature_counts"]["functions"][f.__int__()] = feature_count logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count) @@ -366,7 +372,13 @@ def get_rules(rule_path, disable_progress=False): rules = [] - for rule_path in tqdm.tqdm(list(rule_paths), disable=disable_progress, desc="loading ", unit=" rules"): + pbar = tqdm.tqdm + if disable_progress: + # do not use tqdm to avoid unnecessary side effects when caller intends + # to disable progress completely + pbar = lambda s, *args, **kwargs: s + + for rule_path in pbar(list(rule_paths), desc="loading ", unit=" rules"): try: rule = capa.rules.Rule.from_yaml_file(rule_path) except 
capa.rules.InvalidRule: diff --git a/capa/rules.py b/capa/rules.py index 09929636..b9a42056 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -6,6 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. +import re import uuid import codecs import logging @@ -600,6 +601,9 @@ class Rule(object): # use block mode, not inline json-like mode y.default_flow_style = False + # leave quotes unchanged + y.preserve_quotes = True + # indent lists by two spaces below their parent # # features: @@ -614,16 +618,20 @@ class Rule(object): return y @classmethod - def from_yaml(cls, s): - # use pyyaml because it can be much faster than ruamel (pure python) - doc = yaml.load(s, Loader=cls._get_yaml_loader()) + def from_yaml(cls, s, use_ruamel=False): + if use_ruamel: + # ruamel enables nice formatting and doc roundtripping with comments + doc = cls._get_ruamel_yaml_parser().load(s) + else: + # use pyyaml because it can be much faster than ruamel (pure python) + doc = yaml.load(s, Loader=cls._get_yaml_loader()) return cls.from_dict(doc, s) @classmethod - def from_yaml_file(cls, path): + def from_yaml_file(cls, path, use_ruamel=False): with open(path, "rb") as f: try: - return cls.from_yaml(f.read().decode("utf-8")) + return cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel) except InvalidRule as e: raise InvalidRuleWithPath(path, str(e)) @@ -716,7 +724,18 @@ class Rule(object): # tweaking `ruamel.indent()` doesn't quite give us the control we want. # so, add the two extra spaces that we've determined we need through experimentation. 
# see #263 - doc = doc.replace(" description:", " description:") + # only do this for the features section, so the meta description doesn't get reformatted + # assumes features section always exists + features_offset = doc.find("features") + doc = doc[:features_offset] + doc[features_offset:].replace(" description:", " description:") + + # for negative hex numbers, yaml dump outputs: + # - offset: !!int '0x-30' + # we prefer: + # - offset: -0x30 + # the below regex makes these adjustments and while ugly, we don't have to explore the ruamel.yaml insides + doc = re.sub(r"!!int '0x-([0-9a-fA-F]+)'", r"-0x\1", doc) + return doc diff --git a/rules b/rules index 37351674..80b88d9e 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 37351674f65a50e845ad637418c408932676139a +Subproject commit 80b88d9e1db7b0fc0bdeea752e1b3d5badcfc3fb diff --git a/scripts/capafmt.py b/scripts/capafmt.py index 236c464e..a0b2a7c6 100644 --- a/scripts/capafmt.py +++ b/scripts/capafmt.py @@ -38,6 +38,12 @@ def main(argv=None): ) parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging") parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") + parser.add_argument( + "-c", + "--check", + action="store_true", + help="Don't output (reformatted) rule, only return status. 0 = no changes, 1 = would reformat", + ) args = parser.parse_args(args=argv) if args.verbose: @@ -50,12 +56,22 @@ def main(argv=None): logging.basicConfig(level=level) logging.getLogger("capafmt").setLevel(level) - rule = capa.rules.Rule.from_yaml_file(args.path) + rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True) + reformatted_rule = rule.to_yaml() + + if args.check: + if rule.definition == reformatted_rule: + logger.info("rule is formatted correctly, nice! 
(%s)", rule.name) + return 0 + else: + logger.info("rule requires reformatting (%s)", rule.name) + return 1 + if args.in_place: with open(args.path, "wb") as f: - f.write(rule.to_yaml().encode("utf-8")) + f.write(reformatted_rule.encode("utf-8")) else: - print(rule.to_yaml().rstrip("\n")) + print(reformatted_rule) return 0 diff --git a/scripts/lint.py b/scripts/lint.py index 97f5f562..2ae04537 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -17,6 +17,7 @@ import os import sys import time import string +import difflib import hashlib import logging import os.path @@ -25,6 +26,7 @@ import itertools import posixpath import capa.main +import capa.rules import capa.engine import capa.features import capa.features.insn @@ -277,6 +279,32 @@ class FeatureNegativeNumber(Lint): return False +class FormatSingleEmptyLineEOF(Lint): + name = "EOF format" + recommendation = "end file with a single empty line" + + def check_rule(self, ctx, rule): + if rule.definition.endswith("\n") and not rule.definition.endswith("\n\n"): + return False + return True + + +class FormatIncorrect(Lint): + name = "rule format incorrect" + recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}" + + def check_rule(self, ctx, rule): + actual = rule.definition + expected = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml() + + if actual != expected: + diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(1)) + self.recommendation = self.recommendation_template.format("".join(diff)) + return True + + return False + + def run_lints(lints, ctx, rule): for lint in lints: if lint.check_rule(ctx, rule): @@ -332,15 +360,25 @@ FEATURE_LINTS = ( ) -def get_normpath(path): - return posixpath.normpath(path).replace(os.sep, "/") - - def lint_features(ctx, rule): features = get_features(ctx, rule) return run_feature_lints(FEATURE_LINTS, ctx, features) +FORMAT_LINTS = ( + FormatSingleEmptyLineEOF(), + FormatIncorrect(), +) + + +def lint_format(ctx, rule): + return 
run_lints(FORMAT_LINTS, ctx, rule) + + +def get_normpath(path): + return posixpath.normpath(path).replace(os.sep, "/") + + def get_features(ctx, rule): # get features from rule and all dependencies including subscopes and matched rules features = [] @@ -391,6 +429,7 @@ def lint_rule(ctx, rule): lint_meta(ctx, rule), lint_logic(ctx, rule), lint_features(ctx, rule), + lint_format(ctx, rule), ) ) @@ -518,6 +557,7 @@ def main(argv=None): capa.main.set_vivisect_log_level(logging.CRITICAL) logging.getLogger("capa").setLevel(logging.CRITICAL) + logging.getLogger("viv_utils").setLevel(logging.CRITICAL) time0 = time.time() @@ -549,8 +589,8 @@ def main(argv=None): did_violate = lint(ctx, rules) - diff = time.time() - time0 - logger.debug("lint ran for ~ %02d:%02d", (diff // 60), diff) + min, sec = divmod(time.time() - time0, 60) + logger.debug("lints ran for ~ %02d:%02dm", min, sec) if not did_violate: logger.info("no suggestions, nice!") diff --git a/tests/data b/tests/data index fac3eb57..36821e21 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit fac3eb5708269f2439ccb7e03a7f4f65770c47b4 +Subproject commit 36821e216abfa41d683b747c31af6a47ab2d8ec9 diff --git a/tests/fixtures.py b/tests/fixtures.py index a9fc6913..4261408b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -10,6 +10,7 @@ import os import sys import os.path +import binascii import contextlib import collections @@ -444,6 +445,8 @@ FEATURE_PRESENCE_TESTS = [ ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True), ("mimikatz", "function=0x40105D", capa.features.Bytes("ACR > ".encode("utf-16le")), True), ("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False), + # IDA features included byte sequences read from invalid memory, fixed in #409 + ("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False), # insn/bytes, pointer to bytes ("mimikatz", "function=0x44EDEF", 
capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True), # insn/characteristic(nzxor)