merge upstream

2026-01-26 03:04:56 -08:00 · 2021-02-09 07:55:53 -07:00
parent c8a99c247c 6636b9d56c
commit 1a804ed97b
14 changed files with 220 additions and 24 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,103 @@
 # Change Log

+## v1.5.0 (2021-02-05)
+
+This release brings support for running capa under Python 3 via [SMDA](https://github.com/danielplohmann/smda), more thorough CI testing and linting, better extraction of strings and byte features, and 50 (!) new rules. We appreciate everyone who opened issues, provided feedback, and contributed code and rules. A special shout out to the following new project contributors:
+
+  - @johnk3r
+  - @doomedraven
+  - @stvemillertime
+  - @itreallynick
+  - @0x534a
+  
+@dzbeck also added [Malware Behavior Catalog](https://github.com/MBCProject/mbc-markdown) (MBC) and ATT&CK mappings for many rules.
+
+Download a standalone binary below and check out the readme [here on GitHub](https://github.com/fireeye/capa/). Report issues on our [issue tracker](https://github.com/fireeye/capa/issues) and contribute new rules at [capa-rules](https://github.com/fireeye/capa-rules/).
+
+
+### New Features
+
+  - py3 support via SMDA #355 @danielplohmann @jcrussell
+  - scripts: example of using capa as a library #372, #380 @doomedraven
+  - ci: enable dependabot #373 @mr-tz
+  - ci: lint rules @mr-tz
+  - ci: lint rule format #401 @mr-tz
+  - freeze: add base address #391 @mr-tz
+  - json: meta: add base address #412 @mr-tz
+
+### New Rules (50)
+
+  - 64-bit execution via heavens gate @recvfrom
+  - contain anti-disasm techniques @mr-tz
+  - check for microsoft office emulation @re-fox
+  - check for windows sandbox via device @re-fox
+  - check for windows sandbox via dns suffix @re-fox
+  - check for windows sandbox via genuine state @re-fox
+  - check for windows sandbox via process name @re-fox
+  - check for windows sandbox via registry @re-fox
+  - capture microphone audio @re-fox
+  - capture public ip @re-fox
+  - get domain trust relationships @johnk3r
+  - check HTTP status code @mr-tz
+  - compiled with perl2exe @re-fox
+  - compiled with ps2exe @re-fox
+  - compiled with pyarmor @stvemillertime, @itreallynick
+  - validate payment card number using luhn algorithm @re-fox
+  - hash data using fnv @re-fox @mr-tz
+  - generate random numbers via WinAPI @mike-hunhoff @johnk3r
+  - enumerate files recursively @re-fox
+  - get file system object information @mike-hunhoff
+  - read virtual disk @re-fox
+  - register minifilter driver @mike-hunhoff
+  - start minifilter driver @mike-hunhoff
+  - enumerate gui resources @johnk3r
+  - simulate CTRL ALT DEL @mike-hunhoff
+  - hijack thread execution @0x534a
+  - inject dll @0x534a
+  - inject pe @0x534a
+  - create or open registry key @mike-hunhoff
+  - delete registry value @mike-hunhoff
+  - query or enumerate registry key @mike-hunhoff
+  - query or enumerate registry value @mike-hunhoff
+  - resume thread @0x534a
+  - suspend thread @0x534a
+  - allocate memory @0x534a
+  - allocate RW memory @0x534a
+  - contain pusha popa sequence @mr-tz
+  - create or open file @mike-hunhoff
+  - open process @0x534a
+  - open thread @0x534a
+  - get kernel32 base address @mr-tz
+  - get ntdll base address @mr-tz
+  - encrypt or decrypt data via BCrypt @mike-hunhoff
+  - generate random numbers using the Delphi LCG @williballenthin
+  - hash data via BCrypt @mike-hunhoff
+  - migrate process to active window station @williballenthin
+  - patch process command line @williballenthin
+  - resolve function by hash @williballenthin
+  - persist via Winlogon Helper DLL registry key @0x534a
+  - schedule task via command line @0x534a
+
+### Bug Fixes
+
+  - doc: pyinstaller build process @mr-tz
+  - ida: better bytes extraction #409 @mike-hunhoff
+  - viv: better unicode string extraction #364 @mike-hunhoff
+  - viv: better unicode string extraction #378 @mr-tz
+  - viv: more xor instructions #379 @mr-tz
+  - viv: decrease logging verbosity #381 @mr-tz
+  - rules: fix api description syntax #403 @mike-hunhoff
+  - main: disable progress background thread #410 @mike-hunhoff
+  
+### Changes
+
+  - rules: return lib rules for scopes #398 @mr-tz
+  
+### Raw diffs
+
+  - [capa v1.4.1...v1.5.0](https://github.com/fireeye/capa/compare/v1.4.1...v1.5.0)
+  - [capa-rules v1.4.0...v1.5.0](https://github.com/fireeye/capa-rules/compare/v1.4.0...v1.5.0)
+
 ## v1.4.1 (2020-10-23)

 This release fixes an issue building capa on our CI server, which prevented us from building standalone binaries for v1.4.1.
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 ![capa](.github/logo.png)

 [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
-[![Number of rules](https://img.shields.io/badge/rules-446-blue.svg)](https://github.com/fireeye/capa-rules)
+[![Number of rules](https://img.shields.io/badge/rules-458-blue.svg)](https://github.com/fireeye/capa-rules)
 [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

 capa detects capabilities in executable files.
--- a/capa/features/extractors/ida/helpers.py
+++ b/capa/features/extractors/ida/helpers.py
@@ -166,6 +166,10 @@ def basic_block_size(bb):

 def read_bytes_at(ea, count):
    """ """
+    # check if byte has a value, see get_wide_byte doc
+    if not idc.is_loaded(ea):
+        return b""
+
    segm_end = idc.get_segm_end(ea)
    if ea + count > segm_end:
        return idc.get_bytes(ea, segm_end - ea)
--- a/capa/features/extractors/ida/insn.py
+++ b/capa/features/extractors/ida/insn.py
@@ -148,6 +148,9 @@ def extract_insn_bytes_features(f, bb, insn):
    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
+    if idaapi.is_call_insn(insn):
+        return
+
    ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
    if ref != insn.ea:
        extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
--- a/capa/features/extractors/smda/insn.py
+++ b/capa/features/extractors/smda/insn.py
@@ -336,7 +336,7 @@ def extract_function_calls_from(f, bb, insn):
                # mark as recursive
                yield Characteristic("recursive call"), outref
    if insn.offset in f.apirefs:
-        yield Characteristic("calls from"), f.apirefs[insn.offset]
+        yield Characteristic("calls from"), insn.offset


 # this is a feature that's most relevant at the function or basic block scope,
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -258,10 +258,10 @@ def extract_insn_bytes_features(f, bb, insn):
    example:
        #     push    offset iid_004118d4_IShellLinkA ; riid
    """
-    for oper in insn.opers:
-        if insn.mnem == "call":
-            continue
+    if insn.mnem == "call":
+        return

+    for oper in insn.opers:
        if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
            v = oper.getOperValue(oper)
        elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
--- a/capa/ida/helpers/init.py
+++ b/capa/ida/helpers/init.py
@@ -115,6 +115,7 @@ def collect_metadata():
        "analysis": {
            "format": idaapi.get_file_type_name(),
            "extractor": "ida",
+            "base_address": idaapi.get_imagebase(),
        },
        "version": capa.version.__version__,
    }
--- a/capa/main.py
+++ b/capa/main.py
@@ -115,7 +115,13 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
        }
    }

-    for f in tqdm.tqdm(list(extractor.get_functions()), disable=disable_progress, desc="matching", unit=" functions"):
+    pbar = tqdm.tqdm
+    if disable_progress:
+        # do not use tqdm to avoid unnecessary side effects when caller intends
+        # to disable progress completely
+        pbar = lambda s, *args, **kwargs: s
+
+    for f in pbar(list(extractor.get_functions()), desc="matching", unit=" functions"):
        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
        meta["feature_counts"]["functions"][f.__int__()] = feature_count
        logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
@@ -366,7 +372,13 @@ def get_rules(rule_path, disable_progress=False):

    rules = []

-    for rule_path in tqdm.tqdm(list(rule_paths), disable=disable_progress, desc="loading ", unit="     rules"):
+    pbar = tqdm.tqdm
+    if disable_progress:
+        # do not use tqdm to avoid unnecessary side effects when caller intends
+        # to disable progress completely
+        pbar = lambda s, *args, **kwargs: s
+
+    for rule_path in pbar(list(rule_paths), desc="loading ", unit="     rules"):
        try:
            rule = capa.rules.Rule.from_yaml_file(rule_path)
        except capa.rules.InvalidRule:
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -6,6 +6,7 @@
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

+import re
 import uuid
 import codecs
 import logging
@@ -600,6 +601,9 @@ class Rule(object):
        # use block mode, not inline json-like mode
        y.default_flow_style = False

+        # leave quotes unchanged
+        y.preserve_quotes = True
+
        # indent lists by two spaces below their parent
        #
        #     features:
@@ -614,16 +618,20 @@ class Rule(object):
        return y

    @classmethod
-    def from_yaml(cls, s):
-        # use pyyaml because it can be much faster than ruamel (pure python)
-        doc = yaml.load(s, Loader=cls._get_yaml_loader())
+    def from_yaml(cls, s, use_ruamel=False):
+        if use_ruamel:
+            # ruamel enables nice formatting and doc roundtripping with comments
+            doc = cls._get_ruamel_yaml_parser().load(s)
+        else:
+            # use pyyaml because it can be much faster than ruamel (pure python)
+            doc = yaml.load(s, Loader=cls._get_yaml_loader())
        return cls.from_dict(doc, s)

    @classmethod
-    def from_yaml_file(cls, path):
+    def from_yaml_file(cls, path, use_ruamel=False):
        with open(path, "rb") as f:
            try:
-                return cls.from_yaml(f.read().decode("utf-8"))
+                return cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel)
            except InvalidRule as e:
                raise InvalidRuleWithPath(path, str(e))

@@ -716,7 +724,18 @@ class Rule(object):
        # tweaking `ruamel.indent()` doesn't quite give us the control we want.
        # so, add the two extra spaces that we've determined we need through experimentation.
        # see #263
-        doc = doc.replace("  description:", "    description:")
+        # only do this for the features section, so the meta description doesn't get reformatted
+        # assumes features section always exists
+        features_offset = doc.find("features")
+        doc = doc[:features_offset] + doc[features_offset:].replace("  description:", "    description:")
+
+        # for negative hex numbers, yaml dump outputs:
+        # - offset: !!int '0x-30'
+        # we prefer:
+        # - offset: -0x30
+        # the regex below makes this adjustment; while ugly, it saves us from having to dig into the ruamel.yaml internals
+        doc = re.sub(r"!!int '0x-([0-9a-fA-F]+)'", r"-0x\1", doc)
+
        return doc


--- a/2
+++ b/2
--- a/scripts/capafmt.py
+++ b/scripts/capafmt.py
@@ -38,6 +38,12 @@ def main(argv=None):
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
+    parser.add_argument(
+        "-c",
+        "--check",
+        action="store_true",
+        help="Don't output (reformatted) rule, only return status. 0 = no changes, 1 = would reformat",
+    )
    args = parser.parse_args(args=argv)

    if args.verbose:
@@ -50,12 +56,22 @@ def main(argv=None):
    logging.basicConfig(level=level)
    logging.getLogger("capafmt").setLevel(level)

-    rule = capa.rules.Rule.from_yaml_file(args.path)
+    rule = capa.rules.Rule.from_yaml_file(args.path, use_ruamel=True)
+    reformatted_rule = rule.to_yaml()
+
+    if args.check:
+        if rule.definition == reformatted_rule:
+            logger.info("rule is formatted correctly, nice! (%s)", rule.name)
+            return 0
+        else:
+            logger.info("rule requires reformatting (%s)", rule.name)
+            return 1
+
    if args.in_place:
        with open(args.path, "wb") as f:
-            f.write(rule.to_yaml().encode("utf-8"))
+            f.write(reformatted_rule.encode("utf-8"))
    else:
-        print(rule.to_yaml().rstrip("\n"))
+        print(reformatted_rule)

    return 0

--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -17,6 +17,7 @@ import os
 import sys
 import time
 import string
+import difflib
 import hashlib
 import logging
 import os.path
@@ -25,6 +26,7 @@ import itertools
 import posixpath

 import capa.main
+import capa.rules
 import capa.engine
 import capa.features
 import capa.features.insn
@@ -277,6 +279,32 @@ class FeatureNegativeNumber(Lint):
        return False


+class FormatSingleEmptyLineEOF(Lint):
+    name = "EOF format"
+    recommendation = "end file with a single empty line"
+
+    def check_rule(self, ctx, rule):
+        if rule.definition.endswith("\n") and not rule.definition.endswith("\n\n"):
+            return False
+        return True
+
+
+class FormatIncorrect(Lint):
+    name = "rule format incorrect"
+    recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}"
+
+    def check_rule(self, ctx, rule):
+        actual = rule.definition
+        expected = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml()
+
+        if actual != expected:
+            diff = difflib.ndiff(actual.splitlines(1), expected.splitlines(1))
+            self.recommendation = self.recommendation_template.format("".join(diff))
+            return True
+
+        return False
+
+
 def run_lints(lints, ctx, rule):
    for lint in lints:
        if lint.check_rule(ctx, rule):
@@ -332,15 +360,25 @@ FEATURE_LINTS = (
 )


-def get_normpath(path):
-    return posixpath.normpath(path).replace(os.sep, "/")
-
-
 def lint_features(ctx, rule):
    features = get_features(ctx, rule)
    return run_feature_lints(FEATURE_LINTS, ctx, features)


+FORMAT_LINTS = (
+    FormatSingleEmptyLineEOF(),
+    FormatIncorrect(),
+)
+
+
+def lint_format(ctx, rule):
+    return run_lints(FORMAT_LINTS, ctx, rule)
+
+
+def get_normpath(path):
+    return posixpath.normpath(path).replace(os.sep, "/")
+
+
 def get_features(ctx, rule):
    # get features from rule and all dependencies including subscopes and matched rules
    features = []
@@ -391,6 +429,7 @@ def lint_rule(ctx, rule):
            lint_meta(ctx, rule),
            lint_logic(ctx, rule),
            lint_features(ctx, rule),
+            lint_format(ctx, rule),
        )
    )

@@ -518,6 +557,7 @@ def main(argv=None):

    capa.main.set_vivisect_log_level(logging.CRITICAL)
    logging.getLogger("capa").setLevel(logging.CRITICAL)
+    logging.getLogger("viv_utils").setLevel(logging.CRITICAL)

    time0 = time.time()

@@ -549,8 +589,8 @@ def main(argv=None):

    did_violate = lint(ctx, rules)

-    diff = time.time() - time0
-    logger.debug("lint ran for ~ %02d:%02d", (diff // 60), diff)
+    min, sec = divmod(time.time() - time0, 60)
+    logger.debug("lints ran for ~ %02d:%02dm", min, sec)

    if not did_violate:
        logger.info("no suggestions, nice!")
--- a/tests/data
+++ b/tests/data
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -10,6 +10,7 @@
 import os
 import sys
 import os.path
+import binascii
 import contextlib
 import collections

@@ -444,6 +445,8 @@ FEATURE_PRESENCE_TESTS = [
    ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True),
    ("mimikatz", "function=0x40105D", capa.features.Bytes("ACR  > ".encode("utf-16le")), True),
    ("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False),
+    # IDA features included byte sequences read from invalid memory, fixed in #409
+    ("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False),
    # insn/bytes, pointer to bytes
    ("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True),
    # insn/characteristic(nzxor)