From fb4ef6b9933862f8f2b5b3edc476f9be265c250a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 15:38:07 -0600 Subject: [PATCH 01/11] tests: add tests for #262 --- tests/fixtures.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index f5eb6e53..ef8f8bfe 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -122,6 +122,8 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "kernel32-64.dll_") elif name == "pma12-04": return os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_") + elif name == "pma16-01": + return os.path.join(CD, "data", "Practical Malware Analysis Lab 16-01.exe_") elif name == "pma21-01": return os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_") elif name == "al-khaser x86": @@ -154,6 +156,8 @@ def get_sample_md5_by_name(name): return "a8565440629ac87f6fef7d588fe3ff0f" elif name == "pma12-04": return "56bed8249e7c2982a90e54e1e55391a2" + elif name == "pma16-01": + return "7faafc7e4a5c736ebfee6abbbc812d80" elif name == "pma21-01": return "c8403fb05244e23a7931c766409b5e22" elif name == "al-khaser x86": @@ -365,6 +369,9 @@ FEATURE_PRESENCE_TESTS = [ ("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True), ("mimikatz", "function=0x40105D", capa.features.String("ACR > "), True), ("mimikatz", "function=0x40105D", capa.features.String("nope"), False), + # insn/regex, issue #262 + ("pma16-01", "function=0x4021B0", capa.features.Regex("HTTP/1.0"), True), + ("pma16-01", "function=0x4021B0", capa.features.Regex("www.practicalmalwareanalysis.com"), False), # insn/string, pointer to string ("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True), # insn/bytes From 322d2ad549f41444f53844bd1123a6a3521951ac Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 15:51:49 -0600 Subject: [PATCH 02/11] tests: main: add tests for #262 --- tests/fixtures.py | 5 +++++ tests/test_main.py | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index ef8f8bfe..086bb4d6 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -483,6 +483,11 @@ def pma12_04_extractor(): return get_extractor(get_data_path_by_name("pma12-04")) +@pytest.fixture +def pma16_01_extractor(): + return get_extractor(get_data_path_by_name("pma16-01")) + + @pytest.fixture def bfb9b_extractor(): return get_extractor(get_data_path_by_name("bfb9b...")) diff --git a/tests/test_main.py b/tests/test_main.py index 4c92c7f0..6b8eaf57 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -309,3 +309,14 @@ def test_count_bb(z9324d_extractor): ) capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor) assert "count bb" in capabilities + + +@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2") +def test_fix262(pma16_01_extractor, capsys): + # tests rules can be loaded successfully and all output modes + path = pma16_01_extractor.path + assert capa.main.main([path, "-vv", "-t", "send HTTP request", "-q"]) == 0 + + std = capsys.readouterr() + assert "HTTP/1.0" in std.out + assert "www.practicalmalwareanalysis.com" not in std.out From 076a47de1ca52a8a65151b052668cf3f35d8131b Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 16:15:33 -0600 Subject: [PATCH 03/11] features: fix matching of a regex multiple times --- capa/features/__init__.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/capa/features/__init__.py b/capa/features/__init__.py index 5d59e4da..24178e11 100644 --- a/capa/features/__init__.py +++ b/capa/features/__init__.py @@ -139,7 +139,6 @@ class Regex(String): raise ValueError( "invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % value ) - self.match = None def evaluate(self, ctx): for feature, locations in ctx.items(): @@ -151,10 +150,37 @@ class Regex(String): # using this mode cleans is more convenient for rule authors, # so that they don't have to prefix/suffix their terms like: /.*foo.*/. if self.re.search(feature.value): - self.match = feature.value - return capa.engine.Result(True, self, [], locations=locations) + # unlike other features, we cannot return put a reference to `self` directly in a `Result`. + # this is because `self` may match on many strings, so we can't stuff the matched into it. + # instead, return a new instance that has a reference to the regex and the matched value. + # see #262. + return capa.engine.Result(True, _MatchedRegex(self, feature.value), [], locations=locations) - return capa.engine.Result(False, self, []) + return capa.engine.Result(False, _MatchedRegex(self, None), []) + + def __str__(self): + return "regex(string =~ %s)" % self.value + + +class _MatchedRegex(Regex): + """ + this represents a specific instances of a regular expression feature match. + treat it the same as a `Regex` except you also have the `match` field. + this should only ever be constructed by `Regex.evaluate()`. + """ + + def __init__(self, regex, match): + """ + args: + regex (Regex): the regex feature that matches + match (string|None): the matching string or None if it doesn't match + """ + super(_MatchedRegex, self).__init__(regex.value, description=regex.description) + # we want this to collide with the name of `Regex` above, + # so that it works nicely with the renderers. + self.name = "regex" + # this may be None if the regex doesn't match + self.match = match def __str__(self): return 'regex(string =~ %s, matched = "%s")' % (self.value, self.match) From 4656275ee00d2bdef1ec36f194408e3db28d7bff Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 16:20:30 -0600 Subject: [PATCH 04/11] features: documentation wording --- capa/features/__init__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/capa/features/__init__.py b/capa/features/__init__.py index 24178e11..0a9a59c7 100644 --- a/capa/features/__init__.py +++ b/capa/features/__init__.py @@ -151,8 +151,8 @@ class Regex(String): # so that they don't have to prefix/suffix their terms like: /.*foo.*/. if self.re.search(feature.value): # unlike other features, we cannot return put a reference to `self` directly in a `Result`. - # this is because `self` may match on many strings, so we can't stuff the matched into it. - # instead, return a new instance that has a reference to the regex and the matched value. + # this is because `self` may match on many strings, so we can't stuff the matched value into it. + # instead, return a new instance that has a reference to both the regex and the matched value. # see #262. return capa.engine.Result(True, _MatchedRegex(self, feature.value), [], locations=locations) @@ -164,9 +164,10 @@ class Regex(String): class _MatchedRegex(Regex): """ - this represents a specific instances of a regular expression feature match. - treat it the same as a `Regex` except you also have the `match` field. - this should only ever be constructed by `Regex.evaluate()`. + this represents a specific instance of a regular expression feature match. + treat it the same as a `Regex` except it has the `match` field that contains the complete string that matched. + + note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API. """ def __init__(self, regex, match): From 0734edf6f0449a0fbbea8c3a2cc73449440bf25a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 16:34:10 -0600 Subject: [PATCH 05/11] tests: fmt: add test for #263 --- tests/test_fmt.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_fmt.py b/tests/test_fmt.py index 1ca4725a..bac79d6c 100644 --- a/tests/test_fmt.py +++ b/tests/test_fmt.py @@ -92,6 +92,8 @@ def test_rule_reformat_order(): def test_rule_reformat_meta_update(): + # test updating the rule content after parsing + rule = textwrap.dedent( """ rule: @@ -112,3 +114,23 @@ def test_rule_reformat_meta_update(): rule = capa.rules.Rule.from_yaml(rule) rule.name = "test rule" assert rule.to_yaml() == EXPECTED + + +def test_rule_reformat_string_description(): + # see #263 + src = textwrap.dedent( + """ + rule: + meta: + name: test rule + author: user@domain.com + scope: function + features: + - and: + - string: foo + description: bar + """ + ) + + rule = capa.rules.Rule.from_yaml(src) + assert rule.to_yaml() == src From 7e0ebb8c5b1da5f0e5e3451df3a41b3d5c409c36 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 16:49:54 -0600 Subject: [PATCH 06/11] rules: fmt: fix formatting of description block closes #263 --- capa/rules.py | 14 +++++++++++++- tests/test_fmt.py | 3 ++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index 6b9eba58..55ce2014 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -624,7 +624,19 @@ class Rule(object): continue meta[key] = value - return ostream.getvalue().decode("utf-8").rstrip("\n") + "\n" + doc = ostream.getvalue().decode("utf-8").rstrip("\n") + "\n" + # when we have something like: + # + # and: + # - string: foo + # description: bar + # + # we want the `description` horizontally aligned with the start of the `string`. + # tweaking `ruamel.indent()` doesn't quite give us the control we want. + # so, add the two extra spaces that we've determined we need through experimentation. + # see #263 + doc = doc.replace(" description:", " description:") + return doc def get_rules_with_scope(rules, scope): diff --git a/tests/test_fmt.py b/tests/test_fmt.py index bac79d6c..92bd4ffa 100644 --- a/tests/test_fmt.py +++ b/tests/test_fmt.py @@ -117,6 +117,7 @@ def test_rule_reformat_meta_update(): def test_rule_reformat_string_description(): + # the `description` should be aligned with the preceding feature name. # see #263 src = textwrap.dedent( """ @@ -130,7 +131,7 @@ def test_rule_reformat_string_description(): - string: foo description: bar """ - ) + ).lstrip() rule = capa.rules.Rule.from_yaml(src) assert rule.to_yaml() == src From 7310b0feda65962b689d2db72382d9fda5f231e4 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 16:55:54 -0600 Subject: [PATCH 07/11] rules: documentation formatting --- capa/rules.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/capa/rules.py b/capa/rules.py index 55ce2014..81df0c80 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -631,7 +631,13 @@ class Rule(object): # - string: foo # description: bar # - # we want the `description` horizontally aligned with the start of the `string`. + # we want the `description` horizontally aligned with the start of the `string` (like above). + # however, ruamel will give us (which I don't think is even valid yaml): + # + # and: + # - string: foo + # description: bar + # # tweaking `ruamel.indent()` doesn't quite give us the control we want. # so, add the two extra spaces that we've determined we need through experimentation. # see #263 From 5f7f718fe48da18c0acc98220e888b978f5e36b1 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 20:31:36 -0600 Subject: [PATCH 08/11] tests: add test for #276 --- tests/fixtures.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index 086bb4d6..8ca5437d 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -142,6 +142,8 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "bfb9b5391a13d0afd787e87ab90f14f5.dll_") elif name.startswith("c9188"): return os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_") + elif name.startswith("64d9f"): + return os.path.join(CD, "data", "64d9f7d96b99467f36e22fada623c3bb.dll_") else: raise ValueError("unexpected sample fixture") @@ -176,6 +178,8 @@ def get_sample_md5_by_name(name): return "bfb9b5391a13d0afd787e87ab90f14f5" elif name.startswith("c9188"): return "c91887d861d9bd4a5872249b641bc9f9" + elif name.startswith("64d9f"): + return "64d9f7d96b99467f36e22fada623c3bb" else: raise ValueError("unexpected sample fixture") @@ -317,6 +321,8 @@ FEATURE_PRESENCE_TESTS = [ ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0xC), True), + # insn/offset, issue #276 + ("64d9f", "function=0x10001510,bb=0x100015B0", capa.features.insn.Offset(0x4000), True), # insn/offset: stack references ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x8), False), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x10), False), From d27200687347a49e9fd5121ba0136b818ea4dd69 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 Aug 2020 20:41:45 -0600 Subject: [PATCH 09/11] features: insn: viv: extract offset from SibOper operands closes #276 --- capa/features/extractors/viv/insn.py | 39 ++++++++++++++++++---------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 0e091ffa..7375bc37 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -318,25 +318,38 @@ def extract_insn_offset_features(f, bb, insn): # # .text:0040112F cmp [esi+4], ebx for oper in insn.opers: + # this is for both x32 and x64 - if not isinstance(oper, envi.archs.i386.disasm.i386RegMemOper): - continue + # like [esi + 4] + # reg ^ + # disp + if isinstance(oper, envi.archs.i386.disasm.i386RegMemOper): + if oper.reg == envi.archs.i386.disasm.REG_ESP: + continue - if oper.reg == envi.archs.i386.disasm.REG_ESP: - continue + if oper.reg == envi.archs.i386.disasm.REG_EBP: + continue - if oper.reg == envi.archs.i386.disasm.REG_EBP: - continue + # TODO: do x64 support for real. + if oper.reg == envi.archs.amd64.disasm.REG_RBP: + continue - # TODO: do x64 support for real. - if oper.reg == envi.archs.amd64.disasm.REG_RBP: - continue + # viv already decodes offsets as signed + v = oper.disp - # viv already decodes offsets as signed - v = oper.disp + yield Offset(v), insn.va + yield Offset(v, arch=get_arch(f.vw)), insn.va - yield Offset(v), insn.va - yield Offset(v, arch=get_arch(f.vw)), insn.va + # like: [esi + ecx + 16384] + # reg ^ ^ + # index ^ + # disp + elif isinstance(oper, envi.archs.i386.disasm.i386SibOper): + # viv already decodes offsets as signed + v = oper.disp + + yield Offset(v), insn.va + yield Offset(v, arch=get_arch(f.vw)), insn.va def is_security_cookie(f, bb, insn): From 08099f93a1bc83c3956ac61342aa55c79ce025cc Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 1 Sep 2020 16:56:04 +0000 Subject: [PATCH 10/11] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index e6f6ca89..c3a35d4b 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit e6f6ca898d323c38040af9b012533cca04c46d88 +Subproject commit c3a35d4b6430ed61ffef59d672a2a8b6061e23fe From b386933a04ace36afc9607b1c03f051643386024 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 1 Sep 2020 18:13:40 +0000 Subject: [PATCH 11/11] Sync capa rules submodule --- README.md | 2 +- rules | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 75dd0f1a..ea373375 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ![capa](.github/logo.png) [![CI status](https://github.com/fireeye/capa/workflows/CI/badge.svg)](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) -[![Number of rules](https://img.shields.io/badge/rules-342-blue.svg)](https://github.com/fireeye/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-343-blue.svg)](https://github.com/fireeye/capa-rules) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) capa detects capabilities in executable files. diff --git a/rules b/rules index 1549f6f8..d5467445 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 1549f6f885710580efe5c17cbaeeb2f8877c39d5 +Subproject commit d54674456840cfa558624efb17a0100576deb269