diff --git a/capa/features/__init__.py b/capa/features/__init__.py index 5d59e4da..0a9a59c7 100644 --- a/capa/features/__init__.py +++ b/capa/features/__init__.py @@ -139,7 +139,6 @@ class Regex(String): raise ValueError( "invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % value ) - self.match = None def evaluate(self, ctx): for feature, locations in ctx.items(): @@ -151,10 +150,38 @@ class Regex(String): # using this mode cleans is more convenient for rule authors, # so that they don't have to prefix/suffix their terms like: /.*foo.*/. if self.re.search(feature.value): - self.match = feature.value - return capa.engine.Result(True, self, [], locations=locations) + # unlike other features, we cannot put a reference to `self` directly in a `Result`. + # this is because `self` may match on many strings, so we can't stuff the matched value into it. + # instead, return a new instance that has a reference to both the regex and the matched value. + # see #262. + return capa.engine.Result(True, _MatchedRegex(self, feature.value), [], locations=locations) - return capa.engine.Result(False, self, []) + return capa.engine.Result(False, _MatchedRegex(self, None), []) + + def __str__(self): + return "regex(string =~ %s)" % self.value + + +class _MatchedRegex(Regex): + """ + this represents a specific instance of a regular expression feature match. + treat it the same as a `Regex` except it has the `match` field that contains the complete string that matched. + + note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API. + """ + + def __init__(self, regex, match): + """ + args: + regex (Regex): the regex feature that matches + match (string|None): the matching string or None if it doesn't match + """ + super(_MatchedRegex, self).__init__(regex.value, description=regex.description) + # we want this to collide with the name of `Regex` above, + # so that it works nicely with the renderers. 
+ self.name = "regex" + # this may be None if the regex doesn't match + self.match = match def __str__(self): return 'regex(string =~ %s, matched = "%s")' % (self.value, self.match) diff --git a/tests/fixtures.py b/tests/fixtures.py index f5eb6e53..086bb4d6 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -122,6 +122,8 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "kernel32-64.dll_") elif name == "pma12-04": return os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_") + elif name == "pma16-01": + return os.path.join(CD, "data", "Practical Malware Analysis Lab 16-01.exe_") elif name == "pma21-01": return os.path.join(CD, "data", "Practical Malware Analysis Lab 21-01.exe_") elif name == "al-khaser x86": @@ -154,6 +156,8 @@ def get_sample_md5_by_name(name): return "a8565440629ac87f6fef7d588fe3ff0f" elif name == "pma12-04": return "56bed8249e7c2982a90e54e1e55391a2" + elif name == "pma16-01": + return "7faafc7e4a5c736ebfee6abbbc812d80" elif name == "pma21-01": return "c8403fb05244e23a7931c766409b5e22" elif name == "al-khaser x86": @@ -365,6 +369,9 @@ FEATURE_PRESENCE_TESTS = [ ("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True), ("mimikatz", "function=0x40105D", capa.features.String("ACR > "), True), ("mimikatz", "function=0x40105D", capa.features.String("nope"), False), + # insn/regex, issue #262 + ("pma16-01", "function=0x4021B0", capa.features.Regex("HTTP/1.0"), True), + ("pma16-01", "function=0x4021B0", capa.features.Regex("www.practicalmalwareanalysis.com"), False), # insn/string, pointer to string ("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True), # insn/bytes @@ -476,6 +483,11 @@ def pma12_04_extractor(): return get_extractor(get_data_path_by_name("pma12-04")) +@pytest.fixture +def pma16_01_extractor(): + return get_extractor(get_data_path_by_name("pma16-01")) + + @pytest.fixture def bfb9b_extractor(): return get_extractor(get_data_path_by_name("bfb9b...")) diff 
--git a/tests/test_main.py b/tests/test_main.py index 4c92c7f0..6b8eaf57 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -309,3 +309,14 @@ def test_count_bb(z9324d_extractor): ) capabilities, meta = capa.main.find_capabilities(rules, z9324d_extractor) assert "count bb" in capabilities + + +@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2") +def test_fix262(pma16_01_extractor, capsys): + # regression test for #262: the -vv output must contain the matched string "HTTP/1.0" and must not contain "www.practicalmalwareanalysis.com" + path = pma16_01_extractor.path + assert capa.main.main([path, "-vv", "-t", "send HTTP request", "-q"]) == 0 + + std = capsys.readouterr() + assert "HTTP/1.0" in std.out + assert "www.practicalmalwareanalysis.com" not in std.out