diff --git a/CHANGELOG.md b/CHANGELOG.md
index e5e952d9..fb38fffa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,7 @@
 - loader: handle SegmentationViolation for malformed ELF files @kami922 #2799
 - lint: disable rule caching during linting @Maijin #2817
 - vmray: skip processes with invalid PID or missing filename @EclipseAditya #2807
+- features: fix Regex.get_value_str() returning escaped pattern instead of raw regex @EclipseAditya #1909
 - render: use default styling for dynamic -vv API/call details so they are easier to see @devs6186 #1865
 - address: fix TypeError when sorting locations containing mixed address types @devs6186 #2195
 
diff --git a/capa/features/common.py b/capa/features/common.py
index 44d42cce..5bde5d35 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -369,6 +369,12 @@ class Regex(String):
         else:
             return Result(False, _MatchedRegex(self, {}), [])
 
+    def get_value_str(self) -> str:
+        """Return the raw regex pattern rather than the escaped form produced by String.get_value_str()."""
+        # see #1909.
+        assert isinstance(self.value, str)
+        return self.value
+
     def __str__(self):
         assert isinstance(self.value, str)
         return f"regex(string =~ {self.value})"
diff --git a/tests/test_match.py b/tests/test_match.py
index 9e763bbc..8fdd146d 100644
--- a/tests/test_match.py
+++ b/tests/test_match.py
@@ -572,6 +572,19 @@ def test_match_regex_values_always_string():
     assert capa.features.common.MatchedRule("test rule") in features
 
 
+@pytest.mark.parametrize(
+    "pattern",
+    [
+        "/test\\.exe/",
+        "/hello/i",
+        "/foo\\\\bar/",
+    ],
+)
+def test_regex_get_value_str(pattern):
+    # Regex.get_value_str() must return the raw pattern without escaping, see #1909.
+    assert capa.features.common.Regex(pattern).get_value_str() == pattern
+
+
 @pytest.mark.xfail(reason="can't have top level NOT")
 def test_match_only_not():
     rule = textwrap.dedent(