rules: add substring feature

closes #737
This commit is contained in:
William Ballenthin
2021-08-24 11:35:01 -06:00
parent 7d2e550b84
commit 6989e8b8cf
8 changed files with 149 additions and 3 deletions

View File

@@ -135,6 +135,77 @@ class String(Feature):
super(String, self).__init__(value, description=description) super(String, self).__init__(value, description=description)
class Substring(String):
    """
    string feature that matches when its value occurs anywhere inside an extracted string.

    the comparison is the plain, case-sensitive `in` test against the value of each `String` feature.
    """

    def __init__(self, value: str, description=None):
        super().__init__(value, description=description)
        self.value = value

    def evaluate(self, ctx):
        """
        search the given features for strings that contain this substring.

        args:
          ctx: mapping from feature to the locations at which that feature was found.

        returns:
          capa.engine.Result: truthy result carrying a `_MatchedSubstring` (with the matched strings
            and their de-duplicated locations) when at least one string matches;
            otherwise a falsy result carrying a `_MatchedSubstring` whose matches are None.

        raises:
          ValueError: when a `String` feature holds a value that is not a `str`.
        """
        # matched string -> every location reported for it.
        # duplicates are tolerated here and removed once the scan is complete.
        hits = collections.defaultdict(list)

        for candidate, where in ctx.items():
            if not isinstance(candidate, String):
                continue

            if not isinstance(candidate.value, str):
                # this is a programming error: String should only contain str
                raise ValueError("unexpected feature value type")

            if self.value in candidate.value:
                hits[candidate.value].extend(where)

        if not hits:
            return capa.engine.Result(False, _MatchedSubstring(self, None), [])

        # build a plain dict (rather than keeping the defaultdict),
        # which makes json serialization easier.
        unique_hits = {}
        # union of the locations across all matched strings
        everywhere = set()
        for text, where in hits.items():
            unique_hits[text] = list(set(where))
            everywhere.update(unique_hits[text])

        # unlike other features, we cannot put a reference to `self` directly in the `Result`:
        # `self` may match many strings, so there is no single matched value to store on it.
        # instead, return a new instance that references both the substring and the matched values.
        return capa.engine.Result(True, _MatchedSubstring(self, unique_hits), [], locations=everywhere)

    def __str__(self):
        return "substring(%s)" % self.value
class _MatchedSubstring(Substring):
    """
    a `Substring` together with the complete strings that it matched.

    behaves like a `Substring`, with the addition of the `matches` field.

    note: instances are meant to be created only by `Substring.evaluate()`;
    this type is not part of the public API.
    """

    def __init__(self, substring: Substring, matches):
        """
        args:
          substring (Substring): the substring feature that was evaluated.
          matches (Dict[string, List[int]]|None): mapping from matching string to its locations,
            or None when the substring did not match.
        """
        super().__init__(str(substring.value), description=substring.description)

        # deliberately reuse the name used by `Substring`,
        # so that this type works nicely with the renderers.
        self.name = "substring"

        # None when the substring doesn't match
        self.matches = matches

    def __str__(self):
        needle = self.value
        matched = self.matches or {}
        quoted = ", ".join('"' + s + '"' for s in matched.keys())
        return 'substring("%s", matches = %s)' % (needle, quoted)
class Regex(String): class Regex(String):
def __init__(self, value: str, description=None): def __init__(self, value: str, description=None):
super(Regex, self).__init__(value, description=description) super(Regex, self).__init__(value, description=description)

View File

@@ -562,7 +562,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
parent, display, source=doc["rules"].get(feature[feature["type"]], {}).get("source", "") parent, display, source=doc["rules"].get(feature[feature["type"]], {}).get("source", "")
) )
if feature["type"] == "regex": if feature["type"] in ("regex", "substring"):
for s, locations in feature["matches"].items(): for s, locations in feature["matches"].items():
if location in locations: if location in locations:
return CapaExplorerStringViewItem( return CapaExplorerStringViewItem(

View File

@@ -74,7 +74,7 @@ def convert_feature_to_result_document(feature):
result = {"type": feature.name, feature.name: feature.get_value_str()} result = {"type": feature.name, feature.name: feature.get_value_str()}
if feature.description: if feature.description:
result["description"] = feature.description result["description"] = feature.description
if feature.name == "regex": if feature.name in ("regex", "substring"):
result["matches"] = feature.matches result["matches"] = feature.matches
return result return result

View File

@@ -97,7 +97,7 @@ def render_feature(ostream, match, feature, indent=0):
key = feature["type"] key = feature["type"]
value = feature[feature["type"]] value = feature[feature["type"]]
if key != "regex": if key not in ("regex", "substring"):
# like: # like:
# number: 10 = SOME_CONSTANT @ 0x401000 # number: 10 = SOME_CONSTANT @ 0x401000
if key == "string": if key == "string":

View File

@@ -219,6 +219,8 @@ def parse_feature(key: str):
return capa.features.insn.API return capa.features.insn.API
elif key == "string": elif key == "string":
return capa.features.common.StringFactory return capa.features.common.StringFactory
elif key == "substring":
return capa.features.common.Substring
elif key == "bytes": elif key == "bytes":
return capa.features.common.Bytes return capa.features.common.Bytes
elif key == "number": elif key == "number":

View File

@@ -467,6 +467,7 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/regex, issue #262 # insn/regex, issue #262
("pma16-01", "function=0x4021B0", capa.features.common.Regex("HTTP/1.0"), True), ("pma16-01", "function=0x4021B0", capa.features.common.Regex("HTTP/1.0"), True),
("pma16-01", "function=0x4021B0", capa.features.common.Regex("www.practicalmalwareanalysis.com"), False), ("pma16-01", "function=0x4021B0", capa.features.common.Regex("www.practicalmalwareanalysis.com"), False),
("pma16-01", "function=0x4021B0", capa.features.common.Substring("practicalmalwareanalysis.com"), False),
# insn/string, pointer to string # insn/string, pointer to string
("mimikatz", "function=0x44EDEF", capa.features.common.String("INPUTEVENT"), True), ("mimikatz", "function=0x44EDEF", capa.features.common.String("INPUTEVENT"), True),
# insn/string, direct memory reference # insn/string, direct memory reference

View File

@@ -284,6 +284,57 @@ def test_match_matched_rules():
assert capa.features.common.MatchedRule("test rule2") in features assert capa.features.common.MatchedRule("test rule2") in features
def test_substring():
    """a `substring` rule matches strings that contain the value at the start, middle, or end."""
    rule_text = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - substring: abc
        """
    )
    rules = [capa.rules.Rule.from_yaml(rule_text)]

    # (extracted string, whether "test rule" is expected to match)
    cases = (
        ("aaaa", False),  # substring absent
        ("abc", True),  # exact match
        ("111abc222", True),  # substring in the middle
        ("111abc", True),  # substring at the end
        ("abc222", True),  # substring at the start
    )

    for text, should_match in cases:
        features, _ = capa.engine.match(
            capa.rules.topologically_order_rules(rules),
            {capa.features.common.String(text): {1}},
            0x0,
        )
        did_match = capa.features.common.MatchedRule("test rule") in features
        assert did_match == should_match
def test_regex(): def test_regex():
rules = [ rules = [
capa.rules.Rule.from_yaml( capa.rules.Rule.from_yaml(

View File

@@ -28,6 +28,7 @@ from capa.features.common import (
Arch, Arch,
Format, Format,
String, String,
Substring,
) )
@@ -747,6 +748,26 @@ def test_string_values_special_characters():
assert (String("bye\nbye") in children) == True assert (String("bye\nbye") in children) == True
def test_substring_feature():
    """`substring` entries parse into `Substring` features, whether bare, quoted, or with escapes."""
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - substring: abc
                    - substring: "def"
                    - substring: "gh\\ni"
        """
    )
    parsed = capa.rules.Rule.from_yaml(rule)
    children = list(parsed.statement.get_children())

    # the last value checks that the escape in the double-quoted YAML scalar becomes a real newline.
    for expected in ("abc", "def", "gh\ni"):
        assert Substring(expected) in children
def test_regex_values_always_string(): def test_regex_values_always_string():
rules = [ rules = [
capa.rules.Rule.from_yaml( capa.rules.Rule.from_yaml(