From 3dc3e083a91afe8539f8876890cee614cb9ea6bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?= Date: Mon, 6 Jul 2020 07:21:42 +0200 Subject: [PATCH] Remove args from Features `args` and `value` are duplicates for most of the features (`args = [value]`). Get rid of `args` and introduce `value` for `Feature` (the main class, instead of the subclasses). This makes the code simpler. Refactoring the `freeze_serialize` function to not use an array would simplify the code a bit more, but that needs a bit more work. --- capa/features/__init__.py | 38 +++++++++++++++++-------------------- capa/features/basicblock.py | 9 ++++++++- capa/features/file.py | 9 +++------ capa/features/insn.py | 15 ++++++--------- capa/render/__init__.py | 2 +- capa/rules.py | 8 ++++---- rules | 2 +- 7 files changed, 40 insertions(+), 43 deletions(-) diff --git a/capa/features/__init__.py b/capa/features/__init__.py index c37ad4bd..860ca8cc 100644 --- a/capa/features/__init__.py +++ b/capa/features/__init__.py @@ -21,28 +21,28 @@ def hex_string(h): class Feature(object): - def __init__(self, args, description=None): + def __init__(self, value, description=None): super(Feature, self).__init__() self.name = self.__class__.__name__.lower() - self.args = args + self.value = value self.description = description def __hash__(self): - return hash((self.name, tuple(self.args))) + return hash((self.name, self.value)) def __eq__(self, other): - return self.name == other.name and self.args == other.args + return self.name == other.name and self.value == other.value - # Used to overwrite the rendering of the feature args in `__str__` and the + # Used to overwrite the rendering of the feature value in `__str__` and the # json output - def get_args_str(self): - return ",".join(self.args) + def get_value_str(self): + return self.value def __str__(self): if self.description: - return "%s(%s = %s)" % (self.name, self.get_args_str(), self.description) + return "%s(%s = 
%s)" % (self.name, self.get_value_str(), self.description) else: - return "%s(%s)" % (self.name, self.get_args_str()) + return "%s(%s)" % (self.name, self.get_value_str()) def __repr__(self): return str(self) @@ -54,7 +54,7 @@ class Feature(object): return self.__dict__ def freeze_serialize(self): - return (self.__class__.__name__, self.args) + return (self.__class__.__name__, [self.value]) @classmethod def freeze_deserialize(cls, args): @@ -62,16 +62,14 @@ class Feature(object): class MatchedRule(Feature): - def __init__(self, rule_name, description=None): - super(MatchedRule, self).__init__([rule_name], description) + def __init__(self, value, description=None): + super(MatchedRule, self).__init__(value, description) self.name = "match" - self.rule_name = rule_name class Characteristic(Feature): def __init__(self, value, description=None): - super(Characteristic, self).__init__([value], description) - self.value = value + super(Characteristic, self).__init__(value, description) def freeze_serialize(self): # in an older version of capa, characteristics could theoretically match non-existence (value=False). 
@@ -89,14 +87,12 @@ class Characteristic(Feature): class String(Feature): def __init__(self, value, description=None): - super(String, self).__init__([value], description) - self.value = value + super(String, self).__init__(value, description) class Bytes(Feature): def __init__(self, value, description=None): - super(Bytes, self).__init__([value], description) - self.value = value + super(Bytes, self).__init__(value, description) def evaluate(self, ctx): for feature, locations in ctx.items(): @@ -108,11 +104,11 @@ class Bytes(Feature): return capa.engine.Result(False, self, []) - def get_args_str(self): + def get_value_str(self): return hex_string(bytes_to_str(self.value)) def freeze_serialize(self): - return (self.__class__.__name__, [bytes_to_str(x).upper() for x in self.args]) + return (self.__class__.__name__, [bytes_to_str(self.value).upper()]) @classmethod def freeze_deserialize(cls, args): diff --git a/capa/features/basicblock.py b/capa/features/basicblock.py index c1c7234c..90a96138 100644 --- a/capa/features/basicblock.py +++ b/capa/features/basicblock.py @@ -3,7 +3,14 @@ from capa.features import Feature class BasicBlock(Feature): def __init__(self): - super(BasicBlock, self).__init__([]) + super(BasicBlock, self).__init__(None) def __str__(self): return "basic block" + + def freeze_serialize(self): + return (self.__class__.__name__, []) + + @classmethod + def freeze_deserialize(cls, args): + return cls() diff --git a/capa/features/file.py b/capa/features/file.py index 396edd1f..6e1ecb3e 100644 --- a/capa/features/file.py +++ b/capa/features/file.py @@ -4,19 +4,16 @@ from capa.features import Feature class Export(Feature): def __init__(self, value, description=None): # value is export name - super(Export, self).__init__([value], description) - self.value = value + super(Export, self).__init__(value, description) class Import(Feature): def __init__(self, value, description=None): # value is import name - super(Import, self).__init__([value], description) - 
self.value = value + super(Import, self).__init__(value, description) class Section(Feature): def __init__(self, value, description=None): # value is section name - super(Section, self).__init__([value], description) - self.value = value + super(Section, self).__init__(value, description) diff --git a/capa/features/insn.py b/capa/features/insn.py index f6b24b43..69dd2c04 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -8,28 +8,25 @@ class API(Feature): modname, impname = name.split(".") name = modname.lower() + "." + impname - super(API, self).__init__([name], description) + super(API, self).__init__(name, description) class Number(Feature): def __init__(self, value, description=None): - super(Number, self).__init__([value], description) - self.value = value + super(Number, self).__init__(value, description) - def get_args_str(self): + def get_value_str(self): return "0x%X" % self.value class Offset(Feature): def __init__(self, value, description=None): - super(Offset, self).__init__([value], description) - self.value = value + super(Offset, self).__init__(value, description) - def get_args_str(self): + def get_value_str(self): return "0x%X" % self.value class Mnemonic(Feature): def __init__(self, value, description=None): - super(Mnemonic, self).__init__([value], description) - self.value = value + super(Mnemonic, self).__init__(value, description) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 5b625217..2d93a41a 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -85,7 +85,7 @@ def convert_feature_to_result_document(feature): "type": "characteristic" }, """ - result = {"type": feature.name, feature.name: feature.get_args_str()} + result = {"type": feature.name, feature.name: feature.get_value_str()} if feature.description: result["description"] = feature.description diff --git a/capa/rules.py b/capa/rules.py index 09508df2..861fddb8 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -423,7 +423,7 @@ class 
Rule(object): def rec(statement): if isinstance(statement, capa.features.MatchedRule): - # we're not sure at this point if the `statement.rule_name` is + # we're not sure at this point if the `statement.value` is # really a rule name or a namespace name (we use `MatchedRule` for both cases). # we'll give precedence to namespaces, and then assume if that does work, # that it must be a rule name. @@ -431,12 +431,12 @@ class Rule(object): # we don't expect any collisions between namespaces and rule names, but its possible. # most likely would be collision between top level namespace (e.g. `host-interaction`) and rule name. # but, namespaces tend to use `-` while rule names use ` `. so, unlikely, but possible. - if statement.rule_name in namespaces: + if statement.value in namespaces: # matches a namespace, so take precedence and don't even check rule names. - deps.update(map(lambda r: r.name, namespaces[statement.rule_name])) + deps.update(map(lambda r: r.name, namespaces[statement.value])) else: # not a namespace, assume its a rule name. - deps.add(statement.rule_name) + deps.add(statement.value) elif isinstance(statement, Statement): for child in statement.get_children(): diff --git a/rules b/rules index a8621978..54885300 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit a8621978cf510a53965b919d7575a1e57d8284ce +Subproject commit 548853005591996b11d6b8d1140c9e353254e9f2