update pydantic models to guarantee type coercion (#1176)

* add CompoundStatement to fix Pydantic typing bug

* explorer: fix #1151

* explorer: support rendering operand number/offset
This commit is contained in:
Mike Hunhoff
2022-09-20 08:38:19 -06:00
committed by GitHub
parent 8521f85742
commit e1735f0a5e
7 changed files with 240 additions and 50 deletions

View File

@@ -20,6 +20,7 @@
### Bug Fixes
- render: convert feature attributes to aliased dictionary for vverbose #1152 @mike-hunhoff
- decouple Token dependency / extractor and features #1139 @mr-tz
- update pydantic model to guarantee type coercion #1176 @mike-hunhoff
- do not overwrite version in version.py during PyInstaller build #1169 @mr-tz
### capa explorer IDA Pro plugin

View File

@@ -145,10 +145,13 @@ class BasicBlockFeature(HashableModel):
versus right at its starting address.
"""
basic_block: Address
basic_block: Address = Field(alias="basic block")
address: Address
feature: Feature
class Config:
allow_population_by_field_name = True
class InstructionFeature(HashableModel):
"""
@@ -179,7 +182,7 @@ class BasicBlockFeatures(BaseModel):
class FunctionFeatures(BaseModel):
address: Address
features: Tuple[FunctionFeature, ...]
basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic block")
basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks")
class Config:
allow_population_by_field_name = True

View File

@@ -340,7 +340,6 @@ Feature = Union[
MnemonicFeature,
OperandNumberFeature,
OperandOffsetFeature,
# this has to go last because...? pydantic fails to serialize correctly otherwise.
# possibly because this feature has no associated value?
# Note! this must be last, see #1161
BasicBlockFeature,
]

View File

@@ -365,12 +365,13 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
@param doc: result doc
"""
if isinstance(statement, (rd.AndStatement, rd.OrStatement, rd.OptionalStatement)):
if isinstance(statement, rd.CompoundStatement):
if statement.type != rd.CompoundStatementType.NOT:
display = statement.type
if statement.description:
display += " (%s)" % statement.description
return CapaExplorerDefaultItem(parent, display)
elif isinstance(statement, rd.NotStatement):
elif isinstance(statement, rd.CompoundStatement) and statement.type == rd.CompoundStatementType.NOT:
# TODO: do we display 'not'
pass
elif isinstance(statement, rd.SomeStatement):
@@ -424,7 +425,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
return
# optional statement with no successful children is empty
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement):
if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.OPTIONAL:
if not any(map(lambda m: m.success, match.children)):
return
@@ -524,7 +525,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
@param feature: capa feature read from doc
"""
key = feature.type
value = getattr(feature, feature.type)
value = feature.dict(by_alias=True).get(feature.type)
if value:
if isinstance(feature, frzf.StringFeature):
@@ -638,6 +639,8 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
frzf.MnemonicFeature,
frzf.NumberFeature,
frzf.OffsetFeature,
frzf.OperandNumberFeature,
frzf.OperandOffsetFeature,
),
):
# display instruction preview

View File

@@ -124,22 +124,19 @@ class Metadata(FrozenModel):
)
class CompoundStatementType:
AND = "and"
OR = "or"
NOT = "not"
OPTIONAL = "optional"
class StatementModel(FrozenModel):
...
class AndStatement(StatementModel):
type = "and"
description: Optional[str]
class OrStatement(StatementModel):
type = "or"
description: Optional[str]
class NotStatement(StatementModel):
type = "not"
class CompoundStatement(StatementModel):
type: str
description: Optional[str]
@@ -149,11 +146,6 @@ class SomeStatement(StatementModel):
count: int
class OptionalStatement(StatementModel):
type = "optional"
description: Optional[str]
class RangeStatement(StatementModel):
type = "range"
description: Optional[str]
@@ -165,17 +157,15 @@ class RangeStatement(StatementModel):
class SubscopeStatement(StatementModel):
type = "subscope"
description: Optional[str]
scope = capa.rules.Scope
scope: capa.rules.Scope
Statement = Union[
OptionalStatement,
AndStatement,
OrStatement,
NotStatement,
SomeStatement,
# Note! order matters, see #1161
RangeStatement,
SomeStatement,
SubscopeStatement,
CompoundStatement,
]
@@ -185,18 +175,12 @@ class StatementNode(FrozenModel):
def statement_from_capa(node: capa.engine.Statement) -> Statement:
if isinstance(node, capa.engine.And):
return AndStatement(description=node.description)
elif isinstance(node, capa.engine.Or):
return OrStatement(description=node.description)
elif isinstance(node, capa.engine.Not):
return NotStatement(description=node.description)
if isinstance(node, (capa.engine.And, capa.engine.Or, capa.engine.Not)):
return CompoundStatement(type=node.__class__.__name__.lower(), description=node.description)
elif isinstance(node, capa.engine.Some):
if node.count == 0:
return OptionalStatement(description=node.description)
return CompoundStatement(type=CompoundStatementType.OPTIONAL, description=node.description)
else:
return SomeStatement(

View File

@@ -64,7 +64,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0
ostream.write(" = %s" % statement.description)
ostream.writeln("")
elif isinstance(statement, (rd.AndStatement, rd.OrStatement, rd.OptionalStatement, rd.NotStatement)):
elif isinstance(statement, (rd.CompoundStatement)):
# emit `and:` `or:` `optional:` `not:`
ostream.write(statement.type)
@@ -87,7 +87,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0
# so, we have to inline some of the feature rendering here.
child = statement.child
value = getattr(child, child.type)
value = child.dict(by_alias=True).get(child.type)
if value:
if isinstance(child, frzf.StringFeature):
@@ -211,12 +211,12 @@ def render_match(ostream, match: rd.Match, indent=0, mode=MODE_SUCCESS):
return
# optional statement with no successful children is empty
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement):
if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.OPTIONAL:
if not any(map(lambda m: m.success, match.children)):
return
# not statement, so invert the child mode to show failed evaluations
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.NotStatement):
if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.NOT:
child_mode = MODE_FAILURE
elif mode == MODE_FAILURE:
@@ -225,12 +225,12 @@ def render_match(ostream, match: rd.Match, indent=0, mode=MODE_SUCCESS):
return
# optional statement with successful children is not relevant
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement):
if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.OPTIONAL:
if any(map(lambda m: m.success, match.children)):
return
# not statement, so invert the child mode to show successful evaluations
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.NotStatement):
if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.NOT:
child_mode = MODE_SUCCESS
else:
raise RuntimeError("unexpected mode: " + mode)

View File

@@ -0,0 +1,200 @@
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import capa
import capa.engine as ceng
import capa.render.result_document as rdoc
import capa.features.freeze.features as frzf
def test_optional_node_from_capa():
node = rdoc.node_from_capa(
ceng.Some(
0,
[],
)
)
assert isinstance(node.statement, rdoc.CompoundStatement)
assert node.statement.type == rdoc.CompoundStatementType.OPTIONAL
def test_some_node_from_capa():
node = rdoc.node_from_capa(
ceng.Some(
1,
[
capa.features.insn.Number(0),
],
)
)
assert isinstance(node.statement, rdoc.SomeStatement)
def test_range_node_from_capa():
node = rdoc.node_from_capa(
ceng.Range(
capa.features.insn.Number(0),
)
)
assert isinstance(node.statement, rdoc.RangeStatement)
def test_subscope_node_from_capa():
node = rdoc.node_from_capa(
ceng.Subscope(
capa.rules.Scope.BASIC_BLOCK,
capa.features.insn.Number(0),
)
)
assert isinstance(node.statement, rdoc.SubscopeStatement)
def test_and_node_from_capa():
node = rdoc.node_from_capa(
ceng.And(
[
capa.features.insn.Number(0),
],
)
)
assert isinstance(node.statement, rdoc.CompoundStatement)
assert node.statement.type == rdoc.CompoundStatementType.AND
def test_or_node_from_capa():
node = rdoc.node_from_capa(
ceng.Or(
[
capa.features.insn.Number(0),
],
)
)
assert isinstance(node.statement, rdoc.CompoundStatement)
assert node.statement.type == rdoc.CompoundStatementType.OR
def test_not_node_from_capa():
node = rdoc.node_from_capa(
ceng.Not(
[
capa.features.insn.Number(0),
],
)
)
assert isinstance(node.statement, rdoc.CompoundStatement)
assert node.statement.type == rdoc.CompoundStatementType.NOT
def test_os_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.OS(""))
assert isinstance(node.feature, frzf.OSFeature)
def test_arch_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.Arch(""))
assert isinstance(node.feature, frzf.ArchFeature)
def test_format_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.Format(""))
assert isinstance(node.feature, frzf.FormatFeature)
def test_match_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.MatchedRule(""))
assert isinstance(node.feature, frzf.MatchFeature)
def test_characteristic_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.Characteristic(""))
assert isinstance(node.feature, frzf.CharacteristicFeature)
def test_substring_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.Substring(""))
assert isinstance(node.feature, frzf.SubstringFeature)
def test_regex_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.Regex(""))
assert isinstance(node.feature, frzf.RegexFeature)
def test_class_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.Class(""))
assert isinstance(node.feature, frzf.ClassFeature)
def test_namespace_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.Namespace(""))
assert isinstance(node.feature, frzf.NamespaceFeature)
def test_bytes_node_from_capa():
node = rdoc.node_from_capa(capa.features.common.Bytes(b""))
assert isinstance(node.feature, frzf.BytesFeature)
def test_export_node_from_capa():
node = rdoc.node_from_capa(capa.features.file.Export(""))
assert isinstance(node.feature, frzf.ExportFeature)
def test_import_node_from_capa():
node = rdoc.node_from_capa(capa.features.file.Import(""))
assert isinstance(node.feature, frzf.ImportFeature)
def test_section_node_from_capa():
node = rdoc.node_from_capa(capa.features.file.Section(""))
assert isinstance(node.feature, frzf.SectionFeature)
def test_function_name_node_from_capa():
node = rdoc.node_from_capa(capa.features.file.FunctionName(""))
assert isinstance(node.feature, frzf.FunctionNameFeature)
def test_api_node_from_capa():
node = rdoc.node_from_capa(capa.features.insn.API(""))
assert isinstance(node.feature, frzf.APIFeature)
def test_property_node_from_capa():
node = rdoc.node_from_capa(capa.features.insn.Property(""))
assert isinstance(node.feature, frzf.PropertyFeature)
def test_number_node_from_capa():
node = rdoc.node_from_capa(capa.features.insn.Number(0))
assert isinstance(node.feature, frzf.NumberFeature)
def test_offset_node_from_capa():
node = rdoc.node_from_capa(capa.features.insn.Offset(0))
assert isinstance(node.feature, frzf.OffsetFeature)
def test_mnemonic_node_from_capa():
node = rdoc.node_from_capa(capa.features.insn.Mnemonic(""))
assert isinstance(node.feature, frzf.MnemonicFeature)
def test_operand_number_node_from_capa():
node = rdoc.node_from_capa(capa.features.insn.OperandNumber(0, 0))
assert isinstance(node.feature, frzf.OperandNumberFeature)
def test_operand_offset_node_from_capa():
node = rdoc.node_from_capa(capa.features.insn.OperandOffset(0, 0))
assert isinstance(node.feature, frzf.OperandOffsetFeature)
def test_basic_block_node_from_capa():
node = rdoc.node_from_capa(capa.features.basicblock.BasicBlock(""))
assert isinstance(node.feature, frzf.BasicBlockFeature)