From 2bed3468f61ea358747836893edb5a8bb4509a23 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:21:46 +0530 Subject: [PATCH 1/6] bump pydantic to 2.1.1 --- CHANGELOG.md | 3 +- capa/features/common.py | 4 +-- capa/features/freeze/__init__.py | 29 ++++++----------- capa/features/freeze/features.py | 54 +++++++++++++++----------------- capa/ida/plugin/form.py | 2 +- capa/render/json.py | 2 +- capa/render/result_document.py | 32 +++++++------------ pyproject.toml | 2 +- scripts/bulk-process.py | 4 ++- scripts/proto-to-results.py | 2 +- tests/test_result_document.py | 6 ++-- 11 files changed, 61 insertions(+), 79 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f285d59e..d4639a58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,8 @@ ## master (unreleased) ### New Features -- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04 +- ELF: implement file import and export name extractor #1607 #1608 @Aayush-Goel-04 +- bump pydantic from 1.10.9 to 2.1.1 #1582 @Aayush-Goel-04 ### Breaking Changes diff --git a/capa/features/common.py b/capa/features/common.py index 3216c38e..9278f7e8 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -136,8 +136,8 @@ class Feature(abc.ABC): # noqa: B024 import capa.features.freeze.features return ( - capa.features.freeze.features.feature_from_capa(self).json() - < capa.features.freeze.features.feature_from_capa(other).json() + capa.features.freeze.features.feature_from_capa(self).model_dump_json() + < capa.features.freeze.features.feature_from_capa(other).model_dump_json() ) def get_name_str(self) -> str: diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index 9584990f..ebbf7d1d 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -14,7 +14,7 @@ import logging from enum import Enum from typing import List, Tuple, Union -from pydantic import Field, 
BaseModel +from pydantic import Field, BaseModel, ConfigDict import capa.helpers import capa.version @@ -31,8 +31,7 @@ logger = logging.getLogger(__name__) class HashableModel(BaseModel): - class Config: - frozen = True + model_config = ConfigDict(frozen=True) class AddressType(str, Enum): @@ -46,7 +45,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, int], None] + value: Union[int, Tuple[int, int], None] = None @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": @@ -159,9 +158,7 @@ class BasicBlockFeature(HashableModel): basic_block: Address = Field(alias="basic block") address: Address feature: Feature - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class InstructionFeature(HashableModel): @@ -194,26 +191,20 @@ class FunctionFeatures(BaseModel): address: Address features: Tuple[FunctionFeature, ...] basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks") - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class Features(BaseModel): global_: Tuple[GlobalFeature, ...] = Field(alias="global") file: Tuple[FileFeature, ...] functions: Tuple[FunctionFeatures, ...] 
- - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class Extractor(BaseModel): name: str version: str = capa.version.__version__ - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class Freeze(BaseModel): @@ -221,9 +212,7 @@ class Freeze(BaseModel): base_address: Address = Field(alias="base address") extractor: Extractor features: Features - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> str: @@ -324,7 +313,7 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) - ) # type: ignore # Mypy is unable to recognise `base_address` as a argument due to alias - return freeze.json() + return freeze.model_dump_json() def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor: diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py index ba651672..dd0b1f2f 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -8,7 +8,7 @@ import binascii from typing import Union, Optional -from pydantic import Field, BaseModel +from pydantic import Field, BaseModel, ConfigDict import capa.features.file import capa.features.insn @@ -17,9 +17,7 @@ import capa.features.basicblock class FeatureModel(BaseModel): - class Config: - frozen = True - allow_population_by_field_name = True + model_config = ConfigDict(frozen=True, populate_by_name=True) def to_capa(self) -> capa.features.common.Feature: if isinstance(self, OSFeature): @@ -213,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": class OSFeature(FeatureModel): type: str = "os" os: str - description: Optional[str] + description: Optional[str] = None class ArchFeature(FeatureModel): type: str = "arch" arch: str - description: Optional[str] + description: 
Optional[str] = None class FormatFeature(FeatureModel): type: str = "format" format: str - description: Optional[str] + description: Optional[str] = None class MatchFeature(FeatureModel): type: str = "match" match: str - description: Optional[str] + description: Optional[str] = None class CharacteristicFeature(FeatureModel): type: str = "characteristic" characteristic: str - description: Optional[str] + description: Optional[str] = None class ExportFeature(FeatureModel): type: str = "export" export: str - description: Optional[str] + description: Optional[str] = None class ImportFeature(FeatureModel): type: str = "import" import_: str = Field(alias="import") - description: Optional[str] + description: Optional[str] = None class SectionFeature(FeatureModel): type: str = "section" section: str - description: Optional[str] + description: Optional[str] = None class FunctionNameFeature(FeatureModel): type: str = "function name" function_name: str = Field(alias="function name") - description: Optional[str] + description: Optional[str] = None class SubstringFeature(FeatureModel): type: str = "substring" substring: str - description: Optional[str] + description: Optional[str] = None class RegexFeature(FeatureModel): type: str = "regex" regex: str - description: Optional[str] + description: Optional[str] = None class StringFeature(FeatureModel): type: str = "string" string: str - description: Optional[str] + description: Optional[str] = None class ClassFeature(FeatureModel): type: str = "class" class_: str = Field(alias="class") - description: Optional[str] + description: Optional[str] = None class NamespaceFeature(FeatureModel): type: str = "namespace" namespace: str - description: Optional[str] + description: Optional[str] = None class BasicBlockFeature(FeatureModel): type: str = "basic block" - description: Optional[str] + description: Optional[str] = None class APIFeature(FeatureModel): type: str = "api" api: str - description: Optional[str] + description: Optional[str] 
= None class PropertyFeature(FeatureModel): type: str = "property" - access: Optional[str] + access: Optional[str] = None property: str - description: Optional[str] + description: Optional[str] = None class NumberFeature(FeatureModel): type: str = "number" number: Union[int, float] - description: Optional[str] + description: Optional[str] = None class BytesFeature(FeatureModel): type: str = "bytes" bytes: str - description: Optional[str] + description: Optional[str] = None class OffsetFeature(FeatureModel): type: str = "offset" offset: int - description: Optional[str] + description: Optional[str] = None class MnemonicFeature(FeatureModel): type: str = "mnemonic" mnemonic: str - description: Optional[str] + description: Optional[str] = None class OperandNumberFeature(FeatureModel): type: str = "operand number" index: int operand_number: int = Field(alias="operand number") - description: Optional[str] + description: Optional[str] = None class OperandOffsetFeature(FeatureModel): type: str = "operand offset" index: int operand_offset: int = Field(alias="operand offset") - description: Optional[str] + description: Optional[str] = None Feature = Union[ diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 19628e2a..aa076ede 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -1304,7 +1304,7 @@ class CapaExplorerForm(idaapi.PluginForm): idaapi.info("No program analysis to save.") return - s = self.resdoc_cache.json().encode("utf-8") + s = self.resdoc_cache.model_dump_json().encode("utf-8") path = Path(self.ask_user_capa_json_file()) if not path.exists(): diff --git a/capa/render/json.py b/capa/render/json.py index d015641d..dcd535fe 100644 --- a/capa/render/json.py +++ b/capa/render/json.py @@ -11,4 +11,4 @@ from capa.engine import MatchResults def render(meta, rules: RuleSet, capabilities: MatchResults) -> str: - return rd.ResultDocument.from_capa(meta, rules, capabilities).json(exclude_none=True) + return 
rd.ResultDocument.from_capa(meta, rules, capabilities).model_dump_json(exclude_none=True) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 0919207c..8874b613 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -9,7 +9,7 @@ import datetime import collections from typing import Dict, List, Tuple, Union, Optional -from pydantic import Field, BaseModel +from pydantic import Field, BaseModel, ConfigDict import capa.rules import capa.engine @@ -23,14 +23,11 @@ from capa.helpers import assert_never class FrozenModel(BaseModel): - class Config: - frozen = True - extra = "forbid" + model_config = ConfigDict(frozen=True, extra="forbid") class Model(BaseModel): - class Config: - extra = "forbid" + model_config = ConfigDict(extra="forbid") class Sample(Model): @@ -83,7 +80,7 @@ class Analysis(Model): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] + argv: Optional[Tuple[str, ...]] = None sample: Sample analysis: Analysis @@ -105,13 +102,13 @@ class CompoundStatement(StatementModel): class SomeStatement(StatementModel): - type = "some" + type: str = "some" description: Optional[str] = None count: int class RangeStatement(StatementModel): - type = "range" + type: str = "range" description: Optional[str] = None min: int max: int @@ -119,7 +116,7 @@ class RangeStatement(StatementModel): class SubscopeStatement(StatementModel): - type = "subscope" + type: str = "subscope" description: Optional[str] = None scope: capa.rules.Scope @@ -134,7 +131,7 @@ Statement = Union[ class StatementNode(FrozenModel): - type = "statement" + type: str = "statement" statement: Statement @@ -171,7 +168,7 @@ def statement_from_capa(node: capa.engine.Statement) -> Statement: class FeatureNode(FrozenModel): - type = "feature" + type: str = "feature" feature: frz.Feature @@ -500,15 +497,12 @@ class MaecMetadata(FrozenModel): malware_family: Optional[str] = Field(None, alias="malware-family") 
malware_category: Optional[str] = Field(None, alias="malware-category") malware_category_ov: Optional[str] = Field(None, alias="malware-category-ov") - - class Config: - frozen = True - allow_population_by_field_name = True + model_config = ConfigDict(frozen=True, populate_by_name=True) class RuleMetadata(FrozenModel): name: str - namespace: Optional[str] + namespace: Optional[str] = None authors: Tuple[str, ...] scope: capa.rules.Scope attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") @@ -546,9 +540,7 @@ class RuleMetadata(FrozenModel): ) # type: ignore # Mypy is unable to recognise arguments due to alias - class Config: - frozen = True - allow_population_by_field_name = True + model_config = ConfigDict(frozen=True, populate_by_name=True) class RuleMatches(FrozenModel): diff --git a/pyproject.toml b/pyproject.toml index c5911126..5e8c5d68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ dependencies = [ "pyelftools==0.29", "dnfile==0.13.0", "dncil==1.0.2", - "pydantic==1.10.9", + "pydantic==2.1.1", "protobuf==4.23.4", ] dynamic = ["version"] diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index fb6ecdd3..5ca3f6d1 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -214,7 +214,9 @@ def main(argv=None): if result["status"] == "error": logger.warning(result["error"]) elif result["status"] == "ok": - results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True) + results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).model_dump_json( + exclude_none=True + ) else: raise ValueError(f"unexpected status: {result['status']}") diff --git a/scripts/proto-to-results.py b/scripts/proto-to-results.py index 28092a2b..3bb16570 100644 --- a/scripts/proto-to-results.py +++ b/scripts/proto-to-results.py @@ -78,7 +78,7 @@ def main(argv=None): rdpb.ParseFromString(pb) rd = capa.render.proto.doc_from_pb2(rdpb) - print(rd.json(exclude_none=True, indent=2, 
sort_keys=True)) + print(rd.model_dump_json(exclude_none=True, indent=2)) if __name__ == "__main__": diff --git a/tests/test_result_document.py b/tests/test_result_document.py index 27a1dbb2..352e126d 100644 --- a/tests/test_result_document.py +++ b/tests/test_result_document.py @@ -236,7 +236,7 @@ def test_basic_block_node_from_capa(): def assert_round_trip(rd: rdoc.ResultDocument): one = rd - doc = one.json(exclude_none=True) + doc = one.model_dump_json(exclude_none=True) two = rdoc.ResultDocument.parse_raw(doc) # show the round trip works @@ -244,14 +244,14 @@ def assert_round_trip(rd: rdoc.ResultDocument): # which works thanks to pydantic model equality. assert one == two # second by showing their json representations are the same. - assert one.json(exclude_none=True) == two.json(exclude_none=True) + assert one.model_dump_json(exclude_none=True) == two.model_dump_json(exclude_none=True) # now show that two different versions are not equal. three = copy.deepcopy(two) three.meta.__dict__.update({"version": "0.0.0"}) assert one.meta.version != three.meta.version assert one != three - assert one.json(exclude_none=True) != three.json(exclude_none=True) + assert one.model_dump_json(exclude_none=True) != three.model_dump_json(exclude_none=True) @pytest.mark.parametrize( From c7dde262edfb24ae83f6aaa28b43821095e76d6d Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Thu, 3 Aug 2023 22:40:01 +0530 Subject: [PATCH 2/6] remove initial instantiation for fields. 
--- capa/features/freeze/__init__.py | 2 +- capa/features/freeze/features.py | 48 ++++++++++++++++---------------- capa/render/result_document.py | 4 +-- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index ebbf7d1d..c564a3ac 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -45,7 +45,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, int], None] = None + value: Union[int, Tuple[int, int], None] @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py index dd0b1f2f..f4f18088 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -211,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": class OSFeature(FeatureModel): type: str = "os" os: str - description: Optional[str] = None + description: Optional[str] class ArchFeature(FeatureModel): type: str = "arch" arch: str - description: Optional[str] = None + description: Optional[str] class FormatFeature(FeatureModel): type: str = "format" format: str - description: Optional[str] = None + description: Optional[str] class MatchFeature(FeatureModel): type: str = "match" match: str - description: Optional[str] = None + description: Optional[str] class CharacteristicFeature(FeatureModel): type: str = "characteristic" characteristic: str - description: Optional[str] = None + description: Optional[str] class ExportFeature(FeatureModel): type: str = "export" export: str - description: Optional[str] = None + description: Optional[str] class ImportFeature(FeatureModel): type: str = "import" import_: str = Field(alias="import") - description: Optional[str] = None + description: Optional[str] class SectionFeature(FeatureModel): type: str = "section" section: str - description: 
Optional[str] = None + description: Optional[str] class FunctionNameFeature(FeatureModel): type: str = "function name" function_name: str = Field(alias="function name") - description: Optional[str] = None + description: Optional[str] class SubstringFeature(FeatureModel): type: str = "substring" substring: str - description: Optional[str] = None + description: Optional[str] class RegexFeature(FeatureModel): type: str = "regex" regex: str - description: Optional[str] = None + description: Optional[str] class StringFeature(FeatureModel): type: str = "string" string: str - description: Optional[str] = None + description: Optional[str] class ClassFeature(FeatureModel): type: str = "class" class_: str = Field(alias="class") - description: Optional[str] = None + description: Optional[str] class NamespaceFeature(FeatureModel): type: str = "namespace" namespace: str - description: Optional[str] = None + description: Optional[str] class BasicBlockFeature(FeatureModel): type: str = "basic block" - description: Optional[str] = None + description: Optional[str] class APIFeature(FeatureModel): type: str = "api" api: str - description: Optional[str] = None + description: Optional[str] class PropertyFeature(FeatureModel): type: str = "property" - access: Optional[str] = None + access: Optional[str] property: str - description: Optional[str] = None + description: Optional[str] class NumberFeature(FeatureModel): type: str = "number" number: Union[int, float] - description: Optional[str] = None + description: Optional[str] class BytesFeature(FeatureModel): type: str = "bytes" bytes: str - description: Optional[str] = None + description: Optional[str] class OffsetFeature(FeatureModel): type: str = "offset" offset: int - description: Optional[str] = None + description: Optional[str] class MnemonicFeature(FeatureModel): type: str = "mnemonic" mnemonic: str - description: Optional[str] = None + description: Optional[str] class OperandNumberFeature(FeatureModel): type: str = "operand 
number" index: int operand_number: int = Field(alias="operand number") - description: Optional[str] = None + description: Optional[str] class OperandOffsetFeature(FeatureModel): type: str = "operand offset" index: int operand_offset: int = Field(alias="operand offset") - description: Optional[str] = None + description: Optional[str] Feature = Union[ diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 8874b613..47591f24 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -80,7 +80,7 @@ class Analysis(Model): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] = None + argv: Optional[Tuple[str, ...]] sample: Sample analysis: Analysis @@ -502,7 +502,7 @@ class MaecMetadata(FrozenModel): class RuleMetadata(FrozenModel): name: str - namespace: Optional[str] = None + namespace: Optional[str] authors: Tuple[str, ...] scope: capa.rules.Scope attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") From 261baca683951b6e37c824d2d44eb533045e73bd Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 4 Aug 2023 01:35:41 +0530 Subject: [PATCH 3/6] updated deprecated functions --- capa/features/freeze/__init__.py | 2 +- capa/features/freeze/features.py | 48 ++++++++++++++++---------------- capa/render/proto/__init__.py | 4 +-- capa/render/result_document.py | 16 +++++------ capa/render/vverbose.py | 4 +-- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index c564a3ac..ebbf7d1d 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -45,7 +45,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, int], None] + value: Union[int, Tuple[int, int], None] = None @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": diff --git 
a/capa/features/freeze/features.py b/capa/features/freeze/features.py index f4f18088..dd0b1f2f 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -211,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": class OSFeature(FeatureModel): type: str = "os" os: str - description: Optional[str] + description: Optional[str] = None class ArchFeature(FeatureModel): type: str = "arch" arch: str - description: Optional[str] + description: Optional[str] = None class FormatFeature(FeatureModel): type: str = "format" format: str - description: Optional[str] + description: Optional[str] = None class MatchFeature(FeatureModel): type: str = "match" match: str - description: Optional[str] + description: Optional[str] = None class CharacteristicFeature(FeatureModel): type: str = "characteristic" characteristic: str - description: Optional[str] + description: Optional[str] = None class ExportFeature(FeatureModel): type: str = "export" export: str - description: Optional[str] + description: Optional[str] = None class ImportFeature(FeatureModel): type: str = "import" import_: str = Field(alias="import") - description: Optional[str] + description: Optional[str] = None class SectionFeature(FeatureModel): type: str = "section" section: str - description: Optional[str] + description: Optional[str] = None class FunctionNameFeature(FeatureModel): type: str = "function name" function_name: str = Field(alias="function name") - description: Optional[str] + description: Optional[str] = None class SubstringFeature(FeatureModel): type: str = "substring" substring: str - description: Optional[str] + description: Optional[str] = None class RegexFeature(FeatureModel): type: str = "regex" regex: str - description: Optional[str] + description: Optional[str] = None class StringFeature(FeatureModel): type: str = "string" string: str - description: Optional[str] + description: Optional[str] = None class ClassFeature(FeatureModel): type: 
str = "class" class_: str = Field(alias="class") - description: Optional[str] + description: Optional[str] = None class NamespaceFeature(FeatureModel): type: str = "namespace" namespace: str - description: Optional[str] + description: Optional[str] = None class BasicBlockFeature(FeatureModel): type: str = "basic block" - description: Optional[str] + description: Optional[str] = None class APIFeature(FeatureModel): type: str = "api" api: str - description: Optional[str] + description: Optional[str] = None class PropertyFeature(FeatureModel): type: str = "property" - access: Optional[str] + access: Optional[str] = None property: str - description: Optional[str] + description: Optional[str] = None class NumberFeature(FeatureModel): type: str = "number" number: Union[int, float] - description: Optional[str] + description: Optional[str] = None class BytesFeature(FeatureModel): type: str = "bytes" bytes: str - description: Optional[str] + description: Optional[str] = None class OffsetFeature(FeatureModel): type: str = "offset" offset: int - description: Optional[str] + description: Optional[str] = None class MnemonicFeature(FeatureModel): type: str = "mnemonic" mnemonic: str - description: Optional[str] + description: Optional[str] = None class OperandNumberFeature(FeatureModel): type: str = "operand number" index: int operand_number: int = Field(alias="operand number") - description: Optional[str] + description: Optional[str] = None class OperandOffsetFeature(FeatureModel): type: str = "operand offset" index: int operand_offset: int = Field(alias="operand offset") - description: Optional[str] + description: Optional[str] = None Feature = Union[ diff --git a/capa/render/proto/__init__.py b/capa/render/proto/__init__.py index 2457b7ec..03aed65c 100644 --- a/capa/render/proto/__init__.py +++ b/capa/render/proto/__init__.py @@ -126,7 +126,7 @@ def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata: timestamp=str(meta.timestamp), version=meta.version, argv=meta.argv, - 
sample=google.protobuf.json_format.ParseDict(meta.sample.dict(), capa_pb2.Sample()), + sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()), analysis=capa_pb2.Analysis( format=meta.analysis.format, arch=meta.analysis.arch, @@ -393,7 +393,7 @@ def match_to_pb2(match: rd.Match) -> capa_pb2.Match: def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadata: # after manual type conversions to the RuleMetadata, we can rely on the protobuf json parser # conversions include tuple -> list and rd.Enum -> proto.enum - meta = dict_tuple_to_list_values(rule_metadata.dict()) + meta = dict_tuple_to_list_values(rule_metadata.model_dump()) meta["scope"] = scope_to_pb2(meta["scope"]) meta["attack"] = list(map(dict_tuple_to_list_values, meta.get("attack", []))) meta["mbc"] = list(map(dict_tuple_to_list_values, meta.get("mbc", []))) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 47591f24..03ea245f 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import datetime import collections -from typing import Dict, List, Tuple, Union, Optional +from typing import Dict, List, Tuple, Union, Literal, Optional from pydantic import Field, BaseModel, ConfigDict @@ -80,7 +80,7 @@ class Analysis(Model): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] + argv: Optional[Tuple[str, ...]] = None sample: Sample analysis: Analysis @@ -102,13 +102,13 @@ class CompoundStatement(StatementModel): class SomeStatement(StatementModel): - type: str = "some" + type: Literal["some"] = "some" description: Optional[str] = None count: int class RangeStatement(StatementModel): - type: str = "range" + type: Literal["range"] = "range" description: Optional[str] = None min: int max: int @@ -116,7 +116,7 @@ class RangeStatement(StatementModel): class SubscopeStatement(StatementModel): - type: str = "subscope" + type: Literal["subscope"] = "subscope" description: Optional[str] = None scope: capa.rules.Scope @@ -131,7 +131,7 @@ Statement = Union[ class StatementNode(FrozenModel): - type: str = "statement" + type: Literal["statement"] = "statement" statement: Statement @@ -168,7 +168,7 @@ def statement_from_capa(node: capa.engine.Statement) -> Statement: class FeatureNode(FrozenModel): - type: str = "feature" + type: Literal["feature"] = "feature" feature: frz.Feature @@ -502,7 +502,7 @@ class MaecMetadata(FrozenModel): class RuleMetadata(FrozenModel): name: str - namespace: Optional[str] + namespace: Optional[str] = None authors: Tuple[str, ...] scope: capa.rules.Scope attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 59189833..03ff8c84 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -88,7 +88,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0 # so, we have to inline some of the feature rendering here. 
child = statement.child - value = child.dict(by_alias=True).get(child.type) + value = child.model_dump(by_alias=True).get(child.type) if value: if isinstance(child, frzf.StringFeature): @@ -141,7 +141,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): value = feature.class_ else: # convert attributes to dictionary using aliased names, if applicable - value = feature.dict(by_alias=True).get(key) + value = feature.model_dump(by_alias=True).get(key) if value is None: raise ValueError(f"{key} contains None") From fd61456164fc8502b7e42a413c9ab3c0dab0817a Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 4 Aug 2023 09:07:13 +0530 Subject: [PATCH 4/6] Update capa/features/freeze/__init__.py Co-authored-by: Willi Ballenthin --- capa/features/freeze/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index ebbf7d1d..d29048a9 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -45,7 +45,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, int], None] = None + value: Union[int, Tuple[int, int], None] = None # None default value to support deserialization of NO_ADDRESS @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": From a4b00b9064b12a95061974f30129f4347a668b1e Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:26:56 +0530 Subject: [PATCH 5/6] remove exclude_none = True to not drop none fields --- capa/render/result_document.py | 2 +- scripts/bulk-process.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 03ea245f..ef899fd2 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ 
-80,7 +80,7 @@ class Analysis(Model): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] = None + argv: Optional[Tuple[str, ...]] sample: Sample analysis: Analysis diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 5ca3f6d1..264e7719 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -144,8 +144,7 @@ def get_capa_results(args): meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities) doc = rd.ResultDocument.from_capa(meta, rules, capabilities) - - return {"path": path, "status": "ok", "ok": doc.dict(exclude_none=True)} + return {"path": path, "status": "ok", "ok": doc.model_dump()} def main(argv=None): @@ -214,9 +213,7 @@ def main(argv=None): if result["status"] == "error": logger.warning(result["error"]) elif result["status"] == "ok": - results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).model_dump_json( - exclude_none=True - ) + results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json() else: raise ValueError(f"unexpected status: {result['status']}") From 851da255605f408f8c4c0b2fc18955f64faab350 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:43:34 +0530 Subject: [PATCH 6/6] Update bulk-process.py --- scripts/bulk-process.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 264e7719..64c05417 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -213,7 +213,9 @@ def main(argv=None): if result["status"] == "error": logger.warning(result["error"]) elif result["status"] == "ok": - results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json() + results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json( + exclude_none=True + ) else: raise 
ValueError(f"unexpected status: {result['status']}")