From 2bed3468f61ea358747836893edb5a8bb4509a23 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:21:46 +0530 Subject: [PATCH 01/27] bump pydantic to 2.1.1 --- CHANGELOG.md | 3 +- capa/features/common.py | 4 +-- capa/features/freeze/__init__.py | 29 ++++++----------- capa/features/freeze/features.py | 54 +++++++++++++++----------------- capa/ida/plugin/form.py | 2 +- capa/render/json.py | 2 +- capa/render/result_document.py | 32 +++++++------------ pyproject.toml | 2 +- scripts/bulk-process.py | 4 ++- scripts/proto-to-results.py | 2 +- tests/test_result_document.py | 6 ++-- 11 files changed, 61 insertions(+), 79 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f285d59e..d4639a58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,8 @@ ## master (unreleased) ### New Features -- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04 +- ELF: implement file import and export name extractor #1607 #1608 @Aayush-Goel-04 +- bump pydantic from 1.10.9 to 2.1.1 #1582 @Aayush-Goel-04 ### Breaking Changes diff --git a/capa/features/common.py b/capa/features/common.py index 3216c38e..9278f7e8 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -136,8 +136,8 @@ class Feature(abc.ABC): # noqa: B024 import capa.features.freeze.features return ( - capa.features.freeze.features.feature_from_capa(self).json() - < capa.features.freeze.features.feature_from_capa(other).json() + capa.features.freeze.features.feature_from_capa(self).model_dump_json() + < capa.features.freeze.features.feature_from_capa(other).model_dump_json() ) def get_name_str(self) -> str: diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index 9584990f..ebbf7d1d 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -14,7 +14,7 @@ import logging from enum import Enum from typing import List, Tuple, Union -from pydantic import Field, 
BaseModel +from pydantic import Field, BaseModel, ConfigDict import capa.helpers import capa.version @@ -31,8 +31,7 @@ logger = logging.getLogger(__name__) class HashableModel(BaseModel): - class Config: - frozen = True + model_config = ConfigDict(frozen=True) class AddressType(str, Enum): @@ -46,7 +45,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, int], None] + value: Union[int, Tuple[int, int], None] = None @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": @@ -159,9 +158,7 @@ class BasicBlockFeature(HashableModel): basic_block: Address = Field(alias="basic block") address: Address feature: Feature - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class InstructionFeature(HashableModel): @@ -194,26 +191,20 @@ class FunctionFeatures(BaseModel): address: Address features: Tuple[FunctionFeature, ...] basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks") - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class Features(BaseModel): global_: Tuple[GlobalFeature, ...] = Field(alias="global") file: Tuple[FileFeature, ...] functions: Tuple[FunctionFeatures, ...] 
- - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class Extractor(BaseModel): name: str version: str = capa.version.__version__ - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class Freeze(BaseModel): @@ -221,9 +212,7 @@ class Freeze(BaseModel): base_address: Address = Field(alias="base address") extractor: Extractor features: Features - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> str: @@ -324,7 +313,7 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) - ) # type: ignore # Mypy is unable to recognise `base_address` as a argument due to alias - return freeze.json() + return freeze.model_dump_json() def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor: diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py index ba651672..dd0b1f2f 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -8,7 +8,7 @@ import binascii from typing import Union, Optional -from pydantic import Field, BaseModel +from pydantic import Field, BaseModel, ConfigDict import capa.features.file import capa.features.insn @@ -17,9 +17,7 @@ import capa.features.basicblock class FeatureModel(BaseModel): - class Config: - frozen = True - allow_population_by_field_name = True + model_config = ConfigDict(frozen=True, populate_by_name=True) def to_capa(self) -> capa.features.common.Feature: if isinstance(self, OSFeature): @@ -213,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": class OSFeature(FeatureModel): type: str = "os" os: str - description: Optional[str] + description: Optional[str] = None class ArchFeature(FeatureModel): type: str = "arch" arch: str - description: Optional[str] + description: 
Optional[str] = None class FormatFeature(FeatureModel): type: str = "format" format: str - description: Optional[str] + description: Optional[str] = None class MatchFeature(FeatureModel): type: str = "match" match: str - description: Optional[str] + description: Optional[str] = None class CharacteristicFeature(FeatureModel): type: str = "characteristic" characteristic: str - description: Optional[str] + description: Optional[str] = None class ExportFeature(FeatureModel): type: str = "export" export: str - description: Optional[str] + description: Optional[str] = None class ImportFeature(FeatureModel): type: str = "import" import_: str = Field(alias="import") - description: Optional[str] + description: Optional[str] = None class SectionFeature(FeatureModel): type: str = "section" section: str - description: Optional[str] + description: Optional[str] = None class FunctionNameFeature(FeatureModel): type: str = "function name" function_name: str = Field(alias="function name") - description: Optional[str] + description: Optional[str] = None class SubstringFeature(FeatureModel): type: str = "substring" substring: str - description: Optional[str] + description: Optional[str] = None class RegexFeature(FeatureModel): type: str = "regex" regex: str - description: Optional[str] + description: Optional[str] = None class StringFeature(FeatureModel): type: str = "string" string: str - description: Optional[str] + description: Optional[str] = None class ClassFeature(FeatureModel): type: str = "class" class_: str = Field(alias="class") - description: Optional[str] + description: Optional[str] = None class NamespaceFeature(FeatureModel): type: str = "namespace" namespace: str - description: Optional[str] + description: Optional[str] = None class BasicBlockFeature(FeatureModel): type: str = "basic block" - description: Optional[str] + description: Optional[str] = None class APIFeature(FeatureModel): type: str = "api" api: str - description: Optional[str] + description: Optional[str] 
= None class PropertyFeature(FeatureModel): type: str = "property" - access: Optional[str] + access: Optional[str] = None property: str - description: Optional[str] + description: Optional[str] = None class NumberFeature(FeatureModel): type: str = "number" number: Union[int, float] - description: Optional[str] + description: Optional[str] = None class BytesFeature(FeatureModel): type: str = "bytes" bytes: str - description: Optional[str] + description: Optional[str] = None class OffsetFeature(FeatureModel): type: str = "offset" offset: int - description: Optional[str] + description: Optional[str] = None class MnemonicFeature(FeatureModel): type: str = "mnemonic" mnemonic: str - description: Optional[str] + description: Optional[str] = None class OperandNumberFeature(FeatureModel): type: str = "operand number" index: int operand_number: int = Field(alias="operand number") - description: Optional[str] + description: Optional[str] = None class OperandOffsetFeature(FeatureModel): type: str = "operand offset" index: int operand_offset: int = Field(alias="operand offset") - description: Optional[str] + description: Optional[str] = None Feature = Union[ diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 19628e2a..aa076ede 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -1304,7 +1304,7 @@ class CapaExplorerForm(idaapi.PluginForm): idaapi.info("No program analysis to save.") return - s = self.resdoc_cache.json().encode("utf-8") + s = self.resdoc_cache.model_dump_json().encode("utf-8") path = Path(self.ask_user_capa_json_file()) if not path.exists(): diff --git a/capa/render/json.py b/capa/render/json.py index d015641d..dcd535fe 100644 --- a/capa/render/json.py +++ b/capa/render/json.py @@ -11,4 +11,4 @@ from capa.engine import MatchResults def render(meta, rules: RuleSet, capabilities: MatchResults) -> str: - return rd.ResultDocument.from_capa(meta, rules, capabilities).json(exclude_none=True) + return 
rd.ResultDocument.from_capa(meta, rules, capabilities).model_dump_json(exclude_none=True) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 0919207c..8874b613 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -9,7 +9,7 @@ import datetime import collections from typing import Dict, List, Tuple, Union, Optional -from pydantic import Field, BaseModel +from pydantic import Field, BaseModel, ConfigDict import capa.rules import capa.engine @@ -23,14 +23,11 @@ from capa.helpers import assert_never class FrozenModel(BaseModel): - class Config: - frozen = True - extra = "forbid" + model_config = ConfigDict(frozen=True, extra="forbid") class Model(BaseModel): - class Config: - extra = "forbid" + model_config = ConfigDict(extra="forbid") class Sample(Model): @@ -83,7 +80,7 @@ class Analysis(Model): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] + argv: Optional[Tuple[str, ...]] = None sample: Sample analysis: Analysis @@ -105,13 +102,13 @@ class CompoundStatement(StatementModel): class SomeStatement(StatementModel): - type = "some" + type: str = "some" description: Optional[str] = None count: int class RangeStatement(StatementModel): - type = "range" + type: str = "range" description: Optional[str] = None min: int max: int @@ -119,7 +116,7 @@ class RangeStatement(StatementModel): class SubscopeStatement(StatementModel): - type = "subscope" + type: str = "subscope" description: Optional[str] = None scope: capa.rules.Scope @@ -134,7 +131,7 @@ Statement = Union[ class StatementNode(FrozenModel): - type = "statement" + type: str = "statement" statement: Statement @@ -171,7 +168,7 @@ def statement_from_capa(node: capa.engine.Statement) -> Statement: class FeatureNode(FrozenModel): - type = "feature" + type: str = "feature" feature: frz.Feature @@ -500,15 +497,12 @@ class MaecMetadata(FrozenModel): malware_family: Optional[str] = Field(None, alias="malware-family") 
malware_category: Optional[str] = Field(None, alias="malware-category") malware_category_ov: Optional[str] = Field(None, alias="malware-category-ov") - - class Config: - frozen = True - allow_population_by_field_name = True + model_config = ConfigDict(frozen=True, populate_by_name=True) class RuleMetadata(FrozenModel): name: str - namespace: Optional[str] + namespace: Optional[str] = None authors: Tuple[str, ...] scope: capa.rules.Scope attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") @@ -546,9 +540,7 @@ class RuleMetadata(FrozenModel): ) # type: ignore # Mypy is unable to recognise arguments due to alias - class Config: - frozen = True - allow_population_by_field_name = True + model_config = ConfigDict(frozen=True, populate_by_name=True) class RuleMatches(FrozenModel): diff --git a/pyproject.toml b/pyproject.toml index c5911126..5e8c5d68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ dependencies = [ "pyelftools==0.29", "dnfile==0.13.0", "dncil==1.0.2", - "pydantic==1.10.9", + "pydantic==2.1.1", "protobuf==4.23.4", ] dynamic = ["version"] diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index fb6ecdd3..5ca3f6d1 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -214,7 +214,9 @@ def main(argv=None): if result["status"] == "error": logger.warning(result["error"]) elif result["status"] == "ok": - results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True) + results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).model_dump_json( + exclude_none=True + ) else: raise ValueError(f"unexpected status: {result['status']}") diff --git a/scripts/proto-to-results.py b/scripts/proto-to-results.py index 28092a2b..3bb16570 100644 --- a/scripts/proto-to-results.py +++ b/scripts/proto-to-results.py @@ -78,7 +78,7 @@ def main(argv=None): rdpb.ParseFromString(pb) rd = capa.render.proto.doc_from_pb2(rdpb) - print(rd.json(exclude_none=True, indent=2, 
sort_keys=True)) + print(rd.model_dump_json(exclude_none=True, indent=2)) if __name__ == "__main__": diff --git a/tests/test_result_document.py b/tests/test_result_document.py index 27a1dbb2..352e126d 100644 --- a/tests/test_result_document.py +++ b/tests/test_result_document.py @@ -236,7 +236,7 @@ def test_basic_block_node_from_capa(): def assert_round_trip(rd: rdoc.ResultDocument): one = rd - doc = one.json(exclude_none=True) + doc = one.model_dump_json(exclude_none=True) two = rdoc.ResultDocument.parse_raw(doc) # show the round trip works @@ -244,14 +244,14 @@ def assert_round_trip(rd: rdoc.ResultDocument): # which works thanks to pydantic model equality. assert one == two # second by showing their json representations are the same. - assert one.json(exclude_none=True) == two.json(exclude_none=True) + assert one.model_dump_json(exclude_none=True) == two.model_dump_json(exclude_none=True) # now show that two different versions are not equal. three = copy.deepcopy(two) three.meta.__dict__.update({"version": "0.0.0"}) assert one.meta.version != three.meta.version assert one != three - assert one.json(exclude_none=True) != three.json(exclude_none=True) + assert one.model_dump_json(exclude_none=True) != three.model_dump_json(exclude_none=True) @pytest.mark.parametrize( From c7dde262edfb24ae83f6aaa28b43821095e76d6d Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Thu, 3 Aug 2023 22:40:01 +0530 Subject: [PATCH 02/27] remove initial instantiation for fields. 
--- capa/features/freeze/__init__.py | 2 +- capa/features/freeze/features.py | 48 ++++++++++++++++---------------- capa/render/result_document.py | 4 +-- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index ebbf7d1d..c564a3ac 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -45,7 +45,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, int], None] = None + value: Union[int, Tuple[int, int], None] @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py index dd0b1f2f..f4f18088 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -211,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": class OSFeature(FeatureModel): type: str = "os" os: str - description: Optional[str] = None + description: Optional[str] class ArchFeature(FeatureModel): type: str = "arch" arch: str - description: Optional[str] = None + description: Optional[str] class FormatFeature(FeatureModel): type: str = "format" format: str - description: Optional[str] = None + description: Optional[str] class MatchFeature(FeatureModel): type: str = "match" match: str - description: Optional[str] = None + description: Optional[str] class CharacteristicFeature(FeatureModel): type: str = "characteristic" characteristic: str - description: Optional[str] = None + description: Optional[str] class ExportFeature(FeatureModel): type: str = "export" export: str - description: Optional[str] = None + description: Optional[str] class ImportFeature(FeatureModel): type: str = "import" import_: str = Field(alias="import") - description: Optional[str] = None + description: Optional[str] class SectionFeature(FeatureModel): type: str = "section" section: str - description: 
Optional[str] = None + description: Optional[str] class FunctionNameFeature(FeatureModel): type: str = "function name" function_name: str = Field(alias="function name") - description: Optional[str] = None + description: Optional[str] class SubstringFeature(FeatureModel): type: str = "substring" substring: str - description: Optional[str] = None + description: Optional[str] class RegexFeature(FeatureModel): type: str = "regex" regex: str - description: Optional[str] = None + description: Optional[str] class StringFeature(FeatureModel): type: str = "string" string: str - description: Optional[str] = None + description: Optional[str] class ClassFeature(FeatureModel): type: str = "class" class_: str = Field(alias="class") - description: Optional[str] = None + description: Optional[str] class NamespaceFeature(FeatureModel): type: str = "namespace" namespace: str - description: Optional[str] = None + description: Optional[str] class BasicBlockFeature(FeatureModel): type: str = "basic block" - description: Optional[str] = None + description: Optional[str] class APIFeature(FeatureModel): type: str = "api" api: str - description: Optional[str] = None + description: Optional[str] class PropertyFeature(FeatureModel): type: str = "property" - access: Optional[str] = None + access: Optional[str] property: str - description: Optional[str] = None + description: Optional[str] class NumberFeature(FeatureModel): type: str = "number" number: Union[int, float] - description: Optional[str] = None + description: Optional[str] class BytesFeature(FeatureModel): type: str = "bytes" bytes: str - description: Optional[str] = None + description: Optional[str] class OffsetFeature(FeatureModel): type: str = "offset" offset: int - description: Optional[str] = None + description: Optional[str] class MnemonicFeature(FeatureModel): type: str = "mnemonic" mnemonic: str - description: Optional[str] = None + description: Optional[str] class OperandNumberFeature(FeatureModel): type: str = "operand 
number" index: int operand_number: int = Field(alias="operand number") - description: Optional[str] = None + description: Optional[str] class OperandOffsetFeature(FeatureModel): type: str = "operand offset" index: int operand_offset: int = Field(alias="operand offset") - description: Optional[str] = None + description: Optional[str] Feature = Union[ diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 8874b613..47591f24 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -80,7 +80,7 @@ class Analysis(Model): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] = None + argv: Optional[Tuple[str, ...]] sample: Sample analysis: Analysis @@ -502,7 +502,7 @@ class MaecMetadata(FrozenModel): class RuleMetadata(FrozenModel): name: str - namespace: Optional[str] = None + namespace: Optional[str] authors: Tuple[str, ...] scope: capa.rules.Scope attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") From 261baca683951b6e37c824d2d44eb533045e73bd Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 4 Aug 2023 01:35:41 +0530 Subject: [PATCH 03/27] updated deprecated functions --- capa/features/freeze/__init__.py | 2 +- capa/features/freeze/features.py | 48 ++++++++++++++++---------------- capa/render/proto/__init__.py | 4 +-- capa/render/result_document.py | 16 +++++------ capa/render/vverbose.py | 4 +-- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index c564a3ac..ebbf7d1d 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -45,7 +45,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, int], None] + value: Union[int, Tuple[int, int], None] = None @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": diff --git
a/capa/features/freeze/features.py b/capa/features/freeze/features.py index f4f18088..dd0b1f2f 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -211,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": class OSFeature(FeatureModel): type: str = "os" os: str - description: Optional[str] + description: Optional[str] = None class ArchFeature(FeatureModel): type: str = "arch" arch: str - description: Optional[str] + description: Optional[str] = None class FormatFeature(FeatureModel): type: str = "format" format: str - description: Optional[str] + description: Optional[str] = None class MatchFeature(FeatureModel): type: str = "match" match: str - description: Optional[str] + description: Optional[str] = None class CharacteristicFeature(FeatureModel): type: str = "characteristic" characteristic: str - description: Optional[str] + description: Optional[str] = None class ExportFeature(FeatureModel): type: str = "export" export: str - description: Optional[str] + description: Optional[str] = None class ImportFeature(FeatureModel): type: str = "import" import_: str = Field(alias="import") - description: Optional[str] + description: Optional[str] = None class SectionFeature(FeatureModel): type: str = "section" section: str - description: Optional[str] + description: Optional[str] = None class FunctionNameFeature(FeatureModel): type: str = "function name" function_name: str = Field(alias="function name") - description: Optional[str] + description: Optional[str] = None class SubstringFeature(FeatureModel): type: str = "substring" substring: str - description: Optional[str] + description: Optional[str] = None class RegexFeature(FeatureModel): type: str = "regex" regex: str - description: Optional[str] + description: Optional[str] = None class StringFeature(FeatureModel): type: str = "string" string: str - description: Optional[str] + description: Optional[str] = None class ClassFeature(FeatureModel): type: 
str = "class" class_: str = Field(alias="class") - description: Optional[str] + description: Optional[str] = None class NamespaceFeature(FeatureModel): type: str = "namespace" namespace: str - description: Optional[str] + description: Optional[str] = None class BasicBlockFeature(FeatureModel): type: str = "basic block" - description: Optional[str] + description: Optional[str] = None class APIFeature(FeatureModel): type: str = "api" api: str - description: Optional[str] + description: Optional[str] = None class PropertyFeature(FeatureModel): type: str = "property" - access: Optional[str] + access: Optional[str] = None property: str - description: Optional[str] + description: Optional[str] = None class NumberFeature(FeatureModel): type: str = "number" number: Union[int, float] - description: Optional[str] + description: Optional[str] = None class BytesFeature(FeatureModel): type: str = "bytes" bytes: str - description: Optional[str] + description: Optional[str] = None class OffsetFeature(FeatureModel): type: str = "offset" offset: int - description: Optional[str] + description: Optional[str] = None class MnemonicFeature(FeatureModel): type: str = "mnemonic" mnemonic: str - description: Optional[str] + description: Optional[str] = None class OperandNumberFeature(FeatureModel): type: str = "operand number" index: int operand_number: int = Field(alias="operand number") - description: Optional[str] + description: Optional[str] = None class OperandOffsetFeature(FeatureModel): type: str = "operand offset" index: int operand_offset: int = Field(alias="operand offset") - description: Optional[str] + description: Optional[str] = None Feature = Union[ diff --git a/capa/render/proto/__init__.py b/capa/render/proto/__init__.py index 2457b7ec..03aed65c 100644 --- a/capa/render/proto/__init__.py +++ b/capa/render/proto/__init__.py @@ -126,7 +126,7 @@ def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata: timestamp=str(meta.timestamp), version=meta.version, argv=meta.argv, - 
sample=google.protobuf.json_format.ParseDict(meta.sample.dict(), capa_pb2.Sample()), + sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()), analysis=capa_pb2.Analysis( format=meta.analysis.format, arch=meta.analysis.arch, @@ -393,7 +393,7 @@ def match_to_pb2(match: rd.Match) -> capa_pb2.Match: def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadata: # after manual type conversions to the RuleMetadata, we can rely on the protobuf json parser # conversions include tuple -> list and rd.Enum -> proto.enum - meta = dict_tuple_to_list_values(rule_metadata.dict()) + meta = dict_tuple_to_list_values(rule_metadata.model_dump()) meta["scope"] = scope_to_pb2(meta["scope"]) meta["attack"] = list(map(dict_tuple_to_list_values, meta.get("attack", []))) meta["mbc"] = list(map(dict_tuple_to_list_values, meta.get("mbc", []))) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 47591f24..03ea245f 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import datetime import collections -from typing import Dict, List, Tuple, Union, Optional +from typing import Dict, List, Tuple, Union, Literal, Optional from pydantic import Field, BaseModel, ConfigDict @@ -80,7 +80,7 @@ class Analysis(Model): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] + argv: Optional[Tuple[str, ...]] = None sample: Sample analysis: Analysis @@ -102,13 +102,13 @@ class CompoundStatement(StatementModel): class SomeStatement(StatementModel): - type: str = "some" + type: Literal["some"] = "some" description: Optional[str] = None count: int class RangeStatement(StatementModel): - type: str = "range" + type: Literal["range"] = "range" description: Optional[str] = None min: int max: int @@ -116,7 +116,7 @@ class RangeStatement(StatementModel): class SubscopeStatement(StatementModel): - type: str = "subscope" + type: Literal["subscope"] = "subscope" description: Optional[str] = None scope: capa.rules.Scope @@ -131,7 +131,7 @@ Statement = Union[ class StatementNode(FrozenModel): - type: str = "statement" + type: Literal["statement"] = "statement" statement: Statement @@ -168,7 +168,7 @@ def statement_from_capa(node: capa.engine.Statement) -> Statement: class FeatureNode(FrozenModel): - type: str = "feature" + type: Literal["feature"] = "feature" feature: frz.Feature @@ -502,7 +502,7 @@ class MaecMetadata(FrozenModel): class RuleMetadata(FrozenModel): name: str - namespace: Optional[str] + namespace: Optional[str] = None authors: Tuple[str, ...] scope: capa.rules.Scope attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 59189833..03ff8c84 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -88,7 +88,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0 # so, we have to inline some of the feature rendering here. 
child = statement.child - value = child.dict(by_alias=True).get(child.type) + value = child.model_dump(by_alias=True).get(child.type) if value: if isinstance(child, frzf.StringFeature): @@ -141,7 +141,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): value = feature.class_ else: # convert attributes to dictionary using aliased names, if applicable - value = feature.dict(by_alias=True).get(key) + value = feature.model_dump(by_alias=True).get(key) if value is None: raise ValueError(f"{key} contains None") From fd61456164fc8502b7e42a413c9ab3c0dab0817a Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 4 Aug 2023 09:07:13 +0530 Subject: [PATCH 04/27] Update capa/features/freeze/__init__.py Co-authored-by: Willi Ballenthin --- capa/features/freeze/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index ebbf7d1d..d29048a9 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -45,7 +45,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, int], None] = None + value: Union[int, Tuple[int, int], None] = None # None default value to support deserialization of NO_ADDRESS @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": From a4b00b9064b12a95061974f30129f4347a668b1e Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:26:56 +0530 Subject: [PATCH 05/27] remove exclude_none = True to not drop none fields --- capa/render/result_document.py | 2 +- scripts/bulk-process.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 03ea245f..ef899fd2 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ 
-80,7 +80,7 @@ class Analysis(Model): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] = None + argv: Optional[Tuple[str, ...]] sample: Sample analysis: Analysis diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 5ca3f6d1..264e7719 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -144,8 +144,7 @@ def get_capa_results(args): meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities) doc = rd.ResultDocument.from_capa(meta, rules, capabilities) - - return {"path": path, "status": "ok", "ok": doc.dict(exclude_none=True)} + return {"path": path, "status": "ok", "ok": doc.model_dump()} def main(argv=None): @@ -214,9 +213,7 @@ def main(argv=None): if result["status"] == "error": logger.warning(result["error"]) elif result["status"] == "ok": - results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).model_dump_json( - exclude_none=True - ) + results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json() else: raise ValueError(f"unexpected status: {result['status']}") From 851da255605f408f8c4c0b2fc18955f64faab350 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:43:34 +0530 Subject: [PATCH 06/27] Update bulk-process.py --- scripts/bulk-process.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 264e7719..64c05417 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -213,7 +213,9 @@ def main(argv=None): if result["status"] == "error": logger.warning(result["error"]) elif result["status"] == "ok": - results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json() + results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json( + exclude_none=True + ) else: raise 
ValueError(f"unexpected status: {result['status']}") From 17aad56800f0da671091a471663428c4622aaaa3 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Sun, 6 Aug 2023 22:53:50 +0530 Subject: [PATCH 07/27] Script to get unused features Used show_features.py script --- capa/engine.py | 16 ++ scripts/detect_duplicate_features.py | 42 ++--- scripts/show-unused-features.py | 223 +++++++++++++++++++++++++++ 3 files changed, 252 insertions(+), 29 deletions(-) create mode 100644 scripts/show-unused-features.py diff --git a/capa/engine.py b/capa/engine.py index 8ae36d3e..ba199364 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -88,6 +88,22 @@ class Statement: if child is existing: children[i] = new + def get_all_features(self) -> Set[Feature]: + """ + recursively extracts all feature statements from a given rule statement. + + returns: + set: A set of all feature statements contained within the given feature statement. + """ + feature_set: set = set() + + for child in self.get_children(): + if isinstance(child, Statement): + feature_set.update(child.get_all_features()) + else: + feature_set.add(child) + return feature_set + class And(Statement): """ diff --git a/scripts/detect_duplicate_features.py b/scripts/detect_duplicate_features.py index dd9b9838..e48e2ade 100644 --- a/scripts/detect_duplicate_features.py +++ b/scripts/detect_duplicate_features.py @@ -8,38 +8,18 @@ import sys import logging import argparse +from typing import Set from pathlib import Path import capa.main import capa.rules import capa.engine as ceng +from capa.features.common import Feature logger = logging.getLogger("detect_duplicate_features") -def get_child_features(feature: ceng.Statement) -> list: - """ - Recursively extracts all feature statements from a given rule statement. - - Args: - feature (capa.engine.Statement): The feature statement to extract features from. 
- - Returns: - list: A list of all feature statements contained within the given feature statement. - """ - children = [] - - if isinstance(feature, (ceng.And, ceng.Or, ceng.Some)): - for child in feature.children: - children.extend(get_child_features(child)) - elif isinstance(feature, (ceng.Subscope, ceng.Range, ceng.Not)): - children.extend(get_child_features(feature.child)) - else: - children.append(feature) - return children - - -def get_features(rule_path: str) -> list: +def get_features(rule_path: str) -> Set[Feature]: """ Extracts all features from a given rule file. @@ -47,17 +27,18 @@ def get_features(rule_path: str) -> list: rule_path (str): The path to the rule file to extract features from. Returns: - list: A list of all feature statements contained within the rule file. + set: A set of all feature statements contained within the rule file. """ - feature_list = [] with Path(rule_path).open("r", encoding="utf-8") as f: try: new_rule = capa.rules.Rule.from_yaml(f.read()) - feature_list = get_child_features(new_rule.statement) + if isinstance(new_rule.statement, ceng.Statement): + return new_rule.statement.get_all_features() + else: + return (new_rule.statement,) except Exception as e: logger.error("Error: New rule %s %s %s", rule_path, str(type(e)), str(e)) sys.exit(-1) - return feature_list def find_overlapping_rules(new_rule_path, rules_path): @@ -67,7 +48,6 @@ def find_overlapping_rules(new_rule_path, rules_path): # Loads features of new rule in a list. 
new_rule_features = get_features(new_rule_path) - count = 0 overlapping_rules = [] @@ -75,7 +55,11 @@ def find_overlapping_rules(new_rule_path, rules_path): ruleset = capa.main.get_rules(rules_path) for rule_name, rule in ruleset.rules.items(): - rule_features = get_child_features(rule.statement) + rule_features = set() + if isinstance(rule.statement, ceng.Statement): + rule_features = rule.statement.get_all_features() + else: + rule_features.add(rule.statement) if not len(rule_features): continue diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py new file mode 100644 index 00000000..56f00c6a --- /dev/null +++ b/scripts/show-unused-features.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python2 +""" +Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. +You may obtain a copy of the License at: [package root]/LICENSE.txt +Unless required by applicable law or agreed to in writing, software distributed under the License + is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. 
+""" +import os +import sys +import logging +import argparse +from typing import Set, Tuple +from pathlib import Path + +import tabulate +from termcolor import colored + +import capa.main +import capa.rules +import capa.engine as ceng +import capa.helpers +import capa.features +import capa.exceptions +import capa.render.verbose as v +import capa.features.freeze +import capa.features.address +import capa.features.extractors.pefile +import capa.features.extractors.base_extractor +from capa.helpers import log_unsupported_runtime_error +from capa.features.common import Feature +from capa.features.extractors.base_extractor import FunctionHandle + +logger = logging.getLogger("capa.show-features") + + +def format_address(addr: capa.features.address.Address) -> str: + return v.format_address(capa.features.freeze.Address.from_capa((addr))) + + +def get_rules_feature_set(rules_path) -> Set[Feature]: + ruleset = capa.main.get_rules(rules_path) + rules_feature_set = set() + for _, rule in ruleset.rules.items(): + rule_features: set = set() + if isinstance(rule.statement, ceng.Statement): + rule_features.update(rule.statement.get_all_features()) + else: + rule_features.add(rule.statement) + + rules_feature_set.update(rule_features) + return rules_feature_set + + +def get_file_features(functions, extractor: capa.features.extractors.base_extractor.FeatureExtractor): + feature_map: dict = {} + + for f in functions: + if extractor.is_library_function(f.address): + function_name = extractor.get_function_name(f.address) + logger.debug("skipping library function %s (%s)", format_address(f.address), function_name) + continue + + for feature, _ in extractor.extract_function_features(f): + if capa.features.common.is_global_feature(feature): + continue + feature_map[feature] = feature_map.get(feature, 0) + 1 + + for bb in extractor.get_basic_blocks(f): + for feature, _ in extractor.extract_basic_block_features(f, bb): + if capa.features.common.is_global_feature(feature): + continue + 
feature_map[feature] = feature_map.get(feature, 0) + 1 + + for insn in extractor.get_instructions(f, bb): + for feature, _ in extractor.extract_insn_features(f, bb, insn): + if capa.features.common.is_global_feature(feature): + continue + feature_map[feature] = feature_map.get(feature, 0) + 1 + return feature_map + + +def main(argv=None): + if argv is None: + argv = sys.argv[1:] + + parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample") + capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend", "rules"}) + + parser.add_argument("-F", "--function", type=str, help="Show features for specific function") + args = parser.parse_args(args=argv) + capa.main.handle_common_args(args) + + if args.function and args.backend == "pefile": + print("pefile backend does not support extracting function features") + return -1 + + try: + taste = capa.helpers.get_file_taste(Path(args.sample)) + except IOError as e: + logger.error("%s", str(e)) + return -1 + + try: + sig_paths = capa.main.get_signatures(args.signatures) + except IOError as e: + logger.error("%s", str(e)) + return -1 + + if (args.format == "freeze") or ( + args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste) + ): + extractor = capa.features.freeze.load(Path(args.sample).read_bytes()) + else: + should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) + try: + extractor = capa.main.get_extractor( + args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace + ) + except capa.exceptions.UnsupportedFormatError: + capa.helpers.log_unsupported_format_error() + return -1 + except capa.exceptions.UnsupportedRuntimeError: + log_unsupported_runtime_error() + return -1 + + feature_map: dict = {} + + for feature, _ in extractor.extract_global_features(): + feature_map[feature] = feature_map.get(feature, 0) + 1 + + function_handles: 
Tuple[FunctionHandle, ...] + if isinstance(extractor, capa.features.extractors.pefile.PefileFeatureExtractor): + # pefile extractor doesn't extract function features + function_handles = () + else: + function_handles = tuple(extractor.get_functions()) + + if args.function: + if args.format == "freeze": + function_handles = tuple(filter(lambda fh: fh.address == args.function, function_handles)) + else: + function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles)) + + if args.function not in [format_address(fh.address) for fh in function_handles]: + print(f"{args.function} not a function") + return -1 + + if len(function_handles) == 0: + print(f"{args.function} not a function") + return -1 + + feature_map.update(get_file_features(function_handles, extractor)) + + rules_feature_set = get_rules_feature_set(args.rules) + + highlight_unused_features(feature_map, rules_feature_set) + return 0 + + +def ida_main(): + import idc + + import capa.main + import capa.features.extractors.ida.extractor + + function = idc.get_func_attr(idc.here(), idc.FUNCATTR_START) + print(f"getting features for current function {hex(function)}") + + extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor() + feature_map: dict = {} + + if not function: + for feature, _ in extractor.extract_file_features(): + feature_map[feature] = feature_map.get(feature, 0) + 1 + return + + function_handles = tuple(extractor.get_functions()) + + if function: + function_handles = tuple(filter(lambda fh: fh.inner.start_ea == function, function_handles)) + + if len(function_handles) == 0: + print(f"{hex(function)} not a function") + return -1 + + feature_map.update(get_file_features(function_handles, extractor)) + + rules_path = capa.main.get_default_root() / "rules" + rules_feature_set = get_rules_feature_set([rules_path]) + + highlight_unused_features(feature_map, rules_feature_set) + + return 0 + + +def highlight_unused_features(feature_map: dict, 
rules_feature_set: set): + unused_features = [] + for feature, count in feature_map.items(): + if feature in rules_feature_set: + continue + unused_features.append((get_colored(str(count)), get_colored(feature.__str__()))) + print("\n") + print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain")) + print("\n") + + +def get_colored(s: str): + if "(" in s: + s_split = s.split("(") + s_split[0] = colored(s_split[0], "cyan") + return "(".join(s_split) + else: + return colored(s, "cyan") + + +if __name__ == "__main__": + if capa.helpers.is_runtime_ida(): + ida_main() + else: + sys.exit(main()) From a2254852b02e68f853d908269895fc06bbc9dc76 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Sun, 6 Aug 2023 22:55:54 +0530 Subject: [PATCH 08/27] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f285d59e..0e7a99b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### New Features - ELF: implement file import and export name extractor #1607 @Aayush-Goel-04 +- develop script to highlight the features that are not used during matching #331 @Aayush-Goel-04 ### Breaking Changes From 9660f1e5abba7d9ba40c3213febf9294e3450119 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Mon, 7 Aug 2023 14:00:30 +0200 Subject: [PATCH 09/27] add default backend --- scripts/lint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lint.py b/scripts/lint.py index 85de2bfc..2d03418c 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -309,7 +309,7 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]: logger.debug("analyzing sample: %s", nice_path) extractor = capa.main.get_extractor( - nice_path, format_, OS_AUTO, "", DEFAULT_SIGNATURES, False, disable_progress=True + nice_path, format_, OS_AUTO, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True ) capabilities, _ = 
capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True) From f50a5e8efcd8bc3a52155a0d8eba444453176c52 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 7 Aug 2023 13:28:05 +0000 Subject: [PATCH 10/27] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 7685a232..e7d0b790 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 7685a232d94acbe7e69addb8bd89d752c9fa27a2 +Subproject commit e7d0b790b452da362986e43859775b1c2953a111 From 298a07dc07c15918d70c61dcf4de675373849381 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 7 Aug 2023 13:31:25 +0000 Subject: [PATCH 11/27] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03aea0e7..5dd14d3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,11 +8,12 @@ ### Breaking Changes -### New Rules (4) +### New Rules (5) - executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com - host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com - host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com +- nursery/enumerate-device-drivers-on-linux @mr-tz - ### Bug Fixes diff --git a/README.md b/README.md index 1bf6b159..fef7e4d6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-826-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-828-blue.svg)](https://github.com/mandiant/capa-rules) [![CI 
status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index e7d0b790..3e7e50c0 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit e7d0b790b452da362986e43859775b1c2953a111 +Subproject commit 3e7e50c09ad10845d03846ec247bbea0eea82337 From eb7aa63be63cb3e47c4b7896158977a61d41cb4d Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 7 Aug 2023 13:54:49 +0000 Subject: [PATCH 12/27] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 3e7e50c0..149cf2d1 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 3e7e50c09ad10845d03846ec247bbea0eea82337 +Subproject commit 149cf2d133a0ea08b4eb250388e9f93c67b83cbf From 57e393bf7a85fc1dde3200279a3dcdc72f53c0dc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Aug 2023 14:42:21 +0000 Subject: [PATCH 13/27] build(deps-dev): bump ruff from 0.0.280 to 0.0.282 Bumps [ruff](https://github.com/astral-sh/ruff) from 0.0.280 to 0.0.282. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/BREAKING_CHANGES.md) - [Commits](https://github.com/astral-sh/ruff/compare/v0.0.280...v0.0.282) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5e8c5d68..45dcbe97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dev = [ "flake8-simplify==0.20.0", "flake8-use-pathlib==0.3.0", "flake8-copyright==0.2.4", - "ruff==0.0.280", + "ruff==0.0.282", "black==23.7.0", "isort==5.11.4", "mypy==1.4.1", From 358888178a9c47c5265ffddd2f8028f32b013dff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Aug 2023 14:42:32 +0000 Subject: [PATCH 14/27] build(deps-dev): bump flake8 from 6.0.0 to 6.1.0 Bumps [flake8](https://github.com/pycqa/flake8) from 6.0.0 to 6.1.0. - [Commits](https://github.com/pycqa/flake8/compare/6.0.0...6.1.0) --- updated-dependencies: - dependency-name: flake8 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5e8c5d68..952ebebf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ dev = [ "pytest-sugar==0.9.7", "pytest-instafail==0.5.0", "pytest-cov==4.1.0", - "flake8==6.0.0", + "flake8==6.1.0", "flake8-bugbear==23.7.10", "flake8-encodings==0.5.0.post1", "flake8-comprehensions==3.14.0", From 75a4f309b440cd8b444caa23aa87369f74e8a685 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Aug 2023 14:42:39 +0000 Subject: [PATCH 15/27] build(deps-dev): bump types-protobuf from 4.23.0.2 to 4.23.0.3 Bumps [types-protobuf](https://github.com/python/typeshed) from 4.23.0.2 to 4.23.0.3. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-protobuf dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5e8c5d68..b5a44a82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,7 @@ dev = [ "types-termcolor==1.1.4", "types-psutil==5.8.23", "types_requests==2.31.0.2", - "types-protobuf==4.23.0.2", + "types-protobuf==4.23.0.3", ] build = [ "pyinstaller==5.10.1", From 80e224ec7c544f24073d6a385d3ad42d01e66b15 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Aug 2023 14:42:44 +0000 Subject: [PATCH 16/27] build(deps-dev): bump mypy-protobuf from 3.4.0 to 3.5.0 Bumps [mypy-protobuf](https://github.com/nipunn1313/mypy-protobuf) from 3.4.0 to 3.5.0. - [Changelog](https://github.com/nipunn1313/mypy-protobuf/blob/main/CHANGELOG.md) - [Commits](https://github.com/nipunn1313/mypy-protobuf/compare/v3.4.0...v3.5.0) --- updated-dependencies: - dependency-name: mypy-protobuf dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5e8c5d68..29933256 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ dev = [ "psutil==5.9.2", "stix2==3.0.1", "requests==2.31.0", - "mypy-protobuf==3.4.0", + "mypy-protobuf==3.5.0", # type stubs for mypy "types-backports==0.1.3", "types-colorama==0.4.15.11", From 0fdc1dd3f595ef1e1218226bd81a1d3937a57e70 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Mon, 7 Aug 2023 21:00:29 +0530 Subject: [PATCH 17/27] Type Hints done , get_all_feature to Rule class --- capa/engine.py | 16 ------- capa/rules/__init__.py | 27 ++++++++++++ scripts/detect_duplicate_features.py | 12 +----- scripts/show-unused-features.py | 62 ++++++++++++++-------------- 4 files changed, 59 insertions(+), 58 deletions(-) diff --git a/capa/engine.py b/capa/engine.py index ba199364..8ae36d3e 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -88,22 +88,6 @@ class Statement: if child is existing: children[i] = new - def get_all_features(self) -> Set[Feature]: - """ - recursively extracts all feature statements from a given rule statement. - - returns: - set: A set of all feature statements contained within the given feature statement. - """ - feature_set: set = set() - - for child in self.get_children(): - if isinstance(child, Statement): - feature_set.update(child.get_all_features()) - else: - feature_set.add(child) - return feature_set - class And(Statement): """ diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 45d822a5..2fe7ea50 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -738,6 +738,33 @@ class Rule: yield from self._extract_subscope_rules_rec(self.statement) + def extract_all_features_rec(self, statement) -> Set[Feature]: + """ + recursively extracts all feature statements from a given rule statement. 
+ + returns: + set: A set of all feature statements contained within the given feature statement. + """ + feature_set: Set[Feature] = set() + + for child in statement.get_children(): + if isinstance(child, Statement): + feature_set.update(self.extract_all_features_rec(child)) + else: + feature_set.add(child) + return feature_set + + def extract_all_features(self) -> Set[Feature]: + if not isinstance(self.statement, ceng.Statement): + # For rules with single feature like + # anti-analysis\obfuscation\obfuscated-with-advobfuscator.yml + # contains a single feature - substring , which is of type String + return { + self.statement, + } + + return self.extract_all_features_rec(self.statement) + def evaluate(self, features: FeatureSet, short_circuit=True): capa.perf.counters["evaluate.feature"] += 1 capa.perf.counters["evaluate.feature.rule"] += 1 diff --git a/scripts/detect_duplicate_features.py b/scripts/detect_duplicate_features.py index e48e2ade..6737d7fa 100644 --- a/scripts/detect_duplicate_features.py +++ b/scripts/detect_duplicate_features.py @@ -13,7 +13,6 @@ from pathlib import Path import capa.main import capa.rules -import capa.engine as ceng from capa.features.common import Feature logger = logging.getLogger("detect_duplicate_features") @@ -32,10 +31,7 @@ def get_features(rule_path: str) -> Set[Feature]: with Path(rule_path).open("r", encoding="utf-8") as f: try: new_rule = capa.rules.Rule.from_yaml(f.read()) - if isinstance(new_rule.statement, ceng.Statement): - return new_rule.statement.get_all_features() - else: - return (new_rule.statement,) + return new_rule.extract_all_features() except Exception as e: logger.error("Error: New rule %s %s %s", rule_path, str(type(e)), str(e)) sys.exit(-1) @@ -55,11 +51,7 @@ def find_overlapping_rules(new_rule_path, rules_path): ruleset = capa.main.get_rules(rules_path) for rule_name, rule in ruleset.rules.items(): - rule_features = set() - if isinstance(rule.statement, ceng.Statement): - rule_features = 
rule.statement.get_all_features() - else: - rule_features.add(rule.statement) + rule_features = rule.extract_all_features() if not len(rule_features): continue diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index 56f00c6a..c3020d57 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -12,7 +12,7 @@ import os import sys import logging import argparse -from typing import Set, Tuple +from typing import Set, Dict, Tuple from pathlib import Path import tabulate @@ -20,11 +20,11 @@ from termcolor import colored import capa.main import capa.rules -import capa.engine as ceng import capa.helpers import capa.features import capa.exceptions import capa.render.verbose as v +import capa.features.common import capa.features.freeze import capa.features.address import capa.features.extractors.pefile @@ -42,20 +42,17 @@ def format_address(addr: capa.features.address.Address) -> str: def get_rules_feature_set(rules_path) -> Set[Feature]: ruleset = capa.main.get_rules(rules_path) - rules_feature_set = set() + rules_feature_set: Set[Feature] = set() for _, rule in ruleset.rules.items(): - rule_features: set = set() - if isinstance(rule.statement, ceng.Statement): - rule_features.update(rule.statement.get_all_features()) - else: - rule_features.add(rule.statement) + rules_feature_set.update(rule.extract_all_features()) - rules_feature_set.update(rule_features) return rules_feature_set -def get_file_features(functions, extractor: capa.features.extractors.base_extractor.FeatureExtractor): - feature_map: dict = {} +def get_file_features( + functions: Tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.FeatureExtractor +) -> Dict[Feature, int]: + feature_map: Dict[Feature, int] = {} for f in functions: if extractor.is_library_function(f.address): @@ -82,11 +79,32 @@ def get_file_features(functions, extractor: capa.features.extractors.base_extrac return feature_map +def get_colored(s: str): + if "(" 
in s and ")" in s: + s_split = s.split("(", 1) + s_color = colored(s_split[1][:-1], "cyan") + return f"{s_split[0]}({s_color})" + else: + return colored(s, "cyan") + + +def highlight_unused_features(feature_map: Dict[Feature, int], rules_feature_set: Set[Feature]): + unused_features = [] + for feature, count in feature_map.items(): + if feature in rules_feature_set: + continue + unused_features.append((get_colored(str(count)), get_colored(str(feature)))) + unused_features = sorted(unused_features, key=lambda x: x[0]) + print("\n") + print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain")) + print("\n") + + def main(argv=None): if argv is None: argv = sys.argv[1:] - parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample") + parser = argparse.ArgumentParser(description="Show the features that capa doesn't have rules for yet") capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend", "rules"}) parser.add_argument("-F", "--function", type=str, help="Show features for specific function") @@ -196,26 +214,6 @@ def ida_main(): return 0 -def highlight_unused_features(feature_map: dict, rules_feature_set: set): - unused_features = [] - for feature, count in feature_map.items(): - if feature in rules_feature_set: - continue - unused_features.append((get_colored(str(count)), get_colored(feature.__str__()))) - print("\n") - print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain")) - print("\n") - - -def get_colored(s: str): - if "(" in s: - s_split = s.split("(") - s_split[0] = colored(s_split[0], "cyan") - return "(".join(s_split) - else: - return colored(s, "cyan") - - if __name__ == "__main__": if capa.helpers.is_runtime_ida(): ida_main() From 232c9ce35c29911e30883bf939ce7869798254ee Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Mon, 7 Aug 2023 22:43:25 +0530 Subject: 
[PATCH 18/27] Add test for script & output rendered --- scripts/show-unused-features.py | 6 ++++-- tests/test_scripts.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index c3020d57..1a84f5bb 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -93,8 +93,10 @@ def highlight_unused_features(feature_map: Dict[Feature, int], rules_feature_set for feature, count in feature_map.items(): if feature in rules_feature_set: continue - unused_features.append((get_colored(str(count)), get_colored(str(feature)))) - unused_features = sorted(unused_features, key=lambda x: x[0]) + unused_features.append((count, get_colored(str(feature)))) + unused_features = [ + (get_colored(str(count)), feature) for count, feature in sorted(unused_features, key=lambda x: x[0]) + ] print("\n") print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain")) print("\n") diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 4baa96a9..7c91bc57 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -45,6 +45,7 @@ def get_rule_path(): pytest.param("show-capabilities-by-function.py", [get_file_path()]), pytest.param("show-features.py", [get_file_path()]), pytest.param("show-features.py", ["-F", "0x407970", get_file_path()]), + pytest.param("show-unused-features.py", [get_file_path()]), pytest.param("capa_as_library.py", [get_file_path()]), ], ) From fb3ae0267e176d623278c3fdc5dd962fa194fd3b Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Tue, 8 Aug 2023 17:04:23 +0530 Subject: [PATCH 19/27] Update scripts/show-unused-features.py Co-authored-by: Moritz --- scripts/show-unused-features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index 1a84f5bb..f1c3b9ec 100644 --- a/scripts/show-unused-features.py +++ 
b/scripts/show-unused-features.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 """ Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); From 9c1aa2fc5d7043f210bdbeba0f7ba2925a1a6d18 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Tue, 8 Aug 2023 18:35:04 +0530 Subject: [PATCH 20/27] Update show-unused-features.py --- scripts/show-unused-features.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index f1c3b9ec..8b42cf43 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -12,8 +12,9 @@ import os import sys import logging import argparse -from typing import Set, Dict, Tuple +from typing import Set, Tuple from pathlib import Path +from collections import Counter import tabulate from termcolor import colored @@ -33,7 +34,7 @@ from capa.helpers import log_unsupported_runtime_error from capa.features.common import Feature from capa.features.extractors.base_extractor import FunctionHandle -logger = logging.getLogger("capa.show-features") +logger = logging.getLogger("show-unused-feature") def format_address(addr: capa.features.address.Address) -> str: @@ -51,8 +52,8 @@ def get_rules_feature_set(rules_path) -> Set[Feature]: def get_file_features( functions: Tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.FeatureExtractor -) -> Dict[Feature, int]: - feature_map: Dict[Feature, int] = {} +) -> Counter[Feature]: + feature_map: Counter[Feature] = Counter() for f in functions: if extractor.is_library_function(f.address): @@ -63,19 +64,19 @@ def get_file_features( for feature, _ in extractor.extract_function_features(f): if capa.features.common.is_global_feature(feature): continue - feature_map[feature] = feature_map.get(feature, 0) + 1 + 
feature_map.update([feature]) for bb in extractor.get_basic_blocks(f): for feature, _ in extractor.extract_basic_block_features(f, bb): if capa.features.common.is_global_feature(feature): continue - feature_map[feature] = feature_map.get(feature, 0) + 1 + feature_map.update([feature]) for insn in extractor.get_instructions(f, bb): for feature, _ in extractor.extract_insn_features(f, bb, insn): if capa.features.common.is_global_feature(feature): continue - feature_map[feature] = feature_map.get(feature, 0) + 1 + feature_map.update([feature]) return feature_map @@ -88,7 +89,7 @@ def get_colored(s: str): return colored(s, "cyan") -def highlight_unused_features(feature_map: Dict[Feature, int], rules_feature_set: Set[Feature]): +def print_unused_features(feature_map: Counter[Feature], rules_feature_set: Set[Feature]): unused_features = [] for feature, count in feature_map.items(): if feature in rules_feature_set: @@ -146,10 +147,9 @@ def main(argv=None): log_unsupported_runtime_error() return -1 - feature_map: dict = {} + feature_map: Counter[Feature] = Counter() - for feature, _ in extractor.extract_global_features(): - feature_map[feature] = feature_map.get(feature, 0) + 1 + feature_map.update([feature for feature, _ in extractor.extract_global_features()]) function_handles: Tuple[FunctionHandle, ...] 
if isinstance(extractor, capa.features.extractors.pefile.PefileFeatureExtractor): @@ -176,7 +176,7 @@ def main(argv=None): rules_feature_set = get_rules_feature_set(args.rules) - highlight_unused_features(feature_map, rules_feature_set) + print_unused_features(feature_map, rules_feature_set) return 0 @@ -190,12 +190,9 @@ def ida_main(): print(f"getting features for current function {hex(function)}") extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor() - feature_map: dict = {} + feature_map: Counter[Feature] = Counter() - if not function: - for feature, _ in extractor.extract_file_features(): - feature_map[feature] = feature_map.get(feature, 0) + 1 - return + feature_map.update([feature for feature, _ in extractor.extract_file_features()]) function_handles = tuple(extractor.get_functions()) @@ -211,7 +208,7 @@ def ida_main(): rules_path = capa.main.get_default_root() / "rules" rules_feature_set = get_rules_feature_set([rules_path]) - highlight_unused_features(feature_map, rules_feature_set) + print_unused_features(feature_map, rules_feature_set) return 0 From c497ad82536ea879623483cbbef38fe018a4620e Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Tue, 8 Aug 2023 18:36:25 +0530 Subject: [PATCH 21/27] Update show-unused-features.py --- scripts/show-unused-features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index 8b42cf43..dde77b4c 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -34,7 +34,7 @@ from capa.helpers import log_unsupported_runtime_error from capa.features.common import Feature from capa.features.extractors.base_extractor import FunctionHandle -logger = logging.getLogger("show-unused-feature") +logger = logging.getLogger("show-unused-features") def format_address(addr: capa.features.address.Address) -> str: From 448aa9cd219ecb7d45804d0c79284bfd3efc6bff Mon Sep 
17 00:00:00 2001 From: Mike Hunhoff Date: Tue, 8 Aug 2023 14:04:46 -0600 Subject: [PATCH 22/27] explorer: fix unhandled exception when resolving rule path (#1693) --- CHANGELOG.md | 1 + capa/ida/plugin/form.py | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dd14d3b..095682a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ - linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin ### capa explorer IDA Pro plugin +- fix unhandled exception when resolving rule path #1693 @mike-hunhoff ### Development diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index aa076ede..a079f1d9 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -573,10 +573,11 @@ class CapaExplorerForm(idaapi.PluginForm): def ensure_capa_settings_rule_path(self): try: - path: Path = Path(settings.user.get(CAPA_SETTINGS_RULE_PATH, "")) + path: str = settings.user.get(CAPA_SETTINGS_RULE_PATH, "") # resolve rules directory - check self and settings first, then ask user - if not path.exists(): + # pathlib.Path considers "" equivalent to "." 
so we first check if rule path is an empty string + if not path or not Path(path).exists(): # configure rules selection messagebox rules_message = QtWidgets.QMessageBox() rules_message.setIcon(QtWidgets.QMessageBox.Information) @@ -594,15 +595,15 @@ class CapaExplorerForm(idaapi.PluginForm): if pressed == QtWidgets.QMessageBox.Cancel: raise UserCancelledError() - path = Path(self.ask_user_directory()) + path = self.ask_user_directory() if not path: raise UserCancelledError() - if not path.exists(): + if not Path(path).exists(): logger.error("rule path %s does not exist or cannot be accessed", path) return False - settings.user[CAPA_SETTINGS_RULE_PATH] = str(path) + settings.user[CAPA_SETTINGS_RULE_PATH] = path except UserCancelledError: capa.ida.helpers.inform_user_ida_ui("Analysis requires capa rules") logger.warning( From d81f3a461e6e5ba4e13d1a49f1250cd3979bad06 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Wed, 9 Aug 2023 14:20:53 +0530 Subject: [PATCH 23/27] Update show-unused-features.py --- scripts/show-unused-features.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index dde77b4c..dc5ce72a 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations """ import os import sys +import typing import logging import argparse from typing import Set, Tuple @@ -52,8 +53,8 @@ def get_rules_feature_set(rules_path) -> Set[Feature]: def get_file_features( functions: Tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.FeatureExtractor -) -> Counter[Feature]: - feature_map: Counter[Feature] = Counter() +) -> typing.Counter[Feature]: + feature_map: typing.Counter[Feature] = Counter() for f in functions: if extractor.is_library_function(f.address): @@ -89,7 +90,7 @@ def 
get_colored(s: str): return colored(s, "cyan") -def print_unused_features(feature_map: Counter[Feature], rules_feature_set: Set[Feature]): +def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]): unused_features = [] for feature, count in feature_map.items(): if feature in rules_feature_set: @@ -147,7 +148,7 @@ def main(argv=None): log_unsupported_runtime_error() return -1 - feature_map: Counter[Feature] = Counter() + feature_map: typing.Counter[Feature] = Counter() feature_map.update([feature for feature, _ in extractor.extract_global_features()]) @@ -190,7 +191,7 @@ def ida_main(): print(f"getting features for current function {hex(function)}") extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor() - feature_map: Counter[Feature] = Counter() + feature_map: typing.Counter[Feature] = Counter() feature_map.update([feature for feature, _ in extractor.extract_file_features()]) From df9828dd7fe74275d0512d42f487dc39df375ec9 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Wed, 9 Aug 2023 15:32:12 +0530 Subject: [PATCH 24/27] Update capa/rules/__init__.py Co-authored-by: Willi Ballenthin --- capa/rules/__init__.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 2fe7ea50..688b1733 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -738,23 +738,23 @@ class Rule: yield from self._extract_subscope_rules_rec(self.statement) - def extract_all_features_rec(self, statement) -> Set[Feature]: - """ - recursively extracts all feature statements from a given rule statement. - - returns: - set: A set of all feature statements contained within the given feature statement. 
- """ + def _extract_all_features_rec(self, statement) -> Set[Feature]: feature_set: Set[Feature] = set() for child in statement.get_children(): if isinstance(child, Statement): - feature_set.update(self.extract_all_features_rec(child)) + feature_set.update(self._extract_all_features_rec(child)) else: feature_set.add(child) return feature_set def extract_all_features(self) -> Set[Feature]: + """ + recursively extracts all feature statements in this rule. + + returns: + set: A set of all feature statements contained within this rule. + """ if not isinstance(self.statement, ceng.Statement): # For rules with single feature like # anti-analysis\obfuscation\obfuscated-with-advobfuscator.yml @@ -763,7 +763,7 @@ class Rule: self.statement, } - return self.extract_all_features_rec(self.statement) + return self._extract_all_features_rec(self.statement) def evaluate(self, features: FeatureSet, short_circuit=True): capa.perf.counters["evaluate.feature"] += 1 From 57486733e795f08fe4444a64664b0b45e519193b Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Wed, 9 Aug 2023 15:33:03 +0530 Subject: [PATCH 25/27] Update scripts/show-unused-features.py Co-authored-by: Willi Ballenthin --- scripts/show-unused-features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index dc5ce72a..d8b32d39 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -97,7 +97,7 @@ def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_se continue unused_features.append((count, get_colored(str(feature)))) unused_features = [ - (get_colored(str(count)), feature) for count, feature in sorted(unused_features, key=lambda x: x[0]) + (str(count), feature) for count, feature in sorted(unused_features, key=lambda x: x[0]) ] print("\n") print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain")) From 
a383022cffca6669710789dd5e72ed49d650a9de Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Wed, 9 Aug 2023 15:37:38 +0530 Subject: [PATCH 26/27] Update show-unused-features.py --- scripts/show-unused-features.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index d8b32d39..dbd6c8c8 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -92,13 +92,10 @@ def get_colored(s: str): def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]): unused_features = [] - for feature, count in feature_map.items(): + for feature, count in reversed(feature_map.most_common()): if feature in rules_feature_set: continue - unused_features.append((count, get_colored(str(feature)))) - unused_features = [ - (str(count), feature) for count, feature in sorted(unused_features, key=lambda x: x[0]) - ] + unused_features.append((str(count), get_colored(str(feature)))) print("\n") print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain")) print("\n") From e5efc158b757f09b81239aba583380d75650f060 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 10 Aug 2023 07:26:08 +0000 Subject: [PATCH 27/27] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 71b5378d..cf965117 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 71b5378dbbe426dd318143f630c5f249c42830a1 +Subproject commit cf965117cbb3d7391bf01ab8dfb049262a3ad4fd