Merge branch 'master' into dynamic-feature-extraction

This commit is contained in:
Willi Ballenthin
2023-08-10 13:21:49 +00:00
21 changed files with 341 additions and 136 deletions

View File

@@ -4,23 +4,21 @@
### New Features ### New Features
- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04 - ELF: implement file import and export name extractor #1607 @Aayush-Goel-04
- Add a dynamic feature extractor for the CAPE sandbox @yelhamer [#1535](https://github.com/mandiant/capa/issues/1535) - bump pydantic from 1.10.9 to 2.1.1 #1582 @Aayush-Goel-04
- Add unit tests for the new CAPE extractor #1563 @yelhamer - develop script to highlight the features that are not used during matching #331 @Aayush-Goel-04
- Add a CAPE file format and CAPE-based dynamic feature extraction to scripts/show-features.py #1566 @yelhamer - implement dynamic analysis via CAPE sandbox #48 #1535 @yelhamer
- Add a new process scope for the dynamic analysis flavor #1517 @yelhamer - add call scope #771 @yelhamer
- Add a new thread scope for the dynamic analysis flavor #1517 @yelhamer - add process scope for the dynamic analysis flavor #1517 @yelhamer
- Add support for flavor-based rule scopes @yelhamer - Add thread scope for the dynamic analysis flavor #1517 @yelhamer
- Add ProcessesAddress and ThreadAddress #1612 @yelhamer
- Add dynamic capability extraction @yelhamer
- Add support for mixed-scopes rules @yelhamer
- Add a call scope @yelhamer
### Breaking Changes ### Breaking Changes
### New Rules (4) ### New Rules (4)
- executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com - executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com
- host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com - host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com
- host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com - host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com
- nursery/enumerate-device-drivers-on-linux @mr-tz
- -
### Bug Fixes ### Bug Fixes
@@ -29,6 +27,7 @@
- linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin - linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin
### capa explorer IDA Pro plugin ### capa explorer IDA Pro plugin
- fix unhandled exception when resolving rule path #1693 @mike-hunhoff
### Development ### Development

View File

@@ -2,7 +2,7 @@
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-826-blue.svg)](https://github.com/mandiant/capa-rules) [![Number of rules](https://img.shields.io/badge/rules-828-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

View File

@@ -136,8 +136,8 @@ class Feature(abc.ABC): # noqa: B024
import capa.features.freeze.features import capa.features.freeze.features
return ( return (
capa.features.freeze.features.feature_from_capa(self).json() capa.features.freeze.features.feature_from_capa(self).model_dump_json()
< capa.features.freeze.features.feature_from_capa(other).json() < capa.features.freeze.features.feature_from_capa(other).model_dump_json()
) )
def get_name_str(self) -> str: def get_name_str(self) -> str:

View File

@@ -13,6 +13,7 @@ from typing import Any, Dict, Tuple, Union, Iterator
from dataclasses import dataclass from dataclasses import dataclass
# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated # TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
# https://github.com/mandiant/capa/issues/1699
from typing_extensions import TypeAlias from typing_extensions import TypeAlias
import capa.features.address import capa.features.address

View File

@@ -14,7 +14,7 @@ import logging
from enum import Enum from enum import Enum
from typing import List, Tuple, Union from typing import List, Tuple, Union
from pydantic import Field, BaseModel from pydantic import Field, BaseModel, ConfigDict
from typing_extensions import TypeAlias from typing_extensions import TypeAlias
import capa.helpers import capa.helpers
@@ -38,8 +38,7 @@ logger = logging.getLogger(__name__)
class HashableModel(BaseModel): class HashableModel(BaseModel):
class Config: model_config = ConfigDict(frozen=True)
frozen = True
class AddressType(str, Enum): class AddressType(str, Enum):
@@ -57,7 +56,7 @@ class AddressType(str, Enum):
class Address(HashableModel): class Address(HashableModel):
type: AddressType type: AddressType
value: Union[int, Tuple[int, ...], None] value: Union[int, Tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS
@classmethod @classmethod
def from_capa(cls, a: capa.features.address.Address) -> "Address": def from_capa(cls, a: capa.features.address.Address) -> "Address":
@@ -271,9 +270,7 @@ class BasicBlockFeature(HashableModel):
basic_block: Address = Field(alias="basic block") basic_block: Address = Field(alias="basic block")
address: Address address: Address
feature: Feature feature: Feature
model_config = ConfigDict(populate_by_name=True)
class Config:
allow_population_by_field_name = True
class InstructionFeature(HashableModel): class InstructionFeature(HashableModel):
@@ -306,9 +303,7 @@ class FunctionFeatures(BaseModel):
address: Address address: Address
features: Tuple[FunctionFeature, ...] features: Tuple[FunctionFeature, ...]
basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks") basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks")
model_config = ConfigDict(populate_by_name=True)
class Config:
allow_population_by_field_name = True
class CallFeatures(BaseModel): class CallFeatures(BaseModel):
@@ -332,9 +327,7 @@ class StaticFeatures(BaseModel):
global_: Tuple[GlobalFeature, ...] = Field(alias="global") global_: Tuple[GlobalFeature, ...] = Field(alias="global")
file: Tuple[FileFeature, ...] file: Tuple[FileFeature, ...]
functions: Tuple[FunctionFeatures, ...] functions: Tuple[FunctionFeatures, ...]
model_config = ConfigDict(populate_by_name=True)
class Config:
allow_population_by_field_name = True
class DynamicFeatures(BaseModel): class DynamicFeatures(BaseModel):
@@ -352,9 +345,7 @@ Features: TypeAlias = Union[StaticFeatures, DynamicFeatures]
class Extractor(BaseModel): class Extractor(BaseModel):
name: str name: str
version: str = capa.version.__version__ version: str = capa.version.__version__
model_config = ConfigDict(populate_by_name=True)
class Config:
allow_population_by_field_name = True
class Freeze(BaseModel): class Freeze(BaseModel):
@@ -363,9 +354,7 @@ class Freeze(BaseModel):
sample_hashes: SampleHashes sample_hashes: SampleHashes
extractor: Extractor extractor: Extractor
features: Features features: Features
model_config = ConfigDict(populate_by_name=True)
class Config:
allow_population_by_field_name = True
def dumps_static(extractor: StaticFeatureExtractor) -> str: def dumps_static(extractor: StaticFeatureExtractor) -> str:
@@ -467,7 +456,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
) # type: ignore ) # type: ignore
# Mypy is unable to recognise `base_address` as a argument due to alias # Mypy is unable to recognise `base_address` as a argument due to alias
return freeze.json() return freeze.model_dump_json()
def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:

View File

@@ -8,7 +8,7 @@
import binascii import binascii
from typing import Union, Optional from typing import Union, Optional
from pydantic import Field, BaseModel from pydantic import Field, BaseModel, ConfigDict
import capa.features.file import capa.features.file
import capa.features.insn import capa.features.insn
@@ -17,9 +17,7 @@ import capa.features.basicblock
class FeatureModel(BaseModel): class FeatureModel(BaseModel):
class Config: model_config = ConfigDict(frozen=True, populate_by_name=True)
frozen = True
allow_population_by_field_name = True
def to_capa(self) -> capa.features.common.Feature: def to_capa(self) -> capa.features.common.Feature:
if isinstance(self, OSFeature): if isinstance(self, OSFeature):
@@ -213,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
class OSFeature(FeatureModel): class OSFeature(FeatureModel):
type: str = "os" type: str = "os"
os: str os: str
description: Optional[str] description: Optional[str] = None
class ArchFeature(FeatureModel): class ArchFeature(FeatureModel):
type: str = "arch" type: str = "arch"
arch: str arch: str
description: Optional[str] description: Optional[str] = None
class FormatFeature(FeatureModel): class FormatFeature(FeatureModel):
type: str = "format" type: str = "format"
format: str format: str
description: Optional[str] description: Optional[str] = None
class MatchFeature(FeatureModel): class MatchFeature(FeatureModel):
type: str = "match" type: str = "match"
match: str match: str
description: Optional[str] description: Optional[str] = None
class CharacteristicFeature(FeatureModel): class CharacteristicFeature(FeatureModel):
type: str = "characteristic" type: str = "characteristic"
characteristic: str characteristic: str
description: Optional[str] description: Optional[str] = None
class ExportFeature(FeatureModel): class ExportFeature(FeatureModel):
type: str = "export" type: str = "export"
export: str export: str
description: Optional[str] description: Optional[str] = None
class ImportFeature(FeatureModel): class ImportFeature(FeatureModel):
type: str = "import" type: str = "import"
import_: str = Field(alias="import") import_: str = Field(alias="import")
description: Optional[str] description: Optional[str] = None
class SectionFeature(FeatureModel): class SectionFeature(FeatureModel):
type: str = "section" type: str = "section"
section: str section: str
description: Optional[str] description: Optional[str] = None
class FunctionNameFeature(FeatureModel): class FunctionNameFeature(FeatureModel):
type: str = "function name" type: str = "function name"
function_name: str = Field(alias="function name") function_name: str = Field(alias="function name")
description: Optional[str] description: Optional[str] = None
class SubstringFeature(FeatureModel): class SubstringFeature(FeatureModel):
type: str = "substring" type: str = "substring"
substring: str substring: str
description: Optional[str] description: Optional[str] = None
class RegexFeature(FeatureModel): class RegexFeature(FeatureModel):
type: str = "regex" type: str = "regex"
regex: str regex: str
description: Optional[str] description: Optional[str] = None
class StringFeature(FeatureModel): class StringFeature(FeatureModel):
type: str = "string" type: str = "string"
string: str string: str
description: Optional[str] description: Optional[str] = None
class ClassFeature(FeatureModel): class ClassFeature(FeatureModel):
type: str = "class" type: str = "class"
class_: str = Field(alias="class") class_: str = Field(alias="class")
description: Optional[str] description: Optional[str] = None
class NamespaceFeature(FeatureModel): class NamespaceFeature(FeatureModel):
type: str = "namespace" type: str = "namespace"
namespace: str namespace: str
description: Optional[str] description: Optional[str] = None
class BasicBlockFeature(FeatureModel): class BasicBlockFeature(FeatureModel):
type: str = "basic block" type: str = "basic block"
description: Optional[str] description: Optional[str] = None
class APIFeature(FeatureModel): class APIFeature(FeatureModel):
type: str = "api" type: str = "api"
api: str api: str
description: Optional[str] description: Optional[str] = None
class PropertyFeature(FeatureModel): class PropertyFeature(FeatureModel):
type: str = "property" type: str = "property"
access: Optional[str] access: Optional[str] = None
property: str property: str
description: Optional[str] description: Optional[str] = None
class NumberFeature(FeatureModel): class NumberFeature(FeatureModel):
type: str = "number" type: str = "number"
number: Union[int, float] number: Union[int, float]
description: Optional[str] description: Optional[str] = None
class BytesFeature(FeatureModel): class BytesFeature(FeatureModel):
type: str = "bytes" type: str = "bytes"
bytes: str bytes: str
description: Optional[str] description: Optional[str] = None
class OffsetFeature(FeatureModel): class OffsetFeature(FeatureModel):
type: str = "offset" type: str = "offset"
offset: int offset: int
description: Optional[str] description: Optional[str] = None
class MnemonicFeature(FeatureModel): class MnemonicFeature(FeatureModel):
type: str = "mnemonic" type: str = "mnemonic"
mnemonic: str mnemonic: str
description: Optional[str] description: Optional[str] = None
class OperandNumberFeature(FeatureModel): class OperandNumberFeature(FeatureModel):
type: str = "operand number" type: str = "operand number"
index: int index: int
operand_number: int = Field(alias="operand number") operand_number: int = Field(alias="operand number")
description: Optional[str] description: Optional[str] = None
class OperandOffsetFeature(FeatureModel): class OperandOffsetFeature(FeatureModel):
type: str = "operand offset" type: str = "operand offset"
index: int index: int
operand_offset: int = Field(alias="operand offset") operand_offset: int = Field(alias="operand offset")
description: Optional[str] description: Optional[str] = None
Feature = Union[ Feature = Union[

View File

@@ -573,10 +573,11 @@ class CapaExplorerForm(idaapi.PluginForm):
def ensure_capa_settings_rule_path(self): def ensure_capa_settings_rule_path(self):
try: try:
path: Path = Path(settings.user.get(CAPA_SETTINGS_RULE_PATH, "")) path: str = settings.user.get(CAPA_SETTINGS_RULE_PATH, "")
# resolve rules directory - check self and settings first, then ask user # resolve rules directory - check self and settings first, then ask user
if not path.exists(): # pathlib.Path considers "" equivalent to "." so we first check if rule path is an empty string
if not path or not Path(path).exists():
# configure rules selection messagebox # configure rules selection messagebox
rules_message = QtWidgets.QMessageBox() rules_message = QtWidgets.QMessageBox()
rules_message.setIcon(QtWidgets.QMessageBox.Information) rules_message.setIcon(QtWidgets.QMessageBox.Information)
@@ -594,15 +595,15 @@ class CapaExplorerForm(idaapi.PluginForm):
if pressed == QtWidgets.QMessageBox.Cancel: if pressed == QtWidgets.QMessageBox.Cancel:
raise UserCancelledError() raise UserCancelledError()
path = Path(self.ask_user_directory()) path = self.ask_user_directory()
if not path: if not path:
raise UserCancelledError() raise UserCancelledError()
if not path.exists(): if not Path(path).exists():
logger.error("rule path %s does not exist or cannot be accessed", path) logger.error("rule path %s does not exist or cannot be accessed", path)
return False return False
settings.user[CAPA_SETTINGS_RULE_PATH] = str(path) settings.user[CAPA_SETTINGS_RULE_PATH] = path
except UserCancelledError: except UserCancelledError:
capa.ida.helpers.inform_user_ida_ui("Analysis requires capa rules") capa.ida.helpers.inform_user_ida_ui("Analysis requires capa rules")
logger.warning( logger.warning(
@@ -1307,7 +1308,7 @@ class CapaExplorerForm(idaapi.PluginForm):
idaapi.info("No program analysis to save.") idaapi.info("No program analysis to save.")
return return
s = self.resdoc_cache.json().encode("utf-8") s = self.resdoc_cache.model_dump_json().encode("utf-8")
path = Path(self.ask_user_capa_json_file()) path = Path(self.ask_user_capa_json_file())
if not path.exists(): if not path.exists():

View File

@@ -11,4 +11,4 @@ from capa.engine import MatchResults
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str: def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
return rd.ResultDocument.from_capa(meta, rules, capabilities).json(exclude_none=True) return rd.ResultDocument.from_capa(meta, rules, capabilities).model_dump_json(exclude_none=True)

View File

@@ -127,7 +127,7 @@ def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
timestamp=str(meta.timestamp), timestamp=str(meta.timestamp),
version=meta.version, version=meta.version,
argv=meta.argv, argv=meta.argv,
sample=google.protobuf.json_format.ParseDict(meta.sample.dict(), capa_pb2.Sample()), sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()),
analysis=capa_pb2.Analysis( analysis=capa_pb2.Analysis(
format=meta.analysis.format, format=meta.analysis.format,
arch=meta.analysis.arch, arch=meta.analysis.arch,
@@ -394,7 +394,7 @@ def match_to_pb2(match: rd.Match) -> capa_pb2.Match:
def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadata: def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadata:
# after manual type conversions to the RuleMetadata, we can rely on the protobuf json parser # after manual type conversions to the RuleMetadata, we can rely on the protobuf json parser
# conversions include tuple -> list and rd.Enum -> proto.enum # conversions include tuple -> list and rd.Enum -> proto.enum
meta = dict_tuple_to_list_values(rule_metadata.dict()) meta = dict_tuple_to_list_values(rule_metadata.model_dump())
meta["scope"] = scope_to_pb2(meta["scope"]) meta["scope"] = scope_to_pb2(meta["scope"])
meta["attack"] = list(map(dict_tuple_to_list_values, meta.get("attack", []))) meta["attack"] = list(map(dict_tuple_to_list_values, meta.get("attack", [])))
meta["mbc"] = list(map(dict_tuple_to_list_values, meta.get("mbc", []))) meta["mbc"] = list(map(dict_tuple_to_list_values, meta.get("mbc", [])))

View File

@@ -7,9 +7,9 @@
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
import datetime import datetime
import collections import collections
from typing import Dict, List, Tuple, Union, Optional from typing import Dict, List, Tuple, Union, Literal, Optional
from pydantic import Field, BaseModel from pydantic import Field, BaseModel, ConfigDict
from typing_extensions import TypeAlias from typing_extensions import TypeAlias
import capa.rules import capa.rules
@@ -24,14 +24,11 @@ from capa.helpers import assert_never
class FrozenModel(BaseModel): class FrozenModel(BaseModel):
class Config: model_config = ConfigDict(frozen=True, extra="forbid")
frozen = True
extra = "forbid"
class Model(BaseModel): class Model(BaseModel):
class Config: model_config = ConfigDict(extra="forbid")
extra = "forbid"
class Sample(Model): class Sample(Model):
@@ -148,13 +145,13 @@ class CompoundStatement(StatementModel):
class SomeStatement(StatementModel): class SomeStatement(StatementModel):
type = "some" type: Literal["some"] = "some"
description: Optional[str] = None description: Optional[str] = None
count: int count: int
class RangeStatement(StatementModel): class RangeStatement(StatementModel):
type = "range" type: Literal["range"] = "range"
description: Optional[str] = None description: Optional[str] = None
min: int min: int
max: int max: int
@@ -162,7 +159,7 @@ class RangeStatement(StatementModel):
class SubscopeStatement(StatementModel): class SubscopeStatement(StatementModel):
type = "subscope" type: Literal["subscope"] = "subscope"
description: Optional[str] = None description: Optional[str] = None
scope: capa.rules.Scope scope: capa.rules.Scope
@@ -177,7 +174,7 @@ Statement = Union[
class StatementNode(FrozenModel): class StatementNode(FrozenModel):
type = "statement" type: Literal["statement"] = "statement"
statement: Statement statement: Statement
@@ -214,7 +211,7 @@ def statement_from_capa(node: capa.engine.Statement) -> Statement:
class FeatureNode(FrozenModel): class FeatureNode(FrozenModel):
type = "feature" type: Literal["feature"] = "feature"
feature: frz.Feature feature: frz.Feature
@@ -543,15 +540,12 @@ class MaecMetadata(FrozenModel):
malware_family: Optional[str] = Field(None, alias="malware-family") malware_family: Optional[str] = Field(None, alias="malware-family")
malware_category: Optional[str] = Field(None, alias="malware-category") malware_category: Optional[str] = Field(None, alias="malware-category")
malware_category_ov: Optional[str] = Field(None, alias="malware-category-ov") malware_category_ov: Optional[str] = Field(None, alias="malware-category-ov")
model_config = ConfigDict(frozen=True, populate_by_name=True)
class Config:
frozen = True
allow_population_by_field_name = True
class RuleMetadata(FrozenModel): class RuleMetadata(FrozenModel):
name: str name: str
namespace: Optional[str] namespace: Optional[str] = None
authors: Tuple[str, ...] authors: Tuple[str, ...]
scopes: capa.rules.Scopes scopes: capa.rules.Scopes
attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") attack: Tuple[AttackSpec, ...] = Field(alias="att&ck")
@@ -589,9 +583,7 @@ class RuleMetadata(FrozenModel):
) # type: ignore ) # type: ignore
# Mypy is unable to recognise arguments due to alias # Mypy is unable to recognise arguments due to alias
class Config: model_config = ConfigDict(frozen=True, populate_by_name=True)
frozen = True
allow_population_by_field_name = True
class RuleMatches(FrozenModel): class RuleMatches(FrozenModel):

View File

@@ -88,7 +88,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0
# so, we have to inline some of the feature rendering here. # so, we have to inline some of the feature rendering here.
child = statement.child child = statement.child
value = child.dict(by_alias=True).get(child.type) value = child.model_dump(by_alias=True).get(child.type)
if value: if value:
if isinstance(child, frzf.StringFeature): if isinstance(child, frzf.StringFeature):
@@ -141,7 +141,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0):
value = feature.class_ value = feature.class_
else: else:
# convert attributes to dictionary using aliased names, if applicable # convert attributes to dictionary using aliased names, if applicable
value = feature.dict(by_alias=True).get(key) value = feature.model_dump(by_alias=True).get(key)
if value is None: if value is None:
raise ValueError(f"{key} contains None") raise ValueError(f"{key} contains None")

View File

@@ -887,6 +887,33 @@ class Rule:
yield from self._extract_subscope_rules_rec(self.statement) yield from self._extract_subscope_rules_rec(self.statement)
def _extract_all_features_rec(self, statement) -> Set[Feature]:
feature_set: Set[Feature] = set()
for child in statement.get_children():
if isinstance(child, Statement):
feature_set.update(self._extract_all_features_rec(child))
else:
feature_set.add(child)
return feature_set
def extract_all_features(self) -> Set[Feature]:
"""
recursively extracts all feature statements in this rule.
returns:
set: A set of all feature statements contained within this rule.
"""
if not isinstance(self.statement, ceng.Statement):
# For rules with single feature like
# anti-analysis\obfuscation\obfuscated-with-advobfuscator.yml
# contains a single feature - substring , which is of type String
return {
self.statement,
}
return self._extract_all_features_rec(self.statement)
def evaluate(self, features: FeatureSet, short_circuit=True): def evaluate(self, features: FeatureSet, short_circuit=True):
capa.perf.counters["evaluate.feature"] += 1 capa.perf.counters["evaluate.feature"] += 1
capa.perf.counters["evaluate.feature.rule"] += 1 capa.perf.counters["evaluate.feature.rule"] += 1

View File

@@ -48,7 +48,7 @@ dependencies = [
"pyelftools==0.29", "pyelftools==0.29",
"dnfile==0.13.0", "dnfile==0.13.0",
"dncil==1.0.2", "dncil==1.0.2",
"pydantic==1.10.9", "pydantic==2.1.1",
"protobuf==4.23.4", "protobuf==4.23.4",
] ]
dynamic = ["version"] dynamic = ["version"]
@@ -66,7 +66,7 @@ dev = [
"pytest-sugar==0.9.7", "pytest-sugar==0.9.7",
"pytest-instafail==0.5.0", "pytest-instafail==0.5.0",
"pytest-cov==4.1.0", "pytest-cov==4.1.0",
"flake8==6.0.0", "flake8==6.1.0",
"flake8-bugbear==23.7.10", "flake8-bugbear==23.7.10",
"flake8-encodings==0.5.0.post1", "flake8-encodings==0.5.0.post1",
"flake8-comprehensions==3.14.0", "flake8-comprehensions==3.14.0",
@@ -77,14 +77,14 @@ dev = [
"flake8-simplify==0.20.0", "flake8-simplify==0.20.0",
"flake8-use-pathlib==0.3.0", "flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4", "flake8-copyright==0.2.4",
"ruff==0.0.280", "ruff==0.0.282",
"black==23.7.0", "black==23.7.0",
"isort==5.11.4", "isort==5.11.4",
"mypy==1.4.1", "mypy==1.4.1",
"psutil==5.9.2", "psutil==5.9.2",
"stix2==3.0.1", "stix2==3.0.1",
"requests==2.31.0", "requests==2.31.0",
"mypy-protobuf==3.4.0", "mypy-protobuf==3.5.0",
# type stubs for mypy # type stubs for mypy
"types-backports==0.1.3", "types-backports==0.1.3",
"types-colorama==0.4.15.11", "types-colorama==0.4.15.11",
@@ -93,7 +93,7 @@ dev = [
"types-termcolor==1.1.4", "types-termcolor==1.1.4",
"types-psutil==5.8.23", "types-psutil==5.8.23",
"types_requests==2.31.0.2", "types_requests==2.31.0.2",
"types-protobuf==4.23.0.2", "types-protobuf==4.23.0.3",
] ]
build = [ build = [
"pyinstaller==5.10.1", "pyinstaller==5.10.1",

2
rules

Submodule rules updated: 7685a232d9...149cf2d133

View File

@@ -142,8 +142,7 @@ def get_capa_results(args):
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities) meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
doc = rd.ResultDocument.from_capa(meta, rules, capabilities) doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
return {"path": path, "status": "ok", "ok": doc.model_dump()}
return {"path": path, "status": "ok", "ok": doc.dict(exclude_none=True)}
def main(argv=None): def main(argv=None):
@@ -212,7 +211,9 @@ def main(argv=None):
if result["status"] == "error": if result["status"] == "error":
logger.warning(result["error"]) logger.warning(result["error"])
elif result["status"] == "ok": elif result["status"] == "ok":
results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True) results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json(
exclude_none=True
)
else: else:
raise ValueError(f"unexpected status: {result['status']}") raise ValueError(f"unexpected status: {result['status']}")

View File

@@ -8,38 +8,17 @@
import sys import sys
import logging import logging
import argparse import argparse
from typing import Set
from pathlib import Path from pathlib import Path
import capa.main import capa.main
import capa.rules import capa.rules
import capa.engine as ceng from capa.features.common import Feature
logger = logging.getLogger("detect_duplicate_features") logger = logging.getLogger("detect_duplicate_features")
def get_child_features(feature: ceng.Statement) -> list: def get_features(rule_path: str) -> Set[Feature]:
"""
Recursively extracts all feature statements from a given rule statement.
Args:
feature (capa.engine.Statement): The feature statement to extract features from.
Returns:
list: A list of all feature statements contained within the given feature statement.
"""
children = []
if isinstance(feature, (ceng.And, ceng.Or, ceng.Some)):
for child in feature.children:
children.extend(get_child_features(child))
elif isinstance(feature, (ceng.Subscope, ceng.Range, ceng.Not)):
children.extend(get_child_features(feature.child))
else:
children.append(feature)
return children
def get_features(rule_path: str) -> list:
""" """
Extracts all features from a given rule file. Extracts all features from a given rule file.
@@ -47,17 +26,15 @@ def get_features(rule_path: str) -> list:
rule_path (str): The path to the rule file to extract features from. rule_path (str): The path to the rule file to extract features from.
Returns: Returns:
list: A list of all feature statements contained within the rule file. set: A set of all feature statements contained within the rule file.
""" """
feature_list = []
with Path(rule_path).open("r", encoding="utf-8") as f: with Path(rule_path).open("r", encoding="utf-8") as f:
try: try:
new_rule = capa.rules.Rule.from_yaml(f.read()) new_rule = capa.rules.Rule.from_yaml(f.read())
feature_list = get_child_features(new_rule.statement) return new_rule.extract_all_features()
except Exception as e: except Exception as e:
logger.error("Error: New rule %s %s %s", rule_path, str(type(e)), str(e)) logger.error("Error: New rule %s %s %s", rule_path, str(type(e)), str(e))
sys.exit(-1) sys.exit(-1)
return feature_list
def find_overlapping_rules(new_rule_path, rules_path): def find_overlapping_rules(new_rule_path, rules_path):
@@ -67,7 +44,6 @@ def find_overlapping_rules(new_rule_path, rules_path):
# Loads features of new rule in a list. # Loads features of new rule in a list.
new_rule_features = get_features(new_rule_path) new_rule_features = get_features(new_rule_path)
count = 0 count = 0
overlapping_rules = [] overlapping_rules = []
@@ -75,7 +51,7 @@ def find_overlapping_rules(new_rule_path, rules_path):
ruleset = capa.main.get_rules(rules_path) ruleset = capa.main.get_rules(rules_path)
for rule_name, rule in ruleset.rules.items(): for rule_name, rule in ruleset.rules.items():
rule_features = get_child_features(rule.statement) rule_features = rule.extract_all_features()
if not len(rule_features): if not len(rule_features):
continue continue

View File

@@ -309,7 +309,7 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
logger.debug("analyzing sample: %s", nice_path) logger.debug("analyzing sample: %s", nice_path)
extractor = capa.main.get_extractor( extractor = capa.main.get_extractor(
nice_path, format_, OS_AUTO, "", DEFAULT_SIGNATURES, False, disable_progress=True nice_path, format_, OS_AUTO, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
) )
capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True) capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)

View File

@@ -78,7 +78,7 @@ def main(argv=None):
rdpb.ParseFromString(pb) rdpb.ParseFromString(pb)
rd = capa.render.proto.doc_from_pb2(rdpb) rd = capa.render.proto.doc_from_pb2(rdpb)
print(rd.json(exclude_none=True, indent=2, sort_keys=True)) print(rd.model_dump_json(exclude_none=True, indent=2))
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -0,0 +1,220 @@
#!/usr/bin/env python3
"""
Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at: [package root]/LICENSE.txt
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""
import os
import sys
import typing
import logging
import argparse
from typing import Set, Tuple
from pathlib import Path
from collections import Counter
import tabulate
from termcolor import colored
import capa.main
import capa.rules
import capa.helpers
import capa.features
import capa.exceptions
import capa.render.verbose as v
import capa.features.common
import capa.features.freeze
import capa.features.address
import capa.features.extractors.pefile
import capa.features.extractors.base_extractor
from capa.helpers import log_unsupported_runtime_error
from capa.features.common import Feature
from capa.features.extractors.base_extractor import FunctionHandle, StaticFeatureExtractor
logger = logging.getLogger("show-unused-features")
def format_address(addr: capa.features.address.Address) -> str:
    """Render a capa address as the human-readable string used throughout this script."""
    frozen = capa.features.freeze.Address.from_capa(addr)
    return v.format_address(frozen)
def get_rules_feature_set(rules_path) -> Set[Feature]:
    """Collect every feature referenced by any rule found under the given rule path(s)."""
    ruleset = capa.main.get_rules(rules_path)
    features: Set[Feature] = set()
    for rule in ruleset.rules.values():
        features.update(rule.extract_all_features())
    return features
def get_file_features(
    functions: Tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.StaticFeatureExtractor
) -> typing.Counter[Feature]:
    """Tally features extracted at function, basic-block, and instruction scope.

    Library functions are skipped, and global features (which appear for every
    sample regardless of content) are excluded from the tally.
    """
    counts: typing.Counter[Feature] = Counter()

    def tally(pairs):
        # count each extracted feature once per occurrence, ignoring globals
        for feature, _ in pairs:
            if not capa.features.common.is_global_feature(feature):
                counts[feature] += 1

    for fh in functions:
        if extractor.is_library_function(fh.address):
            function_name = extractor.get_function_name(fh.address)
            logger.debug("skipping library function %s (%s)", format_address(fh.address), function_name)
            continue

        tally(extractor.extract_function_features(fh))
        for bb in extractor.get_basic_blocks(fh):
            tally(extractor.extract_basic_block_features(fh, bb))
            for insn in extractor.get_instructions(fh, bb):
                tally(extractor.extract_insn_features(fh, bb, insn))

    return counts
def get_colored(s: str):
    """Colorize a feature string for terminal display.

    For value-carrying features rendered like ``api(CreateFile)``, only the
    value between the parentheses is colored cyan; anything else is colored
    cyan in full.
    """
    # bug fix: the original only checked that ")" appeared *somewhere* in the
    # string but then stripped the *last* character as if it were ")"; a string
    # containing ")" mid-way (but not at the end) lost its final character.
    if "(" in s and s.endswith(")"):
        prefix, _, rest = s.partition("(")
        # rest still carries the trailing ")"; color only the inner value
        return f"{prefix}({colored(rest[:-1], 'cyan')})"
    else:
        return colored(s, "cyan")
def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]):
    """Print a plain table of extracted features that no rule references, rarest first."""
    # reversed(most_common()) orders from least- to most-frequently seen
    rows = [
        (str(count), get_colored(str(feature)))
        for feature, count in reversed(feature_map.most_common())
        if feature not in rules_feature_set
    ]
    print("\n")
    print(tabulate.tabulate(rows, headers=["Count", "Feature"], tablefmt="plain"))
    print("\n")
def main(argv=None):
    """CLI entry point: extract features from a sample and report those unused by any rule.

    Returns 0 on success, -1 on any error (bad input, unsupported format, etc.).
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Show the features that capa doesn't have rules for yet")
    capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend", "rules"})
    parser.add_argument("-F", "--function", type=str, help="Show features for specific function")
    args = parser.parse_args(args=argv)
    capa.main.handle_common_args(args)

    # the pefile backend only extracts file-scope features, so a per-function
    # request cannot be satisfied with it
    if args.function and args.backend == "pefile":
        print("pefile backend does not support extracting function features")
        return -1

    try:
        taste = capa.helpers.get_file_taste(Path(args.sample))
    except IOError as e:
        logger.error("%s", str(e))
        return -1

    try:
        sig_paths = capa.main.get_signatures(args.signatures)
    except IOError as e:
        logger.error("%s", str(e))
        return -1

    if (args.format == "freeze") or (
        args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste)
    ):
        # freeze files already contain serialized features; load them directly
        extractor = capa.features.freeze.load(Path(args.sample).read_bytes())
    else:
        # an unset CAPA_SAVE_WORKSPACE (None) counts as "don't save"
        should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
        try:
            extractor = capa.main.get_extractor(
                args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace
            )
        except capa.exceptions.UnsupportedFormatError:
            capa.helpers.log_unsupported_format_error()
            return -1
        except capa.exceptions.UnsupportedRuntimeError:
            log_unsupported_runtime_error()
            return -1

    assert isinstance(extractor, StaticFeatureExtractor), "only static analysis supported today"
    feature_map: typing.Counter[Feature] = Counter()
    feature_map.update([feature for feature, _ in extractor.extract_global_features()])
    function_handles: Tuple[FunctionHandle, ...]
    if isinstance(extractor, capa.features.extractors.pefile.PefileFeatureExtractor):
        # pefile extractor doesn't extract function features
        function_handles = ()
    else:
        function_handles = tuple(extractor.get_functions())

    if args.function:
        if args.format == "freeze":
            # NOTE(review): compares a handle address against the raw -F string;
            # presumably freeze handles stringify compatibly — confirm upstream
            function_handles = tuple(filter(lambda fh: fh.address == args.function, function_handles))
        else:
            function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles))
            if args.function not in [format_address(fh.address) for fh in function_handles]:
                print(f"{args.function} not a function")
                return -1

        if len(function_handles) == 0:
            print(f"{args.function} not a function")
            return -1

    feature_map.update(get_file_features(function_handles, extractor))

    rules_feature_set = get_rules_feature_set(args.rules)

    print_unused_features(feature_map, rules_feature_set)
    return 0
def ida_main():
    """IDA Pro entry point: report unused features for the function under the cursor."""
    import idc

    import capa.main
    import capa.features.extractors.ida.extractor

    # start address of the function containing the current cursor position
    function = idc.get_func_attr(idc.here(), idc.FUNCATTR_START)
    print(f"getting features for current function {hex(function)}")

    extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
    feature_map: typing.Counter[Feature] = Counter()
    feature_map.update(feature for feature, _ in extractor.extract_file_features())

    handles = tuple(extractor.get_functions())
    if function:
        handles = tuple(fh for fh in handles if fh.inner.start_ea == function)
        if not handles:
            print(f"{hex(function)} not a function")
            return -1

    feature_map.update(get_file_features(handles, extractor))

    rules_dir = capa.main.get_default_root() / "rules"
    print_unused_features(feature_map, get_rules_feature_set([rules_dir]))
    return 0
if __name__ == "__main__":
    # dispatch to the IDA entry point when running inside IDA Pro,
    # otherwise run as a regular command-line script
    if capa.helpers.is_runtime_ida():
        ida_main()
    else:
        sys.exit(main())

View File

@@ -236,7 +236,7 @@ def test_basic_block_node_from_capa():
def assert_round_trip(rd: rdoc.ResultDocument): def assert_round_trip(rd: rdoc.ResultDocument):
one = rd one = rd
doc = one.json(exclude_none=True) doc = one.model_dump_json(exclude_none=True)
two = rdoc.ResultDocument.parse_raw(doc) two = rdoc.ResultDocument.parse_raw(doc)
# show the round trip works # show the round trip works
@@ -244,14 +244,14 @@ def assert_round_trip(rd: rdoc.ResultDocument):
# which works thanks to pydantic model equality. # which works thanks to pydantic model equality.
assert one == two assert one == two
# second by showing their json representations are the same. # second by showing their json representations are the same.
assert one.json(exclude_none=True) == two.json(exclude_none=True) assert one.model_dump_json(exclude_none=True) == two.model_dump_json(exclude_none=True)
# now show that two different versions are not equal. # now show that two different versions are not equal.
three = copy.deepcopy(two) three = copy.deepcopy(two)
three.meta.__dict__.update({"version": "0.0.0"}) three.meta.__dict__.update({"version": "0.0.0"})
assert one.meta.version != three.meta.version assert one.meta.version != three.meta.version
assert one != three assert one != three
assert one.json(exclude_none=True) != three.json(exclude_none=True) assert one.model_dump_json(exclude_none=True) != three.model_dump_json(exclude_none=True)
@pytest.mark.parametrize( @pytest.mark.parametrize(

View File

@@ -51,6 +51,7 @@ def get_rule_path():
), ),
pytest.param("show-features.py", [get_file_path()]), pytest.param("show-features.py", [get_file_path()]),
pytest.param("show-features.py", ["-F", "0x407970", get_file_path()]), pytest.param("show-features.py", ["-F", "0x407970", get_file_path()]),
pytest.param("show-unused-features.py", [get_file_path()]),
pytest.param( pytest.param(
"capa_as_library.py", [get_file_path()], marks=pytest.mark.xfail(reason="relies on legacy ruleset") "capa_as_library.py", [get_file_path()], marks=pytest.mark.xfail(reason="relies on legacy ruleset")
), ),