cape: models: parse minimum fields required for analysis (#2607)

* cape: models: parse minimum fields required for analysis

* update CHANGELOG
This commit is contained in:
Mike Hunhoff
2025-02-19 08:55:12 -07:00
committed by GitHub
parent f85cd80d90
commit a278bf593a
2 changed files with 128 additions and 116 deletions

View File

@@ -11,6 +11,7 @@
- -
### Bug Fixes ### Bug Fixes
- only parse CAPE fields required for analysis @mike-hunhoff #2607
### capa Explorer Web ### capa Explorer Web

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias from typing import Any, Union, Optional, Annotated, TypeAlias
from pydantic import Field, BaseModel, ConfigDict from pydantic import Field, BaseModel, ConfigDict
from pydantic.functional_validators import BeforeValidator from pydantic.functional_validators import BeforeValidator
@@ -75,34 +75,37 @@ class Info(FlexibleModel):
version: str version: str
class ImportedSymbol(ExactModel): class ImportedSymbol(FlexibleModel):
address: HexInt address: HexInt
name: Optional[str] = None name: Optional[str] = None
class ImportedDll(ExactModel): class ImportedDll(FlexibleModel):
dll: str dll: str
imports: list[ImportedSymbol] imports: list[ImportedSymbol]
class DirectoryEntry(ExactModel): """
class DirectoryEntry(FlexibleModel):
name: str name: str
virtual_address: HexInt virtual_address: HexInt
size: HexInt size: HexInt
"""
class Section(ExactModel): class Section(FlexibleModel):
name: str name: str
raw_address: HexInt # raw_address: HexInt
virtual_address: HexInt virtual_address: HexInt
virtual_size: HexInt # virtual_size: HexInt
size_of_data: HexInt # size_of_data: HexInt
characteristics: str # characteristics: str
characteristics_raw: HexInt # characteristics_raw: HexInt
entropy: float # entropy: float
class Resource(ExactModel): """
class Resource(FlexibleModel):
name: str name: str
language: Optional[str] = None language: Optional[str] = None
sublanguage: str sublanguage: str
@@ -140,7 +143,7 @@ class DigitalSigner(FlexibleModel):
extensions_subjectKeyIdentifier: Optional[str] = None extensions_subjectKeyIdentifier: Optional[str] = None
class AuxSigner(ExactModel): class AuxSigner(FlexibleModel):
name: str name: str
issued_to: str = Field(alias="Issued to") issued_to: str = Field(alias="Issued to")
issued_by: str = Field(alias="Issued by") issued_by: str = Field(alias="Issued by")
@@ -148,7 +151,7 @@ class AuxSigner(ExactModel):
sha1_hash: str = Field(alias="SHA1 hash") sha1_hash: str = Field(alias="SHA1 hash")
class Signer(ExactModel): class Signer(FlexibleModel):
aux_sha1: Optional[str] = None aux_sha1: Optional[str] = None
aux_timestamp: Optional[str] = None aux_timestamp: Optional[str] = None
aux_valid: Optional[bool] = None aux_valid: Optional[bool] = None
@@ -157,60 +160,61 @@ class Signer(ExactModel):
aux_signers: Optional[list[AuxSigner]] = None aux_signers: Optional[list[AuxSigner]] = None
class Overlay(ExactModel): class Overlay(FlexibleModel):
offset: HexInt offset: HexInt
size: HexInt size: HexInt
class KV(ExactModel): class KV(FlexibleModel):
name: str name: str
value: str value: str
"""
class ExportedSymbol(ExactModel): class ExportedSymbol(FlexibleModel):
address: HexInt address: HexInt
name: str name: str
ordinal: int # ordinal: int
class PE(ExactModel): class PE(FlexibleModel):
peid_signatures: TODO # peid_signatures: TODO
imagebase: HexInt imagebase: HexInt
entrypoint: HexInt # entrypoint: HexInt
reported_checksum: HexInt # reported_checksum: HexInt
actual_checksum: HexInt # actual_checksum: HexInt
osversion: str # osversion: str
pdbpath: Optional[str] = None # pdbpath: Optional[str] = None
timestamp: str # timestamp: str
# list[ImportedDll], or dict[basename(dll), ImportedDll] # list[ImportedDll], or dict[basename(dll), ImportedDll]
imports: Union[list[ImportedDll], dict[str, ImportedDll]] imports: Union[list[ImportedDll], dict[str, ImportedDll]]
imported_dll_count: Optional[int] = None # imported_dll_count: Optional[int] = None
imphash: str # imphash: str
exported_dll_name: Optional[str] = None # exported_dll_name: Optional[str] = None
exports: list[ExportedSymbol] exports: list[ExportedSymbol]
dirents: list[DirectoryEntry] # dirents: list[DirectoryEntry]
sections: list[Section] sections: list[Section]
ep_bytes: Optional[HexBytes] = None # ep_bytes: Optional[HexBytes] = None
overlay: Optional[Overlay] = None # overlay: Optional[Overlay] = None
resources: list[Resource] # resources: list[Resource]
versioninfo: list[KV] # versioninfo: list[KV]
# base64 encoded data # base64 encoded data
icon: Optional[str] = None # icon: Optional[str] = None
# MD5-like hash # MD5-like hash
icon_hash: Optional[str] = None # icon_hash: Optional[str] = None
# MD5-like hash # MD5-like hash
icon_fuzzy: Optional[str] = None # icon_fuzzy: Optional[str] = None
# short hex string # short hex string
icon_dhash: Optional[str] = None # icon_dhash: Optional[str] = None
digital_signers: list[DigitalSigner] # digital_signers: list[DigitalSigner]
guest_signers: Signer # guest_signers: Signer
# TODO(mr-tz): target.file.dotnet, target.file.extracted_files, target.file.extracted_files_tool, # TODO(mr-tz): target.file.dotnet, target.file.extracted_files, target.file.extracted_files_tool,
@@ -218,48 +222,49 @@ class PE(ExactModel):
# https://github.com/mandiant/capa/issues/1814 # https://github.com/mandiant/capa/issues/1814
class File(FlexibleModel): class File(FlexibleModel):
type: str type: str
cape_type_code: Optional[int] = None # cape_type_code: Optional[int] = None
cape_type: Optional[str] = None # cape_type: Optional[str] = None
pid: Optional[Union[int, Literal[""]]] = None # pid: Optional[Union[int, Literal[""]]] = None
name: Union[list[str], str] # name: Union[list[str], str]
path: str # path: str
guest_paths: Union[list[str], str, None] # guest_paths: Union[list[str], str, None]
timestamp: Optional[str] = None # timestamp: Optional[str] = None
# #
# hashes # hashes
# #
crc32: str # crc32: str
md5: str md5: str
sha1: str sha1: str
sha256: str sha256: str
sha512: str # sha512: str
sha3_384: Optional[str] = None # sha3_384: Optional[str] = None
ssdeep: str # ssdeep: str
# unsure why this would ever be "False" # unsure why this would ever be "False"
tlsh: Optional[Union[str, bool]] = None # tlsh: Optional[Union[str, bool]] = None
rh_hash: Optional[str] = None # rh_hash: Optional[str] = None
# #
# other metadata, static analysis # other metadata, static analysis
# #
size: int # size: int
pe: Optional[PE] = None pe: Optional[PE] = None
ep_bytes: Optional[HexBytes] = None # ep_bytes: Optional[HexBytes] = None
entrypoint: Optional[int] = None # entrypoint: Optional[int] = None
data: Optional[str] = None # data: Optional[str] = None
strings: Optional[list[str]] = None # strings: Optional[list[str]] = None
# #
# detections (skip) # detections (skip)
# #
yara: Skip = None # yara: Skip = None
cape_yara: Skip = None # cape_yara: Skip = None
clamav: Skip = None # clamav: Skip = None
virustotal: Skip = None # virustotal: Skip = None
"""
class ProcessFile(File): class ProcessFile(File):
# #
# like a File, but also has dynamic analysis results # like a File, but also has dynamic analysis results
@@ -272,35 +277,36 @@ class ProcessFile(File):
target_pid: Optional[Union[int, str]] = None target_pid: Optional[Union[int, str]] = None
target_path: Optional[str] = None target_path: Optional[str] = None
target_process: Optional[str] = None target_process: Optional[str] = None
"""
class Argument(ExactModel): class Argument(FlexibleModel):
name: str name: str
# unsure why empty list is provided here # unsure why empty list is provided here
value: Union[HexInt, int, str, EmptyList] value: Union[HexInt, int, str, EmptyList]
pretty_value: Optional[str] = None pretty_value: Optional[str] = None
class Call(ExactModel): class Call(FlexibleModel):
timestamp: str # timestamp: str
thread_id: int thread_id: int
category: str # category: str
api: str api: str
arguments: list[Argument] arguments: list[Argument]
status: bool # status: bool
return_: HexInt = Field(alias="return") return_: HexInt = Field(alias="return")
pretty_return: Optional[str] = None pretty_return: Optional[str] = None
repeated: int # repeated: int
# virtual addresses # virtual addresses
caller: HexInt # caller: HexInt
parentcaller: HexInt # parentcaller: HexInt
# index into calls array # index into calls array
id: int # id: int
# FlexibleModel to account for extended fields # FlexibleModel to account for extended fields
@@ -310,14 +316,15 @@ class Process(FlexibleModel):
process_id: int process_id: int
process_name: str process_name: str
parent_id: int parent_id: int
module_path: str # module_path: str
first_seen: str # first_seen: str
calls: list[Call] calls: list[Call]
threads: list[int] threads: list[int]
environ: dict[str, str] environ: dict[str, str]
class ProcessTree(ExactModel): """
class ProcessTree(FlexibleModel):
name: str name: str
pid: int pid: int
parent_id: int parent_id: int
@@ -325,17 +332,18 @@ class ProcessTree(ExactModel):
threads: list[int] threads: list[int]
environ: dict[str, str] environ: dict[str, str]
children: list["ProcessTree"] children: list["ProcessTree"]
"""
class Summary(ExactModel): class Summary(FlexibleModel):
files: list[str] files: list[str]
read_files: list[str] # read_files: list[str]
write_files: list[str] # write_files: list[str]
delete_files: list[str] # delete_files: list[str]
keys: list[str] keys: list[str]
read_keys: list[str] # read_keys: list[str]
write_keys: list[str] # write_keys: list[str]
delete_keys: list[str] # delete_keys: list[str]
executed_commands: list[str] executed_commands: list[str]
resolved_apis: list[str] resolved_apis: list[str]
mutexes: list[str] mutexes: list[str]
@@ -343,7 +351,8 @@ class Summary(ExactModel):
started_services: list[str] started_services: list[str]
class EncryptedBuffer(ExactModel): """
class EncryptedBuffer(FlexibleModel):
process_name: str process_name: str
pid: int pid: int
@@ -351,38 +360,41 @@ class EncryptedBuffer(ExactModel):
buffer: str buffer: str
buffer_size: Optional[int] = None buffer_size: Optional[int] = None
crypt_key: Optional[Union[HexInt, str]] = None crypt_key: Optional[Union[HexInt, str]] = None
"""
class Behavior(ExactModel): class Behavior(FlexibleModel):
summary: Summary summary: Summary
# list of processes, of threads, of calls # list of processes, of threads, of calls
processes: list[Process] processes: list[Process]
# tree of processes # tree of processes
processtree: list[ProcessTree] # processtree: list[ProcessTree]
anomaly: list[str] # anomaly: list[str]
encryptedbuffers: list[EncryptedBuffer] # encryptedbuffers: list[EncryptedBuffer]
# these are small objects that describe atomic events, # these are small objects that describe atomic events,
# like file move, registry access. # like file move, registry access.
# we'll detect the same with our API call analysis. # we'll detect the same with our API call analysis.
enhanced: Skip = None # enhanced: Skip = None
class Target(ExactModel): class Target(FlexibleModel):
category: str # category: str
file: File file: File
# pe: Optional[PE] = None
class Static(FlexibleModel):
pe: Optional[PE] = None pe: Optional[PE] = None
# flare_capa: Skip = None
class Static(ExactModel): """
pe: Optional[PE] = None class Cape(FlexibleModel):
flare_capa: Skip = None
class Cape(ExactModel):
payloads: list[ProcessFile] payloads: list[ProcessFile]
configs: Skip = None configs: Skip = None
"""
# flexible because there may be more sorts of analysis # flexible because there may be more sorts of analysis
@@ -405,15 +417,14 @@ class CapeReport(FlexibleModel):
# post-processed results: process tree, anomalies, etc # post-processed results: process tree, anomalies, etc
behavior: Behavior behavior: Behavior
# post-processed results: payloads and extracted configs
CAPE: Optional[Union[Cape, list]] = None
dropped: Optional[list[File]] = None
procdump: Optional[list[ProcessFile]] = None
procmemory: Optional[ListTODO] = None
# ========================================================================= # =========================================================================
# information we won't use in capa # information we won't use in capa
# #
# post-processed results: payloads and extracted configs
# CAPE: Optional[Union[Cape, list]] = None
# dropped: Optional[list[File]] = None
# procdump: Optional[list[ProcessFile]] = None
# procmemory: Optional[ListTODO] = None
# #
# NBIs and HBIs # NBIs and HBIs
@@ -422,32 +433,32 @@ class CapeReport(FlexibleModel):
# #
# if we come up with a future use for this, go ahead and re-enable! # if we come up with a future use for this, go ahead and re-enable!
# #
network: Skip = None # network: Skip = None
suricata: Skip = None # suricata: Skip = None
curtain: Skip = None # curtain: Skip = None
sysmon: Skip = None # sysmon: Skip = None
url_analysis: Skip = None # url_analysis: Skip = None
# screenshot hash values # screenshot hash values
deduplicated_shots: Skip = None # deduplicated_shots: Skip = None
# k-v pairs describing the time it took to run each stage. # k-v pairs describing the time it took to run each stage.
statistics: Skip = None # statistics: Skip = None
# k-v pairs of ATT&CK ID to signature name or similar. # k-v pairs of ATT&CK ID to signature name or similar.
ttps: Skip = None # ttps: Skip = None
# debug log messages # debug log messages
debug: Skip = None # debug: Skip = None
# various signature matches # various signature matches
# we could potentially extend capa to use this info one day, # we could potentially extend capa to use this info one day,
# though it would be quite sandbox-specific, # though it would be quite sandbox-specific,
# and more detection-oriented than capability detection. # and more detection-oriented than capability detection.
signatures: Skip = None # signatures: Skip = None
malfamily_tag: Optional[str] = None # malfamily_tag: Optional[str] = None
malscore: float # malscore: float
detections: Skip = None # detections: Skip = None
detections2pid: Optional[dict[int, list[str]]] = None # detections2pid: Optional[dict[int, list[str]]] = None
# AV detections for the sample. # AV detections for the sample.
virustotal: Skip = None # virustotal: Skip = None
@classmethod @classmethod
def from_buf(cls, buf: bytes) -> "CapeReport": def from_buf(cls, buf: bytes) -> "CapeReport":