From 85a85e99bfe214280f5de3e481df0b1d30bbabfe Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Tue, 18 Jun 2024 15:38:44 -0600 Subject: [PATCH] vmray: emit recorded artifacts as strings --- capa/features/extractors/vmray/file.py | 34 +++++++++++++- capa/features/extractors/vmray/models.py | 56 +++++++++++++++++++----- 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/capa/features/extractors/vmray/file.py b/capa/features/extractors/vmray/file.py index 5a28b472..2c6463c1 100644 --- a/capa/features/extractors/vmray/file.py +++ b/capa/features/extractors/vmray/file.py @@ -9,8 +9,8 @@ import logging from typing import Dict, Tuple, Iterator from capa.features.file import Export, Section -from capa.features.common import Feature -from capa.features.address import Address, ProcessAddress, AbsoluteVirtualAddress +from capa.features.common import String, Feature +from capa.features.address import NO_ADDRESS, Address, ProcessAddress, AbsoluteVirtualAddress from capa.features.extractors.vmray import VMRayAnalysis from capa.features.extractors.vmray.models import Process from capa.features.extractors.base_extractor import ProcessHandle @@ -44,6 +44,31 @@ def extract_section_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Ad yield Section(name), AbsoluteVirtualAddress(addr) +def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: + for _, filename in analysis.sv2.filenames.items(): + yield String(filename.filename), NO_ADDRESS + + +def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: + for _, mutex in analysis.sv2.mutexes.items(): + yield String(mutex.name), NO_ADDRESS + + +def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: + for _, domain in analysis.sv2.domains.items(): + yield String(domain.domain), NO_ADDRESS + + +def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: + for _, ip_address in analysis.sv2.ip_addresses.items(): + yield String(ip_address.ip_address), NO_ADDRESS + + +def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: + for _, registry_record in analysis.sv2.registry_records.items(): + yield String(registry_record.reg_key_name), NO_ADDRESS + + def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: for handler in FILE_HANDLERS: for feature, addr in handler(analysis): @@ -54,5 +79,10 @@ FILE_HANDLERS = ( extract_import_names, extract_export_names, extract_section_names, + extract_referenced_filenames, + extract_referenced_mutex_names, + extract_referenced_domain_names, + extract_referenced_ip_addresses, + extract_referenced_registry_key_names, # extract_file_strings, ) diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index 4ee6e9e9..8b910bfe 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -151,14 +151,40 @@ class Process(BaseModel): ref_parent_process: Optional[GenericReference] = None -class Artifacts(BaseModel): - ref_processes: List[GenericReference] = [] - ref_domains: List[GenericReference] = [] - ref_filenames: List[GenericReference] = [] - ref_files: List[GenericReference] = [] - ref_ip_addresses: List[GenericReference] = [] - ref_mutexes: List[GenericReference] = [] - ref_registry_records: List[GenericReference] = [] +class Filename(BaseModel): + filename: str + is_artifact: bool + is_ioc: bool + verdict: str + + +class Mutex(BaseModel): + name: str + is_artifact: bool + is_ioc: bool + verdict: str + + +class Registry(BaseModel): + reg_key_name: str + reg_key_value_type: Optional[str] = None + is_artifact: bool + is_ioc: bool + verdict: str + + +class Domain(BaseModel): + domain: str + is_artifact: bool + is_ioc: bool + verdict: str + + +class IPAddress(BaseModel): + ip_address: str + is_artifact: bool + is_ioc: bool + verdict: str class AnalysisMetadata(BaseModel): @@ -168,8 +194,14 @@ class AnalysisMetadata(BaseModel): class SummaryV2(BaseModel): analysis_metadata: AnalysisMetadata - artifacts: Artifacts - files: Dict[str, File] - static_data: Dict[str, StaticData] - processes: Dict[str, Process] + static_data: Dict[str, StaticData] = {} + + # recorded artifacts + files: Dict[str, File] = {} + processes: Dict[str, Process] = {} + filenames: Dict[str, Filename] = {} + mutexes: Dict[str, Mutex] = {} + domains: Dict[str, Domain] = {} + ip_addresses: Dict[str, IPAddress] = {} + registry_records: Dict[str, Registry] = {}