From a9dafe283c8ad7982d7f4ff34842b772c3b0d25e Mon Sep 17 00:00:00 2001
From: mr-tz
Date: Thu, 13 Jun 2024 16:37:45 +0000
Subject: [PATCH] example using pydantic-xml to parse flog.xml

---

Notes:
    Review commentary only; the added lines in the diff below are unchanged.

    extractor.py
      * VMRayExtractor.from_report() is annotated to return "VMRayExtractor"
        but has no return statement: it prints the first 200 characters of
        the report, parses it with Analysis.from_xml(), and prints the model.
      * The report is read twice (two report.read_text() calls).
      * VMRayExtractor.__init__() has a placeholder body ("...") and does not
        call the base-class initializer.
      * Dict, pydantic_xml and SampleHashes are imported but not referenced.

    models.py
      * Only List is used from the typing import; Any, Dict, Union, Literal
        and Optional are not referenced.
      * FunctionCall maps only the "name" attribute; its other fields and the
        Analysis.fncall list are commented out ("failing so far...").
      * MonitorThread.os_tid is kept as a string (marked "TODO hex").

 capa/features/extractors/vmray/__init__.py  |  0
 capa/features/extractors/vmray/extractor.py | 28 ++++++++++++++
 capa/features/extractors/vmray/models.py    | 43 +++++++++++++++++++++
 3 files changed, 71 insertions(+)
 create mode 100644 capa/features/extractors/vmray/__init__.py
 create mode 100644 capa/features/extractors/vmray/extractor.py
 create mode 100644 capa/features/extractors/vmray/models.py

diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/capa/features/extractors/vmray/extractor.py b/capa/features/extractors/vmray/extractor.py
new file mode 100644
index 00000000..7a3565a3
--- /dev/null
+++ b/capa/features/extractors/vmray/extractor.py
@@ -0,0 +1,28 @@
+from typing import Dict
+from pathlib import Path
+
+import pydantic_xml
+
+from capa.features.extractors.vmray.models import Analysis
+from capa.features.extractors.base_extractor import SampleHashes, DynamicFeatureExtractor
+
+# TODO also/or look into xmltodict?
+
+
+class VMRayExtractor(DynamicFeatureExtractor):
+    def __init__(self, report: Path): ...
+
+    @classmethod
+    def from_report(cls, report: Path) -> "VMRayExtractor":
+        print(report.read_text()[:200])
+
+        vr = Analysis.from_xml(report.read_text())
+
+        print(vr)
+
+
+if __name__ == "__main__":
+    import sys
+
+    input_path = Path(sys.argv[1])
+    VMRayExtractor.from_report(input_path)
diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py
new file mode 100644
index 00000000..e8036bed
--- /dev/null
+++ b/capa/features/extractors/vmray/models.py
@@ -0,0 +1,43 @@
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+from typing import Any, Dict, List, Union, Literal, Optional
+
+# TODO install/force lxml?
+from pydantic_xml import BaseXmlModel, attr, element
+
+
+class FunctionCall(BaseXmlModel, tag="fncall"):
+    # ts: str = attr()
+    # fncall_id: int = attr()
+    # process_id: int = attr()
+    name: str = attr()
+    # in_: element(name="in")
+    # out: element()
+
+
+class MonitorProcess(BaseXmlModel, tag="monitor_process"):
+    ts: str = attr()
+    process_id: int = attr()
+    image_name: str = attr()
+
+
+class MonitorThread(BaseXmlModel, tag="monitor_thread"):
+    ts: str = attr()
+    thread_id: int = attr()
+    process_id: int = attr()
+    os_tid: str = attr()  # TODO hex
+
+
+class Analysis(BaseXmlModel, tag="analysis"):
+    log_version: str = attr()
+    analyzer_version: str = attr()
+    analysis_date: str = attr()
+    processes: List[MonitorProcess] = element(tag="monitor_process")
+    threads: List[MonitorThread] = element(tag="monitor_thread")
+    # failing so far...
+    # fncall: List[FunctionCall] = element(tag="fncall")