example using pydantic-xml to parse flog.xml

This commit is contained in:
mr-tz
2024-06-13 16:37:45 +00:00
parent e87e8484b6
commit a9dafe283c
3 changed files with 71 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
from typing import Dict
from pathlib import Path
import pydantic_xml
from capa.features.extractors.vmray.models import Analysis
from capa.features.extractors.base_extractor import SampleHashes, DynamicFeatureExtractor
# TODO: also (or instead) look into xmltodict?
class VMRayExtractor(DynamicFeatureExtractor):
    """Prototype dynamic feature extractor driven by a VMRay XML report.

    Work in progress: the constructor is a stub and no features are
    extracted yet; only report parsing is exercised.
    """

    def __init__(self, report: Path): ...

    @classmethod
    def from_report(cls, report: Path) -> "VMRayExtractor":
        """Parse the XML report at *report* and print the resulting model.

        Prints the first 200 characters of the raw report text, then the
        parsed ``Analysis`` model, as a debugging aid.

        Args:
            report: path to the XML report file to parse.

        NOTE(review): despite the return annotation, this method does not
        construct or return an extractor yet (it implicitly returns None).
        Returning ``cls(report)`` likely requires implementing the base
        class's interface first - confirm against DynamicFeatureExtractor.
        """
        # read the report once and reuse the text for both the preview and the parse
        text = report.read_text()
        print(text[:200])

        vr = Analysis.from_xml(text)
        print(vr)
if __name__ == "__main__":
    import sys

    # fail with a usage hint instead of a bare IndexError when the path is missing
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <path to report XML>")

    input_path = Path(sys.argv[1])
    VMRayExtractor.from_report(input_path)

View File

@@ -0,0 +1,43 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Any, Dict, List, Union, Literal, Optional
# TODO: should we install (or require) lxml as the XML backend?
from pydantic_xml import BaseXmlModel, attr, element
class FunctionCall(BaseXmlModel, tag="fncall"):
    """Model bound to a ``<fncall>`` XML element.

    Only the ``name`` attribute is mapped so far; the remaining attributes
    and child elements are sketched below but not yet enabled.
    """

    # not yet enabled: additional attributes of the element
    # ts: str = attr()
    # fncall_id: int = attr()
    # process_id: int = attr()
    # value of the element's ``name`` attribute
    name: str = attr()
    # not yet enabled: child elements; ``in`` is a Python keyword,
    # hence the trailing underscore in the sketched field name
    # in_: element(name="in")
    # out: element()
class MonitorProcess(BaseXmlModel, tag="monitor_process"):
    """Model bound to a ``<monitor_process>`` XML element; all fields are XML attributes."""

    # presumably a timestamp, kept as the raw string - TODO confirm format
    ts: str = attr()
    # parsed from the attribute text as an integer
    process_id: int = attr()
    image_name: str = attr()
class MonitorThread(BaseXmlModel, tag="monitor_thread"):
    """Model bound to a ``<monitor_thread>`` XML element; all fields are XML attributes."""

    # presumably a timestamp, kept as the raw string - TODO confirm format
    ts: str = attr()
    thread_id: int = attr()
    process_id: int = attr()
    # kept as a string for now; TODO: parse the hex value into an int
    os_tid: str = attr()
class Analysis(BaseXmlModel, tag="analysis"):
    """Model bound to the ``<analysis>`` XML element.

    Maps three attributes of the element and collects its
    ``<monitor_process>`` and ``<monitor_thread>`` children into lists.
    """

    # attributes of the <analysis> element, kept as raw strings
    log_version: str = attr()
    analyzer_version: str = attr()
    analysis_date: str = attr()

    # child elements, matched by tag name
    processes: List[MonitorProcess] = element(tag="monitor_process")
    threads: List[MonitorThread] = element(tag="monitor_thread")

    # failing so far...
    # NOTE(review): possibly related to pydantic-xml's element search mode
    # (the default is order-sensitive) if <fncall> elements are interleaved
    # with the monitor_* elements - confirm against a real report.
    # fncall: List[FunctionCall] = element(tag="fncall")