diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py
index 141a2595..c605c718 100644
--- a/capa/features/extractors/vmray/__init__.py
+++ b/capa/features/extractors/vmray/__init__.py
@@ -12,10 +12,8 @@ from pathlib import Path
from zipfile import ZipFile
from collections import defaultdict
-import xmltodict
-
from capa.exceptions import UnsupportedFormatError
-from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall
+from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall, xml_to_dict
logger = logging.getLogger(__name__)
@@ -37,8 +35,8 @@ class VMRayAnalysis:
# flog.xml contains all of the call information that VMRay captured during execution
flog_xml = self.zipfile.read("logs/flog.xml", pwd=DEFAULT_ARCHIVE_PASSWORD)
- flog_json = xmltodict.parse(flog_xml, attr_prefix="")
- self.flog = Flog.model_validate(flog_json)
+ flog_dict = xml_to_dict(flog_xml)
+ self.flog = Flog.model_validate(flog_dict)
if self.flog.analysis.log_version not in SUPPORTED_FLOG_VERSIONS:
logger.warning("VMRay feature extractor does not support flog version %s", self.flog.analysis.log_version)
diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py
index 6d61ce15..6f999744 100644
--- a/capa/features/extractors/vmray/models.py
+++ b/capa/features/extractors/vmray/models.py
@@ -8,6 +8,7 @@
from typing import Dict, List, Union, Optional
+import xmltodict
from pydantic import Field, BaseModel
from typing_extensions import Annotated
from pydantic.functional_validators import BeforeValidator
@@ -54,6 +55,10 @@ PARAM_TYPE_INT = (
)
+def xml_to_dict(xml):
+    """Convert an XML document into a dictionary using xmltodict.
+
+    The document is handed to ``xmltodict.parse`` with ``attr_prefix=""``, so
+    attribute keys carry no prefix (xmltodict's documented default is ``"@"``).
+    The parsed mapping is returned unchanged.
+    """
+    parsed = xmltodict.parse(xml, attr_prefix="")
+    return parsed
+
+
def hexint(value: Union[str, int]) -> int:
if isinstance(value, str):
return int(value, 16) if value.startswith("0x") else int(value, 10)
diff --git a/tests/test_vmray_model.py b/tests/test_vmray_model.py
new file mode 100644
index 00000000..0b0d86bf
--- /dev/null
+++ b/tests/test_vmray_model.py
@@ -0,0 +1,59 @@
+# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import textwrap
+
+from capa.features.extractors.vmray.models import Param, FunctionCall, xml_to_dict
+
+
+def test_vmray_model_call():
+ call_xml = textwrap.dedent(
+ """
+
+
+
+
+
+
+
+
+
+ """
+ )
+ call: FunctionCall = FunctionCall.model_validate(xml_to_dict(call_xml)["fncall"])
+
+ assert call.fncall_id == 18
+ assert call.process_id == 1
+ assert call.thread_id == 1
+ assert call.name == "time"
+ assert call.params_in is not None
+ assert call.params_out is not None
+
+
+def test_vmray_model_call_param():
+ param_xml = textwrap.dedent(
+ """
+
+ """
+ )
+ param: Param = Param.model_validate(xml_to_dict(param_xml)["param"])
+
+ assert param.value == "16"
+
+
+def test_vmray_model_call_param_deref():
+ param_xml = textwrap.dedent(
+ """
+
+
+
+ """
+ )
+ param: Param = Param.model_validate(xml_to_dict(param_xml)["param"])
+
+ assert param.deref is not None
+ assert param.deref.value == "Hello world"