Merge branch 'master' into dynamic-feature-extraction

Willi Ballenthin
2023-08-15 14:01:30 +02:00
committed by GitHub
10 changed files with 29 additions and 11 deletions

View File

@@ -28,6 +28,8 @@
 - linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin
 - OS: detect Android ELF files #1705 @williballenthin
 - ELF: fix parsing of symtab #1704 @williballenthin
+- result document: don't use deprecated pydantic functions #1718 @williballenthin
+- pytest: don't mark IDA tests as pytest tests #1719 @williballenthin
 ### capa explorer IDA Pro plugin
 - fix unhandled exception when resolving rule path #1693 @mike-hunhoff

View File

@@ -540,7 +540,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
 def loads_static(s: str) -> StaticFeatureExtractor:
     """deserialize a set of features (as a NullStaticFeatureExtractor) from a string."""
-    freeze = Freeze.parse_raw(s)
+    freeze = Freeze.model_validate_json(s)
     if freeze.version != 2:
         raise ValueError(f"unsupported freeze format version: {freeze.version}")

View File

@@ -5,7 +5,6 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-import json
 import logging
 import datetime
 import contextlib
@@ -224,7 +223,7 @@ def load_and_verify_cached_results() -> Optional[rdoc.ResultDocument]:
logger.debug("loading cached capa results from netnode '%s'", CAPA_NETNODE) logger.debug("loading cached capa results from netnode '%s'", CAPA_NETNODE)
n = netnode.Netnode(CAPA_NETNODE) n = netnode.Netnode(CAPA_NETNODE)
doc = rdoc.ResultDocument.parse_obj(json.loads(n[NETNODE_RESULTS])) doc = rdoc.ResultDocument.model_validate_json(n[NETNODE_RESULTS])
for rule in rutils.capability_rules(doc): for rule in rutils.capability_rules(doc):
for location_, _ in rule.matches: for location_, _ in rule.matches:
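`model_validate_json` accepts the serialized JSON string directly, which is why the explicit `json.loads` step and the `import json` removed above are no longer needed. A sketch of the two equivalent forms, using a toy model in place of `rdoc.ResultDocument`:

```python
import json

from pydantic import BaseModel


class CachedDoc(BaseModel):
    # toy stand-in for rdoc.ResultDocument; the field is invented for illustration
    version: str


data = '{"version": "6.0.0"}'

# before: decode to a dict, then validate the dict (pydantic v1 style, now deprecated)
doc_v1 = CachedDoc.parse_obj(json.loads(data))

# after: validate the JSON string in one step (pydantic v2)
doc_v2 = CachedDoc.model_validate_json(data)

assert doc_v1 == doc_v2
```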

View File

@@ -1517,7 +1517,7 @@ def main(argv: Optional[List[str]] = None):
     if format_ == FORMAT_RESULT:
         # result document directly parses into meta, capabilities
-        result_doc = capa.render.result_document.ResultDocument.parse_file(args.sample)
+        result_doc = capa.render.result_document.ResultDocument.from_file(Path(args.sample))
         meta, capabilities = result_doc.to_capa()
     else:

View File

@@ -9,6 +9,7 @@ import datetime
 import collections
 from enum import Enum
 from typing import Dict, List, Tuple, Union, Literal, Optional
+from pathlib import Path
 from pydantic import Field, BaseModel, ConfigDict
 from typing_extensions import TypeAlias
@@ -646,3 +647,7 @@ class ResultDocument(FrozenModel):
                 capabilities[rule_name].append((addr.to_capa(), result))
         return self.meta, capabilities
+    @classmethod
+    def from_file(cls, path: Path) -> "ResultDocument":
+        return cls.model_validate_json(path.read_text(encoding="utf-8"))
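pydantic v2 drops `parse_file`, so `ResultDocument` gains this small `from_file` helper; the call sites in the files below switch from `ResultDocument.parse_file(path)` to `ResultDocument.from_file(Path(path))`. A usage sketch, with the result path invented for illustration:

```python
from pathlib import Path

import capa.render.result_document as rdoc

# hypothetical path to a capa-generated JSON result document
path = Path("results/sample.json")

rd = rdoc.ResultDocument.from_file(path)
meta, capabilities = rd.to_capa()
```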

View File

@@ -30,6 +30,7 @@ See the License for the specific language governing permissions and limitations
""" """
import logging import logging
import binascii import binascii
from pathlib import Path
import ida_nalt import ida_nalt
import ida_funcs import ida_funcs
@@ -68,7 +69,7 @@ def main():
     if not path:
         return 0
-    result_doc = capa.render.result_document.ResultDocument.parse_file(path)
+    result_doc = capa.render.result_document.ResultDocument.from_file(Path(path))
     meta, capabilities = result_doc.to_capa()
     # in IDA 7.4, the MD5 hash may be truncated, for example:

View File

@@ -31,6 +31,7 @@ Example:
 import sys
 import logging
 import argparse
+from pathlib import Path
 import capa.render.proto
 import capa.render.result_document
@@ -64,7 +65,7 @@ def main(argv=None):
     logging.basicConfig(level=logging.INFO)
     logging.getLogger().setLevel(logging.INFO)
-    rd = capa.render.result_document.ResultDocument.parse_file(args.json)
+    rd = capa.render.result_document.ResultDocument.from_file(Path(args.json))
     pb = capa.render.proto.doc_to_pb2(rd)
     sys.stdout.buffer.write(pb.SerializeToString(deterministic=True))

View File

@@ -1407,8 +1407,8 @@ def _039a6_dotnetfile_extractor():
     return get_dnfile_extractor(get_data_path_by_name("_039a6"))
-def get_result_doc(path):
-    return capa.render.result_document.ResultDocument.parse_file(path)
+def get_result_doc(path: Path):
+    return capa.render.result_document.ResultDocument.from_file(path)
 @pytest.fixture

View File

@@ -92,6 +92,15 @@ def get_ida_extractor(_path):
     return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
+def nocollect(f):
+    "don't collect the decorated function as a pytest test"
+    f.__test__ = False
+    return f
+# although these look like pytest tests, they're not, because they don't run within pytest
+# (the runner is below) and they use `yield`, which is deprecated.
+@nocollect
 @pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
 def test_ida_features():
     # we're guaranteed to be in a function here, so there's a stack frame
@@ -118,6 +127,7 @@ def test_ida_features():
             yield this_name, id, "pass", None
+@nocollect
 @pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
 def test_ida_feature_counts():
     # we're guaranteed to be in a function here, so there's a stack frame
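The new `nocollect` decorator relies on pytest's documented collection rule: a function whose `__test__` attribute is `False` is not collected, even if its name matches the `test_*` pattern. A standalone sketch of the same pattern:

```python
def nocollect(f):
    "don't collect the decorated function as a pytest test"
    f.__test__ = False
    return f


@nocollect
def test_looks_like_a_test_but_is_not():
    # pytest skips this function during collection because __test__ is False;
    # a custom runner (like the IDA runner in this file) can still call it directly.
    yield "example feature", 0x401000, "pass", None
```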

View File

@@ -237,7 +237,7 @@ def assert_round_trip(rd: rdoc.ResultDocument):
     one = rd
     doc = one.model_dump_json(exclude_none=True)
-    two = rdoc.ResultDocument.parse_raw(doc)
+    two = rdoc.ResultDocument.model_validate_json(doc)
     # show the round trip works
     # first by comparing the objects directly,
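The round-trip check pairs pydantic v2's `model_dump_json` (serialize) with `model_validate_json` (deserialize). A self-contained sketch with a toy model in place of `rdoc.ResultDocument`:

```python
from typing import Optional

from pydantic import BaseModel


class Doc(BaseModel):
    # toy stand-in for rdoc.ResultDocument
    name: str
    comment: Optional[str] = None


one = Doc(name="pma01-01")
two = Doc.model_validate_json(one.model_dump_json(exclude_none=True))

# the serialize/deserialize round trip preserves the object
assert one == two
```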
@@ -289,14 +289,14 @@ def test_round_trip(request, rd_file):
 @pytest.mark.xfail(reason="samples haven't been modified to the scopes keyword")
 def test_json_to_rdoc():
     path = fixtures.get_data_path_by_name("pma01-01-rd")
-    assert isinstance(rdoc.ResultDocument.parse_file(path), rdoc.ResultDocument)
+    assert isinstance(rdoc.ResultDocument.from_file(path), rdoc.ResultDocument)
 @pytest.mark.xfail(reason="samples haven't been modified to the scopes keyword")
 def test_rdoc_to_capa():
     path = fixtures.get_data_path_by_name("pma01-01-rd")
-    rd = rdoc.ResultDocument.parse_file(path)
+    rd = rdoc.ResultDocument.from_file(path)
     meta, capabilites = rd.to_capa()
     assert isinstance(meta, rdoc.Metadata)