Merge branch 'master' into dynamic-feature-extraction

Willi Ballenthin authored on 2023-08-15 14:01:30 +02:00; committed by GitHub

10 changed files with 29 additions and 11 deletions

View File

@@ -28,6 +28,8 @@
 - linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin
 - OS: detect Android ELF files #1705 @williballenthin
 - ELF: fix parsing of symtab #1704 @williballenthin
+- result document: don't use deprecated pydantic functions #1718 @williballenthin
+- pytest: don't mark IDA tests as pytest tests #1719 @williballenthin
 ### capa explorer IDA Pro plugin
 - fix unhandled exception when resolving rule path #1693 @mike-hunhoff
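
The two added entries (#1718 and #1719) summarize the changes in the file hunks below. As a minimal sketch of the pydantic v1 -> v2 renames behind #1718 (illustrative model and data, not capa code):

# minimal sketch of the pydantic v1 -> v2 migration; Example and data are placeholders
from pydantic import BaseModel

class Example(BaseModel):
    name: str

data = '{"name": "demo"}'

# v1 (deprecated): Example.parse_raw(data)
doc = Example.model_validate_json(data)         # v2 replacement for parse_raw

# v1 (deprecated): Example.parse_obj({"name": "demo"})
doc = Example.model_validate({"name": "demo"})  # v2 replacement for parse_obj

# parse_file() has no drop-in v2 counterpart; this commit adds a small
# ResultDocument.from_file() helper that reads the file and validates the text.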

View File

@@ -540,7 +540,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
 def loads_static(s: str) -> StaticFeatureExtractor:
     """deserialize a set of features (as a NullStaticFeatureExtractor) from a string."""
-    freeze = Freeze.parse_raw(s)
+    freeze = Freeze.model_validate_json(s)
     if freeze.version != 2:
         raise ValueError(f"unsupported freeze format version: {freeze.version}")

View File

@@ -5,7 +5,6 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-import json
 import logging
 import datetime
 import contextlib
@@ -224,7 +223,7 @@ def load_and_verify_cached_results() -> Optional[rdoc.ResultDocument]:
     logger.debug("loading cached capa results from netnode '%s'", CAPA_NETNODE)
     n = netnode.Netnode(CAPA_NETNODE)
-    doc = rdoc.ResultDocument.parse_obj(json.loads(n[NETNODE_RESULTS]))
+    doc = rdoc.ResultDocument.model_validate_json(n[NETNODE_RESULTS])
     for rule in rutils.capability_rules(doc):
         for location_, _ in rule.matches:
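
The import json removed in the earlier hunk of this file becomes unnecessary here: model_validate_json consumes the JSON text stored in the netnode directly, so the intermediate json.loads step goes away. A rough equivalence sketch (illustrative model and data, not capa code):

import json
from pydantic import BaseModel

class Cached(BaseModel):
    version: int

raw = '{"version": 1}'   # stands in for the JSON text read from the netnode

old = Cached.parse_obj(json.loads(raw))   # v1 style: decode to a dict, then validate
new = Cached.model_validate_json(raw)     # v2 style: validate the JSON text directly
assert old == new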

View File

@@ -1517,7 +1517,7 @@ def main(argv: Optional[List[str]] = None):
     if format_ == FORMAT_RESULT:
         # result document directly parses into meta, capabilities
-        result_doc = capa.render.result_document.ResultDocument.parse_file(args.sample)
+        result_doc = capa.render.result_document.ResultDocument.from_file(Path(args.sample))
         meta, capabilities = result_doc.to_capa()
     else:

View File

@@ -9,6 +9,7 @@ import datetime
 import collections
 from enum import Enum
 from typing import Dict, List, Tuple, Union, Literal, Optional
+from pathlib import Path
 from pydantic import Field, BaseModel, ConfigDict
 from typing_extensions import TypeAlias
@@ -646,3 +647,7 @@ class ResultDocument(FrozenModel):
                 capabilities[rule_name].append((addr.to_capa(), result))
         return self.meta, capabilities
+    @classmethod
+    def from_file(cls, path: Path) -> "ResultDocument":
+        return cls.model_validate_json(path.read_text(encoding="utf-8"))
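
The new from_file classmethod stands in for the deprecated parse_file calls removed in the other hunks of this commit. A minimal usage sketch; the report path is a placeholder:

from pathlib import Path

import capa.render.result_document as rdoc

# load a previously saved capa result document (JSON) and unpack it
rd = rdoc.ResultDocument.from_file(Path("report.json"))
meta, capabilities = rd.to_capa()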

View File

@@ -30,6 +30,7 @@ See the License for the specific language governing permissions and limitations
"""
import logging
import binascii
from pathlib import Path
import ida_nalt
import ida_funcs
@@ -68,7 +69,7 @@ def main():
     if not path:
         return 0
-    result_doc = capa.render.result_document.ResultDocument.parse_file(path)
+    result_doc = capa.render.result_document.ResultDocument.from_file(Path(path))
     meta, capabilities = result_doc.to_capa()
     # in IDA 7.4, the MD5 hash may be truncated, for example:

View File

@@ -31,6 +31,7 @@ Example:
 import sys
 import logging
 import argparse
+from pathlib import Path
 import capa.render.proto
 import capa.render.result_document
@@ -64,7 +65,7 @@ def main(argv=None):
     logging.basicConfig(level=logging.INFO)
     logging.getLogger().setLevel(logging.INFO)
-    rd = capa.render.result_document.ResultDocument.parse_file(args.json)
+    rd = capa.render.result_document.ResultDocument.from_file(Path(args.json))
     pb = capa.render.proto.doc_to_pb2(rd)
     sys.stdout.buffer.write(pb.SerializeToString(deterministic=True))

View File

@@ -1407,8 +1407,8 @@ def _039a6_dotnetfile_extractor():
     return get_dnfile_extractor(get_data_path_by_name("_039a6"))
-def get_result_doc(path):
-    return capa.render.result_document.ResultDocument.parse_file(path)
+def get_result_doc(path: Path):
+    return capa.render.result_document.ResultDocument.from_file(path)
 @pytest.fixture

View File

@@ -92,6 +92,15 @@ def get_ida_extractor(_path):
     return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
+def nocollect(f):
+    "don't collect the decorated function as a pytest test"
+    f.__test__ = False
+    return f
+# although these look like pytest tests, they're not, because they don't run within pytest
+# (the runner is below) and they use `yield`, which is deprecated.
+@nocollect
 @pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
 def test_ida_features():
     # we're guaranteed to be in a function here, so there's a stack frame
@@ -118,6 +127,7 @@ def test_ida_features():
         yield this_name, id, "pass", None
+@nocollect
 @pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
 def test_ida_feature_counts():
     # we're guaranteed to be in a function here, so there's a stack frame
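
For context on the nocollect decorator introduced above: pytest skips collecting any object whose __test__ attribute is set to a false value, which is exactly what the decorator does. A standalone sketch, with illustrative names, of how collection behaves:

# hypothetical test_example.py demonstrating the __test__ = False convention
def nocollect(f):
    "don't collect the decorated function as a pytest test"
    f.__test__ = False
    return f

@nocollect
def test_driven_by_custom_runner():
    # pytest sees __test__ == False and skips this function during collection,
    # even though its name matches the test_* pattern
    yield "handled by a custom runner instead"

def test_collected_normally():
    assert True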

View File

@@ -237,7 +237,7 @@ def assert_round_trip(rd: rdoc.ResultDocument):
     one = rd
     doc = one.model_dump_json(exclude_none=True)
-    two = rdoc.ResultDocument.parse_raw(doc)
+    two = rdoc.ResultDocument.model_validate_json(doc)
     # show the round trip works
     # first by comparing the objects directly,
@@ -289,14 +289,14 @@ def test_round_trip(request, rd_file):
 @pytest.mark.xfail(reason="samples haven't been modified to the scopes keyword")
 def test_json_to_rdoc():
     path = fixtures.get_data_path_by_name("pma01-01-rd")
-    assert isinstance(rdoc.ResultDocument.parse_file(path), rdoc.ResultDocument)
+    assert isinstance(rdoc.ResultDocument.from_file(path), rdoc.ResultDocument)
 @pytest.mark.xfail(reason="samples haven't been modified to the scopes keyword")
 def test_rdoc_to_capa():
     path = fixtures.get_data_path_by_name("pma01-01-rd")
-    rd = rdoc.ResultDocument.parse_file(path)
+    rd = rdoc.ResultDocument.from_file(path)
     meta, capabilites = rd.to_capa()
     assert isinstance(meta, rdoc.Metadata)