Merge branch 'master' into dynamic-feature-extraction

Willi Ballenthin
2023-08-15 14:01:30 +02:00
committed by GitHub
10 changed files with 29 additions and 11 deletions

View File

@@ -28,6 +28,8 @@
 - linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin
 - OS: detect Android ELF files #1705 @williballenthin
 - ELF: fix parsing of symtab #1704 @williballenthin
+- result document: don't use deprecated pydantic functions #1718 @williballenthin
+- pytest: don't mark IDA tests as pytest tests #1719 @williballenthin
 ### capa explorer IDA Pro plugin
 - fix unhandled exception when resolving rule path #1693 @mike-hunhoff

View File

@@ -540,7 +540,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
 def loads_static(s: str) -> StaticFeatureExtractor:
     """deserialize a set of features (as a NullStaticFeatureExtractor) from a string."""
-    freeze = Freeze.parse_raw(s)
+    freeze = Freeze.model_validate_json(s)
     if freeze.version != 2:
         raise ValueError(f"unsupported freeze format version: {freeze.version}")

View File

@@ -5,7 +5,6 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-import json
 import logging
 import datetime
 import contextlib
@@ -224,7 +223,7 @@ def load_and_verify_cached_results() -> Optional[rdoc.ResultDocument]:
logger.debug("loading cached capa results from netnode '%s'", CAPA_NETNODE) logger.debug("loading cached capa results from netnode '%s'", CAPA_NETNODE)
n = netnode.Netnode(CAPA_NETNODE) n = netnode.Netnode(CAPA_NETNODE)
doc = rdoc.ResultDocument.parse_obj(json.loads(n[NETNODE_RESULTS])) doc = rdoc.ResultDocument.model_validate_json(n[NETNODE_RESULTS])
for rule in rutils.capability_rules(doc): for rule in rutils.capability_rules(doc):
for location_, _ in rule.matches: for location_, _ in rule.matches:
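`model_validate_json` accepts the serialized JSON string directly, which is why the explicit `json.loads` step and the `import json` removed above are no longer needed. A sketch of the two equivalent forms, using a toy model in place of `rdoc.ResultDocument`:

```python
import json

from pydantic import BaseModel


class CachedDoc(BaseModel):
    # toy stand-in for rdoc.ResultDocument; the field is invented for illustration
    version: str


data = '{"version": "6.0.0"}'

# before: decode to a dict, then validate the dict (pydantic v1 style, now deprecated)
doc_v1 = CachedDoc.parse_obj(json.loads(data))

# after: validate the JSON string in one step (pydantic v2)
doc_v2 = CachedDoc.model_validate_json(data)

assert doc_v1 == doc_v2
```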

View File

@@ -1517,7 +1517,7 @@ def main(argv: Optional[List[str]] = None):
     if format_ == FORMAT_RESULT:
         # result document directly parses into meta, capabilities
-        result_doc = capa.render.result_document.ResultDocument.parse_file(args.sample)
+        result_doc = capa.render.result_document.ResultDocument.from_file(Path(args.sample))
         meta, capabilities = result_doc.to_capa()
     else:

View File

@@ -9,6 +9,7 @@ import datetime
 import collections
 from enum import Enum
 from typing import Dict, List, Tuple, Union, Literal, Optional
+from pathlib import Path
 from pydantic import Field, BaseModel, ConfigDict
 from typing_extensions import TypeAlias
@@ -646,3 +647,7 @@ class ResultDocument(FrozenModel):
                 capabilities[rule_name].append((addr.to_capa(), result))
         return self.meta, capabilities
+    @classmethod
+    def from_file(cls, path: Path) -> "ResultDocument":
+        return cls.model_validate_json(path.read_text(encoding="utf-8"))
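pydantic v2 drops `parse_file`, so `ResultDocument` gains this small `from_file` helper; the call sites in the files below switch from `ResultDocument.parse_file(path)` to `ResultDocument.from_file(Path(path))`. A usage sketch, with the result path invented for illustration:

```python
from pathlib import Path

import capa.render.result_document as rdoc

# hypothetical path to a capa-generated JSON result document
path = Path("results/sample.json")

rd = rdoc.ResultDocument.from_file(path)
meta, capabilities = rd.to_capa()
```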

View File

@@ -30,6 +30,7 @@ See the License for the specific language governing permissions and limitations
""" """
import logging import logging
import binascii import binascii
from pathlib import Path
import ida_nalt import ida_nalt
import ida_funcs import ida_funcs
@@ -68,7 +69,7 @@ def main():
     if not path:
         return 0
-    result_doc = capa.render.result_document.ResultDocument.parse_file(path)
+    result_doc = capa.render.result_document.ResultDocument.from_file(Path(path))
     meta, capabilities = result_doc.to_capa()
     # in IDA 7.4, the MD5 hash may be truncated, for example:

View File

@@ -31,6 +31,7 @@ Example:
 import sys
 import logging
 import argparse
+from pathlib import Path
 import capa.render.proto
 import capa.render.result_document
@@ -64,7 +65,7 @@ def main(argv=None):
     logging.basicConfig(level=logging.INFO)
     logging.getLogger().setLevel(logging.INFO)
-    rd = capa.render.result_document.ResultDocument.parse_file(args.json)
+    rd = capa.render.result_document.ResultDocument.from_file(Path(args.json))
     pb = capa.render.proto.doc_to_pb2(rd)
     sys.stdout.buffer.write(pb.SerializeToString(deterministic=True))

View File

@@ -1407,8 +1407,8 @@ def _039a6_dotnetfile_extractor():
     return get_dnfile_extractor(get_data_path_by_name("_039a6"))
-def get_result_doc(path):
-    return capa.render.result_document.ResultDocument.parse_file(path)
+def get_result_doc(path: Path):
+    return capa.render.result_document.ResultDocument.from_file(path)
 @pytest.fixture

View File

@@ -92,6 +92,15 @@ def get_ida_extractor(_path):
     return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
+def nocollect(f):
+    "don't collect the decorated function as a pytest test"
+    f.__test__ = False
+    return f
+# although these look like pytest tests, they're not, because they don't run within pytest
+# (the runner is below) and they use `yield`, which is deprecated.
+@nocollect
 @pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
 def test_ida_features():
     # we're guaranteed to be in a function here, so there's a stack frame
@@ -118,6 +127,7 @@ def test_ida_features():
             yield this_name, id, "pass", None
+@nocollect
 @pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
 def test_ida_feature_counts():
     # we're guaranteed to be in a function here, so there's a stack frame
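The new `nocollect` decorator relies on pytest's documented collection rule: a function whose `__test__` attribute is `False` is not collected, even if its name matches the `test_*` pattern. A standalone sketch of the same pattern:

```python
def nocollect(f):
    "don't collect the decorated function as a pytest test"
    f.__test__ = False
    return f


@nocollect
def test_looks_like_a_test_but_is_not():
    # pytest skips this function during collection because __test__ is False;
    # a custom runner (like the IDA runner in this file) can still call it directly.
    yield "example feature", 0x401000, "pass", None
```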

View File

@@ -237,7 +237,7 @@ def assert_round_trip(rd: rdoc.ResultDocument):
     one = rd
     doc = one.model_dump_json(exclude_none=True)
-    two = rdoc.ResultDocument.parse_raw(doc)
+    two = rdoc.ResultDocument.model_validate_json(doc)
     # show the round trip works
     # first by comparing the objects directly,
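The round-trip check pairs pydantic v2's `model_dump_json` (serialize) with `model_validate_json` (deserialize). A self-contained sketch with a toy model in place of `rdoc.ResultDocument`:

```python
from typing import Optional

from pydantic import BaseModel


class Doc(BaseModel):
    # toy stand-in for rdoc.ResultDocument
    name: str
    comment: Optional[str] = None


one = Doc(name="pma01-01")
two = Doc.model_validate_json(one.model_dump_json(exclude_none=True))

# the serialize/deserialize round trip preserves the object
assert one == two
```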
@@ -289,14 +289,14 @@ def test_round_trip(request, rd_file):
 @pytest.mark.xfail(reason="samples haven't been modified to the scopes keyword")
 def test_json_to_rdoc():
     path = fixtures.get_data_path_by_name("pma01-01-rd")
-    assert isinstance(rdoc.ResultDocument.parse_file(path), rdoc.ResultDocument)
+    assert isinstance(rdoc.ResultDocument.from_file(path), rdoc.ResultDocument)
 @pytest.mark.xfail(reason="samples haven't been modified to the scopes keyword")
 def test_rdoc_to_capa():
     path = fixtures.get_data_path_by_name("pma01-01-rd")
-    rd = rdoc.ResultDocument.parse_file(path)
+    rd = rdoc.ResultDocument.from_file(path)
     meta, capabilites = rd.to_capa()
     assert isinstance(meta, rdoc.Metadata)