diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e271b67..f77e67ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ - fix: correct wrong dict key in VMRay _compute_monitor_threads assertion (used thread_id instead of process_id) @williballenthin fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin - fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin +- fix: remove extract_file_format from FILE_HANDLERS in five extractors to prevent duplicate Format features @williballenthin (SURF-51) - fix: replace assert with guard so 2-operand ARM ADD/SUB instructions are skipped instead of crashing @williballenthin (SURF-50) - fix: omit trailing ` -> ` suffix in syscall names when there is no return value @williballenthin (SURF-49) - fix: use AbsoluteVirtualAddress instead of FileOffsetAddress for string addresses in Ghidra and IDA file extractors @williballenthin (SURF-48) diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index 181142d5..5d6b136a 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -31,7 +31,7 @@ from capa.features.common import ( Feature, Characteristic, ) -from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress +from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name @@ -181,5 +181,4 @@ FILE_HANDLERS = ( extract_file_section_names, extract_file_embedded_pe, extract_file_function_names, - extract_file_format, ) diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 7a723e8d..29bea470 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -63,7 +63,6 @@ FILE_HANDLERS = ( extract_file_import_names, extract_file_function_names, extract_file_strings, - extract_file_format, extract_file_mixed_mode_characteristic_features, extract_file_namespace_features, extract_file_class_features, diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py index 70b49e9a..5b4f1463 100644 --- a/capa/features/extractors/ghidra/file.py +++ b/capa/features/extractors/ghidra/file.py @@ -241,5 +241,4 @@ FILE_HANDLERS = ( extract_file_section_names, extract_file_strings, extract_file_function_names, - extract_file_format, ) diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index f3744ebe..62fffa0c 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -212,5 +212,4 @@ FILE_HANDLERS = ( extract_file_section_names, extract_file_embedded_pe, extract_file_function_names, - extract_file_format, ) diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index ed1bafec..4caef75e 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -154,5 +154,4 @@ FILE_HANDLERS = ( extract_file_section_names, extract_file_strings, extract_file_function_names, - extract_file_format, ) diff --git a/tests/test_dnfile_features.py b/tests/test_dnfile_features.py index f00f05e3..51106aa6 100644 --- a/tests/test_dnfile_features.py +++ b/tests/test_dnfile_features.py @@ -19,9 +19,13 @@ import pytest import fixtures from dncil.clr.token import Token +from capa.features.common import Format from capa.features.extractors.dnfile.insn import get_callee from capa.features.extractors.dnfile.helpers import get_dotnet_table_row, calculate_dotnet_token_value -from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache +from capa.features.extractors.dnfile.extractor import ( + DnfileFeatureExtractor, + DnFileFeatureExtractorCache, +) CD = Path(__file__).resolve().parent @@ -81,6 +85,22 @@ def test_get_dotnet_table_row_out_of_bounds(): assert get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, len(table.rows) + 1) is None +def test_no_duplicate_format_feature_in_dnfile_extractor(): + path = fixtures.DNFILE_TESTFILES / "hello-world" / "hello-world.exe" + if not path.exists(): + pytest.skip("test data not available") + + extractor = DnfileFeatureExtractor(path) + + format_values = [ + f.value + for f, _ in list(extractor.extract_file_features()) + list(extractor.extract_global_features()) + if isinstance(f, Format) + ] + + assert len(format_values) == len(set(format_values)), f"duplicate Format features: {format_values}" + + def test_get_callee_invalid_methodspec_token_returns_none(): path = CD / "data" / "2dae11cc5f86f5399b560b8837c26274b7e09431deed669b0844fef44e917915.exe_" pe = dnfile.dnPE(str(path))