mirror of
https://github.com/mandiant/capa.git
synced 2026-06-12 19:11:32 -07:00
fix: remove extract_file_format from FILE_HANDLERS in five extractors
Five extractors (ghidra, dnfile, viv, binja, ida) stored Format in global_features during __init__ and also included extract_file_format in FILE_HANDLERS. This caused find_file_capabilities to emit the Format feature twice, inflating feature counts. Removing extract_file_format from FILE_HANDLERS in all five extractors ensures Format is emitted once via global_features only.
This commit is contained in:
committed by
Willi Ballenthin
parent
e2c8ab4bff
commit
d32492d208
@@ -50,6 +50,7 @@
|
||||
- fix: correct wrong dict key in VMRay _compute_monitor_threads assertion (used thread_id instead of process_id) @williballenthin
|
||||
fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin
|
||||
- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin
|
||||
- fix: remove extract_file_format from FILE_HANDLERS in five extractors to prevent duplicate Format features @williballenthin (SURF-51)
|
||||
- fix: replace assert with guard so 2-operand ARM ADD/SUB instructions are skipped instead of crashing @williballenthin (SURF-50)
|
||||
- fix: omit trailing ` -> ` suffix in syscall names when there is no return value @williballenthin (SURF-49)
|
||||
- fix: use AbsoluteVirtualAddress instead of FileOffsetAddress for string addresses in Ghidra and IDA file extractors @williballenthin (SURF-48)
|
||||
|
||||
@@ -31,7 +31,7 @@ from capa.features.common import (
|
||||
Feature,
|
||||
Characteristic,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
||||
|
||||
|
||||
@@ -181,5 +181,4 @@ FILE_HANDLERS = (
|
||||
extract_file_section_names,
|
||||
extract_file_embedded_pe,
|
||||
extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
@@ -63,7 +63,6 @@ FILE_HANDLERS = (
|
||||
extract_file_import_names,
|
||||
extract_file_function_names,
|
||||
extract_file_strings,
|
||||
extract_file_format,
|
||||
extract_file_mixed_mode_characteristic_features,
|
||||
extract_file_namespace_features,
|
||||
extract_file_class_features,
|
||||
|
||||
@@ -241,5 +241,4 @@ FILE_HANDLERS = (
|
||||
extract_file_section_names,
|
||||
extract_file_strings,
|
||||
extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
@@ -212,5 +212,4 @@ FILE_HANDLERS = (
|
||||
extract_file_section_names,
|
||||
extract_file_embedded_pe,
|
||||
extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
@@ -154,5 +154,4 @@ FILE_HANDLERS = (
|
||||
extract_file_section_names,
|
||||
extract_file_strings,
|
||||
extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
@@ -19,9 +19,13 @@ import pytest
|
||||
import fixtures
|
||||
from dncil.clr.token import Token
|
||||
|
||||
from capa.features.common import Format
|
||||
from capa.features.extractors.dnfile.insn import get_callee
|
||||
from capa.features.extractors.dnfile.helpers import get_dotnet_table_row, calculate_dotnet_token_value
|
||||
from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
|
||||
from capa.features.extractors.dnfile.extractor import (
|
||||
DnfileFeatureExtractor,
|
||||
DnFileFeatureExtractorCache,
|
||||
)
|
||||
|
||||
CD = Path(__file__).resolve().parent
|
||||
|
||||
@@ -81,6 +85,22 @@ def test_get_dotnet_table_row_out_of_bounds():
|
||||
assert get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, len(table.rows) + 1) is None
|
||||
|
||||
|
||||
def test_no_duplicate_format_feature_in_dnfile_extractor():
    """Check that the dnfile extractor reports each Format value only once.

    Builds a DnfileFeatureExtractor for the hello-world sample, gathers the
    Format features yielded by both the file-scope and the global-scope
    extraction, and asserts that no Format value appears more than once
    across the two. The test is skipped when the sample is not present.
    """
    sample = fixtures.DNFILE_TESTFILES / "hello-world" / "hello-world.exe"
    if not sample.exists():
        pytest.skip("test data not available")

    extractor = DnfileFeatureExtractor(sample)

    # Exhaust the file-scope features before the global ones, in that order.
    file_features = list(extractor.extract_file_features())
    global_features = list(extractor.extract_global_features())

    format_values = []
    for feature, _ in file_features + global_features:
        if isinstance(feature, Format):
            format_values.append(feature.value)

    # A set collapses repeated values, so differing sizes mean a duplicate.
    unique_values = set(format_values)
    assert len(format_values) == len(unique_values), f"duplicate Format features: {format_values}"
|
||||
|
||||
|
||||
def test_get_callee_invalid_methodspec_token_returns_none():
|
||||
path = CD / "data" / "2dae11cc5f86f5399b560b8837c26274b7e09431deed669b0844fef44e917915.exe_"
|
||||
pe = dnfile.dnPE(str(path))
|
||||
|
||||
Reference in New Issue
Block a user