mirror of
https://github.com/mandiant/capa.git
synced 2026-06-12 19:11:32 -07:00
fix: remove extract_file_format from FILE_HANDLERS in five extractors
Five extractors (ghidra, dnfile, viv, binja, ida) stored Format in global_features during __init__ and also included extract_file_format in FILE_HANDLERS. This caused find_file_capabilities to emit the Format feature twice, inflating feature counts. Removing extract_file_format from FILE_HANDLERS in all five extractors ensures Format is emitted once via global_features only.
This commit is contained in:
committed by
Willi Ballenthin
parent
e2c8ab4bff
commit
d32492d208
@@ -19,9 +19,13 @@ import pytest
|
||||
import fixtures
|
||||
from dncil.clr.token import Token
|
||||
|
||||
from capa.features.common import Format
|
||||
from capa.features.extractors.dnfile.insn import get_callee
|
||||
from capa.features.extractors.dnfile.helpers import get_dotnet_table_row, calculate_dotnet_token_value
|
||||
from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
|
||||
from capa.features.extractors.dnfile.extractor import (
|
||||
DnfileFeatureExtractor,
|
||||
DnFileFeatureExtractorCache,
|
||||
)
|
||||
|
||||
CD = Path(__file__).resolve().parent
|
||||
|
||||
@@ -81,6 +85,22 @@ def test_get_dotnet_table_row_out_of_bounds():
|
||||
assert get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, len(table.rows) + 1) is None
|
||||
|
||||
|
||||
def test_no_duplicate_format_feature_in_dnfile_extractor():
    """Check that the dnfile extractor does not yield the same Format value twice.

    Format features are gathered from both the file-scope and the global-scope
    feature streams of the extractor; every collected value must be unique.
    The test is skipped when the hello-world sample is not present on disk.
    """
    sample = fixtures.DNFILE_TESTFILES / "hello-world" / "hello-world.exe"
    if not sample.exists():
        pytest.skip("test data not available")

    extractor = DnfileFeatureExtractor(sample)

    # Walk the file-scope stream first, then the global-scope stream, keeping
    # only the values of Format features.
    format_values = []
    for produce in (extractor.extract_file_features, extractor.extract_global_features):
        for feature, _ in produce():
            if isinstance(feature, Format):
                format_values.append(feature.value)

    unique_values = set(format_values)
    assert len(format_values) == len(unique_values), f"duplicate Format features: {format_values}"
|
||||
|
||||
|
||||
def test_get_callee_invalid_methodspec_token_returns_none():
|
||||
path = CD / "data" / "2dae11cc5f86f5399b560b8837c26274b7e09431deed669b0844fef44e917915.exe_"
|
||||
pe = dnfile.dnPE(str(path))
|
||||
|
||||
Reference in New Issue
Block a user