mirror of
https://github.com/mandiant/capa.git
synced 2026-04-28 11:53:20 -07:00
Merge branch 'master' into idalib-tests
This commit is contained in:
Submodule tests/data updated: 5ea5d9f572...689960a966
@@ -288,13 +288,33 @@ def get_vmray_extractor(path):
|
||||
return VMRayExtractor.from_zipfile(path)
|
||||
|
||||
|
||||
# Samples that have already been opened with Ghidra, keyed by sample path.
# Each value is the tuple (extractor, program, flat_api, monitor) captured
# right after the sample was loaded, so the matching Ghidra context can be
# restored when the same sample is requested again.
GHIDRA_CACHE: dict[Path, tuple] = {}


def get_ghidra_extractor(path: Path):
    """Return a Ghidra-backed feature extractor for the sample at `path`.

    The first request for a path loads the sample through `capa.loader` and
    records the extractor together with the Ghidra context (program, flat API,
    monitor) in `GHIDRA_CACHE`. Later requests for the same path re-activate
    that recorded context and return the cached extractor.

    params: path - Path to the sample to analyze

    return: the feature extractor for the sample
    """
    # we need to start PyGhidra before importing the Ghidra feature modules
    # because they import Ghidra modules that are only available after PyGhidra is started
    import pyghidra

    if not pyghidra.started():
        pyghidra.start()

    import capa.features.extractors.ghidra.context

    cached = GHIDRA_CACHE.get(path)
    if cached is not None:
        extractor, program, flat_api, monitor = cached
        # a different sample may have been loaded in between, so make the
        # context that belongs to this extractor the active one again
        capa.features.extractors.ghidra.context.set_context(program, flat_api, monitor)
        return extractor

    # Every loaded sample is kept in GHIDRA_CACHE to avoid re-opening the same
    # file multiple times, which is very slow with Ghidra.
    extractor = capa.loader.get_extractor(
        path, FORMAT_AUTO, OS_AUTO, capa.loader.BACKEND_GHIDRA, [], disable_progress=True
    )

    # remember the context the loader just established for this sample
    ctx = capa.features.extractors.ghidra.context.get_context()
    GHIDRA_CACHE[path] = (extractor, ctx.program, ctx.flat_api, ctx.monitor)
    return extractor
|
||||
|
||||
|
||||
|
||||
@@ -11,95 +11,42 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Must invoke this script from within the Ghidra Runtime Environment
|
||||
"""
|
||||
import sys
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import os
|
||||
import importlib.util
|
||||
|
||||
import pytest
|
||||
import fixtures
|
||||
|
||||
try:
|
||||
sys.path.append(str(Path(__file__).parent))
|
||||
import fixtures
|
||||
finally:
|
||||
sys.path.pop()
|
||||
import capa.features.common
|
||||
|
||||
# The Ghidra tests drive Ghidra through PyGhidra, so they can only run when the
# `pyghidra` package is importable and GHIDRA_INSTALL_DIR is set in the environment.
# A bare `import ghidra` probe must not be used to override this value: the tests'
# extractor factory starts PyGhidra itself, so the probe would be evaluated before
# that happens.
ghidra_present: bool = importlib.util.find_spec("pyghidra") is not None and "GHIDRA_INSTALL_DIR" in os.environ


logger = logging.getLogger("test_ghidra_features")
|
||||
|
||||
|
||||
def standardize_posix_str(psx_str):
    """Translate the path of a test-data sample into its test-id sample name.

    params: psx_str - PosixPath() to the test data

    return: string that matches test-id sample name
    """
    pma_prefix = "Practical Malware Analysis Lab "

    # only the final path component identifies the sample
    file_name = str(psx_str).split("/")[-1]

    # drop the five-character sample suffix, e.g. ".exe_"
    stem = file_name[:-5]

    # decide on the file name itself, not on the whole path: a parent directory
    # that happens to contain the PMA marker must not affect the result
    if file_name.startswith(pma_prefix):
        # <PosixPath>/'Practical Malware Analysis Lab 16-01.exe_' -> 'pma16-01'
        wanted_str = "pma" + stem[len(pma_prefix) :]
    else:
        # <PosixPath>/mimikatz.exe_ -> mimikatz
        wanted_str = stem

    # al-khaser_x86 -> al-khaser x86
    return wanted_str.replace("_", " ")
|
||||
|
||||
|
||||
def check_input_file(wanted):
    """Verify that the currently loaded sample is the one the test targets.

    params: wanted - PosixPath() passed from test arg

    raises: RuntimeError when the MD5 reported for the loaded file does not
            start with the MD5 registered for the wanted sample
    """
    import capa.ghidra.helpers as ghidra_helpers

    # MD5 of the file as reported by the Ghidra helpers
    found = ghidra_helpers.get_file_md5()

    # MD5 (possibly a prefix) registered in the fixtures for the wanted sample
    expected_md5 = fixtures.get_sample_md5_by_name(standardize_posix_str(wanted))

    if found.startswith(expected_md5):
        return

    raise RuntimeError(f"please run the tests against sample with MD5: `{found}`")
|
||||
|
||||
|
||||
@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    [
        (
            # this one case is expected to fail with Ghidra, so it is marked xfail
            # instead of being dropped from the shared test table
            pytest.param(
                *t,
                marks=pytest.mark.xfail(
                    reason="specific to Vivisect and basic blocks do not align with Ghidra's analysis"
                ),
            )
            if t[0] == "294b8d..." and t[2] == capa.features.common.String("\r\n\x00:ht")
            else t
        )
        for t in fixtures.FEATURE_PRESENCE_TESTS
    ],
    indirect=["sample", "scope"],
)
def test_ghidra_features(sample, scope, feature, expected):
    """Check feature presence for each shared test case using the Ghidra extractor.

    The cases come from fixtures.FEATURE_PRESENCE_TESTS and are parametrized
    exactly once. The extractor factory receives the sample and loads it
    itself, so no "is the right sample loaded" guard is needed here.
    """
    fixtures.do_test_feature_presence(fixtures.get_ghidra_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected", fixtures.FEATURE_COUNT_TESTS_GHIDRA, indirect=["sample", "scope"]
)
def test_ghidra_feature_counts(sample, scope, feature, expected):
    """Check feature counts for each Ghidra-specific test case using the Ghidra extractor.

    The cases come from fixtures.FEATURE_COUNT_TESTS_GHIDRA. The extractor
    factory receives the sample and loads it itself, so no "is the right
    sample loaded" guard is needed here.
    """
    fixtures.do_test_feature_count(fixtures.get_ghidra_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The faulthandler plugin is disabled because Ghidrathon has no support
    # for the faulthandler module, see:
    # https://github.com/mandiant/Ghidrathon/issues/70
    pytest_args = ["--pyargs", "-p no:faulthandler", "test_ghidra_features"]

    # propagate pytest's exit status to the caller
    sys.exit(pytest.main(pytest_args))
|
||||
|
||||
Reference in New Issue
Block a user