Compare commits

...

6 Commits

Author SHA1 Message Date
mr-tz
dc08843e2d address idalib-based test fails 2025-12-11 14:18:13 +00:00
Moritz
074f7c742c Merge branch 'master' into idalib-tests 2025-11-24 19:52:40 +01:00
Willi Ballenthin
cf463676b2 fixtures: remove dups 2025-11-03 12:47:12 +01:00
Willi Ballenthin
b5e5840a63 lints 2025-10-29 20:29:08 +01:00
Willi Ballenthin
f252b6bbd0 changelog 2025-10-29 20:23:12 +01:00
Willi Ballenthin
eda53ab3c1 tests: add feature tests for idalib 2025-10-29 20:20:57 +01:00
7 changed files with 171 additions and 21 deletions

View File

@@ -138,6 +138,7 @@ repos:
- "--ignore=tests/test_ghidra_features.py" - "--ignore=tests/test_ghidra_features.py"
- "--ignore=tests/test_ida_features.py" - "--ignore=tests/test_ida_features.py"
- "--ignore=tests/test_viv_features.py" - "--ignore=tests/test_viv_features.py"
- "--ignore=tests/test_idalib_features.py"
- "--ignore=tests/test_main.py" - "--ignore=tests/test_main.py"
- "--ignore=tests/test_scripts.py" - "--ignore=tests/test_scripts.py"
always_run: true always_run: true

View File

@@ -47,6 +47,7 @@ Additionally a Binary Ninja bug has been fixed. Released binaries now include AR
### New Features ### New Features
- ci: add support for arm64 binary releases - ci: add support for arm64 binary releases
- tests: run tests against IDA via idalib @williballenthin #2742
### Breaking Changes ### Breaking Changes

View File

@@ -18,6 +18,8 @@ import idaapi
import idautils import idautils
import capa.features.extractors.ida.helpers import capa.features.extractors.ida.helpers
from capa.features.file import FunctionName
from capa.features.insn import API
from capa.features.common import Feature, Characteristic from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops from capa.features.extractors import loops
@@ -50,10 +52,22 @@ def extract_recursive_call(fh: FunctionHandle):
yield Characteristic("recursive call"), fh.address yield Characteristic("recursive call"), fh.address
def extract_function_alternative_names(fh: FunctionHandle):
"""Get all alternative names for an address."""
for aname in capa.features.extractors.ida.helpers.get_function_alternative_names(fh.inner.start_ea):
yield FunctionName(aname), fh.address
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
for func_handler in FUNCTION_HANDLERS: for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh): for feature, addr in func_handler(fh):
yield feature, addr yield feature, addr
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call) FUNCTION_HANDLERS = (
extract_function_calls_to,
extract_function_loop,
extract_recursive_call,
extract_function_alternative_names,
)

View File

@@ -20,6 +20,7 @@ import idaapi
import ida_nalt import ida_nalt
import idautils import idautils
import ida_bytes import ida_bytes
import ida_funcs
import ida_segment import ida_segment
from capa.features.address import AbsoluteVirtualAddress from capa.features.address import AbsoluteVirtualAddress
@@ -436,3 +437,23 @@ def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
def has_sib(oper: idaapi.op_t) -> bool: def has_sib(oper: idaapi.op_t) -> bool:
# via: https://reverseengineering.stackexchange.com/a/14300 # via: https://reverseengineering.stackexchange.com/a/14300
return oper.specflag1 == 1 return oper.specflag1 == 1
def get_function_alternative_names(fva: int):
"""Get all alternative names for an address."""
# Check indented comment
cmt = ida_bytes.get_cmt(fva, False) # False = non-repeatable
if cmt:
for line in cmt.split("\n"):
if line.startswith("Alternative name is '") and line.endswith("'"):
name = line[len("Alternative name is '") : -1] # Extract name between quotes
yield name
# Check function comment
func_cmt = ida_funcs.get_func_cmt(idaapi.get_func(fva), False)
if func_cmt:
for line in func_cmt.split("\n"):
if line.startswith("Alternative name is '") and line.endswith("'"):
name = line[len("Alternative name is '") : -1]
yield name

View File

@@ -22,9 +22,11 @@ import idautils
import capa.features.extractors.helpers import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers import capa.features.extractors.ida.helpers
from capa.features.file import FunctionName
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.ida.function import extract_function_alternative_names
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
# security cookie checks may perform non-zeroing XORs, these are expected within a certain # security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -129,8 +131,8 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
# not a function (start) # not a function (start)
return return
if target_func.flags & idaapi.FUNC_LIB: name = idaapi.get_name(target_func.start_ea)
name = idaapi.get_name(target_func.start_ea) if target_func.flags & idaapi.FUNC_LIB or not name.startswith("sub_"):
yield API(name), ih.address yield API(name), ih.address
if name.startswith("_"): if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions. # some linkers may prefix linked routines with a `_` to avoid name collisions.
@@ -139,6 +141,10 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
# see: https://stackoverflow.com/a/2628384/87207 # see: https://stackoverflow.com/a/2628384/87207
yield API(name[1:]), ih.address yield API(name[1:]), ih.address
for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(target_func.start_ea):
yield FunctionName(altname), ih.address
yield API(altname), ih.address
def extract_insn_number_features( def extract_insn_number_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
import contextlib import contextlib
import collections import collections
from pathlib import Path from pathlib import Path
@@ -21,6 +21,7 @@ from functools import lru_cache
import pytest import pytest
import capa.main import capa.main
import capa.helpers
import capa.features.file import capa.features.file
import capa.features.insn import capa.features.insn
import capa.features.common import capa.features.common
@@ -53,6 +54,7 @@ from capa.features.extractors.base_extractor import (
) )
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
logger = logging.getLogger(__name__)
CD = Path(__file__).resolve().parent CD = Path(__file__).resolve().parent
DOTNET_DIR = CD / "data" / "dotnet" DOTNET_DIR = CD / "data" / "dotnet"
DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles" DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
@@ -200,6 +202,65 @@ def get_binja_extractor(path: Path):
return extractor return extractor
# we can't easily cache this because the extractor relies on global state (the opened database)
# which also has to be closed elsewhere. so, the idalib tests will just take a little bit to run.
def get_idalib_extractor(path: Path):
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
raise RuntimeError("cannot find IDA idalib module.")
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idapro
import ida_auto
import capa.features.extractors.ida.extractor
logger.debug("idalib: opening database...")
idapro.enable_console_messages(False)
# - 0 - Success (database not packed)
# - 1 - Success (database was packed)
# - 2 - User cancelled or 32-64 bit conversion failed
# - 4 - Database initialization failed
# - -1 - Generic errors (database already open, auto-analysis failed, etc.)
# - -2 - User cancelled operation
ret = idapro.open_database(str(path), run_auto_analysis=True)
if ret not in (0, 1):
raise RuntimeError("failed to analyze input file")
logger.debug("idalib: waiting for analysis...")
ida_auto.auto_wait()
logger.debug("idalib: opened database.")
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
fixup_idalib(path, extractor)
return extractor
def fixup_idalib(path: Path, extractor):
"""
IDA fixups to overcome differences between backends
"""
import idaapi
import ida_funcs
def remove_library_id_flag(fva):
f = idaapi.get_func(fva)
f.flags &= ~ida_funcs.FUNC_LIB
ida_funcs.update_func(f)
if "kernel32-64" in path.name:
# remove (correct) library function id, so we can test x64 thunk
remove_library_id_flag(0x1800202B0)
if "al-khaser_x64" in path.name:
# remove (correct) library function id, so we can test x64 nested thunk
remove_library_id_flag(0x14004B4F0)
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def get_cape_extractor(path): def get_cape_extractor(path):
from capa.helpers import load_json_from_path from capa.helpers import load_json_from_path
@@ -894,20 +955,8 @@ FEATURE_PRESENCE_TESTS = sorted(
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False), ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True), ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
# insn/api: x64 # insn/api: x64
(
"kernel32-64",
"function=0x180001010",
capa.features.insn.API("RtlVirtualUnwind"),
True,
),
("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True), ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
# insn/api: x64 thunk # insn/api: x64 thunk
(
"kernel32-64",
"function=0x1800202B0",
capa.features.insn.API("RtlCaptureContext"),
True,
),
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True), ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
# insn/api: x64 nested thunk # insn/api: x64 nested thunk
("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True), ("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
@@ -995,20 +1044,20 @@ FEATURE_PRESENCE_TESTS = sorted(
("pma16-01", "file", OS(OS_WINDOWS), True), ("pma16-01", "file", OS(OS_WINDOWS), True),
("pma16-01", "file", OS(OS_LINUX), False), ("pma16-01", "file", OS(OS_LINUX), False),
("mimikatz", "file", OS(OS_WINDOWS), True), ("mimikatz", "file", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True), ("pma16-01", "function=0x401100", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True), ("pma16-01", "function=0x401100,bb=0x401130", OS(OS_WINDOWS), True),
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True), ("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
("pma16-01", "file", Arch(ARCH_I386), True), ("pma16-01", "file", Arch(ARCH_I386), True),
("pma16-01", "file", Arch(ARCH_AMD64), False), ("pma16-01", "file", Arch(ARCH_AMD64), False),
("mimikatz", "file", Arch(ARCH_I386), True), ("mimikatz", "file", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356", Arch(ARCH_I386), True), ("pma16-01", "function=0x401100", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True), ("pma16-01", "function=0x401100,bb=0x401130", Arch(ARCH_I386), True),
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True), ("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
("pma16-01", "file", Format(FORMAT_PE), True), ("pma16-01", "file", Format(FORMAT_PE), True),
("pma16-01", "file", Format(FORMAT_ELF), False), ("pma16-01", "file", Format(FORMAT_ELF), False),
("mimikatz", "file", Format(FORMAT_PE), True), ("mimikatz", "file", Format(FORMAT_PE), True),
# format is also a global feature # format is also a global feature
("pma16-01", "function=0x404356", Format(FORMAT_PE), True), ("pma16-01", "function=0x401100", Format(FORMAT_PE), True),
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True), ("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
# elf support # elf support
("7351f.elf", "file", OS(OS_LINUX), True), ("7351f.elf", "file", OS(OS_LINUX), True),

View File

@@ -0,0 +1,58 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import pytest
import fixtures
import capa.features.extractors.ida.idalib
logger = logging.getLogger(__name__)
idalib_present = capa.features.extractors.ida.idalib.has_idalib()
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
)
def test_idalib_features(sample, scope, feature, expected):
try:
fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected)
finally:
logger.debug("closing database...")
import idapro
idapro.close_database(save=False)
logger.debug("opened database.")
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_COUNT_TESTS,
indirect=["sample", "scope"],
)
def test_idalib_feature_counts(sample, scope, feature, expected):
try:
fixtures.do_test_feature_count(fixtures.get_idalib_extractor, sample, scope, feature, expected)
finally:
logger.debug("closing database...")
import idapro
idapro.close_database(save=False)
logger.debug("closed database.")