mirror of
https://github.com/mandiant/capa.git
synced 2025-12-14 08:30:44 -08:00
Compare commits
6 Commits
backend/py
...
idalib-tes
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc08843e2d | ||
|
|
074f7c742c | ||
|
|
cf463676b2 | ||
|
|
b5e5840a63 | ||
|
|
f252b6bbd0 | ||
|
|
eda53ab3c1 |
@@ -138,6 +138,7 @@ repos:
|
|||||||
- "--ignore=tests/test_ghidra_features.py"
|
- "--ignore=tests/test_ghidra_features.py"
|
||||||
- "--ignore=tests/test_ida_features.py"
|
- "--ignore=tests/test_ida_features.py"
|
||||||
- "--ignore=tests/test_viv_features.py"
|
- "--ignore=tests/test_viv_features.py"
|
||||||
|
- "--ignore=tests/test_idalib_features.py"
|
||||||
- "--ignore=tests/test_main.py"
|
- "--ignore=tests/test_main.py"
|
||||||
- "--ignore=tests/test_scripts.py"
|
- "--ignore=tests/test_scripts.py"
|
||||||
always_run: true
|
always_run: true
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ Additionally a Binary Ninja bug has been fixed. Released binaries now include AR
|
|||||||
### New Features
|
### New Features
|
||||||
|
|
||||||
- ci: add support for arm64 binary releases
|
- ci: add support for arm64 binary releases
|
||||||
|
- tests: run tests against IDA via idalib @williballenthin #2742
|
||||||
|
|
||||||
### Breaking Changes
|
### Breaking Changes
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ import idaapi
|
|||||||
import idautils
|
import idautils
|
||||||
|
|
||||||
import capa.features.extractors.ida.helpers
|
import capa.features.extractors.ida.helpers
|
||||||
|
from capa.features.file import FunctionName
|
||||||
|
from capa.features.insn import API
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors import loops
|
from capa.features.extractors import loops
|
||||||
@@ -50,10 +52,22 @@ def extract_recursive_call(fh: FunctionHandle):
|
|||||||
yield Characteristic("recursive call"), fh.address
|
yield Characteristic("recursive call"), fh.address
|
||||||
|
|
||||||
|
|
||||||
|
def extract_function_alternative_names(fh: FunctionHandle):
    """
    Yield a FunctionName feature for each alternative name of the given function.

    The names are produced by
    `capa.features.extractors.ida.helpers.get_function_alternative_names`,
    queried with the function's start address (`fh.inner.start_ea`).
    Every feature is reported at `fh.address`.
    """
    alternative_names = capa.features.extractors.ida.helpers.get_function_alternative_names(fh.inner.start_ea)
    yield from ((FunctionName(alternative_name), fh.address) for alternative_name in alternative_names)
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
for func_handler in FUNCTION_HANDLERS:
|
for func_handler in FUNCTION_HANDLERS:
|
||||||
for feature, addr in func_handler(fh):
|
for feature, addr in func_handler(fh):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
|
|
||||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
|
FUNCTION_HANDLERS = (
|
||||||
|
extract_function_calls_to,
|
||||||
|
extract_function_loop,
|
||||||
|
extract_recursive_call,
|
||||||
|
extract_function_alternative_names,
|
||||||
|
)
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import idaapi
|
|||||||
import ida_nalt
|
import ida_nalt
|
||||||
import idautils
|
import idautils
|
||||||
import ida_bytes
|
import ida_bytes
|
||||||
|
import ida_funcs
|
||||||
import ida_segment
|
import ida_segment
|
||||||
|
|
||||||
from capa.features.address import AbsoluteVirtualAddress
|
from capa.features.address import AbsoluteVirtualAddress
|
||||||
@@ -436,3 +437,23 @@ def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
|
|||||||
def has_sib(oper: idaapi.op_t) -> bool:
|
def has_sib(oper: idaapi.op_t) -> bool:
|
||||||
# via: https://reverseengineering.stackexchange.com/a/14300
|
# via: https://reverseengineering.stackexchange.com/a/14300
|
||||||
return oper.specflag1 == 1
|
return oper.specflag1 == 1
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_alternative_names(comment):
    """Yield each name found on lines shaped like: Alternative name is '<name>'."""
    prefix = "Alternative name is '"

    # the comment may be absent (falsy), in which case there is nothing to parse
    if not comment:
        return

    for line in comment.split("\n"):
        if line.startswith(prefix) and line.endswith("'"):
            # extract the text between the quotes
            name = line[len(prefix) : -1]
            if name:
                # skip degenerate lines such as `Alternative name is ''`:
                # an empty name cannot be matched by anything useful.
                yield name


def get_function_alternative_names(fva: int):
    """
    Get all alternative names recorded in the comments for a function address.

    Two comments are inspected, in this order:
      1. the non-repeatable comment at `fva` itself, and
      2. the non-repeatable function comment of the function containing `fva`.

    Each comment is split into lines, and every line of the exact form
    `Alternative name is '<name>'` contributes `<name>`.

    args:
      fva: the address to inspect, expected to be the start of a function.

    yields:
      str: each non-empty alternative name. A name that is present in both
        comments is yielded twice.
    """
    # check the comment attached to the address (False = non-repeatable)
    yield from _parse_alternative_names(ida_bytes.get_cmt(fva, False))

    # check the function comment.
    # guard against addresses that IDA does not consider part of a function,
    # so that we never hand `None` to `get_func_cmt`.
    func = idaapi.get_func(fva)
    if func is not None:
        yield from _parse_alternative_names(ida_funcs.get_func_cmt(func, False))
|
||||||
|
|||||||
@@ -22,9 +22,11 @@ import idautils
|
|||||||
|
|
||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
import capa.features.extractors.ida.helpers
|
import capa.features.extractors.ida.helpers
|
||||||
|
from capa.features.file import FunctionName
|
||||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||||
|
from capa.features.extractors.ida.function import extract_function_alternative_names
|
||||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||||
|
|
||||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||||
@@ -129,8 +131,8 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
|||||||
# not a function (start)
|
# not a function (start)
|
||||||
return
|
return
|
||||||
|
|
||||||
if target_func.flags & idaapi.FUNC_LIB:
|
name = idaapi.get_name(target_func.start_ea)
|
||||||
name = idaapi.get_name(target_func.start_ea)
|
if target_func.flags & idaapi.FUNC_LIB or not name.startswith("sub_"):
|
||||||
yield API(name), ih.address
|
yield API(name), ih.address
|
||||||
if name.startswith("_"):
|
if name.startswith("_"):
|
||||||
# some linkers may prefix linked routines with a `_` to avoid name collisions.
|
# some linkers may prefix linked routines with a `_` to avoid name collisions.
|
||||||
@@ -139,6 +141,10 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
|||||||
# see: https://stackoverflow.com/a/2628384/87207
|
# see: https://stackoverflow.com/a/2628384/87207
|
||||||
yield API(name[1:]), ih.address
|
yield API(name[1:]), ih.address
|
||||||
|
|
||||||
|
for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(target_func.start_ea):
|
||||||
|
yield FunctionName(altname), ih.address
|
||||||
|
yield API(altname), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_insn_number_features(
|
def extract_insn_number_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
import collections
|
import collections
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -21,6 +21,7 @@ from functools import lru_cache
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import capa.main
|
import capa.main
|
||||||
|
import capa.helpers
|
||||||
import capa.features.file
|
import capa.features.file
|
||||||
import capa.features.insn
|
import capa.features.insn
|
||||||
import capa.features.common
|
import capa.features.common
|
||||||
@@ -53,6 +54,7 @@ from capa.features.extractors.base_extractor import (
|
|||||||
)
|
)
|
||||||
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
|
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
CD = Path(__file__).resolve().parent
|
CD = Path(__file__).resolve().parent
|
||||||
DOTNET_DIR = CD / "data" / "dotnet"
|
DOTNET_DIR = CD / "data" / "dotnet"
|
||||||
DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
|
DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
|
||||||
@@ -200,6 +202,65 @@ def get_binja_extractor(path: Path):
|
|||||||
return extractor
|
return extractor
|
||||||
|
|
||||||
|
|
||||||
|
# this is deliberately not cached: the extractor relies on global state (the opened database),
# and that database also has to be closed elsewhere. as a result, the idalib tests take a little while to run.
def get_idalib_extractor(path: Path):
    """
    Open `path` as an IDA database via idalib, wait for auto-analysis, and return an IDA feature extractor.

    The database is left open when this function returns; closing it is the caller's job.

    raises:
      RuntimeError: when idalib cannot be found, cannot be loaded, or the database fails to open.
    """
    import capa.features.extractors.ida.idalib as idalib_support

    if not idalib_support.has_idalib():
        raise RuntimeError("cannot find IDA idalib module.")

    if not idalib_support.load_idalib():
        raise RuntimeError("failed to load IDA idalib module.")

    # NOTE(review): these imports are kept after load_idalib(), as in the original ordering;
    # presumably the IDA modules are only importable once idalib has been loaded.
    import idapro
    import ida_auto

    from capa.features.extractors.ida.extractor import IdaFeatureExtractor

    logger.debug("idalib: opening database...")

    idapro.enable_console_messages(False)
    # return codes of open_database:
    #    0: success (database not packed)
    #    1: success (database was packed)
    #    2: user cancelled or 32-64 bit conversion failed
    #    4: database initialization failed
    #   -1: generic errors (database already open, auto-analysis failed, etc.)
    #   -2: user cancelled operation
    status = idapro.open_database(str(path), run_auto_analysis=True)
    opened = status == 0 or status == 1
    if not opened:
        raise RuntimeError("failed to analyze input file")

    logger.debug("idalib: waiting for analysis...")
    ida_auto.auto_wait()
    logger.debug("idalib: opened database.")

    ida_extractor = IdaFeatureExtractor()
    fixup_idalib(path, ida_extractor)
    return ida_extractor
|
||||||
|
|
||||||
|
|
||||||
|
def fixup_idalib(path: Path, extractor):
    """
    Apply IDA database fixups to overcome differences between backends.

    For selected samples, recognized by a fragment of `path.name`, the FUNC_LIB flag
    is cleared on one specific function and the function is updated in the database.
    The `extractor` argument is accepted but not used here.
    """
    import idaapi
    import ida_funcs

    # (fragment of the sample file name, address of the function to un-flag).
    # in each case IDA's library function identification is correct,
    # but removing it lets the tests exercise the thunk handling.
    library_id_removals = (
        # so we can test x64 thunk
        ("kernel32-64", 0x1800202B0),
        # so we can test x64 nested thunk
        ("al-khaser_x64", 0x14004B4F0),
    )

    for name_fragment, fva in library_id_removals:
        if name_fragment not in path.name:
            continue

        func = idaapi.get_func(fva)
        func.flags &= ~ida_funcs.FUNC_LIB
        ida_funcs.update_func(func)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=1)
|
@lru_cache(maxsize=1)
|
||||||
def get_cape_extractor(path):
|
def get_cape_extractor(path):
|
||||||
from capa.helpers import load_json_from_path
|
from capa.helpers import load_json_from_path
|
||||||
@@ -894,20 +955,8 @@ FEATURE_PRESENCE_TESTS = sorted(
|
|||||||
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
|
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
|
||||||
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
|
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
|
||||||
# insn/api: x64
|
# insn/api: x64
|
||||||
(
|
|
||||||
"kernel32-64",
|
|
||||||
"function=0x180001010",
|
|
||||||
capa.features.insn.API("RtlVirtualUnwind"),
|
|
||||||
True,
|
|
||||||
),
|
|
||||||
("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
|
("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
|
||||||
# insn/api: x64 thunk
|
# insn/api: x64 thunk
|
||||||
(
|
|
||||||
"kernel32-64",
|
|
||||||
"function=0x1800202B0",
|
|
||||||
capa.features.insn.API("RtlCaptureContext"),
|
|
||||||
True,
|
|
||||||
),
|
|
||||||
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
|
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
|
||||||
# insn/api: x64 nested thunk
|
# insn/api: x64 nested thunk
|
||||||
("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
|
("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
|
||||||
@@ -995,20 +1044,20 @@ FEATURE_PRESENCE_TESTS = sorted(
|
|||||||
("pma16-01", "file", OS(OS_WINDOWS), True),
|
("pma16-01", "file", OS(OS_WINDOWS), True),
|
||||||
("pma16-01", "file", OS(OS_LINUX), False),
|
("pma16-01", "file", OS(OS_LINUX), False),
|
||||||
("mimikatz", "file", OS(OS_WINDOWS), True),
|
("mimikatz", "file", OS(OS_WINDOWS), True),
|
||||||
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
|
("pma16-01", "function=0x401100", OS(OS_WINDOWS), True),
|
||||||
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
|
("pma16-01", "function=0x401100,bb=0x401130", OS(OS_WINDOWS), True),
|
||||||
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
|
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
|
||||||
("pma16-01", "file", Arch(ARCH_I386), True),
|
("pma16-01", "file", Arch(ARCH_I386), True),
|
||||||
("pma16-01", "file", Arch(ARCH_AMD64), False),
|
("pma16-01", "file", Arch(ARCH_AMD64), False),
|
||||||
("mimikatz", "file", Arch(ARCH_I386), True),
|
("mimikatz", "file", Arch(ARCH_I386), True),
|
||||||
("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
|
("pma16-01", "function=0x401100", Arch(ARCH_I386), True),
|
||||||
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
|
("pma16-01", "function=0x401100,bb=0x401130", Arch(ARCH_I386), True),
|
||||||
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
|
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
|
||||||
("pma16-01", "file", Format(FORMAT_PE), True),
|
("pma16-01", "file", Format(FORMAT_PE), True),
|
||||||
("pma16-01", "file", Format(FORMAT_ELF), False),
|
("pma16-01", "file", Format(FORMAT_ELF), False),
|
||||||
("mimikatz", "file", Format(FORMAT_PE), True),
|
("mimikatz", "file", Format(FORMAT_PE), True),
|
||||||
# format is also a global feature
|
# format is also a global feature
|
||||||
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
|
("pma16-01", "function=0x401100", Format(FORMAT_PE), True),
|
||||||
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
|
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
|
||||||
# elf support
|
# elf support
|
||||||
("7351f.elf", "file", OS(OS_LINUX), True),
|
("7351f.elf", "file", OS(OS_LINUX), True),
|
||||||
|
|||||||
58
tests/test_idalib_features.py
Normal file
58
tests/test_idalib_features.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# Copyright 2020 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import fixtures
|
||||||
|
|
||||||
|
import capa.features.extractors.ida.idalib
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
idalib_present = capa.features.extractors.ida.idalib.has_idalib()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
    indirect=["sample", "scope"],
)
def test_idalib_features(sample, scope, feature, expected):
    """
    Check feature presence (or absence) for each sample/scope using the idalib-backed IDA extractor.

    The extractor opens an IDA database for every test case, so the database is
    closed again in the `finally` clause, whether or not the check passed.
    """
    try:
        fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected)
    finally:
        logger.debug("closing database...")
        # imported here rather than at module level so that collecting this file
        # does not require idalib to be installed.
        import idapro

        idapro.close_database(save=False)
        # fixed: this message previously read "opened database."
        logger.debug("closed database.")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_idalib_feature_counts(sample, scope, feature, expected):
    """
    Check feature counts for each sample/scope using the idalib-backed IDA extractor.

    The database opened by the extractor is always closed afterwards, without saving.
    """
    try:
        fixtures.do_test_feature_count(fixtures.get_idalib_extractor, sample, scope, feature, expected)
    finally:
        logger.debug("closing database...")
        # resolved lazily, inside the cleanup path, exactly when it is needed
        from idapro import close_database

        close_database(save=False)
        logger.debug("closed database.")
|
||||||
Reference in New Issue
Block a user