mirror of
https://github.com/mandiant/capa.git
synced 2025-12-05 20:40:05 -08:00
add idalib backend
This commit is contained in:
@@ -4,6 +4,8 @@
|
||||
|
||||
### New Features
|
||||
|
||||
- add IDA v9.0 backend via idalib #2376 @williballenthin
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
### New Rules (0)
|
||||
|
||||
@@ -32,7 +32,9 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
hashes=SampleHashes(
|
||||
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
|
||||
md5=ida_nalt.retrieve_input_file_md5().hex(),
|
||||
sha1="(unknown)",
|
||||
sha256=ida_nalt.retrieve_input_file_sha256().hex(),
|
||||
)
|
||||
)
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
|
||||
113
capa/features/extractors/ida/idalib.py
Normal file
113
capa/features/extractors/ida/idalib.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import importlib.util
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_idalib_installed() -> bool:
    """Report whether the `ida` package (idalib) can be found by the import system.

    Returns:
        True when `importlib.util.find_spec` yields a spec for `ida`,
        False when it yields None or raises ModuleNotFoundError.
    """
    try:
        spec = importlib.util.find_spec("ida")
    except ModuleNotFoundError:
        return False
    return spec is not None
|
||||
|
||||
|
||||
def get_idalib_user_config_path() -> Optional[Path]:
    """Get the path to the user's IDA config file, following IDA's per-platform user directories.

    Returns:
        the path to `ida-config.json` when that file exists, otherwise None.
        On Windows, None is also returned when %APPDATA% is not set.
    """
    # derived from `py-activate-idalib.py` from IDA v9.0 Beta 4

    if sys.platform == "win32":
        # On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory
        appdata = os.getenv("APPDATA")
        if not appdata:
            # without %APPDATA% there is no user directory to inspect,
            # and Path(None) would raise TypeError.
            return None
        config_dir = Path(appdata) / "Hex-Rays" / "IDA Pro"
    else:
        # On macOS and Linux, use ~/.idapro
        config_dir = Path.home() / ".idapro"

    # Return the full path to the config file (now in JSON format)
    user_config_path = config_dir / "ida-config.json"
    if not user_config_path.exists():
        return None
    return user_config_path
|
||||
|
||||
|
||||
def find_idalib() -> Optional[Path]:
    """Locate the idalib Python package directory of the IDA installation named in the user's config.

    The install directory is read from `Paths` / `ida-install-dir` in the user's
    `ida-config.json`. It is accepted only when it contains `ida.hlp`, the
    platform's idalib shared library, and `idalib/python/ida/__init__.py`.

    Returns:
        the `<ida-install-dir>/idalib/python` directory, or None when the config
        is missing, unreadable, malformed, or lacks the key, when the current
        platform has no known idalib library name, or when any expected
        file or directory is absent.
    """
    config_path = get_idalib_user_config_path()
    if not config_path:
        return None

    try:
        config = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # unreadable file, bad encoding, or invalid JSON
        # (json.JSONDecodeError and UnicodeDecodeError both subclass ValueError).
        return None

    try:
        ida_install_dir = Path(config["Paths"]["ida-install-dir"])
    except (KeyError, TypeError):
        # key missing, or the document does not have the expected shape
        # (e.g. top-level list, or a null install dir).
        return None

    if not ida_install_dir.exists():
        return None

    libname = {
        "win32": "idalib.dll",
        "linux": "libidalib.so",
        "linux2": "libidalib.so",
        "darwin": "libidalib.dylib",
    }.get(sys.platform)
    if libname is None:
        # no known idalib library name for this platform: report "not found" rather than raise KeyError.
        return None

    if not (ida_install_dir / "ida.hlp").is_file():
        return None

    if not (ida_install_dir / libname).is_file():
        return None

    idalib_path = ida_install_dir / "idalib" / "python"
    if not idalib_path.exists():
        return None

    if not (idalib_path / "ida" / "__init__.py").is_file():
        return None

    return idalib_path
|
||||
|
||||
|
||||
def has_idalib() -> bool:
    """Report whether idalib is available, either already importable or discoverable on disk.

    Returns:
        True when `ida` is importable or `find_idalib()` locates an installation,
        otherwise False.
    """
    if is_idalib_installed():
        logger.debug("found installed IDA idalib API")
        return True

    logger.debug("IDA idalib API not installed, searching...")

    idalib_path = find_idalib()
    if not idalib_path:
        logger.debug("failed to find IDA idalib installation")
        # return here so the "found" message below is not also logged with a None path.
        return False

    logger.debug("found IDA idalib API: %s", idalib_path)
    return True
|
||||
|
||||
|
||||
def load_idalib() -> bool:
    """Make the `ida` module importable, extending `sys.path` from the discovered install if needed.

    Returns:
        True when `import ida` succeeds, either directly or after appending the
        directory reported by `find_idalib()` to `sys.path`; otherwise False.
    """
    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        # not importable yet: fall through and search for an installation.
        pass
    else:
        return True

    idalib_path = find_idalib()
    if not idalib_path:
        return False

    sys.path.append(idalib_path.absolute().as_posix())

    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        return False
    return True
|
||||
@@ -6,9 +6,12 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import os
|
||||
import io
|
||||
import sys
|
||||
import gzip
|
||||
import ctypes
|
||||
import inspect
|
||||
import tempfile
|
||||
import logging
|
||||
import contextlib
|
||||
import importlib.util
|
||||
@@ -81,6 +84,47 @@ def assert_never(value) -> NoReturn:
|
||||
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
|
||||
|
||||
|
||||
# Redirect stdout at the C runtime level,
# which lets us handle native libraries that spam stdout.
# via: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
#
# The C runtime handles are resolved defensively so that merely importing this
# module does not fail where they cannot be obtained this way
# (e.g. `ctypes.CDLL(None)` is rejected on Windows, and a libc may not export
# a data symbol named `stdout`). Either name is None when unavailable.
try:
    LIBC = ctypes.CDLL(None)
except (TypeError, OSError):
    LIBC = None

try:
    C_STDOUT = ctypes.c_void_p.in_dll(LIBC, "stdout") if LIBC is not None else None
except ValueError:
    C_STDOUT = None


@contextlib.contextmanager
def stdout_redirector(stream):
    """Capture everything written to the process's stdout file descriptor into `stream`.

    While the context is active, the file descriptor behind `sys.stdout` points at a
    temporary file, so output from native code is captured as well. On exit, including
    when the body raises, stdout is restored and the captured bytes are written to `stream`.

    Args:
        stream: object with a `write()` method accepting bytes (e.g. `io.BytesIO`).
    """
    # The original fd stdout points to. Usually 1 on POSIX systems.
    original_stdout_fd = sys.stdout.fileno()

    def _redirect_stdout(to_fd):
        """Redirect stdout to the given file descriptor."""
        # Flush the C-level buffer stdout.
        # When only C_STDOUT is unavailable this is fflush(NULL), which flushes all output streams.
        if LIBC is not None:
            LIBC.fflush(C_STDOUT)
        # Flush and close sys.stdout - also closes the file descriptor (fd)
        sys.stdout.close()
        # Make original_stdout_fd point to the same file as to_fd
        os.dup2(to_fd, original_stdout_fd)
        # Create a new sys.stdout that points to the redirected fd
        sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, "wb"))

    # Save a copy of the original stdout fd in saved_stdout_fd
    saved_stdout_fd = os.dup(original_stdout_fd)
    try:
        # Create a temporary file and redirect stdout to it
        with tempfile.TemporaryFile(mode="w+b") as tfile:
            _redirect_stdout(tfile.fileno())
            try:
                # Yield to caller
                yield
            finally:
                # Always redirect stdout back to the saved fd, even if the caller's
                # body raised; otherwise later output would go to the temp file.
                _redirect_stdout(saved_stdout_fd)
                # Copy contents of temporary file to the given stream
                tfile.flush()
                tfile.seek(0, io.SEEK_SET)
                stream.write(tfile.read())
    finally:
        os.close(saved_stdout_fd)
|
||||
|
||||
|
||||
def load_json_from_path(json_path: Path):
|
||||
with gzip.open(json_path, "r") as compressed_report:
|
||||
try:
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
@@ -69,6 +70,7 @@ BACKEND_DRAKVUF = "drakvuf"
|
||||
BACKEND_VMRAY = "vmray"
|
||||
BACKEND_FREEZE = "freeze"
|
||||
BACKEND_BINEXPORT2 = "binexport2"
|
||||
BACKEND_IDA = "ida"
|
||||
|
||||
|
||||
class CorruptFile(ValueError):
|
||||
@@ -321,6 +323,36 @@ def get_extractor(
|
||||
|
||||
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
|
||||
|
||||
elif backend == BACKEND_IDA:
|
||||
import capa.features.extractors.ida.idalib as idalib
|
||||
|
||||
if not idalib.has_idalib():
|
||||
raise RuntimeError(
|
||||
# TODO(williballenthin): add more details here
|
||||
"cannot find IDA idalib module."
|
||||
)
|
||||
|
||||
if not idalib.load_idalib():
|
||||
raise RuntimeError("failed to load IDA idalib module.")
|
||||
|
||||
import ida
|
||||
import ida_auto
|
||||
|
||||
import capa.features.extractors.ida.extractor
|
||||
|
||||
logger.debug("idalib: opening database...")
|
||||
# idalib writes to stdout (ugh), so we have to capture that
|
||||
# so as not to screw up structured output.
|
||||
with capa.helpers.stdout_redirector(io.BytesIO()):
|
||||
if ida.open_database(str(input_path), run_auto_analysis=True):
|
||||
raise RuntimeError("failed to analyze input file")
|
||||
|
||||
logger.debug("idalib: waiting for analysis...")
|
||||
ida_auto.auto_wait()
|
||||
logger.debug("idalib: opened database.")
|
||||
|
||||
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
|
||||
|
||||
else:
|
||||
raise ValueError("unexpected backend: " + backend)
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@ import capa.features.extractors.common
|
||||
from capa.rules import RuleSet
|
||||
from capa.engine import MatchResults
|
||||
from capa.loader import (
|
||||
BACKEND_IDA,
|
||||
BACKEND_VIV,
|
||||
BACKEND_CAPE,
|
||||
BACKEND_BINJA,
|
||||
@@ -283,6 +284,7 @@ def install_common_args(parser, wanted=None):
|
||||
backends = [
|
||||
(BACKEND_AUTO, "(default) detect appropriate backend automatically"),
|
||||
(BACKEND_VIV, "vivisect"),
|
||||
(BACKEND_IDA, "IDA via idalib"),
|
||||
(BACKEND_PEFILE, "pefile (file features only)"),
|
||||
(BACKEND_BINJA, "Binary Ninja"),
|
||||
(BACKEND_DOTNET, ".NET"),
|
||||
|
||||
@@ -185,6 +185,7 @@ known_first_party = [
|
||||
"ghidra",
|
||||
"ida",
|
||||
"ida_ida",
|
||||
"ida_auto",
|
||||
"ida_bytes",
|
||||
"ida_entry",
|
||||
"ida_funcs",
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import importlib.util
|
||||
from typing import Optional
|
||||
@@ -9,6 +16,8 @@ from pathlib import Path
|
||||
import rich
|
||||
import rich.table
|
||||
|
||||
from capa.features.extractors.ida.idalib import find_idalib, load_idalib, is_idalib_installed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -143,103 +152,6 @@ def load_vivisect() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def is_idalib_installed() -> bool:
    """Report whether the `ida` package (idalib) can be found by the import system.

    Returns:
        True when `importlib.util.find_spec` yields a spec for `ida`,
        False when it yields None or raises ModuleNotFoundError.
    """
    try:
        spec = importlib.util.find_spec("ida")
    except ModuleNotFoundError:
        return False
    return spec is not None
|
||||
|
||||
|
||||
def get_idalib_user_config_path() -> Optional[Path]:
    """Get the path to the user's IDA config file, following IDA's per-platform user directories.

    Returns:
        the path to `ida-config.json` when that file exists, otherwise None.
        On Windows, None is also returned when %APPDATA% is not set.
    """
    # derived from `py-activate-idalib.py` from IDA v9.0 Beta 4

    if sys.platform == "win32":
        # On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory
        appdata = os.getenv("APPDATA")
        if not appdata:
            # without %APPDATA% there is no user directory to inspect,
            # and Path(None) would raise TypeError.
            return None
        config_dir = Path(appdata) / "Hex-Rays" / "IDA Pro"
    else:
        # On macOS and Linux, use ~/.idapro
        config_dir = Path.home() / ".idapro"

    # Return the full path to the config file (now in JSON format)
    user_config_path = config_dir / "ida-config.json"
    if not user_config_path.exists():
        return None
    return user_config_path
|
||||
|
||||
|
||||
def find_idalib() -> Optional[Path]:
    """Locate the idalib Python package directory of the IDA installation named in the user's config.

    The install directory is read from `Paths` / `ida-install-dir` in the user's
    `ida-config.json`. It is accepted only when it contains `ida.hlp`, the
    platform's idalib shared library, and `idalib/python/ida/__init__.py`.

    Returns:
        the `<ida-install-dir>/idalib/python` directory, or None when the config
        is missing, unreadable, malformed, or lacks the key, when the current
        platform has no known idalib library name, or when any expected
        file or directory is absent.
    """
    config_path = get_idalib_user_config_path()
    if not config_path:
        return None

    try:
        config = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # unreadable file, bad encoding, or invalid JSON
        # (json.JSONDecodeError and UnicodeDecodeError both subclass ValueError).
        return None

    try:
        ida_install_dir = Path(config["Paths"]["ida-install-dir"])
    except (KeyError, TypeError):
        # key missing, or the document does not have the expected shape
        # (e.g. top-level list, or a null install dir).
        return None

    if not ida_install_dir.exists():
        return None

    libname = {
        "win32": "idalib.dll",
        "linux": "libidalib.so",
        "linux2": "libidalib.so",
        "darwin": "libidalib.dylib",
    }.get(sys.platform)
    if libname is None:
        # no known idalib library name for this platform: report "not found" rather than raise KeyError.
        return None

    if not (ida_install_dir / "ida.hlp").is_file():
        return None

    if not (ida_install_dir / libname).is_file():
        return None

    idalib_path = ida_install_dir / "idalib" / "python"
    if not idalib_path.exists():
        return None

    if not (idalib_path / "ida" / "__init__.py").is_file():
        return None

    return idalib_path
|
||||
|
||||
|
||||
def has_idalib() -> bool:
    """Report whether idalib is available, either already importable or discoverable on disk.

    Returns:
        True when `ida` is importable or `find_idalib()` locates an installation,
        otherwise False.
    """
    if is_idalib_installed():
        logger.debug("found installed IDA idalib API")
        return True

    logger.debug("IDA idalib API not installed, searching...")

    idalib_path = find_idalib()
    if not idalib_path:
        logger.debug("failed to find IDA idalib installation")
        # return here so the "found" message below is not also logged with a None path.
        return False

    logger.debug("found IDA idalib API: %s", idalib_path)
    return True
|
||||
|
||||
|
||||
def load_idalib() -> bool:
    """Make the `ida` module importable, extending `sys.path` from the discovered install if needed.

    Returns:
        True when `import ida` succeeds, either directly or after appending the
        directory reported by `find_idalib()` to `sys.path`; otherwise False.
    """
    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        # not importable yet: fall through and search for an installation.
        pass
    else:
        return True

    idalib_path = find_idalib()
    if not idalib_path:
        return False

    sys.path.append(idalib_path.absolute().as_posix())

    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        return False
    return True
|
||||
|
||||
|
||||
def main():
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user