mirror of
https://github.com/mandiant/capa.git
synced 2025-12-05 20:40:05 -08:00
add idalib backend
This commit is contained in:
@@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
### New Features
|
### New Features
|
||||||
|
|
||||||
|
- add IDA v9.0 backend via idalib #2376 @williballenthin
|
||||||
|
|
||||||
### Breaking Changes
|
### Breaking Changes
|
||||||
|
|
||||||
### New Rules (0)
|
### New Rules (0)
|
||||||
|
|||||||
@@ -32,7 +32,9 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
hashes=SampleHashes(
|
hashes=SampleHashes(
|
||||||
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
|
md5=ida_nalt.retrieve_input_file_md5().hex(),
|
||||||
|
sha1="(unknown)",
|
||||||
|
sha256=ida_nalt.retrieve_input_file_sha256().hex(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.global_features: List[Tuple[Feature, Address]] = []
|
self.global_features: List[Tuple[Feature, Address]] = []
|
||||||
|
|||||||
113
capa/features/extractors/ida/idalib.py
Normal file
113
capa/features/extractors/ida/idalib.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||||
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import importlib.util
|
||||||
|
from typing import Optional
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def is_idalib_installed() -> bool:
    """Report whether a top-level module named `ida` can be located by the import system.

    Only the module spec is looked up; the module itself is not imported here.

    Returns:
        True if `importlib.util.find_spec("ida")` yields a spec, otherwise False.
    """
    try:
        return importlib.util.find_spec("ida") is not None
    except (ImportError, ValueError):
        # ImportError covers ModuleNotFoundError (its subclass).
        # ValueError is raised by find_spec when `ida` is already present in
        # sys.modules but its `__spec__` is None; treat that as "not installed"
        # rather than letting the probe crash.
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_idalib_user_config_path() -> Optional[Path]:
    """Get the path to the user's config file based on platform following IDA's user directories.

    Returns:
        The path to `ida-config.json` if that file exists, otherwise None.
        None is also returned on Windows when %APPDATA% is unset or empty.
    """
    # derived from `py-activate-idalib.py` from IDA v9.0 Beta 4

    if sys.platform == "win32":
        # On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory
        appdata = os.getenv("APPDATA")
        if not appdata:
            # Path(None) would raise TypeError; without %APPDATA% there is
            # no user config directory to inspect.
            return None
        config_dir = Path(appdata) / "Hex-Rays" / "IDA Pro"
    else:
        # On macOS and Linux, use ~/.idapro
        config_dir = Path.home() / ".idapro"

    # Return the full path to the config file (now in JSON format)
    user_config_path = config_dir / "ida-config.json"
    if not user_config_path.exists():
        return None
    return user_config_path
|
||||||
|
|
||||||
|
|
||||||
|
def find_idalib() -> Optional[Path]:
    """Locate the idalib Python package directory of the IDA installation named in the user config.

    The install directory is read from `config["Paths"]["ida-install-dir"]` in the
    user's `ida-config.json`, and is accepted only if it contains `ida.hlp`, the
    platform's idalib shared library, and `idalib/python/ida/__init__.py`.

    Returns:
        The `<ida-install-dir>/idalib/python` directory, or None when the config is
        missing, unreadable, malformed, or any of the expected files is absent.
    """
    config_path = get_idalib_user_config_path()
    if not config_path:
        return None

    try:
        config = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError:
        # an unreadable or corrupt config means "not found", not a crash.
        logger.debug("failed to read IDA user config %s: %s", config_path, e)
        return None

    try:
        ida_install_dir = Path(config["Paths"]["ida-install-dir"])
    except (KeyError, TypeError):
        # TypeError: the document, "Paths", or the install dir entry
        # does not have the expected JSON type.
        return None

    if not ida_install_dir.exists():
        return None

    libname = {
        "win32": "idalib.dll",
        "linux": "libidalib.so",
        "linux2": "libidalib.so",
        "darwin": "libidalib.dylib",
    }.get(sys.platform)
    if libname is None:
        # no known idalib library name for this platform
        logger.debug("unsupported platform for IDA idalib: %s", sys.platform)
        return None

    if not (ida_install_dir / "ida.hlp").is_file():
        return None

    if not (ida_install_dir / libname).is_file():
        return None

    idalib_path = ida_install_dir / "idalib" / "python"
    if not idalib_path.exists():
        return None

    if not (idalib_path / "ida" / "__init__.py").is_file():
        return None

    return idalib_path
|
||||||
|
|
||||||
|
|
||||||
|
def has_idalib() -> bool:
    """Report whether the idalib Python API is importable or can be found in an IDA installation.

    Returns:
        True if the `ida` module is already installed, or if `find_idalib()`
        locates an idalib Python directory; otherwise False.
    """
    if is_idalib_installed():
        logger.debug("found installed IDA idalib API")
        return True

    logger.debug("IDA idalib API not installed, searching...")

    idalib_path = find_idalib()
    if not idalib_path:
        logger.debug("failed to find IDA idalib installation")
        # return here so that we don't also log "found IDA idalib API: None"
        return False

    logger.debug("found IDA idalib API: %s", idalib_path)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def load_idalib() -> bool:
    """Make the `ida` module importable, extending `sys.path` if necessary.

    First tries a plain import. If that fails, asks `find_idalib()` for the
    idalib Python directory, appends it to `sys.path`, and tries once more.

    Returns:
        True if `ida` was imported by either attempt, otherwise False.
    """
    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        # not importable as-is: fall through and look for an IDA installation.
        pass
    else:
        return True

    location = find_idalib()
    if not location:
        return False

    search_entry = location.absolute().as_posix()
    sys.path.append(search_entry)

    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        return False
    return True
|
||||||
@@ -6,9 +6,12 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import os
|
import os
|
||||||
|
import io
|
||||||
import sys
|
import sys
|
||||||
import gzip
|
import gzip
|
||||||
|
import ctypes
|
||||||
import inspect
|
import inspect
|
||||||
|
import tempfile
|
||||||
import logging
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
import importlib.util
|
import importlib.util
|
||||||
@@ -81,6 +84,47 @@ def assert_never(value) -> NoReturn:
|
|||||||
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
|
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
|
||||||
|
|
||||||
|
|
||||||
|
# Redirect stdout at the C runtime level,
# which lets us handle native libraries that spam stdout.
# via: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
#
# Binding the C runtime's `stdout` happens at import time, so it must not be allowed to fail:
# when the process-wide library handle cannot be opened, or it exports no symbol named `stdout`,
# fall back to None and let `stdout_redirector` degrade to a no-op.
# NOTE(review): presumably this matters on Windows and macOS - confirm on those platforms.
try:
    LIBC = ctypes.CDLL(None)
    C_STDOUT = ctypes.c_void_p.in_dll(LIBC, "stdout")
except (OSError, TypeError, ValueError, AttributeError):
    LIBC = None
    C_STDOUT = None


@contextlib.contextmanager
def stdout_redirector(stream):
    """Capture everything written to stdout, including by native code, while the `with` body runs.

    The stdout file descriptor is pointed at a temporary file for the duration of
    the body and is restored afterwards, even if the body raises. When the body
    completes normally, the captured bytes are written to `stream`.

    If the C runtime's `stdout` could not be bound at import time, no redirection
    happens: the body runs with stdout untouched and nothing is written to `stream`.

    Args:
        stream: object with a `write` method accepting bytes, such as `io.BytesIO`.
    """
    if LIBC is None or C_STDOUT is None:
        logging.getLogger(__name__).debug("unable to capture stdout at the C runtime level")
        yield
        return

    # The original fd stdout points to. Usually 1 on POSIX systems.
    original_stdout_fd = sys.stdout.fileno()

    def _redirect_stdout(to_fd):
        """Redirect stdout to the given file descriptor."""
        # Flush the C-level buffer stdout
        LIBC.fflush(C_STDOUT)
        # Flush and close sys.stdout - also closes the file descriptor (fd)
        sys.stdout.close()
        # Make original_stdout_fd point to the same file as to_fd
        os.dup2(to_fd, original_stdout_fd)
        # Create a new sys.stdout that points to the redirected fd
        sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, "wb"))

    # Save a copy of the original stdout fd in saved_stdout_fd
    saved_stdout_fd = os.dup(original_stdout_fd)
    try:
        # Create a temporary file and redirect stdout to it.
        # The `with` guarantees the file is closed, and never touches an
        # unbound name if creating the file itself fails.
        with tempfile.TemporaryFile(mode="w+b") as tfile:
            _redirect_stdout(tfile.fileno())
            try:
                # Yield to caller
                yield
            finally:
                # Always redirect stdout back to the saved fd, so that an exception
                # in the caller's body does not leave stdout pointing at the temp file.
                _redirect_stdout(saved_stdout_fd)

            # Copy contents of temporary file to the given stream
            tfile.flush()
            tfile.seek(0, io.SEEK_SET)
            stream.write(tfile.read())
    finally:
        os.close(saved_stdout_fd)
|
||||||
|
|
||||||
|
|
||||||
def load_json_from_path(json_path: Path):
|
def load_json_from_path(json_path: Path):
|
||||||
with gzip.open(json_path, "r") as compressed_report:
|
with gzip.open(json_path, "r") as compressed_report:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
import io
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import logging
|
import logging
|
||||||
@@ -69,6 +70,7 @@ BACKEND_DRAKVUF = "drakvuf"
|
|||||||
BACKEND_VMRAY = "vmray"
|
BACKEND_VMRAY = "vmray"
|
||||||
BACKEND_FREEZE = "freeze"
|
BACKEND_FREEZE = "freeze"
|
||||||
BACKEND_BINEXPORT2 = "binexport2"
|
BACKEND_BINEXPORT2 = "binexport2"
|
||||||
|
BACKEND_IDA = "ida"
|
||||||
|
|
||||||
|
|
||||||
class CorruptFile(ValueError):
|
class CorruptFile(ValueError):
|
||||||
@@ -321,6 +323,36 @@ def get_extractor(
|
|||||||
|
|
||||||
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
|
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
|
||||||
|
|
||||||
|
elif backend == BACKEND_IDA:
|
||||||
|
import capa.features.extractors.ida.idalib as idalib
|
||||||
|
|
||||||
|
if not idalib.has_idalib():
|
||||||
|
raise RuntimeError(
|
||||||
|
# TODO(williballenthin): add more details here
|
||||||
|
"cannot find IDA idalib module."
|
||||||
|
)
|
||||||
|
|
||||||
|
if not idalib.load_idalib():
|
||||||
|
raise RuntimeError("failed to load IDA idalib module.")
|
||||||
|
|
||||||
|
import ida
|
||||||
|
import ida_auto
|
||||||
|
|
||||||
|
import capa.features.extractors.ida.extractor
|
||||||
|
|
||||||
|
logger.debug("idalib: opening database...")
|
||||||
|
# idalib writes to stdout (ugh), so we have to capture that
|
||||||
|
# so as not to screw up structured output.
|
||||||
|
with capa.helpers.stdout_redirector(io.BytesIO()):
|
||||||
|
if ida.open_database(str(input_path), run_auto_analysis=True):
|
||||||
|
raise RuntimeError("failed to analyze input file")
|
||||||
|
|
||||||
|
logger.debug("idalib: waiting for analysis...")
|
||||||
|
ida_auto.auto_wait()
|
||||||
|
logger.debug("idalib: opened database.")
|
||||||
|
|
||||||
|
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("unexpected backend: " + backend)
|
raise ValueError("unexpected backend: " + backend)
|
||||||
|
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ import capa.features.extractors.common
|
|||||||
from capa.rules import RuleSet
|
from capa.rules import RuleSet
|
||||||
from capa.engine import MatchResults
|
from capa.engine import MatchResults
|
||||||
from capa.loader import (
|
from capa.loader import (
|
||||||
|
BACKEND_IDA,
|
||||||
BACKEND_VIV,
|
BACKEND_VIV,
|
||||||
BACKEND_CAPE,
|
BACKEND_CAPE,
|
||||||
BACKEND_BINJA,
|
BACKEND_BINJA,
|
||||||
@@ -283,6 +284,7 @@ def install_common_args(parser, wanted=None):
|
|||||||
backends = [
|
backends = [
|
||||||
(BACKEND_AUTO, "(default) detect appropriate backend automatically"),
|
(BACKEND_AUTO, "(default) detect appropriate backend automatically"),
|
||||||
(BACKEND_VIV, "vivisect"),
|
(BACKEND_VIV, "vivisect"),
|
||||||
|
(BACKEND_IDA, "IDA via idalib"),
|
||||||
(BACKEND_PEFILE, "pefile (file features only)"),
|
(BACKEND_PEFILE, "pefile (file features only)"),
|
||||||
(BACKEND_BINJA, "Binary Ninja"),
|
(BACKEND_BINJA, "Binary Ninja"),
|
||||||
(BACKEND_DOTNET, ".NET"),
|
(BACKEND_DOTNET, ".NET"),
|
||||||
|
|||||||
@@ -185,6 +185,7 @@ known_first_party = [
|
|||||||
"ghidra",
|
"ghidra",
|
||||||
"ida",
|
"ida",
|
||||||
"ida_ida",
|
"ida_ida",
|
||||||
|
"ida_auto",
|
||||||
"ida_bytes",
|
"ida_bytes",
|
||||||
"ida_entry",
|
"ida_entry",
|
||||||
"ida_funcs",
|
"ida_funcs",
|
||||||
|
|||||||
@@ -1,6 +1,13 @@
|
|||||||
|
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||||
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import importlib.util
|
import importlib.util
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@@ -9,6 +16,8 @@ from pathlib import Path
|
|||||||
import rich
|
import rich
|
||||||
import rich.table
|
import rich.table
|
||||||
|
|
||||||
|
from capa.features.extractors.ida.idalib import find_idalib, load_idalib, is_idalib_installed
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -143,103 +152,6 @@ def load_vivisect() -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def is_idalib_installed() -> bool:
|
|
||||||
try:
|
|
||||||
return importlib.util.find_spec("ida") is not None
|
|
||||||
except ModuleNotFoundError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def get_idalib_user_config_path() -> Optional[Path]:
|
|
||||||
"""Get the path to the user's config file based on platform following IDA's user directories."""
|
|
||||||
# derived from `py-activate-idalib.py` from IDA v9.0 Beta 4
|
|
||||||
|
|
||||||
if sys.platform == "win32":
|
|
||||||
# On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory
|
|
||||||
config_dir = Path(os.getenv("APPDATA")) / "Hex-Rays" / "IDA Pro"
|
|
||||||
else:
|
|
||||||
# On macOS and Linux, use ~/.idapro
|
|
||||||
config_dir = Path.home() / ".idapro"
|
|
||||||
|
|
||||||
# Return the full path to the config file (now in JSON format)
|
|
||||||
user_config_path = config_dir / "ida-config.json"
|
|
||||||
if not user_config_path.exists():
|
|
||||||
return None
|
|
||||||
return user_config_path
|
|
||||||
|
|
||||||
|
|
||||||
def find_idalib() -> Optional[Path]:
|
|
||||||
config_path = get_idalib_user_config_path()
|
|
||||||
if not config_path:
|
|
||||||
return None
|
|
||||||
|
|
||||||
config = json.loads(config_path.read_text(encoding="utf-8"))
|
|
||||||
|
|
||||||
try:
|
|
||||||
ida_install_dir = Path(config["Paths"]["ida-install-dir"])
|
|
||||||
except KeyError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not ida_install_dir.exists():
|
|
||||||
return None
|
|
||||||
|
|
||||||
libname = {
|
|
||||||
"win32": "idalib.dll",
|
|
||||||
"linux": "libidalib.so",
|
|
||||||
"linux2": "libidalib.so",
|
|
||||||
"darwin": "libidalib.dylib",
|
|
||||||
}[sys.platform]
|
|
||||||
|
|
||||||
if not (ida_install_dir / "ida.hlp").is_file():
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not (ida_install_dir / libname).is_file():
|
|
||||||
return None
|
|
||||||
|
|
||||||
idalib_path = ida_install_dir / "idalib" / "python"
|
|
||||||
if not idalib_path.exists():
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not (idalib_path / "ida" / "__init__.py").is_file():
|
|
||||||
return None
|
|
||||||
|
|
||||||
return idalib_path
|
|
||||||
|
|
||||||
|
|
||||||
def has_idalib() -> bool:
|
|
||||||
if is_idalib_installed():
|
|
||||||
logger.debug("found installed IDA idalib API")
|
|
||||||
return True
|
|
||||||
|
|
||||||
logger.debug("IDA idalib API not installed, searching...")
|
|
||||||
|
|
||||||
idalib_path = find_idalib()
|
|
||||||
if not idalib_path:
|
|
||||||
logger.debug("failed to find IDA idalib installation")
|
|
||||||
|
|
||||||
logger.debug("found IDA idalib API: %s", idalib_path)
|
|
||||||
return idalib_path is not None
|
|
||||||
|
|
||||||
|
|
||||||
def load_idalib() -> bool:
|
|
||||||
try:
|
|
||||||
import ida
|
|
||||||
|
|
||||||
return True
|
|
||||||
except ImportError:
|
|
||||||
idalib_path = find_idalib()
|
|
||||||
if not idalib_path:
|
|
||||||
return False
|
|
||||||
|
|
||||||
sys.path.append(idalib_path.absolute().as_posix())
|
|
||||||
try:
|
|
||||||
import ida # noqa: F401 unused import
|
|
||||||
|
|
||||||
return True
|
|
||||||
except ImportError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user