diff --git a/capa/analysis/flirt.py b/capa/analysis/flirt.py new file mode 100644 index 00000000..fc3e6a37 --- /dev/null +++ b/capa/analysis/flirt.py @@ -0,0 +1,164 @@ +import io +import sys +import time +import logging +import argparse +from pathlib import Path + +import rich +from rich.console import Console +from rich.logging import RichHandler + +import capa.helpers +import capa.features.extractors.ida.idalib as idalib + +if not idalib.has_idalib(): + raise RuntimeError("cannot find IDA idalib module.") + +if not idalib.load_idalib(): + raise RuntimeError("failed to load IDA idalib module.") + +import idaapi +import idapro +import ida_auto +import idautils +import ida_funcs + +logger = logging.getLogger(__name__) + + +from pydantic import BaseModel + + +def colorbool(v: bool) -> str: + if v: + return f"[green]{str(v)}[/green]" + else: + return f"[red]{str(v)}[/red]" + + +def colorname(n: str) -> str: + if n.startswith("sub_"): + return n + else: + return f"[cyan]{n}[/cyan]" + + +class FunctionId(BaseModel): + address: int + is_library: bool + is_thunk: bool + name: str + + def to_row(self): + row = [hex(self.address)] + row.append(colorbool(self.is_library)) + row.append(colorbool(self.is_thunk)) + row.append(colorname(self.name)) + return row + + +def configure_logging(args): + if args.quiet: + logging.getLogger().setLevel(logging.WARNING) + elif args.debug: + logging.getLogger().setLevel(logging.DEBUG) + else: + logging.getLogger().setLevel(logging.INFO) + + # use [/] after the logger name to reset any styling, + # and prevent the color from carrying over to the message + logformat = "[dim]%(name)s[/]: %(message)s" + + # set markup=True to allow the use of Rich's markup syntax in log messages + rich_handler = RichHandler(markup=True, show_time=False, show_path=True, console=capa.helpers.log_console) + rich_handler.setFormatter(logging.Formatter(logformat)) + + # use RichHandler for root logger + logging.getLogger().addHandler(rich_handler) + + if args.debug: + logging.getLogger("capa").setLevel(logging.DEBUG) + logging.getLogger("viv_utils").setLevel(logging.DEBUG) + else: + logging.getLogger("capa").setLevel(logging.ERROR) + logging.getLogger("viv_utils").setLevel(logging.ERROR) + + +def main(argv=None): + if argv is None: + argv = sys.argv[1:] + + parser = argparse.ArgumentParser(description="Identify library functions using FLIRT.") + parser.add_argument( + "input_file", + type=Path, + help="path to file to analyze", + ) + parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR") + parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors") + args = parser.parse_args(args=argv) + + configure_logging(args) + + time0 = time.time() + + # stderr=True is used here to redirect the spinner banner to stderr, so that users can redirect capa's output. + console = Console(stderr=True, quiet=False) + + logger.debug("idalib: opening database...") + # idalib writes to stdout (ugh), so we have to capture that + # so as not to screw up structured output. + with capa.helpers.stdout_redirector(io.BytesIO()): + with console.status("analyzing program...", spinner="dots"): + if idapro.open_database(str(args.input_file), run_auto_analysis=True): + raise RuntimeError("failed to analyze input file") + + logger.debug("idalib: waiting for analysis...") + + # TODO: add more signature (files) + # TOOD: apply more signatures + + ida_auto.auto_wait() + logger.debug("idalib: opened database.") + + table = rich.table.Table() + table.add_column("FVA") + table.add_column("library?") + table.add_column("thunk?") + table.add_column("name") + + LIBONLY = True + count = 0 + + for ea in idautils.Functions(start=None, end=None): + f = idaapi.get_func(ea) + is_thunk = bool(f.flags & idaapi.FUNC_THUNK) + is_lib = bool(f.flags & idaapi.FUNC_LIB) + fname = idaapi.get_func_name(ea) + + if LIBONLY and not is_lib: + continue + + fid = FunctionId(address=ea, is_library=is_lib, is_thunk=is_thunk, name=fname) + table.add_row(*fid.to_row()) + + count += 1 + if count > 50: + break + + rich.print(table) + + # TODO can we include which signature matched per function? + for index in range(0, ida_funcs.get_idasgn_qty()): + signame, optlibs, nmatches = ida_funcs.get_idasgn_desc_with_matches(index) + rich.print(signame, optlibs, nmatches) + + idapro.close_database() + + min, sec = divmod(time.time() - time0, 60) + logger.debug("FLIRT-based library identification ran for ~ %02d:%02dm", min, sec) + + +if __name__ == "__main__": + sys.exit(main())