simplify and include thunks

This commit is contained in:
mr-tz
2024-10-21 11:50:25 +00:00
parent c3b8e7c638
commit 077fa2e7e1
2 changed files with 24 additions and 134 deletions

View File

@@ -6,19 +6,8 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import sys
import time
import logging
import argparse
from pathlib import Path
import rich
from pydantic import BaseModel
from rich.console import Console
from rich.logging import RichHandler
import capa.helpers
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
@@ -28,138 +17,22 @@ if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idaapi
import idapro
import ida_auto
import idautils
import ida_funcs
logger = logging.getLogger(__name__)
def colorbool(v: bool) -> str:
    """Wrap the string form of ``v`` in Rich color markup.

    Truthy values are tagged green, falsy values are tagged red.
    """
    color = "green" if v else "red"
    return f"[{color}]{str(v)}[/{color}]"
def colorname(n: str) -> str:
    """Return ``n`` with cyan Rich markup unless it starts with ``sub_``.

    Names beginning with ``sub_`` are returned unchanged; every other name
    is wrapped in ``[cyan]...[/cyan]``.
    """
    return n if n.startswith("sub_") else f"[cyan]{n}[/cyan]"
class FunctionId(BaseModel):
    """Record describing one function: its address, flags, and name."""

    # NOTE(review): both `address` and `va` are declared here, but `to_row`
    # only reads `address` — confirm whether both fields are intended.
    address: int
    va: int
    is_library: bool
    is_thunk: bool
    name: str

    def to_row(self):
        """Return the table cells for this record.

        The cells are, in order: the address in hex, the library flag and the
        thunk flag rendered via ``colorbool``, and the name rendered via
        ``colorname``.
        """
        return [
            hex(self.address),
            colorbool(self.is_library),
            colorbool(self.is_thunk),
            colorname(self.name),
        ]
def configure_logging(args):
    """Set up logging levels and a Rich handler from parsed CLI arguments.

    Args:
        args: object exposing boolean ``quiet`` and ``debug`` attributes.
            ``quiet`` takes precedence over ``debug`` for the root level.
    """
    root_logger = logging.getLogger()

    if args.quiet:
        root_level = logging.WARNING
    elif args.debug:
        root_level = logging.DEBUG
    else:
        root_level = logging.INFO
    root_logger.setLevel(root_level)

    # markup=True lets log messages use Rich's markup syntax
    handler = RichHandler(markup=True, show_time=False, show_path=True, console=capa.helpers.log_console)
    # the [/] after the logger name resets styling so that the dim color
    # does not carry over into the message text
    handler.setFormatter(logging.Formatter("[dim]%(name)s[/]: %(message)s"))
    # the Rich handler is attached to the root logger
    root_logger.addHandler(handler)

    # these two loggers are only verbose when debugging was requested
    dependency_level = logging.DEBUG if args.debug else logging.ERROR
    for logger_name in ("capa", "viv_utils"):
        logging.getLogger(logger_name).setLevel(dependency_level)
def get_flirt_matches(lib_only=True):
for ea in idautils.Functions(start=None, end=None):
f = idaapi.get_func(ea)
is_thunk = bool(f.flags & idaapi.FUNC_THUNK)
for fva in idautils.Functions():
f = idaapi.get_func(fva)
is_lib = bool(f.flags & idaapi.FUNC_LIB)
fname = idaapi.get_func_name(ea)
fname = idaapi.get_func_name(fva)
if lib_only and not is_lib:
continue
yield FunctionId(address=ea, is_library=is_lib, is_thunk=is_thunk, name=fname)
def main(argv=None):
    """Command-line entry point: list FLIRT matches for an input file.

    Opens the input file with idalib, waits for auto-analysis, prints a table
    of the functions yielded by ``get_flirt_matches()``, prints one line per
    signature reported by ``ida_funcs``, and closes the database.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.

    Raises:
        RuntimeError: if ``idapro.open_database`` returns a truthy value.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Identify library functions using FLIRT.")
    parser.add_argument(
        "input_file",
        type=Path,
        help="path to file to analyze",
    )
    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
    args = parser.parse_args(args=argv)
    configure_logging(args)

    time0 = time.time()

    # stderr=True is used here to redirect the spinner banner to stderr, so that users can redirect capa's output.
    console = Console(stderr=True, quiet=False)

    logger.debug("idalib: opening database...")
    # idalib writes to stdout (ugh), so we have to capture that
    # so as not to screw up structured output.
    with capa.helpers.stdout_redirector(io.BytesIO()):
        with console.status("analyzing program...", spinner="dots"):
            if idapro.open_database(str(args.input_file), run_auto_analysis=True):
                raise RuntimeError("failed to analyze input file")

        logger.debug("idalib: waiting for analysis...")
        ida_auto.auto_wait()
        logger.debug("idalib: opened database.")

    # the database is open from here on; close it even if reporting fails
    try:
        table = rich.table.Table()
        table.add_column("FVA")
        table.add_column("library?")
        table.add_column("thunk?")
        table.add_column("name")

        for i, fid in enumerate(get_flirt_matches()):
            table.add_row(*fid.to_row())
            # the check runs after the row is added, so the loop stops
            # once the row with index 51 has been emitted
            if i > 50:
                break

        rich.print(table)

        for index in range(0, ida_funcs.get_idasgn_qty()):
            signame, optlibs, nmatches = ida_funcs.get_idasgn_desc_with_matches(index)
            rich.print(signame, optlibs, nmatches)
    finally:
        idapro.close_database()

    # named `minutes`/`seconds` so the builtin `min` is not shadowed
    minutes, seconds = divmod(time.time() - time0, 60)
    logger.debug("FLIRT-based library identification ran for ~ %02d:%02dm", minutes, seconds)
if __name__ == "__main__":
sys.exit(main())
yield FunctionId(va=fva, is_library=is_lib, name=fname)

View File

@@ -42,6 +42,7 @@ logger = logging.getLogger(__name__)
class Classification(str, Enum):
USER = "user"
THUNK = "thunk"
LIBRARY = "library"
UNKNOWN = "unknown"
@@ -103,6 +104,11 @@ def ida_session(input_path: Path, use_temp_dir=True):
t.unlink()
def is_thunk_function(fva):
    """Report whether the function at ``fva`` has IDA's thunk flag set.

    Args:
        fva: address passed to ``idaapi.get_func``.

    Returns:
        True when a function object is found and ``idaapi.FUNC_THUNK`` is set
        in its flags; False otherwise, including when ``idaapi.get_func``
        returns None for the address.
    """
    f = idaapi.get_func(fva)
    if f is None:
        # no function object for this address: nothing to inspect, and
        # reading `.flags` on None would raise AttributeError
        return False
    return bool(f.flags & idaapi.FUNC_THUNK)
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
@@ -130,13 +136,24 @@ def main(argv=None):
for flirt_match in capa.analysis.flirt.get_flirt_matches():
function_classifications.append(
FunctionClassification(
va=flirt_match.address,
va=flirt_match.va,
classification=Classification.LIBRARY,
method=Method.FLIRT,
# note: we cannot currently include which signature matched per function via the IDA API
)
)
# thunks
for fva in idautils.Functions():
if is_thunk_function(fva):
function_classifications.append(
FunctionClassification(
va=fva,
classification=Classification.THUNK,
method=None,
)
)
with capa.main.timing("string-based library identification"):
for string_match in capa.analysis.strings.get_string_matches(dbs):
function_classifications.append(
@@ -152,7 +169,7 @@ def main(argv=None):
if args.json:
doc = FunctionIdResults(function_classifications=[])
classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
for va in idautils.Functions(start=0, end=None):
for va in idautils.Functions():
if classifications := classifications_by_va.get(va):
doc.function_classifications.extend(classifications)
else: