From 58e4a30156668bbd280efbb5295ccb840aebe080 Mon Sep 17 00:00:00 2001 From: Xusheng Date: Thu, 29 Feb 2024 15:53:28 +0800 Subject: [PATCH] binja: fix stack string detection and always use builtin function outlining --- capa/features/extractors/binja/basicblock.py | 67 ++------------------ 1 file changed, 5 insertions(+), 62 deletions(-) diff --git a/capa/features/extractors/binja/basicblock.py b/capa/features/extractors/binja/basicblock.py index 568ecc7a..e74c9f48 100644 --- a/capa/features/extractors/binja/basicblock.py +++ b/capa/features/extractors/binja/basicblock.py @@ -7,17 +7,15 @@ # See the License for the specific language governing permissions and limitations under the License. import string -import struct from typing import Tuple, Iterator -from binaryninja import Function, Settings +from binaryninja import Function from binaryninja import BasicBlock as BinjaBasicBlock from binaryninja import ( BinaryView, SymbolType, RegisterValueType, VariableSourceType, - MediumLevelILSetVar, MediumLevelILOperation, MediumLevelILBasicBlock, MediumLevelILInstruction, @@ -29,11 +27,6 @@ from capa.features.basicblock import BasicBlock from capa.features.extractors.helpers import MIN_STACKSTRING_LEN from capa.features.extractors.base_extractor import BBHandle, FunctionHandle -use_const_outline: bool = False -settings: Settings = Settings() -if settings.contains("analysis.outlining.builtins") and settings.get_bool("analysis.outlining.builtins"): - use_const_outline = True - def get_printable_len_ascii(s: bytes) -> int: """Return string length if all operand bytes are ascii or utf16-le printable""" @@ -65,7 +58,7 @@ def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int: addr = target.value.value sym = bv.get_symbol_at(addr) - if not sym or sym.type != SymbolType.LibraryFunctionSymbol: + if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]: return 0 if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]: @@ -91,52 +84,6 @@ def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int: return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s))) -def get_printable_len(il: MediumLevelILSetVar) -> int: - """Return string length if all operand bytes are ascii or utf16-le printable""" - width = il.dest.type.width - value = il.src.value.value - - if width == 1: - chars = struct.pack(" bool: - """verify instruction moves immediate onto stack""" - if il.operation != MediumLevelILOperation.MLIL_SET_VAR: - return False - - if il.src.operation != MediumLevelILOperation.MLIL_CONST: - return False - - if il.dest.source_type != VariableSourceType.StackVariableSourceType: - return False - - return True - - def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool: """check basic block for stackstring indicators @@ -144,14 +91,10 @@ def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool: """ count = 0 for il in bb: - if use_const_outline: - count += get_stack_string_len(f, il) - else: - if is_mov_imm_to_stack(il): - count += get_printable_len(il) + count += get_stack_string_len(f, il) + if count > MIN_STACKSTRING_LEN: + return True - if count > MIN_STACKSTRING_LEN: - return True return False