mirror of
https://github.com/mandiant/capa.git
synced 2026-04-28 11:53:20 -07:00
fix: address Pyright diagnostics in vivisect extractors (chunk 8)
- basicblock.py: fix a real bug (`/` -> `//` so get_printable_len returns an int, as annotated); `# type: ignore` for `_dis_regctx` (set dynamically by the disassembler)
- extractor.py: cast + assert for the funcy.cached_property attributes `basic_blocks`/`instructions`; `# type: ignore` for the get_function_name argument (Address vs. int)
- file.py: assert that `pe` and `pe.IMAGE_NT_HEADERS` are not None instead of using `# type: ignore`
- function.py: cast + assert for the funcy.cached_property attributes `basic_blocks`/`instructions`; `# type: ignore` for the base-class return type of getBranches()
- insn.py: annotate derefs() as returning Iterator[int]; add isinstance guards before the derefs() calls; import Elf and assert isinstance for parsedbin; cast for `f.basic_blocks[0]` and `bb.instructions`; `# type: ignore` for the dynamically injected REG_* constants and for getBranches()
This commit is contained in:
committed by
Willi Ballenthin
parent
191c889adf
commit
f5e3aa4a3b
@@ -107,7 +107,7 @@ def is_mov_imm_to_stack(instr: envi.archs.i386.disasm.i386Opcode) -> bool:
|
||||
if not dst.reg:
|
||||
return False
|
||||
|
||||
rname = dst._dis_regctx.getRegisterName(dst.reg)
|
||||
rname = dst._dis_regctx.getRegisterName(dst.reg) # type: ignore # _dis_regctx set dynamically by i386 disassembler on each operand
|
||||
if rname not in ["ebp", "rbp", "esp", "rsp"]:
|
||||
return False
|
||||
|
||||
@@ -132,7 +132,7 @@ def get_printable_len(oper: envi.archs.i386.disasm.i386ImmOper) -> int:
|
||||
if is_printable_ascii(chars):
|
||||
return oper.tsize
|
||||
elif is_printable_utf16le(chars):
|
||||
return oper.tsize / 2
|
||||
return oper.tsize // 2
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
@@ -13,9 +13,10 @@
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
from typing import Any, Iterator
|
||||
from typing import Any, Iterator, cast
|
||||
from pathlib import Path
|
||||
|
||||
import envi
|
||||
import viv_utils
|
||||
import viv_utils.flirt
|
||||
|
||||
@@ -73,7 +74,9 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||
f: viv_utils.Function = fh.inner
|
||||
for bb in f.basic_blocks:
|
||||
basic_blocks = cast(list[viv_utils.BasicBlock], f.basic_blocks)
|
||||
assert isinstance(basic_blocks, list)
|
||||
for bb in basic_blocks:
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb)
|
||||
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -81,7 +84,9 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
bb: viv_utils.BasicBlock = bbh.inner
|
||||
for insn in bb.instructions:
|
||||
instructions = cast(list[envi.Opcode], bb.instructions)
|
||||
assert isinstance(instructions, list)
|
||||
for insn in instructions:
|
||||
yield InsnHandle(address=AbsoluteVirtualAddress(insn.va), inner=insn)
|
||||
|
||||
def extract_insn_features(
|
||||
@@ -93,4 +98,4 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
return viv_utils.flirt.is_library_function(self.vw, addr)
|
||||
|
||||
def get_function_name(self, addr):
|
||||
return viv_utils.get_function_name(self.vw, addr)
|
||||
return viv_utils.get_function_name(self.vw, addr) # type: ignore # addr is AbsoluteVirtualAddress (int subclass) in this extractor
|
||||
|
||||
@@ -50,6 +50,8 @@ def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[t
|
||||
|
||||
if vw.getMeta("Format") == "pe":
|
||||
pe = vw.parsedbin
|
||||
assert pe is not None
|
||||
assert pe.IMAGE_NT_HEADERS is not None
|
||||
baseaddr = pe.IMAGE_NT_HEADERS.OptionalHeader.ImageBase
|
||||
for rva, _, forwarded_name in vw.getFileMeta(get_first_vw_filename(vw), "forwarders"):
|
||||
try:
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from typing import Iterator
|
||||
from typing import Iterator, cast
|
||||
|
||||
import envi
|
||||
import viv_utils
|
||||
@@ -75,9 +75,13 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Ad
|
||||
|
||||
edges = []
|
||||
|
||||
for bb in f.basic_blocks:
|
||||
if len(bb.instructions) > 0:
|
||||
for bva, bflags in bb.instructions[-1].getBranches():
|
||||
basic_blocks = cast(list[viv_utils.BasicBlock], f.basic_blocks)
|
||||
assert isinstance(basic_blocks, list)
|
||||
for bb in basic_blocks:
|
||||
instructions = cast(list[envi.Opcode], bb.instructions)
|
||||
assert isinstance(instructions, list)
|
||||
if len(instructions) > 0:
|
||||
for bva, bflags in instructions[-1].getBranches(): # type: ignore # getBranches returns () in base; overridden at runtime
|
||||
if bva is None:
|
||||
# vivisect may be unable to recover the call target, e.g. on dynamic calls like `call esi`
|
||||
# for this bva is None, and we don't want to add it for loop detection, ref: vivisect#574
|
||||
@@ -87,7 +91,7 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Ad
|
||||
bflags & envi.BR_COND
|
||||
or bflags & envi.BR_FALL
|
||||
or bflags & envi.BR_TABLE
|
||||
or bb.instructions[-1].mnem == "jmp"
|
||||
or instructions[-1].mnem == "jmp"
|
||||
):
|
||||
edges.append((bb.va, bva))
|
||||
|
||||
|
||||
@@ -12,8 +12,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from typing import Callable, Iterator
|
||||
from typing import Callable, Iterator, cast
|
||||
|
||||
import Elf
|
||||
import envi
|
||||
import envi.exc
|
||||
import viv_utils
|
||||
@@ -122,7 +123,9 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
|
||||
# the symbol table gets stored as a function's attribute in order to avoid running
|
||||
# this code every time the call is made, thus preventing the computational overhead.
|
||||
try:
|
||||
fh.ctx["cache"]["symtab"] = SymTab.from_viv(f.vw.parsedbin)
|
||||
parsedbin = f.vw.parsedbin
|
||||
assert isinstance(parsedbin, Elf.Elf)
|
||||
fh.ctx["cache"]["symtab"] = SymTab.from_viv(parsedbin)
|
||||
except Exception:
|
||||
fh.ctx["cache"]["symtab"] = None
|
||||
|
||||
@@ -192,7 +195,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
|
||||
yield API(name), ih.address
|
||||
|
||||
|
||||
def derefs(vw, p):
|
||||
def derefs(vw, p: int) -> Iterator[int]:
|
||||
"""
|
||||
recursively follow the given pointer, yielding the valid memory addresses along the way.
|
||||
useful when you may have a pointer to string, or pointer to pointer to string, etc.
|
||||
@@ -211,14 +214,14 @@ def derefs(vw, p):
|
||||
return
|
||||
|
||||
try:
|
||||
next = vw.readMemoryPtr(p)
|
||||
next_p: int = vw.readMemoryPtr(p) # type: ignore # vw has no stubs; readMemoryPtr returns int
|
||||
except Exception:
|
||||
# if not enough bytes can be read, such as end of the section.
|
||||
# unfortunately, viv returns a plain old generic `Exception` for this.
|
||||
return
|
||||
|
||||
# sanity: pointer points to self
|
||||
if next == p:
|
||||
if next_p == p:
|
||||
return
|
||||
|
||||
# sanity: avoid chains of pointers that are unreasonably deep
|
||||
@@ -226,7 +229,7 @@ def derefs(vw, p):
|
||||
if depth > 10:
|
||||
return
|
||||
|
||||
p = next
|
||||
p = next_p
|
||||
|
||||
|
||||
def read_memory(vw, va: int, size: int) -> bytes:
|
||||
@@ -295,6 +298,9 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera
|
||||
else:
|
||||
continue
|
||||
|
||||
if not isinstance(v, int):
|
||||
continue
|
||||
|
||||
for vv in derefs(f.vw, v):
|
||||
try:
|
||||
buf = read_bytes(f.vw, vv)
|
||||
@@ -356,21 +362,23 @@ def is_security_cookie(f, bb, insn) -> bool:
|
||||
# security cookie check should use SP or BP
|
||||
oper = insn.opers[1]
|
||||
if oper.isReg() and oper.reg not in [
|
||||
envi.archs.i386.regs.REG_ESP,
|
||||
envi.archs.i386.regs.REG_EBP,
|
||||
envi.archs.amd64.regs.REG_RBP,
|
||||
envi.archs.amd64.regs.REG_RSP,
|
||||
envi.archs.i386.regs.REG_ESP, # type: ignore # REG_ESP dynamically injected by e_reg.addLocalEnums()
|
||||
envi.archs.i386.regs.REG_EBP, # type: ignore # REG_EBP dynamically injected
|
||||
envi.archs.amd64.regs.REG_RBP, # type: ignore # REG_RBP dynamically injected
|
||||
envi.archs.amd64.regs.REG_RSP, # type: ignore # REG_RSP dynamically injected
|
||||
]:
|
||||
return False
|
||||
|
||||
# expect security cookie init in first basic block within first bytes (instructions)
|
||||
bb0 = f.basic_blocks[0]
|
||||
bb0 = cast(list[viv_utils.BasicBlock], f.basic_blocks)[0]
|
||||
|
||||
if bb == bb0 and insn.va < (bb.va + SECURITY_COOKIE_BYTES_DELTA):
|
||||
return True
|
||||
|
||||
# ... or within last bytes (instructions) before a return
|
||||
elif bb.instructions[-1].isReturn() and insn.va > (bb.va + bb.size - SECURITY_COOKIE_BYTES_DELTA):
|
||||
elif cast(list[envi.Opcode], bb.instructions)[-1].isReturn() and insn.va > (
|
||||
bb.va + bb.size - SECURITY_COOKIE_BYTES_DELTA
|
||||
):
|
||||
return True
|
||||
|
||||
return False
|
||||
@@ -486,7 +494,7 @@ def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) ->
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
for va, flags in insn.getBranches():
|
||||
for va, flags in insn.getBranches(): # type: ignore # getBranches() base returns (); overridden at runtime to return list of (va, flags) tuples
|
||||
if va is None:
|
||||
# va may be none for dynamic branches that haven't been resolved, such as `jmp eax`.
|
||||
continue
|
||||
@@ -608,7 +616,7 @@ def extract_op_number_features(
|
||||
# assume it's not also a constant.
|
||||
return
|
||||
|
||||
if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.regs.REG_ESP:
|
||||
if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.regs.REG_ESP: # type: ignore # REG_ESP dynamically injected by e_reg.addLocalEnums()
|
||||
# skip things like:
|
||||
#
|
||||
# .text:00401140 call sub_407E2B
|
||||
@@ -643,13 +651,13 @@ def extract_op_offset_features(
|
||||
# reg ^
|
||||
# disp
|
||||
if isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
|
||||
if oper.reg == envi.archs.i386.regs.REG_ESP:
|
||||
if oper.reg == envi.archs.i386.regs.REG_ESP: # type: ignore # REG_ESP dynamically injected
|
||||
return
|
||||
|
||||
if oper.reg == envi.archs.i386.regs.REG_EBP:
|
||||
if oper.reg == envi.archs.i386.regs.REG_EBP: # type: ignore # REG_EBP dynamically injected
|
||||
return
|
||||
|
||||
if oper.reg == envi.archs.amd64.regs.REG_RBP:
|
||||
if oper.reg == envi.archs.amd64.regs.REG_RBP: # type: ignore # REG_RBP dynamically injected
|
||||
return
|
||||
|
||||
# viv already decodes offsets as signed
|
||||
@@ -702,6 +710,9 @@ def extract_op_string_features(
|
||||
else:
|
||||
return
|
||||
|
||||
if not isinstance(v, int):
|
||||
return
|
||||
|
||||
for vv in derefs(f.vw, v):
|
||||
try:
|
||||
s = read_string(f.vw, vv).rstrip("\x00")
|
||||
|
||||
Reference in New Issue
Block a user