fix: address Pyright diagnostics in vivisect extractors (chunk 8)

- basicblock.py: fix real bug (`/` -> `//` so get_printable_len returns an int, as annotated, for UTF-16LE strings); type: ignore for _dis_regctx (set dynamically by the i386 disassembler on each operand)
- extractor.py: cast+assert for funcy.cached_property basic_blocks/instructions; type: ignore for get_function_name Address vs int
- file.py: assert that pe and pe.IMAGE_NT_HEADERS are not None instead of using type: ignore
- function.py: cast+assert for funcy.cached_property basic_blocks/instructions; type: ignore for getBranches() base return type
- insn.py: annotate derefs() as returning Iterator[int]; add isinstance(v, int) guards before derefs() calls; import Elf and assert isinstance(parsedbin, Elf.Elf); cast f.basic_blocks and bb.instructions before indexing; type: ignore for readMemoryPtr(), dynamically-injected REG_* constants, and getBranches()
This commit is contained in:
Willi Ballenthin
2026-04-22 18:41:47 +03:00
committed by Willi Ballenthin
parent 191c889adf
commit f5e3aa4a3b
5 changed files with 50 additions and 28 deletions

View File

@@ -107,7 +107,7 @@ def is_mov_imm_to_stack(instr: envi.archs.i386.disasm.i386Opcode) -> bool:
if not dst.reg:
return False
rname = dst._dis_regctx.getRegisterName(dst.reg)
rname = dst._dis_regctx.getRegisterName(dst.reg) # type: ignore # _dis_regctx set dynamically by i386 disassembler on each operand
if rname not in ["ebp", "rbp", "esp", "rsp"]:
return False
@@ -132,7 +132,7 @@ def get_printable_len(oper: envi.archs.i386.disasm.i386ImmOper) -> int:
if is_printable_ascii(chars):
return oper.tsize
elif is_printable_utf16le(chars):
return oper.tsize / 2
return oper.tsize // 2
else:
return 0

View File

@@ -13,9 +13,10 @@
# limitations under the License.
import logging
from typing import Any, Iterator
from typing import Any, Iterator, cast
from pathlib import Path
import envi
import viv_utils
import viv_utils.flirt
@@ -73,7 +74,9 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
f: viv_utils.Function = fh.inner
for bb in f.basic_blocks:
basic_blocks = cast(list[viv_utils.BasicBlock], f.basic_blocks)
assert isinstance(basic_blocks, list)
for bb in basic_blocks:
yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb)
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[tuple[Feature, Address]]:
@@ -81,7 +84,9 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
bb: viv_utils.BasicBlock = bbh.inner
for insn in bb.instructions:
instructions = cast(list[envi.Opcode], bb.instructions)
assert isinstance(instructions, list)
for insn in instructions:
yield InsnHandle(address=AbsoluteVirtualAddress(insn.va), inner=insn)
def extract_insn_features(
@@ -93,4 +98,4 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
return viv_utils.flirt.is_library_function(self.vw, addr)
def get_function_name(self, addr):
return viv_utils.get_function_name(self.vw, addr)
return viv_utils.get_function_name(self.vw, addr) # type: ignore # addr is AbsoluteVirtualAddress (int subclass) in this extractor

View File

@@ -50,6 +50,8 @@ def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[t
if vw.getMeta("Format") == "pe":
pe = vw.parsedbin
assert pe is not None
assert pe.IMAGE_NT_HEADERS is not None
baseaddr = pe.IMAGE_NT_HEADERS.OptionalHeader.ImageBase
for rva, _, forwarded_name in vw.getFileMeta(get_first_vw_filename(vw), "forwarders"):
try:

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Iterator
from typing import Iterator, cast
import envi
import viv_utils
@@ -75,9 +75,13 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Ad
edges = []
for bb in f.basic_blocks:
if len(bb.instructions) > 0:
for bva, bflags in bb.instructions[-1].getBranches():
basic_blocks = cast(list[viv_utils.BasicBlock], f.basic_blocks)
assert isinstance(basic_blocks, list)
for bb in basic_blocks:
instructions = cast(list[envi.Opcode], bb.instructions)
assert isinstance(instructions, list)
if len(instructions) > 0:
for bva, bflags in instructions[-1].getBranches(): # type: ignore # getBranches returns () in base; overridden at runtime
if bva is None:
# vivisect may be unable to recover the call target, e.g. on dynamic calls like `call esi`
# for this bva is None, and we don't want to add it for loop detection, ref: vivisect#574
@@ -87,7 +91,7 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Ad
bflags & envi.BR_COND
or bflags & envi.BR_FALL
or bflags & envi.BR_TABLE
or bb.instructions[-1].mnem == "jmp"
or instructions[-1].mnem == "jmp"
):
edges.append((bb.va, bva))

View File

@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Callable, Iterator
from typing import Callable, Iterator, cast
import Elf
import envi
import envi.exc
import viv_utils
@@ -122,7 +123,9 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
# the symbol table gets stored as a function's attribute in order to avoid running
# this code every time the call is made, thus preventing the computational overhead.
try:
fh.ctx["cache"]["symtab"] = SymTab.from_viv(f.vw.parsedbin)
parsedbin = f.vw.parsedbin
assert isinstance(parsedbin, Elf.Elf)
fh.ctx["cache"]["symtab"] = SymTab.from_viv(parsedbin)
except Exception:
fh.ctx["cache"]["symtab"] = None
@@ -192,7 +195,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
yield API(name), ih.address
def derefs(vw, p):
def derefs(vw, p: int) -> Iterator[int]:
"""
recursively follow the given pointer, yielding the valid memory addresses along the way.
useful when you may have a pointer to string, or pointer to pointer to string, etc.
@@ -211,14 +214,14 @@ def derefs(vw, p):
return
try:
next = vw.readMemoryPtr(p)
next_p: int = vw.readMemoryPtr(p) # type: ignore # vw has no stubs; readMemoryPtr returns int
except Exception:
# if not enough bytes can be read, such as end of the section.
# unfortunately, viv returns a plain old generic `Exception` for this.
return
# sanity: pointer points to self
if next == p:
if next_p == p:
return
# sanity: avoid chains of pointers that are unreasonably deep
@@ -226,7 +229,7 @@ def derefs(vw, p):
if depth > 10:
return
p = next
p = next_p
def read_memory(vw, va: int, size: int) -> bytes:
@@ -295,6 +298,9 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera
else:
continue
if not isinstance(v, int):
continue
for vv in derefs(f.vw, v):
try:
buf = read_bytes(f.vw, vv)
@@ -356,21 +362,23 @@ def is_security_cookie(f, bb, insn) -> bool:
# security cookie check should use SP or BP
oper = insn.opers[1]
if oper.isReg() and oper.reg not in [
envi.archs.i386.regs.REG_ESP,
envi.archs.i386.regs.REG_EBP,
envi.archs.amd64.regs.REG_RBP,
envi.archs.amd64.regs.REG_RSP,
envi.archs.i386.regs.REG_ESP, # type: ignore # REG_ESP dynamically injected by e_reg.addLocalEnums()
envi.archs.i386.regs.REG_EBP, # type: ignore # REG_EBP dynamically injected
envi.archs.amd64.regs.REG_RBP, # type: ignore # REG_RBP dynamically injected
envi.archs.amd64.regs.REG_RSP, # type: ignore # REG_RSP dynamically injected
]:
return False
# expect security cookie init in first basic block within first bytes (instructions)
bb0 = f.basic_blocks[0]
bb0 = cast(list[viv_utils.BasicBlock], f.basic_blocks)[0]
if bb == bb0 and insn.va < (bb.va + SECURITY_COOKIE_BYTES_DELTA):
return True
# ... or within last bytes (instructions) before a return
elif bb.instructions[-1].isReturn() and insn.va > (bb.va + bb.size - SECURITY_COOKIE_BYTES_DELTA):
elif cast(list[envi.Opcode], bb.instructions)[-1].isReturn() and insn.va > (
bb.va + bb.size - SECURITY_COOKIE_BYTES_DELTA
):
return True
return False
@@ -486,7 +494,7 @@ def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) ->
insn: envi.Opcode = ih.inner
f: viv_utils.Function = fh.inner
for va, flags in insn.getBranches():
for va, flags in insn.getBranches(): # type: ignore # getBranches() base returns (); overridden at runtime to return list of (va, flags) tuples
if va is None:
# va may be none for dynamic branches that haven't been resolved, such as `jmp eax`.
continue
@@ -608,7 +616,7 @@ def extract_op_number_features(
# assume it's not also a constant.
return
if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.regs.REG_ESP:
if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.regs.REG_ESP: # type: ignore # REG_ESP dynamically injected by e_reg.addLocalEnums()
# skip things like:
#
# .text:00401140 call sub_407E2B
@@ -643,13 +651,13 @@ def extract_op_offset_features(
# reg ^
# disp
if isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
if oper.reg == envi.archs.i386.regs.REG_ESP:
if oper.reg == envi.archs.i386.regs.REG_ESP: # type: ignore # REG_ESP dynamically injected
return
if oper.reg == envi.archs.i386.regs.REG_EBP:
if oper.reg == envi.archs.i386.regs.REG_EBP: # type: ignore # REG_EBP dynamically injected
return
if oper.reg == envi.archs.amd64.regs.REG_RBP:
if oper.reg == envi.archs.amd64.regs.REG_RBP: # type: ignore # REG_RBP dynamically injected
return
# viv already decodes offsets as signed
@@ -702,6 +710,9 @@ def extract_op_string_features(
else:
return
if not isinstance(v, int):
return
for vv in derefs(f.vw, v):
try:
s = read_string(f.vw, vv).rstrip("\x00")