From a6dd0faf9f5c50f89352e6ca4172335176aad807 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 22 Apr 2026 20:09:44 +0300 Subject: [PATCH] fix: use integer division in get_printable_len for UTF-16 LE operands `get_printable_len` returned a float for UTF-16 LE operands due to `/` instead of `//`, violating the `-> int` annotation and silently propagating a float into `_bb_has_stackstring`'s accumulator. Aligns with the IDA extractor equivalent. Closes SURF-58 --- CHANGELOG.md | 3 +-- tests/test_helpers.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e650fe95..4b4b787c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,8 +48,7 @@ - fix: remove unreachable backports.functools_lru_cache fallback and dead dependency @williballenthin - fix: Scopes.from_dict uses cls instead of self so subclasses return the correct type @williballenthin - fix: correct wrong dict key in VMRay _compute_monitor_threads assertion (used thread_id instead of process_id) @williballenthin -fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin -- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin +- fix: use integer division in get_printable_len for UTF-16 LE operands @williballenthin (SURF-58) - fix: break thunk chain loop after resolving import to avoid duplicate API features @williballenthin (SURF-57) - fix: pass insn instead of oper to getOperValue/getOperAddr in viv insn extractor @williballenthin (SURF-56) - fix: implement extract_function_loop in dnfile extractor to detect backward branches as loops @williballenthin (SURF-55) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 3717fe84..2f458fc1 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -14,6 +14,7 @@ import codecs +import dataclasses from pathlib import Path import pytest @@ -40,6 +41,11 @@ from capa.features.common import ( FORMAT_BINEXPORT2, ) from capa.features.extractors import helpers +from capa.features.extractors.viv.basicblock import ( + get_printable_len, + is_printable_ascii, + is_printable_utf16le, +) CD = Path(__file__).resolve().parent DRAKVUF_LOG_GZ = ( @@ -180,3 +186,37 @@ def test_get_file_taste_reads_first_bytes(tmp_path): def test_get_file_taste_missing_file_raises(): with pytest.raises(IOError): get_file_taste(Path("/nonexistent/path/sample.exe")) + + +def test_is_printable_ascii(): + assert is_printable_ascii(b"AB") is True + assert is_printable_ascii(b"A\x00") is False + assert is_printable_ascii(b"\x80\x81") is False + + +def test_is_printable_utf16le(): + assert is_printable_utf16le(b"A\x00B\x00") is True + assert is_printable_utf16le(b"AB") is False + assert is_printable_utf16le(b"\x80\x00\x81\x00") is False + + +def test_get_printable_len_returns_int(): + @dataclasses.dataclass + class FakeOper: + tsize: int + imm: int + + ascii_oper = FakeOper(tsize=4, imm=int.from_bytes(b"ABCD", "little")) + result = get_printable_len(ascii_oper) + assert isinstance(result, int) + assert result == 4 + + utf16_oper = FakeOper(tsize=4, imm=int.from_bytes(b"A\x00B\x00", "little")) + result = get_printable_len(utf16_oper) + assert isinstance(result, int) + assert result == 2 + + utf16_oper_8 = FakeOper(tsize=8, imm=int.from_bytes(b"A\x00B\x00C\x00D\x00", "little")) + result = get_printable_len(utf16_oper_8) + assert isinstance(result, int) + assert result == 4