From ced180ddbc15e2f50ca008251938838f22598083 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 18 May 2026 02:20:10 -0600 Subject: [PATCH] perf: optimize all_zeros using fast bytes comparison (#3078) * perf: optimize all_zeros using fast bytes comparison --- CHANGELOG.md | 1 + capa/features/extractors/helpers.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8782286b..f4c218e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -115,6 +115,7 @@ - fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin (SURF-41) - fix: incorrect bytes() constructor usage in buf_filled_with @mike-hunhoff #3077 - fix: remove redundant code related to cli loading @mike-hunhoff #3076 +- fix: optimize all_zeros using fast bytes comparison @mike-hunhoff #3078 ### capa Explorer Web diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index eb546f50..9fad74ef 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -14,7 +14,6 @@ import struct -import builtins from typing import Iterator MIN_STACKSTRING_LEN = 8 @@ -108,7 +107,12 @@ def reformat_forwarded_export_name(forwarded_name: str) -> str: def all_zeros(bytez: bytes) -> bool: - return all(b == 0 for b in builtins.bytes(bytez)) + # Using `bytez == b'\x00' * len(bytez)` is much faster than `all(b == 0 for b in bytez)` + # because it relies on the optimized C implementation of bytes comparison. + # While it creates a temporary bytes object, the buffers passed here are small + # (typically capped at MAX_BYTES_FEATURE_SIZE = 256 bytes), so the memory overhead is negligible. + bytez = bytes(bytez) + return bytez == b"\x00" * len(bytez) def twos_complement(val: int, bits: int) -> int: