Replace binascii and struct with native Python methods (#2582)

* refactor: replace binascii with bytes for hex conversions

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>

* refactor: replace struct unpacking with bytes conversion

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>

* simplify byte extraction for ELF header

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>

* Revert "refactor: replace struct unpacking with bytes conversion"

This reverts commit 483f8c9a85.

* update CHANGELOG

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>

---------

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
This commit is contained in:
vibhatsu
2025-02-04 14:23:36 +05:30
committed by GitHub
parent 96f9e7cffc
commit a8e8935212
7 changed files with 11 additions and 16 deletions

View File

@@ -31,6 +31,7 @@
- strings: add type hints and fix uncovered bugs @williballenthin #2555 - strings: add type hints and fix uncovered bugs @williballenthin #2555
- elffile: handle symbols without a name @williballenthin #2553 - elffile: handle symbols without a name @williballenthin #2553
- project: remove pytest-cov that wasn't used @williballenthin #2491 - project: remove pytest-cov that wasn't used @williballenthin #2491
- replace binascii methods with native Python methods @v1bh475u #2582
- rules: scopes can now have subscope blocks with the same scope @williballenthin #2584 - rules: scopes can now have subscope blocks with the same scope @williballenthin #2584
### capa Explorer Web ### capa Explorer Web

View File

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import binascii
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
from pydantic import Field, BaseModel, ConfigDict from pydantic import Field, BaseModel, ConfigDict
@@ -27,7 +26,7 @@ def validate_hex_int(value):
def validate_hex_bytes(value): def validate_hex_bytes(value):
return binascii.unhexlify(value) if isinstance(value, str) else value return bytes.fromhex(value) if isinstance(value, str) else value
HexInt = Annotated[int, BeforeValidator(validate_hex_int)] HexInt = Annotated[int, BeforeValidator(validate_hex_int)]

View File

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import binascii
from typing import Union, Literal, Optional, Annotated from typing import Union, Literal, Optional, Annotated
from pydantic import Field, BaseModel, ConfigDict from pydantic import Field, BaseModel, ConfigDict
@@ -85,7 +84,7 @@ class FeatureModel(BaseModel):
return capa.features.insn.Number(self.number, description=self.description) return capa.features.insn.Number(self.number, description=self.description)
elif isinstance(self, BytesFeature): elif isinstance(self, BytesFeature):
return capa.features.common.Bytes(binascii.unhexlify(self.bytes), description=self.description) return capa.features.common.Bytes(bytes.fromhex(self.bytes), description=self.description)
elif isinstance(self, OffsetFeature): elif isinstance(self, OffsetFeature):
return capa.features.insn.Offset(self.offset, description=self.description) return capa.features.insn.Offset(self.offset, description=self.description)
@@ -191,7 +190,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
elif isinstance(f, capa.features.common.Bytes): elif isinstance(f, capa.features.common.Bytes):
buf = f.value buf = f.value
assert isinstance(buf, bytes) assert isinstance(buf, bytes)
return BytesFeature(bytes=binascii.hexlify(buf).decode("ascii"), description=f.description) return BytesFeature(bytes=bytes.hex(buf), description=f.description)
elif isinstance(f, capa.features.insn.Offset): elif isinstance(f, capa.features.insn.Offset):
assert isinstance(f.value, int) assert isinstance(f.value, int)

View File

@@ -36,7 +36,6 @@ Check the output window for any errors, and/or the summary of changes.
""" """
import logging import logging
import binascii
from pathlib import Path from pathlib import Path
import ida_nalt import ida_nalt
@@ -85,7 +84,7 @@ def main():
# #
# see: https://github.com/idapython/bin/issues/11 # see: https://github.com/idapython/bin/issues/11
a = meta.sample.md5.lower() a = meta.sample.md5.lower()
b = binascii.hexlify(ida_nalt.retrieve_input_file_md5()).decode("ascii").lower() b = bytes.hex(ida_nalt.retrieve_input_file_md5()).lower()
if not a.startswith(b): if not a.startswith(b):
logger.error("sample mismatch") logger.error("sample mismatch")
return -2 return -2

View File

@@ -13,7 +13,6 @@
# limitations under the License. # limitations under the License.
import binascii
import contextlib import contextlib
import collections import collections
from pathlib import Path from pathlib import Path
@@ -942,17 +941,17 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/string, direct memory reference # insn/string, direct memory reference
("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True), ("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
# insn/bytes # insn/bytes
("mimikatz", "function=0x401517", capa.features.common.Bytes(binascii.unhexlify("CA3B0E000000F8AF47")), True), ("mimikatz", "function=0x401517", capa.features.common.Bytes(bytes.fromhex("CA3B0E000000F8AF47")), True),
("mimikatz", "function=0x404414", capa.features.common.Bytes(binascii.unhexlify("0180000040EA4700")), True), ("mimikatz", "function=0x404414", capa.features.common.Bytes(bytes.fromhex("0180000040EA4700")), True),
# don't extract byte features for obvious strings # don't extract byte features for obvious strings
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False), ("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False), ("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False), ("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False), ("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
# push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction # push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
("mimikatz", "function=0x401000", capa.features.common.Bytes(binascii.unhexlify("FDFF59F647")), False), ("mimikatz", "function=0x401000", capa.features.common.Bytes(bytes.fromhex("FDFF59F647")), False),
# IDA features included byte sequences read from invalid memory, fixed in #409 # IDA features included byte sequences read from invalid memory, fixed in #409
("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False), ("mimikatz", "function=0x44570F", capa.features.common.Bytes(bytes.fromhex("FF" * 256)), False),
# insn/bytes, pointer to string bytes # insn/bytes, pointer to string bytes
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False), ("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
# insn/characteristic(nzxor) # insn/characteristic(nzxor)

View File

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import binascii
from typing import cast from typing import cast
import pytest import pytest
@@ -302,7 +301,7 @@ FEATURE_PRESENCE_TESTS_BE2_ELF_AARCH64 = sorted(
( (
"d1e650.ghidra.be2", "d1e650.ghidra.be2",
"function=0x1165a4", "function=0x1165a4",
capa.features.common.Bytes(binascii.unhexlify("E405B89370BA6B419CD7925275BF6FCC1E8360CC")), capa.features.common.Bytes(bytes.fromhex("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
True, True,
), ),
# # don't extract byte features for obvious strings # # don't extract byte features for obvious strings

View File

@@ -60,7 +60,6 @@ import io
import sys import sys
import inspect import inspect
import logging import logging
import binascii
import traceback import traceback
from pathlib import Path from pathlib import Path
@@ -86,7 +85,7 @@ def check_input_file(wanted):
except UnicodeDecodeError: except UnicodeDecodeError:
# in IDA 7.5 or so, GetInputFileMD5 started returning raw binary # in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
# rather than the hex digest # rather than the hex digest
found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower() found = bytes.hex(idautils.GetInputFileMD5()[:15]).lower()
if not wanted.startswith(found): if not wanted.startswith(found):
raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`") raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`")