mirror of
https://github.com/mandiant/capa.git
synced 2025-12-05 20:40:05 -08:00
Replace binascii and struct with native Python methods (#2582)
* refactor: replace binascii with bytes for hex conversions
Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
* refactor: replace struct unpacking with bytes conversion
Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
* simplify byte extraction for ELF header
Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
* Revert "refactor: replace struct unpacking with bytes conversion"
This reverts commit 483f8c9a85.
* update CHANGELOG
Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
---------
Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
This commit is contained in:
@@ -31,6 +31,7 @@
|
|||||||
- strings: add type hints and fix uncovered bugs @williballenthin #2555
|
- strings: add type hints and fix uncovered bugs @williballenthin #2555
|
||||||
- elffile: handle symbols without a name @williballenthin #2553
|
- elffile: handle symbols without a name @williballenthin #2553
|
||||||
- project: remove pytest-cov that wasn't used @williballenthin #2491
|
- project: remove pytest-cov that wasn't used @williballenthin #2491
|
||||||
|
- replace binascii methods with native Python methods @v1bh475u #2582
|
||||||
- rules: scopes can now have subscope blocks with the same scope @williballenthin #2584
|
- rules: scopes can now have subscope blocks with the same scope @williballenthin #2584
|
||||||
|
|
||||||
### capa Explorer Web
|
### capa Explorer Web
|
||||||
|
|||||||
@@ -12,7 +12,6 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import binascii
|
|
||||||
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
|
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
|
||||||
|
|
||||||
from pydantic import Field, BaseModel, ConfigDict
|
from pydantic import Field, BaseModel, ConfigDict
|
||||||
@@ -27,7 +26,7 @@ def validate_hex_int(value):
|
|||||||
|
|
||||||
|
|
||||||
def validate_hex_bytes(value):
|
def validate_hex_bytes(value):
|
||||||
return binascii.unhexlify(value) if isinstance(value, str) else value
|
return bytes.fromhex(value) if isinstance(value, str) else value
|
||||||
|
|
||||||
|
|
||||||
HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
|
HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
|
||||||
|
|||||||
@@ -12,7 +12,6 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import binascii
|
|
||||||
from typing import Union, Literal, Optional, Annotated
|
from typing import Union, Literal, Optional, Annotated
|
||||||
|
|
||||||
from pydantic import Field, BaseModel, ConfigDict
|
from pydantic import Field, BaseModel, ConfigDict
|
||||||
@@ -85,7 +84,7 @@ class FeatureModel(BaseModel):
|
|||||||
return capa.features.insn.Number(self.number, description=self.description)
|
return capa.features.insn.Number(self.number, description=self.description)
|
||||||
|
|
||||||
elif isinstance(self, BytesFeature):
|
elif isinstance(self, BytesFeature):
|
||||||
return capa.features.common.Bytes(binascii.unhexlify(self.bytes), description=self.description)
|
return capa.features.common.Bytes(bytes.fromhex(self.bytes), description=self.description)
|
||||||
|
|
||||||
elif isinstance(self, OffsetFeature):
|
elif isinstance(self, OffsetFeature):
|
||||||
return capa.features.insn.Offset(self.offset, description=self.description)
|
return capa.features.insn.Offset(self.offset, description=self.description)
|
||||||
@@ -191,7 +190,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
|
|||||||
elif isinstance(f, capa.features.common.Bytes):
|
elif isinstance(f, capa.features.common.Bytes):
|
||||||
buf = f.value
|
buf = f.value
|
||||||
assert isinstance(buf, bytes)
|
assert isinstance(buf, bytes)
|
||||||
return BytesFeature(bytes=binascii.hexlify(buf).decode("ascii"), description=f.description)
|
return BytesFeature(bytes=bytes.hex(buf), description=f.description)
|
||||||
|
|
||||||
elif isinstance(f, capa.features.insn.Offset):
|
elif isinstance(f, capa.features.insn.Offset):
|
||||||
assert isinstance(f.value, int)
|
assert isinstance(f.value, int)
|
||||||
|
|||||||
@@ -36,7 +36,6 @@ Check the output window for any errors, and/or the summary of changes.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import binascii
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import ida_nalt
|
import ida_nalt
|
||||||
@@ -85,7 +84,7 @@ def main():
|
|||||||
#
|
#
|
||||||
# see: https://github.com/idapython/bin/issues/11
|
# see: https://github.com/idapython/bin/issues/11
|
||||||
a = meta.sample.md5.lower()
|
a = meta.sample.md5.lower()
|
||||||
b = binascii.hexlify(ida_nalt.retrieve_input_file_md5()).decode("ascii").lower()
|
b = bytes.hex(ida_nalt.retrieve_input_file_md5()).lower()
|
||||||
if not a.startswith(b):
|
if not a.startswith(b):
|
||||||
logger.error("sample mismatch")
|
logger.error("sample mismatch")
|
||||||
return -2
|
return -2
|
||||||
|
|||||||
@@ -13,7 +13,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
import binascii
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import collections
|
import collections
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -942,17 +941,17 @@ FEATURE_PRESENCE_TESTS = sorted(
|
|||||||
# insn/string, direct memory reference
|
# insn/string, direct memory reference
|
||||||
("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
|
("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
|
||||||
# insn/bytes
|
# insn/bytes
|
||||||
("mimikatz", "function=0x401517", capa.features.common.Bytes(binascii.unhexlify("CA3B0E000000F8AF47")), True),
|
("mimikatz", "function=0x401517", capa.features.common.Bytes(bytes.fromhex("CA3B0E000000F8AF47")), True),
|
||||||
("mimikatz", "function=0x404414", capa.features.common.Bytes(binascii.unhexlify("0180000040EA4700")), True),
|
("mimikatz", "function=0x404414", capa.features.common.Bytes(bytes.fromhex("0180000040EA4700")), True),
|
||||||
# don't extract byte features for obvious strings
|
# don't extract byte features for obvious strings
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
|
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
|
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False),
|
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False),
|
||||||
("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
|
("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
|
||||||
# push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
|
# push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
|
||||||
("mimikatz", "function=0x401000", capa.features.common.Bytes(binascii.unhexlify("FDFF59F647")), False),
|
("mimikatz", "function=0x401000", capa.features.common.Bytes(bytes.fromhex("FDFF59F647")), False),
|
||||||
# IDA features included byte sequences read from invalid memory, fixed in #409
|
# IDA features included byte sequences read from invalid memory, fixed in #409
|
||||||
("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False),
|
("mimikatz", "function=0x44570F", capa.features.common.Bytes(bytes.fromhex("FF" * 256)), False),
|
||||||
# insn/bytes, pointer to string bytes
|
# insn/bytes, pointer to string bytes
|
||||||
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
|
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
|
||||||
# insn/characteristic(nzxor)
|
# insn/characteristic(nzxor)
|
||||||
|
|||||||
@@ -12,7 +12,6 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import binascii
|
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -302,7 +301,7 @@ FEATURE_PRESENCE_TESTS_BE2_ELF_AARCH64 = sorted(
|
|||||||
(
|
(
|
||||||
"d1e650.ghidra.be2",
|
"d1e650.ghidra.be2",
|
||||||
"function=0x1165a4",
|
"function=0x1165a4",
|
||||||
capa.features.common.Bytes(binascii.unhexlify("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
|
capa.features.common.Bytes(bytes.fromhex("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
|
||||||
True,
|
True,
|
||||||
),
|
),
|
||||||
# # don't extract byte features for obvious strings
|
# # don't extract byte features for obvious strings
|
||||||
|
|||||||
@@ -60,7 +60,6 @@ import io
|
|||||||
import sys
|
import sys
|
||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
import binascii
|
|
||||||
import traceback
|
import traceback
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -86,7 +85,7 @@ def check_input_file(wanted):
|
|||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
# in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
|
# in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
|
||||||
# rather than the hex digest
|
# rather than the hex digest
|
||||||
found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower()
|
found = bytes.hex(idautils.GetInputFileMD5()[:15]).lower()
|
||||||
|
|
||||||
if not wanted.startswith(found):
|
if not wanted.startswith(found):
|
||||||
raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`")
|
raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`")
|
||||||
|
|||||||
Reference in New Issue
Block a user