fix: handle bare NTLM hash detection with BOM, null bytes, and encoding artifacts

Open the hash file with utf-8-sig encoding so a leading UTF-8 BOM is stripped
natively, and remove the null bytes left behind by UTF-16 artifacts, so the
bare hash regex matches correctly. Replace the unhelpful "unknown format"
error with a diagnostic message showing the actual first-line content and the
expected formats.
This commit is contained in:
Justin Bollinger
2026-03-16 14:17:37 -04:00
parent c237f299b4
commit 8731bb2d5e
2 changed files with 156 additions and 3 deletions

View File

@@ -3825,8 +3825,8 @@ def main():
if hcatHashType == "1000":
lmHashesFound = False
pwdump_format = False
with open(hcatHashFile, "r") as f:
hcatHashFileLine = f.readline().strip().lstrip("\ufeff")
with open(hcatHashFile, "r", encoding="utf-8-sig") as f:
hcatHashFileLine = f.readline().strip().replace("\x00", "")
if re.search(r"[a-f0-9A-F]{32}:[a-f0-9A-F]{32}:::", hcatHashFileLine):
pwdump_format = True
print("PWDUMP format detected...")
@@ -3897,7 +3897,11 @@ def main():
print("NetNTLMv2 format detected")
print("Note: Hash type should be 5500 for NetNTLMv2 hashes")
else:
print("unknown format....does it have usernames?")
print(f"Unrecognized hash format on first line: {hcatHashFileLine!r}")
print(
"Expected one of: pwdump (user:RID:LM:NT:::),"
" bare hash (32 hex chars), user:hash, or NetNTLMv2"
)
exit(1)
# Detect and optionally filter computer accounts from NetNTLM hashes
if hcatHashType in ("5500", "5600"):

View File

@@ -0,0 +1,149 @@
"""Tests for bare NTLM hash format detection in preprocessing.

These tests exercise a local replica of the first-line reading logic and
locally compiled detection patterns mirroring main.py:3828-3905, checking
that bare 32-char hex hashes are identified correctly under various encoding
and whitespace conditions.
"""
import re
import sys
import importlib
import pytest
@pytest.fixture
def main_module(monkeypatch):
    """Provide ``hate_crack.main`` loaded with ``HATE_CRACK_SKIP_INIT=1`` set.

    The environment variable is set through ``monkeypatch`` before the module
    is imported. If the module is already present in ``sys.modules`` it is
    reloaded in place; otherwise it is imported for the first time.
    """
    monkeypatch.setenv("HATE_CRACK_SKIP_INIT", "1")

    module_name = "hate_crack.main"
    if module_name not in sys.modules:
        # First import in this process.
        import hate_crack.main as loaded

        return loaded

    # Already imported: reload it and hand back the object held in sys.modules.
    loaded = sys.modules[module_name]
    importlib.reload(loaded)
    return loaded
def _read_first_line(path):
    """Return the normalized first line of *path*, mirroring main.py:3828-3829.

    The file is decoded as ``utf-8-sig`` (which drops a leading UTF-8 BOM),
    the first line is stripped of surrounding whitespace, and then every NUL
    character is removed from what remains.
    """
    with open(path, "r", encoding="utf-8-sig") as handle:
        raw_line = handle.readline()
    # Order matters for parity with main.py: strip first, then drop NULs.
    trimmed = raw_line.strip()
    return trimmed.replace("\x00", "")
# Bare NTLM hash: the line, anchored at both ends, is 32 hex characters
# (upper or lower case).
BARE_HASH_PATTERN = re.compile(r"^[a-f0-9A-F]{32}$")
# pwdump line: two 32-hex-character fields separated by a colon and followed
# by ":::". Unanchored, so other text may precede the first field.
PWDUMP_PATTERN = re.compile(r"[a-f0-9A-F]{32}:[a-f0-9A-F]{32}:::")
# user:hash line: at least one character, a colon, then 32 hex characters
# ending the line.
USER_HASH_PATTERN = re.compile(r"^.+:[a-f0-9A-F]{32}$")
class TestBareHashDetection:
    """Bare 32-char NTLM hash detection.

    Each test writes a one-line hash file under ``tmp_path``, reads it back
    through ``_read_first_line`` and checks the result against
    ``BARE_HASH_PATTERN``.
    """

    @staticmethod
    def _first_line(path, content):
        """Write *content* to *path* and return its normalized first line.

        ``str`` content is written with ``Path.write_text`` (platform default
        encoding and newline handling); ``bytes`` content is written verbatim
        with ``Path.write_bytes`` so a test controls the exact on-disk bytes.
        """
        if isinstance(content, bytes):
            path.write_bytes(content)
        else:
            path.write_text(content)
        return _read_first_line(str(path))

    def test_bare_hash_detected(self, tmp_path):
        """A lowercase hash followed by a newline matches."""
        line = self._first_line(
            tmp_path / "bare.txt", "aad3b435b51404eeaad3b435b51404ee\n"
        )
        assert BARE_HASH_PATTERN.search(line)

    def test_bare_hash_uppercase(self, tmp_path):
        """An all-uppercase hash matches."""
        line = self._first_line(
            tmp_path / "bare.txt", "AAD3B435B51404EEAAD3B435B51404EE\n"
        )
        assert BARE_HASH_PATTERN.search(line)

    def test_bare_hash_mixed_case(self, tmp_path):
        """A hash mixing upper- and lowercase hex digits matches."""
        line = self._first_line(
            tmp_path / "bare.txt", "Aad3b435B51404eeAAD3b435b51404EE\n"
        )
        assert BARE_HASH_PATTERN.search(line)

    def test_bare_hash_with_bom(self, tmp_path):
        """A leading UTF-8 BOM does not prevent the match."""
        line = self._first_line(
            tmp_path / "bare.txt",
            b"\xef\xbb\xbfaad3b435b51404eeaad3b435b51404ee\n",
        )
        assert BARE_HASH_PATTERN.search(line), f"BOM not stripped: {line!r}"

    def test_bare_hash_with_crlf(self, tmp_path):
        """A CRLF line ending does not prevent the match."""
        line = self._first_line(
            tmp_path / "bare.txt", b"aad3b435b51404eeaad3b435b51404ee\r\n"
        )
        assert BARE_HASH_PATTERN.search(line), f"CRLF not stripped: {line!r}"

    def test_bare_hash_with_bom_and_crlf(self, tmp_path):
        """A UTF-8 BOM combined with a CRLF line ending still matches."""
        line = self._first_line(
            tmp_path / "bare.txt",
            b"\xef\xbb\xbfaad3b435b51404eeaad3b435b51404ee\r\n",
        )
        assert BARE_HASH_PATTERN.search(line), f"BOM+CRLF not handled: {line!r}"

    def test_bare_hash_with_trailing_space(self, tmp_path):
        """A trailing space before the newline is stripped."""
        line = self._first_line(
            tmp_path / "bare.txt", "aad3b435b51404eeaad3b435b51404ee \n"
        )
        assert BARE_HASH_PATTERN.search(line), f"Trailing space not stripped: {line!r}"

    def test_bare_hash_with_null_bytes(self, tmp_path):
        """BOM-less UTF-16LE content decoded as UTF-8 leaves a NUL after each char."""
        # The "utf-16-le" codec writes no BOM, so this yields exactly the
        # hash characters interleaved with NUL bytes.
        raw = "aad3b435b51404eeaad3b435b51404ee\n".encode("utf-16-le")
        line = self._first_line(tmp_path / "bare.txt", raw)
        assert BARE_HASH_PATTERN.search(line), f"Null bytes not stripped: {line!r}"

    def test_not_bare_hash_31_chars(self, tmp_path):
        """A 31-character hex string is one short and must not match."""
        line = self._first_line(
            tmp_path / "short.txt", "aad3b435b51404eeaad3b435b51404e\n"
        )
        assert not BARE_HASH_PATTERN.search(line)

    def test_not_bare_hash_33_chars(self, tmp_path):
        """A 33-character hex string is one long and must not match."""
        line = self._first_line(
            tmp_path / "long.txt", "aad3b435b51404eeaad3b435b51404eee\n"
        )
        assert not BARE_HASH_PATTERN.search(line)

    def test_not_bare_hash_non_hex(self, tmp_path):
        """A 32-character string containing a non-hex digit must not match."""
        line = self._first_line(
            tmp_path / "nonhex.txt", "zad3b435b51404eeaad3b435b51404ee\n"
        )
        assert not BARE_HASH_PATTERN.search(line)
class TestFormatDetectionPriority:
    """Verify the detection chain matches the correct format.

    Each test writes a one-line file, reads it through ``_read_first_line``
    and checks which of the module-level patterns match the result.
    """

    def test_pwdump_takes_priority(self, tmp_path):
        """A full pwdump line is recognized by ``PWDUMP_PATTERN``."""
        hash_file = tmp_path / "pwdump.txt"
        hash_file.write_text(
            "admin:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n"
        )
        line = _read_first_line(str(hash_file))
        assert PWDUMP_PATTERN.search(line)

    def test_user_hash_detected(self, tmp_path):
        """A ``user:hash`` line matches only ``USER_HASH_PATTERN``."""
        hash_file = tmp_path / "userhash.txt"
        hash_file.write_text("admin:aad3b435b51404eeaad3b435b51404ee\n")
        line = _read_first_line(str(hash_file))
        assert not PWDUMP_PATTERN.search(line)
        assert not BARE_HASH_PATTERN.search(line)
        assert USER_HASH_PATTERN.search(line)

    def test_bare_hash_not_confused_with_user_hash(self, tmp_path):
        """A bare hash matches only ``BARE_HASH_PATTERN``."""
        hash_file = tmp_path / "bare.txt"
        hash_file.write_text("aad3b435b51404eeaad3b435b51404ee\n")
        line = _read_first_line(str(hash_file))
        assert not PWDUMP_PATTERN.search(line)
        # This is the check the test name promises: a bare hash has no
        # "user:" prefix, so the user:hash pattern must reject it.
        assert not USER_HASH_PATTERN.search(line)
        assert BARE_HASH_PATTERN.search(line)
class TestErrorMessageOnUnrecognizedFormat:
    """Verify the improved error message for unrecognized formats."""

    def test_unrecognized_format_shows_repr(self, tmp_path, main_module, capsys):
        """A line in none of the known formats is rejected by every pattern.

        NOTE(review): ``main_module`` and ``capsys`` are requested but the
        statements below never use them, and no printed output is inspected.
        Confirm whether a check of the diagnostic message was intended.
        """
        hash_file = tmp_path / "weird.txt"
        hash_file.write_text("not_a_valid_hash_format\n")
        line = _read_first_line(str(hash_file))
        # Checked in the same order as the original three assertions.
        for pattern in (BARE_HASH_PATTERN, PWDUMP_PATTERN, USER_HASH_PATTERN):
            assert pattern.search(line) is None