Files
hate_crack/tests/test_utils.py
T
Justin Bollinger 4eacf9d9ee fix: force LC_ALL=C for sort -u subprocesses to handle non-UTF-8 bytes
macOS `sort` is locale-strict: under LC_COLLATE=en_US.UTF-8 (the default
on most macOS shells) it errors out with "sort: Illegal byte sequence"
when stdin contains bytes that are not valid UTF-8. Cracked-password
streams routinely contain such bytes - hex-encoded fields, mixed
encodings, binary garbage from poorly-encoded source hashes - so this
fires in real fingerprint runs whenever the pot already contains any
output bytes that are not valid UTF-8.

Symptom in the fingerprint attack: the expander -> sort pipeline emits
"sort: Illegal byte sequence" and produces an empty .expanded file. The
empty-.expanded guard added in the previous patch then triggers the
"no candidates to expand" skip message - which is misleading, because
the user does have cracks; they just got dropped on the sort step.

Pass env={**os.environ, "LC_ALL": "C"} to all three subprocess.Popen
calls that invoke `sort -u`:
  - _write_field_sorted_unique  (main.py:1163)
  - hcatFingerprint expander    (main.py:1544)
  - hcatLMtoNT combinator dedupe (main.py:2995)

LC_ALL=C makes sort byte-collation only. Dedup correctness is
unaffected (byte equality is locale-independent), and hashcat doesn't
care about wordlist order.

Also adds an AST-level test that fails if any future `sort` Popen lacks
an env kwarg, so the locale fix can't silently regress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 15:04:42 -04:00

241 lines
7.5 KiB
Python

import logging
import os
import importlib
from hate_crack import api
from hate_crack import cli
from hate_crack import formatting
def test_orig_cwd_uses_env_var(monkeypatch, tmp_path):
    """Check that cli.orig_cwd() reports HATE_CRACK_ORIG_CWD when it is set."""
    expected_dir = str(tmp_path)
    monkeypatch.setenv("HATE_CRACK_ORIG_CWD", expected_dir)

    assert cli.orig_cwd() == expected_dir
def test_orig_cwd_falls_back_to_getcwd(monkeypatch):
    """Check that cli.orig_cwd() matches os.getcwd() once the env var is removed."""
    monkeypatch.delenv("HATE_CRACK_ORIG_CWD", raising=False)

    reported = cli.orig_cwd()

    assert reported == os.getcwd()
def test_resolve_path_none_and_expand():
    """An empty string resolves to None; a tilde resolves to an absolute path."""
    assert cli.resolve_path("") is None

    home_path = cli.resolve_path("~")
    assert home_path is not None
    assert os.path.isabs(home_path)
def test_resolve_path_uses_orig_cwd_for_relative_paths(monkeypatch, tmp_path):
    """A relative path is joined onto HATE_CRACK_ORIG_CWD when that variable is set."""
    base_dir = str(tmp_path)
    monkeypatch.setenv("HATE_CRACK_ORIG_CWD", base_dir)

    resolved = cli.resolve_path("hashes.txt")

    assert resolved == os.path.join(base_dir, "hashes.txt")
def test_resolve_path_ignores_orig_cwd_for_absolute_paths(monkeypatch, tmp_path):
    """An absolute path comes back unchanged even though HATE_CRACK_ORIG_CWD is set."""
    absolute = "/absolute/path/hashes.txt"
    monkeypatch.setenv("HATE_CRACK_ORIG_CWD", str(tmp_path))

    assert cli.resolve_path(absolute) == absolute
def test_resolve_path_without_orig_cwd_uses_abspath(monkeypatch):
    """With HATE_CRACK_ORIG_CWD removed, the result equals os.path.abspath()."""
    relative = "hashes.txt"
    monkeypatch.delenv("HATE_CRACK_ORIG_CWD", raising=False)

    resolved = cli.resolve_path(relative)

    assert resolved == os.path.abspath(relative)
def test_setup_logging_adds_single_streamhandler(tmp_path):
    """Calling cli.setup_logging() twice must leave exactly one stream handler.

    The test also asserts that no ``logging.FileHandler`` is attached after
    the two calls.
    """
    logger = logging.getLogger("hate_crack_test")
    logger.handlers.clear()
    try:
        cli.setup_logging(logger, str(tmp_path), debug_mode=True)
        cli.setup_logging(logger, str(tmp_path), debug_mode=True)

        # FileHandler subclasses StreamHandler, so it has to be excluded
        # explicitly to count only plain stream handlers.
        stream_handlers = [
            h
            for h in logger.handlers
            if isinstance(h, logging.StreamHandler)
            and not isinstance(h, logging.FileHandler)
        ]
        assert len(stream_handlers) == 1

        file_handlers = [
            h for h in logger.handlers if isinstance(h, logging.FileHandler)
        ]
        assert file_handlers == []
    finally:
        # Named loggers are process-wide singletons; clear the handlers even
        # when an assertion fails so they do not leak into later tests.
        logger.handlers.clear()
def test_print_multicolumn_list_truncates(capsys, monkeypatch):
    """An entry longer than the available width is printed with an ellipsis."""

    def narrow_terminal(default=120):
        return 10

    # Avoid patching os.get_terminal_size (pytest uses it internally).
    monkeypatch.setattr(formatting, "_terminal_width", narrow_terminal)

    entries = ["abcdefghijk"]
    formatting.print_multicolumn_list(
        "Title",
        entries,
        min_col_width=1,
        max_col_width=10,
    )

    printed = capsys.readouterr().out
    assert "..." in printed
def test_print_multicolumn_list_empty_entries(capsys):
    """An empty entry list is rendered with the "(none)" placeholder text."""
    formatting.print_multicolumn_list("Empty", [])

    printed = capsys.readouterr().out
    assert "(none)" in printed
def test_get_hcat_wordlists_dir_from_config(tmp_path, monkeypatch):
    """The wordlists dir named in config.json resolves under the patched hate path.

    The directory must also exist after the call.
    """
    cfg_file = tmp_path / "config.json"
    cfg_file.write_text('{"hcatWordlists": "wordlists"}')

    def fake_config_path():
        return str(cfg_file)

    def fake_hate_path():
        return str(tmp_path)

    monkeypatch.setattr(api, "_resolve_config_path", fake_config_path)
    monkeypatch.setattr(api, "_get_hate_path", fake_hate_path)

    wordlists_dir = api.get_hcat_wordlists_dir()

    assert wordlists_dir == str(tmp_path / "wordlists")
    assert os.path.isdir(wordlists_dir)
def test_get_hcat_wordlists_dir_fallback_cwd(tmp_path, monkeypatch):
    """With no config path, the wordlists dir is ``wordlists`` under the cwd."""

    def no_config():
        return None

    monkeypatch.setattr(api, "_resolve_config_path", no_config)
    monkeypatch.chdir(tmp_path)

    wordlists_dir = api.get_hcat_wordlists_dir()

    assert wordlists_dir == str(tmp_path / "wordlists")
    assert os.path.isdir(wordlists_dir)
def test_get_rules_dir_from_config(tmp_path, monkeypatch):
    """The rules dir named in config.json resolves under the patched hate path.

    The directory must also exist after the call.
    """
    cfg_file = tmp_path / "config.json"
    cfg_file.write_text('{"rules_directory": "rules"}')

    def fake_config_path():
        return str(cfg_file)

    def fake_hate_path():
        return str(tmp_path)

    monkeypatch.setattr(api, "_resolve_config_path", fake_config_path)
    monkeypatch.setattr(api, "_get_hate_path", fake_hate_path)

    rules_dir = api.get_rules_dir()

    assert rules_dir == str(tmp_path / "rules")
    assert os.path.isdir(rules_dir)
def test_get_rules_dir_fallback_cwd(tmp_path, monkeypatch):
    """With no config path, the rules dir is ``rules`` under the cwd."""

    def no_config():
        return None

    monkeypatch.setattr(api, "_resolve_config_path", no_config)
    monkeypatch.chdir(tmp_path)

    rules_dir = api.get_rules_dir()

    assert rules_dir == str(tmp_path / "rules")
    assert os.path.isdir(rules_dir)
def test_cleanup_torrent_files_removes_only_torrents(tmp_path):
    """cleanup_torrent_files() deletes the .torrent file and leaves the .txt file."""
    torrent_file = tmp_path / "a.torrent"
    other_file = tmp_path / "b.txt"
    for path in (torrent_file, other_file):
        path.write_text("data")

    api.cleanup_torrent_files(directory=str(tmp_path))

    assert not torrent_file.exists()
    assert other_file.exists()
def test_cleanup_torrent_files_missing_dir(capsys, tmp_path):
    """A nonexistent directory produces a failure message on stdout."""
    absent_dir = tmp_path / "missing"

    api.cleanup_torrent_files(directory=str(absent_dir))

    printed = capsys.readouterr().out
    assert "Failed to cleanup torrent files" in printed
def test_register_torrent_cleanup_idempotent(monkeypatch):
    """Two calls to register_torrent_cleanup() register with atexit only once."""
    registered = []

    def fake_register(fn):
        registered.append(fn)

    # Start from the "not yet registered" state so the first call registers.
    monkeypatch.setattr(api, "_TORRENT_CLEANUP_REGISTERED", False)
    monkeypatch.setattr("atexit.register", fake_register)

    for _ in range(2):
        api.register_torrent_cleanup()

    assert len(registered) == 1
def test_line_count_and_write_helpers(tmp_path, monkeypatch):
    """Exercise lineCount and the delimited-field writers in hate_crack.main.

    ``subprocess.Popen`` on the reloaded module is replaced with an in-process
    fake, so the sorted-unique writer is checked without spawning a process.
    """
    # Set before the reload so the module body re-executes with the variable
    # in place (presumably this skips interactive initialisation - confirm
    # against hate_crack.main).
    monkeypatch.setenv("HATE_CRACK_SKIP_INIT", "1")
    from hate_crack import main as main_module

    importlib.reload(main_module)

    source_file = tmp_path / "input.txt"
    source_file.write_text("a:b:c\nno-delim\n1:2:3\n")
    field_out = tmp_path / "out_delimited.txt"
    unique_out = tmp_path / "out_unique.txt"

    assert main_module.lineCount(str(source_file)) == 3
    assert main_module.lineCount(str(tmp_path / "missing.txt")) == 0

    wrote_field = main_module._write_delimited_field(
        str(source_file), str(field_out), 2
    )
    assert wrote_field is True
    assert field_out.read_text().splitlines() == ["b", "2"]

    wrote_missing = main_module._write_delimited_field(
        str(tmp_path / "missing.txt"), str(field_out), 2
    )
    assert wrote_missing is False

    class FakeStdin:
        """Buffers written lines and hands them to the owning fake on close()."""

        def __init__(self, popen):
            self._popen = popen
            self._lines = []

        def write(self, data):
            self._lines.append(data.rstrip("\n"))

        def close(self):
            self._popen._data = self._lines

    class FakePopen:
        """Stand-in for subprocess.Popen that sorts and de-duplicates in wait()."""

        def __init__(self, args, stdin=None, stdout=None, text=None, **_kwargs):
            self._stdout = stdout
            self._data = None
            self.stdin = FakeStdin(self)

        def wait(self):
            unique_lines = sorted(set(self._data))
            for entry in unique_lines:
                self._stdout.write(entry + "\n")
            return 0

    monkeypatch.setattr(main_module.subprocess, "Popen", FakePopen)

    wrote_unique = main_module._write_field_sorted_unique(
        str(source_file), str(unique_out), 2
    )
    assert wrote_unique is True
    assert unique_out.read_text().splitlines() == ["2", "b"]
def test_get_customer_hashfiles_with_hashtype_filters(monkeypatch):
    """Only hashfiles matching the target hashtype are returned.

    The stubbed listing holds one entry with ``hashtype`` "1000" and one
    with ``hash_type`` "0"; filtering on "1000" yields the first entry and
    filtering on "999" yields an empty list.
    """
    client = api.HashviewAPI("https://example", "key")

    def fake_hashfiles(customer_id):
        return [
            {"customer_id": customer_id, "hashtype": "1000"},
            {"customer_id": customer_id, "hash_type": "0"},
        ]

    monkeypatch.setattr(client, "get_customer_hashfiles", fake_hashfiles)

    hits = client.get_customer_hashfiles_with_hashtype(1, target_hashtype="1000")
    assert len(hits) == 1
    assert hits[0]["hashtype"] == "1000"

    misses = client.get_customer_hashfiles_with_hashtype(1, target_hashtype="999")
    assert misses == []