Files
hate_crack/tests/test_passgpt_attack.py
T
Justin Bollinger b6524cbdc4 feat: add training time estimates and device selection to PassGPT menu
Show estimated training times for CUDA/MPS/CPU before starting a
training run. Add device selection prompt with cuda as the default.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 11:27:09 -05:00

515 lines
18 KiB
Python

import os
import sys
from unittest.mock import MagicMock, patch
import pytest
from hate_crack.passgpt_train import (
_count_lines,
_estimate_training_memory_mb,
_get_available_memory_mb,
)
@pytest.fixture
def main_module(hc_module):
    """Expose ``hc_module._main`` (the hate_crack.main module) so tests can patch it directly."""
    underlying_main = hc_module._main
    return underlying_main
class TestHcatPassGPT:
    """Checks the two subprocess commands that ``hcatPassGPT`` launches."""

    @staticmethod
    def _queue_fake_processes(popen_mock):
        """Make ``popen_mock`` hand out a fake generator, then a fake hashcat process."""
        generator_proc = MagicMock()
        generator_proc.stdout = MagicMock()
        generator_proc.wait.return_value = None
        hashcat_proc = MagicMock()
        hashcat_proc.wait.return_value = None
        # Popen is expected to be called twice: generator first, hashcat second.
        popen_mock.side_effect = [generator_proc, hashcat_proc]

    def test_builds_correct_pipe_commands(self, main_module):
        """The patched model and batch size show up in the generator command."""
        with (
            patch.object(main_module, "hcatBin", "hashcat"),
            patch.object(main_module, "hcatTuning", "--force"),
            patch.object(main_module, "hcatPotfilePath", ""),
            patch.object(main_module, "hcatHashFile", "/tmp/hashes.txt", create=True),
            patch.object(
                main_module, "passgptModel", "javirandor/passgpt-10characters"
            ),
            patch.object(main_module, "passgptBatchSize", 1024),
            patch("hate_crack.main.subprocess.Popen") as popen_mock,
        ):
            self._queue_fake_processes(popen_mock)
            main_module.hcatPassGPT("1000", "/tmp/hashes.txt", 500000)

        assert popen_mock.call_count == 2
        generator_call, hashcat_call = popen_mock.call_args_list

        # First call: passgpt generator
        generator_argv = generator_call.args[0]
        assert generator_argv[0] == sys.executable
        for token in (
            "-m",
            "hate_crack.passgpt_generate",
            "--num",
            "500000",
            "--model",
            "javirandor/passgpt-10characters",
            "--batch-size",
            "1024",
        ):
            assert token in generator_argv

        # Second call: hashcat
        hashcat_argv = hashcat_call.args[0]
        assert hashcat_argv[0] == "hashcat"
        for token in ("1000", "/tmp/hashes.txt"):
            assert token in hashcat_argv

    def test_custom_model_and_batch_size(self, main_module):
        """Explicit ``model_name`` / ``batch_size`` arguments reach the generator command."""
        with (
            patch.object(main_module, "hcatBin", "hashcat"),
            patch.object(main_module, "hcatTuning", "--force"),
            patch.object(main_module, "hcatPotfilePath", ""),
            patch.object(main_module, "hcatHashFile", "/tmp/hashes.txt", create=True),
            patch.object(
                main_module, "passgptModel", "javirandor/passgpt-10characters"
            ),
            patch.object(main_module, "passgptBatchSize", 1024),
            patch("hate_crack.main.subprocess.Popen") as popen_mock,
        ):
            self._queue_fake_processes(popen_mock)
            main_module.hcatPassGPT(
                "1000",
                "/tmp/hashes.txt",
                100000,
                model_name="custom/model",
                batch_size=512,
            )

        generator_argv = popen_mock.call_args_list[0].args[0]
        for token in ("custom/model", "512"):
            assert token in generator_argv
class TestHcatPassGPTTrain:
    """Checks the training subprocess command and return value of ``hcatPassGPTTrain``."""

    @staticmethod
    def _finished_process(returncode):
        """Return a fake process whose ``wait()`` returns None and that reports ``returncode``."""
        proc = MagicMock()
        proc.returncode = returncode
        proc.wait.return_value = None
        return proc

    def test_builds_correct_subprocess_command(self, main_module, tmp_path):
        """A zero exit code yields a non-None result and the expected trainer command."""
        wordlist = tmp_path / "wordlist.txt"
        wordlist.write_text("password123\nabc456\n")
        with (
            patch.object(
                main_module, "passgptModel", "javirandor/passgpt-10characters"
            ),
            patch("hate_crack.main.subprocess.Popen") as popen_mock,
            patch.object(
                main_module,
                "_passgpt_model_dir",
                return_value=str(tmp_path / "models"),
            ),
        ):
            popen_mock.return_value = self._finished_process(0)
            outcome = main_module.hcatPassGPTTrain(str(wordlist))

        assert outcome is not None
        assert popen_mock.call_count == 1
        argv = popen_mock.call_args.args[0]
        assert argv[0] == sys.executable
        for token in (
            "-m",
            "hate_crack.passgpt_train",
            "--training-file",
            str(wordlist),
            "--base-model",
            "javirandor/passgpt-10characters",
            "--output-dir",
        ):
            assert token in argv

    def test_missing_training_file(self, main_module, capsys):
        """A nonexistent training file returns None and prints an error to stdout."""
        outcome = main_module.hcatPassGPTTrain("/nonexistent/wordlist.txt")
        assert outcome is None
        printed = capsys.readouterr().out
        assert "Training file not found" in printed

    def test_custom_base_model(self, main_module, tmp_path):
        """An explicit ``base_model`` argument appears in the trainer command."""
        wordlist = tmp_path / "wordlist.txt"
        wordlist.write_text("test\n")
        with (
            patch("hate_crack.main.subprocess.Popen") as popen_mock,
            patch.object(
                main_module,
                "_passgpt_model_dir",
                return_value=str(tmp_path / "models"),
            ),
        ):
            popen_mock.return_value = self._finished_process(0)
            main_module.hcatPassGPTTrain(
                str(wordlist), base_model="custom/base-model"
            )

        argv = popen_mock.call_args.args[0]
        assert "custom/base-model" in argv

    def test_training_failure_returns_none(self, main_module, tmp_path):
        """A trainer process reporting return code 1 makes the call return None."""
        wordlist = tmp_path / "wordlist.txt"
        wordlist.write_text("test\n")
        with (
            patch.object(
                main_module, "passgptModel", "javirandor/passgpt-10characters"
            ),
            patch("hate_crack.main.subprocess.Popen") as popen_mock,
            patch.object(
                main_module,
                "_passgpt_model_dir",
                return_value=str(tmp_path / "models"),
            ),
        ):
            popen_mock.return_value = self._finished_process(1)
            outcome = main_module.hcatPassGPTTrain(str(wordlist))

        assert outcome is None
class TestPassGPTModelDir:
    """Tests for ``_passgpt_model_dir``."""

    def test_creates_directory(self, main_module, tmp_path):
        """The returned path is an existing directory whose name ends with ``passgpt``."""
        # expanduser is redirected to tmp_path — presumably so the directory is
        # created under the pytest temp dir instead of the real home directory;
        # confirm against the _passgpt_model_dir implementation.
        with patch("hate_crack.main.os.path.expanduser", return_value=str(tmp_path)):
            result = main_module._passgpt_model_dir()
        assert os.path.isdir(result)
        assert result.endswith("passgpt")
class TestPassGPTAttackHandler:
    """Drives ``passgpt_attack`` with a mocked context and scripted ``input`` answers."""

    def _make_ctx(self, model_dir=None):
        """Build the MagicMock context handed to ``passgpt_attack``.

        ``model_dir`` becomes the return value of ``ctx._passgpt_model_dir()``;
        when omitted, the placeholder path ``/nonexistent/empty`` is used.
        """
        ctx = MagicMock()
        ctx.HAS_ML_DEPS = True
        ctx.passgptMaxCandidates = 1000000
        ctx.passgptModel = "javirandor/passgpt-10characters"
        ctx.passgptBatchSize = 1024
        ctx.hcatHashType = "1000"
        ctx.hcatHashFile = "/tmp/hashes.txt"
        ctx.hcatWordlists = "/tmp/wordlists"
        ctx._passgpt_model_dir.return_value = (
            "/nonexistent/empty" if model_dir is None else model_dir
        )
        return ctx

    def test_select_default_model_and_generate(self):
        """Choosing option 1 and accepting the default count runs the configured model."""
        ctx = self._make_ctx()
        # "1" selects default model, "" accepts default max candidates
        inputs = iter(["1", ""])
        with (
            patch("builtins.input", side_effect=inputs),
            patch("hate_crack.attacks.os.path.isdir", return_value=False),
        ):
            from hate_crack.attacks import passgpt_attack

            passgpt_attack(ctx)
        ctx.hcatPassGPT.assert_called_once_with(
            "1000",
            "/tmp/hashes.txt",
            1000000,
            model_name="javirandor/passgpt-10characters",
            batch_size=1024,
        )

    def test_select_local_model(self, tmp_path):
        """Choosing option 2 passes the local model directory as ``model_name``."""
        # Create a fake local model directory
        model_dir = tmp_path / "passgpt"
        local_model = model_dir / "my_model"
        local_model.mkdir(parents=True)
        (local_model / "config.json").write_text("{}")
        ctx = self._make_ctx(model_dir=str(model_dir))
        # "2" selects the local model, "" accepts default max candidates
        inputs = iter(["2", ""])
        with (
            patch("builtins.input", side_effect=inputs),
            # isdir is patched exactly once: patching the same target twice in
            # one ``with`` only lets the later patch shadow the earlier one.
            patch("hate_crack.attacks.os.path.isdir", return_value=True),
            patch("hate_crack.attacks.os.listdir", return_value=["my_model"]),
            patch("hate_crack.attacks.os.path.isfile", return_value=True),
        ):
            from hate_crack.attacks import passgpt_attack

            passgpt_attack(ctx)
        ctx.hcatPassGPT.assert_called_once()
        # The model_name should be the local path
        assert ctx.hcatPassGPT.call_args.kwargs["model_name"] == str(local_model)

    def test_train_new_model(self):
        """Choosing "T" trains with the defaults, then generates from the trained model path."""
        ctx = self._make_ctx()
        ctx.select_file_with_autocomplete.return_value = "/tmp/wordlist.txt"
        ctx.hcatPassGPTTrain.return_value = "/home/user/.hate_crack/passgpt/wordlist"
        # "T" for train, "" for default base model, "" for default device (cuda),
        # "" for default max candidates
        inputs = iter(["T", "", "", ""])
        with (
            patch("builtins.input", side_effect=inputs),
            patch("hate_crack.attacks.os.path.isdir", return_value=False),
        ):
            from hate_crack.attacks import passgpt_attack

            passgpt_attack(ctx)
        ctx.hcatPassGPTTrain.assert_called_once_with(
            "/tmp/wordlist.txt", "javirandor/passgpt-10characters", device="cuda"
        )
        ctx.hcatPassGPT.assert_called_once()
        assert (
            ctx.hcatPassGPT.call_args.kwargs["model_name"]
            == "/home/user/.hate_crack/passgpt/wordlist"
        )

    def test_train_failure_aborts(self):
        """When training returns None, no generation run is started."""
        ctx = self._make_ctx()
        ctx.select_file_with_autocomplete.return_value = "/tmp/wordlist.txt"
        ctx.hcatPassGPTTrain.return_value = None
        # "T" for train, "" for default base model, "" for default device (cuda)
        inputs = iter(["T", "", ""])
        with (
            patch("builtins.input", side_effect=inputs),
            patch("hate_crack.attacks.os.path.isdir", return_value=False),
        ):
            from hate_crack.attacks import passgpt_attack

            passgpt_attack(ctx)
        ctx.hcatPassGPTTrain.assert_called_once()
        ctx.hcatPassGPT.assert_not_called()

    def test_ml_deps_missing(self, capsys):
        """With ``HAS_ML_DEPS`` false, an install hint is printed and nothing is run."""
        ctx = MagicMock()
        ctx.HAS_ML_DEPS = False
        from hate_crack.attacks import passgpt_attack

        passgpt_attack(ctx)
        captured = capsys.readouterr()
        assert "ML dependencies" in captured.out
        assert "uv pip install" in captured.out
        ctx.hcatPassGPT.assert_not_called()

    def test_custom_max_candidates(self):
        """A typed candidate count is passed to ``hcatPassGPT`` as an int."""
        ctx = self._make_ctx()
        # "1" selects default model, "500000" for custom max candidates
        inputs = iter(["1", "500000"])
        with (
            patch("builtins.input", side_effect=inputs),
            patch("hate_crack.attacks.os.path.isdir", return_value=False),
        ):
            from hate_crack.attacks import passgpt_attack

            passgpt_attack(ctx)
        ctx.hcatPassGPT.assert_called_once_with(
            "1000",
            "/tmp/hashes.txt",
            500000,
            model_name="javirandor/passgpt-10characters",
            batch_size=1024,
        )
class TestGetAvailableMemoryMb:
    """Smoke tests for ``_get_available_memory_mb`` on whatever platform runs the suite."""

    def test_returns_int_or_none(self):
        """The result is either None or an int."""
        detected = _get_available_memory_mb()
        assert isinstance(detected, (int, type(None)))

    def test_never_crashes_on_any_platform(self):
        """Calling the function does not raise."""
        # Should not raise regardless of platform
        _get_available_memory_mb()

    def test_returns_positive_when_detected(self):
        """Any non-None result is strictly positive."""
        detected = _get_available_memory_mb()
        assert detected is None or detected > 0
class TestCountLines:
    """Tests for ``_count_lines``."""

    def test_counts_non_empty_lines(self, tmp_path):
        """Three text lines plus one blank line count as 3."""
        sample = tmp_path / "test.txt"
        sample.write_text("line1\nline2\n\nline3\n")
        counted = _count_lines(str(sample))
        assert counted == 3

    def test_empty_file(self, tmp_path):
        """A zero-byte file counts as 0."""
        blank = tmp_path / "empty.txt"
        blank.write_text("")
        counted = _count_lines(str(blank))
        assert counted == 0
class TestEstimateTrainingMemoryMb:
    """Tests for ``_estimate_training_memory_mb``."""

    def test_returns_reasonable_estimate(self, tmp_path):
        """A 1000-line wordlist is estimated at no less than 1700."""
        wordlist = tmp_path / "words.txt"
        wordlist.write_text("password\n" * 1000)
        estimated = _estimate_training_memory_mb(str(wordlist))
        # Should include at least model + optimizer overhead (~1700MB)
        assert 1700 <= estimated

    def test_max_lines_reduces_estimate(self, tmp_path):
        """Capping ``max_lines`` never yields a larger estimate than the uncapped call."""
        wordlist = tmp_path / "words.txt"
        wordlist.write_text("password\n" * 100000)
        path = str(wordlist)
        uncapped = _estimate_training_memory_mb(path)
        capped = _estimate_training_memory_mb(path, max_lines=100)
        assert uncapped >= capped
class TestMemoryPrecheck:
    """Runs ``train`` with the memory probe and the memory estimate both patched."""

    def test_aborts_when_insufficient(self, tmp_path):
        """Available memory of 1 against an estimate of 5000 makes ``train`` raise SystemExit."""
        from hate_crack.passgpt_train import train

        wordlist = tmp_path / "words.txt"
        wordlist.write_text("password\n" * 10)
        with (
            patch("hate_crack.passgpt_train._get_available_memory_mb", return_value=1),
            patch(
                "hate_crack.passgpt_train._estimate_training_memory_mb",
                return_value=5000,
            ),
        ):
            with pytest.raises(SystemExit):
                train(
                    training_file=str(wordlist),
                    output_dir=str(tmp_path / "out"),
                    base_model="test",
                    epochs=1,
                    batch_size=1,
                    device="cpu",
                )

    def test_skips_when_detection_fails(self, tmp_path):
        """When memory detection returns None, training proceeds past the pre-check."""
        from hate_crack.passgpt_train import train

        wordlist = tmp_path / "words.txt"
        wordlist.write_text("password\n" * 10)

        fake_tokenizer = MagicMock()
        fake_model = MagicMock()
        fake_model.config.n_positions = 16
        fake_trainer = MagicMock()

        with (
            patch(
                "hate_crack.passgpt_train._get_available_memory_mb", return_value=None
            ),
            patch(
                "hate_crack.passgpt_train._estimate_training_memory_mb",
                return_value=5000,
            ),
            patch("hate_crack.passgpt_train._configure_mps"),
            patch(
                "transformers.RobertaTokenizerFast.from_pretrained",
                return_value=fake_tokenizer,
            ),
            patch(
                "transformers.GPT2LMHeadModel.from_pretrained",
                return_value=fake_model,
            ),
            patch("transformers.Trainer", return_value=fake_trainer),
            patch("transformers.TrainingArguments"),
        ):
            train(
                training_file=str(wordlist),
                output_dir=str(tmp_path / "out"),
                base_model="test",
                epochs=1,
                batch_size=1,
                device="cpu",
            )

        # Reaching Trainer.train() shows the pre-check did not abort the run.
        fake_trainer.train.assert_called_once()
class TestMaxLines:
    """Line counting and ``max_lines`` handling in the memory estimate."""

    def test_count_lines_respects_limit(self, tmp_path):
        """``_count_lines`` reports all 1000 lines of the file."""
        wordlist = tmp_path / "words.txt"
        wordlist.write_text("password\n" * 1000)
        # _count_lines doesn't have a limit, but _estimate uses max_lines
        counted = _count_lines(str(wordlist))
        assert counted == 1000

    def test_estimate_uses_max_lines(self, tmp_path):
        """An estimate capped at 10 lines is no larger than the uncapped estimate."""
        wordlist = tmp_path / "words.txt"
        wordlist.write_text("password\n" * 10000)
        path = str(wordlist)
        uncapped = _estimate_training_memory_mb(path)
        capped = _estimate_training_memory_mb(path, max_lines=10)
        assert uncapped >= capped
class TestMemoryLimitAutoTune:
    """Exercises the ``memory_limit`` argument of ``train``."""

    def test_auto_tunes_max_lines(self, tmp_path, capsys):
        """With ``memory_limit=2000`` an auto-set ``--max-lines`` notice is written to stderr."""
        from hate_crack.passgpt_train import train

        wordlist = tmp_path / "words.txt"
        wordlist.write_text("password\n" * 100)

        fake_tokenizer = MagicMock()
        fake_model = MagicMock()
        fake_model.config.n_positions = 16
        fake_trainer = MagicMock()

        with (
            patch(
                "hate_crack.passgpt_train._get_available_memory_mb", return_value=None
            ),
            patch("hate_crack.passgpt_train._configure_mps"),
            patch(
                "transformers.RobertaTokenizerFast.from_pretrained",
                return_value=fake_tokenizer,
            ),
            patch(
                "transformers.GPT2LMHeadModel.from_pretrained",
                return_value=fake_model,
            ),
            patch("transformers.Trainer", return_value=fake_trainer),
            patch("transformers.TrainingArguments"),
        ):
            train(
                training_file=str(wordlist),
                output_dir=str(tmp_path / "out"),
                base_model="test",
                epochs=1,
                batch_size=1,
                device="cpu",
                memory_limit=2000,
            )

        stderr_text = capsys.readouterr().err
        assert "--memory-limit 2000MB: auto-set --max-lines" in stderr_text

    def test_memory_limit_too_low_exits(self, tmp_path):
        """A ``memory_limit`` of 1 makes ``train`` raise SystemExit."""
        from hate_crack.passgpt_train import train

        wordlist = tmp_path / "words.txt"
        wordlist.write_text("password\n" * 10)
        with pytest.raises(SystemExit):
            train(
                training_file=str(wordlist),
                output_dir=str(tmp_path / "out"),
                base_model="test",
                epochs=1,
                batch_size=1,
                device="cpu",
                memory_limit=1,  # 1MB - way too low
            )