From 613c16e5673b180049501ff460b458a90ccb58fb Mon Sep 17 00:00:00 2001 From: Justin Bollinger Date: Tue, 3 Mar 2026 14:31:41 -0500 Subject: [PATCH] refactor: remove PassGPT attack (option 17) Remove the ML-based PassGPT password generation feature entirely: modules, config keys, [ml] optional dependency group, menu entries, tests, and documentation. --- README.md | 90 ------ TESTING.md | 1 - config.json.example | 4 - hate_crack.py | 2 - hate_crack/attacks.py | 92 ------ hate_crack/main.py | 162 ---------- hate_crack/passgpt_generate.py | 170 ----------- hate_crack/passgpt_train.py | 343 --------------------- pyproject.toml | 8 +- tests/test_passgpt_attack.py | 525 --------------------------------- tests/test_ui_menu_options.py | 10 - 11 files changed, 1 insertion(+), 1406 deletions(-) delete mode 100644 hate_crack/passgpt_generate.py delete mode 100644 hate_crack/passgpt_train.py delete mode 100644 tests/test_passgpt_attack.py diff --git a/README.md b/README.md index 6bf3474..88872c8 100644 --- a/README.md +++ b/README.md @@ -363,21 +363,6 @@ This installs hooks defined in `prek.toml` using the pre-commit local-repo TOML Note: prek 0.3.3 expects `repos = [...]` at the top level. The old `[hooks.] commands = [...]` format is not supported. -### Optional Dependencies - -The optional `[ml]` group includes ML/AI features required for the PassGPT attack: -- **torch** - PyTorch deep learning framework (for PassGPT attack and training) -- **transformers** - HuggingFace transformers library (for GPT-2 models) -- **datasets** - HuggingFace datasets library (for fine-tuning support) -- **accelerate** - HuggingFace training acceleration library - -Install with: -```bash -uv pip install -e ".[ml]" -``` - -PassGPT (option 17) will be hidden from the menu if ML dependencies are not installed. - ### Dev Dependencies The optional `[dev]` group includes: @@ -624,7 +609,6 @@ Tests automatically run on GitHub Actions for every push and pull request (Ubunt (14) Loopback Attack (15) LLM Attack (16) OMEN Attack - (17) PassGPT Attack (90) Download rules from Hashmob.net (91) Analyze Hashcat Rules @@ -766,76 +750,6 @@ Uses the Ordered Markov ENumerator (OMEN) to train a statistical password model * Pipes generated candidates directly into hashcat for cracking * Model files are stored in `~/.hate_crack/omen/` for persistence across sessions -#### PassGPT Attack -Uses PassGPT, a GPT-2 based password generator trained on leaked password datasets, to generate candidate passwords. PassGPT produces higher-quality candidates than traditional Markov models by leveraging transformer-based language modeling. You can use the default HuggingFace model or fine-tune a custom model on your own password wordlist. - -**Note:** This menu item is hidden unless ML dependencies are installed. - -**Requirements:** ML dependencies must be installed separately: -```bash -uv pip install -e ".[ml]" -``` - -This installs PyTorch and HuggingFace Transformers. GPU acceleration (CUDA/MPS) is auto-detected but not required. - -**Configuration keys:** -- `passgptModel` - HuggingFace model name (default: `javirandor/passgpt-10characters`) -- `passgptMaxCandidates` - Maximum candidates to generate (default: 1000000) -- `passgptBatchSize` - Generation batch size (default: 1024) -- `passgptTrainingList` - Default wordlist for fine-tuning (default: `rockyou.txt`) - -**Supported models:** -- `javirandor/passgpt-10characters` - Trained on passwords up to 10 characters (default) -- `javirandor/passgpt-16characters` - Trained on passwords up to 16 characters -- Any compatible GPT-2 model on HuggingFace -- Locally fine-tuned models (stored in `~/.hate_crack/passgpt/`) - -**Training a Custom Model:** -When you select the PassGPT Attack (option 17), the menu presents: -- List of available models (default HF model + any locally fine-tuned models) -- Option (T) to train a new model on a custom wordlist -- Fine-tuned models are automatically saved to `~/.hate_crack/passgpt//` for reuse - -To train a new model: -1. Select option (T) from the model selection menu -2. Choose a training wordlist (supports tab-complete file selection) -3. Optionally specify a base model (defaults to configured `passgptModel`) -4. Training will fine-tune the model on your wordlist and save it locally - -Fine-tuned models can be reused in future cracking sessions and appear in the model selection menu alongside the default models. - -**Apple Silicon (MPS) Performance Notes:** -- Batch size is automatically capped at 64 to prevent memory errors on MPS devices -- GPU memory watermark ratios are configured for stability (50% high, 30% low) -- Specify `--device cpu` to force CPU generation if MPS has issues - -**Standalone usage:** - -Generate candidates: -```bash -python -m hate_crack.passgpt_generate --num 1000 --model javirandor/passgpt-10characters -``` - -Fine-tune a custom model: -```bash -python -m hate_crack.passgpt_train --training-file wordlist.txt --output-dir ~/.hate_crack/passgpt/my_model -``` - -**Generator command-line options:** -- `--num` - Number of candidates to generate (default: 1000000) -- `--model` - HuggingFace model name or local path (default: javirandor/passgpt-10characters) -- `--batch-size` - Generation batch size (default: 1024) -- `--max-length` - Max token length including special tokens (default: 12) -- `--device` - Device: cuda, mps, or cpu (default: auto-detect) - -**Training command-line options:** -- `--training-file` - Path to password wordlist for fine-tuning (required) -- `--output-dir` - Directory to save the fine-tuned model (required) -- `--base-model` - Base HuggingFace model to fine-tune (default: javirandor/passgpt-10characters) -- `--epochs` - Number of training epochs (default: 3) -- `--batch-size` - Training batch size (default: 8) -- `--device` - Device: cuda, mps, or cpu (default: auto-detect) - #### Download Rules from Hashmob.net Downloads the latest rule files from Hashmob.net's rule repository. These rules are curated and optimized for password cracking and can be used with the Quick Crack and Loopback Attack modes. @@ -872,10 +786,6 @@ Interactive menu for downloading and managing wordlists from Weakpass.com via Bi Version 2.0+ - Added automatic update checks on startup (check_for_updates config option) - Added `packaging` dependency for version comparison - - Added PassGPT Attack (option 17) using GPT-2 based ML password generation - - Added PassGPT fine-tuning capability for custom password models - - Added PassGPT configuration keys (passgptModel, passgptMaxCandidates, passgptBatchSize, passgptTrainingList) - - Added `[ml]` optional dependency group for PyTorch, Transformers, and Datasets - Added OMEN Attack (option 16) using statistical model-based password generation - Added OMEN configuration keys (omenTrainingList, omenMaxCandidates) - Added LLM Attack (option 15) using Ollama for AI-generated password candidates diff --git a/TESTING.md b/TESTING.md index 50c3ef2..36021a1 100644 --- a/TESTING.md +++ b/TESTING.md @@ -70,7 +70,6 @@ No external services, binaries, or network access required. | `test_invalid_hcatpath.py` | Startup error on invalid hashcat path | | `test_version_check.py` | Update check logic | | `test_omen_attack.py` | OMEN attack handler | -| `test_passgpt_attack.py` | PassGPT attack handler | | `test_pipal.py` | Pipal integration helpers | | `test_pipal_integration.py` | Pipal menu and output parsing | | `test_dependencies.py` | External dependency detection | diff --git a/config.json.example b/config.json.example index ab4ac69..392cdea 100644 --- a/config.json.example +++ b/config.json.example @@ -26,9 +26,5 @@ "ollamaNumCtx": 2048, "omenTrainingList": "rockyou.txt", "omenMaxCandidates": 1000000, - "passgptModel": "javirandor/passgpt-10characters", - "passgptMaxCandidates": 1000000, - "passgptBatchSize": 1024, - "passgptTrainingList": "rockyou.txt", "check_for_updates": true } diff --git a/hate_crack.py b/hate_crack.py index d9fb181..a73b440 100755 --- a/hate_crack.py +++ b/hate_crack.py @@ -95,8 +95,6 @@ def get_main_menu_options(): "98": show_readme, "99": quit_hc, } - if globals().get("HAS_ML_DEPS"): - options["17"] = _attacks.passgpt_attack # Only show Hashview API when configured. if globals().get("hashview_api_key"): options["94"] = hashview_api diff --git a/hate_crack/attacks.py b/hate_crack/attacks.py index f93586b..d720d3d 100644 --- a/hate_crack/attacks.py +++ b/hate_crack/attacks.py @@ -532,95 +532,3 @@ def omen_attack(ctx: Any) -> None: if not max_candidates: max_candidates = str(ctx.omenMaxCandidates) ctx.hcatOmen(ctx.hcatHashType, ctx.hcatHashFile, int(max_candidates)) - - -def passgpt_attack(ctx: Any) -> None: - print("\n\tPassGPT Attack (ML Password Generator)") - if not ctx.HAS_ML_DEPS: - print("\n\tPassGPT requires ML dependencies. Install them with:") - print('\t uv pip install -e ".[ml]"') - return - - # Build model choices: default HF model + any local fine-tuned models - default_model = ctx.passgptModel - models = [(default_model, f"{default_model} (default)")] - - model_dir = ctx._passgpt_model_dir() - if os.path.isdir(model_dir): - for entry in sorted(os.listdir(model_dir)): - entry_path = os.path.join(model_dir, entry) - if os.path.isdir(entry_path) and os.path.isfile( - os.path.join(entry_path, "config.json") - ): - models.append((entry_path, f"{entry} (local)")) - - print("\n\tSelect a model:") - for i, (_, label) in enumerate(models, 1): - print(f"\t ({i}) {label}") - print("\t (T) Train a new model") - - choice = input("\n\tChoice: ").strip() - - if choice.upper() == "T": - print("\n\tTrain a new PassGPT model") - print("\n\t--- Estimated Training Times (14M passwords, 3 epochs) ---") - print("\t CUDA (RTX 3090/4090): 1-3 hours") - print("\t MPS (Apple Silicon): 6-12 hours") - print("\t CPU: Very slow (not recommended)") - print("\t Use --max-lines to reduce training data for faster runs.") - training_file = ctx.select_file_with_autocomplete( - "Select training wordlist", base_dir=ctx.hcatWordlists - ) - if not training_file: - print("\n\tNo training file selected. Aborting.") - return - if isinstance(training_file, list): - training_file = training_file[0] - base = input(f"\n\tBase model ({default_model}): ").strip() - if not base: - base = default_model - - from hate_crack.passgpt_train import _detect_device - - detected = _detect_device() - device_labels = {"cuda": "cuda", "mps": "mps (Apple Silicon)", "cpu": "cpu"} - device_options = ["cuda", "mps", "cpu"] - print("\n\tSelect training device:") - for i, dev in enumerate(device_options, 1): - label = device_labels[dev] - suffix = " (detected)" if dev == detected else "" - print(f"\t ({i}) {label}{suffix}") - default_idx = device_options.index(detected) + 1 - device_choice = input(f"\n\tDevice [{default_idx}]: ").strip() - device_map = {"1": "cuda", "2": "mps", "3": "cpu", "": detected} - device = device_map.get(device_choice, detected) - - result = ctx.hcatPassGPTTrain(training_file, base, device=device) - if result is None: - print("\n\tTraining failed. Returning to menu.") - return - model_name = result - else: - try: - idx = int(choice) - 1 - if 0 <= idx < len(models): - model_name = models[idx][0] - else: - print("\n\tInvalid selection.") - return - except ValueError: - print("\n\tInvalid selection.") - return - - max_candidates = input( - f"\n\tMax candidates to generate ({ctx.passgptMaxCandidates}): " - ).strip() - if not max_candidates: - max_candidates = str(ctx.passgptMaxCandidates) - ctx.hcatPassGPT( - ctx.hcatHashType, - ctx.hcatHashFile, - int(max_candidates), - model_name=model_name, - batch_size=ctx.passgptBatchSize, - ) diff --git a/hate_crack/main.py b/hate_crack/main.py index 5a0e0d3..bf98513 100755 --- a/hate_crack/main.py +++ b/hate_crack/main.py @@ -37,18 +37,6 @@ try: except Exception: pass -# Disable HuggingFace telemetry before any HF-related imports -os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" - -HAS_ML_DEPS = False -try: - import torch # noqa: F401 - import transformers # noqa: F401 - - HAS_ML_DEPS = True -except Exception: - pass - # Ensure project root is on sys.path so package imports work when loaded via spec. _root_dir = os.path.dirname(os.path.realpath(__file__)) if _root_dir not in sys.path: @@ -511,42 +499,6 @@ except KeyError as e: ) ) omenMaxCandidates = int(default_config.get("omenMaxCandidates", 1000000)) -try: - passgptModel = config_parser["passgptModel"] -except KeyError as e: - print( - "{0} is not defined in config.json using defaults from config.json.example".format( - e - ) - ) - passgptModel = default_config.get("passgptModel", "javirandor/passgpt-10characters") -try: - passgptMaxCandidates = int(config_parser["passgptMaxCandidates"]) -except KeyError as e: - print( - "{0} is not defined in config.json using defaults from config.json.example".format( - e - ) - ) - passgptMaxCandidates = int(default_config.get("passgptMaxCandidates", 1000000)) -try: - passgptBatchSize = int(config_parser["passgptBatchSize"]) -except KeyError as e: - print( - "{0} is not defined in config.json using defaults from config.json.example".format( - e - ) - ) - passgptBatchSize = int(default_config.get("passgptBatchSize", 1024)) -try: - passgptTrainingList = config_parser["passgptTrainingList"] -except KeyError as e: - print( - "{0} is not defined in config.json using defaults from config.json.example".format( - e - ) - ) - passgptTrainingList = default_config.get("passgptTrainingList", "rockyou.txt") try: check_for_updates_enabled = config_parser["check_for_updates"] except KeyError as e: @@ -698,7 +650,6 @@ hcatGoodMeasureBaseList = _normalize_wordlist_setting( ) hcatPrinceBaseList = _normalize_wordlist_setting(hcatPrinceBaseList, wordlists_dir) omenTrainingList = _normalize_wordlist_setting(omenTrainingList, wordlists_dir) -passgptTrainingList = _normalize_wordlist_setting(passgptTrainingList, wordlists_dir) if not SKIP_INIT: # Verify hashcat binary is available # hcatBin should be in PATH or be an absolute path (resolved from hcatPath + hcatBin if configured) @@ -2304,111 +2255,6 @@ def hcatOmen(hcatHashType, hcatHashFile, max_candidates): enum_proc.kill() -# PassGPT model directory - writable location for fine-tuned models. -# Models are saved to ~/.hate_crack/passgpt//. -def _passgpt_model_dir(): - model_dir = os.path.join(os.path.expanduser("~"), ".hate_crack", "passgpt") - os.makedirs(model_dir, exist_ok=True) - return model_dir - - -# PassGPT Attack - Fine-tune a model on a custom wordlist -def hcatPassGPTTrain(training_file, base_model=None, device=None): - training_file = os.path.abspath(training_file) - if not os.path.isfile(training_file): - print(f"Error: Training file not found: {training_file}") - return None - if base_model is None: - base_model = passgptModel - # Derive output dir name from training file - basename = os.path.splitext(os.path.basename(training_file))[0] - # Sanitize: replace non-alphanumeric chars with underscores - sanitized = "".join(c if c.isalnum() or c in "-_" else "_" for c in basename) - output_dir = os.path.join(_passgpt_model_dir(), sanitized) - os.makedirs(output_dir, exist_ok=True) - cmd = [ - sys.executable, - "-m", - "hate_crack.passgpt_train", - "--training-file", - training_file, - "--base-model", - base_model, - "--output-dir", - output_dir, - ] - if device: - cmd.extend(["--device", device]) - if debug_mode: - cmd.append("--debug") - print(f"[*] Running: {_format_cmd(cmd)}") - proc = subprocess.Popen(cmd) - try: - proc.wait() - except KeyboardInterrupt: - print("Killing PID {0}...".format(str(proc.pid))) - proc.kill() - return None - if proc.returncode == 0: - print(f"PassGPT model training complete. Model saved to: {output_dir}") - return output_dir - else: - print(f"PassGPT training failed with exit code {proc.returncode}") - return None - - -# PassGPT Attack - Generate candidates with ML model and pipe to hashcat -def hcatPassGPT( - hcatHashType, - hcatHashFile, - max_candidates, - model_name=None, - batch_size=None, -): - global hcatProcess - if model_name is None: - model_name = passgptModel - if batch_size is None: - batch_size = passgptBatchSize - gen_cmd = [ - sys.executable, - "-m", - "hate_crack.passgpt_generate", - "--num", - str(max_candidates), - "--model", - model_name, - "--batch-size", - str(batch_size), - ] - if debug_mode: - gen_cmd.append("--debug") - hashcat_cmd = [ - hcatBin, - "-m", - hcatHashType, - hcatHashFile, - "--session", - generate_session_id(), - "-o", - f"{hcatHashFile}.out", - ] - hashcat_cmd.extend(shlex.split(hcatTuning)) - _append_potfile_arg(hashcat_cmd) - print(f"[*] Running: {_format_cmd(gen_cmd)} | {_format_cmd(hashcat_cmd)}") - _debug_cmd(hashcat_cmd) - gen_proc = subprocess.Popen(gen_cmd, stdout=subprocess.PIPE) - hcatProcess = subprocess.Popen(hashcat_cmd, stdin=gen_proc.stdout) - gen_proc.stdout.close() - try: - hcatProcess.wait() - gen_proc.wait() - except KeyboardInterrupt: - print("Killing PID {0}...".format(str(hcatProcess.pid))) - hcatProcess.kill() - gen_proc.kill() - - # Extra - Good Measure def hcatGoodMeasure(hcatHashType, hcatHashFile): global hcatExtraCount @@ -3331,10 +3177,6 @@ def omen_attack(): return _attacks.omen_attack(_attack_ctx()) -def passgpt_attack(): - return _attacks.passgpt_attack(_attack_ctx()) - - # convert hex words for recycling def convert_hex(working_file): processed_words = [] @@ -3574,8 +3416,6 @@ def get_main_menu_options(): "98": show_readme, "99": quit_hc, } - if HAS_ML_DEPS: - options["17"] = passgpt_attack # Only show this when Hashview API is configured (requested behavior). if hashview_api_key: options["94"] = hashview_api @@ -4221,8 +4061,6 @@ def main(): print("\t(14) Loopback Attack") print("\t(15) LLM Attack") print("\t(16) OMEN Attack") - if HAS_ML_DEPS: - print("\t(17) PassGPT Attack") print("\n\t(90) Download rules from Hashmob.net") print("\n\t(91) Analyze Hashcat Rules") print("\t(92) Download wordlists from Hashmob.net") diff --git a/hate_crack/passgpt_generate.py b/hate_crack/passgpt_generate.py deleted file mode 100644 index 41e9b4c..0000000 --- a/hate_crack/passgpt_generate.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Standalone PassGPT password candidate generator. - -Invokable as ``python -m hate_crack.passgpt_generate``. Outputs one -candidate password per line to stdout so it can be piped directly into -hashcat. Progress and diagnostic messages go to stderr. -""" - -from __future__ import annotations - -import argparse -import os -import sys - -# Disable HuggingFace telemetry before any HF imports -os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" - - -_MPS_BATCH_SIZE_CAP = 64 - - -def _detect_device() -> str: - import torch - - if torch.cuda.is_available(): - return "cuda" - if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): - return "mps" - return "cpu" - - -def _configure_mps() -> None: - """Set MPS memory limits before torch is imported.""" - import os - - os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "0.5") - os.environ.setdefault("PYTORCH_MPS_LOW_WATERMARK_RATIO", "0.3") - - -def generate( - num: int, - model_name: str, - batch_size: int, - max_length: int, - device: str | None, - debug: bool = False, -) -> None: - # If MPS is requested (or will be auto-detected), set memory limit before importing torch - if device == "mps" or device is None: - _configure_mps() - - if debug: - import logging - - logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) - logging.getLogger("urllib3").setLevel(logging.DEBUG) - logging.getLogger("huggingface_hub").setLevel(logging.DEBUG) - - import torch - from transformers import GPT2LMHeadModel # type: ignore[attr-defined] - from transformers import RobertaTokenizerFast # type: ignore[attr-defined] - - if device is None: - device = _detect_device() - - if device == "mps" and batch_size > _MPS_BATCH_SIZE_CAP: - print( - f"[*] Capping batch size from {batch_size} to {_MPS_BATCH_SIZE_CAP} for MPS", - file=sys.stderr, - ) - batch_size = _MPS_BATCH_SIZE_CAP - - print(f"[*] Loading model {model_name} on {device}", file=sys.stderr) - tokenizer = RobertaTokenizerFast.from_pretrained(model_name) - model = GPT2LMHeadModel.from_pretrained(model_name).to(device) # type: ignore[arg-type] - model.eval() - - generated = 0 - seen: set[str] = set() - - print(f"[*] Generating {num} candidates (batch_size={batch_size})", file=sys.stderr) - with torch.no_grad(): - while generated < num: - current_batch = min(batch_size, num - generated) - input_ids = torch.full( - (current_batch, 1), - tokenizer.bos_token_id, - dtype=torch.long, - device=device, - ) - output = model.generate( - input_ids, - max_length=max_length, - do_sample=True, - top_k=0, - top_p=1.0, - num_return_sequences=current_batch, - pad_token_id=tokenizer.eos_token_id, - ) - # Strip BOS token - output = output[:, 1:] - for seq in output: - token_strs = [tokenizer.decode([t]) for t in seq] - password = "" - for t in token_strs: - if t in (tokenizer.eos_token, tokenizer.pad_token): - break - password += t.replace(" ", "") - if password and password not in seen: - seen.add(password) - sys.stdout.write(password + "\n") - generated += 1 - if generated >= num: - break - - sys.stdout.flush() - print(f"[*] Done. Generated {generated} unique candidates.", file=sys.stderr) - - -def main() -> None: - parser = argparse.ArgumentParser( - description="Generate password candidates using PassGPT" - ) - parser.add_argument( - "--num", - type=int, - default=1000000, - help="Number of candidates to generate (default: 1000000)", - ) - parser.add_argument( - "--model", - type=str, - default="javirandor/passgpt-10characters", - help="HuggingFace model name (default: javirandor/passgpt-10characters)", - ) - parser.add_argument( - "--batch-size", - type=int, - default=1024, - help="Generation batch size (default: 1024)", - ) - parser.add_argument( - "--max-length", - type=int, - default=12, - help="Max token length including special tokens (default: 12)", - ) - parser.add_argument( - "--device", - type=str, - default=None, - help="Device: cuda, mps, or cpu (default: auto-detect)", - ) - parser.add_argument( - "--debug", - action="store_true", - help="Enable debug logging for HTTP requests", - ) - args = parser.parse_args() - generate( - num=args.num, - model_name=args.model, - batch_size=args.batch_size, - max_length=args.max_length, - device=args.device, - debug=args.debug, - ) - - -if __name__ == "__main__": - main() diff --git a/hate_crack/passgpt_train.py b/hate_crack/passgpt_train.py deleted file mode 100644 index 3799738..0000000 --- a/hate_crack/passgpt_train.py +++ /dev/null @@ -1,343 +0,0 @@ -"""Fine-tune a PassGPT model on a custom password wordlist. - -Invokable as ``python -m hate_crack.passgpt_train``. Progress and -diagnostic messages go to stderr. -""" - -from __future__ import annotations - -import argparse -import os -import subprocess -import sys - - -# Disable HuggingFace telemetry before any HF imports -os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" - - -def _detect_device() -> str: - import torch - - if torch.cuda.is_available(): - return "cuda" - if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): - return "mps" - return "cpu" - - -def _configure_mps() -> None: - """Set MPS memory limits before torch is imported.""" - os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "0.5") - os.environ.setdefault("PYTORCH_MPS_LOW_WATERMARK_RATIO", "0.3") - - -def _get_available_memory_mb() -> int | None: - """Return available system RAM in MB, or None if detection fails.""" - try: - if sys.platform == "linux": - with open("/proc/meminfo") as f: - for line in f: - if line.startswith("MemAvailable:"): - return int(line.split()[1]) // 1024 - return None - elif sys.platform == "darwin": - # macOS: try os.sysconf first, fall back to sysctl - try: - page_size = os.sysconf("SC_PAGE_SIZE") - avail_pages = os.sysconf("SC_AVPHYS_PAGES") - if page_size > 0 and avail_pages > 0: - return (page_size * avail_pages) // (1024 * 1024) - except (ValueError, OSError): - pass - # Fallback: use sysctl for total memory (not available, but better than nothing) - try: - result = subprocess.run( - ["sysctl", "-n", "hw.memsize"], - capture_output=True, - text=True, - timeout=5, - ) - if result.returncode == 0: - return int(result.stdout.strip()) // (1024 * 1024) - except (subprocess.TimeoutExpired, FileNotFoundError, ValueError): - pass - return None - else: - return None - except Exception: - return None - - -def _count_lines(filepath: str) -> int: - """Count non-empty lines in a file without loading it into memory.""" - count = 0 - with open(filepath, encoding="utf-8", errors="replace") as f: - for line in f: - if line.strip(): - count += 1 - return count - - -def _estimate_training_memory_mb( - training_file: str, max_length: int = 16, max_lines: int = 0 -) -> int: - """Estimate peak memory usage in MB for training on the given file. - - Components: - - Model: ~500MB (GPT-2 small) - - Optimizer states: ~1000MB (2x model for AdamW momentum/variance) - - Dataset offset index: ~8 bytes per line - - Per-batch activations and tokenization buffer: ~200MB - """ - num_lines = _count_lines(training_file) - if max_lines > 0: - num_lines = min(num_lines, max_lines) - - model_mb = 500 - optimizer_mb = 1000 - # Offset index: 8 bytes per line (Python int in list) - index_mb = (num_lines * 8) // (1024 * 1024) - # Activation/buffer overhead - buffer_mb = 200 - - return model_mb + optimizer_mb + index_mb + buffer_mb - - -def train( - training_file: str, - output_dir: str, - base_model: str, - epochs: int, - batch_size: int, - device: str | None, - max_lines: int = 0, - memory_limit: int = 0, - debug: bool = False, -) -> None: - # --- Memory pre-check --- - if memory_limit > 0: - # Auto-tune max_lines to fit within memory_limit - estimated_base = _estimate_training_memory_mb(training_file, max_lines=1) - per_line_bytes = 8 # offset index cost per line - available_for_data = (memory_limit - estimated_base) * 1024 * 1024 - if available_for_data > 0: - auto_max_lines = available_for_data // per_line_bytes - if max_lines == 0 or auto_max_lines < max_lines: - max_lines = max(1, int(auto_max_lines)) - print( - f"[*] --memory-limit {memory_limit}MB: auto-set --max-lines to {max_lines}", - file=sys.stderr, - ) - else: - print( - f"[!] --memory-limit {memory_limit}MB is too low for model overhead alone.", - file=sys.stderr, - ) - sys.exit(1) - - if debug: - import logging - - logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) - logging.getLogger("urllib3").setLevel(logging.DEBUG) - logging.getLogger("huggingface_hub").setLevel(logging.DEBUG) - - estimated = _estimate_training_memory_mb(training_file, max_lines=max_lines) - available = _get_available_memory_mb() - if available is not None and estimated > available: - print( - f"[!] Estimated memory usage ({estimated}MB) exceeds available RAM ({available}MB).", - file=sys.stderr, - ) - print( - "[!] Use --max-lines to limit wordlist size or --memory-limit to auto-tune.", - file=sys.stderr, - ) - sys.exit(1) - - if device == "mps" or device is None: - _configure_mps() - - import torch - from transformers import ( # type: ignore[attr-defined] - GPT2LMHeadModel, - RobertaTokenizerFast, - Trainer, - TrainingArguments, - ) - - if device is None: - device = _detect_device() - - print(f"[*] Loading base model {base_model} on {device}", file=sys.stderr) - tokenizer = RobertaTokenizerFast.from_pretrained(base_model) - model = GPT2LMHeadModel.from_pretrained(base_model).to(device) # type: ignore[arg-type] - - max_length = ( - model.config.n_positions if hasattr(model.config, "n_positions") else 16 - ) - - # Enable gradient checkpointing to reduce activation memory - model.gradient_checkpointing_enable() - - print(f"[*] Indexing training file: {training_file}", file=sys.stderr) - - class LazyPasswordDataset(torch.utils.data.Dataset): # type: ignore[type-arg] - """Dataset that indexes file byte offsets and tokenizes on-the-fly.""" - - def __init__( - self, - filepath: str, - tokenizer: object, - max_length: int, - max_lines: int = 0, - ): - self.filepath = filepath - self.tokenizer = tokenizer - self.max_length = max_length - self.offsets: list[int] = [] - with open(filepath, "rb") as f: - while True: - offset = f.tell() - line = f.readline() - if not line: - break - if line.strip(): - self.offsets.append(offset) - if max_lines > 0 and len(self.offsets) >= max_lines: - break - - def __len__(self) -> int: - return len(self.offsets) - - def __getitem__(self, idx: int) -> dict[str, object]: # type: ignore[override] - with open(self.filepath, "rb") as f: - f.seek(self.offsets[idx]) - line = f.readline().decode("utf-8", errors="replace").strip() - enc = self.tokenizer( # type: ignore[operator] - line, - truncation=True, - padding="max_length", - max_length=self.max_length, - return_tensors="pt", - ) - input_ids = enc["input_ids"].squeeze(0) - attention_mask = enc["attention_mask"].squeeze(0) - return { - "input_ids": input_ids, - "attention_mask": attention_mask, - "labels": input_ids, - } - - dataset = LazyPasswordDataset(training_file, tokenizer, max_length, max_lines) - print(f"[*] Indexed {len(dataset)} passwords", file=sys.stderr) - - # Use CPU for training args if device is MPS (Trainer handles device placement) - use_cpu = device not in ("cuda",) - use_fp16 = device == "cuda" - training_args = TrainingArguments( - output_dir=output_dir, - num_train_epochs=epochs, - per_device_train_batch_size=batch_size, - save_strategy="epoch", - logging_steps=100, - use_cpu=use_cpu, - report_to="none", - push_to_hub=False, - gradient_accumulation_steps=4, - fp16=use_fp16, - gradient_checkpointing=True, - ) - - trainer = Trainer( - model=model, - args=training_args, - train_dataset=dataset, - ) - - print( - f"[*] Starting training: {epochs} epochs, batch_size={batch_size}, device={device}", - file=sys.stderr, - ) - trainer.train() - - print(f"[*] Saving model to {output_dir}", file=sys.stderr) - model.save_pretrained(output_dir) - tokenizer.save_pretrained(output_dir) - print("[*] Training complete.", file=sys.stderr) - - -def main() -> None: - parser = argparse.ArgumentParser( - description="Fine-tune a PassGPT model on a password wordlist" - ) - parser.add_argument( - "--training-file", - type=str, - required=True, - help="Path to the password wordlist for training", - ) - parser.add_argument( - "--base-model", - type=str, - default="javirandor/passgpt-10characters", - help="Base HuggingFace model to fine-tune (default: javirandor/passgpt-10characters)", - ) - parser.add_argument( - "--output-dir", - type=str, - required=True, - help="Directory to save the fine-tuned model", - ) - parser.add_argument( - "--epochs", - type=int, - default=3, - help="Number of training epochs (default: 3)", - ) - parser.add_argument( - "--batch-size", - type=int, - default=8, - help="Training batch size (default: 8)", - ) - parser.add_argument( - "--device", - type=str, - default=None, - help="Device: cuda, mps, or cpu (default: auto-detect)", - ) - parser.add_argument( - "--max-lines", - type=int, - default=0, - help="Limit training to the first N lines of the wordlist (default: 0, no limit)", - ) - parser.add_argument( - "--memory-limit", - type=int, - default=0, - help="Memory cap in MB; auto-tunes --max-lines to fit (default: 0, no limit)", - ) - parser.add_argument( - "--debug", - action="store_true", - help="Enable debug logging for HTTP requests", - ) - args = parser.parse_args() - train( - training_file=args.training_file, - output_dir=args.output_dir, - base_model=args.base_model, - epochs=args.epochs, - batch_size=args.batch_size, - device=args.device, - max_lines=args.max_lines, - memory_limit=args.memory_limit, - debug=args.debug, - ) - - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml index 1766ef6..4fb7201 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,12 +19,6 @@ dependencies = [ hate_crack = "hate_crack.__main__:main" [project.optional-dependencies] -ml = [ - "torch>=2.0.0", - "transformers>=4.30.0", - "datasets>=2.14.0", - "accelerate>=1.1.0", -] dev = [ "ty==0.0.17", "ruff==0.15.1", @@ -75,7 +69,7 @@ exclude = [ [tool.ty.rules] # Module-level globals in main.py are assigned at runtime unresolved-reference = "warn" -# Optional deps (torch, transformers, hashcat_rosetta) not always installed +# Optional deps (hashcat_rosetta) not always installed unresolved-import = "warn" # BeautifulSoup union types and module-level globals unresolved-attribute = "warn" diff --git a/tests/test_passgpt_attack.py b/tests/test_passgpt_attack.py deleted file mode 100644 index fd81e02..0000000 --- a/tests/test_passgpt_attack.py +++ /dev/null @@ -1,525 +0,0 @@ -import importlib.util -import os -import sys -from unittest.mock import MagicMock, patch - -import pytest - -_has_transformers = importlib.util.find_spec("transformers") is not None - -from hate_crack.passgpt_train import ( - _count_lines, - _estimate_training_memory_mb, - _get_available_memory_mb, -) - - -@pytest.fixture -def main_module(hc_module): - """Return the underlying hate_crack.main module for direct patching.""" - return hc_module._main - - -class TestHcatPassGPT: - def test_builds_correct_pipe_commands(self, main_module): - with ( - patch.object(main_module, "hcatBin", "hashcat"), - patch.object(main_module, "hcatTuning", "--force"), - patch.object(main_module, "hcatPotfilePath", ""), - patch.object(main_module, "hcatHashFile", "/tmp/hashes.txt", create=True), - patch.object( - main_module, "passgptModel", "javirandor/passgpt-10characters" - ), - patch.object(main_module, "passgptBatchSize", 1024), - patch("hate_crack.main.subprocess.Popen") as mock_popen, - ): - mock_gen_proc = MagicMock() - mock_gen_proc.stdout = MagicMock() - mock_hashcat_proc = MagicMock() - mock_hashcat_proc.wait.return_value = None - mock_gen_proc.wait.return_value = None - mock_popen.side_effect = [mock_gen_proc, mock_hashcat_proc] - - main_module.hcatPassGPT("1000", "/tmp/hashes.txt", 500000) - - assert mock_popen.call_count == 2 - # First call: passgpt generator - gen_cmd = mock_popen.call_args_list[0][0][0] - assert gen_cmd[0] == sys.executable - assert "-m" in gen_cmd - assert "hate_crack.passgpt_generate" in gen_cmd - assert "--num" in gen_cmd - assert "500000" in gen_cmd - assert "--model" in gen_cmd - assert "javirandor/passgpt-10characters" in gen_cmd - assert "--batch-size" in gen_cmd - assert "1024" in gen_cmd - # Second call: hashcat - hashcat_cmd = mock_popen.call_args_list[1][0][0] - assert hashcat_cmd[0] == "hashcat" - assert "1000" in hashcat_cmd - assert "/tmp/hashes.txt" in hashcat_cmd - - def test_custom_model_and_batch_size(self, main_module): - with ( - patch.object(main_module, "hcatBin", "hashcat"), - patch.object(main_module, "hcatTuning", "--force"), - patch.object(main_module, "hcatPotfilePath", ""), - patch.object(main_module, "hcatHashFile", "/tmp/hashes.txt", create=True), - patch.object( - main_module, "passgptModel", "javirandor/passgpt-10characters" - ), - patch.object(main_module, "passgptBatchSize", 1024), - patch("hate_crack.main.subprocess.Popen") as mock_popen, - ): - mock_gen_proc = MagicMock() - mock_gen_proc.stdout = MagicMock() - mock_hashcat_proc = MagicMock() - mock_hashcat_proc.wait.return_value = None - mock_gen_proc.wait.return_value = None - mock_popen.side_effect = [mock_gen_proc, mock_hashcat_proc] - - main_module.hcatPassGPT( - "1000", - "/tmp/hashes.txt", - 100000, - model_name="custom/model", - batch_size=512, - ) - - gen_cmd = mock_popen.call_args_list[0][0][0] - assert "custom/model" in gen_cmd - assert "512" in gen_cmd - - -class TestHcatPassGPTTrain: - def test_builds_correct_subprocess_command(self, main_module, tmp_path): - training_file = tmp_path / "wordlist.txt" - training_file.write_text("password123\nabc456\n") - - with ( - patch.object( - main_module, "passgptModel", "javirandor/passgpt-10characters" - ), - patch("hate_crack.main.subprocess.Popen") as mock_popen, - ): - mock_proc = MagicMock() - mock_proc.returncode = 0 - mock_proc.wait.return_value = None - mock_popen.return_value = mock_proc - - with patch.object( - main_module, - "_passgpt_model_dir", - return_value=str(tmp_path / "models"), - ): - result = main_module.hcatPassGPTTrain(str(training_file)) - - assert result is not None - assert mock_popen.call_count == 1 - cmd = mock_popen.call_args[0][0] - assert cmd[0] == sys.executable - assert "-m" in cmd - assert "hate_crack.passgpt_train" in cmd - assert "--training-file" in cmd - assert str(training_file) in cmd - assert "--base-model" in cmd - assert "javirandor/passgpt-10characters" in cmd - assert "--output-dir" in cmd - - def test_missing_training_file(self, main_module, capsys): - result = main_module.hcatPassGPTTrain("/nonexistent/wordlist.txt") - assert result is None - captured = capsys.readouterr() - assert "Training file not found" in captured.out - - def test_custom_base_model(self, main_module, tmp_path): - training_file = tmp_path / "wordlist.txt" - training_file.write_text("test\n") - - with patch("hate_crack.main.subprocess.Popen") as mock_popen: - mock_proc = MagicMock() - mock_proc.returncode = 0 - mock_proc.wait.return_value = None - mock_popen.return_value = mock_proc - - with patch.object( - main_module, - "_passgpt_model_dir", - return_value=str(tmp_path / "models"), - ): - main_module.hcatPassGPTTrain( - str(training_file), base_model="custom/base-model" - ) - - cmd = mock_popen.call_args[0][0] - assert "custom/base-model" in cmd - - def test_training_failure_returns_none(self, main_module, tmp_path): - training_file = tmp_path / "wordlist.txt" - training_file.write_text("test\n") - - with ( - patch.object( - main_module, "passgptModel", "javirandor/passgpt-10characters" - ), - patch("hate_crack.main.subprocess.Popen") as mock_popen, - ): - mock_proc = MagicMock() - mock_proc.returncode = 1 - mock_proc.wait.return_value = None - mock_popen.return_value = mock_proc - - with patch.object( - main_module, - "_passgpt_model_dir", - return_value=str(tmp_path / "models"), - ): - result = main_module.hcatPassGPTTrain(str(training_file)) - - assert result is None - - -class TestPassGPTModelDir: - def test_creates_directory(self, main_module, tmp_path): - target = str(tmp_path / "passgpt_models") - with patch("hate_crack.main.os.path.expanduser", return_value=str(tmp_path)): - result = main_module._passgpt_model_dir() - assert os.path.isdir(result) - assert result.endswith("passgpt") - - -class TestPassGPTAttackHandler: - def _make_ctx(self, model_dir=None): - ctx = MagicMock() - ctx.HAS_ML_DEPS = True - ctx.passgptMaxCandidates = 1000000 - ctx.passgptModel = "javirandor/passgpt-10characters" - ctx.passgptBatchSize = 1024 - ctx.hcatHashType = "1000" - ctx.hcatHashFile = "/tmp/hashes.txt" - ctx.hcatWordlists = "/tmp/wordlists" - if model_dir is None: - ctx._passgpt_model_dir.return_value = "/nonexistent/empty" - else: - ctx._passgpt_model_dir.return_value = model_dir - return ctx - - def test_select_default_model_and_generate(self): - ctx = self._make_ctx() - - # "1" selects default model, "" accepts default max candidates - inputs = iter(["1", ""]) - with ( - patch("builtins.input", side_effect=inputs), - patch("hate_crack.attacks.os.path.isdir", return_value=False), - ): - from hate_crack.attacks import passgpt_attack - - passgpt_attack(ctx) - - ctx.hcatPassGPT.assert_called_once_with( - "1000", - "/tmp/hashes.txt", - 1000000, - model_name="javirandor/passgpt-10characters", - batch_size=1024, - ) - - def test_select_local_model(self, tmp_path): - # Create a fake local model directory - model_dir = tmp_path / "passgpt" - local_model = model_dir / "my_model" - local_model.mkdir(parents=True) - (local_model / "config.json").write_text("{}") - - ctx = self._make_ctx(model_dir=str(model_dir)) - - # "2" selects the local model, "" accepts default max candidates - inputs = iter(["2", ""]) - with ( - patch("builtins.input", side_effect=inputs), - patch("hate_crack.attacks.os.path.isdir", return_value=True), - patch("hate_crack.attacks.os.listdir", return_value=["my_model"]), - patch("hate_crack.attacks.os.path.isfile", return_value=True), - patch( - "hate_crack.attacks.os.path.isdir", - side_effect=lambda p: True, - ), - ): - from hate_crack.attacks import passgpt_attack - - passgpt_attack(ctx) - - ctx.hcatPassGPT.assert_called_once() - call_kwargs = ctx.hcatPassGPT.call_args - # The model_name should be the local path - assert call_kwargs[1]["model_name"] == str(local_model) - - def test_train_new_model(self): - ctx = self._make_ctx() - ctx.select_file_with_autocomplete.return_value = "/tmp/wordlist.txt" - ctx.hcatPassGPTTrain.return_value = "/home/user/.hate_crack/passgpt/wordlist" - - # "T" for train, "" for default base model, "" for default device (auto-detected), "" for default max candidates - inputs = iter(["T", "", "", ""]) - with ( - patch("builtins.input", side_effect=inputs), - patch("hate_crack.attacks.os.path.isdir", return_value=False), - patch( - "hate_crack.passgpt_train._detect_device", return_value="cuda" - ), - ): - from hate_crack.attacks import passgpt_attack - - passgpt_attack(ctx) - - ctx.hcatPassGPTTrain.assert_called_once_with( - "/tmp/wordlist.txt", "javirandor/passgpt-10characters", device="cuda" - ) - ctx.hcatPassGPT.assert_called_once() - call_kwargs = ctx.hcatPassGPT.call_args - assert call_kwargs[1]["model_name"] == "/home/user/.hate_crack/passgpt/wordlist" - - def test_train_failure_aborts(self): - ctx = self._make_ctx() - ctx.select_file_with_autocomplete.return_value = "/tmp/wordlist.txt" - ctx.hcatPassGPTTrain.return_value = None - - # "T" for train, "" for default base model, "" for default device (auto-detected) - inputs = iter(["T", "", ""]) - with ( - patch("builtins.input", side_effect=inputs), - patch("hate_crack.attacks.os.path.isdir", return_value=False), - patch( - "hate_crack.passgpt_train._detect_device", return_value="cuda" - ), - ): - from hate_crack.attacks import passgpt_attack - - passgpt_attack(ctx) - - ctx.hcatPassGPTTrain.assert_called_once() - ctx.hcatPassGPT.assert_not_called() - - def test_ml_deps_missing(self, capsys): - ctx = MagicMock() - ctx.HAS_ML_DEPS = False - - from hate_crack.attacks import passgpt_attack - - passgpt_attack(ctx) - - captured = capsys.readouterr() - assert "ML dependencies" in captured.out - assert "uv pip install" in captured.out - ctx.hcatPassGPT.assert_not_called() - - def test_custom_max_candidates(self): - ctx = self._make_ctx() - - # "1" selects default model, "500000" for custom max candidates - inputs = iter(["1", "500000"]) - with ( - patch("builtins.input", side_effect=inputs), - patch("hate_crack.attacks.os.path.isdir", return_value=False), - ): - from hate_crack.attacks import passgpt_attack - - passgpt_attack(ctx) - - ctx.hcatPassGPT.assert_called_once_with( - "1000", - "/tmp/hashes.txt", - 500000, - model_name="javirandor/passgpt-10characters", - batch_size=1024, - ) - - -class TestGetAvailableMemoryMb: - def test_returns_int_or_none(self): - result = _get_available_memory_mb() - assert result is None or isinstance(result, int) - - def test_never_crashes_on_any_platform(self): - # Should not raise regardless of platform - _get_available_memory_mb() - - def test_returns_positive_when_detected(self): - result = _get_available_memory_mb() - if result is not None: - assert result > 0 - - -class TestCountLines: - def test_counts_non_empty_lines(self, tmp_path): - f = tmp_path / "test.txt" - f.write_text("line1\nline2\n\nline3\n") - assert _count_lines(str(f)) == 3 - - def test_empty_file(self, tmp_path): - f = tmp_path / "empty.txt" - f.write_text("") - assert _count_lines(str(f)) == 0 - - -class TestEstimateTrainingMemoryMb: - def test_returns_reasonable_estimate(self, tmp_path): - f = tmp_path / "words.txt" - f.write_text("password\n" * 1000) - estimate = _estimate_training_memory_mb(str(f)) - # Should include at least model + optimizer overhead (~1700MB) - assert estimate >= 1700 - - def test_max_lines_reduces_estimate(self, tmp_path): - f = tmp_path / "words.txt" - f.write_text("password\n" * 100000) - full = _estimate_training_memory_mb(str(f)) - limited = _estimate_training_memory_mb(str(f), max_lines=100) - assert limited <= full - - -class TestMemoryPrecheck: - def test_aborts_when_insufficient(self, tmp_path): - f = tmp_path / "words.txt" - f.write_text("password\n" * 10) - - with ( - patch("hate_crack.passgpt_train._get_available_memory_mb", return_value=1), - patch( - "hate_crack.passgpt_train._estimate_training_memory_mb", - return_value=5000, - ), - pytest.raises(SystemExit), - ): - from hate_crack.passgpt_train import train - - train( - training_file=str(f), - output_dir=str(tmp_path / "out"), - base_model="test", - epochs=1, - batch_size=1, - device="cpu", - ) - - @pytest.mark.skipif(not _has_transformers, reason="transformers not installed") - def test_skips_when_detection_fails(self, tmp_path): - """When memory detection returns None, training proceeds past the pre-check.""" - f = tmp_path / "words.txt" - f.write_text("password\n" * 10) - - mock_tokenizer = MagicMock() - mock_model = MagicMock() - mock_model.config.n_positions = 16 - mock_trainer = MagicMock() - - with ( - patch( - "hate_crack.passgpt_train._get_available_memory_mb", return_value=None - ), - patch( - "hate_crack.passgpt_train._estimate_training_memory_mb", - return_value=5000, - ), - patch("hate_crack.passgpt_train._configure_mps"), - patch( - "transformers.RobertaTokenizerFast.from_pretrained", - return_value=mock_tokenizer, - ), - patch( - "transformers.GPT2LMHeadModel.from_pretrained", - return_value=mock_model, - ), - patch("transformers.Trainer", return_value=mock_trainer), - patch("transformers.TrainingArguments"), - ): - from hate_crack.passgpt_train import train - - train( - training_file=str(f), - output_dir=str(tmp_path / "out"), - base_model="test", - epochs=1, - batch_size=1, - device="cpu", - ) - - mock_trainer.train.assert_called_once() - - -class TestMaxLines: - def test_count_lines_respects_limit(self, tmp_path): - f = tmp_path / "words.txt" - f.write_text("password\n" * 1000) - # _count_lines doesn't have a limit, but _estimate uses max_lines - total = _count_lines(str(f)) - assert total == 1000 - - def test_estimate_uses_max_lines(self, tmp_path): - f = tmp_path / "words.txt" - f.write_text("password\n" * 10000) - est_full = _estimate_training_memory_mb(str(f)) - est_limited = _estimate_training_memory_mb(str(f), max_lines=10) - assert est_limited <= est_full - - -class TestMemoryLimitAutoTune: - @pytest.mark.skipif(not _has_transformers, reason="transformers not installed") - def test_auto_tunes_max_lines(self, tmp_path, capsys): - f = tmp_path / "words.txt" - f.write_text("password\n" * 100) - - mock_tokenizer = MagicMock() - mock_model = MagicMock() - mock_model.config.n_positions = 16 - mock_trainer = MagicMock() - - with ( - patch( - "hate_crack.passgpt_train._get_available_memory_mb", return_value=None - ), - patch("hate_crack.passgpt_train._configure_mps"), - patch( - "transformers.RobertaTokenizerFast.from_pretrained", - return_value=mock_tokenizer, - ), - patch( - "transformers.GPT2LMHeadModel.from_pretrained", - return_value=mock_model, - ), - patch("transformers.Trainer", return_value=mock_trainer), - patch("transformers.TrainingArguments"), - ): - from hate_crack.passgpt_train import train - - train( - training_file=str(f), - output_dir=str(tmp_path / "out"), - base_model="test", - epochs=1, - batch_size=1, - device="cpu", - memory_limit=2000, - ) - - captured = capsys.readouterr() - assert "--memory-limit 2000MB: auto-set --max-lines" in captured.err - - def test_memory_limit_too_low_exits(self, tmp_path): - f = tmp_path / "words.txt" - f.write_text("password\n" * 10) - - with pytest.raises(SystemExit): - from hate_crack.passgpt_train import train - - train( - training_file=str(f), - output_dir=str(tmp_path / "out"), - base_model="test", - epochs=1, - batch_size=1, - device="cpu", - memory_limit=1, # 1MB - way too low - ) diff --git a/tests/test_ui_menu_options.py b/tests/test_ui_menu_options.py index db237b9..2df6ebd 100644 --- a/tests/test_ui_menu_options.py +++ b/tests/test_ui_menu_options.py @@ -27,16 +27,6 @@ MENU_OPTION_TEST_CASES = [ ("14", CLI_MODULE._attacks, "loopback_attack", "loopback"), ("15", CLI_MODULE._attacks, "ollama_attack", "ollama"), ("16", CLI_MODULE._attacks, "omen_attack", "omen"), - pytest.param( - "17", - CLI_MODULE._attacks, - "passgpt_attack", - "passgpt", - marks=pytest.mark.skipif( - not getattr(CLI_MODULE, "HAS_ML_DEPS", False), - reason="ML dependencies not installed", - ), - ), ("90", CLI_MODULE, "download_hashmob_rules", "hashmob-rules"), ("91", CLI_MODULE, "weakpass_wordlist_menu", "weakpass-menu"), ("92", CLI_MODULE, "download_hashmob_wordlists", "hashmob-wordlists"),