"""Tests for NTLM/NetNTLM hash preprocessing helpers (issues #27 and #28).""" import sys import importlib import pytest @pytest.fixture def main_module(monkeypatch): """Load hate_crack.main with SKIP_INIT to access helper functions.""" monkeypatch.setenv("HATE_CRACK_SKIP_INIT", "1") if "hate_crack.main" in sys.modules: mod = sys.modules["hate_crack.main"] importlib.reload(mod) return mod import hate_crack.main as mod return mod class TestCountComputerAccounts: """Issue #27 - count computer accounts (helper for detection).""" def test_counts_computer_accounts(self, tmp_path, main_module): hash_file = tmp_path / "hashes.txt" hash_file.write_text( "user1:1001:aad3b435b51404ee:hash1:::\n" "COMPUTER1$:1002:aad3b435b51404ee:hash2:::\n" "user2:1003:aad3b435b51404ee:hash3:::\n" "WORKSTATION$:1004:aad3b435b51404ee:hash4:::\n" ) assert main_module._count_computer_accounts(str(hash_file)) == 2 def test_no_computer_accounts(self, tmp_path, main_module): hash_file = tmp_path / "hashes.txt" hash_file.write_text( "user1:1001:aad3b435b51404ee:hash1:::\n" "user2:1002:aad3b435b51404ee:hash2:::\n" ) assert main_module._count_computer_accounts(str(hash_file)) == 0 def test_missing_file(self, tmp_path, main_module): assert main_module._count_computer_accounts(str(tmp_path / "nope.txt")) == 0 def test_empty_file(self, tmp_path, main_module): hash_file = tmp_path / "hashes.txt" hash_file.write_text("") assert main_module._count_computer_accounts(str(hash_file)) == 0 class TestFilterComputerAccounts: """Issue #27 - filter accounts ending with $.""" def test_removes_computer_accounts(self, tmp_path, main_module): hash_file = tmp_path / "hashes.txt" hash_file.write_text( "user1:1001:aad3b435b51404ee:hash1:::\n" "COMPUTER1$:1002:aad3b435b51404ee:hash2:::\n" "user2:1003:aad3b435b51404ee:hash3:::\n" "WORKSTATION$:1004:aad3b435b51404ee:hash4:::\n" ) output_file = tmp_path / "filtered.txt" removed = main_module._filter_computer_accounts( str(hash_file), str(output_file) ) assert removed == 2 lines = output_file.read_text().strip().split("\n") assert len(lines) == 2 assert all("$" not in line.split(":", 1)[0] for line in lines) def test_no_computer_accounts(self, tmp_path, main_module): hash_file = tmp_path / "hashes.txt" hash_file.write_text( "user1:1001:aad3b435b51404ee:hash1:::\n" "user2:1002:aad3b435b51404ee:hash2:::\n" ) output_file = tmp_path / "filtered.txt" removed = main_module._filter_computer_accounts( str(hash_file), str(output_file) ) assert removed == 0 lines = output_file.read_text().strip().split("\n") assert len(lines) == 2 def test_all_computer_accounts(self, tmp_path, main_module): hash_file = tmp_path / "hashes.txt" hash_file.write_text( "COMP1$:1001:aad3b435b51404ee:hash1:::\n" "COMP2$:1002:aad3b435b51404ee:hash2:::\n" ) output_file = tmp_path / "filtered.txt" removed = main_module._filter_computer_accounts( str(hash_file), str(output_file) ) assert removed == 2 content = output_file.read_text().strip() assert content == "" def test_missing_file(self, tmp_path, main_module): removed = main_module._filter_computer_accounts( str(tmp_path / "nonexistent.txt"), str(tmp_path / "output.txt"), ) assert removed == 0 def test_empty_file(self, tmp_path, main_module): hash_file = tmp_path / "hashes.txt" hash_file.write_text("") output_file = tmp_path / "filtered.txt" removed = main_module._filter_computer_accounts( str(hash_file), str(output_file) ) assert removed == 0 def test_malformed_lines(self, tmp_path, main_module): hash_file = tmp_path / "hashes.txt" hash_file.write_text( "user1:1001:aad3b435b51404ee:hash1:::\n" "malformed_line_without_dollar\n" "COMP$:1002:aad3b435b51404ee:hash2:::\n" ) output_file = tmp_path / "filtered.txt" removed = main_module._filter_computer_accounts( str(hash_file), str(output_file) ) assert removed == 1 lines = output_file.read_text().strip().split("\n") assert len(lines) == 2 def test_crlf_line_endings(self, tmp_path, main_module): """Test that CRLF (Windows) line endings are handled correctly.""" hash_file = tmp_path / "hashes.txt" hash_file.write_bytes( b"user1:1001:aad3b435b51404ee:hash1:::\r\n" b"COMPUTER1$:1002:aad3b435b51404ee:hash2:::\r\n" b"user2:1003:aad3b435b51404ee:hash3:::\r\n" ) output_file = tmp_path / "filtered.txt" removed = main_module._filter_computer_accounts( str(hash_file), str(output_file) ) assert removed == 1 lines = output_file.read_text().strip().split("\n") assert len(lines) == 2 # Verify no stray \r in output for line in lines: assert "\r" not in line class TestDedupNetntlmByUsername: """Issue #28 - deduplicate NetNTLM hashes by username.""" def test_removes_duplicates(self, tmp_path, main_module): hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "user1::DOMAIN:challenge1:response1:blob1\n" "user2::DOMAIN:challenge2:response2:blob2\n" "user1::DOMAIN:challenge3:response3:blob3\n" "user3::DOMAIN:challenge4:response4:blob4\n" "user2::DOMAIN:challenge5:response5:blob5\n" ) output_file = tmp_path / "dedup.txt" total, duplicates = main_module._dedup_netntlm_by_username( str(hash_file), str(output_file) ) assert total == 5 assert duplicates == 2 lines = output_file.read_text().strip().split("\n") assert len(lines) == 3 # First occurrences should be kept assert "challenge1" in lines[0] assert "challenge2" in lines[1] assert "challenge4" in lines[2] def test_case_insensitive_dedup(self, tmp_path, main_module): hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "User1::DOMAIN:challenge1:response1:blob1\n" "USER1::DOMAIN:challenge2:response2:blob2\n" "user1::DOMAIN:challenge3:response3:blob3\n" ) output_file = tmp_path / "dedup.txt" total, duplicates = main_module._dedup_netntlm_by_username( str(hash_file), str(output_file) ) assert total == 3 assert duplicates == 2 lines = output_file.read_text().strip().split("\n") assert len(lines) == 1 def test_no_duplicates(self, tmp_path, main_module): hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "user1::DOMAIN:challenge1:response1:blob1\n" "user2::DOMAIN:challenge2:response2:blob2\n" ) output_file = tmp_path / "dedup.txt" total, duplicates = main_module._dedup_netntlm_by_username( str(hash_file), str(output_file) ) assert total == 2 assert duplicates == 0 # Output file should NOT be created when no duplicates exist assert not output_file.exists() def test_missing_file(self, tmp_path, main_module): total, duplicates = main_module._dedup_netntlm_by_username( str(tmp_path / "nonexistent.txt"), str(tmp_path / "output.txt"), ) assert total == 0 assert duplicates == 0 def test_empty_file(self, tmp_path, main_module): hash_file = tmp_path / "netntlm.txt" hash_file.write_text("") output_file = tmp_path / "dedup.txt" total, duplicates = main_module._dedup_netntlm_by_username( str(hash_file), str(output_file) ) assert total == 0 assert duplicates == 0 assert not output_file.exists() def test_malformed_lines(self, tmp_path, main_module): hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "user1::DOMAIN:challenge1:response1:blob1\n" "malformed_line_without_colons\n" "user2::DOMAIN:challenge2:response2:blob2\n" ) output_file = tmp_path / "dedup.txt" total, duplicates = main_module._dedup_netntlm_by_username( str(hash_file), str(output_file) ) assert total == 3 assert duplicates == 0 assert not output_file.exists() def test_crlf_line_endings(self, tmp_path, main_module): """Test that CRLF (Windows) line endings are handled correctly.""" hash_file = tmp_path / "netntlm.txt" hash_file.write_bytes( b"user1::DOMAIN:challenge1:response1:blob1\r\n" b"user2::DOMAIN:challenge2:response2:blob2\r\n" b"user1::DOMAIN:challenge3:response3:blob3\r\n" ) output_file = tmp_path / "dedup.txt" total, duplicates = main_module._dedup_netntlm_by_username( str(hash_file), str(output_file) ) assert total == 3 assert duplicates == 1 lines = output_file.read_text().strip().split("\n") assert len(lines) == 2 # Verify no stray \r in output for line in lines: assert "\r" not in line class TestWriteFieldSortedUnique: """Test _write_field_sorted_unique helper for extracting hash fields.""" def test_extracts_nt_hashes_field_4(self, tmp_path, main_module): """Extract NT hashes (field 4) from pwdump format.""" hash_file = tmp_path / "pwdump.txt" hash_file.write_text( "user1:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "user2:501:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" "user3:502:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" ) output_file = tmp_path / "nt.txt" result = main_module._write_field_sorted_unique( str(hash_file), str(output_file), 4, ":" ) assert result is True lines = output_file.read_text().strip().split("\n") assert len(lines) == 2 # Two unique NT hashes assert "31d6cfe0d16ae931b73c59d7e0c089c0" in lines assert "8846f7eaee8fb117ad06bdd830b7586c" in lines def test_extracts_lm_hashes_field_3(self, tmp_path, main_module): """Extract LM hashes (field 3) from pwdump format.""" hash_file = tmp_path / "pwdump.txt" hash_file.write_text( "user1:500:e52cac67419a9a224a3b108f3fa6cb6d:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "user2:501:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" "user3:502:e52cac67419a9a224a3b108f3fa6cb6d:abc123def456:::\n" ) output_file = tmp_path / "lm.txt" result = main_module._write_field_sorted_unique( str(hash_file), str(output_file), 3, ":" ) assert result is True lines = output_file.read_text().strip().split("\n") assert len(lines) == 2 # Two unique LM hashes assert "aad3b435b51404eeaad3b435b51404ee" in lines assert "e52cac67419a9a224a3b108f3fa6cb6d" in lines def test_sorts_and_deduplicates(self, tmp_path, main_module): """Verify sorting and deduplication.""" hash_file = tmp_path / "pwdump.txt" hash_file.write_text( "user1:500:lm1:zzz:::\n" "user2:501:lm2:aaa:::\n" "user3:502:lm3:mmm:::\n" "user4:503:lm4:aaa:::\n" # Duplicate NT hash ) output_file = tmp_path / "nt.txt" main_module._write_field_sorted_unique(str(hash_file), str(output_file), 4, ":") lines = output_file.read_text().strip().split("\n") assert len(lines) == 3 assert lines == ["aaa", "mmm", "zzz"] # Sorted alphabetically def test_handles_missing_fields(self, tmp_path, main_module): """Lines with fewer fields than requested should be skipped.""" hash_file = tmp_path / "pwdump.txt" hash_file.write_text( "user1:500:lm1:nt1:::\n" "malformed:500\n" # Only 2 fields "user2:501:lm2:nt2:::\n" ) output_file = tmp_path / "nt.txt" main_module._write_field_sorted_unique(str(hash_file), str(output_file), 4, ":") lines = output_file.read_text().strip().split("\n") assert len(lines) == 2 assert "nt1" in lines assert "nt2" in lines def test_missing_input_file(self, tmp_path, main_module): """Should return False when input file doesn't exist.""" result = main_module._write_field_sorted_unique( str(tmp_path / "nonexistent.txt"), str(tmp_path / "output.txt"), 4, ":" ) assert result is False def test_empty_file(self, tmp_path, main_module): """Empty input should create empty output.""" hash_file = tmp_path / "empty.txt" hash_file.write_text("") output_file = tmp_path / "out.txt" result = main_module._write_field_sorted_unique( str(hash_file), str(output_file), 4, ":" ) assert result is True assert output_file.read_text().strip() == "" class TestPwdumpFilterPipeline: """Full pipeline tests: filter -> extract NT/LM -> verify output.""" def test_full_pipeline_with_filtering(self, tmp_path, main_module): """Test complete flow: filter computer accounts, extract NT/LM hashes.""" # Step 1: Create pwdump file with mixed accounts pwdump_file = tmp_path / "dump.txt" pwdump_file.write_text( "Administrator:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "Guest:501:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "DESKTOP-ABC$:1001:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" "john:1002:e52cac67419a9a224a3b108f3fa6cb6d:5f4dcc3b5aa765d61d8327deb882cf99:::\n" "WORKSTATION$:1003:aad3b435b51404eeaad3b435b51404ee:deadbeefcafebabe1234567890abcdef:::\n" "alice:1004:aad3b435b51404eeaad3b435b51404ee:6cb75f652a9b52798eb6cf2201057c73:::\n" ) # Step 2: Count computer accounts count = main_module._count_computer_accounts(str(pwdump_file)) assert count == 2 # Step 3: Filter computer accounts filtered_file = tmp_path / "dump.txt.filtered" removed = main_module._filter_computer_accounts( str(pwdump_file), str(filtered_file) ) assert removed == 2 # Step 4: Verify filtered file preserves complete pwdump format filtered_lines = filtered_file.read_text().strip().split("\n") assert len(filtered_lines) == 4 for line in filtered_lines: # Each line should have pwdump format: user:uid:LM:NT::: parts = line.split(":") assert len(parts) == 7 # 7 fields total (6 colons) assert not parts[0].endswith("$") # No computer accounts # Step 5: Extract NT hashes from filtered file nt_file = tmp_path / "dump.txt.filtered.nt" result = main_module._write_field_sorted_unique( str(filtered_file), str(nt_file), 4, ":" ) assert result is True # Step 6: Verify NT hashes are correct and don't include computer account hashes nt_hashes = nt_file.read_text().strip().split("\n") assert len(nt_hashes) == 3 # Three unique NT hashes from non-computer accounts assert "31d6cfe0d16ae931b73c59d7e0c089c0" in nt_hashes # Admin/Guest empty hash assert "5f4dcc3b5aa765d61d8327deb882cf99" in nt_hashes # john's hash assert "6cb75f652a9b52798eb6cf2201057c73" in nt_hashes # alice's hash # Computer account hashes should NOT be present assert "8846f7eaee8fb117ad06bdd830b7586c" not in nt_hashes assert "deadbeefcafebabe1234567890abcdef" not in nt_hashes # Step 7: Extract LM hashes from filtered file lm_file = tmp_path / "dump.txt.filtered.lm" result = main_module._write_field_sorted_unique( str(filtered_file), str(lm_file), 3, ":" ) assert result is True # Step 8: Verify LM hashes are correct lm_hashes = lm_file.read_text().strip().split("\n") assert len(lm_hashes) == 2 # Two unique LM hashes assert ( "aad3b435b51404eeaad3b435b51404ee" in lm_hashes ) # Empty LM (Admin/Guest/alice) assert "e52cac67419a9a224a3b108f3fa6cb6d" in lm_hashes # john's LM def test_pipeline_without_filtering(self, tmp_path, main_module): """Test pipeline when no computer accounts exist.""" pwdump_file = tmp_path / "dump.txt" pwdump_file.write_text( "Administrator:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "john:1002:e52cac67419a9a224a3b108f3fa6cb6d:5f4dcc3b5aa765d61d8327deb882cf99:::\n" "alice:1004:aad3b435b51404eeaad3b435b51404ee:6cb75f652a9b52798eb6cf2201057c73:::\n" ) # Count should be zero count = main_module._count_computer_accounts(str(pwdump_file)) assert count == 0 # Extract NT directly from original file (simulate no filtering step) nt_file = tmp_path / "dump.txt.nt" main_module._write_field_sorted_unique(str(pwdump_file), str(nt_file), 4, ":") nt_hashes = nt_file.read_text().strip().split("\n") assert len(nt_hashes) == 3 assert "31d6cfe0d16ae931b73c59d7e0c089c0" in nt_hashes assert "5f4dcc3b5aa765d61d8327deb882cf99" in nt_hashes assert "6cb75f652a9b52798eb6cf2201057c73" in nt_hashes def test_pipeline_with_realistic_hashes(self, tmp_path, main_module): """Test with realistic Active Directory pwdump data.""" pwdump_file = tmp_path / "ad_dump.txt" pwdump_file.write_text( # Real format examples "Administrator:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "krbtgt:502:aad3b435b51404eeaad3b435b51404ee:d3c02561bba6ee4ad6cfd024ec8fda5d:::\n" "CORP-DC01$:1000:aad3b435b51404eeaad3b435b51404ee:a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6:::\n" "CORP-WKS01$:1001:aad3b435b51404eeaad3b435b51404ee:1234567890abcdef1234567890abcdef:::\n" "jdoe:1102:e52cac67419a9a224a3b108f3fa6cb6d:8846f7eaee8fb117ad06bdd830b7586c:::\n" "CORP-SRV01$:1003:aad3b435b51404eeaad3b435b51404ee:fedcba0987654321fedcba0987654321:::\n" "asmith:1103:aad3b435b51404eeaad3b435b51404ee:2b2ac52d43a3d5c5c5b5b5f5e5d5c5a5:::\n" ) # Count computer accounts (domain controllers, workstations, servers) count = main_module._count_computer_accounts(str(pwdump_file)) assert count == 3 # Filter them out filtered_file = tmp_path / "ad_dump.txt.filtered" removed = main_module._filter_computer_accounts( str(pwdump_file), str(filtered_file) ) assert removed == 3 # Extract NT hashes nt_file = tmp_path / "ad_dump.txt.filtered.nt" main_module._write_field_sorted_unique(str(filtered_file), str(nt_file), 4, ":") nt_hashes = nt_file.read_text().strip().split("\n") assert len(nt_hashes) == 4 # Four unique NT hashes from user accounts # Verify no computer account hashes leaked through assert "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6" not in nt_hashes assert "1234567890abcdef1234567890abcdef" not in nt_hashes assert "fedcba0987654321fedcba0987654321" not in nt_hashes def test_pipeline_with_bom(self, tmp_path, main_module): """Test that BOM character doesn't break filtering.""" pwdump_file = tmp_path / "bom_dump.txt" # Note: The main.py code strips BOM at line 3672 but _filter_computer_accounts # doesn't handle it. This tests if that causes issues. pwdump_file.write_text( "\ufeffAdministrator:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "COMPUTER$:1001:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" "john:1002:e52cac67419a9a224a3b108f3fa6cb6d:5f4dcc3b5aa765d61d8327deb882cf99:::\n" ) filtered_file = tmp_path / "bom_dump.txt.filtered" removed = main_module._filter_computer_accounts( str(pwdump_file), str(filtered_file) ) assert removed == 1 filtered_lines = filtered_file.read_text().strip().split("\n") # BOM should be preserved in first line since filter doesn't strip it assert len(filtered_lines) == 2 # First username might have BOM attached first_username = filtered_lines[0].split(":", 1)[0] # BOM gets written through, so we check it's either with or without assert "Administrator" in first_username def test_pipeline_preserves_all_fields(self, tmp_path, main_module): """Verify filtered file maintains exact pwdump structure.""" pwdump_file = tmp_path / "structure.txt" pwdump_file.write_text( "user1:500:LM1:NT1:extra1:extra2:extra3\n" "COMP$:501:LM2:NT2:extra4:extra5:extra6\n" "user2:502:LM3:NT3:extra7:extra8:extra9\n" ) filtered_file = tmp_path / "structure.txt.filtered" main_module._filter_computer_accounts(str(pwdump_file), str(filtered_file)) filtered_lines = filtered_file.read_text().strip().split("\n") assert len(filtered_lines) == 2 # Each line should have exactly 7 fields for line in filtered_lines: assert line.count(":") == 6 parts = line.split(":") assert len(parts) == 7 def test_pipeline_empty_username_edge_case(self, tmp_path, main_module): """Test handling of lines with empty username field.""" pwdump_file = tmp_path / "empty_user.txt" pwdump_file.write_text( "user1:500:lm1:nt1:::\n" ":501:lm2:nt2:::\n" # Empty username "COMP$:502:lm3:nt3:::\n" "user2:503:lm4:nt4:::\n" ) count = main_module._count_computer_accounts(str(pwdump_file)) assert count == 1 # Only COMP$ should count filtered_file = tmp_path / "empty_user.txt.filtered" removed = main_module._filter_computer_accounts( str(pwdump_file), str(filtered_file) ) assert removed == 1 filtered_lines = filtered_file.read_text().strip().split("\n") assert len(filtered_lines) == 3 # Empty username line should be kept def test_pipeline_with_duplicate_hashes_across_accounts( self, tmp_path, main_module ): """Verify deduplication works correctly when multiple users share hashes.""" pwdump_file = tmp_path / "dup_hashes.txt" pwdump_file.write_text( "user1:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "user2:501:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" # Same hash "COMP$:502:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" "user3:503:e52cac67419a9a224a3b108f3fa6cb6d:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" # Same NT, different LM ) filtered_file = tmp_path / "dup_hashes.txt.filtered" main_module._filter_computer_accounts(str(pwdump_file), str(filtered_file)) nt_file = tmp_path / "dup_hashes.txt.filtered.nt" main_module._write_field_sorted_unique(str(filtered_file), str(nt_file), 4, ":") nt_hashes = nt_file.read_text().strip().split("\n") # Should have only 1 unique NT hash (31d6cfe0d16ae931b73c59d7e0c089c0) assert len(nt_hashes) == 1 assert nt_hashes[0] == "31d6cfe0d16ae931b73c59d7e0c089c0" lm_file = tmp_path / "dup_hashes.txt.filtered.lm" main_module._write_field_sorted_unique(str(filtered_file), str(lm_file), 3, ":") lm_hashes = lm_file.read_text().strip().split("\n") # Should have 2 unique LM hashes assert len(lm_hashes) == 2 assert "aad3b435b51404eeaad3b435b51404ee" in lm_hashes assert "e52cac67419a9a224a3b108f3fa6cb6d" in lm_hashes class TestE2EPreprocessingFlow: """End-to-end tests that simulate the actual main() preprocessing flow. These tests replicate the exact logic from main.py lines 3698-3747, exercising the full chain: format detection -> computer account filtering -> NT/LM hash extraction -> final hcatHashFile assignment. """ @staticmethod def _run_preprocessing(main_module, hash_file_path, input_responses): """Simulate the main() preprocessing block for hash type 1000. Replicates the exact flow from main.py: 1. Read first line, detect pwdump format 2. Count computer accounts, prompt to filter 3. Extract NT and LM hashes via _write_field_sorted_unique 4. Return the final hcatHashFile path and metadata Args: main_module: The hate_crack.main module hash_file_path: Path to the pwdump hash file input_responses: List of responses for input() calls (e.g., ["Y"] to accept filtering, ["N"] to decline) Returns: dict with keys: hcatHashFile, hcatHashFileOrig, pwdump_format, lmHashesFound, filtered_path, nt_file, lm_file """ import re input_iter = iter(input_responses) hcatHashFile = str(hash_file_path) hcatHashFileOrig = None pwdump_format = False lmHashesFound = False filtered_path = None # Read first line (same as main.py line 3702-3703) with open(hcatHashFile, "r") as f: hcatHashFileLine = f.readline().strip().lstrip("\ufeff") # Detect pwdump format (same regex as main.py line 3704) if re.search(r"[a-f0-9A-F]{32}:[a-f0-9A-F]{32}:::", hcatHashFileLine): pwdump_format = True # Count and optionally filter computer accounts computer_count = main_module._count_computer_accounts(hcatHashFile) if computer_count > 0: filter_choice = next(input_iter, "Y") if filter_choice.upper() == "Y": filtered_path = f"{hcatHashFile}.filtered" main_module._filter_computer_accounts(hcatHashFile, filtered_path) hcatHashFile = filtered_path # Extract NT hashes (field 4) - same as main.py line 3726 main_module._write_field_sorted_unique( hcatHashFile, f"{hcatHashFile}.nt", 4 ) # Extract LM hashes (field 3) - same as main.py line 3728 main_module._write_field_sorted_unique( hcatHashFile, f"{hcatHashFile}.lm", 3 ) # Check for LM hashes (same logic as main.py lines 3729-3735) lm_count = main_module.lineCount(hcatHashFile + ".lm") if ( lm_count == 1 and hcatHashFileLine.split(":")[2].lower() != "aad3b435b51404eeaad3b435b51404ee" ) or lm_count > 1: lmHashesFound = True # Decline LM brute force to keep test simple next(input_iter, "N") hcatHashFileOrig = hcatHashFile hcatHashFile = hcatHashFile + ".nt" return { "hcatHashFile": hcatHashFile, "hcatHashFileOrig": hcatHashFileOrig, "pwdump_format": pwdump_format, "lmHashesFound": lmHashesFound, "filtered_path": filtered_path, } def test_e2e_filter_computers_and_extract_nt(self, tmp_path, main_module): """Full flow: secretsdump.py output -> filter computers -> extract NT hashes.""" pwdump = tmp_path / "secretsdump.txt" pwdump.write_text( "Administrator:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "Guest:501:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "CORP-DC01$:1001:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" "john.doe:1002:e52cac67419a9a224a3b108f3fa6cb6d:5f4dcc3b5aa765d61d8327deb882cf99:::\n" "CORP-WKS01$:1003:aad3b435b51404eeaad3b435b51404ee:deadbeefcafebabe1234567890abcdef:::\n" "jane.smith:1004:aad3b435b51404eeaad3b435b51404ee:6cb75f652a9b52798eb6cf2201057c73:::\n" "CORP-SRV01$:1005:aad3b435b51404eeaad3b435b51404ee:1234567890abcdef1234567890abcdef:::\n" ) result = self._run_preprocessing(main_module, pwdump, ["Y", "N"]) # Verify pwdump detected assert result["pwdump_format"] is True # Verify filtering happened assert result["filtered_path"] is not None filtered = open(result["filtered_path"]).read() filtered_lines = filtered.strip().split("\n") assert len(filtered_lines) == 4, ( f"Expected 4 non-computer lines, got {len(filtered_lines)}" ) for line in filtered_lines: username = line.split(":")[0] assert not username.endswith("$"), ( f"Computer account leaked through: {username}" ) # Verify final hcatHashFile points to .nt file assert result["hcatHashFile"].endswith(".nt") # Verify NT hashes are correct (no computer account hashes) nt_content = open(result["hcatHashFile"]).read() nt_hashes = nt_content.strip().split("\n") assert "31d6cfe0d16ae931b73c59d7e0c089c0" in nt_hashes # Admin/Guest assert "5f4dcc3b5aa765d61d8327deb882cf99" in nt_hashes # john.doe assert "6cb75f652a9b52798eb6cf2201057c73" in nt_hashes # jane.smith # Computer hashes must NOT be present assert "8846f7eaee8fb117ad06bdd830b7586c" not in nt_hashes assert "deadbeefcafebabe1234567890abcdef" not in nt_hashes assert "1234567890abcdef1234567890abcdef" not in nt_hashes def test_e2e_decline_filter(self, tmp_path, main_module): """Full flow when user declines filtering - all hashes including computers.""" pwdump = tmp_path / "dump.txt" pwdump.write_text( "admin:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "COMP$:1001:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" ) result = self._run_preprocessing(main_module, pwdump, ["N", "N"]) assert result["pwdump_format"] is True assert result["filtered_path"] is None # No filtering # NT file should contain BOTH hashes (computer included) nt_hashes = open(result["hcatHashFile"]).read().strip().split("\n") assert "31d6cfe0d16ae931b73c59d7e0c089c0" in nt_hashes assert "8846f7eaee8fb117ad06bdd830b7586c" in nt_hashes def test_e2e_no_computers_in_dump(self, tmp_path, main_module): """Full flow with no computer accounts - no prompt shown.""" pwdump = tmp_path / "clean.txt" pwdump.write_text( "admin:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "john:501:e52cac67419a9a224a3b108f3fa6cb6d:5f4dcc3b5aa765d61d8327deb882cf99:::\n" ) # No input responses needed since no computer accounts -> no prompt result = self._run_preprocessing(main_module, pwdump, ["N"]) assert result["pwdump_format"] is True assert result["filtered_path"] is None nt_hashes = open(result["hcatHashFile"]).read().strip().split("\n") assert len(nt_hashes) == 2 def test_e2e_all_computers(self, tmp_path, main_module): """Full flow where ALL accounts are computer accounts.""" pwdump = tmp_path / "computers_only.txt" pwdump.write_text( "DC01$:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "WKS01$:501:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" ) result = self._run_preprocessing(main_module, pwdump, ["Y", "N"]) assert result["filtered_path"] is not None filtered = open(result["filtered_path"]).read() assert filtered.strip() == "" # All lines removed # NT file should be empty too nt_content = open(result["hcatHashFile"]).read() assert nt_content.strip() == "" def test_e2e_lm_hashes_detected(self, tmp_path, main_module): """Full flow with non-empty LM hashes triggers LM detection.""" pwdump = tmp_path / "lm_hashes.txt" pwdump.write_text( "admin:500:e52cac67419a9a224a3b108f3fa6cb6d:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "COMP$:501:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" "john:502:a4f49c406510bdca00000000000000000:5f4dcc3b5aa765d61d8327deb882cf99:::\n" ) result = self._run_preprocessing(main_module, pwdump, ["Y", "N"]) assert result["lmHashesFound"] is True # LM file should only have non-computer LM hashes lm_path = result["hcatHashFileOrig"] + ".lm" lm_hashes = open(lm_path).read().strip().split("\n") # Computer LM hash should not be present for lm in lm_hashes: # These are from the filtered file (no COMP$) assert lm in [ "a4f49c406510bdca00000000000000000", "e52cac67419a9a224a3b108f3fa6cb6d", ] def test_e2e_domain_prefix_computers(self, tmp_path, main_module): """Full flow with domain\\computer$ format from secretsdump.""" pwdump = tmp_path / "domain_dump.txt" pwdump.write_text( "CORP\\Administrator:500:aad3b435b51404eeaad3b435b51404ee:31d6cfe0d16ae931b73c59d7e0c089c0:::\n" "CORP\\DESKTOP-PC$:1001:aad3b435b51404eeaad3b435b51404ee:8846f7eaee8fb117ad06bdd830b7586c:::\n" "CORP\\john.doe:1002:aad3b435b51404eeaad3b435b51404ee:5f4dcc3b5aa765d61d8327deb882cf99:::\n" ) result = self._run_preprocessing(main_module, pwdump, ["Y", "N"]) # Verify format detection still works (regex matches the LM:NT::: part) assert result["pwdump_format"] is True # The username field is "CORP\DESKTOP-PC$" - split on ":" gets that # But wait: "CORP\DESKTOP-PC$" doesn't end with $ in the first :-delimited field? # Actually it does: split(":", 1)[0] = "CORP\\DESKTOP-PC$" which ends with "$" assert result["filtered_path"] is not None filtered_lines = open(result["filtered_path"]).read().strip().split("\n") assert len(filtered_lines) == 2 for line in filtered_lines: username = line.split(":")[0] assert not username.endswith("$") class TestE2ENetNTLMPreprocessingFlow: """End-to-end tests that simulate the NetNTLM preprocessing flow. These tests replicate the exact logic from main.py for hash types 5500/5600: computer account filtering -> deduplication by username. """ @staticmethod def _run_netntlm_preprocessing(main_module, hash_file_path, input_responses): """Simulate the main() preprocessing block for NetNTLM hash types. Replicates the flow from main.py: 1. Count computer accounts, prompt to filter 2. Count duplicates by username, prompt to dedup 3. Return the final hcatHashFile path and metadata Args: main_module: The hate_crack.main module hash_file_path: Path to the NetNTLM hash file input_responses: List of responses for input() calls Returns: dict with keys: hcatHashFile, filtered, deduped, filtered_path, dedup_path """ input_iter = iter(input_responses) hcatHashFile = str(hash_file_path) filtered = False deduped = False filtered_path = None dedup_path = None # Step 1: Computer account filtering computer_count = main_module._count_computer_accounts(hcatHashFile) if computer_count > 0: filter_choice = next(input_iter, "Y") if filter_choice.upper() == "Y": filtered_path = f"{hcatHashFile}.filtered" main_module._filter_computer_accounts(hcatHashFile, filtered_path) hcatHashFile = filtered_path filtered = True # Step 2: Deduplication by username dedup_path_candidate = hcatHashFile + ".dedup" total, duplicates = main_module._dedup_netntlm_by_username( hcatHashFile, dedup_path_candidate ) if duplicates > 0: dedup_choice = next(input_iter, "Y") if dedup_choice.upper() == "Y": hcatHashFile = dedup_path_candidate dedup_path = dedup_path_candidate deduped = True return { "hcatHashFile": hcatHashFile, "filtered": filtered, "deduped": deduped, "filtered_path": filtered_path, "dedup_path": dedup_path, } def test_filter_and_dedup(self, tmp_path, main_module): """Accept both filtering and dedup - mixed users + computers with duplicates.""" hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "user1::DOMAIN:chal1:resp1:blob1\n" "DC01$::DOMAIN:chal2:resp2:blob2\n" "user2::DOMAIN:chal3:resp3:blob3\n" "user1::DOMAIN:chal4:resp4:blob4\n" "FILESERV01$::DOMAIN:chal5:resp5:blob5\n" "user3::DOMAIN:chal6:resp6:blob6\n" ) result = self._run_netntlm_preprocessing(main_module, hash_file, ["Y", "Y"]) assert result["filtered"] is True assert result["deduped"] is True # Final file should have 3 unique non-computer users lines = open(result["hcatHashFile"]).read().strip().split("\n") assert len(lines) == 3 usernames = [line.split(":")[0] for line in lines] assert "DC01$" not in usernames assert "FILESERV01$" not in usernames # user1 should appear only once (deduped) assert usernames.count("user1") == 1 def test_filter_only_decline_dedup(self, tmp_path, main_module): """Accept filtering, decline dedup - computers removed but duplicates kept.""" hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "user1::DOMAIN:chal1:resp1:blob1\n" "DC01$::DOMAIN:chal2:resp2:blob2\n" "user1::DOMAIN:chal3:resp3:blob3\n" ) result = self._run_netntlm_preprocessing(main_module, hash_file, ["Y", "N"]) assert result["filtered"] is True assert result["deduped"] is False # Should have 2 lines (both user1 entries, computer removed) lines = open(result["hcatHashFile"]).read().strip().split("\n") assert len(lines) == 2 for line in lines: assert not line.split(":")[0].endswith("$") def test_decline_filter_accept_dedup(self, tmp_path, main_module): """Decline filtering, accept dedup - computers kept but duplicates removed.""" hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "user1::DOMAIN:chal1:resp1:blob1\n" "DC01$::DOMAIN:chal2:resp2:blob2\n" "user1::DOMAIN:chal3:resp3:blob3\n" "DC01$::DOMAIN:chal4:resp4:blob4\n" ) result = self._run_netntlm_preprocessing(main_module, hash_file, ["N", "Y"]) assert result["filtered"] is False assert result["deduped"] is True # Should have 2 unique usernames (user1 and DC01$) lines = open(result["hcatHashFile"]).read().strip().split("\n") assert len(lines) == 2 usernames = [line.split(":")[0] for line in lines] assert "user1" in usernames assert "DC01$" in usernames def test_no_computers(self, tmp_path, main_module): """No computer accounts - no filter prompt, only dedup prompt.""" hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "user1::DOMAIN:chal1:resp1:blob1\n" "user2::DOMAIN:chal2:resp2:blob2\n" "user1::DOMAIN:chal3:resp3:blob3\n" ) # Only one input needed (for dedup), no filter prompt result = self._run_netntlm_preprocessing(main_module, hash_file, ["Y"]) assert result["filtered"] is False assert result["deduped"] is True lines = open(result["hcatHashFile"]).read().strip().split("\n") assert len(lines) == 2 def test_all_computers(self, tmp_path, main_module): """All accounts are computers - everything filtered, dedup gets empty file.""" hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "DC01$::DOMAIN:chal1:resp1:blob1\n" "FILESERV01$::DOMAIN:chal2:resp2:blob2\n" "WORKSTATION01$::DOMAIN:chal3:resp3:blob3\n" ) result = self._run_netntlm_preprocessing(main_module, hash_file, ["Y"]) assert result["filtered"] is True # Dedup should find 0 duplicates on empty file, so no dedup prompt assert result["deduped"] is False content = open(result["hcatHashFile"]).read().strip() assert content == "" def test_domain_prefix(self, tmp_path, main_module): """CORP\\DC01$::DOMAIN:... format - domain prefix with computer account.""" hash_file = tmp_path / "netntlm.txt" hash_file.write_text( "CORP\\user1::DOMAIN:chal1:resp1:blob1\n" "CORP\\DC01$::DOMAIN:chal2:resp2:blob2\n" "CORP\\user2::DOMAIN:chal3:resp3:blob3\n" ) result = self._run_netntlm_preprocessing(main_module, hash_file, ["Y"]) assert result["filtered"] is True # No duplicates, so no dedup prompt assert result["deduped"] is False lines = open(result["hcatHashFile"]).read().strip().split("\n") assert len(lines) == 2 for line in lines: username = line.split(":")[0] assert not username.endswith("$")