diff --git a/hate_crack/api.py b/hate_crack/api.py index b02798e..3f684f2 100644 --- a/hate_crack/api.py +++ b/hate_crack/api.py @@ -37,6 +37,133 @@ class _RateLimiter: _hashmob_limiter = _RateLimiter(rate=1, period=2.0) +class _Hashmob429(Exception): + """Raised inside a _with_hashmob_backoff callback to trigger a backoff retry.""" + + +def _stream_response_to_file( + r, + dest_path: str, + *, + label: str | None = None, + show_progress: bool = True, +) -> bool: + """Write an already-opened streaming response to dest_path atomically via a .part file.""" + temp_path = dest_path + ".part" + try: + total = 0 + try: + total = int(r.headers.get("content-length") or 0) + except Exception: + pass + downloaded = 0 + os.makedirs(os.path.dirname(os.path.abspath(dest_path)), exist_ok=True) + with open(temp_path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + downloaded += len(chunk) + if show_progress: + if total: + done = int(50 * downloaded / total) + percent = 100 * downloaded / total + bar = "=" * done + " " * (50 - done) + sys.stdout.write( + f"\r[{bar}] {percent:6.2f}% ({downloaded // 1024} KB/{total // 1024} KB)" + ) + else: + sys.stdout.write(f"\rDownloaded {downloaded // 1024} KB") + sys.stdout.flush() + if show_progress: + sys.stdout.write("\n") + os.replace(temp_path, dest_path) + print(f"Downloaded {dest_path}") + return True + except KeyboardInterrupt: + print("\nKeyboard interrupt: Cleaning up partial download...") + if os.path.exists(temp_path): + try: + os.remove(temp_path) + print(f"Removed partial file: {temp_path}") + except Exception as e: + print(f"Failed to remove partial file: {e}") + raise + except Exception as e: + print(f"Error during download: {e}") + if os.path.exists(temp_path): + try: + os.remove(temp_path) + except Exception: + pass + return False + + +def _streamed_download( + url: str, + dest_path: str, + *, + headers: dict | None = None, + label: str | None = None, + timeout: int = 120, + 
chunk_size: int = 8192, + show_progress: bool = True, + skip_existing: bool = False, + allow_redirects: bool = True, +) -> bool: + """Download url to dest_path atomically, with optional progress bar. + + Returns True on success, False on handled failure. + Re-raises KeyboardInterrupt after cleaning up the .part file. + """ + if skip_existing and os.path.isfile(dest_path) and os.path.getsize(dest_path) > 0: + name = label or os.path.basename(dest_path) + print(f"[i] Skipping {name} (already present)") + return True + try: + with requests.get( + url, + headers=headers or {}, + stream=True, + timeout=timeout, + allow_redirects=allow_redirects, + ) as r: + r.raise_for_status() + return _stream_response_to_file(r, dest_path, label=label, show_progress=show_progress) + except KeyboardInterrupt: + raise + except Exception as e: + print(f"Error downloading {label or url}: {e}") + return False + + +def _with_hashmob_backoff( + fn: Callable[[], bool], + *, + max_attempts: int = 6, + base_delay: int = 30, + step: int = 30, + max_delay: int = 300, +) -> bool: + """Call fn() with bounded 429 backoff retry logic. + + fn() should raise _Hashmob429 to signal a rate-limit response. + Non-429 exceptions are re-raised immediately. + Returns True on success, False after max_attempts consecutive 429s. + """ + penalty = base_delay + for attempt in range(max_attempts): + try: + result = fn() + return result + except _Hashmob429: + print(f"[!] Rate limit hit (429). Backing off for {penalty} seconds...") + time.sleep(penalty) + penalty = min(penalty + step, max_delay) + step *= 2 + print(f"[!] 
Hashmob rate limit: gave up after {max_attempts} attempts.") + return False + + def _get_hate_path(): _package_path = os.path.dirname(os.path.realpath(__file__)) _repo_root = os.path.dirname(_package_path) @@ -1019,7 +1146,6 @@ class HashviewAPI: def download_wordlist( self, wordlist_id, output_file=None, *, update_dynamic: bool = False ): - import sys import re if int(wordlist_id) == 1 and update_dynamic: @@ -1038,6 +1164,7 @@ class HashviewAPI: url = f"{self.base_url}/v1/wordlists/{wordlist_id}" resp = self.session.get(url, headers=self._auth_headers(), stream=True) resp.raise_for_status() + if output_file is None: if int(wordlist_id) == 1: output_file = "dynamic-all.txt.gz" @@ -1046,37 +1173,20 @@ class HashviewAPI: match = re.search( r"filename=\"?([^\";]+)\"?", content_disp, re.IGNORECASE ) - if match: - output_file = os.path.basename(match.group(1)) - else: - output_file = f"wordlist_{wordlist_id}.gz" + output_file = ( + os.path.basename(match.group(1)) if match else f"wordlist_{wordlist_id}.gz" + ) + if not os.path.isabs(output_file): dest_dir = get_hcat_wordlists_dir() output_file = os.path.join(dest_dir, output_file) os.makedirs(os.path.dirname(output_file), exist_ok=True) - total = int(resp.headers.get("content-length", 0)) - downloaded = 0 - chunk_size = 8192 - with open(output_file, "wb") as f: - for chunk in resp.iter_content(chunk_size=chunk_size): - if chunk: - f.write(chunk) - downloaded += len(chunk) - if total > 0: - done = int(50 * downloaded / total) - bar = "[" + "=" * done + " " * (50 - done) + "]" - percent = 100 * downloaded / total - sys.stdout.write( - f"\rDownloading: {bar} {percent:5.1f}% ({downloaded}/{total} bytes)" - ) - sys.stdout.flush() - if total > 0: - sys.stdout.write("\n") - if total == 0: - print(f"Downloaded {downloaded} bytes.") - - return {"output_file": output_file, "size": downloaded} + ok = _stream_response_to_file(resp, output_file, label=output_file) + resp.close() + if ok: + return {"output_file": output_file, "size": 
def download_hashmob_wordlist(file_name, out_path):
    """Download a wordlist file from Hashmob by file name.

    Performs a rate-limited GET against the Hashmob research wordlists
    endpoint, resolving an HTML meta-refresh redirect if one is returned,
    and streams the payload to out_path atomically.

    Args:
        file_name: Hashmob wordlist file name (appended to the API URL).
        out_path: destination path for the downloaded file.

    Returns:
        True on success, False on handled failure (including exhausted
        429 backoff retries).
    """
    import re

    url = f"https://hashmob.net/api/v2/downloads/research/wordlists/{file_name}"
    api_key = get_hashmob_api_key()
    headers = {"api-key": api_key} if api_key else {}

    def _attempt():
        _hashmob_limiter.wait()
        with requests.get(
            url, headers=headers, stream=True, timeout=60, allow_redirects=True
        ) as r:
            if r.status_code == 429:
                raise _Hashmob429()
            r.raise_for_status()
            content_type = r.headers.get("Content-Type", "")
            if "text/plain" in content_type:
                # Hashmob sometimes serves an HTML meta-refresh page instead
                # of the file; follow its embedded redirect target.
                html = r.content.decode(errors="replace")
                match = re.search(
                    r"<meta[^>]+http-equiv=['\"]refresh['\"][^>]+content=['\"]0;url=([^'\"]+)['\"]",
                    html,
                    re.IGNORECASE,
                )
                if match:
                    real_url = match.group(1)
                    print(f"Found meta refresh redirect to: {real_url}")
                    return _streamed_download(real_url, out_path, label=file_name)
                print("Error: Received HTML instead of file. Possible permission or quota issue.")
                return False
            # BUG FIX: stream this already-open response instead of issuing a
            # second GET to the same URL. The re-request doubled traffic,
            # bypassed _hashmob_limiter.wait(), and a 429 on it was swallowed
            # by _streamed_download instead of triggering the backoff.
            return _stream_response_to_file(r, out_path, label=file_name)

    try:
        return _with_hashmob_backoff(_attempt)
    except Exception as e:
        print(f"Error downloading wordlist: {e}")
        return False
def download_official_wordlist(file_name, out_path):
    """Download a file from the official wordlists directory with a progress bar.

    The file is saved under the hashcat wordlists directory (the out_path
    argument is overridden by the sanitized file_name, matching the previous
    behavior of this function). ``.7z`` archives are extracted after download.

    Args:
        file_name: official wordlist file name on Hashmob.
        out_path: ignored; kept for backward compatibility with callers.

    Returns:
        True on success (download, plus extraction for .7z), False otherwise.
    """
    # BUG FIX: the refactor dropped this assignment while the body still
    # used `url`, which raised NameError on every call.
    url = f"https://hashmob.net/api/v2/downloads/research/official/{file_name}"
    # NOTE(review): out_path is deliberately recomputed from file_name — the
    # pre-refactor code did the same, so the parameter is effectively unused.
    out_path = sanitize_filename(file_name)
    dest_dir = get_hcat_wordlists_dir()
    archive_path = (
        os.path.join(dest_dir, out_path)
        if not os.path.isabs(out_path)
        else out_path
    )
    os.makedirs(os.path.dirname(archive_path), exist_ok=True)
    ok = _streamed_download(url, archive_path, label=file_name)
    if ok and archive_path.endswith(".7z"):
        extract_with_7z(archive_path)
    return ok