Merge pull request #77 from trustedsec/bug/debug-flag-show-http-requests

fix: set HF_HUB_DISABLE_TELEMETRY before HuggingFace imports
2026-06-29 09:48:50 -07:00 · 2026-02-18 15:36:24 -05:00
parent c67a2f6800 893533c200
commit 00a60af9a6
5 changed files with 72 additions and 4 deletions
@@ -17,25 +17,52 @@ jobs:
        with:
          fetch-depth: 0

-      - name: Bump patch version
+      - name: Determine version bump type
+        id: bump-type
+        env:
+          # PR-controlled text goes through env, not inline ${{ }}, so it cannot inject shell code
+          BRANCH: ${{ github.head_ref }}
+          TITLE: ${{ github.event.pull_request.title }}
+        run: |
+          # feat/ branches or PR titles starting with "feat" bump minor; everything else bumps patch
+          bump=patch
+          if printf '%s\n' "$BRANCH" | grep -qiE '^feat/' || printf '%s\n' "$TITLE" | grep -qiE '^feat'; then
+            bump=minor
+          fi
+          echo "type=$bump" >> "$GITHUB_OUTPUT"
+          echo "Bump type: $bump (branch: $BRANCH)"
+
+      - name: Bump version
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

+          BUMP_TYPE="${{ steps.bump-type.outputs.type }}"
          latest=$(git tag --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+' | head -1)
          if [ -z "$latest" ]; then
            echo "No version tag found, starting at v0.0.1"
            next="v0.0.1"
          else
-            # Strip leading v
            version="${latest#v}"
            major=$(echo "$version" | cut -d. -f1)
            minor=$(echo "$version" | cut -d. -f2)
            patch=$(echo "$version" | cut -d. -f3)
            patch=${patch:-0}
-            next="v${major}.${minor}.$((patch + 1))"
+
+            if [ "$BUMP_TYPE" = "minor" ]; then
+              next="v${major}.$((minor + 1)).0"
+            else
+              next="v${major}.${minor}.$((patch + 1))"
+            fi
          fi

-          echo "Tagging $next (previous: ${latest:-none})"
+          echo "Tagging $next (previous: ${latest:-none}, bump: $BUMP_TYPE)"
          git tag -a "$next" -m "Release $next"
          git push origin "$next"
+
+      - name: Create GitHub Release
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          latest=$(git tag --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+' | head -1)
+          gh release create "$latest" --title "$latest" --notes "Release $latest" --latest
@@ -30,3 +30,9 @@ def setup_logging(logger: logging.Logger, hate_path: str, debug_mode: bool) -> N
            logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        )
        logger.addHandler(stream_handler)
+    # Show HTTP requests made by the requests/urllib3 library.
+    debug_handler = logging.StreamHandler(sys.stderr)
+    debug_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s"))
+    urllib3_logger = logging.getLogger("urllib3")
+    urllib3_logger.setLevel(logging.DEBUG)
+    urllib3_logger.addHandler(debug_handler)
@@ -37,6 +37,9 @@ try:
 except Exception:
    pass

+# Disable HuggingFace telemetry before any HF-related imports
+os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
+
 HAS_ML_DEPS = False
 try:
    import torch  # noqa: F401
@@ -2323,6 +2326,8 @@ def hcatPassGPTTrain(training_file, base_model=None, device=None):
    ]
    if device:
        cmd.extend(["--device", device])
+    if debug_mode:
+        cmd.append("--debug")
    print(f"[*] Running: {_format_cmd(cmd)}")
    proc = subprocess.Popen(cmd)
    try:
@@ -2363,6 +2368,8 @@ def hcatPassGPT(
        "--batch-size",
        str(batch_size),
    ]
+    if debug_mode:
+        gen_cmd.append("--debug")
    hashcat_cmd = [
        hcatBin,
        "-m",
@@ -42,11 +42,19 @@ def generate(
    batch_size: int,
    max_length: int,
    device: str | None,
+    debug: bool = False,
 ) -> None:
    # If MPS is requested (or will be auto-detected), set memory limit before importing torch
    if device == "mps" or device is None:
        _configure_mps()

+    if debug:
+        import logging
+
+        logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
+        logging.getLogger("urllib3").setLevel(logging.DEBUG)
+        logging.getLogger("huggingface_hub").setLevel(logging.DEBUG)
+
    import torch
    from transformers import GPT2LMHeadModel  # type: ignore[attr-defined]
    from transformers import RobertaTokenizerFast  # type: ignore[attr-defined]
@@ -142,6 +150,11 @@ def main() -> None:
        default=None,
        help="Device: cuda, mps, or cpu (default: auto-detect)",
    )
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Enable debug logging for HTTP requests",
+    )
    args = parser.parse_args()
    generate(
        num=args.num,
@@ -149,6 +162,7 @@ def main() -> None:
        batch_size=args.batch_size,
        max_length=args.max_length,
        device=args.device,
+        debug=args.debug,
    )


@@ -113,6 +113,7 @@ def train(
    device: str | None,
    max_lines: int = 0,
    memory_limit: int = 0,
+    debug: bool = False,
 ) -> None:
    # --- Memory pre-check ---
    if memory_limit > 0:
@@ -135,6 +136,13 @@ def train(
            )
            sys.exit(1)

+    if debug:
+        import logging
+
+        logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
+        logging.getLogger("urllib3").setLevel(logging.DEBUG)
+        logging.getLogger("huggingface_hub").setLevel(logging.DEBUG)
+
    estimated = _estimate_training_memory_mb(training_file, max_lines=max_lines)
    available = _get_available_memory_mb()
    if available is not None and estimated > available:
@@ -312,6 +320,11 @@ def main() -> None:
        default=0,
        help="Memory cap in MB; auto-tunes --max-lines to fit (default: 0, no limit)",
    )
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Enable debug logging for HTTP requests",
+    )
    args = parser.parse_args()
    train(
        training_file=args.training_file,
@@ -322,6 +335,7 @@ def main() -> None:
        device=args.device,
        max_lines=args.max_lines,
        memory_limit=args.memory_limit,
+        debug=args.debug,
    )