Merge pull request #120 from vmfunc/feat/day2-batch1

feat: dirlist overhaul, scan bug sweep, live-host probe + sarif/markdown export, modules tests
2026-06-12 11:01:24 -07:00 · 2026-06-10 15:13:42 -07:00
parent 306f9a864d c3a755f934
commit 5166b8d8e6
26 changed files with 3138 additions and 126 deletions
@@ -89,6 +89,7 @@ linters:
          - errcheck
          - noctx
          - gosec  # fake credentials in secret-scanner fixtures are not real keys
+          - bodyclose  # synthetic *http.Response fixtures carry no socket to close

 issues:
  max-issues-per-linter: 50
@@ -157,6 +157,14 @@ sif has a modular architecture. modules are defined in yaml and can be extended
 | flag | description |
 |------|-------------|
 | `-dirlist` | directory and file fuzzing (small/medium/large) |
+| `-mc` | dirlist: match these status codes (comma list, e.g. 200,301) |
+| `-fc` | dirlist: filter out these status codes (comma list) |
+| `-fs` | dirlist: filter out responses of these body sizes (comma list) |
+| `-fw` | dirlist: filter out responses with these word counts (comma list) |
+| `-fr` | dirlist: filter out responses whose body matches this regex |
+| `-ac` | dirlist: auto-calibrate the soft-404 wildcard baseline |
+| `-w` | dirlist: custom wordlist (local file or url; overrides `-dirlist` size) |
+| `-e` | dirlist: extensions appended to each word (comma list, e.g. php,bak,env) |
 | `-dnslist` | subdomain enumeration (small/medium/large) |
 | `-ports` | port scanning (common/full) |
 | `-nuclei` | vulnerability scanning with nuclei templates |
@@ -180,6 +188,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended
 | `-crawl` | web crawler (spider same-host links/scripts/forms) |
 | `-crawl-depth` | max crawl recursion depth (default 2) |
 | `-passive` | passive subdomain/url discovery (zero traffic to target) |
+| `-probe` | live-host probe (status, title, server, redirect chain) |

 ### http options

@@ -199,6 +208,22 @@ these apply to every outbound request across all scanners:

 a scanner that sets a header explicitly (e.g. an api key) always wins over the global default.

+### report export
+
+write the run's findings out to a file for ci/cd or triage:
+
+| flag | description |
+|------|-------------|
+| `-sarif` | write a sarif 2.1.0 report to this file |
+| `-markdown`, `-md` | write a markdown report to this file |
+
+```bash
+# scan and emit both a sarif and markdown report
+./sif -u https://example.com -headers -cors -sarif out.sarif -md out.md
+```
+
+sarif output is ingestible by github code scanning; markdown is a readable per-target summary.
+
 ### yaml modules

 list available modules:
@@ -33,6 +33,42 @@ sizes: `small`, `medium`, `large`
 ./sif -u https://example.com -dirlist medium
 ```

+#### response filters
+
+many modern apps (SPAs especially) serve a catch-all 200 for unknown routes, so
+a naive scan reports every path as a hit. these ffuf-style filters cut the noise
+(when a response satisfies both a match and a filter, the filter wins and it is dropped):
+
+- `-mc <codes>` - match only these status codes (comma list, e.g. `200,301`)
+- `-fc <codes>` - filter out these status codes
+- `-fs <sizes>` - filter out responses of these body sizes
+- `-fw <counts>` - filter out responses with these word counts
+- `-fr <regex>` - filter out responses whose body matches this regex
+
+```bash
+./sif -u https://example.com -dirlist medium -mc 200,301 -fs 1234
+```
+
+#### wildcard calibration
+
+`-ac` probes a few paths that cannot exist, learns the soft-404 baseline
+(status + size + words), and auto-drops any response matching it - so SPA
+catch-all 200s stop flooding the output:
+
+```bash
+./sif -u https://example.com -dirlist medium -ac
+```
+
+#### custom wordlists and extensions
+
+`-w <path|url>` replaces the built-in `-dirlist` size list with your own
+wordlist (local file or remote url); `-e <exts>` appends each extension to
+every word, keeping the bare word too:
+
+```bash
+./sif -u https://example.com -w /path/to/words.txt -e php,bak,env
+```
+
 ### subdomain enumeration

 `-dnslist <size>` - enumerate subdomains
@@ -206,6 +242,14 @@ keyless and zero traffic to the target itself - all lookups hit third-party feed
 ./sif -u https://example.com -passive
 ```

+### live-host probe
+
+`-probe` - check whether the target is alive and report its final status, page title, server header, content-length and the redirect chain it walked
+
+```bash
+./sif -u https://example.com -probe
+```
+
 ### whois lookup

 `-whois` - perform whois lookups
@@ -327,6 +371,26 @@ cap outbound requests per second (0 = unlimited, default 0):
 ./sif -u https://example.com -rate-limit 20
 ```

+## output options
+
+write the collected findings out to a file after the scan. both formats can be requested in the same run.
+
+### -sarif
+
+write a sarif 2.1.0 report (one run, tool `sif`, one result per finding). the output is ingestible by github code scanning and other sarif consumers:
+
+```bash
+./sif -u https://example.com -headers -cors -sarif out.sarif
+```
+
+### -markdown, -md
+
+write a readable markdown report grouped by target, then by module:
+
+```bash
+./sif -u https://example.com -headers -cors -md report.md
+```
+
 ## api options

 ### -api
@@ -21,6 +21,14 @@ import (

 type Settings struct {
 	Dirlist           string
+	DirMatchCodes     string // -mc dirlist: status codes to keep
+	DirFilterCodes    string // -fc dirlist: status codes to drop
+	DirFilterSizes    string // -fs dirlist: body sizes to drop
+	DirFilterWords    string // -fw dirlist: word counts to drop
+	DirFilterRegex    string // -fr dirlist: regex; body match drops response
+	DirCalibrate      bool   // -ac dirlist: auto-calibrate soft-404 baseline
+	DirWordlist       string // -w  dirlist: custom wordlist (file path or url)
+	DirExtensions     string // -e  dirlist: extensions appended to each word
 	Dnslist           string
 	Debug             bool
 	LogDir            string
@@ -53,6 +61,9 @@ type Settings struct {
 	Crawl             bool
 	CrawlDepth        int
 	Passive           bool
+	Probe             bool
+	SARIF             string // path to write a sarif 2.1.0 report to ("" = off)
+	Markdown          string // path to write a markdown report to ("" = off)
 	Modules           string // Comma-separated list of module IDs to run
 	ModuleTags        string // Run modules matching these tags
 	AllModules        bool   // Run all loaded modules
@@ -100,6 +111,14 @@ func Parse() *Settings {
 	portScopes := goflags.AllowdTypes{"common": Common, "full": Full, "none": Nil}
 	flagSet.CreateGroup("scans", "Scans",
 		flagSet.EnumVar(&settings.Dirlist, "dirlist", Nil, "Directory fuzzing scan size (small/medium/large)", listSizes),
+		flagSet.StringVar(&settings.DirMatchCodes, "mc", "", "Dirlist: match these status codes (comma list, e.g. 200,301)"),
+		flagSet.StringVar(&settings.DirFilterCodes, "fc", "", "Dirlist: filter out these status codes (comma list)"),
+		flagSet.StringVar(&settings.DirFilterSizes, "fs", "", "Dirlist: filter out responses of these body sizes (comma list)"),
+		flagSet.StringVar(&settings.DirFilterWords, "fw", "", "Dirlist: filter out responses with these word counts (comma list)"),
+		flagSet.StringVar(&settings.DirFilterRegex, "fr", "", "Dirlist: filter out responses whose body matches this regex"),
+		flagSet.BoolVar(&settings.DirCalibrate, "ac", false, "Dirlist: auto-calibrate the soft-404 wildcard baseline"),
+		flagSet.StringVar(&settings.DirWordlist, "w", "", "Dirlist: custom wordlist (local file path or url; overrides -dirlist size)"),
+		flagSet.StringVar(&settings.DirExtensions, "e", "", "Dirlist: extensions appended to each word (comma list, e.g. php,bak,env)"),
 		flagSet.EnumVar(&settings.Dnslist, "dnslist", Nil, "DNS fuzzing scan size (small/medium/large)", listSizes),
 		flagSet.EnumVar(&settings.Ports, "ports", Nil, "Port scanning scope (common/full)", portScopes),
 		flagSet.BoolVar(&settings.Dorking, "dork", false, "Enable Google dorking"),
@@ -124,6 +143,7 @@ func Parse() *Settings {
 		flagSet.BoolVar(&settings.Crawl, "crawl", false, "Enable web crawling (spider same-host links/scripts/forms)"),
 		flagSet.IntVar(&settings.CrawlDepth, "crawl-depth", defaultCrawlDepth, "Max crawl recursion depth"),
 		flagSet.BoolVar(&settings.Passive, "passive", false, "Enable passive subdomain/url discovery (zero traffic to target)"),
+		flagSet.BoolVar(&settings.Probe, "probe", false, "Probe the target for liveness (status, title, server, redirect chain)"),
 	)

 	flagSet.CreateGroup("runtime", "Runtime",
@@ -141,6 +161,11 @@ func Parse() *Settings {
 		flagSet.IntVar(&settings.RateLimit, "rate-limit", 0, "Max requests per second (0 = unlimited)"),
 	)

+	flagSet.CreateGroup("output", "Output",
+		flagSet.StringVar(&settings.SARIF, "sarif", "", "Write a SARIF 2.1.0 report to this file"),
+		flagSet.StringVarP(&settings.Markdown, "markdown", "md", "", "Write a markdown report to this file"),
+	)
+
 	flagSet.CreateGroup("api", "API",
 		flagSet.BoolVar(&settings.ApiMode, "api", false, "Enable API mode. Only useful for internal lunchcat usage"),
 	)
@@ -14,6 +14,7 @@ package modules

 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -26,6 +27,11 @@ import (
 // MaxBodySize limits response body to prevent memory exhaustion.
 const MaxBodySize = 5 * 1024 * 1024

+// ErrUnsupportedModuleType is returned by executors whose module type is not
+// yet implemented. Returning it (rather than an empty result) keeps callers
+// from mistaking "not implemented" for "scanned, found nothing".
+var ErrUnsupportedModuleType = errors.New("unsupported module type")
+
 // httpRequest represents a generated HTTP request.
 type httpRequest struct {
 	Method   string
@@ -379,22 +385,16 @@ func truncateEvidence(s string) string {
 	return s
 }

-// ExecuteDNSModule runs a DNS-based module (stub for now).
-func ExecuteDNSModule(ctx context.Context, target string, def *YAMLModule, opts Options) (*Result, error) {
-	// TODO: Implement DNS module execution
-	return &Result{
-		ModuleID: def.ID,
-		Target:   target,
-		Findings: []Finding{},
-	}, nil
+// ExecuteDNSModule is the executor for DNS-based modules (not yet implemented).
+// It always returns a nil result and an error wrapping ErrUnsupportedModuleType,
+// so callers get an explicit failure, not an empty, successful-looking result.
+func ExecuteDNSModule(_ context.Context, _ string, def *YAMLModule, _ Options) (*Result, error) {
+	return nil, fmt.Errorf("dns module %q: %w", def.ID, ErrUnsupportedModuleType)
 }

-// ExecuteTCPModule runs a TCP-based module (stub for now).
-func ExecuteTCPModule(ctx context.Context, target string, def *YAMLModule, opts Options) (*Result, error) {
-	// TODO: Implement TCP module execution
-	return &Result{
-		ModuleID: def.ID,
-		Target:   target,
-		Findings: []Finding{},
-	}, nil
+// ExecuteTCPModule is the executor for TCP-based modules (not yet implemented).
+// It always returns a nil result and an error wrapping ErrUnsupportedModuleType,
+// so callers get an explicit failure, not an empty, successful-looking result.
+func ExecuteTCPModule(_ context.Context, _ string, def *YAMLModule, _ Options) (*Result, error) {
+	return nil, fmt.Errorf("tcp module %q: %w", def.ID, ErrUnsupportedModuleType)
 }
@@ -0,0 +1,270 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package modules
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/dropalldatabases/sif/internal/httpx"
+)
+
+const testTimeout = 5 * time.Second
+
+// TestExecuteHTTPModuleMatchAndExtract drives the full executor against a live
+// httptest server: a request hits a path, a matcher fires, an extractor captures.
+func TestExecuteHTTPModuleMatchAndExtract(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/admin" {
+			w.Header().Set("X-App", "demo")
+			w.WriteHeader(http.StatusOK)
+			_, _ = w.Write([]byte(`flag{found-it} session=sess-4242`))
+			return
+		}
+		w.WriteHeader(http.StatusNotFound)
+	}))
+	defer srv.Close()
+
+	def := &YAMLModule{
+		ID:   "test-http-hit",
+		Type: TypeHTTP,
+		Info: YAMLModuleInfo{Severity: "high"},
+		HTTP: &HTTPConfig{
+			Method: "GET",
+			Paths:  []string{"{{BaseURL}}/admin", "{{BaseURL}}/missing"},
+			Matchers: []Matcher{
+				{Type: "status", Status: []int{200}},
+				{Type: "word", Part: "body", Words: []string{"flag{found-it}"}},
+			},
+			Extractors: []Extractor{
+				{Type: "regex", Name: "session", Part: "body", Regex: []string{`session=(\S+)`}, Group: 1},
+			},
+		},
+	}
+
+	// route through the shared httpx client so proxy/-H/-rate-limit would apply.
+	opts := Options{Timeout: testTimeout, Client: httpx.Client(testTimeout)}
+
+	result, err := ExecuteHTTPModule(context.Background(), srv.URL, def, opts)
+	if err != nil {
+		t.Fatalf("ExecuteHTTPModule: %v", err)
+	}
+
+	// only /admin satisfies status+word, /missing returns 404.
+	if len(result.Findings) != 1 {
+		t.Fatalf("got %d findings, want 1", len(result.Findings))
+	}
+	f := result.Findings[0]
+	if f.Severity != "high" {
+		t.Errorf("severity = %q, want high (carried from Info)", f.Severity)
+	}
+	if f.Extracted["session"] != "sess-4242" {
+		t.Errorf("extracted session = %q, want sess-4242", f.Extracted["session"])
+	}
+	if f.URL != srv.URL+"/admin" {
+		t.Errorf("finding url = %q, want %q", f.URL, srv.URL+"/admin")
+	}
+}
+
+// TestExecuteHTTPModuleNoMatch confirms a module that matches nothing reports
+// zero findings without erroring.
+func TestExecuteHTTPModuleNoMatch(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte("nothing interesting"))
+	}))
+	defer srv.Close()
+
+	def := &YAMLModule{
+		ID:   "test-http-miss",
+		Type: TypeHTTP,
+		HTTP: &HTTPConfig{
+			Paths: []string{"{{BaseURL}}/"},
+			Matchers: []Matcher{
+				{Type: "word", Part: "body", Words: []string{"never-present"}},
+			},
+		},
+	}
+
+	result, err := ExecuteHTTPModule(context.Background(), srv.URL, def, Options{Timeout: testTimeout, Client: httpx.Client(testTimeout)})
+	if err != nil {
+		t.Fatalf("ExecuteHTTPModule: %v", err)
+	}
+	if len(result.Findings) != 0 {
+		t.Fatalf("got %d findings, want 0", len(result.Findings))
+	}
+}
+
+// TestExecuteHTTPModulePayloadExpansion verifies payload templates reach the
+// server and the matching response is captured.
+func TestExecuteHTTPModulePayloadExpansion(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// only the "boom" payload triggers the vulnerable branch.
+		if r.URL.Query().Get("q") == "boom" {
+			_, _ = w.Write([]byte("error: sql syntax near boom"))
+			return
+		}
+		_, _ = w.Write([]byte("ok"))
+	}))
+	defer srv.Close()
+
+	def := &YAMLModule{
+		ID:   "test-http-payload",
+		Type: TypeHTTP,
+		HTTP: &HTTPConfig{
+			Paths:    []string{"{{BaseURL}}/search?q={{payload}}"},
+			Payloads: []string{"safe", "boom"},
+			Matchers: []Matcher{
+				{Type: "word", Part: "body", Words: []string{"sql syntax"}},
+			},
+		},
+	}
+
+	result, err := ExecuteHTTPModule(context.Background(), srv.URL, def, Options{Timeout: testTimeout, Client: httpx.Client(testTimeout)})
+	if err != nil {
+		t.Fatalf("ExecuteHTTPModule: %v", err)
+	}
+	if len(result.Findings) != 1 {
+		t.Fatalf("got %d findings, want 1 (only boom payload)", len(result.Findings))
+	}
+}
+
+func TestExecuteHTTPModuleNoConfig(t *testing.T) {
+	def := &YAMLModule{ID: "x", Type: TypeHTTP}
+	if _, err := ExecuteHTTPModule(context.Background(), "http://h", def, Options{}); err == nil {
+		t.Fatal("expected error when HTTP config is nil")
+	}
+}
+
+// TestExecuteHTTPModuleContextCancel pins the cancellation path. The dispatch
+// loop selects between ctx.Done() and the concurrency semaphore, so a cancelled
+// context can either short-circuit with ctx.Err() or let the in-flight request
+// fail on the dead context. Both are correct: the contract is "never hang, never
+// invent a finding", which is what we assert here rather than forcing one race
+// winner (that made this test flaky under -count).
+func TestExecuteHTTPModuleContextCancel(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer srv.Close()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	def := &YAMLModule{
+		ID:   "test-http-cancel",
+		Type: TypeHTTP,
+		HTTP: &HTTPConfig{
+			Paths:    []string{"{{BaseURL}}/a"},
+			Matchers: []Matcher{{Type: "status", Status: []int{200}}},
+		},
+	}
+
+	result, err := ExecuteHTTPModule(ctx, srv.URL, def, Options{Timeout: testTimeout, Client: httpx.Client(testTimeout)})
+	if err != nil {
+		if !errors.Is(err, context.Canceled) {
+			t.Fatalf("err = %v, want context.Canceled or nil", err)
+		}
+		return
+	}
+	// no error means the request was dispatched but failed on the dead context;
+	// either way a cancelled scan must not surface findings.
+	if len(result.Findings) != 0 {
+		t.Fatalf("cancelled scan produced %d findings, want 0", len(result.Findings))
+	}
+}
+
+// TestExecuteDNSModuleUnsupported pins the current behavior: DNS execution is
+// not implemented and must signal it via ErrUnsupportedModuleType, not by
+// quietly returning an empty (successful-looking) result.
+func TestExecuteDNSModuleUnsupported(t *testing.T) {
+	def := &YAMLModule{ID: "dns-mod", Type: TypeDNS, DNS: &DNSConfig{Type: "A"}}
+	result, err := ExecuteDNSModule(context.Background(), "example.com", def, Options{})
+	if result != nil {
+		t.Errorf("result = %v, want nil for unsupported type", result)
+	}
+	if !errors.Is(err, ErrUnsupportedModuleType) {
+		t.Fatalf("err = %v, want ErrUnsupportedModuleType", err)
+	}
+}
+
+func TestExecuteTCPModuleUnsupported(t *testing.T) {
+	def := &YAMLModule{ID: "tcp-mod", Type: TypeTCP, TCP: &TCPConfig{Port: 22}}
+	result, err := ExecuteTCPModule(context.Background(), "example.com", def, Options{})
+	if result != nil {
+		t.Errorf("result = %v, want nil for unsupported type", result)
+	}
+	if !errors.Is(err, ErrUnsupportedModuleType) {
+		t.Fatalf("err = %v, want ErrUnsupportedModuleType", err)
+	}
+}
+
+// TestWrapperExecuteRoutesByType confirms the Module wrapper dispatches each
+// type to the right executor and propagates the unsupported-type sentinel.
+func TestWrapperExecuteRoutesByType(t *testing.T) {
+	t.Run("dns routes to unsupported", func(t *testing.T) {
+		def := &YAMLModule{ID: "d", Type: TypeDNS, DNS: &DNSConfig{}}
+		w := newYAMLModuleWrapper(def, "d.yaml")
+		if _, err := w.Execute(context.Background(), "t", Options{}); !errors.Is(err, ErrUnsupportedModuleType) {
+			t.Fatalf("err = %v, want ErrUnsupportedModuleType", err)
+		}
+	})
+
+	t.Run("tcp routes to unsupported", func(t *testing.T) {
+		def := &YAMLModule{ID: "t", Type: TypeTCP, TCP: &TCPConfig{}}
+		w := newYAMLModuleWrapper(def, "t.yaml")
+		if _, err := w.Execute(context.Background(), "t", Options{}); !errors.Is(err, ErrUnsupportedModuleType) {
+			t.Fatalf("err = %v, want ErrUnsupportedModuleType", err)
+		}
+	})
+
+	t.Run("missing http config errors", func(t *testing.T) {
+		def := &YAMLModule{ID: "h", Type: TypeHTTP}
+		w := newYAMLModuleWrapper(def, "h.yaml")
+		if _, err := w.Execute(context.Background(), "t", Options{}); err == nil {
+			t.Fatal("expected error for missing http config")
+		}
+	})
+
+	t.Run("unknown type errors", func(t *testing.T) {
+		def := &YAMLModule{ID: "z", Type: ModuleType("bogus")}
+		w := newYAMLModuleWrapper(def, "z.yaml")
+		if _, err := w.Execute(context.Background(), "t", Options{}); err == nil {
+			t.Fatal("expected error for unknown module type")
+		}
+	})
+}
+
+func TestTruncateEvidence(t *testing.T) {
+	short := "short evidence"
+	if got := truncateEvidence(short); got != short {
+		t.Errorf("short evidence changed: %q", got)
+	}
+
+	long := make([]byte, 600)
+	for i := range long {
+		long[i] = 'a'
+	}
+	got := truncateEvidence(string(long))
+	// 500 chars of content plus the ellipsis marker.
+	if len(got) != 503 {
+		t.Errorf("truncated len = %d, want 503", len(got))
+	}
+	if got[len(got)-3:] != "..." {
+		t.Errorf("truncated evidence missing ellipsis: %q", got[len(got)-3:])
+	}
+}
@@ -0,0 +1,269 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package modules
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// writeModule drops a yaml file into a temp dir and returns its path.
+func writeModule(t *testing.T, dir, name, content string) string {
+	t.Helper()
+	path := filepath.Join(dir, name)
+	if err := os.WriteFile(path, []byte(content), 0o600); err != nil {
+		t.Fatalf("write module: %v", err)
+	}
+	return path
+}
+
+func TestParseYAMLModuleValid(t *testing.T) {
+	const doc = `id: example-http
+type: http
+info:
+  name: Example
+  author: azzie
+  severity: medium
+  description: a test module
+  tags: [test, demo]
+http:
+  method: GET
+  paths:
+    - "{{BaseURL}}/admin"
+  matchers:
+    - type: status
+      status: [200]
+    - type: word
+      part: body
+      words: ["admin"]
+      condition: and
+  extractors:
+    - type: regex
+      name: token
+      part: body
+      regex: ["token=(\\w+)"]
+      group: 1
+`
+	dir := t.TempDir()
+	path := writeModule(t, dir, "ok.yaml", doc)
+
+	def, err := ParseYAMLModule(path)
+	if err != nil {
+		t.Fatalf("ParseYAMLModule: %v", err)
+	}
+	if def.ID != "example-http" {
+		t.Errorf("id = %q, want example-http", def.ID)
+	}
+	if def.Type != TypeHTTP {
+		t.Errorf("type = %q, want http", def.Type)
+	}
+	if def.Info.Severity != "medium" {
+		t.Errorf("severity = %q, want medium", def.Info.Severity)
+	}
+	if def.HTTP == nil {
+		t.Fatal("http config not parsed")
+	}
+	if len(def.HTTP.Matchers) != 2 {
+		t.Errorf("got %d matchers, want 2", len(def.HTTP.Matchers))
+	}
+	if len(def.HTTP.Extractors) != 1 || def.HTTP.Extractors[0].Group != 1 {
+		t.Errorf("extractor not parsed correctly: %+v", def.HTTP.Extractors)
+	}
+	if len(def.Info.Tags) != 2 {
+		t.Errorf("got %d tags, want 2", len(def.Info.Tags))
+	}
+}
+
+func TestParseYAMLModuleErrors(t *testing.T) {
+	dir := t.TempDir()
+
+	tests := []struct {
+		name    string
+		content string
+	}{
+		{
+			name:    "missing id",
+			content: "type: http\nhttp:\n  paths: [\"/\"]\n",
+		},
+		{
+			name:    "missing type",
+			content: "id: no-type\nhttp:\n  paths: [\"/\"]\n",
+		},
+		{
+			name:    "malformed yaml",
+			content: "id: bad\ntype: http\n  paths: [unbalanced\n   : nope\n",
+		},
+		{
+			// a scalar where a mapping is expected must fail to unmarshal.
+			name:    "type mismatch",
+			content: "id: bad-shape\ntype: http\nhttp: \"should-be-a-map\"\n",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			path := writeModule(t, dir, tt.name+".yaml", tt.content)
+			if _, err := ParseYAMLModule(path); err == nil {
+				t.Fatalf("expected error for %s", tt.name)
+			}
+		})
+	}
+}
+
+func TestParseYAMLModuleMissingFile(t *testing.T) {
+	if _, err := ParseYAMLModule(filepath.Join(t.TempDir(), "does-not-exist.yaml")); err == nil {
+		t.Fatal("expected error for missing file")
+	}
+}
+
+func TestYAMLModuleWrapperInfoAndType(t *testing.T) {
+	def := &YAMLModule{
+		ID:   "wrap-test",
+		Type: TypeHTTP,
+		Info: YAMLModuleInfo{
+			Name:        "Wrapped",
+			Author:      "azzie",
+			Severity:    "low",
+			Description: "desc",
+			Tags:        []string{"a", "b"},
+		},
+	}
+	w := newYAMLModuleWrapper(def, "wrap.yaml")
+
+	if w.Type() != TypeHTTP {
+		t.Errorf("Type() = %q, want http", w.Type())
+	}
+	info := w.Info()
+	if info.ID != "wrap-test" || info.Name != "Wrapped" || info.Severity != "low" {
+		t.Errorf("Info() mismatch: %+v", info)
+	}
+	if len(info.Tags) != 2 {
+		t.Errorf("Info().Tags = %v, want 2 entries", info.Tags)
+	}
+}
+
+// TestLoaderLoadAll exercises the directory walk: a valid module registers, a
+// malformed one is skipped without aborting the walk.
+func TestLoaderLoadAll(t *testing.T) {
+	Clear()
+	t.Cleanup(Clear)
+
+	dir := t.TempDir()
+	writeModule(t, dir, "good.yaml", "id: good-mod\ntype: http\nhttp:\n  paths: [\"{{BaseURL}}/\"]\n  matchers:\n    - type: status\n      status: [200]\n")
+	writeModule(t, dir, "bad.yml", "id: bad-mod\n") // missing type -> skipped
+	writeModule(t, dir, "ignore.txt", "not a module")
+
+	l := &Loader{builtinDir: dir, userDir: filepath.Join(dir, "nonexistent-user")}
+	if err := l.LoadAll(); err != nil {
+		t.Fatalf("LoadAll: %v", err)
+	}
+
+	// only the good module loads; the malformed one is logged and skipped.
+	if l.Loaded() != 1 {
+		t.Errorf("Loaded() = %d, want 1", l.Loaded())
+	}
+	if _, ok := Get("good-mod"); !ok {
+		t.Error("good-mod not registered")
+	}
+	if _, ok := Get("bad-mod"); ok {
+		t.Error("bad-mod should not have registered")
+	}
+}
+
+func TestNewLoaderDirs(t *testing.T) {
+	l, err := NewLoader()
+	if err != nil {
+		t.Fatalf("NewLoader: %v", err)
+	}
+	if l.BuiltinDir() == "" {
+		t.Error("BuiltinDir is empty")
+	}
+	if l.UserDir() == "" {
+		t.Error("UserDir is empty")
+	}
+}
+
+// TestRegistry exercises the package-level registry: register, get, dedupe by
+// id, filter by tag and type, count and clear.
+func TestRegistry(t *testing.T) {
+	Clear()
+	t.Cleanup(Clear)
+
+	http1 := newYAMLModuleWrapper(&YAMLModule{ID: "h1", Type: TypeHTTP, Info: YAMLModuleInfo{Tags: []string{"web", "cve"}}}, "h1")
+	http2 := newYAMLModuleWrapper(&YAMLModule{ID: "h2", Type: TypeHTTP, Info: YAMLModuleInfo{Tags: []string{"web"}}}, "h2")
+	dns1 := newYAMLModuleWrapper(&YAMLModule{ID: "d1", Type: TypeDNS, Info: YAMLModuleInfo{Tags: []string{"dns"}}}, "d1")
+
+	Register(http1)
+	Register(http2)
+	Register(dns1)
+
+	if Count() != 3 {
+		t.Fatalf("Count() = %d, want 3", Count())
+	}
+
+	got, ok := Get("h1")
+	if !ok || got.Info().ID != "h1" {
+		t.Errorf("Get(h1) = %v, %v", got, ok)
+	}
+	if _, ok := Get("missing"); ok {
+		t.Error("Get(missing) should report not found")
+	}
+
+	if n := len(ByType(TypeHTTP)); n != 2 {
+		t.Errorf("ByType(http) = %d, want 2", n)
+	}
+	if n := len(ByType(TypeDNS)); n != 1 {
+		t.Errorf("ByType(dns) = %d, want 1", n)
+	}
+	if n := len(ByTag("web")); n != 2 {
+		t.Errorf("ByTag(web) = %d, want 2", n)
+	}
+	if n := len(ByTag("cve")); n != 1 {
+		t.Errorf("ByTag(cve) = %d, want 1", n)
+	}
+	if n := len(ByTag("none")); n != 0 {
+		t.Errorf("ByTag(none) = %d, want 0", n)
+	}
+	if n := len(All()); n != 3 {
+		t.Errorf("All() = %d, want 3", n)
+	}
+
+	// re-registering the same id overwrites rather than duplicating.
+	Register(newYAMLModuleWrapper(&YAMLModule{ID: "h1", Type: TypeHTTP}, "h1-v2"))
+	if Count() != 3 {
+		t.Errorf("Count() after re-register = %d, want 3", Count())
+	}
+
+	Clear()
+	if Count() != 0 {
+		t.Errorf("Count() after Clear = %d, want 0", Count())
+	}
+}
+
+// TestResultType pins the ScanResult interface bridge.
+func TestResultType(t *testing.T) {
+	r := &Result{ModuleID: "abc"}
+	if r.ResultType() != "abc" {
+		t.Errorf("ResultType() = %q, want abc", r.ResultType())
+	}
+}
+
+// TestLoaderScriptStubNoop confirms the go-script loader is currently a no-op
+// that registers nothing and returns no error.
+func TestLoaderScriptStubNoop(t *testing.T) {
+	l := &Loader{}
+	if err := l.loadScript("anything.go"); err != nil {
+		t.Errorf("loadScript stub returned error: %v", err)
+	}
+}
@@ -0,0 +1,465 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package modules
+
+import (
+	"net/http"
+	"strings"
+	"testing"
+)
+
+// fakeResponse returns a synthetic *http.Response for matcher/extractor tests.
+// Body is http.NoBody, so there is no socket to close (bodyclose is excluded
+// for test files in .golangci.yml). a nil header becomes an empty http.Header;
+// matchers are handed the body as a string argument, not via resp.Body.
+func fakeResponse(t *testing.T, status int, header http.Header) *http.Response {
+	t.Helper()
+	resp := &http.Response{StatusCode: status, Header: header, Body: http.NoBody}
+	if resp.Header == nil {
+		resp.Header = http.Header{}
+	}
+	return resp
+}
+
+// TestCheckMatcherStatus covers status matchers: the response code must appear
+// in the matcher's Status list, and an empty list never matches.
+func TestCheckMatcherStatus(t *testing.T) {
+	cases := []struct {
+		name   string
+		status int
+		want   []int
+		expect bool
+	}{
+		{name: "single match", status: 200, want: []int{200}, expect: true},
+		{name: "one of many", status: 404, want: []int{200, 301, 404}, expect: true},
+		{name: "no match", status: 500, want: []int{200, 404}, expect: false},
+		{name: "empty status list", status: 200, want: nil, expect: false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			matcher := &Matcher{Type: "status", Status: tc.want}
+			if got := checkMatcher(matcher, fakeResponse(t, tc.status, nil), ""); got != tc.expect {
+				t.Errorf("checkMatcher status = %v, want %v", got, tc.expect)
+			}
+		})
+	}
+}
+
+// TestCheckMatcherWord covers body-part word matchers under the "and"/"or"
+// conditions, including that an empty condition behaves like "and".
+func TestCheckMatcherWord(t *testing.T) {
+	const body = "welcome admin dashboard"
+
+	cases := []struct {
+		name      string
+		words     []string
+		condition string
+		expect    bool
+	}{
+		{name: "and all present", words: []string{"admin", "dashboard"}, condition: "and", expect: true},
+		{name: "and one missing", words: []string{"admin", "missing"}, condition: "and", expect: false},
+		{name: "default is and", words: []string{"admin", "missing"}, condition: "", expect: false},
+		{name: "or one present", words: []string{"missing", "admin"}, condition: "or", expect: true},
+		{name: "or none present", words: []string{"missing", "absent"}, condition: "or", expect: false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			matcher := &Matcher{Type: "word", Part: "body", Words: tc.words, Condition: tc.condition}
+			if got := checkMatcher(matcher, fakeResponse(t, 200, nil), body); got != tc.expect {
+				t.Errorf("checkMatcher word = %v, want %v", got, tc.expect)
+			}
+		})
+	}
+}
+
+// TestCheckMatcherRegex covers body-part regex matchers under "and"/"or",
+// including how a pattern that fails to compile is treated by each condition.
+func TestCheckMatcherRegex(t *testing.T) {
+	const body = "version 1.2.3 build 99"
+
+	cases := []struct {
+		name      string
+		patterns  []string
+		condition string
+		expect    bool
+	}{
+		{name: "and all match", patterns: []string{`version \d`, `build \d+`}, condition: "and", expect: true},
+		{name: "and one fails", patterns: []string{`version \d`, `nope\d`}, condition: "and", expect: false},
+		{name: "or one matches", patterns: []string{`nope`, `build \d+`}, condition: "or", expect: true},
+		{name: "or none match", patterns: []string{`nope`, `zilch`}, condition: "or", expect: false},
+		// an invalid pattern under AND must fail closed, not panic.
+		{name: "and invalid pattern fails closed", patterns: []string{`version \d`, `(`}, condition: "and", expect: false},
+		// under OR an invalid pattern is skipped, a later valid one can still hit.
+		{name: "or invalid pattern skipped", patterns: []string{`(`, `build \d+`}, condition: "or", expect: true},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			matcher := &Matcher{Type: "regex", Part: "body", Regex: tc.patterns, Condition: tc.condition}
+			if got := checkMatcher(matcher, fakeResponse(t, 200, nil), body); got != tc.expect {
+				t.Errorf("checkMatcher regex = %v, want %v", got, tc.expect)
+			}
+		})
+	}
+}
+
+// TestCheckMatcherHeaderPart verifies that Part selects what a word matcher
+// searches: a header value is visible to "header" but not to "body".
+func TestCheckMatcherHeaderPart(t *testing.T) {
+	resp := fakeResponse(t, 200, http.Header{"X-Powered-By": []string{"PHP/8.1"}})
+	inHeader := &Matcher{Type: "word", Part: "header", Words: []string{"PHP/8.1"}}
+	if hit := checkMatcher(inHeader, resp, "body-content"); !hit {
+		t.Error("expected header-part word matcher to hit on header value")
+	}
+
+	// the same word lives only in the header, so a body-part matcher must miss.
+	inBody := &Matcher{Type: "word", Part: "body", Words: []string{"PHP/8.1"}}
+	if hit := checkMatcher(inBody, resp, "body-content"); hit {
+		t.Error("body-part matcher should not see header-only value")
+	}
+}
+
+// TestCheckMatcherUnknownType expects an unrecognised matcher type to miss.
+func TestCheckMatcherUnknownType(t *testing.T) {
+	unknown := &Matcher{Type: "size", Part: "body"}
+	if checkMatcher(unknown, fakeResponse(t, 200, nil), "anything") {
+		t.Error("unknown matcher type should not match")
+	}
+}
+
+// TestCheckMatchers covers AND-combination, Negative inversion and the empty list.
+func TestCheckMatchers(t *testing.T) {
+	resp := fakeResponse(t, 200, http.Header{"Server": []string{"nginx"}})
+	const body = "secret token here"
+
+	cases := []struct {
+		name     string
+		matchers []Matcher
+		expect   bool
+	}{
+		{
+			name:     "empty matchers never match",
+			matchers: nil,
+			expect:   false,
+		},
+		{
+			name: "all matchers pass (AND across matchers)",
+			matchers: []Matcher{
+				{Type: "status", Status: []int{200}},
+				{Type: "word", Part: "body", Words: []string{"secret"}},
+			},
+			expect: true,
+		},
+		{
+			name: "one matcher fails breaks AND",
+			matchers: []Matcher{
+				{Type: "status", Status: []int{200}},
+				{Type: "word", Part: "body", Words: []string{"absent"}},
+			},
+			expect: false,
+		},
+		{
+			name: "negative inverts a non-match into a pass",
+			matchers: []Matcher{
+				{Type: "word", Part: "body", Words: []string{"absent"}, Negative: true},
+			},
+			expect: true,
+		},
+		{
+			name: "negative inverts a match into a fail",
+			matchers: []Matcher{
+				{Type: "word", Part: "body", Words: []string{"secret"}, Negative: true},
+			},
+			expect: false,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if verdict := checkMatchers(tc.matchers, resp, body); verdict != tc.expect {
+				t.Errorf("checkMatchers = %v, want %v", verdict, tc.expect)
+			}
+		})
+	}
+}
+
+// TestCheckWords pins checkWords for and/or, including the empty word list.
+func TestCheckWords(t *testing.T) {
+	const content = "alpha beta gamma"
+
+	cases := []struct {
+		name      string
+		words     []string
+		condition string
+		expect    bool
+	}{
+		{name: "and all present", words: []string{"alpha", "gamma"}, condition: "and", expect: true},
+		{name: "and missing", words: []string{"alpha", "delta"}, condition: "and", expect: false},
+		{name: "or present", words: []string{"delta", "beta"}, condition: "or", expect: true},
+		{name: "or absent", words: []string{"delta", "epsilon"}, condition: "or", expect: false},
+		{name: "empty under and matches vacuously", words: nil, condition: "and", expect: true},
+		{name: "empty under or matches nothing", words: nil, condition: "or", expect: false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if verdict := checkWords(content, tc.words, tc.condition); verdict != tc.expect {
+				t.Errorf("checkWords = %v, want %v", verdict, tc.expect)
+			}
+		})
+	}
+}
+
+// TestCheckRegex pins checkRegex for and/or, including uncompilable patterns.
+func TestCheckRegex(t *testing.T) {
+	const content = "id=42 name=root"
+
+	cases := []struct {
+		name      string
+		patterns  []string
+		condition string
+		expect    bool
+	}{
+		{name: "and all match", patterns: []string{`id=\d+`, `name=\w+`}, condition: "and", expect: true},
+		{name: "and one fails", patterns: []string{`id=\d+`, `zzz`}, condition: "and", expect: false},
+		{name: "or first matches", patterns: []string{`id=\d+`, `zzz`}, condition: "or", expect: true},
+		{name: "or none match", patterns: []string{`xxx`, `zzz`}, condition: "or", expect: false},
+		{name: "and bad regex fails closed", patterns: []string{`(`}, condition: "and", expect: false},
+		{name: "or bad regex skipped then match", patterns: []string{`(`, `name=\w+`}, condition: "or", expect: true},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if verdict := checkRegex(content, tc.patterns, tc.condition); verdict != tc.expect {
+				t.Errorf("checkRegex = %v, want %v", verdict, tc.expect)
+			}
+		})
+	}
+}
+
+// TestGetPart pins getPart for body, header, all, unknown and empty selectors.
+func TestGetPart(t *testing.T) {
+	resp := fakeResponse(t, 200, http.Header{"Server": []string{"nginx"}})
+	const body = "page body"
+
+	if got := getPart("body", resp, body); got != body {
+		t.Errorf("getPart body = %q, want %q", got, body)
+	}
+
+	hdr := getPart("header", resp, body)
+	if !(strings.Contains(hdr, "Server") && strings.Contains(hdr, "nginx")) {
+		t.Errorf("getPart header = %q, want it to include the header", hdr)
+	}
+	if strings.Contains(hdr, body) {
+		t.Errorf("getPart header should not include body, got %q", hdr)
+	}
+
+	merged := getPart("all", resp, body)
+	if !(strings.Contains(merged, "nginx") && strings.Contains(merged, body)) {
+		t.Errorf("getPart all = %q, want both header and body", merged)
+	}
+
+	// an unrecognised part falls back to the body.
+	if got := getPart("weird", resp, body); got != body {
+		t.Errorf("getPart fallback = %q, want body %q", got, body)
+	}
+
+	// empty part behaves like "all".
+	if got := getPart("", resp, body); !(strings.Contains(got, "nginx") && strings.Contains(got, body)) {
+		t.Errorf("getPart empty = %q, want both header and body", got)
+	}
+}
+
+// TestRunExtractors covers regex extraction from body and header parts, group
+// selection, pattern fallthrough, and the inputs that must extract nothing.
+func TestRunExtractors(t *testing.T) {
+	resp := fakeResponse(t, 200, http.Header{"X-Token": []string{"abc123"}})
+	const body = `{"session":"sess-7788","role":"admin"}`
+
+	cases := []struct {
+		name       string
+		extractors []Extractor
+		wantKey    string
+		wantVal    string
+		wantNil    bool
+	}{
+		{
+			name:       "no extractors yields nil",
+			extractors: nil,
+			wantNil:    true,
+		},
+		{
+			name: "regex capture group on body",
+			extractors: []Extractor{
+				{Type: "regex", Name: "session", Part: "body", Regex: []string{`"session":"([^"]+)"`}, Group: 1},
+			},
+			wantKey: "session",
+			wantVal: "sess-7788",
+		},
+		{
+			name: "group zero is the whole match",
+			extractors: []Extractor{
+				{Type: "regex", Name: "role", Part: "body", Regex: []string{`role":"admin`}, Group: 0},
+			},
+			wantKey: "role",
+			wantVal: `role":"admin`,
+		},
+		{
+			name: "extract from header part",
+			extractors: []Extractor{
+				{Type: "regex", Name: "token", Part: "header", Regex: []string{`X-Token: (\S+)`}, Group: 1},
+			},
+			wantKey: "token",
+			wantVal: "abc123",
+		},
+		{
+			name: "first matching pattern wins",
+			extractors: []Extractor{
+				{Type: "regex", Name: "session", Part: "body", Regex: []string{`nomatch(\d+)`, `"session":"([^"]+)"`}, Group: 1},
+			},
+			wantKey: "session",
+			wantVal: "sess-7788",
+		},
+		{
+			name: "group index out of range is skipped",
+			extractors: []Extractor{
+				{Type: "regex", Name: "session", Part: "body", Regex: []string{`"session":"([^"]+)"`}, Group: 5},
+			},
+			wantNil: true,
+		},
+		{
+			name: "invalid pattern is skipped, no capture",
+			extractors: []Extractor{
+				{Type: "regex", Name: "session", Part: "body", Regex: []string{`(`}, Group: 1},
+			},
+			wantNil: true,
+		},
+		{
+			name: "non-regex extractor type is ignored",
+			extractors: []Extractor{
+				{Type: "kval", Name: "session", Part: "body", Regex: []string{`"session":"([^"]+)"`}, Group: 1},
+			},
+			wantNil: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := runExtractors(tc.extractors, resp, body)
+			// wantNil rows expect nothing extracted; all others pin one key.
+			switch {
+			case tc.wantNil && len(got) != 0:
+				t.Errorf("runExtractors = %v, want empty", got)
+			case !tc.wantNil && got[tc.wantKey] != tc.wantVal:
+				t.Errorf("runExtractors[%q] = %q, want %q", tc.wantKey, got[tc.wantKey], tc.wantVal)
+			}
+		})
+	}
+}
+
+// TestSubstituteVariables covers BaseURL/payload placeholders in both spellings.
+func TestSubstituteVariables(t *testing.T) {
+	cases := []struct {
+		name     string
+		template string
+		baseURL  string
+		payload  string
+		want     string
+	}{
+		{
+			name:     "baseurl both cases",
+			template: "{{BaseURL}}/x and {{baseurl}}/y",
+			baseURL:  "http://h",
+			want:     "http://h/x and http://h/y",
+		},
+		{
+			name:     "payload both cases",
+			template: "q={{payload}}&r={{Payload}}",
+			payload:  "<script>",
+			want:     "q=<script>&r=<script>",
+		},
+		{
+			name:     "combined base and payload",
+			template: "{{BaseURL}}/search?q={{payload}}",
+			baseURL:  "http://h",
+			payload:  "x",
+			want:     "http://h/search?q=x",
+		},
+		{
+			name:     "no placeholders untouched",
+			template: "/static/path",
+			baseURL:  "http://h",
+			want:     "/static/path",
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if out := substituteVariables(tc.template, tc.baseURL, tc.payload); out != tc.want {
+				t.Errorf("substituteVariables = %q, want %q", out, tc.want)
+			}
+		})
+	}
+}
+
+// TestGenerateHTTPRequests covers path expansion with and without payloads.
+func TestGenerateHTTPRequests(t *testing.T) {
+	t.Run("paths without payloads", func(t *testing.T) {
+		conf := &HTTPConfig{
+			Paths: []string{"{{BaseURL}}/a", "{{BaseURL}}/b"},
+		}
+		// trailing slash on the target must be trimmed before substitution.
+		reqs := generateHTTPRequests("http://h/", conf)
+		if len(reqs) != 2 {
+			t.Fatalf("got %d requests, want 2", len(reqs))
+		}
+		if reqs[0].Method != "GET" {
+			t.Errorf("default method = %q, want GET", reqs[0].Method)
+		}
+		if reqs[0].URL != "http://h/a" || reqs[1].URL != "http://h/b" {
+			t.Errorf("urls = %q,%q", reqs[0].URL, reqs[1].URL)
+		}
+	})
+
+	t.Run("payload expansion is path x payload", func(t *testing.T) {
+		conf := &HTTPConfig{
+			Method:   "POST",
+			Paths:    []string{"{{BaseURL}}/q?x={{payload}}"},
+			Payloads: []string{"1", "2", "3"},
+			Body:     "data={{payload}}",
+		}
+		reqs := generateHTTPRequests("http://h", conf)
+		if len(reqs) != 3 {
+			t.Fatalf("got %d requests, want 3", len(reqs))
+		}
+		for i, want := range []string{"1", "2", "3"} {
+			if reqs[i].Payload != want {
+				t.Errorf("req %d payload = %q, want %q", i, reqs[i].Payload, want)
+			}
+			if reqs[i].URL != "http://h/q?x="+want {
+				t.Errorf("req %d url = %q", i, reqs[i].URL)
+			}
+			if reqs[i].Body != "data="+want {
+				t.Errorf("req %d body = %q", i, reqs[i].Body)
+			}
+			if reqs[i].Method != "POST" {
+				t.Errorf("req %d method = %q, want POST", i, reqs[i].Method)
+			}
+		}
+	})
+
+	t.Run("multiple paths times multiple payloads", func(t *testing.T) {
+		reqs := generateHTTPRequests("http://h", &HTTPConfig{
+			Paths:    []string{"{{BaseURL}}/a", "{{BaseURL}}/b"},
+			Payloads: []string{"x", "y"},
+		})
+		if len(reqs) != 4 {
+			t.Fatalf("got %d requests, want 4 (2 paths x 2 payloads)", len(reqs))
+		}
+	})
+}
@@ -0,0 +1,74 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package report
+
+import (
+	"bytes"
+	"encoding/json"
+	"sort"
+	"strings"
+)
+
+// Markdown builds a target-first report: one "##" section per target in
+// first-seen order, and under it one "###" section per module (sorted by
+// module name) whose finding is pretty-printed inside a json code fence.
+func Markdown(results []Result) []byte {
+	var out strings.Builder
+	out.WriteString("# sif scan report\n\n")
+
+	// bucket results per target, remembering the order targets first showed
+	// up so the sections follow the input rather than map iteration order.
+	var targets []string
+	grouped := make(map[string][]Result)
+	for i := range results {
+		key := results[i].Target
+		if _, known := grouped[key]; !known {
+			targets = append(targets, key)
+		}
+		grouped[key] = append(grouped[key], results[i])
+	}
+
+	for _, target := range targets {
+		out.WriteString("## " + target + "\n\n")
+
+		// stable sort by module name keeps the report deterministic across
+		// runs while preserving input order for equal names.
+		section := grouped[target]
+		sort.SliceStable(section, func(x, y int) bool {
+			return section[x].Module < section[y].Module
+		})
+
+		for k := range section {
+			finding := &section[k]
+			out.WriteString("### " + finding.Module + "\n\n")
+			out.WriteString("```json\n")
+			out.WriteString(prettyJSON(finding.Data))
+			out.WriteString("\n```\n\n")
+		}
+	}
+
+	return []byte(out.String())
+}
+
+// prettyJSON returns raw re-indented with two spaces. an empty message is
+// rendered as "null"; input that fails to indent is handed back unchanged.
+func prettyJSON(raw json.RawMessage) string {
+	if len(raw) == 0 {
+		return "null"
+	}
+	buf := new(bytes.Buffer)
+	if json.Indent(buf, raw, "", "  ") != nil {
+		return string(raw)
+	}
+	return buf.String()
+}
@@ -0,0 +1,26 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+// Package report serializes collected scan results to sarif and markdown. it's
+// deliberately decoupled from the scan package: callers map their own results
+// into report.Result, so report never imports a scanner type.
+package report
+
+import "encoding/json"
+
+// Result is one module's output for one target. Data is whatever the scanner
+// returned, carried as raw json so report stays free of scan types.
+type Result struct {
+	Target string          // target the finding belongs to
+	Module string          // id of the module that produced the finding
+	Data   json.RawMessage // raw finding json; may be empty
+}
@@ -0,0 +1,172 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package report
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+// fakeResults returns three representative findings across two targets; the
+// SARIF and Markdown content tests below share it as their input.
+func fakeResults() []Result {
+	return []Result{
+		{Target: "https://a.example.com", Module: "cors", Data: json.RawMessage(`{"severity":"high"}`)},
+		{Target: "https://a.example.com", Module: "probe", Data: json.RawMessage(`{"status_code":200}`)},
+		{Target: "https://b.example.com", Module: "redirect", Data: json.RawMessage(`{"parameter":"next"}`)},
+	}
+}
+
+// TestSARIF_ValidAndContainsFindings round-trips fakeResults through SARIF.
+func TestSARIF_ValidAndContainsFindings(t *testing.T) {
+	encoded, err := SARIF(fakeResults())
+	if err != nil {
+		t.Fatalf("SARIF: %v", err)
+	}
+	// the output must parse back into the sarif shape
+	var parsed sarifLog
+	if err := json.Unmarshal(encoded, &parsed); err != nil {
+		t.Fatalf("sarif output is not valid json: %v", err)
+	}
+
+	if parsed.Version != "2.1.0" {
+		t.Errorf("expected sarif version 2.1.0, got %q", parsed.Version)
+	}
+	if len(parsed.Runs) != 1 {
+		t.Fatalf("expected exactly one run, got %d", len(parsed.Runs))
+	}
+	run := parsed.Runs[0]
+	if run.Tool.Driver.Name != "sif" {
+		t.Errorf("expected tool name sif, got %q", run.Tool.Driver.Name)
+	}
+	if len(run.Results) != 3 {
+		t.Fatalf("expected 3 results, got %d", len(run.Results))
+	}
+
+	// each finding's module id surfaces as the ruleId and its target as the uri
+	pairs := []struct {
+		ruleID string
+		target string
+	}{
+		{"cors", "https://a.example.com"},
+		{"probe", "https://a.example.com"},
+		{"redirect", "https://b.example.com"},
+	}
+	for _, p := range pairs {
+		if !sarifHasResult(run.Results, p.ruleID, p.target) {
+			t.Errorf("expected sarif result rule=%q target=%q, got %+v", p.ruleID, p.target, run.Results)
+		}
+	}
+
+	// rules list each module id once, deduped across targets
+	if got := len(run.Tool.Driver.Rules); got != 3 {
+		t.Errorf("expected 3 deduped rules, got %d: %+v", got, run.Tool.Driver.Rules)
+	}
+}
+
+// TestSARIF_DedupesRulesAcrossTargets runs one module against two targets.
+func TestSARIF_DedupesRulesAcrossTargets(t *testing.T) {
+	// the same module on two targets must yield one rule but two results.
+	out, err := SARIF([]Result{
+		{Target: "https://a.example.com", Module: "cors", Data: json.RawMessage(`{}`)},
+		{Target: "https://b.example.com", Module: "cors", Data: json.RawMessage(`{}`)},
+	})
+	if err != nil {
+		t.Fatalf("SARIF: %v", err)
+	}
+	var doc sarifLog
+	if err := json.Unmarshal(out, &doc); err != nil {
+		t.Fatalf("invalid json: %v", err)
+	}
+	if len(doc.Runs) != 1 {
+		t.Fatalf("expected exactly one run, got %d", len(doc.Runs))
+	}
+	run := doc.Runs[0]
+	if rules, hits := len(run.Tool.Driver.Rules), len(run.Results); rules != 1 || hits != 2 {
+		t.Errorf("expected 1 deduped rule and 2 results, got %d rules and %d results", rules, hits)
+	}
+}
+
+func TestSARIF_Empty(t *testing.T) {
+	encoded, err := SARIF(nil)
+	if err != nil {
+		t.Fatalf("SARIF: %v", err)
+	}
+	var parsed sarifLog
+	if err := json.Unmarshal(encoded, &parsed); err != nil {
+		t.Fatalf("empty sarif is not valid json: %v", err)
+	}
+	if runs := len(parsed.Runs); runs != 1 {
+		t.Fatalf("expected one run even when empty, got %d", runs)
+	}
+	if findings := len(parsed.Runs[0].Results); findings != 0 {
+		t.Errorf("expected no results, got %d", findings)
+	}
+}
+
+// TestMarkdown_ContainsTargetsAndModules looks for every expected heading/body.
+func TestMarkdown_ContainsTargetsAndModules(t *testing.T) {
+	report := string(Markdown(fakeResults()))
+	fragments := []string{
+		"# sif scan report",
+		"## https://a.example.com",
+		"## https://b.example.com",
+		"### cors",
+		"### probe",
+		"### redirect",
+		`"severity": "high"`, // re-indented finding body
+		`"parameter": "next"`,
+	}
+	for _, frag := range fragments {
+		if !strings.Contains(report, frag) {
+			t.Errorf("markdown report missing %q\n---\n%s", frag, report)
+		}
+	}
+}
+
+// TestMarkdown_GroupsByTarget expects a's two modules to sit before b's header.
+func TestMarkdown_GroupsByTarget(t *testing.T) {
+	report := string(Markdown(fakeResults()))
+	aPos := strings.Index(report, "## https://a.example.com")
+	bPos := strings.Index(report, "## https://b.example.com")
+	if aPos < 0 || bPos < 0 {
+		t.Fatalf("missing target headers in:\n%s", report)
+	}
+	if aPos > bPos {
+		t.Errorf("expected target a before target b, got a=%d b=%d", aPos, bPos)
+	}
+	// both of a's modules sit between a's header and b's header
+	corsPos := strings.Index(report, "### cors")
+	probePos := strings.Index(report, "### probe")
+	if !(corsPos >= aPos && corsPos <= bPos && probePos >= aPos && probePos <= bPos) {
+		t.Errorf("expected a's modules grouped under a, cors=%d probe=%d (a=%d b=%d)", corsPos, probePos, aPos, bPos)
+	}
+}
+
+// sarifHasResult reports whether some result pairs ruleID with a location
+// whose artifact uri equals target, i.e. the finding survived serialization.
+func sarifHasResult(results []sarifResult, ruleID, target string) bool {
+	for i := range results {
+		res := &results[i]
+		if res.RuleID != ruleID {
+			continue
+		}
+		for _, loc := range res.Locations {
+			if loc.PhysicalLocation.ArtifactLocation.URI == target {
+				return true
+			}
+		}
+	}
+	return false
+}
@@ -0,0 +1,133 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package report
+
+import (
+	"encoding/json"
+	"fmt"
+)
+
+// sarif format/version constants pinned to the 2.1.0 schema so the output can
+// be ingested by github code scanning and other sarif consumers.
+const (
+	sarifVersion = "2.1.0"
+	sarifSchema  = "https://json.schemastore.org/sarif-2.1.0.json"
+	toolName     = "sif"
+)
+
+// sarifLog is the minimal valid 2.1.0 shape: one run from one tool.
+type sarifLog struct {
+	Schema  string     `json:"$schema"` // SARIF() sets this to sarifSchema
+	Version string     `json:"version"` // SARIF() sets this to sarifVersion
+	Runs    []sarifRun `json:"runs"`    // SARIF() emits exactly one run
+}
+
+type sarifRun struct {
+	Tool    sarifTool     `json:"tool"`    // the producing tool
+	Results []sarifResult `json:"results"` // one entry per input Result
+}
+
+type sarifTool struct {
+	Driver sarifDriver `json:"driver"`
+}
+
+type sarifDriver struct {
+	Name  string      `json:"name"`  // SARIF() sets this to toolName
+	Rules []sarifRule `json:"rules"` // one rule per distinct module id
+}
+
+type sarifRule struct {
+	ID string `json:"id"` // module id
+}
+
+type sarifResult struct {
+	RuleID    string          `json:"ruleId"`    // module id of the finding
+	Level     string          `json:"level"`     // SARIF() always uses sarifLevel
+	Message   sarifMessage    `json:"message"`   // text built by messageFor
+	Locations []sarifLocation `json:"locations"` // SARIF() writes a single location
+}
+
+type sarifMessage struct {
+	Text string `json:"text"`
+}
+
+type sarifLocation struct {
+	PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"`
+}
+
+type sarifPhysicalLocation struct {
+	ArtifactLocation sarifArtifactLocation `json:"artifactLocation"`
+}
+
+type sarifArtifactLocation struct {
+	URI string `json:"uri"` // SARIF() stores the result's Target here
+}
+
+// sarifLevel is the level stamped on every emitted result. sif findings carry
+// no uniform severity field, so "warning" serves as the neutral middle ground.
+const sarifLevel = "warning"
+
+// SARIF serializes results to a minimal valid sarif 2.1.0 log: one sarif result
+// per module result (rule = module id, location = target uri, message = raw
+// data). rules are deduped and listed in first-seen order for stable output.
+func SARIF(results []Result) ([]byte, error) {
+	sarifResults := make([]sarifResult, 0, len(results))
+	rules := make([]sarifRule, 0, len(results))
+	seen := make(map[string]struct{}, len(results))
+
+	for i := 0; i < len(results); i++ {
+		res := results[i]
+		// rules must list each id exactly once. appending on first sight, rather
+		// than ranging over the set afterwards, keeps the rule order (and so the
+		// serialized bytes) reproducible: go randomizes map iteration order.
+		if _, dup := seen[res.Module]; !dup {
+			seen[res.Module] = struct{}{}
+			rules = append(rules, sarifRule{ID: res.Module})
+		}
+
+		sarifResults = append(sarifResults, sarifResult{
+			RuleID:  res.Module,
+			Level:   sarifLevel,
+			Message: sarifMessage{Text: messageFor(res)},
+			Locations: []sarifLocation{{
+				PhysicalLocation: sarifPhysicalLocation{
+					ArtifactLocation: sarifArtifactLocation{URI: res.Target},
+				},
+			}},
+		})
+	}
+
+	doc := sarifLog{
+		Schema:  sarifSchema,
+		Version: sarifVersion,
+		Runs: []sarifRun{{
+			Tool:    sarifTool{Driver: sarifDriver{Name: toolName, Rules: rules}},
+			Results: sarifResults,
+		}},
+	}
+
+	out, err := json.MarshalIndent(doc, "", "  ")
+	if err != nil {
+		return nil, fmt.Errorf("marshal sarif: %w", err)
+	}
+	return out, nil
+}
+
+// messageFor renders the result text shown by sarif viewers: module and target
+// always, followed by the raw finding json whenever Data is non-empty.
+func messageFor(res Result) string {
+	if len(res.Data) > 0 {
+		return fmt.Sprintf("%s finding on %s: %s", res.Module, res.Target, string(res.Data))
+	}
+	return fmt.Sprintf("%s finding on %s", res.Module, res.Target)
+}
@@ -16,8 +16,12 @@ import (
 	"bufio"
 	"context"
 	"fmt"
+	"io"
 	"net/http"
+	"os"
+	"regexp"
 	"strconv"
+	"strings"
 	"sync"
 	"time"

@@ -36,13 +40,342 @@ const (
 	bigFile    = "directory-list-2.3-big.txt"
 )

+// dirlistBodyCap bounds how many bytes we read per response before computing
+// size/word counts. modern apps stream large html; capping keeps memory flat
+// and makes size/word matching deterministic against arbitrarily large bodies.
+const dirlistBodyCap = 512 * 1024
+
+// soft-404 calibration probes. we ask for a handful of deterministic paths that
+// cannot exist, then treat any response shape they share as the wildcard
+// baseline. deterministic (no rng) so the workflow stays reproducible.
+const (
+	calibrationProbes = 3
+	calibrationPrefix = "/sif-cal-"
+)
+
+// statusNotFound / statusForbidden are the historical default "not interesting"
+// codes; they seed the filter set when no explicit -mc/-fc is given.
+const (
+	statusNotFound  = 404
+	statusForbidden = 403
+)
+
 type DirectoryResult struct {
 	Url        string `json:"url"`
 	StatusCode int    `json:"status_code"`
+	Size       int    `json:"size"`
+	Words      int    `json:"words"`
 }

-// Dirlist performs directory fuzzing on the target URL.
-func Dirlist(size string, url string, timeout time.Duration, threads int, logdir string) ([]DirectoryResult, error) {
+// DirlistOptions carries the ffuf-style matcher knobs. the zero value reproduces
+// the legacy behavior (report everything that isn't 404/403), so callers that
+// don't set anything keep the old defaults.
+type DirlistOptions struct {
+	MatchCodes  string // -mc comma list of status codes to keep
+	FilterCodes string // -fc comma list of status codes to drop
+	FilterSizes string // -fs comma list of body sizes to drop
+	FilterWords string // -fw comma list of word counts to drop
+	FilterRegex string // -fr regex; a body match drops the response
+	Calibrate   bool   // -ac auto-calibrate the soft-404 wildcard baseline
+	Wordlist    string // -w local path or url; overrides the size switch
+	Extensions  string // -e comma list appended to each word (php,bak,env)
+}
+
+// responseMeta is the shape we match on: just enough of the response to decide
+// keep/drop without holding the whole body.
+type responseMeta struct {
+	status int
+	size   int
+	words  int
+}
+
+// matcher decides whether a response is "interesting" using the same precedence
+// as ffuf/feroxbuster: an explicit filter (-fc/-fs/-fw/-fr or a calibrated
+// baseline) drops the response, otherwise the match-code set decides.
+type matcher struct {
+	matchCodes  map[int]struct{}
+	filterCodes map[int]struct{}
+	filterSizes map[int]struct{}
+	filterWords map[int]struct{}
+	filterRe    *regexp.Regexp
+	baselines   []responseMeta // calibrated soft-404 shapes to suppress
+}
+
+// newMatcher builds the matcher from the raw flag strings in opts.
+//
+// every comma-list flag (-mc/-fc/-fs/-fw) goes through parseIntSet, so an unset
+// flag yields an empty set, not nil. an empty -mc set is what Matches reads as
+// "keep anything not explicitly filtered". when neither -mc nor -fc is given,
+// the historical hardcoded 404/403 skip is seeded into the filter-code set, so
+// the legacy default is expressed as ordinary filters.
+//
+// it returns an error naming the offending flag when a list entry is not an
+// integer or when -fr is not a valid regular expression.
+func newMatcher(opts *DirlistOptions) (*matcher, error) {
+	matchCodes, err := parseIntSet(opts.MatchCodes)
+	if err != nil {
+		return nil, fmt.Errorf("parse -mc: %w", err)
+	}
+
+	filterCodes, err := parseIntSet(opts.FilterCodes)
+	if err != nil {
+		return nil, fmt.Errorf("parse -fc: %w", err)
+	}
+	// no explicit -mc or -fc means we fall back to the historical "drop 404/403"
+	// behavior; encode it as filters so the rest of the logic is uniform.
+	if len(matchCodes) == 0 && len(filterCodes) == 0 {
+		filterCodes[statusNotFound] = struct{}{}
+		filterCodes[statusForbidden] = struct{}{}
+	}
+
+	filterSizes, err := parseIntSet(opts.FilterSizes)
+	if err != nil {
+		return nil, fmt.Errorf("parse -fs: %w", err)
+	}
+
+	filterWords, err := parseIntSet(opts.FilterWords)
+	if err != nil {
+		return nil, fmt.Errorf("parse -fw: %w", err)
+	}
+
+	// the sets are assigned once, straight from the parser; there is nothing to
+	// pre-allocate because every field is replaced by its parsed value.
+	m := &matcher{
+		matchCodes:  matchCodes,
+		filterCodes: filterCodes,
+		filterSizes: filterSizes,
+		filterWords: filterWords,
+	}
+
+	// filterRe stays nil when -fr is unset; Matches checks for nil before use.
+	if opts.FilterRegex != "" {
+		re, err := regexp.Compile(opts.FilterRegex)
+		if err != nil {
+			return nil, fmt.Errorf("parse -fr: %w", err)
+		}
+		m.filterRe = re
+	}
+
+	return m, nil
+}
+
+// Matches reports whether a response should surface as a finding.
+//
+// filters take precedence over matches and are checked in this order: a
+// calibrated baseline with the same status/size/words, an -fc status, an -fs
+// size, an -fw word count, then an -fr regex hit on body. any one of them drops
+// the response. whatever survives is kept, unless an -mc set exists, in which
+// case the status must be listed in it.
+func (m *matcher) Matches(meta responseMeta, body []byte) bool {
+	// the catch-all hands every bogus path the same response, so a shape that
+	// equals a recorded baseline exactly is treated as a soft-404.
+	for _, baseline := range m.baselines {
+		if baseline == meta {
+			return false
+		}
+	}
+
+	_, codeFiltered := m.filterCodes[meta.status]
+	_, sizeFiltered := m.filterSizes[meta.size]
+	_, wordsFiltered := m.filterWords[meta.words]
+	if codeFiltered || sizeFiltered || wordsFiltered {
+		return false
+	}
+	if m.filterRe != nil && m.filterRe.Match(body) {
+		return false
+	}
+
+	// no -mc set: everything that got past the filters is a finding.
+	if len(m.matchCodes) == 0 {
+		return true
+	}
+
+	// -mc is an allow-list: only listed status codes surface.
+	_, listed := m.matchCodes[meta.status]
+	return listed
+}
+
+// parseIntSet turns a comma list like "200,301,500" into a set of ints. entries
+// are whitespace-trimmed and blank entries are skipped. empty input yields an
+// empty, non-nil set rather than an error, so an unset flag is a no-op and the
+// caller can still insert into the result. a non-integer entry returns a nil
+// set and an error quoting the offending entry.
+func parseIntSet(raw string) (map[int]struct{}, error) {
+	values := make(map[int]struct{})
+	if raw == "" {
+		return values, nil
+	}
+
+	fields := strings.Split(raw, ",")
+	for i := 0; i < len(fields); i++ {
+		entry := strings.TrimSpace(fields[i])
+		if entry == "" {
+			continue
+		}
+		value, err := strconv.Atoi(entry)
+		if err != nil {
+			return nil, fmt.Errorf("invalid integer %q: %w", entry, err)
+		}
+		values[value] = struct{}{}
+	}
+	return values, nil
+}
+
+// readMeta reads up to dirlistBodyCap bytes of the response body and returns
+// the match shape (status, bytes read, word count) together with those bytes,
+// which the regex filter needs. it does not close resp.Body; the caller closes
+// it after this returns.
+func readMeta(resp *http.Response) (responseMeta, []byte) {
+	capped := io.LimitReader(resp.Body, dirlistBodyCap)
+	body, err := io.ReadAll(capped)
+	if err != nil {
+		// a truncated/aborted body still has a usable status; keep whatever was
+		// read as the body instead of dropping the whole response.
+		charmlog.Debugf("dirlist: read body: %v", err)
+	}
+
+	meta := responseMeta{
+		status: resp.StatusCode,
+		size:   len(body),
+		words:  countWords(body),
+	}
+	return meta, body
+}
+
+// countWords returns the number of whitespace-separated tokens in body, as
+// split by strings.Fields. the word count lets a soft-404 stub be told apart
+// from a real page that happens to have the same byte size.
+func countWords(body []byte) int {
+	tokens := strings.Fields(string(body))
+	return len(tokens)
+}
+
+// expandWords returns, for every base word, the bare word followed by one
+// "word.ext" entry per extension, preserving word order. when extensions
+// normalizes to an empty list the input slice is returned as-is.
+func expandWords(words []string, extensions string) []string {
+	exts := splitExtensions(extensions)
+	if len(exts) == 0 {
+		return words
+	}
+
+	// one slot for the bare word plus one per extension, for every word.
+	out := make([]string, 0, len(words)*(len(exts)+1))
+	for _, word := range words {
+		out = append(out, word)
+		for _, ext := range exts {
+			out = append(out, word+"."+ext)
+		}
+	}
+	return out
+}
+
+// splitExtensions normalizes "php, .bak ,env" into ["php","bak","env"]. each
+// entry is whitespace-trimmed and loses one leading dot, so both "php" and
+// ".php" work; entries that end up empty are dropped. empty input returns nil.
+func splitExtensions(raw string) []string {
+	if raw == "" {
+		return nil
+	}
+
+	pieces := strings.Split(raw, ",")
+	cleaned := make([]string, 0, len(pieces))
+	for _, piece := range pieces {
+		name := strings.TrimPrefix(strings.TrimSpace(piece), ".")
+		if name == "" {
+			continue
+		}
+		cleaned = append(cleaned, name)
+	}
+	return cleaned
+}
+
+// loadWordlist returns the fuzzing words. a non-empty opts.Wordlist (-w) wins
+// over the size switch: a value starting with http:// or https:// is fetched
+// with client, anything else is read as a local file. without -w the list named
+// by size (small/medium/large) is fetched from directoryURL; any other size is
+// an error.
+func loadWordlist(opts *DirlistOptions, size string, client *http.Client) ([]string, error) {
+	if custom := opts.Wordlist; custom != "" {
+		remote := strings.HasPrefix(custom, "http://") || strings.HasPrefix(custom, "https://")
+		if !remote {
+			return readWordlistFile(custom)
+		}
+		return fetchWordlist(custom, client)
+	}
+
+	switch size {
+	case "small":
+		return fetchWordlist(directoryURL+smallFile, client)
+	case "medium":
+		return fetchWordlist(directoryURL+mediumFile, client)
+	case "large":
+		return fetchWordlist(directoryURL+bigFile, client)
+	}
+	return nil, fmt.Errorf("unknown dirlist size %q", size)
+}
+
+// fetchWordlist downloads a remote wordlist through the caller's client, so
+// whatever that client is configured with applies to the fetch too, and returns
+// its non-empty lines.
+//
+// it returns an error when the request cannot be built, the transfer fails, or
+// the server answers with a non-2xx status.
+func fetchWordlist(listURL string, client *http.Client) ([]string, error) {
+	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, listURL, http.NoBody)
+	if err != nil {
+		return nil, fmt.Errorf("build wordlist request: %w", err)
+	}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("download wordlist %q: %w", listURL, err)
+	}
+	defer resp.Body.Close()
+
+	// client.Do does not treat a non-2xx answer as an error; without this check
+	// a 404/500 error page would be split into lines and fuzzed as "words".
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+		return nil, fmt.Errorf("download wordlist %q: unexpected status %s", listURL, resp.Status)
+	}
+	return scanLines(resp.Body), nil
+}
+
+// readWordlistFile opens the local file at path and returns its non-empty
+// lines. the only error it reports is a failure to open the file.
+func readWordlistFile(path string) ([]string, error) {
+	file, openErr := os.Open(path)
+	if openErr != nil {
+		return nil, fmt.Errorf("open wordlist %q: %w", path, openErr)
+	}
+	defer file.Close()
+
+	words := scanLines(file)
+	return words, nil
+}
+
+// scanLines collects every non-empty line of r, in order. the result is nil
+// when there are no such lines. the scanner's error is not inspected, so a read
+// failure simply ends the list early.
+func scanLines(r io.Reader) []string {
+	var kept []string
+	sc := bufio.NewScanner(r)
+	sc.Split(bufio.ScanLines)
+	for sc.Scan() {
+		text := sc.Text()
+		if text == "" {
+			continue
+		}
+		kept = append(kept, text)
+	}
+	return kept
+}
+
+// calibrate requests calibrationProbes paths that should not exist
+// (baseURL + calibrationPrefix + index) and appends each distinct response shape
+// to m.baselines, so the matcher can suppress a soft-404 catch-all (the SPA
+// wildcard) during the real run. the probe paths come from the loop index, not
+// a random source, so the set of probes is the same on every run. probes that
+// fail to build or send are logged at debug level and skipped.
+func calibrate(m *matcher, baseURL string, client *http.Client) {
+	for probeIdx := 0; probeIdx < calibrationProbes; probeIdx++ {
+		target := baseURL + calibrationPrefix + strconv.Itoa(probeIdx)
+
+		req, reqErr := http.NewRequestWithContext(context.TODO(), http.MethodGet, target, http.NoBody)
+		if reqErr != nil {
+			charmlog.Debugf("dirlist: build calibration request: %v", reqErr)
+			continue
+		}
+		resp, doErr := client.Do(req)
+		if doErr != nil {
+			charmlog.Debugf("dirlist: calibration probe %s: %v", target, doErr)
+			continue
+		}
+		shape, _ := readMeta(resp)
+		resp.Body.Close()
+
+		// hard 404s are never recorded: only soft responses (a 200/30x
+		// catch-all) need a size/word baseline, and each shape is stored once.
+		if shape.status != statusNotFound && !containsBaseline(m.baselines, shape) {
+			m.baselines = append(m.baselines, shape)
+		}
+	}
+}
+
+// containsBaseline reports whether meta equals a shape already in baselines,
+// so repeated probes that return the same soft-404 are recorded only once.
+func containsBaseline(baselines []responseMeta, meta responseMeta) bool {
+	for _, known := range baselines {
+		if known == meta {
+			return true
+		}
+	}
+	return false
+}
+
+// Dirlist performs directory fuzzing on the target URL with ffuf-style response
+// filtering, soft-404 calibration and custom wordlists.
+//
+//nolint:gocritic // opts is the scanner's stable public config; passed by value to match the other scanners' entry points.
+func Dirlist(size string, url string, timeout time.Duration, threads int, logdir string, opts DirlistOptions) (DirectoryResults, error) {
 	log := output.Module("DIRLIST")
 	log.Start()

@@ -55,35 +388,27 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
 		}
 	}

-	var list string
-	switch size {
-	case "small":
-		list = directoryURL + smallFile
-	case "medium":
-		list = directoryURL + mediumFile
-	case "large":
-		list = directoryURL + bigFile
+	matcher, err := newMatcher(&opts)
+	if err != nil {
+		log.Error("invalid matcher flags: %v", err)
+		return nil, err
 	}

 	client := httpx.Client(timeout)

-	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, list, http.NoBody)
+	directories, err := loadWordlist(&opts, size, client)
 	if err != nil {
-		log.Error("Error creating directory list request: %s", err)
+		log.Error("Error loading directory list: %s", err)
 		return nil, err
 	}
-	resp, err := client.Do(req)
-	if err != nil {
-		log.Error("Error downloading directory list: %s", err)
-		return nil, err
-	}
-	defer resp.Body.Close()
+	directories = expandWords(directories, opts.Extensions)

-	var directories []string
-	scanner := bufio.NewScanner(resp.Body)
-	scanner.Split(bufio.ScanLines)
-	for scanner.Scan() {
-		directories = append(directories, scanner.Text())
+	// -ac learns the wildcard baseline before the run so catch-all 200s drop.
+	if opts.Calibrate {
+		calibrate(matcher, url, client)
+		if len(matcher.baselines) > 0 {
+			log.Info("calibrated %d soft-404 baseline(s)", len(matcher.baselines))
+		}
 	}

 	progress := output.NewProgress(len(directories), "fuzzing")
@@ -92,7 +417,7 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
 	var mu sync.Mutex
 	wg.Add(threads)

-	results := make([]DirectoryResult, 0, 64)
+	results := make(DirectoryResults, 0, 64)
 	for thread := 0; thread < threads; thread++ {
 		go func(thread int) {
 			defer wg.Done()
@@ -116,24 +441,35 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
 					continue
 				}

-				if resp.StatusCode != 404 && resp.StatusCode != 403 {
-					progress.Pause()
-					log.Success("found: %s [%s]", output.Highlight.Render(directory), output.Status.Render(strconv.Itoa(resp.StatusCode)))
-					progress.Resume()
-
-					if logdir != "" {
-						_ = logger.Write(sanitizedURL, logdir, fmt.Sprintf("%s [%s]\n", strconv.Itoa(resp.StatusCode), directory))
-					}
-
-					result := DirectoryResult{
-						Url:        resp.Request.URL.String(),
-						StatusCode: resp.StatusCode,
-					}
-					mu.Lock()
-					results = append(results, result)
-					mu.Unlock()
-				}
+				meta, body := readMeta(resp)
+				reqURL := resp.Request.URL.String()
 				resp.Body.Close()
+
+				if !matcher.Matches(meta, body) {
+					continue
+				}
+
+				progress.Pause()
+				log.Success("found: %s [%s] (size=%d words=%d)",
+					output.Highlight.Render(directory),
+					output.Status.Render(strconv.Itoa(meta.status)),
+					meta.size, meta.words)
+				progress.Resume()
+
+				if logdir != "" {
+					_ = logger.Write(sanitizedURL, logdir,
+						fmt.Sprintf("%s [%s] size=%d words=%d\n", strconv.Itoa(meta.status), directory, meta.size, meta.words))
+				}
+
+				result := DirectoryResult{
+					Url:        reqURL,
+					StatusCode: meta.status,
+					Size:       meta.size,
+					Words:      meta.words,
+				}
+				mu.Lock()
+				results = append(results, result)
+				mu.Unlock()
 			}
 		}(thread)
 	}
@@ -0,0 +1,360 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"reflect"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestMatcher_Matches(t *testing.T) {
+	tests := []struct {
+		name string
+		opts DirlistOptions
+		meta responseMeta
+		body string
+		want bool
+	}{
+		{
+			// default behavior: 404/403 drop, everything else surfaces
+			name: "default keeps 200",
+			opts: DirlistOptions{},
+			meta: responseMeta{status: 200, size: 10, words: 2},
+			want: true,
+		},
+		{
+			name: "default drops 404",
+			opts: DirlistOptions{},
+			meta: responseMeta{status: 404, size: 9, words: 1},
+			want: false,
+		},
+		{
+			name: "default drops 403",
+			opts: DirlistOptions{},
+			meta: responseMeta{status: 403, size: 9, words: 1},
+			want: false,
+		},
+		{
+			// -mc is allow-list: only listed codes survive
+			name: "mc allowlist keeps listed",
+			opts: DirlistOptions{MatchCodes: "200,301"},
+			meta: responseMeta{status: 301, size: 0, words: 0},
+			want: true,
+		},
+		{
+			name: "mc allowlist drops unlisted 200 already excluded",
+			opts: DirlistOptions{MatchCodes: "301"},
+			meta: responseMeta{status: 200, size: 5, words: 1},
+			want: false,
+		},
+		{
+			name: "fc drops listed code",
+			opts: DirlistOptions{FilterCodes: "500"},
+			meta: responseMeta{status: 500, size: 5, words: 1},
+			want: false,
+		},
+		{
+			// with an explicit -fc and no -mc, the implicit 404/403 filter is not
+			// added, so a 200 still surfaces
+			name: "fc leaves others",
+			opts: DirlistOptions{FilterCodes: "500"},
+			meta: responseMeta{status: 200, size: 5, words: 1},
+			want: true,
+		},
+		{
+			name: "fs drops listed size",
+			opts: DirlistOptions{FilterSizes: "1024"},
+			meta: responseMeta{status: 200, size: 1024, words: 50},
+			want: false,
+		},
+		{
+			name: "fw drops listed word count",
+			opts: DirlistOptions{FilterWords: "7"},
+			meta: responseMeta{status: 200, size: 40, words: 7},
+			want: false,
+		},
+		{
+			name: "fr drops body match",
+			opts: DirlistOptions{FilterRegex: "not found"},
+			meta: responseMeta{status: 200, size: 9, words: 2},
+			body: "page not found",
+			want: false,
+		},
+		{
+			name: "fr keeps non-match",
+			opts: DirlistOptions{FilterRegex: "not found"},
+			meta: responseMeta{status: 200, size: 5, words: 1},
+			body: "welcome",
+			want: true,
+		},
+		{
+			// filter precedence: -mc would keep it, but a size filter drops it
+			name: "filter wins over match",
+			opts: DirlistOptions{MatchCodes: "200", FilterSizes: "12"},
+			meta: responseMeta{status: 200, size: 12, words: 3},
+			want: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			m, err := newMatcher(&tt.opts)
+			if err != nil {
+				t.Fatalf("newMatcher: %v", err)
+			}
+			if got := m.Matches(tt.meta, []byte(tt.body)); got != tt.want {
+				t.Errorf("Matches(%+v, %q) = %v, want %v", tt.meta, tt.body, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestMatcher_BaselineSuppresses(t *testing.T) {
+	m, err := newMatcher(&DirlistOptions{})
+	if err != nil {
+		t.Fatalf("newMatcher: %v", err)
+	}
+	// a calibrated soft-404 shape drops an identical response
+	m.baselines = []responseMeta{{status: 200, size: 42, words: 5}}
+
+	soft := responseMeta{status: 200, size: 42, words: 5}
+	if m.Matches(soft, nil) {
+		t.Error("baseline-matching response should be suppressed")
+	}
+	// a real page with a different size must still surface
+	livePage := responseMeta{status: 200, size: 99, words: 12}
+	if !m.Matches(livePage, nil) {
+		t.Error("distinct response should not be suppressed by baseline")
+	}
+}
+
+func TestNewMatcher_InvalidFlags(t *testing.T) {
+	tests := []struct {
+		name string
+		opts DirlistOptions
+	}{
+		{"bad mc", DirlistOptions{MatchCodes: "abc"}},
+		{"bad fc", DirlistOptions{FilterCodes: "20x"}},
+		{"bad fs", DirlistOptions{FilterSizes: "big"}},
+		{"bad fw", DirlistOptions{FilterWords: "-"}},
+		{"bad regex", DirlistOptions{FilterRegex: "("}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if _, err := newMatcher(&tt.opts); err == nil {
+				t.Errorf("newMatcher(%+v) expected error, got nil", tt.opts)
+			}
+		})
+	}
+}
+
+func TestExpandWords(t *testing.T) {
+	tests := []struct {
+		name  string
+		words []string
+		exts  string
+		want  []string
+	}{
+		{
+			name:  "no extensions unchanged",
+			words: []string{"admin", "login"},
+			exts:  "",
+			want:  []string{"admin", "login"},
+		},
+		{
+			name:  "appends each extension and keeps bare",
+			words: []string{"config"},
+			exts:  "php,bak,env",
+			want:  []string{"config", "config.php", "config.bak", "config.env"},
+		},
+		{
+			name:  "tolerates leading dot and spaces",
+			words: []string{"db"},
+			exts:  " .sql , bak ",
+			want:  []string{"db", "db.sql", "db.bak"},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := expandWords(tt.words, tt.exts)
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("expandWords(%v, %q) = %v, want %v", tt.words, tt.exts, got, tt.want)
+			}
+		})
+	}
+}
+
+// softWildcardApp serves a couple of real paths and a catch-all soft-404: every
+// unknown path returns a fixed 200 body, the SPA pattern that floods dirlist.
+func softWildcardApp() *httptest.Server {
+	const softBody = "<html><body>app shell - route handled client side</body></html>"
+	mux := http.NewServeMux()
+	mux.HandleFunc("/admin", func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte("<html><body>admin control panel dashboard here</body></html>"))
+	})
+	mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte("<html><body>please sign in with your account credentials now</body></html>"))
+	})
+	// catch-all: anything else gets the identical soft-404 shell
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/admin" || r.URL.Path == "/login" {
+			return
+		}
+		w.Write([]byte(softBody))
+	})
+	return httptest.NewServer(mux)
+}
+
+func TestDirlist_CalibrationSuppressesWildcard(t *testing.T) {
+	srv := softWildcardApp()
+	defer srv.Close()
+
+	// the wordlist mixes the two real paths with several bogus ones the catch-all
+	// answers with the soft-404 shell.
+	dir := t.TempDir()
+	wordlist := filepath.Join(dir, "words.txt")
+	if err := os.WriteFile(wordlist, []byte("admin\nlogin\nnope\nbogus\nmissing\n"), 0o600); err != nil {
+		t.Fatalf("write wordlist: %v", err)
+	}
+
+	// without calibration every bogus path is a soft-404 200 and floods output
+	noAC, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{Wordlist: wordlist})
+	if err != nil {
+		t.Fatalf("Dirlist (no -ac): %v", err)
+	}
+	if len(noAC) < 5 {
+		t.Fatalf("expected the wildcard to flood all 5 paths without -ac, got %d", len(noAC))
+	}
+
+	// with -ac the soft-404 baseline is learned and the bogus paths drop
+	withAC, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{
+		Wordlist:  wordlist,
+		Calibrate: true,
+	})
+	if err != nil {
+		t.Fatalf("Dirlist (-ac): %v", err)
+	}
+
+	got := pathSet(withAC)
+	if !has(got, "/admin") || !has(got, "/login") {
+		t.Errorf("real paths admin/login must still surface with -ac, got %v", sortedKeys(got))
+	}
+	for _, bogus := range []string{"/nope", "/bogus", "/missing"} {
+		if has(got, bogus) {
+			t.Errorf("soft-404 path %s should be suppressed by -ac, got %v", bogus, sortedKeys(got))
+		}
+	}
+}
+
+func TestDirlist_ExtensionExpansion(t *testing.T) {
+	// the server only answers config.php; the bare word and other extensions hit
+	// the catch-all hard 404, so -e must be what surfaces config.php.
+	const realBody = "<?php // database connection settings live here ?>"
+	mux := http.NewServeMux()
+	mux.HandleFunc("/config.php", func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte(realBody))
+	})
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		http.NotFound(w, r) // hard 404 for everything but config.php
+	})
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	dir := t.TempDir()
+	wordlist := filepath.Join(dir, "words.txt")
+	if err := os.WriteFile(wordlist, []byte("config\n"), 0o600); err != nil {
+		t.Fatalf("write wordlist: %v", err)
+	}
+
+	results, err := Dirlist("small", srv.URL, 5*time.Second, 2, "", DirlistOptions{
+		Wordlist:   wordlist,
+		Extensions: "php,bak",
+	})
+	if err != nil {
+		t.Fatalf("Dirlist: %v", err)
+	}
+
+	got := pathSet(results)
+	if !has(got, "/config.php") {
+		t.Errorf("expected -e to surface config.php, got %v", sortedKeys(got))
+	}
+	if has(got, "/config") || has(got, "/config.bak") {
+		t.Errorf("only config.php exists; bare word and .bak are 404s, got %v", sortedKeys(got))
+	}
+}
+
+func TestDirlist_LocalWordlistOverridesSize(t *testing.T) {
+	// a local -w must be used verbatim and never touch directoryURL; point the
+	// remote at a sink that fails the test if it's ever hit.
+	orig := directoryURL
+	directoryURL = "http://127.0.0.1:0/should-not-be-fetched/"
+	defer func() { directoryURL = orig }()
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/secret", func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte("<html>top secret area found</html>"))
+	})
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		http.NotFound(w, r)
+	})
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	dir := t.TempDir()
+	wordlist := filepath.Join(dir, "custom.txt")
+	if err := os.WriteFile(wordlist, []byte("secret\nabsent\n"), 0o600); err != nil {
+		t.Fatalf("write wordlist: %v", err)
+	}
+
+	results, err := Dirlist("large", srv.URL, 5*time.Second, 2, "", DirlistOptions{Wordlist: wordlist})
+	if err != nil {
+		t.Fatalf("Dirlist: %v", err)
+	}
+
+	got := pathSet(results)
+	if !has(got, "/secret") {
+		t.Errorf("expected the custom wordlist to find /secret, got %v", sortedKeys(got))
+	}
+	if has(got, "/absent") {
+		t.Errorf("/absent is a 404 and should not surface, got %v", sortedKeys(got))
+	}
+}
+
+// pathSet collects each result's url path for membership checks: everything
+// from the first "/" after "://" onward. a url without a scheme separator, or
+// without a path after the host, is stored whole. failure messages in these
+// tests print the set through sortedKeys, which this file does not define (the
+// original note places it in crawl.go).
+func pathSet(results DirectoryResults) map[string]struct{} {
+	paths := make(map[string]struct{}, len(results))
+	for _, res := range results {
+		key := res.Url
+		if schemeEnd := strings.Index(key, "://"); schemeEnd >= 0 {
+			hostAndPath := key[schemeEnd+len("://"):]
+			if pathStart := strings.Index(hostAndPath, "/"); pathStart >= 0 {
+				key = hostAndPath[pathStart:]
+			}
+		}
+		paths[key] = struct{}{}
+	}
+	return paths
+}
+
+// has reports whether key is a member of set; it exists only to keep the
+// assertions readable.
+func has(set map[string]struct{}, key string) bool {
+	if _, present := set[key]; present {
+		return true
+	}
+	return false
+}
@@ -39,6 +39,23 @@ const (
 	dnsBigFile    = "subdomains-10000.txt"
 )

+// dnsScheme labels which url won a subdomain so we don't probe the second
+// scheme once the first already counted it.
+type dnsScheme string
+
+const (
+	dnsSchemeHTTP  dnsScheme = "http"
+	dnsSchemeHTTPS dnsScheme = "https"
+)
+
+// meaningfulStatus reports whether code is a 2xx status. only those count as a
+// "this host exists" signal: a wildcard-DNS host answers every candidate with
+// the same redirect or 404, so requiring a successful, non-redirect status
+// keeps such hosts from flooding the results.
+func meaningfulStatus(code int) bool {
+	if code < http.StatusOK {
+		return false
+	}
+	return code < http.StatusMultipleChoices
+}
+
 // Dnslist performs DNS subdomain enumeration on the target domain.
 func Dnslist(size string, url string, timeout time.Duration, threads int, logdir string) ([]string, error) {
 	log := output.Module("DNS")
@@ -88,6 +105,12 @@ func Dnslist(size string, url string, timeout time.Duration, threads int, logdir
 	if dnsTransport != nil {
 		client.Transport = dnsTransport
 	}
+	// don't chase redirects: a wildcard catch-all that 301s every candidate to
+	// the same landing page must read as a redirect status, not a 200, so it
+	// gets gated out instead of counting as a found host.
+	client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
+		return http.ErrUseLastResponse
+	}

 	progress := output.NewProgress(len(dns), "enumerating")

@@ -109,52 +132,25 @@ func Dnslist(size string, url string, timeout time.Duration, threads int, logdir

 				charmlog.Debugf("Looking up: %s", domain)

-				// Check HTTP
-				httpReq, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, "http://"+domain+"."+sanitizedURL, http.NoBody)
-				if err != nil {
-					charmlog.Debugf("Error %s: %s", domain, err)
+				// probe http first, then https - but a subdomain is recorded at
+				// most once. firing both schemes and appending on each is what
+				// double-counted every host on the old path.
+				host := domain + "." + sanitizedURL
+				foundURL, scheme := probeSubdomain(client, host)
+				if foundURL == "" {
 					continue
 				}
-				resp, err := client.Do(httpReq)
-				if err != nil {
-					charmlog.Debugf("Error %s: %s", domain, err)
-				} else {
-					mu.Lock()
-					urls = append(urls, resp.Request.URL.String())
-					mu.Unlock()
-					resp.Body.Close()

-					progress.Pause()
-					log.Success("found: %s.%s [http]", output.Highlight.Render(domain), sanitizedURL)
-					progress.Resume()
+				mu.Lock()
+				urls = append(urls, foundURL)
+				mu.Unlock()

-					if logdir != "" {
-						logger.Write(sanitizedURL, logdir, fmt.Sprintf("[http] %s.%s\n", domain, sanitizedURL))
-					}
-				}
+				progress.Pause()
+				log.Success("found: %s [%s]", output.Highlight.Render(host), scheme)
+				progress.Resume()

-				// Check HTTPS
-				httpsReq, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, "https://"+domain+"."+sanitizedURL, http.NoBody)
-				if err != nil {
-					charmlog.Debugf("Error %s: %s", domain, err)
-					continue
-				}
-				resp, err = client.Do(httpsReq)
-				if err != nil {
-					charmlog.Debugf("Error %s: %s", domain, err)
-				} else {
-					mu.Lock()
-					urls = append(urls, resp.Request.URL.String())
-					mu.Unlock()
-					resp.Body.Close()
-
-					progress.Pause()
-					log.Success("found: %s.%s [https]", output.Highlight.Render(domain), sanitizedURL)
-					progress.Resume()
-
-					if logdir != "" {
-						_ = logger.Write(sanitizedURL, logdir, fmt.Sprintf("[https] %s.%s\n", domain, sanitizedURL))
-					}
+				if logdir != "" {
+					_ = logger.Write(sanitizedURL, logdir, fmt.Sprintf("[%s] %s\n", scheme, host))
 				}
 			}
 		}(thread)
@@ -166,3 +162,40 @@ func Dnslist(size string, url string, timeout time.Duration, threads int, logdir

 	return urls, nil
 }
+
+// probeSubdomain requests host over http and then https, stopping at the first
+// response whose status passes meaningfulStatus. it returns that response's
+// request url and the scheme that produced it, or two empty values when neither
+// scheme gave a real signal. because https is only tried when http did not
+// already count, a host is reported at most once.
+func probeSubdomain(client *http.Client, host string) (string, dnsScheme) {
+	type attempt struct {
+		base  string
+		label dnsScheme
+	}
+	attempts := []attempt{
+		{"http://", dnsSchemeHTTP},
+		{"https://", dnsSchemeHTTPS},
+	}
+
+	for _, try := range attempts {
+		req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, try.base+host, http.NoBody)
+		if err != nil {
+			charmlog.Debugf("Error %s: %s", host, err)
+			continue
+		}
+		resp, err := client.Do(req)
+		if err != nil {
+			charmlog.Debugf("Error %s: %s", host, err)
+			continue
+		}
+		// copy what we need out of the response before releasing the body.
+		status, finalURL := resp.StatusCode, resp.Request.URL.String()
+		resp.Body.Close()
+
+		if !meaningfulStatus(status) {
+			charmlog.Debugf("skip %s [%s]: status %d", host, try.label, status)
+			continue
+		}
+		return finalURL, try.label
+	}
+
+	return "", ""
+}
@@ -0,0 +1,98 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// TestMeaningfulStatus pins which status codes meaningfulStatus accepts as a
+// real hit: 200 and 204 count, while 301, 302, 404 and 500 are dropped.
+func TestMeaningfulStatus(t *testing.T) {
+	type statusCase struct {
+		name string
+		code int
+		want bool
+	}
+	cases := []statusCase{
+		{name: "ok counts", code: http.StatusOK, want: true},
+		{name: "204 counts", code: http.StatusNoContent, want: true},
+		{name: "301 catch-all redirect dropped", code: http.StatusMovedPermanently, want: false},
+		{name: "302 catch-all redirect dropped", code: http.StatusFound, want: false},
+		{name: "404 dropped", code: http.StatusNotFound, want: false},
+		{name: "500 dropped", code: http.StatusInternalServerError, want: false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := meaningfulStatus(tc.code)
+			if got == tc.want {
+				return
+			}
+			t.Errorf("meaningfulStatus(%d) = %v, want %v", tc.code, got, tc.want)
+		})
+	}
+}
+
+// a host that answers 200 over http should count exactly once, not once per
+// scheme - the old path appended on both http and https.
+func TestProbeSubdomain_DedupesAcrossSchemes(t *testing.T) {
+	// hits counts every request that reaches the test server.
+	var hits int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&hits, 1)
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer srv.Close()
+
+	// probeSubdomain prepends the scheme itself, so hand it the bare host:port.
+	host := strings.TrimPrefix(srv.URL, "http://")
+	client := &http.Client{Timeout: 5 * time.Second}
+
+	url, scheme := probeSubdomain(client, host)
+	if url == "" {
+		t.Fatal("expected http probe to count the host")
+	}
+	if scheme != dnsSchemeHTTP {
+		t.Errorf("expected http scheme to win, got %q", scheme)
+	}
+	// http already counted, so https must not be tried - one request total.
+	if got := atomic.LoadInt32(&hits); got != 1 {
+		t.Errorf("expected exactly 1 probe request, got %d", got)
+	}
+}
+
+// a wildcard catch-all that 404s (or 301s) every candidate must not be reported
+// as found - that's the flood the gating closes.
+func TestProbeSubdomain_WildcardCatchAllNotFound(t *testing.T) {
+	// one throwaway server per status code; each answers every path the same way.
+	for _, code := range []int{http.StatusNotFound, http.StatusMovedPermanently} {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if code == http.StatusMovedPermanently {
+				w.Header().Set("Location", "https://catch-all.example/")
+			}
+			w.WriteHeader(code)
+		}))
+
+		host := strings.TrimPrefix(srv.URL, "http://")
+		// ErrUseLastResponse hands the 301 back as-is instead of following it, so
+		// probeSubdomain sees the redirect status itself.
+		client := &http.Client{
+			Timeout: 5 * time.Second,
+			CheckRedirect: func(req *http.Request, via []*http.Request) error {
+				return http.ErrUseLastResponse
+			},
+		}
+
+		url, _ := probeSubdomain(client, host)
+		if url != "" {
+			t.Errorf("status %d should not count as found, got %q", code, url)
+		}
+		// closed explicitly (not deferred) so each iteration releases its server.
+		srv.Close()
+	}
+}
@@ -14,6 +14,57 @@ package frameworks

 import "testing"

+// TestResolveVersion covers the precedence rules of resolveVersion: a concrete
+// detector version wins, an empty or "unknown" detector falls back to the
+// extracted version, and with no concrete value on either side the result is
+// "unknown".
+func TestResolveVersion(t *testing.T) {
+	type versionCase struct {
+		name      string
+		detector  string
+		extracted string
+		want      string
+	}
+	cases := []versionCase{
+		{name: "detector concrete wins", detector: "9.0.0", extracted: "8.4.1", want: "9.0.0"},
+		{name: "unknown detector falls back to extracted", detector: "unknown", extracted: "8.4.1", want: "8.4.1"},
+		{name: "empty detector falls back to extracted", detector: "", extracted: "8.4.1", want: "8.4.1"},
+		{name: "both unknown stays unknown", detector: "unknown", extracted: "unknown", want: "unknown"},
+		{name: "both empty/unknown stays unknown", detector: "", extracted: "", want: "unknown"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := resolveVersion(tc.detector, tc.extracted)
+			if got == tc.want {
+				return
+			}
+			t.Errorf("resolveVersion(%q, %q) = %q, want %q", tc.detector, tc.extracted, got, tc.want)
+		})
+	}
+}
+
+// the regression itself: with the detector reporting "unknown" but a real
+// version extractable from the body, the cve lookup must use the extracted
+// version and surface the matching CVE - the old path looked up "unknown" and
+// missed it.
+//
+// NOTE(review): this relies on the CVE table behind getVulnerabilities carrying
+// a Laravel entry that covers 8.4.1 - confirm if that table is ever trimmed.
+func TestResolveVersionFeedsCVELookup(t *testing.T) {
+	const body = "Laravel 8.4.1"
+
+	// extractor pulls the concrete version out of the body...
+	extracted := ExtractVersionOptimized(body, "Laravel").Version
+	if extracted != "8.4.1" {
+		t.Fatalf("expected extracted version 8.4.1, got %q", extracted)
+	}
+
+	// ...and looking "unknown" up finds nothing, proving the old behavior missed it.
+	if cves, _ := getVulnerabilities("Laravel", "unknown"); len(cves) != 0 {
+		t.Fatalf("expected no CVEs for unknown version, got %v", cves)
+	}
+
+	// the reconciled version feeds the lookup and the CVE shows up.
+	version := resolveVersion("unknown", extracted)
+	cves, _ := getVulnerabilities("Laravel", version)
+	if len(cves) == 0 {
+		t.Errorf("expected Laravel %s to surface a CVE, got none", version)
+	}
+}
+
 func TestVersionAffected(t *testing.T) {
 	tests := []struct {
 		version  string
@@ -23,7 +74,7 @@ func TestVersionAffected(t *testing.T) {
 		{"4.2", "4.2", true},
 		{"4.2.1", "4.2", true},
 		{"4.2.13", "4.2", true},
-		{"4.20", "4.2", false},   // the boundary bug: 4.20 is not a 4.2.x release
+		{"4.20", "4.2", false}, // the boundary bug: 4.20 is not a 4.2.x release
 		{"4.20.0", "4.2", false},
 		{"5.0", "4.2", false},
 	}
@@ -118,17 +118,22 @@ func DetectFramework(url string, timeout time.Duration, logdir string) (*Framewo
 		return nil, nil //nolint:nilnil // no framework detected is not an error
 	}

-	// Get version match details
+	// Get version match details. the detector's own best.version is often
+	// "unknown" (it only fingerprints the framework, not always the version),
+	// while ExtractVersionOptimized digs the real version out of the body. prefer
+	// that for both the reported version and the cve lookup, otherwise CVEs that
+	// only match a concrete version are silently missed.
 	versionMatch := ExtractVersionOptimized(bodyStr, best.name)
-	cves, suggestions := getVulnerabilities(best.name, best.version)
+	version := resolveVersion(best.version, versionMatch.Version)
+	cves, suggestions := getVulnerabilities(best.name, version)

-	result := NewFrameworkResult(best.name, best.version, best.confidence, versionMatch.Confidence)
+	result := NewFrameworkResult(best.name, version, best.confidence, versionMatch.Confidence)
 	result.WithVulnerabilities(cves, suggestions)

 	// Log results
 	if logdir != "" {
 		logEntry := fmt.Sprintf("Detected framework: %s (version: %s, confidence: %.2f, version_confidence: %.2f)\n",
-			best.name, best.version, best.confidence, versionMatch.Confidence)
+			best.name, version, best.confidence, versionMatch.Confidence)
 		if len(cves) > 0 {
 			logEntry += fmt.Sprintf("  Risk Level: %s\n", result.RiskLevel)
 			logEntry += fmt.Sprintf("  CVEs: %v\n", cves)
@@ -138,7 +143,7 @@ func DetectFramework(url string, timeout time.Duration, logdir string) (*Framewo
 	}

 	log.Success("Detected %s framework (version: %s, confidence: %.2f)",
-		output.Highlight.Render(best.name), best.version, best.confidence)
+		output.Highlight.Render(best.name), version, best.confidence)

 	if versionMatch.Confidence > 0 {
 		charmlog.Debugf("Version detected from: %s (confidence: %.2f)",
@@ -160,6 +165,24 @@ func DetectFramework(url string, timeout time.Duration, logdir string) (*Framewo
 	return result, nil
 }

+// unknownVersion is the placeholder version string that stands for "no concrete
+// version could be read from the response". resolveVersion treats it exactly
+// like an empty string.
+const unknownVersion = "unknown"
+
+// resolveVersion chooses the version to report and to run the CVE lookup
+// against. candidates are considered in priority order - the detector's value
+// first, then the one ExtractVersionOptimized dug out of the body - and the
+// first that is neither empty nor "unknown" is returned. when neither carries
+// a concrete version the result is unknownVersion.
+func resolveVersion(detectorVersion, extractedVersion string) string {
+	for _, candidate := range [...]string{detectorVersion, extractedVersion} {
+		switch candidate {
+		case "", unknownVersion:
+			// carries no information; fall through to the next source.
+		default:
+			return candidate
+		}
+	}
+	return unknownVersion
+}
+
 // getVulnerabilities returns CVEs and recommendations for a framework version.
 func getVulnerabilities(framework, version string) ([]string, []string) {
 	entries, exists := knownCVEs[framework]
@@ -134,7 +134,7 @@ func TestIntegrationDirlist(t *testing.T) {
 	directoryURL = srv.URL + "/"
 	defer func() { directoryURL = orig }()

-	results, err := Dirlist("small", srv.URL, 5*time.Second, 3, "")
+	results, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{})
 	if err != nil {
 		t.Fatalf("Dirlist: %v", err)
 	}
@@ -245,6 +245,22 @@ func TestIntegrationXSS(t *testing.T) {
 	}
 }

+// TestIntegrationProbe runs Probe end-to-end against the in-process app built by
+// newVulnApp and expects an alive result with a 200 status.
+func TestIntegrationProbe(t *testing.T) {
+	srv := newVulnApp()
+	defer srv.Close()
+
+	// empty logdir: no log file is written for this run.
+	result, err := Probe(srv.URL, 5*time.Second, "")
+	if err != nil {
+		t.Fatalf("Probe: %v", err)
+	}
+	if result == nil || !result.Alive {
+		t.Fatalf("expected the vuln app to be alive, got %+v", result)
+	}
+	if result.StatusCode != http.StatusOK {
+		t.Errorf("expected 200 from the homepage, got %d", result.StatusCode)
+	}
+}
+
 func TestIntegrationPorts(t *testing.T) {
 	// a real listener stands in for an open port; a tiny server hands its number
 	// to Ports via the commonPorts wordlist.
@@ -0,0 +1,148 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/dropalldatabases/sif/internal/httpx"
+	"github.com/dropalldatabases/sif/internal/logger"
+	"github.com/dropalldatabases/sif/internal/output"
+)
+
+// ProbeResult is the httpx-style liveness snapshot for one target: did it answer,
+// where did it land, and the few fingerprint fields worth keeping.
+//
+// fields, as filled in by Probe:
+//   - URL is the target exactly as it was passed in.
+//   - Alive is false when the request failed at the transport level; the
+//     remaining response fields are then left at their zero values.
+//   - StatusCode is the status of the final response after redirects.
+//   - Title is the trimmed text of the first <title> in the body, if any.
+//   - Server is the value of the Server response header.
+//   - ContentLength is copied from http.Response.ContentLength (net/http
+//     reports -1 there when the length is unknown).
+//   - RedirectChain lists the url of every redirect hop that was followed, in
+//     order; the initial url is not part of it.
+type ProbeResult struct {
+	URL           string   `json:"url"`
+	Alive         bool     `json:"alive"`
+	StatusCode    int      `json:"status_code"`
+	Title         string   `json:"title,omitempty"`
+	Server        string   `json:"server,omitempty"`
+	ContentLength int64    `json:"content_length"`
+	RedirectChain []string `json:"redirect_chain,omitempty"`
+}
+
+// probeMaxRedirects caps the chain we'll follow so a redirect loop can't run
+// forever; matches httpx's default depth.
+const probeMaxRedirects = 10
+
+// probeMaxBody bounds the body we read to extract a <title> (64KB) so a hostile
+// or huge response can't exhaust memory.
+const probeMaxBody = 64 * 1024
+
+// titleRe pulls the text out of the first <title>; DOTALL so a title spanning
+// lines is still caught. the match is case-insensitive (so <TITLE> works too),
+// tolerates attributes on the opening tag, and the capture is non-greedy so it
+// ends at the first closing </title>.
+var titleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
+
+// Probe checks whether the target is alive and reports its final status, page
+// title, Server header, content-length and the redirect chain it walked.
+//
+// a single GET is sent to targetURL through an httpx.Client built with timeout.
+// redirects are followed up to probeMaxRedirects and every followed hop is
+// recorded in RedirectChain. at most probeMaxBody bytes of the final body are
+// read to look for a <title>. when logdir is non-empty the outcome is also
+// appended to the target's log file.
+//
+// a target that never answers is returned as a result with Alive=false and a
+// nil error. a target that answers but keeps redirecting past the cap is
+// reported alive with the status of the last response received and an empty
+// title. an error is returned only when the log header cannot be written, the
+// request cannot be built, or the body cannot be read.
+func Probe(targetURL string, timeout time.Duration, logdir string) (*ProbeResult, error) {
+	log := output.Module("PROBE")
+	log.Start()
+
+	sanitizedURL := stripScheme(targetURL)
+
+	if logdir != "" {
+		if err := logger.WriteHeader(sanitizedURL, logdir, "Live-host probe"); err != nil {
+			log.Error("error creating log file: %v", err)
+			return nil, fmt.Errorf("create probe log: %w", err)
+		}
+	}
+
+	// follow redirects but record every hop; the chain is half the value of a
+	// probe. capping at probeMaxRedirects stops a loop from spinning forever.
+	chain := make([]string, 0, 4)
+	client := httpx.Client(timeout)
+	client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
+		if len(via) >= probeMaxRedirects {
+			return fmt.Errorf("stopped after %d redirects", probeMaxRedirects)
+		}
+		chain = append(chain, req.URL.String())
+		return nil
+	}
+
+	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, targetURL, http.NoBody)
+	if err != nil {
+		return nil, fmt.Errorf("build probe request: %w", err)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil && resp == nil {
+		// a transport error means the host didn't answer; that's a dead probe,
+		// not a tool failure, so report it rather than bailing.
+		log.Warn("%s is dead: %v", output.Highlight.Render(sanitizedURL), err)
+		if logdir != "" {
+			// best-effort: a failed log write must not fail the probe.
+			_ = logger.Write(sanitizedURL, logdir, fmt.Sprintf("dead: %v\n", err))
+		}
+		result := &ProbeResult{URL: targetURL, Alive: false, RedirectChain: chain}
+		log.Complete(0, "alive")
+		return result, nil
+	}
+
+	var title string
+	if err != nil {
+		// net/http returns a non-nil response together with an error only when
+		// CheckRedirect refused the next hop, i.e. the redirect cap was hit. the
+		// host did answer, so it is alive; the client has already closed the
+		// body, so there is no title to extract.
+		log.Warn("%s exceeded the redirect cap: %v", output.Highlight.Render(sanitizedURL), err)
+	} else {
+		defer resp.Body.Close()
+
+		body, readErr := io.ReadAll(io.LimitReader(resp.Body, probeMaxBody))
+		if readErr != nil {
+			return nil, fmt.Errorf("read probe body: %w", readErr)
+		}
+		title = extractTitle(body)
+	}
+
+	result := &ProbeResult{
+		URL:           targetURL,
+		Alive:         true,
+		StatusCode:    resp.StatusCode,
+		Title:         title,
+		Server:        resp.Header.Get("Server"),
+		ContentLength: resp.ContentLength,
+		RedirectChain: chain,
+	}
+
+	log.Info("%s [%s] %s",
+		output.Status.Render(fmt.Sprintf("%d", result.StatusCode)),
+		output.Highlight.Render(result.Title),
+		output.Muted.Render(result.Server))
+	if len(chain) > 0 {
+		log.Info("redirect chain: %s", strings.Join(chain, " -> "))
+	}
+
+	if logdir != "" {
+		// best-effort: a failed log write must not fail the probe.
+		_ = logger.Write(sanitizedURL, logdir,
+			fmt.Sprintf("alive status=%d title=%q server=%q length=%d\n",
+				result.StatusCode, result.Title, result.Server, result.ContentLength))
+		if len(chain) > 0 {
+			_ = logger.Write(sanitizedURL, logdir, "redirect chain: "+strings.Join(chain, " -> ")+"\n")
+		}
+	}
+
+	log.Complete(1, "alive")
+	return result, nil
+}
+
+// extractTitle looks for the first <title> element in body via titleRe and
+// returns its inner text with surrounding whitespace trimmed. a body without a
+// title yields "".
+func extractTitle(body []byte) string {
+	if groups := titleRe.FindSubmatch(body); len(groups) >= 2 {
+		return strings.TrimSpace(string(groups[1]))
+	}
+	return ""
+}
+
+// ResultType identifies probe results for the result registry.
+func (r *ProbeResult) ResultType() string { return "probe" }
+
+// compile-time check that *ProbeResult satisfies ScanResult.
+var _ ScanResult = (*ProbeResult)(nil)
@@ -0,0 +1,133 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+// TestProbe_TitleServerStatus checks that Probe reports the status code, the
+// Server header and the whitespace-trimmed <title> of a plain 200 response.
+func TestProbe_TitleServerStatus(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Server", "nginx/1.25.3")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("<html><head><title>  Welcome Home  </title></head><body>hi</body></html>"))
+	}))
+	defer srv.Close()
+
+	result, err := Probe(srv.URL, 5*time.Second, "")
+	if err != nil {
+		t.Fatalf("Probe: %v", err)
+	}
+	if !result.Alive {
+		t.Fatalf("expected alive, got %+v", result)
+	}
+	if result.StatusCode != http.StatusOK {
+		t.Errorf("expected status 200, got %d", result.StatusCode)
+	}
+	// title text is trimmed of surrounding whitespace
+	if result.Title != "Welcome Home" {
+		t.Errorf("expected trimmed title, got %q", result.Title)
+	}
+	if result.Server != "nginx/1.25.3" {
+		t.Errorf("expected server header, got %q", result.Server)
+	}
+}
+
+// TestProbe_RedirectChain checks that Probe follows a two-step redirect, lands
+// on the final 200 and records the followed hops in order.
+func TestProbe_RedirectChain(t *testing.T) {
+	// /a -> /b -> /c(final); the chain should record both intermediate hops the
+	// client followed before landing on the final 200.
+	mux := http.NewServeMux()
+	mux.HandleFunc("/a", func(w http.ResponseWriter, r *http.Request) {
+		http.Redirect(w, r, "/b", http.StatusFound)
+	})
+	mux.HandleFunc("/b", func(w http.ResponseWriter, r *http.Request) {
+		http.Redirect(w, r, "/c", http.StatusMovedPermanently)
+	})
+	mux.HandleFunc("/c", func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("<title>final</title>"))
+	})
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	result, err := Probe(srv.URL+"/a", 5*time.Second, "")
+	if err != nil {
+		t.Fatalf("Probe: %v", err)
+	}
+	if !result.Alive || result.StatusCode != http.StatusOK {
+		t.Fatalf("expected alive 200 after redirects, got %+v", result)
+	}
+	if result.Title != "final" {
+		t.Errorf("expected title of final hop, got %q", result.Title)
+	}
+	// two hops were followed (/b and /c are the urls requested after the first);
+	// the starting url /a is never part of the chain.
+	if len(result.RedirectChain) != 2 {
+		t.Fatalf("expected 2 redirect hops, got %d: %v", len(result.RedirectChain), result.RedirectChain)
+	}
+	if !hasSuffix(result.RedirectChain[0], "/b") || !hasSuffix(result.RedirectChain[1], "/c") {
+		t.Errorf("expected chain to walk /b then /c, got %v", result.RedirectChain)
+	}
+}
+
+// TestProbe_DeadHost checks that an unreachable target comes back as a result
+// with Alive=false and a nil error.
+func TestProbe_DeadHost(t *testing.T) {
+	// a server we immediately close so the dial fails; a dead host is a reported
+	// result, not an error.
+	srv := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) {}))
+	// keep the address before closing so the probe targets a port nobody serves.
+	deadURL := srv.URL
+	srv.Close()
+
+	result, err := Probe(deadURL, 2*time.Second, "")
+	if err != nil {
+		t.Fatalf("Probe should not error on a dead host: %v", err)
+	}
+	if result.Alive {
+		t.Errorf("expected dead host, got %+v", result)
+	}
+}
+
+// TestProbe_ExtractTitle exercises extractTitle directly: plain, padded,
+// attribute-carrying and multi-line titles, plus a body with no title at all.
+func TestProbe_ExtractTitle(t *testing.T) {
+	type titleCase struct {
+		name string
+		body string
+		want string
+	}
+	cases := []titleCase{
+		{name: "simple", body: "<title>hello</title>", want: "hello"},
+		{name: "trimmed", body: "<title>  spaced  </title>", want: "spaced"},
+		{name: "attrs", body: `<title lang="en">attr</title>`, want: "attr"},
+		{name: "multiline", body: "<title>line one\nline two</title>", want: "line one\nline two"},
+		{name: "none", body: "<html><body>no title</body></html>", want: ""},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := extractTitle([]byte(tc.body)); got != tc.want {
+				t.Errorf("extractTitle(%q) = %q, want %q", tc.body, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestProbeResult_ResultType pins the registry key reported by ProbeResult.
+func TestProbeResult_ResultType(t *testing.T) {
+	const want = "probe"
+	if got := (&ProbeResult{}).ResultType(); got != want {
+		t.Errorf("expected result type 'probe', got %q", got)
+	}
+}
+
+// hasSuffix reports whether s ends with suffix. it is a small local helper so
+// the redirect-chain assertions read clearly.
+func hasSuffix(s, suffix string) bool {
+	cut := len(s) - len(suffix)
+	if cut < 0 {
+		// suffix is longer than s, so it cannot match.
+		return false
+	}
+	return s[cut:] == suffix
+}
@@ -41,29 +41,49 @@ func stripScheme(url string) string {
 	return url
 }

-func fetchRobotsTXT(url string, client *http.Client) *http.Response {
-	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url, http.NoBody)
-	if err != nil {
-		log.Debugf("Error creating request for robots.txt: %s", err)
-		return nil
-	}
-	resp, err := client.Do(req)
-	if err != nil {
-		log.Debugf("Error fetching robots.txt: %s", err)
-		return nil
-	}
+// maxRobotsRedirects caps how many 301 hops fetchRobotsTXT will chase. without
+// a bound an A->B->A redirect loop recursed forever and blew the stack.
+const maxRobotsRedirects = 10
+
+// fetchRobotsTXT follows 301s to robots.txt iteratively, bounded by both a hop
+// cap and a visited set so a redirect cycle terminates instead of recursing
+// without end.
+//
+// it returns the first response whose status is not 301; the caller owns that
+// response and must close its body. nil is returned when a request cannot be
+// built or sent, when a 301 carries an empty or unparsable Location, when a url
+// repeats, or when maxRobotsRedirects requests have all been redirected.
+func fetchRobotsTXT(url string, client *http.Client) *http.Response {
+	visited := make(map[string]bool, maxRobotsRedirects)
+
+	for hop := 0; hop < maxRobotsRedirects; hop++ {
+		if visited[url] {
+			log.Debugf("redirect loop hit at %s, stopping", url)
+			return nil
+		}
+		visited[url] = true
+
+		req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url, http.NoBody)
+		if err != nil {
+			log.Debugf("Error creating request for robots.txt: %s", err)
+			return nil
+		}
+		resp, err := client.Do(req)
+		if err != nil {
+			log.Debugf("Error fetching robots.txt: %s", err)
+			return nil
+		}
+
+		if resp.StatusCode != http.StatusMovedPermanently {
+			return resp
+		}
 
-	if resp.StatusCode == http.StatusMovedPermanently {
 		redirectURL := resp.Header.Get("Location")
+		resp.Body.Close()
 		if redirectURL == "" {
 			log.Debugf("Redirect location is empty for %s", url)
 			return nil
 		}
-		resp.Body.Close()
-		return fetchRobotsTXT(redirectURL, client)
+		// Location may be relative ("/robots.txt"); resolve it against the url
+		// that was just requested, otherwise the next request has no scheme or
+		// host and the redirect is silently lost.
+		next, err := resp.Request.URL.Parse(redirectURL)
+		if err != nil {
+			log.Debugf("Invalid redirect location %q for %s: %s", redirectURL, url, err)
+			return nil
+		}
+		url = next.String()
 	}
 
-	return resp
+	log.Debugf("robots.txt redirect depth exceeded (%d hops)", maxRobotsRedirects)
+	return nil
 }

 // Scan performs a basic URL scan, including checks for robots.txt and other common endpoints.
@@ -3,7 +3,9 @@ package scan
 import (
 	"net/http"
 	"net/http/httptest"
+	"strconv"
 	"strings"
+	"sync/atomic"
 	"testing"
 	"time"
 )
@@ -155,6 +157,103 @@ func TestFetchRobotsTXT_Redirect(t *testing.T) {
 	}
 }

+// an A->B->A redirect loop must terminate (return nil) instead of recursing
+// forever and blowing the stack.
+func TestFetchRobotsTXT_RedirectLoop(t *testing.T) {
+	// both declared up front: A's handler closes over serverB before it is
+	// assigned, which is fine because no request is served until later.
+	var serverA, serverB *httptest.Server
+
+	serverA = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Location", serverB.URL+"/robots.txt")
+		w.WriteHeader(http.StatusMovedPermanently)
+	}))
+	defer serverA.Close()
+
+	serverB = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Location", serverA.URL+"/robots.txt")
+		w.WriteHeader(http.StatusMovedPermanently)
+	}))
+	defer serverB.Close()
+
+	// the client must not follow redirects itself, so every 301 is handed to
+	// fetchRobotsTXT to deal with.
+	client := &http.Client{
+		Timeout: 5 * time.Second,
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			return http.ErrUseLastResponse
+		},
+	}
+
+	// the hop cap + visited set guarantee termination; a regression that drops
+	// either would spin forever and the test harness timeout would catch it.
+	resp := fetchRobotsTXT(serverA.URL+"/robots.txt", client)
+	if resp != nil {
+		resp.Body.Close()
+		t.Errorf("expected nil on redirect loop, got status %d", resp.StatusCode)
+	}
+}
+
+// a redirect chain longer than the hop cap stops at the bound rather than
+// following indefinitely.
+func TestFetchRobotsTXT_DepthCap(t *testing.T) {
+	var hops int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// each hop points at a fresh path so the visited set never trips; only
+		// the depth cap can stop this. the Location is absolute (built from the
+		// request's own Host) so every hop is requestable by itself - a bare
+		// path would end the chain after one hop for the wrong reason.
+		n := atomic.AddInt32(&hops, 1)
+		w.Header().Set("Location", "http://"+r.Host+"/r"+strconv.Itoa(int(n)))
+		w.WriteHeader(http.StatusMovedPermanently)
+	}))
+	defer srv.Close()
+
+	// the client must not follow redirects itself, so every 301 is handed to
+	// fetchRobotsTXT.
+	client := &http.Client{
+		Timeout: 5 * time.Second,
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			return http.ErrUseLastResponse
+		},
+	}
+
+	resp := fetchRobotsTXT(srv.URL+"/robots.txt", client)
+	if resp != nil {
+		resp.Body.Close()
+		t.Errorf("expected nil once depth cap exceeded, got status %d", resp.StatusCode)
+	}
+	// exactly maxRobotsRedirects requests proves the cap itself ended the chain:
+	// fewer means it died early, more means the cap was ignored.
+	if got := atomic.LoadInt32(&hops); got != maxRobotsRedirects {
+		t.Errorf("followed %d hops, expected exactly %d", got, maxRobotsRedirects)
+	}
+}
+
+// TestDanglingProvider pins the gating in danglingProvider: only a cname that
+// points off-host into a known takeoverable provider counts. a cname echoing
+// the host back, an on-domain cname, an unknown provider and an empty cname are
+// all rejected, and matching ignores case.
+func TestDanglingProvider(t *testing.T) {
+	type providerCase struct {
+		name        string
+		subdomain   string
+		cname       string
+		wantService string
+		wantOK      bool
+	}
+	cases := []providerCase{
+		{name: "github pages dangling", subdomain: "blog.example.com", cname: "example.github.io.", wantService: "GitHub Pages", wantOK: true},
+		{name: "heroku dangling", subdomain: "app.example.com", cname: "example.herokuapp.com.", wantService: "Heroku", wantOK: true},
+		{name: "s3 dangling", subdomain: "files.example.com", cname: "bucket.s3.amazonaws.com.", wantService: "Amazon S3", wantOK: true},
+		{name: "self-reference is not dangling", subdomain: "www.example.com", cname: "www.example.com."},
+		{name: "on-domain cname is not dangling", subdomain: "www.example.com", cname: "lb.example.com."},
+		{name: "unknown provider is not dangling", subdomain: "x.example.com", cname: "host.notaprovider.net."},
+		{name: "empty cname is not dangling", subdomain: "x.example.com"},
+		{name: "case-insensitive match", subdomain: "x.example.com", cname: "X.GitHub.IO.", wantService: "GitHub Pages", wantOK: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			gotService, gotOK := danglingProvider(tc.subdomain, tc.cname)
+			if gotOK != tc.wantOK {
+				t.Errorf("danglingProvider(%q, %q) ok = %v, want %v", tc.subdomain, tc.cname, gotOK, tc.wantOK)
+			}
+			if gotService != tc.wantService {
+				t.Errorf("danglingProvider(%q, %q) service = %q, want %q", tc.subdomain, tc.cname, gotService, tc.wantService)
+			}
+		})
+	}
+}
+
 func TestSubdomainTakeoverResult(t *testing.T) {
 	result := SubdomainTakeoverResult{
 		Subdomain:  "test.example.com",
@@ -37,6 +37,36 @@ type SubdomainTakeoverResult struct {
 	Service    string `json:"service,omitempty"`
 }

+// takeoverProviders maps a takeoverable third-party's cname apex to its service
+// name. a "no such host" on a subdomain only counts as a dangling-cname takeover
+// when the cname points at one of these and the target is unclaimed - a cname
+// to anything else (or to the host itself) is a normal record, not a finding.
+//
+// keys are lower-case and carry no trailing dot: danglingProvider lower-cases
+// the cname and strips its trailing dot before comparing, and accepts either
+// the apex itself or any name ending in "."+apex.
+var takeoverProviders = map[string]string{
+	"github.io":             "GitHub Pages",
+	"herokuapp.com":         "Heroku",
+	"herokudns.com":         "Heroku",
+	"myshopify.com":         "Shopify",
+	"wordpress.com":         "WordPress",
+	"s3.amazonaws.com":      "Amazon S3",
+	"ghost.io":              "Ghost",
+	"pantheonsite.io":       "Pantheon",
+	"zendesk.com":           "Zendesk",
+	"surge.sh":              "Surge",
+	"bitbucket.io":          "Bitbucket",
+	"fastly.net":            "Fastly",
+	"helpscoutdocs.com":     "Helpscout",
+	"cargocollective.com":   "Cargo",
+	"uservoice.com":         "Uservoice",
+	"webflow.io":            "Webflow",
+	"readthedocs.io":        "ReadTheDocs",
+	"azurewebsites.net":     "Azure",
+	"cloudapp.net":          "Azure",
+	"trafficmanager.net":    "Azure",
+	"blob.core.windows.net": "Azure",
+	"netlify.app":           "Netlify",
+	"netlify.com":           "Netlify",
+}
+
 // SubdomainTakeover checks dnsResults for dangling subdomains pointing at
 // unclaimed third-party services.
 func SubdomainTakeover(url string, dnsResults []string, timeout time.Duration, threads int, logdir string) ([]SubdomainTakeoverResult, error) {
@@ -104,6 +134,27 @@ func SubdomainTakeover(url string, dnsResults []string, timeout time.Duration, t
 	return results, nil
 }

+// danglingProvider decides whether cname is an off-host pointer into one of the
+// takeoverProviders. it returns the provider's service name and true on a
+// match, or "" and false otherwise. an empty cname, or one that merely echoes
+// the subdomain back (as LookupCNAME does for a plain A record), is rejected:
+// that is a live host, not a dangling pointer.
+func danglingProvider(subdomain, cname string) (string, bool) {
+	// LookupCNAME hands back a fqdn with a trailing dot; normalizing both sides
+	// the same way lets the self-reference and suffix checks compare like-for-like.
+	normalize := func(name string) string {
+		return strings.ToLower(strings.TrimSuffix(name, "."))
+	}
+	target, host := normalize(cname), normalize(subdomain)
+
+	switch target {
+	case "", host:
+		return "", false
+	}
+
+	for apex, service := range takeoverProviders {
+		if target != apex && !strings.HasSuffix(target, "."+apex) {
+			continue
+		}
+		return service, true
+	}
+	return "", false
+}
+
 func checkSubdomainTakeover(subdomain string, client *http.Client) (bool, string) {
 	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, "http://"+subdomain, http.NoBody)
 	if err != nil {
@@ -111,11 +162,16 @@ func checkSubdomainTakeover(subdomain string, client *http.Client) (bool, string
 	}
 	resp, err := client.Do(req)
 	if err != nil {
+		// a dead host only matters if its cname still points at an unclaimed
+		// third-party service. LookupCNAME echoes the host back for plain A
+		// records, so "any cname" is not a signal - the cname must resolve to a
+		// known takeoverable provider and not be the host itself.
 		if strings.Contains(err.Error(), "no such host") {
-			// Check if CNAME exists
-			cname, err := net.DefaultResolver.LookupCNAME(context.TODO(), subdomain)
-			if err == nil && cname != "" {
-				return true, "Dangling CNAME"
+			cname, lookupErr := net.DefaultResolver.LookupCNAME(context.TODO(), subdomain)
+			if lookupErr == nil {
+				if service, ok := danglingProvider(subdomain, cname); ok {
+					return true, service + " (Dangling CNAME)"
+				}
 			}
 		}
 		return false, ""
@@ -38,6 +38,30 @@ file with one url per line.
 .BR \-dirlist " \fIsize\fR"
 directory and file fuzzing (small/medium/large).
 .TP
+.BR \-mc " \fIcodes\fR"
+dirlist: match only these status codes (comma list, e.g. 200,301).
+.TP
+.BR \-fc " \fIcodes\fR"
+dirlist: filter out these status codes (comma list).
+.TP
+.BR \-fs " \fIsizes\fR"
+dirlist: filter out responses of these body sizes (comma list).
+.TP
+.BR \-fw " \fIcounts\fR"
+dirlist: filter out responses with these word counts (comma list).
+.TP
+.BR \-fr " \fIregex\fR"
+dirlist: filter out responses whose body matches this regex.
+.TP
+.B \-ac
+dirlist: auto\-calibrate the soft\-404 wildcard baseline so catch\-all 200s are dropped.
+.TP
+.BR \-w " \fIpath|url\fR"
+dirlist: custom wordlist (local file or url); overrides the \fB\-dirlist\fR size.
+.TP
+.BR \-e " \fIexts\fR"
+dirlist: extensions appended to each word (comma list, e.g. php,bak,env).
+.TP
 .BR \-dnslist " \fIsize\fR"
 subdomain enumeration (small/medium/large).
 .TP
@@ -107,6 +131,9 @@ max crawl recursion depth (default 2).
 .B \-passive
 passive subdomain and historical url discovery from third\-party feeds (zero traffic to the target).
 .TP
+.B \-probe
+live\-host probe; reports liveness, final status, page title, server header and the redirect chain.
+.TP
 .B \-noscan
 skip the base url scan (robots.txt, etc).
 .SH OPTIONS
@@ -138,6 +165,12 @@ cookie header to send with every request.
 .BR \-rate\-limit " \fIn\fR"
 cap outbound requests per second (0 = unlimited, default 0).
 .TP
+.BR \-sarif " \fIfile\fR"
+write a sarif 2.1.0 report of the run to \fIfile\fR.
+.TP
+.BR \-md ", " \-\-markdown " \fIfile\fR"
+write a markdown report of the run to \fIfile\fR.
+.TP
 .B \-api
 emit json results and suppress the interactive output.
 .SH MODULES
@@ -29,6 +29,7 @@ import (
 	"github.com/dropalldatabases/sif/internal/logger"
 	"github.com/dropalldatabases/sif/internal/modules"
 	"github.com/dropalldatabases/sif/internal/output"
+	"github.com/dropalldatabases/sif/internal/report"
 	"github.com/dropalldatabases/sif/internal/scan"
 	"github.com/dropalldatabases/sif/internal/scan/builtin"
 	"github.com/dropalldatabases/sif/internal/scan/frameworks"
@@ -46,6 +47,10 @@ type App struct {
 // Version is set by main to the resolved build version and shown on the banner.
 var Version = "dev"

+// reportFileMode is the permission applied to written report files: owner
+// read/write, group/other read. reports aren't secret but may name targets.
+const reportFileMode = 0o644
+
 type UrlResult struct {
 	Url     string `json:"url"`
 	Results []ModuleResult
@@ -204,6 +209,12 @@ func (app *App) Run() error {

 	scansRun := make([]string, 0, 16)

+	// accumulate every module result across targets so the report writers can
+	// serialize the full run after the loop. only collected when an export flag
+	// is set, so the common path pays nothing.
+	wantReport := app.settings.SARIF != "" || app.settings.Markdown != ""
+	reportResults := make([]report.Result, 0, 16)
+
 	for _, url := range app.targets {
 		output.Info("Starting scan on %s", output.Highlight.Render(url))

@@ -231,11 +242,20 @@ func (app *App) Run() error {
 		}

 		if app.settings.Dirlist != "none" {
-			result, err := scan.Dirlist(app.settings.Dirlist, url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir)
+			result, err := scan.Dirlist(app.settings.Dirlist, url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir, scan.DirlistOptions{
+				MatchCodes:  app.settings.DirMatchCodes,
+				FilterCodes: app.settings.DirFilterCodes,
+				FilterSizes: app.settings.DirFilterSizes,
+				FilterWords: app.settings.DirFilterWords,
+				FilterRegex: app.settings.DirFilterRegex,
+				Calibrate:   app.settings.DirCalibrate,
+				Wordlist:    app.settings.DirWordlist,
+				Extensions:  app.settings.DirExtensions,
+			})
 			if err != nil {
 				log.Errorf("Error while running directory scan: %s", err)
 			} else {
-				moduleResults = append(moduleResults, ModuleResult{"dirlist", result})
+				moduleResults = append(moduleResults, NewModuleResult(result))
 				scansRun = append(scansRun, "Directory Listing")
 			}
 		}
@@ -441,6 +461,16 @@ func (app *App) Run() error {
 			}
 		}

+		if app.settings.Probe {
+			result, err := scan.Probe(url, app.settings.Timeout, app.settings.LogDir)
+			if err != nil {
+				log.Errorf("Error while running probe: %s", err)
+			} else if result != nil {
+				moduleResults = append(moduleResults, NewModuleResult(result))
+				scansRun = append(scansRun, "Probe")
+			}
+		}
+
 		// Load and run modules
 		if app.settings.AllModules || app.settings.Modules != "" || app.settings.ModuleTags != "" {
 			loader, err := modules.NewLoader()
@@ -511,6 +541,16 @@ func (app *App) Run() error {
 			}
 			fmt.Println(string(marshalled))
 		}
+
+		if wantReport {
+			reportResults = append(reportResults, collectReportResults(url, moduleResults)...)
+		}
+	}
+
+	if wantReport {
+		if err := app.writeReports(reportResults); err != nil {
+			return err
+		}
 	}

 	if !app.settings.ApiMode {
@@ -520,6 +560,48 @@ func (app *App) Run() error {
 	return nil
 }

+// collectReportResults converts the module results gathered for a single
+// target into report.Result entries. each finding's data is stored as
+// marshalled json, which keeps scan types out of the report package. when a
+// result fails to marshal it is logged and dropped so the run carries on.
+func collectReportResults(target string, moduleResults []ModuleResult) []report.Result {
+	flattened := make([]report.Result, 0, len(moduleResults))
+	for i := range moduleResults {
+		id := moduleResults[i].Id
+		raw, err := json.Marshal(moduleResults[i].Data)
+		if err == nil {
+			flattened = append(flattened, report.Result{Target: target, Module: id, Data: raw})
+			continue
+		}
+		// an unmarshalable finding is reported and left out of the export.
+		log.Warnf("report: skipping %s result for %s: %v", id, target, err)
+	}
+	return flattened
+}
+
+// writeReports serializes the collected results to the requested export files.
+// each writer runs independently so a bad path for one format doesn't suppress
+// the other: a failure is recorded and the remaining writer still runs.
+//
+// results is the flattened module output for every target in the run. the
+// return value is nil when every requested report was written; otherwise it
+// wraps the first failure and appends the text of any later one.
+func (app *App) writeReports(results []report.Result) error {
+	var failed error
+	// record keeps the first failure wrapped (so errors.Is/As still reach it)
+	// and folds any later failure into the message instead of dropping it.
+	record := func(err error) {
+		if failed != nil {
+			err = fmt.Errorf("%w; %v", failed, err)
+		}
+		failed = err
+	}
+
+	if path := app.settings.SARIF; path != "" {
+		if data, err := report.SARIF(results); err != nil {
+			record(fmt.Errorf("build sarif report: %w", err))
+		} else if err := os.WriteFile(path, data, reportFileMode); err != nil {
+			record(fmt.Errorf("write sarif report %q: %w", path, err))
+		} else {
+			output.Success("sarif report written to %s", path)
+		}
+	}
+
+	// reached even when the sarif export failed above, so one bad path cannot
+	// cost the user both reports.
+	if path := app.settings.Markdown; path != "" {
+		data := report.Markdown(results)
+		if err := os.WriteFile(path, data, reportFileMode); err != nil {
+			record(fmt.Errorf("write markdown report %q: %w", path, err))
+		} else {
+			output.Success("markdown report written to %s", path)
+		}
+	}
+
+	return failed
+}
+
 // expandTargets queries SecurityTrails for each original target and returns
 // newly discovered domains (subdomains + associated) for target expansion
 func (app *App) expandTargets() []string {