From 546ab091da8397fde2893bfa8ab25fd3dabb7619 Mon Sep 17 00:00:00 2001 From: vmfunc Date: Wed, 10 Jun 2026 15:19:54 -0700 Subject: [PATCH 1/3] perf(httpx): tune transport for connection reuse and add DrainClose the shared transport was a bare DefaultTransport.Clone() with the stock MaxIdleConnsPerHost=2, and call-sites only close response bodies without draining them - so go could never return a conn to the idle pool and every request re-dialed. high thread counts just thrashed the dialer. - plumb Threads through Options into buildTransport; size MaxIdleConnsPerHost to the worker count (floored) so concurrent workers on one host pool instead of re-dialing, MaxIdleConns=512, MaxConnsPerHost=0, IdleConnTimeout=90s, ForceAttemptHTTP2. the socks5 branch gets its own keepalive net.Dialer so it doesn't lose os-level pooling under proxy.Direct. - add DrainClose to read (capped) and close a body so the conn is reusable. - benchmark proves it: 50 sequential requests reuse 1 conn tuned vs 50 bare. --- internal/httpx/httpx.go | 77 +++++++++- internal/httpx/httpx_test.go | 274 +++++++++++++++++++++++++++++++++++ sif.go | 1 + 3 files changed, 345 insertions(+), 7 deletions(-) diff --git a/internal/httpx/httpx.go b/internal/httpx/httpx.go index d0f32dd..44f34a5 100644 --- a/internal/httpx/httpx.go +++ b/internal/httpx/httpx.go @@ -17,6 +17,8 @@ package httpx import ( "fmt" + "io" + "net" "net/http" "net/url" "strings" @@ -41,6 +43,29 @@ const headerSep = ": " // equal to the per-second rate keeps the cap honest over any one-second window. const limiterBurstPerRate = 1 +// transport pool tuning. go's default transport caps idle conns per host at 2 +// and reuse only kicks in once a response body is fully drained, so without +// these a high thread count just thrashes the dialer instead of pooling. +const ( + // total idle conns kept warm across every host we hit in a run. 
+ maxIdleConns = 512 + // floor for per-host idle conns so a single-target run still pools even + // when the thread count is tiny. + minIdleConnsPerHost = 8 + // how long an idle conn lingers before the pool reaps it. + idleConnTimeout = 90 * time.Second + // keepalive probe interval for live conns; mirrors go's default dialer so + // the socks5 branch doesn't silently lose os-level keepalive. + dialKeepAlive = 30 * time.Second + // dial timeout for the socks5 branch; matches go's default dialer. + dialTimeout = 30 * time.Second +) + +// drainCap bounds how much of an unread body DrainClose will copy before +// closing; a body larger than this isn't worth slurping just to reuse the +// conn, so we cap the read and let the conn be discarded instead. +const drainCap = 16 << 10 + // Options carries the runtime knobs that apply to every outbound request. // RateLimit is requests/sec (0 = unlimited); Headers are "Key: Value" strings. type Options struct { @@ -49,6 +74,9 @@ type Options struct { Cookie string UserAgent string RateLimit int + // Threads is the scan worker count; it sizes the per-host idle pool so + // concurrent workers hitting one target reuse conns instead of dialing fresh. + Threads int } // configured holds the package-level transport built once by Configure. nil @@ -63,7 +91,7 @@ var ( // //nolint:gocritic // signature is the package's stable startup api; called once. func Configure(opts Options) error { - base, err := buildTransport(opts.Proxy) + base, err := buildTransport(opts.Proxy, opts.Threads) if err != nil { return err } @@ -104,9 +132,10 @@ func Client(timeout time.Duration) *http.Client { return &http.Client{Timeout: timeout, Transport: rt} } -// buildTransport clones the default transport and applies the proxy. An empty -// proxy leaves the default behavior (respects HTTP_PROXY env) intact. 
-func buildTransport(proxyURL string) (*http.Transport, error) { +// buildTransport clones the default transport, tunes its pool for the worker +// count and applies the proxy. An empty proxy leaves the default behavior +// (respects HTTP_PROXY env) intact. +func buildTransport(proxyURL string, threads int) (*http.Transport, error) { tr, ok := http.DefaultTransport.(*http.Transport) if !ok { // unreachable in practice, but never trust an assertion silently. @@ -114,6 +143,15 @@ func buildTransport(proxyURL string) (*http.Transport, error) { } transport := tr.Clone() + // size the idle pool so every worker can keep its conn warm. per-host idle + // must clear the thread count or workers past the cap re-dial each request; + // MaxConnsPerHost stays 0 (unbounded) so the limiter, not the pool, paces us. + transport.MaxIdleConns = maxIdleConns + transport.MaxIdleConnsPerHost = idlePerHost(threads) + transport.MaxConnsPerHost = 0 + transport.IdleConnTimeout = idleConnTimeout + transport.ForceAttemptHTTP2 = true + if proxyURL == "" { return transport, nil } @@ -127,9 +165,11 @@ func buildTransport(proxyURL string) (*http.Transport, error) { case schemeHTTP, schemeHTTPS: transport.Proxy = http.ProxyURL(parsed) case schemeSOCKS5: - // socks5 needs a custom dialer; the returned dialer implements - // ContextDialer so cancellation/timeouts propagate. - dialer, err := proxy.SOCKS5("tcp", parsed.Host, nil, proxy.Direct) + // socks5 needs a custom dialer. proxy.SOCKS5 takes a forward dialer, so + // hand it our own net.Dialer with keepalive set - the default + // proxy.Direct has none, which would kill os-level conn pooling. 
+ fwd := &net.Dialer{Timeout: dialTimeout, KeepAlive: dialKeepAlive} + dialer, err := proxy.SOCKS5("tcp", parsed.Host, nil, fwd) if err != nil { return nil, fmt.Errorf("socks5 proxy %q: %w", proxyURL, err) } @@ -145,6 +185,29 @@ func buildTransport(proxyURL string) (*http.Transport, error) { return transport, nil } +// idlePerHost picks the per-host idle pool size: at least the worker count so +// no worker re-dials, never below the floor so a small thread count still pools. +func idlePerHost(threads int) int { + if threads < minIdleConnsPerHost { + return minIdleConnsPerHost + } + return threads +} + +// DrainClose reads up to drainCap bytes of resp.Body and then closes it. go only returns +// a conn to the idle pool when the body is read to EOF, so a caller that only +// closes gets the conn discarded and pays for a fresh dial next time. Call this instead of +// a bare resp.Body.Close() to keep the pool warm. Safe on a nil response. +func DrainClose(resp *http.Response) { + if resp == nil || resp.Body == nil { + return + } + // the read result is intentionally ignored: we're discarding the body and + // about to close it, so a copy error changes nothing we can act on. + _, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, drainCap)) + resp.Body.Close() +} + // parseHeaders splits each "Key: Value" entry on the first ": ". Entries // without the separator are rejected so a typo fails loud instead of silently. // The returned map is always non-nil so callers can range it unconditionally. 
diff --git a/internal/httpx/httpx_test.go b/internal/httpx/httpx_test.go index 4b37548..b589a3b 100644 --- a/internal/httpx/httpx_test.go +++ b/internal/httpx/httpx_test.go @@ -14,8 +14,12 @@ package httpx import ( "context" + "io" + "net" "net/http" "net/http/httptest" + "strings" + "sync" "testing" "time" ) @@ -215,3 +219,273 @@ func TestRateLimitUnlimited(t *testing.T) { t.Error("expected no limiter when RateLimit is 0") } } + +func TestIdlePerHost(t *testing.T) { + tests := []struct { + name string + threads int + want int + }{ + {name: "below floor clamps up", threads: 1, want: minIdleConnsPerHost}, + {name: "zero clamps up", threads: 0, want: minIdleConnsPerHost}, + {name: "at floor", threads: minIdleConnsPerHost, want: minIdleConnsPerHost}, + {name: "above floor passes through", threads: 64, want: 64}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := idlePerHost(tt.threads); got != tt.want { + t.Errorf("idlePerHost(%d) = %d, want %d", tt.threads, got, tt.want) + } + }) + } +} + +func TestBuildTransportTuning(t *testing.T) { + const threads = 32 + tr, err := buildTransport("", threads) + if err != nil { + t.Fatalf("buildTransport: %v", err) + } + + if tr.MaxIdleConns != maxIdleConns { + t.Errorf("MaxIdleConns = %d, want %d", tr.MaxIdleConns, maxIdleConns) + } + if tr.MaxIdleConnsPerHost != threads { + t.Errorf("MaxIdleConnsPerHost = %d, want %d", tr.MaxIdleConnsPerHost, threads) + } + if tr.MaxConnsPerHost != 0 { + t.Errorf("MaxConnsPerHost = %d, want 0 (unbounded)", tr.MaxConnsPerHost) + } + if tr.IdleConnTimeout != idleConnTimeout { + t.Errorf("IdleConnTimeout = %v, want %v", tr.IdleConnTimeout, idleConnTimeout) + } + if !tr.ForceAttemptHTTP2 { + t.Error("ForceAttemptHTTP2 = false, want true") + } +} + +func TestDrainClose(t *testing.T) { + resetConfig(t) + + // serve a body the caller never reads; DrainClose must drain it so the conn + // is eligible for reuse rather than abandoned mid-stream. 
+ const body = "sif response body that the caller never reads" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + io.WriteString(w, body) + })) + t.Cleanup(srv.Close) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, srv.URL, http.NoBody) + if err != nil { + t.Fatalf("new request: %v", err) + } + resp, err := Client(5 * time.Second).Do(req) + if err != nil { + t.Fatalf("do request: %v", err) + } + + DrainClose(resp) + + // after DrainClose the body is closed; a further read must fail. + if _, err := resp.Body.Read(make([]byte, 1)); err == nil { + t.Error("expected read after DrainClose to fail on a closed body") + } +} + +func TestDrainCloseNil(t *testing.T) { + // a nil response (e.g. an errored request) must not panic. + DrainClose(nil) + DrainClose(&http.Response{}) +} + +// countConns wraps a test server with a ConnState hook that tallies how many +// distinct tcp conns the server saw. distinct conns == failed reuse. +func countConns(t *testing.T) (*httptest.Server, func() int) { + t.Helper() + + var ( + mu sync.Mutex + conns = make(map[net.Conn]struct{}) + ) + srv := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + // always write a body so reuse depends on the caller draining it. 
+ io.WriteString(w, "ok") + })) + srv.Config.ConnState = func(c net.Conn, state http.ConnState) { + if state != http.StateNew { + return + } + mu.Lock() + conns[c] = struct{}{} + mu.Unlock() + } + srv.Start() + t.Cleanup(srv.Close) + + return srv, func() int { + mu.Lock() + defer mu.Unlock() + return len(conns) + } +} + +func TestTransportReusesConnections(t *testing.T) { + resetConfig(t) + + const ( + threads = 8 + requests = 30 + ) + if err := Configure(Options{Threads: threads}); err != nil { + t.Fatalf("Configure: %v", err) + } + + srv, distinct := countConns(t) + + // fire N sequential requests through the tuned client, draining each body so + // the conn returns to the pool. a working pool serves all of them on one conn. + client := Client(5 * time.Second) + for i := 0; i < requests; i++ { + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, srv.URL, http.NoBody) + if err != nil { + t.Fatalf("new request %d: %v", i, err) + } + resp, err := client.Do(req) + if err != nil { + t.Fatalf("do request %d: %v", i, err) + } + DrainClose(resp) + } + + // sequential reuse should land on exactly one conn; allow a tiny margin for + // the rare race where a conn is reaped between requests. + const maxReuseConns = 2 + if got := distinct(); got > maxReuseConns { + t.Errorf("tuned client opened %d conns for %d requests, want <= %d (pool not reusing)", + got, requests, maxReuseConns) + } +} + +func TestBareClientDoesNotReuse(t *testing.T) { + srv, distinct := countConns(t) + + // the control: a bare DefaultTransport client whose caller closes but never + // drains the body. go can't reuse a half-read conn, so each request dials + // fresh - this is exactly the pre-tuning behavior we're fixing. 
+ client := &http.Client{ + Timeout: 5 * time.Second, + Transport: http.DefaultTransport.(*http.Transport).Clone(), + } + + const requests = 30 + for i := 0; i < requests; i++ { + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, srv.URL, http.NoBody) + if err != nil { + t.Fatalf("new request %d: %v", i, err) + } + resp, err := client.Do(req) + if err != nil { + t.Fatalf("do request %d: %v", i, err) + } + // close without draining - the leak that kills reuse. + resp.Body.Close() + } + + // most requests should have dialed a fresh conn. don't demand exactly N (the + // scheduler occasionally reuses one), just that it's clearly not pooling. + const minDistinct = requests / 2 + if got := distinct(); got < minDistinct { + t.Errorf("bare client opened only %d conns for %d requests, want >= %d "+ + "(expected near-zero reuse without draining)", got, requests, minDistinct) + } +} + +// BenchmarkConnReuse contrasts the tuned, draining client against a bare client +// that closes without draining. the reported conns/op metric is the distinct +// tcp conns one pass of `requests` opened - tuned≈1, bare≈requests - so the +// README can quote real before/after reuse numbers. the conn map is reset per +// iteration so the metric stays a per-pass count and the bare path doesn't +// accumulate b.N*requests live sockets and exhaust the ephemeral port range. +// +// run the bare sub-bench with a bounded -benchtime (e.g. -benchtime 5x): its +// whole point is that it can't reuse, so a large b.N floods the local port +// space with TIME_WAIT sockets. the tuned sub-bench reuses and runs unbounded. 
+func BenchmarkConnReuse(b *testing.B) { + const requests = 50 + + run := func(b *testing.B, drain bool, client *http.Client) { + b.Helper() + var ( + mu sync.Mutex + conns = make(map[net.Conn]struct{}) + ) + srv := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + io.WriteString(w, strings.Repeat("x", 256)) + })) + srv.Config.ConnState = func(c net.Conn, state http.ConnState) { + if state != http.StateNew { + return + } + mu.Lock() + conns[c] = struct{}{} + mu.Unlock() + } + srv.Start() + defer srv.Close() + + var lastPass int + b.ResetTimer() + for n := 0; n < b.N; n++ { + mu.Lock() + conns = make(map[net.Conn]struct{}) + mu.Unlock() + for i := 0; i < requests; i++ { + req, _ := http.NewRequestWithContext(context.Background(), http.MethodGet, srv.URL, http.NoBody) + resp, err := client.Do(req) + if err != nil { + b.Fatalf("do: %v", err) + } + if drain { + DrainClose(resp) + } else { + resp.Body.Close() + } + } + // close idle conns between passes so the bare client's per-pass + // sockets land in TIME_WAIT and free up before the next pass. + client.CloseIdleConnections() + mu.Lock() + lastPass = len(conns) + mu.Unlock() + } + b.StopTimer() + + // distinct conns for a single pass of `requests`. + b.ReportMetric(float64(lastPass), "conns/op") + } + + b.Run("tuned-drain", func(b *testing.B) { + resetBench() + tr, err := buildTransport("", 8) + if err != nil { + b.Fatalf("buildTransport: %v", err) + } + run(b, true, &http.Client{Timeout: 5 * time.Second, Transport: tr}) + }) + + b.Run("bare-noDrain", func(b *testing.B) { + run(b, false, &http.Client{ + Timeout: 5 * time.Second, + Transport: http.DefaultTransport.(*http.Transport).Clone(), + }) + }) +} + +// resetBench clears the package transport without a *testing.T for benchmarks. 
+func resetBench() { + mu.Lock() + configured = nil + mu.Unlock() +} diff --git a/sif.go b/sif.go index 3f1746c..1d5e5d2 100644 --- a/sif.go +++ b/sif.go @@ -194,6 +194,7 @@ func (app *App) Run() error { Headers: app.settings.Header, Cookie: app.settings.Cookie, RateLimit: app.settings.RateLimit, + Threads: app.settings.Threads, }); err != nil { log.Warnf("http client config failed, continuing with defaults: %v", err) } From 1237f3f09e5c90cc8460da47a48f71cc24f25754 Mon Sep 17 00:00:00 2001 From: vmfunc Date: Wed, 10 Jun 2026 15:28:21 -0700 Subject: [PATCH 2/3] feat(finding): normalized finding layer for notify and diff scan results live in ~two dozen structs with no shared shape, so every consumer that wants "what did this run turn up" reimplements the type-switch. add internal/finding: an ordered Severity (info one inventory finding; vulns are the interesting bit + // so they bump severity and ride along in the evidence string. + sev := sevRecon + if len(r.Vulns) > 0 { + sev = SeverityHigh + } + raw := fmt.Sprintf("%d ports", len(r.Ports)) + if len(r.Vulns) > 0 { + raw = fmt.Sprintf("%s, %d vulns", raw, len(r.Vulns)) + } + return []Finding{{ + Target: target, + Module: "shodan", + Severity: sev, + Key: key("shodan", r.IP), + Title: "shodan host " + r.IP, + Raw: raw, + }} +} + +func flattenSQL(target string, r *scan.SQLResult) []Finding { + if r == nil { + return nil + } + out := make([]Finding, 0, len(r.AdminPanels)+len(r.DatabaseErrors)+len(r.ExposedPorts)) + for i := 0; i < len(r.AdminPanels); i++ { + p := r.AdminPanels[i] + out = append(out, Finding{ + Target: target, + Module: "sql", + Severity: sevAdminPanel, + Key: key("sql", "admin:"+p.URL), + Title: p.Type + " admin panel", + Raw: fmt.Sprintf("%s (%d)", p.URL, p.Status), + }) + } + for i := 0; i < len(r.DatabaseErrors); i++ { + e := r.DatabaseErrors[i] + out = append(out, Finding{ + Target: target, + Module: "sql", + Severity: sevDBError, + Key: key("sql", "dberr:"+e.URL+":"+e.DatabaseType), + Title: 
e.DatabaseType + " error disclosure", + Raw: e.ErrorPattern, + }) + } + for i := 0; i < len(r.ExposedPorts); i++ { + p := r.ExposedPorts[i] + out = append(out, Finding{ + Target: target, + Module: "sql", + Severity: SeverityMedium, + Key: key("sql", fmt.Sprintf("port:%d", p)), + Title: fmt.Sprintf("exposed db port %d", p), + Raw: fmt.Sprintf("%d", p), + }) + } + return out +} + +func flattenLFI(target string, r *scan.LFIResult) []Finding { + if r == nil { + return nil + } + out := make([]Finding, 0, len(r.Vulnerabilities)) + for i := 0; i < len(r.Vulnerabilities); i++ { + v := r.Vulnerabilities[i] + out = append(out, Finding{ + Target: target, + Module: "lfi", + Severity: ParseSeverity(v.Severity), + Key: key("lfi", v.URL+":"+v.Parameter), + Title: "lfi via " + v.Parameter, + Raw: v.Evidence, + }) + } + return out +} + +func flattenCMS(target string, r *scan.CMSResult) []Finding { + if r == nil || r.Name == "" { + return nil + } + return []Finding{{ + Target: target, + Module: "cms", + Severity: sevRecon, + Key: key("cms", r.Name), + Title: r.Name + " detected", + Raw: strings.TrimSpace(r.Name + " " + r.Version), + }} +} + +func flattenSecurityTrails(target string, r *scan.SecurityTrailsResult) []Finding { + if r == nil { + return nil + } + out := make([]Finding, 0, len(r.Subdomains)+len(r.AssociatedDomains)) + for i := 0; i < len(r.Subdomains); i++ { + d := r.Subdomains[i] + out = append(out, Finding{ + Target: target, + Module: "securitytrails", + Severity: sevRecon, + Key: key("securitytrails", "sub:"+d), + Title: "subdomain " + d, + Raw: d, + }) + } + for i := 0; i < len(r.AssociatedDomains); i++ { + d := r.AssociatedDomains[i] + out = append(out, Finding{ + Target: target, + Module: "securitytrails", + Severity: sevRecon, + Key: key("securitytrails", "assoc:"+d), + Title: "associated domain " + d, + Raw: d, + }) + } + return out +} + +func flattenCORS(target string, r *scan.CORSResult) []Finding { + if r == nil { + return nil + } + out := make([]Finding, 0, 
len(r.Findings)) + for i := 0; i < len(r.Findings); i++ { + f := r.Findings[i] + out = append(out, Finding{ + Target: target, + Module: "cors", + Severity: ParseSeverity(f.Severity), + Key: key("cors", f.URL+":"+f.OriginTested), + Title: f.Note, + Raw: "allow-origin: " + f.AllowOrigin, + }) + } + return out +} + +func flattenRedirect(target string, r *scan.RedirectResult) []Finding { + if r == nil { + return nil + } + out := make([]Finding, 0, len(r.Findings)) + for i := 0; i < len(r.Findings); i++ { + f := r.Findings[i] + out = append(out, Finding{ + Target: target, + Module: "redirect", + Severity: ParseSeverity(f.Severity), + Key: key("redirect", f.URL+":"+f.Parameter+":"+f.Via), + Title: "open redirect via " + f.Parameter, + Raw: f.Location, + }) + } + return out +} + +func flattenXSS(target string, r *scan.XSSResult) []Finding { + if r == nil { + return nil + } + out := make([]Finding, 0, len(r.Findings)) + for i := 0; i < len(r.Findings); i++ { + f := r.Findings[i] + out = append(out, Finding{ + Target: target, + Module: "xss", + Severity: ParseSeverity(f.Severity), + Key: key("xss", f.URL+":"+f.Parameter+":"+f.Context), + Title: "reflected xss in " + f.Parameter, + Raw: strings.Join(f.SurvivedRaw, " "), + }) + } + return out +} + +func flattenCrawl(target string, r *scan.CrawlResult) []Finding { + if r == nil { + return nil + } + out := make([]Finding, 0, len(r.URLs)) + for i := 0; i < len(r.URLs); i++ { + u := r.URLs[i] + out = append(out, Finding{ + Target: target, + Module: "crawl", + Severity: sevRecon, + Key: key("crawl", u), + Title: "crawled url", + Raw: u, + }) + } + return out +} + +func flattenPassive(target string, r *scan.PassiveResult) []Finding { + if r == nil { + return nil + } + out := make([]Finding, 0, len(r.Subdomains)+len(r.URLs)) + for i := 0; i < len(r.Subdomains); i++ { + s := r.Subdomains[i] + out = append(out, Finding{ + Target: target, + Module: "passive", + Severity: sevRecon, + Key: key("passive", "sub:"+s), + Title: "passive 
subdomain " + s, + Raw: s, + }) + } + for i := 0; i < len(r.URLs); i++ { + u := r.URLs[i] + out = append(out, Finding{ + Target: target, + Module: "passive", + Severity: sevRecon, + Key: key("passive", "url:"+u), + Title: "passive url", + Raw: u, + }) + } + return out +} + +func flattenProbe(target string, r *scan.ProbeResult) []Finding { + if r == nil || !r.Alive { + // a dead probe isn't a finding, just an absent host. + return nil + } + return []Finding{{ + Target: target, + Module: "probe", + Severity: sevRecon, + Key: key("probe", r.URL), + Title: fmt.Sprintf("alive %d", r.StatusCode), + Raw: strings.TrimSpace(fmt.Sprintf("%d %s", r.StatusCode, r.Title)), + }} +} + +func flattenHeaders(target string, rs []scan.HeaderResult) []Finding { + out := make([]Finding, 0, len(rs)) + for i := 0; i < len(rs); i++ { + h := rs[i] + out = append(out, Finding{ + Target: target, + Module: "headers", + Severity: sevRecon, + Key: key("headers", h.Name), + Title: h.Name, + Raw: h.Value, + }) + } + return out +} + +func flattenSecurityHeaders(target string, rs []scan.SecurityHeaderResult) []Finding { + out := make([]Finding, 0, len(rs)) + for i := 0; i < len(rs); i++ { + h := rs[i] + out = append(out, Finding{ + Target: target, + Module: "security_headers", + Severity: ParseSeverity(h.Severity), + Key: key("security_headers", h.Header), + Title: h.Header, + Raw: h.Note, + }) + } + return out +} + +// dirRedirectFloor is the lowest status code treated as noteworthy for a listed directory; a + // redirect (>=300) or anything past it is worth more than a plain 200 hit. 
+const dirRedirectFloor = 300 + +func flattenDirlist(target string, rs []scan.DirectoryResult) []Finding { + out := make([]Finding, 0, len(rs)) + for i := 0; i < len(rs); i++ { + d := rs[i] + sev := sevRecon + if d.StatusCode >= dirRedirectFloor { + sev = SeverityLow + } + out = append(out, Finding{ + Target: target, + Module: "dirlist", + Severity: sev, + Key: key("dirlist", d.Url), + Title: fmt.Sprintf("%s [%d]", d.Url, d.StatusCode), + Raw: fmt.Sprintf("status=%d size=%d", d.StatusCode, d.Size), + }) + } + return out +} + +func flattenCloudStorage(target string, rs []scan.CloudStorageResult) []Finding { + out := make([]Finding, 0, len(rs)) + for i := 0; i < len(rs); i++ { + b := rs[i] + sev := sevRecon + if b.IsPublic { + sev = sevPublicS3 + } + title := "bucket " + b.BucketName + if b.IsPublic { + title = "public bucket " + b.BucketName + } + out = append(out, Finding{ + Target: target, + Module: "cloudstorage", + Severity: sev, + Key: key("cloudstorage", b.BucketName), + Title: title, + Raw: fmt.Sprintf("public=%t", b.IsPublic), + }) + } + return out +} + +func flattenDork(target string, rs []scan.DorkResult) []Finding { + out := make([]Finding, 0, len(rs)) + for i := 0; i < len(rs); i++ { + d := rs[i] + out = append(out, Finding{ + Target: target, + Module: "dork", + Severity: sevRecon, + Key: key("dork", d.Url), + Title: "dork hit", + Raw: d.Url, + }) + } + return out +} + +func flattenTakeover(target string, rs []scan.SubdomainTakeoverResult) []Finding { + out := make([]Finding, 0, len(rs)) + for i := 0; i < len(rs); i++ { + t := rs[i] + // only the vulnerable ones are findings; a safe cname is noise here. 
+ if !t.Vulnerable { + continue + } + out = append(out, Finding{ + Target: target, + Module: "subdomain_takeover", + Severity: sevTakeover, + Key: key("subdomain_takeover", t.Subdomain), + Title: "takeover: " + t.Subdomain, + Raw: t.Service, + }) + } + return out +} + +func flattenFramework(target string, r *frameworks.FrameworkResult) []Finding { + if r == nil || r.Name == "" { + return nil + } + // framework risk maps onto severity; an unset risk falls back to recon. + sev := ParseSeverity(r.RiskLevel) + if sev == SeverityUnknown { + sev = sevRecon + } + raw := strings.TrimSpace(r.Name + " " + r.Version) + if len(r.CVEs) > 0 { + raw = fmt.Sprintf("%s, %d cves", raw, len(r.CVEs)) + } + return []Finding{{ + Target: target, + Module: "framework", + Severity: sev, + Key: key("framework", r.Name), + Title: r.Name + " detected", + Raw: raw, + }} +} + +func flattenJS(target string, r *js.JavascriptScanResult) []Finding { + if r == nil { + return nil + } + supabase := r.SupabaseFindings() + out := make([]Finding, 0, len(r.SecretMatches)+len(supabase)+len(r.Endpoints)+len(r.FoundEnvironmentVars)) + for i := 0; i < len(r.SecretMatches); i++ { + s := r.SecretMatches[i] + out = append(out, Finding{ + Target: target, + Module: "js", + Severity: sevSecret, + Key: key("js", "secret:"+s.Rule+":"+s.Source), + Title: "secret: " + s.Rule, + Raw: s.Source, + }) + } + for i := 0; i < len(supabase); i++ { + s := supabase[i] + // a non-anon role on an exposed key is the real bug; anon is just recon. 
+ sev := sevRecon + if s.Role != "" && s.Role != "anon" { + sev = SeverityHigh + } + out = append(out, Finding{ + Target: target, + Module: "js", + Severity: sev, + Key: key("js", "supabase:"+s.ProjectId), + Title: "supabase project " + s.ProjectId, + Raw: fmt.Sprintf("role=%s collections=%d", s.Role, s.Collections), + }) + } + for i := 0; i < len(r.Endpoints); i++ { + e := r.Endpoints[i] + out = append(out, Finding{ + Target: target, + Module: "js", + Severity: sevRecon, + Key: key("js", "endpoint:"+e), + Title: "js endpoint", + Raw: e, + }) + } + // env vars are a map; sort-free since the Key carries the name, and diff + // keys on the Key not on iteration order. + for name, value := range r.FoundEnvironmentVars { + out = append(out, Finding{ + Target: target, + Module: "js", + Severity: sevSecret, + Key: key("js", "env:"+name), + Title: "env var " + name, + Raw: value, + }) + } + return out +} + +func flattenModule(target string, r *modules.Result) []Finding { + if r == nil { + return nil + } + module := r.ResultType() + out := make([]Finding, 0, len(r.Findings)) + for i := 0; i < len(r.Findings); i++ { + f := r.Findings[i] + out = append(out, Finding{ + Target: target, + Module: module, + Severity: ParseSeverity(f.Severity), + Key: key(module, f.URL), + Title: module + " finding", + Raw: f.Evidence, + }) + } + return out +} + +func flattenNuclei(target string, events []output.ResultEvent) []Finding { + out := make([]Finding, 0, len(events)) + for i := 0; i < len(events); i++ { + e := events[i] + // host is the most reliable per-hit identifier; matched-at sharpens it + // when several templates fire on one host. 
+ ident := e.TemplateID + ":" + e.Host + if e.Matched != "" { + ident = e.TemplateID + ":" + e.Matched + } + out = append(out, Finding{ + Target: target, + Module: "nuclei", + Severity: ParseSeverity(e.Info.SeverityHolder.Severity.String()), + Key: key("nuclei", ident), + Title: e.Info.Name, + Raw: e.Matched, + }) + } + return out +} + +func flattenStrings(target, module string, items []string) []Finding { + out := make([]Finding, 0, len(items)) + for i := 0; i < len(items); i++ { + v := items[i] + out = append(out, Finding{ + Target: target, + Module: module, + Severity: sevRecon, + Key: key(module, v), + Title: module + " item", + Raw: v, + }) + } + return out +} diff --git a/internal/finding/finding_test.go b/internal/finding/finding_test.go new file mode 100644 index 0000000..596fcf5 --- /dev/null +++ b/internal/finding/finding_test.go @@ -0,0 +1,354 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package finding + +import ( + "strings" + "testing" + + "github.com/dropalldatabases/sif/internal/modules" + "github.com/dropalldatabases/sif/internal/scan" + "github.com/dropalldatabases/sif/internal/scan/frameworks" + "github.com/dropalldatabases/sif/internal/scan/js" + "github.com/projectdiscovery/nuclei/v3/pkg/model" + "github.com/projectdiscovery/nuclei/v3/pkg/model/types/severity" + "github.com/projectdiscovery/nuclei/v3/pkg/output" +) + +// scanResultType mirrors the minimal interface the scan packages implement; the +// coverage table below carries a value per ResultType() so a new scanner whose +// ResultType isn't represented (or isn't handled by Flatten) trips a failure. 
+type scanResultType interface { + ResultType() string +} + +// coverageCase is one representative, non-empty instance of a result type plus +// its expected module attribution. wantItems is how many findings Flatten must +// emit for the populated instance, proving the per-item fan-out works. +type coverageCase struct { + value any // the result as it reaches Flatten + typed scanResultType // same value when it implements ResultType(), else nil + module string // module id Flatten should stamp + wantItems int // findings the populated instance must produce +} + +// coverageCases is the registry the guard checks against. there must be one +// entry per distinct ResultType() in the scan tree (plus the raw []string and +// nuclei []ResultEvent that flow through the report without a ResultType). add a +// scanner without adding it here and TestFlattenCoversEveryResultType fails. +func coverageCases() []coverageCase { + return []coverageCase{ + { + value: &scan.ShodanResult{IP: "1.2.3.4", Ports: []int{80}, Vulns: []string{"CVE-1"}}, + typed: &scan.ShodanResult{}, + module: "shodan", + wantItems: 1, + }, + { + value: &scan.SQLResult{ + AdminPanels: []scan.SQLAdminPanel{{URL: "http://x/pma", Type: "phpMyAdmin", Status: 200}}, + DatabaseErrors: []scan.SQLDatabaseError{{URL: "http://x", DatabaseType: "mysql", ErrorPattern: "syntax"}}, + ExposedPorts: []int{3306}, + }, + typed: &scan.SQLResult{}, + module: "sql", + wantItems: 3, + }, + { + value: &scan.LFIResult{Vulnerabilities: []scan.LFIVulnerability{ + {URL: "http://x", Parameter: "file", Evidence: "root:x", Severity: "high"}, + }}, + typed: &scan.LFIResult{}, + module: "lfi", + wantItems: 1, + }, + { + value: &scan.CMSResult{Name: "WordPress", Version: "6.1"}, + typed: &scan.CMSResult{}, + module: "cms", + wantItems: 1, + }, + { + value: &scan.SecurityTrailsResult{Domain: "x.com", Subdomains: []string{"a.x.com"}, AssociatedDomains: []string{"y.com"}}, + typed: &scan.SecurityTrailsResult{}, + module: "securitytrails", + 
wantItems: 2, + }, + { + value: &scan.CORSResult{Findings: []scan.CORSFinding{{URL: "http://x", OriginTested: "null", AllowOrigin: "null", Severity: "medium", Note: "null origin"}}}, + typed: &scan.CORSResult{}, + module: "cors", + wantItems: 1, + }, + { + value: &scan.RedirectResult{Findings: []scan.RedirectFinding{{URL: "http://x", Parameter: "next", Location: "http://evil", Via: "header", Severity: "medium"}}}, + typed: &scan.RedirectResult{}, + module: "redirect", + wantItems: 1, + }, + { + value: &scan.XSSResult{Findings: []scan.XSSFinding{{URL: "http://x", Parameter: "q", Context: "html", SurvivedRaw: []string{"<"}, Severity: "high"}}}, + typed: &scan.XSSResult{}, + module: "xss", + wantItems: 1, + }, + { + value: &scan.CrawlResult{URLs: []string{"http://x/a"}}, + typed: &scan.CrawlResult{}, + module: "crawl", + wantItems: 1, + }, + { + value: &scan.PassiveResult{Subdomains: []string{"a.x.com"}, URLs: []string{"http://x/old"}}, + typed: &scan.PassiveResult{}, + module: "passive", + wantItems: 2, + }, + { + value: &scan.ProbeResult{URL: "http://x", Alive: true, StatusCode: 200, Title: "home"}, + typed: &scan.ProbeResult{}, + module: "probe", + wantItems: 1, + }, + { + value: scan.HeaderResults{{Name: "Server", Value: "nginx"}}, + typed: scan.HeaderResults{}, + module: "headers", + wantItems: 1, + }, + { + value: scan.SecurityHeaderResults{{Header: "Content-Security-Policy", Present: false, Severity: "medium", Note: "missing"}}, + typed: scan.SecurityHeaderResults{}, + module: "security_headers", + wantItems: 1, + }, + { + value: scan.DirectoryResults{{Url: "http://x/admin", StatusCode: 301, Size: 10, Words: 2}}, + typed: scan.DirectoryResults{}, + module: "dirlist", + wantItems: 1, + }, + { + value: scan.CloudStorageResults{{BucketName: "x-assets", IsPublic: true}}, + typed: scan.CloudStorageResults{}, + module: "cloudstorage", + wantItems: 1, + }, + { + value: scan.DorkResults{{Url: "http://x/leak", Count: 1}}, + typed: scan.DorkResults{}, + module: "dork", + 
wantItems: 1, + }, + { + value: scan.SubdomainTakeoverResults{{Subdomain: "old.x.com", Vulnerable: true, Service: "GitHub Pages"}}, + typed: scan.SubdomainTakeoverResults{}, + module: "subdomain_takeover", + wantItems: 1, + }, + { + value: &frameworks.FrameworkResult{Name: "Laravel", Version: "9.0", RiskLevel: "high", CVEs: []string{"CVE-2"}}, + typed: &frameworks.FrameworkResult{}, + module: "framework", + wantItems: 1, + }, + { + value: &js.JavascriptScanResult{ + SecretMatches: []js.SecretMatch{{Rule: "aws-key", Match: "AKIA...", Source: "http://x/app.js"}}, + Endpoints: []string{"/api/v1"}, + }, + typed: &js.JavascriptScanResult{}, + module: "js", + wantItems: 2, + }, + { + value: &modules.Result{ModuleID: "custom-mod", Target: "http://x", Findings: []modules.Finding{{URL: "http://x", Severity: "low", Evidence: "hit"}}}, + typed: &modules.Result{ModuleID: "custom-mod"}, + module: "custom-mod", + wantItems: 1, + }, + { + // nuclei results aren't ScanResult-typed; they ride through the report + // as a raw []ResultEvent, so cover that shape explicitly. + value: []output.ResultEvent{{TemplateID: "t1", Host: "x", Matched: "http://x", Info: model.Info{Name: "n", SeverityHolder: severity.Holder{Severity: severity.High}}}}, + module: "nuclei", + wantItems: 1, + }, + { + // dnslist/portscan/git all hand Flatten a bare []string keyed only by + // the module argument. + value: []string{"sub.x.com"}, + module: "dnslist", + wantItems: 1, + }, + } +} + +const target = "http://target.example" + +// TestFlattenCoversEveryResultType is the guard: every result type in the +// coverage table must flatten into the expected module without hitting the +// "unhandled" fallback. a new scanner that skips both the table and Flatten's +// switch trips this loudly. 
+func TestFlattenCoversEveryResultType(t *testing.T) { + for _, tc := range coverageCases() { + findings := Flatten(target, tc.module, tc.value) + + if len(findings) != tc.wantItems { + t.Errorf("module %q: got %d findings, want %d", tc.module, len(findings), tc.wantItems) + } + for i := 0; i < len(findings); i++ { + f := findings[i] + if strings.HasSuffix(f.Key, keySep+"unhandled") { + t.Errorf("module %q: Flatten has no case, fell through to unhandled (key=%q)", tc.module, f.Key) + } + if f.Target != target { + t.Errorf("module %q: target=%q, want %q", tc.module, f.Target, target) + } + if f.Module != tc.module { + t.Errorf("module %q: finding stamped module=%q, want %q", tc.module, f.Module, tc.module) + } + if f.Key == "" { + t.Errorf("module %q: empty Key", tc.module) + } + if !strings.HasPrefix(f.Key, tc.module+keySep) { + t.Errorf("module %q: Key %q not prefixed with module", tc.module, f.Key) + } + } + } +} + +// TestEveryResultTypeIsInCoverageTable cross-checks the table against the actual +// ResultType() registry: if a scanner type exists whose ResultType() isn't in +// the table, the coverage guard above would never exercise it. enumerate the +// known typed entries and assert each ResultType() string is present. +func TestEveryResultTypeIsInCoverageTable(t *testing.T) { + covered := make(map[string]struct{}) + for _, tc := range coverageCases() { + if tc.typed == nil { + continue + } + covered[tc.typed.ResultType()] = struct{}{} + } + + // the full set of ResultType() strings the scan tree exposes. keep this in + // lockstep with the ScanResult implementers; a missing entry means the table + // (and very likely Flatten) skipped a scanner. 
+ want := []string{ + "shodan", "sql", "lfi", "cms", "securitytrails", + "cors", "redirect", "xss", "crawl", "passive", "probe", + "headers", "security_headers", "dirlist", "cloudstorage", + "dork", "subdomain_takeover", "framework", "js", "custom-mod", + } + for _, rt := range want { + if _, ok := covered[rt]; !ok { + t.Errorf("ResultType %q has no entry in coverageCases; Flatten coverage unverified", rt) + } + } +} + +// TestFlattenStableKeysAndSeverities pins the keys and severities for a couple +// of representative items so a refactor that quietly reshuffles them is caught. +func TestFlattenStableKeysAndSeverities(t *testing.T) { + tests := []struct { + name string + value any + module string + wantKey string + wantSev Severity + }{ + { + name: "cors honors source severity", + value: &scan.CORSResult{Findings: []scan.CORSFinding{{URL: "http://x", OriginTested: "null", AllowOrigin: "null", Severity: "high", Note: "n"}}}, + module: "cors", + wantKey: "cors:http://x:null", + wantSev: SeverityHigh, + }, + { + name: "public bucket is high", + value: scan.CloudStorageResults{{BucketName: "b", IsPublic: true}}, + module: "cloudstorage", + wantKey: "cloudstorage:b", + wantSev: SeverityHigh, + }, + { + name: "header is recon info", + value: scan.HeaderResults{{Name: "Server", Value: "nginx"}}, + module: "headers", + wantKey: "headers:Server", + wantSev: SeverityInfo, + }, + { + name: "vulnerable takeover is high", + value: scan.SubdomainTakeoverResults{{Subdomain: "old.x.com", Vulnerable: true, Service: "GitHub Pages"}}, + module: "subdomain_takeover", + wantKey: "subdomain_takeover:old.x.com", + wantSev: SeverityHigh, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + findings := Flatten(target, tt.module, tt.value) + if len(findings) != 1 { + t.Fatalf("got %d findings, want 1", len(findings)) + } + f := findings[0] + if f.Key != tt.wantKey { + t.Errorf("Key = %q, want %q", f.Key, tt.wantKey) + } + if f.Severity != tt.wantSev { + 
t.Errorf("Severity = %v, want %v", f.Severity, tt.wantSev) + } + }) + } +} + +// TestFlattenUnhandledTypeIsLoud asserts the fallback fires for a type Flatten +// doesn't know - this is what makes the guard above meaningful. +func TestFlattenUnhandledTypeIsLoud(t *testing.T) { + type bogus struct{} + findings := Flatten(target, "mystery", bogus{}) + if len(findings) != 1 { + t.Fatalf("got %d findings, want 1 placeholder", len(findings)) + } + if !strings.HasSuffix(findings[0].Key, keySep+"unhandled") { + t.Errorf("unhandled type should key on :unhandled, got %q", findings[0].Key) + } + if findings[0].Severity != SeverityUnknown { + t.Errorf("unhandled severity = %v, want SeverityUnknown", findings[0].Severity) + } +} + +// TestSubdomainTakeoverSkipsSafe confirms a non-vulnerable cname produces no +// finding; only the real takeover is a finding. +func TestSubdomainTakeoverSkipsSafe(t *testing.T) { + value := scan.SubdomainTakeoverResults{ + {Subdomain: "safe.x.com", Vulnerable: false}, + {Subdomain: "bad.x.com", Vulnerable: true, Service: "Heroku"}, + } + findings := Flatten(target, "subdomain_takeover", value) + if len(findings) != 1 { + t.Fatalf("got %d findings, want 1 (only the vulnerable one)", len(findings)) + } + if findings[0].Key != "subdomain_takeover:bad.x.com" { + t.Errorf("Key = %q, want subdomain_takeover:bad.x.com", findings[0].Key) + } +} + +// TestDeadProbeIsNotAFinding confirms a host that didn't answer yields nothing. 
+func TestDeadProbeIsNotAFinding(t *testing.T) { + findings := Flatten(target, "probe", &scan.ProbeResult{URL: "http://x", Alive: false}) + if len(findings) != 0 { + t.Errorf("dead probe produced %d findings, want 0", len(findings)) + } +} diff --git a/internal/finding/severity.go b/internal/finding/severity.go new file mode 100644 index 0000000..141821b --- /dev/null +++ b/internal/finding/severity.go @@ -0,0 +1,78 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package finding + +import "strings" + +// Severity is an ordered severity rank shared by every normalized finding. +// the order matters: notify gates on a threshold and diff sorts by it, so the +// underlying ints have to compare info < low < medium < high < critical. +type Severity int + +// severity ranks, lowest to highest. SeverityUnknown sorts below everything so +// an unrecognized scanner string never silently outranks a real critical. +const ( + SeverityUnknown Severity = iota + SeverityInfo + SeverityLow + SeverityMedium + SeverityHigh + SeverityCritical +) + +// severityNames maps each rank to its canonical lowercase string. the wire +// format scanners emit ("info"/"low"/...) round-trips through this table. +var severityNames = map[Severity]string{ + SeverityUnknown: "unknown", + SeverityInfo: "info", + SeverityLow: "low", + SeverityMedium: "medium", + SeverityHigh: "high", + SeverityCritical: "critical", +} + +// String renders the canonical lowercase name for the rank. +func (s Severity) String() string { + if name, ok := severityNames[s]; ok { + return name + } + return severityNames[SeverityUnknown] +} + +// ParseSeverity maps a scanner's free-form severity string onto a rank. 
it's +// case/space insensitive and folds the common synonyms ("informational", +// "warning", "moderate") so the dozen scanners that each picked their own +// spelling all land on the same ladder. an empty or unrecognized value is +// SeverityUnknown rather than a guess. +func ParseSeverity(raw string) Severity { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "critical": + return SeverityCritical + case "high": + return SeverityHigh + case "medium", "moderate", "warning": + return SeverityMedium + case "low": + return SeverityLow + case "info", "informational", "information", "none": + return SeverityInfo + default: + return SeverityUnknown + } +} + +// AtLeast reports whether s is at or above threshold; notify uses it to drop +// findings below the configured floor. +func (s Severity) AtLeast(threshold Severity) bool { + return s >= threshold +} diff --git a/internal/finding/severity_test.go b/internal/finding/severity_test.go new file mode 100644 index 0000000..6b4c681 --- /dev/null +++ b/internal/finding/severity_test.go @@ -0,0 +1,84 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package finding + +import "testing" + +func TestParseSeverity(t *testing.T) { + tests := []struct { + in string + want Severity + }{ + {"critical", SeverityCritical}, + {"CRITICAL", SeverityCritical}, + {" high ", SeverityHigh}, + {"medium", SeverityMedium}, + {"moderate", SeverityMedium}, + {"warning", SeverityMedium}, + {"low", SeverityLow}, + {"info", SeverityInfo}, + {"informational", SeverityInfo}, + {"none", SeverityInfo}, + {"", SeverityUnknown}, + {"bogus", SeverityUnknown}, + } + for _, tt := range tests { + if got := ParseSeverity(tt.in); got != tt.want { + 
t.Errorf("ParseSeverity(%q) = %v, want %v", tt.in, got, tt.want) + } + } +} + +func TestSeverityOrdering(t *testing.T) { + // the ladder must be strictly increasing for AtLeast/sort to behave. + ordered := []Severity{ + SeverityUnknown, SeverityInfo, SeverityLow, + SeverityMedium, SeverityHigh, SeverityCritical, + } + for i := 1; i < len(ordered); i++ { + if ordered[i-1] >= ordered[i] { + t.Errorf("severity ladder not increasing at %d: %v !< %v", i, ordered[i-1], ordered[i]) + } + } +} + +func TestSeverityAtLeast(t *testing.T) { + tests := []struct { + sev Severity + threshold Severity + want bool + }{ + {SeverityHigh, SeverityMedium, true}, + {SeverityMedium, SeverityMedium, true}, + {SeverityLow, SeverityMedium, false}, + {SeverityCritical, SeverityInfo, true}, + {SeverityUnknown, SeverityInfo, false}, + } + for _, tt := range tests { + if got := tt.sev.AtLeast(tt.threshold); got != tt.want { + t.Errorf("%v.AtLeast(%v) = %v, want %v", tt.sev, tt.threshold, got, tt.want) + } + } +} + +func TestSeverityStringRoundTrip(t *testing.T) { + // every named rank renders to a string ParseSeverity maps back to the same + // rank, so the wire format is lossless for known severities. + for _, sev := range []Severity{ + SeverityInfo, SeverityLow, SeverityMedium, SeverityHigh, SeverityCritical, + } { + if got := ParseSeverity(sev.String()); got != sev { + t.Errorf("round-trip %v -> %q -> %v", sev, sev.String(), got) + } + } +} diff --git a/internal/scan/js/scan.go b/internal/scan/js/scan.go index 2cc3981..fd95233 100644 --- a/internal/scan/js/scan.go +++ b/internal/scan/js/scan.go @@ -39,6 +39,31 @@ type JavascriptScanResult struct { // ResultType implements the ScanResult interface. func (r *JavascriptScanResult) ResultType() string { return "js" } +// SupabaseFinding is the exported view of one discovered supabase project. the +// raw supabaseScanResult stays package-private (it carries scan internals), so +// downstream normalizers consume this projection instead. 
+type SupabaseFinding struct { + ProjectId string + Role string + Collections int +} + +// SupabaseFindings projects the package-private supabase results into a stable +// exported shape for the finding normalizer; role is what makes one interesting +// (a non-anon key is the real bug). +func (r *JavascriptScanResult) SupabaseFindings() []SupabaseFinding { + out := make([]SupabaseFinding, 0, len(r.SupabaseResults)) + for i := 0; i < len(r.SupabaseResults); i++ { + s := r.SupabaseResults[i] + out = append(out, SupabaseFinding{ + ProjectId: s.ProjectId, + Role: s.Role, + Collections: len(s.Collections), + }) + } + return out +} + func JavascriptScan(url string, timeout time.Duration, threads int, logdir string) (*JavascriptScanResult, error) { log := output.Module("JS") log.Start() diff --git a/sif.go b/sif.go index 1d5e5d2..8bdbf9c 100644 --- a/sif.go +++ b/sif.go @@ -25,6 +25,7 @@ import ( "github.com/charmbracelet/log" "github.com/dropalldatabases/sif/internal/config" + "github.com/dropalldatabases/sif/internal/finding" "github.com/dropalldatabases/sif/internal/httpx" "github.com/dropalldatabases/sif/internal/logger" "github.com/dropalldatabases/sif/internal/modules" @@ -216,6 +217,11 @@ func (app *App) Run() error { wantReport := app.settings.SARIF != "" || app.settings.Markdown != "" reportResults := make([]report.Result, 0, 16) + // normalized findings for the whole run; the single Flatten-driven view that + // notify and diff (later) consume. collected alongside the report so both + // describe the same scanners from one pass. + allFindings := make([]finding.Finding, 0, 16) + for _, url := range app.targets { output.Info("Starting scan on %s", output.Highlight.Render(url)) @@ -543,11 +549,18 @@ func (app *App) Run() error { fmt.Println(string(marshalled)) } + allFindings = append(allFindings, collectFindings(url, moduleResults)...) 
+ // the report carries raw blobs and is only built when an export flag is + // set, so the common path skips the marshalling entirely. if wantReport { reportResults = append(reportResults, collectReportResults(url, moduleResults)...) } } + // the normalized findings are the handoff point for notify/diff; surface the + // count now so the path is live and observable without changing output. + log.Debugf("normalized %d findings across %d targets", len(allFindings), len(app.targets)) + if wantReport { if err := app.writeReports(reportResults); err != nil { return err @@ -561,6 +574,18 @@ func (app *App) Run() error { return nil } +// collectFindings normalizes one target's module results through finding.Flatten +// - the single normalization path that notify and diff (later bundles) build on. +// every scan result struct collapses to flat, severity-ranked findings here so a +// scanner is described once, not once per consumer. +func collectFindings(target string, moduleResults []ModuleResult) []finding.Finding { + out := make([]finding.Finding, 0, len(moduleResults)) + for _, mr := range moduleResults { + out = append(out, finding.Flatten(target, mr.Id, mr.Data)...) + } + return out +} + // collectReportResults flattens one target's module results into the report // model, carrying each finding as raw json so the report package stays free of // scan types. a result that won't marshal is skipped rather than failing the run. From a5f42ddfa6be4994fe24af28e89408c2fe9273ab Mon Sep 17 00:00:00 2001 From: vmfunc Date: Wed, 10 Jun 2026 15:21:24 -0700 Subject: [PATCH 3/3] feat(dnslist): async dns resolution with wildcard filtering dnslist previously http-probed every wordlist candidate through the blocking os resolver, so a big list meant a request per dead name and a wildcard zone flooded results. 
resolve each candidate first via a new internal/dnsx (retryabledns over a bundled 1.1.1.1/8.8.8.8/9.9.9.9 pool, promoted to a direct dep), fingerprint the apex with random nonexistent labels to detect catch-all zones, and http-probe only the names that actually resolve and aren't wildcard. add -resolvers to override the pool. resolverFn is a package-level seam so the dnsx tests stay hermetic; the dnslist newDNSResolver seam keeps the integration test network-free. --- go.mod | 2 +- internal/config/config.go | 2 + internal/dnsx/dnsx.go | 270 ++++++++++++++++++++++++++++++ internal/dnsx/dnsx_test.go | 176 +++++++++++++++++++ internal/scan/dnslist.go | 53 +++++- internal/scan/integration_test.go | 16 +- sif.go | 3 +- 7 files changed, 516 insertions(+), 6 deletions(-) create mode 100644 internal/dnsx/dnsx.go create mode 100644 internal/dnsx/dnsx_test.go diff --git a/go.mod b/go.mod index 35bb982..14b15d1 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/likexian/whois v1.15.7 github.com/projectdiscovery/goflags v0.1.74 github.com/projectdiscovery/nuclei/v3 v3.8.0 + github.com/projectdiscovery/retryabledns v1.0.114 github.com/projectdiscovery/utils v0.10.1 github.com/rocketlaunchr/google-search v1.1.6 golang.org/x/net v0.53.0 @@ -288,7 +289,6 @@ require ( github.com/projectdiscovery/ratelimit v0.0.85 // indirect github.com/projectdiscovery/rawhttp v0.1.90 // indirect github.com/projectdiscovery/rdap v0.9.1-0.20221108103045-9865884d1917 // indirect - github.com/projectdiscovery/retryabledns v1.0.114 // indirect github.com/projectdiscovery/retryablehttp-go v1.3.8 // indirect github.com/projectdiscovery/sarif v0.0.1 // indirect github.com/projectdiscovery/tlsx v1.2.2 // indirect diff --git a/internal/config/config.go b/internal/config/config.go index 7eeea8d..d9742b7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -30,6 +30,7 @@ type Settings struct { DirWordlist string // -w dirlist: custom wordlist (file path or url) 
DirExtensions string // -e dirlist: extensions appended to each word Dnslist string + Resolvers string // -resolvers dnslist: comma list overriding the bundled pool Debug bool LogDir string NoScan bool @@ -120,6 +121,7 @@ func Parse() *Settings { flagSet.StringVar(&settings.DirWordlist, "w", "", "Dirlist: custom wordlist (local file path or url; overrides -dirlist size)"), flagSet.StringVar(&settings.DirExtensions, "e", "", "Dirlist: extensions appended to each word (comma list, e.g. php,bak,env)"), flagSet.EnumVar(&settings.Dnslist, "dnslist", Nil, "DNS fuzzing scan size (small/medium/large)", listSizes), + flagSet.StringVar(&settings.Resolvers, "resolvers", "", "Dnslist: DNS resolvers to use (comma list, e.g. 1.1.1.1,8.8.8.8; overrides the bundled pool)"), flagSet.EnumVar(&settings.Ports, "ports", Nil, "Port scanning scope (common/full)", portScopes), flagSet.BoolVar(&settings.Dorking, "dork", false, "Enable Google dorking"), flagSet.BoolVar(&settings.Git, "git", false, "Enable git repository scanning"), diff --git a/internal/dnsx/dnsx.go b/internal/dnsx/dnsx.go new file mode 100644 index 0000000..c3c3ae7 --- /dev/null +++ b/internal/dnsx/dnsx.go @@ -0,0 +1,270 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +// Package dnsx resolves subdomain candidates against a bundled resolver pool +// before anything is probed over http, so the slow/inaccurate path of HTTP-ing +// every wordlist entry through the OS resolver is gone. it also fingerprints +// wildcard zones (a zone that answers every random label) so a catch-all +// nameserver can't flood the caller with phantom subdomains. 
+package dnsx + +import ( + "crypto/rand" + "fmt" + "math/big" + "sort" + "strings" + + retryabledns "github.com/projectdiscovery/retryabledns" +) + +// bundled default resolver pool. anycast cloudflare/google/quad9 - fast, public, +// and unlikely to rate-limit a recon sweep. -resolvers overrides this set. +const ( + resolverCloudflare = "1.1.1.1:53" + resolverGoogle = "8.8.8.8:53" + resolverQuad9 = "9.9.9.9:53" +) + +// defaultResolvers is the bundled pool used when the caller passes none. +var defaultResolvers = []string{resolverCloudflare, resolverGoogle, resolverQuad9} + +const ( + // defaultRetries is how many times retryabledns rotates through the pool on a + // timeout before giving up on a name. low enough to stay fast on a big list. + defaultRetries = 3 + + // wildcardProbes is how many random nonexistent labels we resolve to + // fingerprint a wildcard zone. more samples make a rotating catch-all (one + // that hands back a different ip per query) harder to miss, but each is a + // real lookup so this stays small. + wildcardProbes = 3 + + // randomLabelLen is the length of each random wildcard-probe label. long + // enough that a collision with a real host is astronomically unlikely. + randomLabelLen = 16 +) + +// randomLabelAlphabet is the lowercase-alnum set wildcard probe labels draw +// from; a valid dns label so the query isn't rejected before it leaves. +const randomLabelAlphabet = "abcdefghijklmnopqrstuvwxyz0123456789" + +// defaultDNSPort is appended to any resolver entry given without an explicit +// port, so "1.1.1.1" and "1.1.1.1:53" both work on the cli. +const defaultDNSPort = "53" + +// ParseResolvers splits a comma list of resolvers into a normalized slice, +// appending the default port to bare ips/hosts. an empty or blank input returns +// nil so the caller falls back to the bundled pool. 
+func ParseResolvers(raw string) []string { + if strings.TrimSpace(raw) == "" { + return nil + } + parts := strings.Split(raw, ",") + out := make([]string, 0, len(parts)) + for i := 0; i < len(parts); i++ { + entry := strings.TrimSpace(parts[i]) + if entry == "" { + continue + } + // a bare ip/host gets the default port; an entry already carrying ":port" + // (or a bracketed ipv6 literal) is left as-is. + if !strings.Contains(entry, ":") { + entry += ":" + defaultDNSPort + } + out = append(out, entry) + } + + return out +} + +// resolution is the resolved address set for one host. empty Addrs means the +// name did not resolve (nxdomain / no records). +type resolution struct { + Addrs []string +} + +// resolved reports whether the name returned any address. +func (r resolution) resolved() bool { + return len(r.Addrs) > 0 +} + +// resolverFn is the test seam: every lookup the package makes goes through this +// var, so a fake can answer without touching the network. real runs point it at +// a retryabledns-backed client via NewResolver. +var resolverFn func(host string) (resolution, error) + +// Resolver resolves candidates against a pool and filters wildcard answers. it +// is built once per scan and shared across the worker goroutines; the +// underlying retryabledns client is safe for concurrent use. +type Resolver struct { + // wildcardSigs holds the address sets a wildcard zone answers random labels + // with. nil/empty means the zone is not wildcard. a candidate whose answer is + // covered by one of these is a catch-all hit, not a real host. + wildcardSigs []map[string]struct{} +} + +// NewResolver wires resolverFn to a retryabledns client over the given pool +// (bundled default when resolvers is empty) and returns a Resolver. it does not +// fingerprint anything yet - call FingerprintWildcard with the apex first. 
+func NewResolver(resolvers []string) (*Resolver, error) { + pool := resolvers + if len(pool) == 0 { + pool = defaultResolvers + } + + client, err := retryabledns.New(pool, defaultRetries) + if err != nil { + return nil, fmt.Errorf("dnsx: build resolver over %v: %w", pool, err) + } + + // only install the real client when a test hasn't already injected a fake; + // the seam wins so hermetic tests never reach this client. + if resolverFn == nil { + resolverFn = func(host string) (resolution, error) { + data, err := client.Resolve(host) + if err != nil { + return resolution{}, fmt.Errorf("dnsx: resolve %q: %w", host, err) + } + return resolution{Addrs: mergeAddrs(data)}, nil + } + } + + return &Resolver{}, nil +} + +// FingerprintWildcard resolves wildcardProbes random labels under apex. any that +// answer mean the zone is a catch-all, so their address sets are recorded as +// signatures to filter real candidates against later. a clean zone leaves the +// signature list empty and nothing gets filtered. +func (r *Resolver) FingerprintWildcard(apex string) error { + apex = strings.TrimSuffix(apex, ".") + for i := 0; i < wildcardProbes; i++ { + label, err := randomLabel(randomLabelLen) + if err != nil { + return fmt.Errorf("dnsx: wildcard probe label: %w", err) + } + + res, err := resolverFn(label + "." + apex) + if err != nil { + // a probe failure (timeout / nxdomain surfaced as error) just means this + // sample says "not wildcard"; don't abort the whole fingerprint on it. + continue + } + if res.resolved() { + r.wildcardSigs = append(r.wildcardSigs, toSet(res.Addrs)) + } + } + + return nil +} + +// Resolve looks up host and reports whether it is a real, non-wildcard hit. a +// name that doesn't resolve, or whose answer matches a recorded wildcard +// signature, returns false so the caller skips probing it. 
+func (r *Resolver) Resolve(host string) (bool, error) { + res, err := resolverFn(host) + if err != nil { + return false, fmt.Errorf("dnsx: resolve %q: %w", host, err) + } + if !res.resolved() { + return false, nil + } + if r.isWildcard(res.Addrs) { + return false, nil + } + + return true, nil +} + +// isWildcard reports whether addrs is covered by any recorded wildcard +// signature. a candidate whose every address appears in a wildcard answer is a +// catch-all hit; a host with even one address outside the signature is a real, +// distinct record and survives. +func (r *Resolver) isWildcard(addrs []string) bool { + if len(r.wildcardSigs) == 0 { + return false + } + for i := 0; i < len(r.wildcardSigs); i++ { + if subset(addrs, r.wildcardSigs[i]) { + return true + } + } + + return false +} + +// mergeAddrs flattens the A and AAAA answers into one sorted, deduped slice so +// two equal answers compare equal regardless of record ordering. +func mergeAddrs(data *retryabledns.DNSData) []string { + if data == nil { + return nil + } + seen := make(map[string]struct{}, len(data.A)+len(data.AAAA)) + for i := 0; i < len(data.A); i++ { + seen[data.A[i]] = struct{}{} + } + for i := 0; i < len(data.AAAA); i++ { + seen[data.AAAA[i]] = struct{}{} + } + + addrs := make([]string, 0, len(seen)) + for addr := range seen { + addrs = append(addrs, addr) + } + sort.Strings(addrs) + + return addrs +} + +// toSet turns addrs into a lookup set for subset checks. +func toSet(addrs []string) map[string]struct{} { + set := make(map[string]struct{}, len(addrs)) + for i := 0; i < len(addrs); i++ { + set[addrs[i]] = struct{}{} + } + + return set +} + +// subset reports whether every addr is present in sig (and addrs is non-empty); +// an empty addrs can't be a wildcard match. 
+func subset(addrs []string, sig map[string]struct{}) bool { + if len(addrs) == 0 { + return false + } + for i := 0; i < len(addrs); i++ { + if _, ok := sig[addrs[i]]; !ok { + return false + } + } + + return true +} + +// randomLabel returns a cryptographically-random lowercase-alnum dns label of +// length n. crypto/rand (not math/rand) so a target can't predict the probe +// labels and special-case them to defeat wildcard detection. +func randomLabel(n int) (string, error) { + var b strings.Builder + b.Grow(n) + alphabetLen := big.NewInt(int64(len(randomLabelAlphabet))) + for i := 0; i < n; i++ { + idx, err := rand.Int(rand.Reader, alphabetLen) + if err != nil { + return "", fmt.Errorf("dnsx: random index: %w", err) + } + b.WriteByte(randomLabelAlphabet[idx.Int64()]) + } + + return b.String(), nil +} diff --git a/internal/dnsx/dnsx_test.go b/internal/dnsx/dnsx_test.go new file mode 100644 index 0000000..f104f13 --- /dev/null +++ b/internal/dnsx/dnsx_test.go @@ -0,0 +1,176 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package dnsx + +import ( + "reflect" + "strings" + "testing" +) + +// withFakeResolver swaps resolverFn for fn for the duration of one test, then +// restores it - the seam that keeps every case below network-free. +func withFakeResolver(t *testing.T, fn func(host string) (resolution, error)) { + t.Helper() + orig := resolverFn + resolverFn = fn + t.Cleanup(func() { resolverFn = orig }) +} + +// newFingerprinted builds a Resolver and runs the wildcard fingerprint against +// apex using the already-injected fake; fatal on error. 
+func newFingerprinted(t *testing.T, apex string) *Resolver { + t.Helper() + r := &Resolver{} + if err := r.FingerprintWildcard(apex); err != nil { + t.Fatalf("FingerprintWildcard: %v", err) + } + + return r +} + +const testApex = "example.com" + +// a host that resolves to a real address, in a clean (non-wildcard) zone, is a +// genuine hit. +func TestResolve_FoundInCleanZone(t *testing.T) { + withFakeResolver(t, func(host string) (resolution, error) { + // nothing answers a random wildcard probe -> clean zone. + if strings.HasSuffix(host, "."+testApex) && host != "www."+testApex { + return resolution{}, nil + } + if host == "www."+testApex { + return resolution{Addrs: []string{"93.184.216.34"}}, nil + } + return resolution{}, nil + }) + + r := newFingerprinted(t, testApex) + if len(r.wildcardSigs) != 0 { + t.Fatalf("clean zone should record no wildcard signatures, got %d", len(r.wildcardSigs)) + } + + ok, err := r.Resolve("www." + testApex) + if err != nil { + t.Fatalf("Resolve: %v", err) + } + if !ok { + t.Error("a resolving host in a clean zone should be a hit") + } +} + +// nxdomain (no addresses) is not a hit, so the caller skips probing it. +func TestResolve_NxdomainSkipped(t *testing.T) { + withFakeResolver(t, func(string) (resolution, error) { + // every name, probes included, returns no records. + return resolution{}, nil + }) + + r := newFingerprinted(t, testApex) + + ok, err := r.Resolve("ghost." + testApex) + if err != nil { + t.Fatalf("Resolve: %v", err) + } + if ok { + t.Error("an nxdomain host must not count as found") + } +} + +// a wildcard zone answers the random probe labels, so a candidate that resolves +// to the same catch-all address is filtered out. +func TestResolve_WildcardFiltered(t *testing.T) { + const catchAll = "10.0.0.1" + withFakeResolver(t, func(string) (resolution, error) { + // the zone answers everything - probes and candidates alike - with one ip. 
+ return resolution{Addrs: []string{catchAll}}, nil + }) + + r := newFingerprinted(t, testApex) + if len(r.wildcardSigs) == 0 { + t.Fatal("wildcard zone should record at least one signature") + } + + ok, err := r.Resolve("anything." + testApex) + if err != nil { + t.Fatalf("Resolve: %v", err) + } + if ok { + t.Error("a candidate matching the wildcard answer must be filtered") + } +} + +// a real host in a wildcard zone that resolves to a distinct address (not the +// catch-all) still survives the filter - one address outside the signature is +// enough to be a genuine record. +func TestResolve_DistinctHostSurvivesWildcard(t *testing.T) { + const catchAll = "10.0.0.1" + const realHost = "api." + testApex + withFakeResolver(t, func(host string) (resolution, error) { + if host == realHost { + return resolution{Addrs: []string{"203.0.113.7"}}, nil + } + // everything else (probes + other candidates) hits the catch-all. + return resolution{Addrs: []string{catchAll}}, nil + }) + + r := newFingerprinted(t, testApex) + if len(r.wildcardSigs) == 0 { + t.Fatal("wildcard zone should record at least one signature") + } + + ok, err := r.Resolve(realHost) + if err != nil { + t.Fatalf("Resolve: %v", err) + } + if !ok { + t.Error("a host resolving to a distinct address should survive the wildcard filter") + } +} + +func TestParseResolvers(t *testing.T) { + tests := []struct { + name string + in string + want []string + }{ + {"empty falls back to bundled", "", nil}, + {"blank falls back to bundled", " ", nil}, + {"bare ips get default port", "1.1.1.1,8.8.8.8", []string{"1.1.1.1:53", "8.8.8.8:53"}}, + {"explicit port preserved", "9.9.9.9:5353", []string{"9.9.9.9:5353"}}, + {"whitespace and empties trimmed", " 1.1.1.1 , ,8.8.8.8 ", []string{"1.1.1.1:53", "8.8.8.8:53"}}, + {"mixed bare and ported", "1.1.1.1,9.9.9.9:5353", []string{"1.1.1.1:53", "9.9.9.9:5353"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := ParseResolvers(tt.in); 
!reflect.DeepEqual(got, tt.want) { + t.Errorf("ParseResolvers(%q) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestNewResolver_DefaultsToBundledPool(t *testing.T) { + // keep the seam already installed so NewResolver doesn't replace it with a real + // client; we only assert the constructor accepts an empty override. + withFakeResolver(t, func(string) (resolution, error) { return resolution{}, nil }) + + r, err := NewResolver(nil) + if err != nil { + t.Fatalf("NewResolver(nil): %v", err) + } + if r == nil { + t.Fatal("NewResolver returned nil resolver") + } +} diff --git a/internal/scan/dnslist.go b/internal/scan/dnslist.go index 2f88a6d..ba1e1ab 100644 --- a/internal/scan/dnslist.go +++ b/internal/scan/dnslist.go @@ -21,6 +21,7 @@ import ( "time" charmlog "github.com/charmbracelet/log" + "github.com/dropalldatabases/sif/internal/dnsx" "github.com/dropalldatabases/sif/internal/httpx" "github.com/dropalldatabases/sif/internal/logger" "github.com/dropalldatabases/sif/internal/output" @@ -33,6 +34,27 @@ var dnsURL = "https://raw.githubusercontent.com/dropalldatabases/sif-runtime/mai // local server instead of resolving real DNS. nil keeps http.DefaultTransport. var dnsTransport http.RoundTripper +// hostResolver is the small slice of dnsx the dnslist worker needs: resolve a +// candidate and report whether it's a real, non-wildcard hit. +type hostResolver interface { + Resolve(host string) (bool, error) +} + +// newDNSResolver builds the resolver for one run; it's a var so integration +// tests can inject a fake that answers without touching real dns. the apex is +// fingerprinted for wildcards before any candidate is checked.
+var newDNSResolver = func(apex string, resolvers []string) (hostResolver, error) { + r, err := dnsx.NewResolver(resolvers) + if err != nil { + return nil, fmt.Errorf("dns resolver: %w", err) + } + if err := r.FingerprintWildcard(apex); err != nil { + return nil, fmt.Errorf("wildcard fingerprint: %w", err) + } + + return r, nil +} + const ( dnsSmallFile = "subdomains-100.txt" dnsMediumFile = "subdomains-1000.txt" @@ -56,8 +78,11 @@ func meaningfulStatus(code int) bool { return code >= http.StatusOK && code < http.StatusMultipleChoices } -// Dnslist performs DNS subdomain enumeration on the target domain. -func Dnslist(size string, url string, timeout time.Duration, threads int, logdir string) ([]string, error) { +// Dnslist performs DNS subdomain enumeration on the target domain. each +// candidate is resolved first; only names that actually resolve (and aren't a +// wildcard catch-all) are http-probed, so a big wordlist no longer means an +// http request per dead name. +func Dnslist(size string, url string, timeout time.Duration, threads int, logdir string, resolvers []string) ([]string, error) { log := output.Module("DNS") log.Start() @@ -92,6 +117,15 @@ func Dnslist(size string, url string, timeout time.Duration, threads int, logdir sanitizedURL := stripScheme(url) + // resolve against dns first, fingerprinting the apex for wildcards so a + // catch-all zone can't flood the probe step. build it once and share across + // the workers - the underlying client is concurrency-safe. + resolver, err := newDNSResolver(sanitizedURL, resolvers) + if err != nil { + log.Error("Error building DNS resolver: %s", err) + return nil, err + } + if logdir != "" { if err := logger.WriteHeader(sanitizedURL, logdir, size+" subdomain fuzzing"); err != nil { log.Error("Error creating log file: %v", err) @@ -132,10 +166,23 @@ func Dnslist(size string, url string, timeout time.Duration, threads int, logdir charmlog.Debugf("Looking up: %s", domain) + host := domain + "."
+ sanitizedURL + + // dns gate: skip the http probe entirely for names that don't + // resolve or that a wildcard zone answers. this is the whole point - + // no request per dead candidate. + ok, err := resolver.Resolve(host) + if err != nil { + charmlog.Debugf("resolve %s: %s", host, err) + continue + } + if !ok { + continue + } + // probe http first, then https - but a subdomain is recorded at // most once. firing both schemes and appending on each is what // double-counted every host on the old path. - host := domain + "." + sanitizedURL foundURL, scheme := probeSubdomain(client, host) if foundURL == "" { continue diff --git a/internal/scan/integration_test.go b/internal/scan/integration_test.go index 93f46b8..be168c0 100644 --- a/internal/scan/integration_test.go +++ b/internal/scan/integration_test.go @@ -424,7 +424,15 @@ func TestIntegrationDnslist(t *testing.T) { } defer func() { dnsTransport = origTr }() - found, err := Dnslist("small", "http://example.com", 5*time.Second, 2, "") + // inject a fake resolver so the run never touches real dns: every candidate + // resolves, nothing is wildcard, so all wordlist names reach the probe step. + origResolver := newDNSResolver + newDNSResolver = func(_ string, _ []string) (hostResolver, error) { + return resolveAllStub{}, nil + } + defer func() { newDNSResolver = origResolver }() + + found, err := Dnslist("small", "http://example.com", 5*time.Second, 2, "", nil) if err != nil { t.Fatalf("Dnslist: %v", err) } @@ -435,6 +443,12 @@ func TestIntegrationDnslist(t *testing.T) { } } +// resolveAllStub answers every host as a real, non-wildcard hit so the dns gate +// is a pass-through and the probe step gets the full wordlist. 
+type resolveAllStub struct{} + +func (resolveAllStub) Resolve(string) (bool, error) { return true, nil } + func contains(s []string, v string) bool { for i := 0; i < len(s); i++ { if s[i] == v { diff --git a/sif.go b/sif.go index 8bdbf9c..4be3e97 100644 --- a/sif.go +++ b/sif.go @@ -25,6 +25,7 @@ import ( "github.com/charmbracelet/log" "github.com/dropalldatabases/sif/internal/config" + "github.com/dropalldatabases/sif/internal/dnsx" "github.com/dropalldatabases/sif/internal/finding" "github.com/dropalldatabases/sif/internal/httpx" "github.com/dropalldatabases/sif/internal/logger" @@ -270,7 +271,7 @@ func (app *App) Run() error { var dnsResults []string if app.settings.Dnslist != "none" { - result, err := scan.Dnslist(app.settings.Dnslist, url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir) + result, err := scan.Dnslist(app.settings.Dnslist, url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir, dnsx.ParseResolvers(app.settings.Resolvers)) if err != nil { log.Errorf("Error while running dns scan: %s", err) } else {