fix(scan): dnslist dedup, robots recursion bound, framework version lookup, takeover cname

four recon-flagged bugs, each with a focused test:

- dnslist fired both http and https per candidate and counted a "found"
  on any non-error response (incl 404 and wildcard catch-all redirects),
  so every host double-counted and a wildcard-dns host flooded results.
  probe http then https with per-subdomain dedupe, gate on a meaningful
  (2xx, non-redirect) status, and stop chasing redirects so a catch-all
  301 reads as a redirect instead of a 200.

- fetchRobotsTXT recursed on every 301 Location with no depth limit and
  no visited set, so an A->B->A loop blew the stack. bound it to a named
  hop cap and a visited set, iteratively.

- framework cve lookup used best.version ("unknown" when the detector
  only fingerprints the framework) and threw away the version
  ExtractVersionOptimized dug out of the body, missing CVEs. reconcile
  via resolveVersion: a concrete detector version still wins, otherwise
  fall back to the concrete version extracted from the body.

- subdomain takeover flagged a dangling cname whenever a no-such-host
  coincided with ANY cname (LookupCNAME echoes the host back for plain A
  records). only flag when the cname points off-host at a known
  takeoverable provider.
This commit is contained in:
vmfunc
2026-06-10 14:45:47 -07:00
parent 306f9a864d
commit 839c0a779c
7 changed files with 446 additions and 66 deletions
+70 -37
View File
@@ -39,6 +39,23 @@ const (
dnsBigFile = "subdomains-10000.txt"
)
// dnsScheme names the URL scheme whose probe counted a subdomain as found.
// probeSubdomain returns it alongside the resolved url, and Dnslist prints it
// inside the "[...]" tag of both the console line and the log-file entry.
type dnsScheme string
// the two schemes probeSubdomain tries, in probe order (http first, then https).
const (
dnsSchemeHTTP dnsScheme = "http"
dnsSchemeHTTPS dnsScheme = "https"
)
// meaningfulStatus tells the caller whether an HTTP status code is real
// evidence that a probed host exists. only the 2xx range qualifies: anything
// below 200 or from 300 upward (redirects, 404s, server errors) is rejected,
// which keeps a wildcard-DNS host that answers every candidate with the same
// redirect or 404 from flooding the results.
func meaningfulStatus(code int) bool {
	switch {
	case code < http.StatusOK:
		// informational or nonsensical codes carry no "host exists" signal.
		return false
	case code >= http.StatusMultipleChoices:
		// 3xx catch-all redirects, 4xx and 5xx are all gated out.
		return false
	default:
		return true
	}
}
// Dnslist performs DNS subdomain enumeration on the target domain.
func Dnslist(size string, url string, timeout time.Duration, threads int, logdir string) ([]string, error) {
log := output.Module("DNS")
@@ -88,6 +105,12 @@ func Dnslist(size string, url string, timeout time.Duration, threads int, logdir
if dnsTransport != nil {
client.Transport = dnsTransport
}
// don't chase redirects: a wildcard catch-all that 301s every candidate to
// the same landing page must read as a redirect status, not a 200, so it
// gets gated out instead of counting as a found host.
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
}
progress := output.NewProgress(len(dns), "enumerating")
@@ -109,52 +132,25 @@ func Dnslist(size string, url string, timeout time.Duration, threads int, logdir
charmlog.Debugf("Looking up: %s", domain)
// Check HTTP
httpReq, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, "http://"+domain+"."+sanitizedURL, http.NoBody)
if err != nil {
charmlog.Debugf("Error %s: %s", domain, err)
// probe http first, then https - but a subdomain is recorded at
// most once. firing both schemes and appending on each is what
// double-counted every host on the old path.
host := domain + "." + sanitizedURL
foundURL, scheme := probeSubdomain(client, host)
if foundURL == "" {
continue
}
resp, err := client.Do(httpReq)
if err != nil {
charmlog.Debugf("Error %s: %s", domain, err)
} else {
mu.Lock()
urls = append(urls, resp.Request.URL.String())
urls = append(urls, foundURL)
mu.Unlock()
resp.Body.Close()
progress.Pause()
log.Success("found: %s.%s [http]", output.Highlight.Render(domain), sanitizedURL)
log.Success("found: %s [%s]", output.Highlight.Render(host), scheme)
progress.Resume()
if logdir != "" {
logger.Write(sanitizedURL, logdir, fmt.Sprintf("[http] %s.%s\n", domain, sanitizedURL))
}
}
// Check HTTPS
httpsReq, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, "https://"+domain+"."+sanitizedURL, http.NoBody)
if err != nil {
charmlog.Debugf("Error %s: %s", domain, err)
continue
}
resp, err = client.Do(httpsReq)
if err != nil {
charmlog.Debugf("Error %s: %s", domain, err)
} else {
mu.Lock()
urls = append(urls, resp.Request.URL.String())
mu.Unlock()
resp.Body.Close()
progress.Pause()
log.Success("found: %s.%s [https]", output.Highlight.Render(domain), sanitizedURL)
progress.Resume()
if logdir != "" {
_ = logger.Write(sanitizedURL, logdir, fmt.Sprintf("[https] %s.%s\n", domain, sanitizedURL))
}
_ = logger.Write(sanitizedURL, logdir, fmt.Sprintf("[%s] %s\n", scheme, host))
}
}
}(thread)
@@ -166,3 +162,40 @@ func Dnslist(size string, url string, timeout time.Duration, threads int, logdir
return urls, nil
}
// probeSubdomain issues a GET for host over http and, only if that did not
// already count, over https. it returns the url the response was served from
// plus the scheme label of the first probe whose status passes
// meaningfulStatus. when a request cannot be built, the round trip fails, or
// the status is not meaningful, that scheme is skipped; if neither scheme
// yields a hit the result is ("", ""). stopping at the first hit is what keeps
// a subdomain from being recorded once per scheme.
func probeSubdomain(client *http.Client, host string) (string, dnsScheme) {
	type candidate struct {
		prefix string
		label  dnsScheme
	}
	// probe order matters: http is tried before https.
	order := [...]candidate{
		{prefix: "http://", label: dnsSchemeHTTP},
		{prefix: "https://", label: dnsSchemeHTTPS},
	}

	for _, c := range order {
		req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, c.prefix+host, http.NoBody)
		if err != nil {
			charmlog.Debugf("Error %s: %s", host, err)
			continue
		}

		resp, err := client.Do(req)
		if err != nil {
			charmlog.Debugf("Error %s: %s", host, err)
			continue
		}

		// read everything we need off the response before releasing the body.
		status, landed := resp.StatusCode, resp.Request.URL.String()
		resp.Body.Close()

		if !meaningfulStatus(status) {
			charmlog.Debugf("skip %s [%s]: status %d", host, c.label, status)
			continue
		}
		return landed, c.label
	}

	return "", ""
}
+98
View File
@@ -0,0 +1,98 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package scan
import (
"net/http"
"net/http/httptest"
"strings"
"sync/atomic"
"testing"
"time"
)
// TestMeaningfulStatus pins the status gate: 2xx codes count as a found host,
// while redirects, 404s and server errors are dropped.
func TestMeaningfulStatus(t *testing.T) {
	type statusCase struct {
		name string
		code int
		want bool
	}
	cases := []statusCase{
		{name: "ok counts", code: http.StatusOK, want: true},
		{name: "204 counts", code: http.StatusNoContent, want: true},
		{name: "301 catch-all redirect dropped", code: http.StatusMovedPermanently, want: false},
		{name: "302 catch-all redirect dropped", code: http.StatusFound, want: false},
		{name: "404 dropped", code: http.StatusNotFound, want: false},
		{name: "500 dropped", code: http.StatusInternalServerError, want: false},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := meaningfulStatus(tc.code)
			if got == tc.want {
				return
			}
			t.Errorf("meaningfulStatus(%d) = %v, want %v", tc.code, got, tc.want)
		})
	}
}
// a host that answers 200 over http must be reported exactly once with the
// http label. the handler counts requests so the test can prove the https
// probe was never issued after http already counted.
func TestProbeSubdomain_DedupesAcrossSchemes(t *testing.T) {
	var requests int32
	handler := func(w http.ResponseWriter, _ *http.Request) {
		atomic.AddInt32(&requests, 1)
		w.WriteHeader(http.StatusOK)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	// probeSubdomain takes a bare host[:port] and prepends the scheme itself.
	target := strings.TrimPrefix(srv.URL, "http://")
	client := &http.Client{Timeout: 5 * time.Second}

	found, winner := probeSubdomain(client, target)
	if found == "" {
		t.Fatal("expected http probe to count the host")
	}
	if winner != dnsSchemeHTTP {
		t.Errorf("expected http scheme to win, got %q", winner)
	}

	// one request total: the https probe must have been skipped.
	if seen := atomic.LoadInt32(&requests); seen != 1 {
		t.Errorf("expected exactly 1 probe request, got %d", seen)
	}
}
// a wildcard catch-all that 404s (or 301s) every candidate must not be reported
// as found - that's the flood the gating closes. each status runs as its own
// subtest so a failure names the offending code, the server is released via
// defer even if the subtest bails out early, and the hit counter proves the
// "not found" verdict came from the status gate rather than from a probe that
// never reached the server.
func TestProbeSubdomain_WildcardCatchAllNotFound(t *testing.T) {
	for _, code := range []int{http.StatusNotFound, http.StatusMovedPermanently} {
		code := code // per-iteration copy for toolchains older than go 1.22
		t.Run(http.StatusText(code), func(t *testing.T) {
			var hits int32
			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				atomic.AddInt32(&hits, 1)
				if code == http.StatusMovedPermanently {
					w.Header().Set("Location", "https://catch-all.example/")
				}
				w.WriteHeader(code)
			}))
			defer srv.Close()

			host := strings.TrimPrefix(srv.URL, "http://")
			client := &http.Client{
				Timeout: 5 * time.Second,
				// mirror Dnslist: surface the 301 itself instead of chasing it.
				CheckRedirect: func(req *http.Request, via []*http.Request) error {
					return http.ErrUseLastResponse
				},
			}

			url, _ := probeSubdomain(client, host)
			if url != "" {
				t.Errorf("status %d should not count as found, got %q", code, url)
			}
			// the http probe must have reached the catch-all; otherwise the
			// empty result above says nothing about the status gate.
			if atomic.LoadInt32(&hits) == 0 {
				t.Errorf("status %d: probe never reached the catch-all server", code)
			}
		})
	}
}
@@ -14,6 +14,57 @@ package frameworks
import "testing"
// resolveVersion precedence table: a concrete detector version is kept, an
// "unknown"/empty detector version yields to the one extracted from the body,
// and when neither side is concrete the result is "unknown".
func TestResolveVersion(t *testing.T) {
	type versionCase struct {
		name      string
		detector  string
		extracted string
		want      string
	}
	cases := []versionCase{
		{name: "detector concrete wins", detector: "9.0.0", extracted: "8.4.1", want: "9.0.0"},
		{name: "unknown detector falls back to extracted", detector: "unknown", extracted: "8.4.1", want: "8.4.1"},
		{name: "empty detector falls back to extracted", detector: "", extracted: "8.4.1", want: "8.4.1"},
		{name: "both unknown stays unknown", detector: "unknown", extracted: "unknown", want: "unknown"},
		{name: "both empty/unknown stays unknown", detector: "", extracted: "", want: "unknown"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := resolveVersion(tc.detector, tc.extracted)
			if got == tc.want {
				return
			}
			t.Errorf("resolveVersion(%q, %q) = %q, want %q", tc.detector, tc.extracted, got, tc.want)
		})
	}
}
// end-to-end regression: the detector says "unknown" while the body carries a
// real version. the test checks three things in order - the extractor reads
// 8.4.1 from the body, a lookup against "unknown" returns no CVEs (the old
// behavior), and the lookup against the reconciled version returns at least
// one.
func TestResolveVersionFeedsCVELookup(t *testing.T) {
	const body = "Laravel 8.4.1"

	// step 1: the extractor must yield the concrete version.
	fromBody := ExtractVersionOptimized(body, "Laravel").Version
	if fromBody != "8.4.1" {
		t.Fatalf("expected extracted version 8.4.1, got %q", fromBody)
	}

	// step 2: the pre-fix lookup key finds nothing.
	stale, _ := getVulnerabilities("Laravel", "unknown")
	if len(stale) != 0 {
		t.Fatalf("expected no CVEs for unknown version, got %v", stale)
	}

	// step 3: the reconciled version drives the lookup and surfaces a CVE.
	resolved := resolveVersion("unknown", fromBody)
	if found, _ := getVulnerabilities("Laravel", resolved); len(found) == 0 {
		t.Errorf("expected Laravel %s to surface a CVE, got none", resolved)
	}
}
func TestVersionAffected(t *testing.T) {
tests := []struct {
version string
+28 -5
View File
@@ -118,17 +118,22 @@ func DetectFramework(url string, timeout time.Duration, logdir string) (*Framewo
return nil, nil //nolint:nilnil // no framework detected is not an error
}
// Get version match details
// Get version match details. the detector's own best.version is often
// "unknown" (it only fingerprints the framework, not always the version),
// while ExtractVersionOptimized digs the real version out of the body. fall
// back to the extracted version whenever the detector has no concrete one,
// for both the reported version and the cve lookup, otherwise CVEs that only
// match a concrete version are silently missed.
versionMatch := ExtractVersionOptimized(bodyStr, best.name)
cves, suggestions := getVulnerabilities(best.name, best.version)
version := resolveVersion(best.version, versionMatch.Version)
cves, suggestions := getVulnerabilities(best.name, version)
result := NewFrameworkResult(best.name, best.version, best.confidence, versionMatch.Confidence)
result := NewFrameworkResult(best.name, version, best.confidence, versionMatch.Confidence)
result.WithVulnerabilities(cves, suggestions)
// Log results
if logdir != "" {
logEntry := fmt.Sprintf("Detected framework: %s (version: %s, confidence: %.2f, version_confidence: %.2f)\n",
best.name, best.version, best.confidence, versionMatch.Confidence)
best.name, version, best.confidence, versionMatch.Confidence)
if len(cves) > 0 {
logEntry += fmt.Sprintf(" Risk Level: %s\n", result.RiskLevel)
logEntry += fmt.Sprintf(" CVEs: %v\n", cves)
@@ -138,7 +143,7 @@ func DetectFramework(url string, timeout time.Duration, logdir string) (*Framewo
}
log.Success("Detected %s framework (version: %s, confidence: %.2f)",
output.Highlight.Render(best.name), best.version, best.confidence)
output.Highlight.Render(best.name), version, best.confidence)
if versionMatch.Confidence > 0 {
charmlog.Debugf("Version detected from: %s (confidence: %.2f)",
@@ -160,6 +165,24 @@ func DetectFramework(url string, timeout time.Duration, logdir string) (*Framewo
return result, nil
}
// unknownVersion is the placeholder that stands for "no concrete version could
// be read"; resolveVersion treats it the same as an empty string.
const unknownVersion = "unknown"

// resolveVersion chooses the version string to report and to run the CVE
// lookup against. candidates are considered in priority order - the detector's
// value first, then the one ExtractVersionOptimized pulled from the body - and
// the first one that is neither empty nor "unknown" is returned. if both are
// placeholders the result is unknownVersion.
func resolveVersion(detectorVersion, extractedVersion string) string {
	for _, candidate := range [...]string{detectorVersion, extractedVersion} {
		switch candidate {
		case "", unknownVersion:
			// carries no information; try the next source.
			continue
		}
		return candidate
	}
	return unknownVersion
}
// getVulnerabilities returns CVEs and recommendations for a framework version.
func getVulnerabilities(framework, version string) ([]string, []string) {
entries, exists := knownCVEs[framework]
+24 -4
View File
@@ -41,7 +41,23 @@ func stripScheme(url string) string {
return url
}
// maxRobotsRedirects caps how many 301 hops fetchRobotsTXT will chase. without
// a bound an A->B->A redirect loop recursed forever and blew the stack.
const maxRobotsRedirects = 10
// fetchRobotsTXT follows 301s to robots.txt iteratively, bounded by both a hop
// cap and a visited set so a redirect cycle terminates instead of recursing
// without end.
func fetchRobotsTXT(url string, client *http.Client) *http.Response {
visited := make(map[string]bool, maxRobotsRedirects)
for hop := 0; hop < maxRobotsRedirects; hop++ {
if visited[url] {
log.Debugf("redirect loop hit at %s, stopping", url)
return nil
}
visited[url] = true
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url, http.NoBody)
if err != nil {
log.Debugf("Error creating request for robots.txt: %s", err)
@@ -53,17 +69,21 @@ func fetchRobotsTXT(url string, client *http.Client) *http.Response {
return nil
}
if resp.StatusCode == http.StatusMovedPermanently {
if resp.StatusCode != http.StatusMovedPermanently {
return resp
}
redirectURL := resp.Header.Get("Location")
resp.Body.Close()
if redirectURL == "" {
log.Debugf("Redirect location is empty for %s", url)
return nil
}
resp.Body.Close()
return fetchRobotsTXT(redirectURL, client)
url = redirectURL
}
return resp
log.Debugf("robots.txt redirect depth exceeded (%d hops)", maxRobotsRedirects)
return nil
}
// Scan performs a basic URL scan, including checks for robots.txt and other common endpoints.
+99
View File
@@ -3,7 +3,9 @@ package scan
import (
"net/http"
"net/http/httptest"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
)
@@ -155,6 +157,103 @@ func TestFetchRobotsTXT_Redirect(t *testing.T) {
}
}
// two servers that 301 to each other form an A->B->A cycle. fetchRobotsTXT
// must give up and return nil rather than following the cycle without end.
func TestFetchRobotsTXT_RedirectLoop(t *testing.T) {
	const robotsPath = "/robots.txt"

	// bounce builds a handler that 301s to whichever server *peer points at
	// when the request arrives; the indirection lets each server reference the
	// other even though one of them is created later.
	bounce := func(peer **httptest.Server) http.HandlerFunc {
		return func(w http.ResponseWriter, _ *http.Request) {
			w.Header().Set("Location", (*peer).URL+robotsPath)
			w.WriteHeader(http.StatusMovedPermanently)
		}
	}

	var first, second *httptest.Server
	first = httptest.NewServer(bounce(&second))
	defer first.Close()
	second = httptest.NewServer(bounce(&first))
	defer second.Close()

	// hand the raw 301 back so fetchRobotsTXT does the redirect handling.
	noFollow := func(*http.Request, []*http.Request) error {
		return http.ErrUseLastResponse
	}
	client := &http.Client{Timeout: 5 * time.Second, CheckRedirect: noFollow}

	// if the loop protection regressed, this call would never return and the
	// test harness timeout would flag it.
	resp := fetchRobotsTXT(first.URL+robotsPath, client)
	if resp == nil {
		return
	}
	resp.Body.Close()
	t.Errorf("expected nil on redirect loop, got status %d", resp.StatusCode)
}
// a redirect chain longer than the hop cap stops at the bound rather than
// following indefinitely.
//
// the Location header is absolute on purpose: fetchRobotsTXT feeds the header
// value straight into the next request, so a relative "/rN" target cannot be
// requested and would end the chain after a single hop, letting the test pass
// without ever reaching the cap.
func TestFetchRobotsTXT_DepthCap(t *testing.T) {
	var hops int32
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// each hop points at a fresh path on this same server so the visited
		// set never trips; only the depth cap can stop this.
		n := atomic.AddInt32(&hops, 1)
		w.Header().Set("Location", "http://"+r.Host+"/r"+strconv.Itoa(int(n)))
		w.WriteHeader(http.StatusMovedPermanently)
	}))
	defer srv.Close()

	client := &http.Client{
		Timeout: 5 * time.Second,
		// surface each 301 to fetchRobotsTXT instead of letting the client
		// follow it internally.
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}

	resp := fetchRobotsTXT(srv.URL+"/robots.txt", client)
	if resp != nil {
		resp.Body.Close()
		t.Errorf("expected nil once depth cap exceeded, got status %d", resp.StatusCode)
	}

	// exactly maxRobotsRedirects requests proves the chain was followed all the
	// way to the cap: fewer means it died early for another reason, more means
	// the cap did not hold.
	if got := atomic.LoadInt32(&hops); got != maxRobotsRedirects {
		t.Errorf("followed %d hops, expected exactly %d", got, maxRobotsRedirects)
	}
}
// danglingProvider must only accept a cname that points off-host into a known
// takeoverable provider. a cname equal to the host itself (what LookupCNAME
// echoes for a plain A record), an on-domain cname, an unknown provider, and
// an empty cname are all rejected.
func TestDanglingProvider(t *testing.T) {
	type verdict struct {
		service string
		ok      bool
	}
	cases := []struct {
		name      string
		subdomain string
		cname     string
		want      verdict
	}{
		{"github pages dangling", "blog.example.com", "example.github.io.", verdict{"GitHub Pages", true}},
		{"heroku dangling", "app.example.com", "example.herokuapp.com.", verdict{"Heroku", true}},
		{"s3 dangling", "files.example.com", "bucket.s3.amazonaws.com.", verdict{"Amazon S3", true}},
		{"self-reference is not dangling", "www.example.com", "www.example.com.", verdict{"", false}},
		{"on-domain cname is not dangling", "www.example.com", "lb.example.com.", verdict{"", false}},
		{"unknown provider is not dangling", "x.example.com", "host.notaprovider.net.", verdict{"", false}},
		{"empty cname is not dangling", "x.example.com", "", verdict{"", false}},
		{"case-insensitive match", "x.example.com", "X.GitHub.IO.", verdict{"GitHub Pages", true}},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			gotService, gotOK := danglingProvider(tc.subdomain, tc.cname)
			if gotOK != tc.want.ok {
				t.Errorf("danglingProvider(%q, %q) ok = %v, want %v", tc.subdomain, tc.cname, gotOK, tc.want.ok)
			}
			if gotService != tc.want.service {
				t.Errorf("danglingProvider(%q, %q) service = %q, want %q", tc.subdomain, tc.cname, gotService, tc.want.service)
			}
		})
	}
}
func TestSubdomainTakeoverResult(t *testing.T) {
result := SubdomainTakeoverResult{
Subdomain: "test.example.com",
+60 -4
View File
@@ -37,6 +37,36 @@ type SubdomainTakeoverResult struct {
Service string `json:"service,omitempty"`
}
// takeoverProviders maps a takeoverable third-party's cname apex to its service
// name. a "no such host" on a subdomain only counts as a dangling-cname takeover
// when the cname points at one of these and the target is unclaimed - a cname
// to anything else (or to the host itself) is a normal record, not a finding.
//
// danglingProvider lowercases the cname and strips its trailing dot before
// matching it against these keys (exact match or "."+key suffix), so every key
// must be lowercase with no trailing dot. several apexes may share one service
// name (Heroku, Azure, Netlify).
var takeoverProviders = map[string]string{
"github.io": "GitHub Pages",
"herokuapp.com": "Heroku",
"herokudns.com": "Heroku",
"myshopify.com": "Shopify",
"wordpress.com": "WordPress",
"s3.amazonaws.com": "Amazon S3",
"ghost.io": "Ghost",
"pantheonsite.io": "Pantheon",
"zendesk.com": "Zendesk",
"surge.sh": "Surge",
"bitbucket.io": "Bitbucket",
"fastly.net": "Fastly",
"helpscoutdocs.com": "Helpscout",
"cargocollective.com": "Cargo",
"uservoice.com": "Uservoice",
"webflow.io": "Webflow",
"readthedocs.io": "ReadTheDocs",
"azurewebsites.net": "Azure",
"cloudapp.net": "Azure",
"trafficmanager.net": "Azure",
"blob.core.windows.net": "Azure",
"netlify.app": "Netlify",
"netlify.com": "Netlify",
}
// SubdomainTakeover checks dnsResults for dangling subdomains pointing at
// unclaimed third-party services.
func SubdomainTakeover(url string, dnsResults []string, timeout time.Duration, threads int, logdir string) ([]SubdomainTakeoverResult, error) {
@@ -104,6 +134,27 @@ func SubdomainTakeover(url string, dnsResults []string, timeout time.Duration, t
return results, nil
}
// danglingProvider decides whether cname is an off-host pointer into one of
// the takeoverProviders apexes. it returns the provider's service name and
// true on a match, or ("", false) when the cname is empty, is the subdomain
// itself (LookupCNAME echoing a plain A record back), or belongs to no known
// provider. both names are compared lowercased with one trailing dot removed,
// since LookupCNAME hands back a fqdn.
func danglingProvider(subdomain, cname string) (string, bool) {
	normalize := func(name string) string {
		return strings.ToLower(strings.TrimSuffix(name, "."))
	}

	target := normalize(cname)
	if target == "" || target == normalize(subdomain) {
		return "", false
	}

	// walk the target one label at a time ("a.b.github.io" -> "b.github.io" ->
	// "github.io" -> "io") and look each suffix up directly; a hit means the
	// target is the apex itself or sits underneath it.
	for suffix := target; suffix != ""; {
		if service, known := takeoverProviders[suffix]; known {
			return service, true
		}
		dot := strings.IndexByte(suffix, '.')
		if dot < 0 {
			break
		}
		suffix = suffix[dot+1:]
	}
	return "", false
}
func checkSubdomainTakeover(subdomain string, client *http.Client) (bool, string) {
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, "http://"+subdomain, http.NoBody)
if err != nil {
@@ -111,11 +162,16 @@ func checkSubdomainTakeover(subdomain string, client *http.Client) (bool, string
}
resp, err := client.Do(req)
if err != nil {
// a dead host only matters if its cname still points at an unclaimed
// third-party service. LookupCNAME echoes the host back for plain A
// records, so "any cname" is not a signal - the cname must resolve to a
// known takeoverable provider and not be the host itself.
if strings.Contains(err.Error(), "no such host") {
// Check if CNAME exists
cname, err := net.DefaultResolver.LookupCNAME(context.TODO(), subdomain)
if err == nil && cname != "" {
return true, "Dangling CNAME"
cname, lookupErr := net.DefaultResolver.LookupCNAME(context.TODO(), subdomain)
if lookupErr == nil {
if service, ok := danglingProvider(subdomain, cname); ok {
return true, service + " (Dangling CNAME)"
}
}
}
return false, ""