Files
sif/internal/scan/scan.go
T
vmfunc 839c0a779c fix(scan): dnslist dedup, robots recursion bound, framework version lookup, takeover cname
four recon-flagged bugs, each with a focused test:

- dnslist fired both http and https per candidate and counted a "found"
  on any non-error response (incl 404 and wildcard catch-all redirects),
  so every host double-counted and a wildcard-dns host flooded results.
  probe http then https with per-subdomain dedupe, gate on a meaningful
  (2xx, non-redirect) status, and stop chasing redirects so a catch-all
  301 reads as a redirect instead of a 200.

- fetchRobotsTXT recursed on every 301 Location with no depth limit and
  no visited set, so an A->B->A loop blew the stack. bound it to a named
  hop cap and a visited set, iteratively.

- framework cve lookup used best.version ("unknown" when the detector
  only fingerprints the framework) and threw away the version
  ExtractVersionOptimized dug out of the body, missing CVEs. reconcile
  via resolveVersion, preferring the extracted concrete version.

- subdomain takeover flagged a dangling cname whenever a no-such-host
  coincided with ANY cname (LookupCNAME echoes the host back for plain A
  records). only flag when the cname points off-host at a known
  takeoverable provider.
2026-06-10 14:47:17 -07:00

172 lines
5.6 KiB
Go

/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
// The scan package provides a collection of security scanning functions.
//
// Each scanning function typically returns a slice of custom result structures and an error.
// The package utilizes concurrent operations to improve scanning performance and provides
// options for logging and timeout management.
package scan
import (
"bufio"
"context"
"net/http"
"strconv"
"strings"
"sync"
"time"
"github.com/charmbracelet/log"
"github.com/dropalldatabases/sif/internal/httpx"
"github.com/dropalldatabases/sif/internal/logger"
"github.com/dropalldatabases/sif/internal/output"
)
// stripScheme drops the scheme:// prefix from url, or returns it unchanged when
// there's no scheme (so a bare host doesn't panic).
func stripScheme(url string) string {
if _, rest, ok := strings.Cut(url, "://"); ok {
return rest
}
return url
}
// maxRobotsRedirects caps how many 301 hops fetchRobotsTXT will chase. without
// a bound an A->B->A redirect loop recursed forever and blew the stack.
const maxRobotsRedirects = 10
// fetchRobotsTXT follows 301s to robots.txt iteratively, bounded by both a hop
// cap and a visited set so a redirect cycle terminates instead of recursing
// without end.
func fetchRobotsTXT(url string, client *http.Client) *http.Response {
visited := make(map[string]bool, maxRobotsRedirects)
for hop := 0; hop < maxRobotsRedirects; hop++ {
if visited[url] {
log.Debugf("redirect loop hit at %s, stopping", url)
return nil
}
visited[url] = true
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url, http.NoBody)
if err != nil {
log.Debugf("Error creating request for robots.txt: %s", err)
return nil
}
resp, err := client.Do(req)
if err != nil {
log.Debugf("Error fetching robots.txt: %s", err)
return nil
}
if resp.StatusCode != http.StatusMovedPermanently {
return resp
}
redirectURL := resp.Header.Get("Location")
resp.Body.Close()
if redirectURL == "" {
log.Debugf("Redirect location is empty for %s", url)
return nil
}
url = redirectURL
}
log.Debugf("robots.txt redirect depth exceeded (%d hops)", maxRobotsRedirects)
return nil
}
// Scan performs a basic URL scan, including checks for robots.txt and other common endpoints.
// It logs the results and doesn't return any values.
//
// Parameters:
// - url: the target URL to scan
// - timeout: maximum duration for the scan
// - threads: number of concurrent threads to use
// - logdir: directory to store log files (empty string for no logging)
func Scan(url string, timeout time.Duration, threads int, logdir string) {
output.ScanStart("base URL scanning")
sanitizedURL := stripScheme(url)
if logdir != "" {
if err := logger.WriteHeader(sanitizedURL, logdir, "URL scanning"); err != nil {
output.Error("Error creating log file: %v", err)
return
}
}
client := httpx.Client(timeout)
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
}
resp := fetchRobotsTXT(url+"/robots.txt", client)
if resp == nil {
return
}
defer resp.Body.Close()
if resp.StatusCode != 404 && resp.StatusCode != 301 && resp.StatusCode != 302 && resp.StatusCode != 307 {
output.Success("File %s found", output.Status.Render("robots.txt"))
var robotsData []string
scanner := bufio.NewScanner(resp.Body)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
robotsData = append(robotsData, scanner.Text())
}
var wg sync.WaitGroup
wg.Add(threads)
for thread := 0; thread < threads; thread++ {
go func(thread int) {
defer wg.Done()
for i, robot := range robotsData {
if i%threads != thread {
continue
}
if robot == "" || strings.HasPrefix(robot, "#") || strings.HasPrefix(robot, "User-agent: ") || strings.HasPrefix(robot, "Sitemap: ") {
continue
}
_, sanitizedRobot, _ := strings.Cut(robot, ": ")
log.Debugf("%s", robot)
robotReq, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url+"/"+sanitizedRobot, http.NoBody)
if err != nil {
log.Debugf("Error creating request for %s: %s", sanitizedRobot, err)
continue
}
resp, err := client.Do(robotReq)
if err != nil {
log.Debugf("Error %s: %s", sanitizedRobot, err)
continue
}
if resp.StatusCode != 404 {
output.Success("%s from robots: %s", output.Status.Render(strconv.Itoa(resp.StatusCode)), output.Highlight.Render(sanitizedRobot))
if logdir != "" {
logger.Write(sanitizedURL, logdir, strconv.Itoa(resp.StatusCode)+" from robots: ["+sanitizedRobot+"]\n")
}
}
resp.Body.Close()
}
}(thread)
}
wg.Wait()
}
}