Files
sif/internal/scan/scan.go
T
vmfunc 22168611e4 perf(scan): work-stealing worker pool replacing static stride partition
static i%threads stride partitions assign each item to a fixed worker, so a
goroutine stuck on a slow or timing-out host stalls while the rest idle behind
it (head-of-line blocking). add internal/pool.Each: workers all pull from one
shared channel, so a slow item only blocks its own worker and the others keep
draining. migrate git, ports, robots (scan), dnslist, dork, dirlist and
subdomaintakeover off the stride loops; per-item work, mutex-guarded appends and
progress increments are unchanged, results were already unordered.
2026-06-10 16:39:04 -07:00

162 lines
5.8 KiB
Go

/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
// The scan package provides a collection of security scanning functions.
//
// Each scanning function typically returns a slice of custom result structures and an error.
// The package utilizes concurrent operations to improve scanning performance and provides
// options for logging and timeout management.
package scan
import (
"bufio"
"context"
"net/http"
"strconv"
"strings"
"time"
"github.com/charmbracelet/log"
"github.com/dropalldatabases/sif/internal/httpx"
"github.com/dropalldatabases/sif/internal/logger"
"github.com/dropalldatabases/sif/internal/output"
"github.com/dropalldatabases/sif/internal/pool"
)
// stripScheme drops the scheme:// prefix from url, or returns it unchanged when
// there's no scheme (so a bare host doesn't panic).
func stripScheme(url string) string {
if _, rest, ok := strings.Cut(url, "://"); ok {
return rest
}
return url
}
// maxRobotsRedirects caps how many 301 hops fetchRobotsTXT will chase. without
// a bound an A->B->A redirect loop recursed forever and blew the stack.
const maxRobotsRedirects = 10
// fetchRobotsTXT follows 301s to robots.txt iteratively, bounded by both a hop
// cap and a visited set so a redirect cycle terminates instead of recursing
// without end.
func fetchRobotsTXT(url string, client *http.Client) *http.Response {
visited := make(map[string]bool, maxRobotsRedirects)
for hop := 0; hop < maxRobotsRedirects; hop++ {
if visited[url] {
log.Debugf("redirect loop hit at %s, stopping", url)
return nil
}
visited[url] = true
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url, http.NoBody)
if err != nil {
log.Debugf("Error creating request for robots.txt: %s", err)
return nil
}
resp, err := client.Do(req)
if err != nil {
log.Debugf("Error fetching robots.txt: %s", err)
return nil
}
if resp.StatusCode != http.StatusMovedPermanently {
return resp
}
redirectURL := resp.Header.Get("Location")
// only the Location header is used here; drain so the conn is reusable.
httpx.DrainClose(resp)
if redirectURL == "" {
log.Debugf("Redirect location is empty for %s", url)
return nil
}
url = redirectURL
}
log.Debugf("robots.txt redirect depth exceeded (%d hops)", maxRobotsRedirects)
return nil
}
// Scan performs a basic URL scan, including checks for robots.txt and other common endpoints.
// It logs the results and doesn't return any values.
//
// Parameters:
// - url: the target URL to scan
// - timeout: maximum duration for the scan
// - threads: number of concurrent threads to use
// - logdir: directory to store log files (empty string for no logging)
func Scan(url string, timeout time.Duration, threads int, logdir string) {
output.ScanStart("base URL scanning")
sanitizedURL := stripScheme(url)
if logdir != "" {
if err := logger.WriteHeader(sanitizedURL, logdir, "URL scanning"); err != nil {
output.Error("Error creating log file: %v", err)
return
}
}
client := httpx.Client(timeout)
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
}
resp := fetchRobotsTXT(url+"/robots.txt", client) //nolint:bodyclose // drained and closed via httpx.DrainClose
if resp == nil {
return
}
// drain on close: the non-success branch never reads the body, so a bare
// close would leak the conn instead of returning it to the pool.
defer httpx.DrainClose(resp)
if resp.StatusCode != 404 && resp.StatusCode != 301 && resp.StatusCode != 302 && resp.StatusCode != 307 {
output.Success("File %s found", output.Status.Render("robots.txt"))
var robotsData []string
scanner := bufio.NewScanner(resp.Body)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
robotsData = append(robotsData, scanner.Text())
}
pool.Each(robotsData, threads, func(robot string) {
if robot == "" || strings.HasPrefix(robot, "#") || strings.HasPrefix(robot, "User-agent: ") || strings.HasPrefix(robot, "Sitemap: ") {
return
}
_, sanitizedRobot, _ := strings.Cut(robot, ": ")
log.Debugf("%s", robot)
robotReq, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url+"/"+sanitizedRobot, http.NoBody)
if err != nil {
log.Debugf("Error creating request for %s: %s", sanitizedRobot, err)
return
}
resp, err := client.Do(robotReq) //nolint:bodyclose // drained and closed via httpx.DrainClose
if err != nil {
log.Debugf("Error %s: %s", sanitizedRobot, err)
return
}
if resp.StatusCode != 404 {
output.Success("%s from robots: %s", output.Status.Render(strconv.Itoa(resp.StatusCode)), output.Highlight.Render(sanitizedRobot))
if logdir != "" {
logger.Write(sanitizedURL, logdir, strconv.Itoa(resp.StatusCode)+" from robots: ["+sanitizedRobot+"]\n")
}
}
// status only; drain so the conn returns to the pool.
httpx.DrainClose(resp)
})
}
}