mirror of
https://github.com/lunchcat/sif.git
synced 2026-06-12 19:11:25 -07:00
0383a7bcd2
go only returns a conn to the idle pool when the body is read to EOF before Close. the header-only and early-return scan paths closed an unread body, leaking the conn and forcing a fresh dial each request. route those close sites through httpx.DrainClose so the tuned pool from phase 1 actually gets reused. body-read paths (scanner/io.ReadAll) are left untouched.
176 lines
6.1 KiB
Go
176 lines
6.1 KiB
Go
/*
|
|
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
|
: :
|
|
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
|
|
: ▄█ █ █▀ · BSD 3-Clause License :
|
|
: :
|
|
: (c) 2022-2026 vmfunc, xyzeva, :
|
|
: lunchcat alumni & contributors :
|
|
: :
|
|
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
|
*/
|
|
|
|
// Package scan provides a collection of security scanning functions.
|
|
//
|
|
// Each scanning function typically returns a slice of custom result structures and an error.
|
|
// The package utilizes concurrent operations to improve scanning performance and provides
|
|
// options for logging and timeout management.
|
|
package scan
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"net/http"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/charmbracelet/log"
|
|
"github.com/dropalldatabases/sif/internal/httpx"
|
|
"github.com/dropalldatabases/sif/internal/logger"
|
|
"github.com/dropalldatabases/sif/internal/output"
|
|
)
|
|
|
|
// stripScheme drops a leading "scheme://" prefix from url and returns the
// remainder. The input is returned unchanged when it has no "://" at all (so a
// bare host doesn't panic), or when the first "://" appears after a '/', '?'
// or '#': in that case it belongs to a path, query or fragment of a
// scheme-less target (e.g. "example.com/?next=http://x") and is not a scheme
// separator.
func stripScheme(url string) string {
	scheme, rest, found := strings.Cut(url, "://")
	if !found || strings.ContainsAny(scheme, "/?#") {
		return url
	}
	return rest
}
|
|
|
|
// maxRobotsRedirects caps how many 301 hops fetchRobotsTXT will chase. without
|
|
// a bound an A->B->A redirect loop recursed forever and blew the stack.
|
|
const maxRobotsRedirects = 10
|
|
|
|
// fetchRobotsTXT follows 301s to robots.txt iteratively, bounded by both a hop
|
|
// cap and a visited set so a redirect cycle terminates instead of recursing
|
|
// without end.
|
|
func fetchRobotsTXT(url string, client *http.Client) *http.Response {
|
|
visited := make(map[string]bool, maxRobotsRedirects)
|
|
|
|
for hop := 0; hop < maxRobotsRedirects; hop++ {
|
|
if visited[url] {
|
|
log.Debugf("redirect loop hit at %s, stopping", url)
|
|
return nil
|
|
}
|
|
visited[url] = true
|
|
|
|
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url, http.NoBody)
|
|
if err != nil {
|
|
log.Debugf("Error creating request for robots.txt: %s", err)
|
|
return nil
|
|
}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
log.Debugf("Error fetching robots.txt: %s", err)
|
|
return nil
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusMovedPermanently {
|
|
return resp
|
|
}
|
|
|
|
redirectURL := resp.Header.Get("Location")
|
|
// only the Location header is used here; drain so the conn is reusable.
|
|
httpx.DrainClose(resp)
|
|
if redirectURL == "" {
|
|
log.Debugf("Redirect location is empty for %s", url)
|
|
return nil
|
|
}
|
|
url = redirectURL
|
|
}
|
|
|
|
log.Debugf("robots.txt redirect depth exceeded (%d hops)", maxRobotsRedirects)
|
|
return nil
|
|
}
|
|
|
|
// Scan performs a basic URL scan, including checks for robots.txt and other common endpoints.
|
|
// It logs the results and doesn't return any values.
|
|
//
|
|
// Parameters:
|
|
// - url: the target URL to scan
|
|
// - timeout: maximum duration for the scan
|
|
// - threads: number of concurrent threads to use
|
|
// - logdir: directory to store log files (empty string for no logging)
|
|
func Scan(url string, timeout time.Duration, threads int, logdir string) {
|
|
output.ScanStart("base URL scanning")
|
|
|
|
sanitizedURL := stripScheme(url)
|
|
|
|
if logdir != "" {
|
|
if err := logger.WriteHeader(sanitizedURL, logdir, "URL scanning"); err != nil {
|
|
output.Error("Error creating log file: %v", err)
|
|
return
|
|
}
|
|
}
|
|
|
|
client := httpx.Client(timeout)
|
|
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
|
return http.ErrUseLastResponse
|
|
}
|
|
|
|
resp := fetchRobotsTXT(url+"/robots.txt", client) //nolint:bodyclose // drained and closed via httpx.DrainClose
|
|
if resp == nil {
|
|
return
|
|
}
|
|
// drain on close: the non-success branch never reads the body, so a bare
|
|
// close would leak the conn instead of returning it to the pool.
|
|
defer httpx.DrainClose(resp)
|
|
|
|
if resp.StatusCode != 404 && resp.StatusCode != 301 && resp.StatusCode != 302 && resp.StatusCode != 307 {
|
|
output.Success("File %s found", output.Status.Render("robots.txt"))
|
|
|
|
var robotsData []string
|
|
scanner := bufio.NewScanner(resp.Body)
|
|
scanner.Split(bufio.ScanLines)
|
|
for scanner.Scan() {
|
|
robotsData = append(robotsData, scanner.Text())
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(threads)
|
|
for thread := 0; thread < threads; thread++ {
|
|
go func(thread int) {
|
|
defer wg.Done()
|
|
|
|
for i, robot := range robotsData {
|
|
if i%threads != thread {
|
|
continue
|
|
}
|
|
|
|
if robot == "" || strings.HasPrefix(robot, "#") || strings.HasPrefix(robot, "User-agent: ") || strings.HasPrefix(robot, "Sitemap: ") {
|
|
continue
|
|
}
|
|
|
|
_, sanitizedRobot, _ := strings.Cut(robot, ": ")
|
|
log.Debugf("%s", robot)
|
|
robotReq, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, url+"/"+sanitizedRobot, http.NoBody)
|
|
if err != nil {
|
|
log.Debugf("Error creating request for %s: %s", sanitizedRobot, err)
|
|
continue
|
|
}
|
|
resp, err := client.Do(robotReq) //nolint:bodyclose // drained and closed via httpx.DrainClose
|
|
if err != nil {
|
|
log.Debugf("Error %s: %s", sanitizedRobot, err)
|
|
continue
|
|
}
|
|
|
|
if resp.StatusCode != 404 {
|
|
output.Success("%s from robots: %s", output.Status.Render(strconv.Itoa(resp.StatusCode)), output.Highlight.Render(sanitizedRobot))
|
|
if logdir != "" {
|
|
logger.Write(sanitizedURL, logdir, strconv.Itoa(resp.StatusCode)+" from robots: ["+sanitizedRobot+"]\n")
|
|
}
|
|
}
|
|
// status only; drain so the conn returns to the pool.
|
|
httpx.DrainClose(resp)
|
|
}
|
|
|
|
}(thread)
|
|
}
|
|
wg.Wait()
|
|
}
|
|
}
|