diff --git a/.golangci.yml b/.golangci.yml index b76f305..08583ea 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -88,6 +88,7 @@ linters: linters: - errcheck - noctx + - gosec # fake credentials in secret-scanner fixtures are not real keys issues: max-issues-per-linter: 50 diff --git a/README.md b/README.md index 7f9911b..59359f3 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,9 @@ makepkg -si # sql recon + lfi scanning ./sif -u https://example.com -sql -lfi +# web vuln probes (cors, open redirect, reflected xss) +./sif -u https://example.com -cors -redirect -xss + # framework detection (with cve lookup) ./sif -u https://example.com -framework @@ -158,7 +161,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended | `-ports` | port scanning (common/full) | | `-nuclei` | vulnerability scanning with nuclei templates | | `-dork` | automated google dorking | -| `-js` | javascript analysis | +| `-js` | javascript analysis + secret and endpoint extraction | | `-c3` | cloud storage misconfiguration | | `-headers` | http header analysis | | `-sh` | security header analysis (missing/weak headers) | @@ -170,7 +173,13 @@ sif has a modular architecture. 
modules are defined in yaml and can be extended | `-securitytrails` | domain discovery + target expansion (requires SECURITYTRAILS_API_KEY) | | `-sql` | sql recon | | `-lfi` | local file inclusion | +| `-cors` | cors misconfiguration probe | +| `-redirect` | open redirect probe | +| `-xss` | reflected xss probe | | `-framework` | framework detection with cve lookup | +| `-crawl` | web crawler (spider same-host links/scripts/forms) | +| `-crawl-depth` | max crawl recursion depth (default 2) | +| `-passive` | passive subdomain/url discovery (zero traffic to target) | ### http options diff --git a/docs/usage.md b/docs/usage.md index 5a1db9e..ae75048 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -79,7 +79,7 @@ scopes: `common` (top ports), `full` (all ports) ### javascript analysis -`-js` - analyze javascript files +`-js` - analyze javascript files + secret and endpoint extraction ```bash ./sif -u https://example.com -js @@ -154,6 +154,30 @@ export SHODAN_API_KEY=your-api-key ./sif -u https://example.com -lfi ``` +### cors probe + +`-cors` - probe for cors misconfigurations (reflected/permissive origins) + +```bash +./sif -u https://example.com -cors +``` + +### open redirect probe + +`-redirect` - probe redirect-prone params for open redirects + +```bash +./sif -u https://example.com/login?next=home -redirect +``` + +### reflected xss probe + +`-xss` - inject a canary into params and report unescaped reflections + +```bash +./sif -u https://example.com/search?q=test -xss +``` + ### framework detection `-framework` - detect web frameworks with version and cve lookup @@ -162,6 +186,26 @@ export SHODAN_API_KEY=your-api-key ./sif -u https://example.com -framework ``` +### web crawler + +`-crawl` - spider the target, following same-host links, scripts and forms + +`-crawl-depth` - max recursion depth (default 2). respects robots.txt and stays on the target host. 
+ +```bash +./sif -u https://example.com -crawl -crawl-depth 3 +``` + +### passive discovery + +`-passive` - gather subdomains from certificate transparency (crt.sh, certspotter) and historical urls from the wayback machine + +keyless and zero traffic to the target itself - all lookups hit third-party feeds. + +```bash +./sif -u https://example.com -passive +``` + ### whois lookup `-whois` - perform whois lookups @@ -339,6 +383,9 @@ the first time you run a new release sif also prints that release's notes once. -git \ -sql \ -lfi \ + -cors \ + -redirect \ + -xss \ -am ``` diff --git a/go.mod b/go.mod index 4382257..35bb982 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/charmbracelet/glamour v0.10.0 github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 github.com/charmbracelet/log v1.0.0 + github.com/gocolly/colly/v2 v2.1.0 github.com/likexian/whois v1.15.7 github.com/projectdiscovery/goflags v0.1.74 github.com/projectdiscovery/nuclei/v3 v3.8.0 @@ -160,7 +161,6 @@ require ( github.com/gobwas/pool v0.2.1 // indirect github.com/gobwas/ws v1.4.0 // indirect github.com/goccy/go-json v0.10.5 // indirect - github.com/gocolly/colly/v2 v2.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt/v4 v4.5.2 // indirect github.com/golang-jwt/jwt/v5 v5.2.2 // indirect diff --git a/internal/config/config.go b/internal/config/config.go index 7e6755d..bdd2f95 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -46,7 +46,13 @@ type Settings struct { SecurityTrails bool SQL bool LFI bool + CORS bool + Redirect bool + XSS bool Framework bool + Crawl bool + CrawlDepth int + Passive bool Modules string // Comma-separated list of module IDs to run ModuleTags string // Run modules matching these tags AllModules bool // Run all loaded modules @@ -62,6 +68,10 @@ type Settings struct { // "negative WaitGroup counter"; clamp the parsed value up to this. 
const minThreads = 1 +// defaultCrawlDepth bounds how far the spider recurses by default; deep enough +// to find linked pages without crawling an entire site. +const defaultCrawlDepth = 2 + const ( Nil goflags.EnumVariable = iota @@ -107,7 +117,13 @@ func Parse() *Settings { flagSet.BoolVar(&settings.SecurityTrails, "securitytrails", false, "Enable SecurityTrails domain discovery (requires SECURITYTRAILS_API_KEY env var)"), flagSet.BoolVar(&settings.SQL, "sql", false, "Enable SQL reconnaissance (admin panels, error disclosure)"), flagSet.BoolVar(&settings.LFI, "lfi", false, "Enable LFI (Local File Inclusion) reconnaissance"), + flagSet.BoolVar(&settings.CORS, "cors", false, "Enable CORS misconfiguration probe"), + flagSet.BoolVar(&settings.Redirect, "redirect", false, "Enable open redirect probe"), + flagSet.BoolVar(&settings.XSS, "xss", false, "Enable reflected XSS probe"), flagSet.BoolVar(&settings.Framework, "framework", false, "Enable framework detection"), + flagSet.BoolVar(&settings.Crawl, "crawl", false, "Enable web crawling (spider same-host links/scripts/forms)"), + flagSet.IntVar(&settings.CrawlDepth, "crawl-depth", defaultCrawlDepth, "Max crawl recursion depth"), + flagSet.BoolVar(&settings.Passive, "passive", false, "Enable passive subdomain/url discovery (zero traffic to target)"), ) flagSet.CreateGroup("runtime", "Runtime", diff --git a/internal/scan/cors.go b/internal/scan/cors.go new file mode 100644 index 0000000..3828628 --- /dev/null +++ b/internal/scan/cors.go @@ -0,0 +1,236 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "context" + "fmt" + "net/http" + "net/url" + "strings" + "sync" + "time" + + charmlog 
"github.com/charmbracelet/log" + "github.com/dropalldatabases/sif/internal/httpx" + "github.com/dropalldatabases/sif/internal/logger" + "github.com/dropalldatabases/sif/internal/output" +) + +// CORSResult collects every cors misconfiguration found on the target. +type CORSResult struct { + Findings []CORSFinding `json:"findings,omitempty"` +} + +// CORSFinding is a single reflecting/permissive cors response. +type CORSFinding struct { + URL string `json:"url"` + OriginTested string `json:"origin_tested"` + AllowOrigin string `json:"allow_origin"` + AllowCredentials bool `json:"allow_credentials"` + Severity string `json:"severity"` + Note string `json:"note"` +} + +// corsMaxRedirects caps the redirect chain so we read the cors headers off the +// host we actually asked about, not whatever it bounces us to. +const corsMaxRedirects = 3 + +// the sentinel attacker origin; if it comes back in Access-Control-Allow-Origin +// the target reflects arbitrary origins and any site can read the response. +const corsEvilOrigin = "https://sif-cors-probe.evil.com" + +// corsOrigin is a header to inject + why it matters. {host} expands to the +// target host so the prefix/suffix bypasses key off the real name. 
// corsOrigins is the table of crafted Origin headers sent to the target, one
// request per entry. "{host}" inside origin is replaced with the target's
// host (port included, when present) before the request is sent.
var corsOrigins = []struct {
	origin   string // crafted Origin header, {host} -> target host
	note     string // why this case is interesting
	reflects bool   // true when a literal echo of this origin is exploitable
	// NOTE(review): reflects is set to true on every entry and is not read by
	// any code visible in this file - confirm whether it is still needed.
}{
	// arbitrary attacker origin - the classic "reflects anything" bug
	{corsEvilOrigin, "arbitrary origin reflected", true},
	// the literal null origin (sandboxed iframes, redirects, file://) is forgeable
	{"null", "null origin allowed", true},
	// suffix bypass: an attacker suffix is appended to the host, so the origin
	// still *starts with* the trusted name; naive startswith checks pass
	{"https://{host}.evil.com", "suffix bypass (attacker subdomain)", true},
	// prefix bypass: an attacker prefix is prepended to the host, so the origin
	// still *ends with* the trusted name; naive endswith checks pass
	{"https://evil-{host}", "prefix bypass", true},
	// embedded bypass: this origin is a subdomain of the target itself, so a
	// reflection shows that arbitrary subdomains are trusted.
	// NOTE(review): an attacker cannot register this name directly; presumably
	// exploitation needs a subdomain takeover or xss on a subdomain - confirm
	// the intended severity.
	{"https://evil.com.{host}", "embedded-host bypass", true},
	// scheme downgrade: http origin trusted lets a mitm read cross-origin data
	{"http://{host}", "http scheme downgrade trusted", true},
}

// CORS probes the target for cross-origin resource sharing misconfigurations.
+func CORS(targetURL string, timeout time.Duration, threads int, logdir string) (*CORSResult, error) { + log := output.Module("CORS") + log.Start() + + spin := output.NewSpinner("Scanning for CORS misconfigurations") + spin.Start() + + sanitizedURL := stripScheme(targetURL) + + if logdir != "" { + if err := logger.WriteHeader(sanitizedURL, logdir, "CORS misconfiguration probe"); err != nil { + spin.Stop() + log.Error("error creating log file: %v", err) + return nil, fmt.Errorf("create cors log: %w", err) + } + } + + parsedURL, err := url.Parse(targetURL) + if err != nil { + spin.Stop() + return nil, fmt.Errorf("parse url: %w", err) + } + host := parsedURL.Host + + client := httpx.Client(timeout) + client.CheckRedirect = func(_ *http.Request, via []*http.Request) error { + if len(via) >= corsMaxRedirects { + return http.ErrUseLastResponse + } + return nil + } + + result := &CORSResult{Findings: make([]CORSFinding, 0, len(corsOrigins))} + + var mu sync.Mutex + var wg sync.WaitGroup + + // one origin per worker item; the set is small so a buffered channel is plenty + originChan := make(chan int, len(corsOrigins)) + for i := 0; i < len(corsOrigins); i++ { + originChan <- i + } + close(originChan) + + wg.Add(threads) + for t := 0; t < threads; t++ { + go func() { + defer wg.Done() + for idx := range originChan { + spec := corsOrigins[idx] + // {host} is the seam that turns a template into a real attacker origin + origin := strings.ReplaceAll(spec.origin, "{host}", host) + + finding, ok := probeCORS(client, targetURL, origin, spec.note) + if !ok { + continue + } + + mu.Lock() + result.Findings = append(result.Findings, finding) + mu.Unlock() + + spin.Stop() + log.Warn("cors %s: origin %s reflected (creds=%t)", + renderCORSSeverity(finding.Severity), + output.Highlight.Render(origin), + finding.AllowCredentials) + spin.Start() + + if logdir != "" { + logger.Write(sanitizedURL, logdir, + fmt.Sprintf("CORS: %s - origin [%s] reflected as [%s] creds=%t\n", + finding.Note, 
origin, finding.AllowOrigin, finding.AllowCredentials)) + } + } + }() + } + wg.Wait() + + spin.Stop() + + if len(result.Findings) == 0 { + log.Info("no cors misconfigurations detected") + log.Complete(0, "found") + return nil, nil //nolint:nilnil // no finding is not an error, mirrors the other scanners + } + + log.Complete(len(result.Findings), "found") + return result, nil +} + +// probeCORS sends one request with the crafted Origin and decides whether the +// response trusts it. It returns the finding and true only when the server +// reflects the origin (or "null"/"*" with credentials), which is the exploitable +// shape - a server that ignores Origin or returns its own host is fine. +func probeCORS(client *http.Client, targetURL, origin, note string) (CORSFinding, bool) { + req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, targetURL, http.NoBody) + if err != nil { + charmlog.Debugf("cors: build request for %s: %v", targetURL, err) + return CORSFinding{}, false + } + req.Header.Set("Origin", origin) + + resp, err := client.Do(req) + if err != nil { + charmlog.Debugf("cors: request %s with origin %s: %v", targetURL, origin, err) + return CORSFinding{}, false + } + // headers are all we need; drain nothing, just close. + resp.Body.Close() + + allowOrigin := resp.Header.Get("Access-Control-Allow-Origin") + if allowOrigin == "" { + return CORSFinding{}, false + } + + allowCreds := strings.EqualFold(resp.Header.Get("Access-Control-Allow-Credentials"), "true") + + // a wildcard with credentials is forbidden by browsers, so it isn't directly + // exploitable; a plain wildcard exposes only public data. neither is a finding. + if allowOrigin == "*" { + return CORSFinding{}, false + } + + // the bug is reflection: the server echoed our attacker origin back. if it + // returned something else (its own host) it isn't trusting us. 
+ reflected := allowOrigin == origin + + if !reflected { + return CORSFinding{}, false + } + + return CORSFinding{ + URL: targetURL, + OriginTested: origin, + AllowOrigin: allowOrigin, + AllowCredentials: allowCreds, + Severity: corsSeverity(allowCreds), + Note: note, + }, true +} + +// corsSeverity ranks the finding: reflection + credentials lets an attacker read +// authenticated responses, which is the high-impact case. +func corsSeverity(allowCreds bool) string { + if allowCreds { + return "high" + } + return "medium" +} + +func renderCORSSeverity(severity string) string { + if severity == "high" { + return output.SeverityHigh.Render(severity) + } + return output.SeverityMedium.Render(severity) +} + +// ResultType identifies cors findings for the result registry. +func (r *CORSResult) ResultType() string { return "cors" } + +var _ ScanResult = (*CORSResult)(nil) diff --git a/internal/scan/cors_test.go b/internal/scan/cors_test.go new file mode 100644 index 0000000..ac5afce --- /dev/null +++ b/internal/scan/cors_test.go @@ -0,0 +1,140 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" +) + +// reflectingCORS echoes the Origin into Access-Control-Allow-Origin and sets +// credentials, the exploitable misconfiguration. 
+func reflectingCORS() *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if origin := r.Header.Get("Origin"); origin != "" { + w.Header().Set("Access-Control-Allow-Origin", origin) + w.Header().Set("Access-Control-Allow-Credentials", "true") + } + w.WriteHeader(http.StatusOK) + })) +} + +func TestCORS_ReflectsArbitraryOrigin(t *testing.T) { + srv := reflectingCORS() + defer srv.Close() + + result, err := CORS(srv.URL, 5*time.Second, 3, "") + if err != nil { + t.Fatalf("CORS: %v", err) + } + if result == nil || len(result.Findings) == 0 { + t.Fatalf("expected cors findings on reflecting server, got %+v", result) + } + + // the reflecting server echoes every crafted origin with credentials, + // so each finding should be high severity. + var sawEvil bool + for _, f := range result.Findings { + if f.OriginTested == corsEvilOrigin { + sawEvil = true + if !f.AllowCredentials { + t.Errorf("expected credentials flagged for evil origin, got %+v", f) + } + if f.Severity != "high" { + t.Errorf("expected high severity for reflection+creds, got %s", f.Severity) + } + } + } + if !sawEvil { + t.Errorf("expected the sentinel evil origin to be reflected, got %+v", result.Findings) + } +} + +func TestCORS_SeverityWithoutCredentials(t *testing.T) { + // reflects the origin but never grants credentials - medium, not high. 
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if origin := r.Header.Get("Origin"); origin != "" { + w.Header().Set("Access-Control-Allow-Origin", origin) + } + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + result, err := CORS(srv.URL, 5*time.Second, 3, "") + if err != nil { + t.Fatalf("CORS: %v", err) + } + if result == nil || len(result.Findings) == 0 { + t.Fatalf("expected reflection findings, got %+v", result) + } + for _, f := range result.Findings { + if f.AllowCredentials { + t.Errorf("did not expect credentials, got %+v", f) + } + if f.Severity != "medium" { + t.Errorf("expected medium severity without creds, got %s", f.Severity) + } + } +} + +func TestCORS_NoFalsePositiveOnSafeServer(t *testing.T) { + tests := []struct { + name string + handler http.HandlerFunc + }{ + { + name: "ignores origin entirely", + handler: func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }, + }, + { + name: "returns its own fixed origin", + handler: func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "https://trusted.example.com") + w.WriteHeader(http.StatusOK) + }, + }, + { + name: "plain wildcard, no credentials", + handler: func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "*") + w.WriteHeader(http.StatusOK) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + srv := httptest.NewServer(tt.handler) + defer srv.Close() + + result, err := CORS(srv.URL, 5*time.Second, 3, "") + if err != nil { + t.Fatalf("CORS: %v", err) + } + if result != nil && len(result.Findings) > 0 { + t.Errorf("expected no findings on safe server, got %+v", result.Findings) + } + }) + } +} + +func TestCORSResult_ResultType(t *testing.T) { + r := &CORSResult{} + if r.ResultType() != "cors" { + t.Errorf("expected result type 'cors', got %q", r.ResultType()) + } +} diff --git a/internal/scan/crawl.go 
b/internal/scan/crawl.go new file mode 100644 index 0000000..79a5859 --- /dev/null +++ b/internal/scan/crawl.go @@ -0,0 +1,137 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "fmt" + "net/url" + "sort" + "sync" + "time" + + "github.com/gocolly/colly/v2" + + "github.com/dropalldatabases/sif/internal/httpx" + "github.com/dropalldatabases/sif/internal/logger" + "github.com/dropalldatabases/sif/internal/output" +) + +// CrawlResult holds the deduped set of urls discovered by the spider. +type CrawlResult struct { + URLs []string `json:"urls"` +} + +func (r *CrawlResult) ResultType() string { return "crawl" } + +// compile-time check so a result-type drift fails the build, not a run. +var _ ScanResult = (*CrawlResult)(nil) + +// Crawl spiders the target up to depth, following same-host links/scripts/forms. +// all traffic flows through the shared httpx client so proxy/headers/rate-limit +// apply, and robots.txt is respected (colly honors it by default). +func Crawl(targetURL string, depth int, timeout time.Duration, logdir string) (*CrawlResult, error) { + log := output.Module("CRAWL") + log.Start() + + sanitizedURL := stripScheme(targetURL) + + if logdir != "" { + if err := logger.WriteHeader(sanitizedURL, logdir, "web crawl"); err != nil { + log.Error("error creating log file: %v", err) + return nil, fmt.Errorf("create crawl log: %w", err) + } + } + + // the host bounds the crawl; without it colly would wander the whole web. 
+ parsed, err := url.Parse(targetURL) + if err != nil { + return nil, fmt.Errorf("parse target url %q: %w", targetURL, err) + } + host := parsed.Hostname() + if host == "" { + return nil, fmt.Errorf("target url %q has no host", targetURL) + } + + collector := colly.NewCollector( + colly.MaxDepth(depth), + colly.AllowedDomains(host), + ) + // reuse the shared client so proxy/cookie/-H/rate-limit are honored and the + // configured timeout applies to every fetch, robots.txt included. + collector.SetClient(httpx.Client(timeout)) + + // dedupe across the concurrent callbacks colly may fire. + var mu sync.Mutex + seen := make(map[string]struct{}) + + record := func(raw string) { + if raw == "" { + return + } + // keep the result set scoped to the target host; off-host assets + // (cdns, third-party links) are noise for an in-scope crawl. + if u, err := url.Parse(raw); err != nil || u.Hostname() != host { + return + } + mu.Lock() + if _, ok := seen[raw]; !ok { + seen[raw] = struct{}{} + log.Success("found: %s", output.Highlight.Render(raw)) + if logdir != "" { + _ = logger.Write(sanitizedURL, logdir, raw+"\n") + } + } + mu.Unlock() + } + + // links drive recursion; scripts/forms are recorded but not followed. + collector.OnHTML("a[href]", func(e *colly.HTMLElement) { + link := e.Request.AbsoluteURL(e.Attr("href")) + record(link) + // Visit enforces AllowedDomains/MaxDepth itself, so off-host or + // too-deep links are dropped without us re-checking. + _ = e.Request.Visit(link) + }) + collector.OnHTML("script[src]", func(e *colly.HTMLElement) { + record(e.Request.AbsoluteURL(e.Attr("src"))) + }) + collector.OnHTML("form[action]", func(e *colly.HTMLElement) { + record(e.Request.AbsoluteURL(e.Attr("action"))) + }) + + collector.OnError(func(_ *colly.Response, e error) { + // a single bad page shouldn't abort the crawl; note it and move on. 
+ log.Warn("crawl error: %v", e) + }) + + if err := collector.Visit(targetURL); err != nil { + log.Error("crawl failed: %v", err) + return nil, fmt.Errorf("visit %q: %w", targetURL, err) + } + collector.Wait() + + result := &CrawlResult{URLs: sortedKeys(seen)} + + log.Complete(len(result.URLs), "urls") + return result, nil +} + +// sortedKeys returns the map keys in a stable order so output is deterministic. +func sortedKeys(set map[string]struct{}) []string { + keys := make([]string, 0, len(set)) + for k := range set { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} diff --git a/internal/scan/crawl_test.go b/internal/scan/crawl_test.go new file mode 100644 index 0000000..c0cc260 --- /dev/null +++ b/internal/scan/crawl_test.go @@ -0,0 +1,158 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" +) + +// crawlSite serves a small link graph: +// +// / -> links /a and an off-host page; references script.js, form action /submit +// /a -> links /b +// /b -> links /c (only reachable at depth 3) +// /c -> leaf +func crawlSite(t *testing.T) *httptest.Server { + t.Helper() + + mux := http.NewServeMux() + // no robots restrictions; colly fetches this before crawling. + mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + http.NotFound(w, r) + return + } + _, _ = w.Write([]byte(` + a + off + +
+ `)) + }) + mux.HandleFunc("/a", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`b`)) + }) + mux.HandleFunc("/b", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`c`)) + }) + mux.HandleFunc("/c", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`leaf`)) + }) + + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + return srv +} + +func urlsContain(urls []string, want string) bool { + for i := 0; i < len(urls); i++ { + if urls[i] == want { + return true + } + } + return false +} + +func TestCrawl_FindsLinkedPagesAndAssets(t *testing.T) { + srv := crawlSite(t) + + result, err := Crawl(srv.URL, 3, 5*time.Second, "") + if err != nil { + t.Fatalf("Crawl: %v", err) + } + + // links, scripts and forms must all be recorded, resolved to absolute urls. + wants := []string{ + srv.URL + "/a", + srv.URL + "/b", + srv.URL + "/c", + srv.URL + "/script.js", + srv.URL + "/submit", + } + for _, w := range wants { + if !urlsContain(result.URLs, w) { + t.Errorf("expected crawl to find %q, got %v", w, result.URLs) + } + } + + // AllowedDomains must keep the off-host link out of the result set. + if urlsContain(result.URLs, "https://off-host.example/x") { + t.Errorf("off-host link should be excluded, got %v", result.URLs) + } +} + +func TestCrawl_RespectsDepth(t *testing.T) { + srv := crawlSite(t) + + // depth 1: only links found on the root page (/a, /script.js, /submit) are + // recorded; /b lives one hop deeper and must not appear. 
+ result, err := Crawl(srv.URL, 1, 5*time.Second, "") + if err != nil { + t.Fatalf("Crawl: %v", err) + } + + if !urlsContain(result.URLs, srv.URL+"/a") { + t.Errorf("depth 1 should find /a, got %v", result.URLs) + } + if urlsContain(result.URLs, srv.URL+"/b") { + t.Errorf("depth 1 must not reach /b, got %v", result.URLs) + } + if urlsContain(result.URLs, srv.URL+"/c") { + t.Errorf("depth 1 must not reach /c, got %v", result.URLs) + } +} + +func TestCrawl_Dedupes(t *testing.T) { + // a page that links the same target twice must yield a single entry. + mux := http.NewServeMux() + mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/dup" { + _, _ = w.Write([]byte(`leaf`)) + return + } + _, _ = w.Write([]byte(`12`)) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + + result, err := Crawl(srv.URL, 2, 5*time.Second, "") + if err != nil { + t.Fatalf("Crawl: %v", err) + } + + count := 0 + for _, u := range result.URLs { + if u == srv.URL+"/dup" { + count++ + } + } + if count != 1 { + t.Errorf("expected /dup once after dedupe, got %d in %v", count, result.URLs) + } +} + +func TestCrawl_ResultType(t *testing.T) { + r := &CrawlResult{} + if r.ResultType() != "crawl" { + t.Errorf("ResultType = %q, want crawl", r.ResultType()) + } +} diff --git a/internal/scan/integration_test.go b/internal/scan/integration_test.go index 7894a7f..9e0072f 100644 --- a/internal/scan/integration_test.go +++ b/internal/scan/integration_test.go @@ -65,6 +65,32 @@ func newVulnApp() *httptest.Server { w.Write([]byte("phpMyAdmin")) }) + // reflecting-origin endpoint for the cors probe + mux.HandleFunc("/cors", func(w http.ResponseWriter, r *http.Request) { + if origin := r.Header.Get("Origin"); origin != "" { + w.Header().Set("Access-Control-Allow-Origin", origin) + w.Header().Set("Access-Control-Allow-Credentials", "true") + } + 
w.WriteHeader(http.StatusOK) + }) + + // open-redirect endpoint: echoes the next param into Location + mux.HandleFunc("/redirect", func(w http.ResponseWriter, r *http.Request) { + if next := r.URL.Query().Get("next"); next != "" { + w.Header().Set("Location", next) + w.WriteHeader(http.StatusFound) + return + } + w.WriteHeader(http.StatusOK) + }) + + // reflecting endpoint for the xss probe: echoes q raw into html text + mux.HandleFunc("/xss", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + //nolint:gosec // deliberate reflected-xss fixture for the probe under test + w.Write([]byte("
" + r.URL.Query().Get("q") + "
")) + }) + // homepage doubles as the cms fingerprint and the lfi sink mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/" { @@ -180,6 +206,45 @@ func TestIntegrationLFI(t *testing.T) { } } +func TestIntegrationCORS(t *testing.T) { + srv := newVulnApp() + defer srv.Close() + + result, err := CORS(srv.URL+"/cors", 5*time.Second, 3, "") + if err != nil { + t.Fatalf("CORS: %v", err) + } + if result == nil || len(result.Findings) == 0 { + t.Fatalf("expected a cors finding from the reflecting endpoint, got %+v", result) + } +} + +func TestIntegrationRedirect(t *testing.T) { + srv := newVulnApp() + defer srv.Close() + + result, err := Redirect(srv.URL+"/redirect", 5*time.Second, 4, "") + if err != nil { + t.Fatalf("Redirect: %v", err) + } + if result == nil || len(result.Findings) == 0 { + t.Fatalf("expected an open-redirect finding from the next sink, got %+v", result) + } +} + +func TestIntegrationXSS(t *testing.T) { + srv := newVulnApp() + defer srv.Close() + + result, err := XSS(srv.URL+"/xss", 5*time.Second, 4, "") + if err != nil { + t.Fatalf("XSS: %v", err) + } + if result == nil || len(result.Findings) == 0 { + t.Fatalf("expected a reflected-xss finding from the q sink, got %+v", result) + } +} + func TestIntegrationPorts(t *testing.T) { // a real listener stands in for an open port; a tiny server hands its number // to Ports via the commonPorts wordlist. 
diff --git a/internal/scan/js/endpoints.go b/internal/scan/js/endpoints.go new file mode 100644 index 0000000..4dcf26d --- /dev/null +++ b/internal/scan/js/endpoints.go @@ -0,0 +1,128 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package js + +import ( + "net/url" + "regexp" + "slices" + "strings" + + urlutil "github.com/projectdiscovery/utils/url" +) + +// endpointRegex is a linkfinder-style matcher for quoted paths and urls inside +// js: full http(s) urls, root-relative (/api/...) and dotted-relative paths, +// plus bare api-ish words with an extension. the inner alternation lives in a +// single capture group so FindAllStringSubmatch hands back just the value. +var endpointRegex = regexp.MustCompile(`["'\x60]` + + `(` + + `(?:https?:)?//[^\s"'\x60]{2,}` + // protocol-relative or absolute url + `|` + + `/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // root-relative path + `|` + + `\.{1,2}/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // dotted-relative path + `)` + + `["'\x60]`) + +// shortest thing we'll treat as an endpoint; below this it's almost always +// noise like "/" or a single slash-prefixed letter. +const minEndpointLen = 3 + +// mime types slip through the path regex (text/html, application/json, ...) but +// are never endpoints, so they're filtered out by their top-level type. +var mimePrefixes = []string{ + "text/", "image/", "audio/", "video/", "font/", + "application/", "multipart/", "model/", "message/", +} + +// ExtractEndpoints pulls candidate paths and urls out of a script body, dedupes +// them, drops obvious noise, and resolves relatives against baseURL so callers +// get absolute targets where possible. 
a baseURL that won't parse just leaves +// relatives as-is rather than failing the whole scan. +func ExtractEndpoints(content, baseURL string) []string { + groups := endpointRegex.FindAllStringSubmatch(content, -1) + if len(groups) == 0 { + return nil + } + + base, baseErr := urlutil.Parse(baseURL) + + endpoints := make([]string, 0, len(groups)) + seen := make(map[string]struct{}, len(groups)) + for i := 0; i < len(groups); i++ { + candidate := strings.TrimSpace(groups[i][1]) + if !isEndpoint(candidate) { + continue + } + + resolved := candidate + // only relatives need resolving, and only if the base parsed cleanly. + if baseErr == nil && base.URL != nil && isRelative(candidate) { + resolved = resolveRelative(base.URL, candidate) + } + + if _, ok := seen[resolved]; ok { + continue + } + seen[resolved] = struct{}{} + endpoints = append(endpoints, resolved) + } + + slices.Sort(endpoints) + return endpoints +} + +// isEndpoint filters out the junk that the broad regex inevitably catches: +// too-short fragments, mime types, and single dotted words with no path. +func isEndpoint(s string) bool { + if len(s) < minEndpointLen { + return false + } + + lower := strings.ToLower(s) + for i := 0; i < len(mimePrefixes); i++ { + // a mime type is "type/subtype" with no further path; an api route like + // /application/users has a leading slash, so anchor on the bare prefix. + if strings.HasPrefix(lower, mimePrefixes[i]) && !strings.HasPrefix(lower, "/") { + return false + } + } + + // reject "word" or "a.b" with no slash at all: not a path, just a token. + if !strings.Contains(s, "/") { + return false + } + + return true +} + +// isRelative reports whether candidate lacks a scheme/host and so needs the +// base url to become absolute. protocol-relative (//host) and absolute urls +// are left untouched. 
// isRelative reports whether candidate lacks a scheme/host and so needs the
// base url to become absolute. protocol-relative (//host) and absolute
// http(s) urls are left untouched.
func isRelative(candidate string) bool {
	switch {
	case strings.HasPrefix(candidate, "//"),
		strings.HasPrefix(candidate, "http://"),
		strings.HasPrefix(candidate, "https://"):
		return false
	default:
		return true
	}
}

// resolveRelative turns a relative path into an absolute url against base using
// the stdlib reference resolver. the original ref is returned unchanged when it
// won't parse, or when base is nil (resolving against a nil base would panic).
func resolveRelative(base *url.URL, ref string) string {
	if base == nil {
		return ref
	}
	parsed, err := url.Parse(ref)
	if err != nil {
		return ref
	}
	return base.ResolveReference(parsed).String()
}
[]string{"https://example.com/api/search?q=test"}, + }, + { + name: "mime types are filtered out", + content: `headers["Content-Type"] = "application/json"; var t = "text/html";`, + wantAbsent: []string{"application/json", "text/html"}, + }, + { + name: "single words without a slash are ignored", + content: `var x = "hello"; var y = "world";`, + wantAbsent: []string{"hello", "world"}, + }, + { + name: "multiple endpoints deduped", + content: `fetch("/api/users"); fetch("/api/users"); fetch("/api/posts");`, + wantSome: []string{ + "https://example.com/api/users", + "https://example.com/api/posts", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ExtractEndpoints(tt.content, base) + + for _, want := range tt.wantSome { + if !slices.Contains(got, want) { + t.Errorf("expected %q in %v", want, got) + } + } + for _, absent := range tt.wantAbsent { + if slices.Contains(got, absent) { + t.Errorf("did not expect %q in %v", absent, got) + } + } + }) + } +} + +func TestExtractEndpointsDedupes(t *testing.T) { + got := ExtractEndpoints(`fetch("/api/x"); fetch("/api/x");`, "https://example.com/app.js") + count := 0 + for i := 0; i < len(got); i++ { + if got[i] == "https://example.com/api/x" { + count++ + } + } + if count != 1 { + t.Fatalf("expected /api/x once, got %d times in %v", count, got) + } +} + +func TestExtractEndpointsBadBaseKeepsRelatives(t *testing.T) { + // a base url that won't parse must not drop findings; relatives stay as-is. 
+ got := ExtractEndpoints(`fetch("/api/users")`, "::not a url::") + if !slices.Contains(got, "/api/users") { + t.Errorf("expected relative /api/users preserved, got %v", got) + } +} diff --git a/internal/scan/js/scan.go b/internal/scan/js/scan.go index 519b11e..2cc3981 100644 --- a/internal/scan/js/scan.go +++ b/internal/scan/js/scan.go @@ -32,6 +32,8 @@ import ( type JavascriptScanResult struct { SupabaseResults []supabaseScanResult `json:"supabase_results"` FoundEnvironmentVars map[string]string `json:"environment_variables"` + SecretMatches []SecretMatch `json:"secret_matches"` + Endpoints []string `json:"endpoints"` } // ResultType implements the ScanResult interface. @@ -116,6 +118,11 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin log.Info("Got %d scripts, now running scans on them", len(scripts)) supabaseResults := make([]supabaseScanResult, 0, len(scripts)) + secretMatches := make([]SecretMatch, 0) + endpoints := make([]string, 0) + // dedupe secrets and endpoints across every script, not just within one. + seenSecrets := make(map[string]struct{}) + seenEndpoints := make(map[string]struct{}) for _, script := range scripts { charmlog.Debugf("Scanning %s", script) req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, script, http.NoBody) @@ -147,16 +154,41 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin if scriptSupabaseResults != nil { supabaseResults = append(supabaseResults, scriptSupabaseResults...) } + + // reuse the same script buffer for credential and endpoint extraction. 
// SecretMatch is one credential the scanner pulled out of a script.
type SecretMatch struct {
	Rule   string `json:"rule"`
	Match  string `json:"match"`
	Source string `json:"source"`
}

// entropy thresholds gate the noisy generic rules: provider-prefixed keys are
// trustworthy on their own, but a bare apikey="..." or a loose token blob is
// only worth reporting once its shannon entropy clears the bar for "this looks
// random, not an english word". generic secrets sit higher than the aws-secret
// bar because the generic capture groups also catch ordinary identifiers.
const (
	genericMinEntropy   = 3.5
	awsSecretMinEntropy = 3.0
	// rules with no entropy requirement (prefix is already unique enough).
	noEntropyGate = 0.0
)

// secretRules is the credential regex bank. the value group (or the whole
// match when the rule has no dedicated value group) is what gets reported;
// minEntropy gates the generic high-entropy rules so we don't flag every short
// literal.
var secretRules = []struct {
	name       string
	re         *regexp.Regexp
	minEntropy float64
}{
	{
		// aws access key ids are fixed-shape and unmistakable.
		name:       "aws access key id",
		re:         regexp.MustCompile(`\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// aws secret keys are 40-char base64-ish blobs; gate on entropy since the
		// shape alone matches plenty of innocent strings.
		name:       "aws secret access key",
		re:         regexp.MustCompile(`\b((?:aws_secret_access_key|aws_secret|secret_key)["']?\s*[:=]\s*["']?)([A-Za-z0-9/+]{40})\b`),
		minEntropy: awsSecretMinEntropy,
	},
	{
		// github personal/oauth/user/server/refresh tokens share the ghX_ prefix.
		name:       "github token",
		re:         regexp.MustCompile(`\b((?:ghp|gho|ghu|ghs|ghr)_[0-9A-Za-z]{36,255})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// slack bot/user/app/legacy tokens.
		name:       "slack token",
		re:         regexp.MustCompile(`\b(xox[baprs]-[0-9A-Za-z-]{10,})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// stripe live secret and publishable keys (test keys are not findings).
		name:       "stripe live key",
		re:         regexp.MustCompile(`\b([sp]k_live_[0-9A-Za-z]{16,})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// google api keys are a fixed AIza-prefixed 39-char shape.
		name:       "google api key",
		re:         regexp.MustCompile(`\b(AIza[0-9A-Za-z_-]{35})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// pem private key blocks; the header alone is the smoking gun.
		name:       "private key",
		re:         regexp.MustCompile(`-{5}BEGIN (?:RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY-{5}`),
		minEntropy: noEntropyGate,
	},
	{
		// generic apikey/secret/token = "<value>" assignments; the value is in
		// group 2 and only reported if it looks random (entropy gate).
		name:       "generic secret assignment",
		re:         regexp.MustCompile(`(?i)\b(api[_-]?key|secret|token|password|passwd|auth)["']?\s*[:=]\s*["']([0-9A-Za-z\-._~+/]{16,})["']`),
		minEntropy: genericMinEntropy,
	},
}

// the value capture group lives at index 2 for the rules that prefix the
// keyword; index 0 (whole match) is used otherwise.
const (
	valueGroupIndex = 2
	wholeMatchIndex = 0
)

// ScanSecrets runs the regex bank over a script body and returns every gated
// match, deduped within this one source. srcURL is recorded on each find. the
// result is never nil, so it encodes as an empty json array when clean.
func ScanSecrets(content, srcURL string) []SecretMatch {
	matches := make([]SecretMatch, 0)
	seen := make(map[string]struct{})

	for i := range secretRules {
		rule := &secretRules[i] // index instead of copying the rule struct
		for _, groups := range rule.re.FindAllStringSubmatch(content, -1) {
			value := secretValue(groups)
			if value == "" {
				continue
			}

			// entropy gate weeds out english-y identifiers for the generic rules;
			// prefixed rules carry a zero threshold and skip the computation.
			if rule.minEntropy > noEntropyGate && shannonEntropy(value) < rule.minEntropy {
				continue
			}

			// dedupe per source so a key referenced twice is one finding.
			key := rule.name + "\x00" + value
			if _, dup := seen[key]; dup {
				continue
			}
			seen[key] = struct{}{}

			matches = append(matches, SecretMatch{Rule: rule.name, Match: value, Source: srcURL})
		}
	}

	return matches
}

// secretValue returns the reported portion of a regex match: the dedicated
// value group when the rule captures one at valueGroupIndex, otherwise the
// trimmed whole match.
func secretValue(groups []string) string {
	if len(groups) > valueGroupIndex && groups[valueGroupIndex] != "" {
		return groups[valueGroupIndex]
	}
	return strings.TrimSpace(groups[wholeMatchIndex])
}

// shannonEntropy is the per-character shannon entropy (bits) of s, used to tell
// random-looking secrets apart from plain words. empty input is zero entropy.
//
// the sum walks the string rather than the count map: map iteration order is
// randomised, and adding floats in a varying order can move the last bits of
// the result, so a value sitting right on a gate could flip between runs.
func shannonEntropy(s string) float64 {
	if s == "" {
		return 0
	}

	counts := make(map[rune]int)
	total := 0
	for _, r := range s {
		counts[r]++
		total++
	}

	// each occurrence of r contributes (1/n) * log2(n / count(r)); summed over
	// the string this equals -sum(p * log2(p)) over the distinct runes.
	n := float64(total)
	var entropy float64
	for _, r := range s {
		entropy += math.Log2(n/float64(counts[r])) / n
	}
	return entropy
}
+const ( + fakeAWSKey = "AKIA" + "IOSFODNN7EXAMPLE" + fakeAWSSecret = "wJalrXUtnFEMI/K7MDENG/" + "bPxRfiCYEXAMPLEKEY" + fakeGitHub = "ghp_" + "aB3dEfGh1jKlMn0pQrStUvWxYz012345abcd" + fakeSlack = "xoxb-" + "123456789012-abcdefABCDEF1234567890ab" + fakeStripe = "sk_live_" + "4eC39HqLyjWDarjtT1zdp7dc" + fakeGoogle = "AIza" + "SyA1B2C3D4E5F6G7H8I9J0K1L2M3N4O5P6Q" + fakeGeneric = "x9Kq2Lm7Pz4Rt6Wv8Bn3Cd5Fg1Hj0As" + fakePEM = "-----BEGIN RSA PRIVATE " + "KEY-----\nMIIEpAIB..." +) + +func TestScanSecrets(t *testing.T) { + tests := []struct { + name string + content string + wantRule string // rule expected on the first match, "" means no match + wantNone bool + }{ + { + name: "aws access key id", + content: fmt.Sprintf(`const k = %q;`, fakeAWSKey), + wantRule: "aws access key id", + }, + { + name: "github personal token", + content: fmt.Sprintf(`token: %q`, fakeGitHub), + wantRule: "github token", + }, + { + name: "slack bot token", + content: fmt.Sprintf(`slack=%q`, fakeSlack), + wantRule: "slack token", + }, + { + name: "stripe live secret key", + content: fmt.Sprintf(`var sk = %q;`, fakeStripe), + wantRule: "stripe live key", + }, + { + name: "google api key", + content: fmt.Sprintf(`apiKey: %q`, fakeGoogle), + wantRule: "google api key", + }, + { + name: "pem private key header", + content: fakePEM, + wantRule: "private key", + }, + { + name: "generic high-entropy api key assignment", + content: fmt.Sprintf(`apikey = %q`, fakeGeneric), + wantRule: "generic secret assignment", + }, + { + name: "aws secret with entropy", + content: fmt.Sprintf(`aws_secret_access_key=%q`, fakeAWSSecret), + wantRule: "aws secret access key", + }, + { + // low-entropy assignment is a placeholder, not a real secret. + name: "low entropy generic assignment not flagged", + content: `password = "aaaaaaaaaaaaaaaaaaaaaaaa"`, + wantNone: true, + }, + { + // a repetitive placeholder is low-entropy and must not trip the gate. 
+ name: "low entropy repeated pattern not flagged", + content: `token = "abababababababababababab"`, + wantNone: true, + }, + { + name: "no secrets in plain code", + content: `function add(a, b) { return a + b; }`, + wantNone: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ScanSecrets(tt.content, "https://example.com/app.js") + + if tt.wantNone { + if len(got) != 0 { + t.Fatalf("expected no matches, got %+v", got) + } + return + } + + if len(got) == 0 { + t.Fatalf("expected a %q match, got none", tt.wantRule) + } + if got[0].Rule != tt.wantRule { + t.Errorf("rule = %q, want %q", got[0].Rule, tt.wantRule) + } + if got[0].Match == "" { + t.Error("match value is empty") + } + if got[0].Source != "https://example.com/app.js" { + t.Errorf("source = %q, want the passed url", got[0].Source) + } + }) + } +} + +func TestScanSecretsDedupesWithinSource(t *testing.T) { + // the same key referenced twice in one file is one finding. + content := fmt.Sprintf(`a = %q; b = %q;`, fakeAWSKey, fakeAWSKey) + got := ScanSecrets(content, "https://example.com/app.js") + if len(got) != 1 { + t.Fatalf("expected 1 deduped match, got %d: %+v", len(got), got) + } +} + +func TestShannonEntropy(t *testing.T) { + tests := []struct { + name string + input string + // random-ish strings clear the generic gate, repetitive ones don't. 
// source base urls are vars so tests can repoint them at local fixtures. each
// is a format string with a single %s verb that receives the target domain
// (crt.sh additionally carries a literal, escaped "%25." wildcard prefix).
// every feed is fetched over https: the request reveals which domain is being
// investigated and the response feeds straight into findings, so neither
// should travel in cleartext.
var (
	crtshBaseURL       = "https://crt.sh/?q=%%25.%s&output=json"
	certspotterBaseURL = "https://api.certspotter.com/v1/issuances?domain=%s&include_subdomains=true&expand=dns_names"
	waybackBaseURL     = "https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=text&fl=original&collapse=urlkey"
)
+const passiveMaxBytes = 25 * 1024 * 1024 + +// PassiveResult holds passively-gathered subdomains and historical urls. all +// data comes from third-party feeds; the target itself sees zero traffic. +type PassiveResult struct { + Subdomains []string `json:"subdomains"` + URLs []string `json:"urls"` +} + +func (r *PassiveResult) ResultType() string { return "passive" } + +// compile-time check so a result-type drift fails the build, not a run. +var _ ScanResult = (*PassiveResult)(nil) + +// crtshEntry is one certificate record from crt.sh; name_value may itself hold +// several newline-separated names. +type crtshEntry struct { + NameValue string `json:"name_value"` +} + +// certspotterEntry is one issuance from certspotter, expanded to dns names. +type certspotterEntry struct { + DNSNames []string `json:"dns_names"` +} + +// Passive performs keyless passive recon: subdomains from certificate +// transparency feeds plus historical urls from the wayback machine. each source +// fails independently so one feed being down doesn't sink the rest. 
+func Passive(targetURL string, timeout time.Duration, logdir string) (*PassiveResult, error) { + log := output.Module("PASSIVE") + log.Start() + + parsed, err := url.Parse(targetURL) + if err != nil { + return nil, fmt.Errorf("parse target url %q: %w", targetURL, err) + } + domain := parsed.Hostname() + if domain == "" { + return nil, fmt.Errorf("target url %q has no host", targetURL) + } + + sanitizedURL := stripScheme(targetURL) + if logdir != "" { + if err := logger.WriteHeader(sanitizedURL, logdir, "passive recon"); err != nil { + log.Error("error creating log file: %v", err) + return nil, fmt.Errorf("create passive log: %w", err) + } + } + + client := httpx.Client(timeout) + ctx := context.TODO() + + subSet := make(map[string]struct{}) + urlSet := make(map[string]struct{}) + + // crt.sh certificate transparency + if subs, err := fetchCrtsh(ctx, client, domain); err != nil { + log.Warn("crt.sh failed: %v", err) + } else { + addAll(subSet, subs) + } + + // certspotter certificate transparency + if subs, err := fetchCertspotter(ctx, client, domain); err != nil { + log.Warn("certspotter failed: %v", err) + } else { + addAll(subSet, subs) + } + + // wayback machine historical urls + if urls, err := fetchWayback(ctx, client, domain); err != nil { + log.Warn("wayback failed: %v", err) + } else { + addAll(urlSet, urls) + } + + result := &PassiveResult{ + Subdomains: sortedKeys(subSet), + URLs: sortedKeys(urlSet), + } + + logPassiveResults(log, sanitizedURL, logdir, result) + + log.Complete(len(result.Subdomains)+len(result.URLs), "discovered") + return result, nil +} + +// fetchCrtsh pulls subdomains from crt.sh's certificate transparency json. 
+func fetchCrtsh(ctx context.Context, client *http.Client, domain string) ([]string, error) { + body, err := passiveGET(ctx, client, fmt.Sprintf(crtshBaseURL, domain)) + if err != nil { + return nil, err + } + + var entries []crtshEntry + if err := json.Unmarshal(body, &entries); err != nil { + return nil, fmt.Errorf("parse crt.sh json: %w", err) + } + + var names []string + for i := 0; i < len(entries); i++ { + // name_value can pack several names separated by newlines. + for _, name := range strings.Split(entries[i].NameValue, "\n") { + if host := normalizeHost(name); host != "" { + names = append(names, host) + } + } + } + return names, nil +} + +// fetchCertspotter pulls subdomains from certspotter's keyless issuances feed. +func fetchCertspotter(ctx context.Context, client *http.Client, domain string) ([]string, error) { + body, err := passiveGET(ctx, client, fmt.Sprintf(certspotterBaseURL, domain)) + if err != nil { + return nil, err + } + + var entries []certspotterEntry + if err := json.Unmarshal(body, &entries); err != nil { + return nil, fmt.Errorf("parse certspotter json: %w", err) + } + + var names []string + for i := 0; i < len(entries); i++ { + for _, name := range entries[i].DNSNames { + if host := normalizeHost(name); host != "" { + names = append(names, host) + } + } + } + return names, nil +} + +// fetchWayback pulls historical urls from the wayback machine cdx index, which +// returns one original url per line. +func fetchWayback(ctx context.Context, client *http.Client, domain string) ([]string, error) { + body, err := passiveGET(ctx, client, fmt.Sprintf(waybackBaseURL, domain)) + if err != nil { + return nil, err + } + + var urls []string + scanner := bufio.NewScanner(strings.NewReader(string(body))) + // historical urls can be long; give the scanner a generous line buffer. 
+ scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line != "" { + urls = append(urls, line) + } + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("read wayback lines: %w", err) + } + return urls, nil +} + +// passiveGET performs a bounded GET against a passive source. non-200 responses +// are treated as a source failure so the caller can skip it. +func passiveGET(ctx context.Context, client *http.Client, reqURL string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, http.NoBody) + if err != nil { + return nil, fmt.Errorf("create request: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status %d", resp.StatusCode) + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, passiveMaxBytes)) + if err != nil { + return nil, fmt.Errorf("read response: %w", err) + } + return body, nil +} + +// normalizeHost lowercases a name and strips a leading wildcard label so +// "*.example.com" and "EXAMPLE.com" collapse to one canonical host. +func normalizeHost(name string) string { + host := strings.ToLower(strings.TrimSpace(name)) + host = strings.TrimPrefix(host, "*.") + return host +} + +// addAll inserts every value into the dedupe set. 
+func addAll(set map[string]struct{}, values []string) { + for _, v := range values { + set[v] = struct{}{} + } +} + +func logPassiveResults(log *output.ModuleLogger, sanitizedURL, logdir string, result *PassiveResult) { + for _, sub := range result.Subdomains { + log.Success("subdomain: %s", output.Highlight.Render(sub)) + } + for _, u := range result.URLs { + log.Info("url: %s", u) + } + + if logdir == "" { + return + } + + var sb strings.Builder + if len(result.Subdomains) > 0 { + sb.WriteString(fmt.Sprintf("Subdomains (%d):\n", len(result.Subdomains))) + for _, sub := range result.Subdomains { + sb.WriteString(" " + sub + "\n") + } + } + if len(result.URLs) > 0 { + sb.WriteString(fmt.Sprintf("\nHistorical URLs (%d):\n", len(result.URLs))) + for _, u := range result.URLs { + sb.WriteString(" " + u + "\n") + } + } + _ = logger.Write(sanitizedURL, logdir, sb.String()) +} diff --git a/internal/scan/passive_test.go b/internal/scan/passive_test.go new file mode 100644 index 0000000..c6fb200 --- /dev/null +++ b/internal/scan/passive_test.go @@ -0,0 +1,163 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" +) + +// sample feed payloads. crt.sh packs several names per name_value (newline +// separated) and emits wildcards; certspotter returns expanded dns_names. 
+const ( + crtshFixture = `[ + {"name_value": "www.example.com\n*.example.com"}, + {"name_value": "api.example.com"}, + {"name_value": "WWW.example.com"} + ]` + certspotterFixture = `[ + {"dns_names": ["mail.example.com", "api.example.com"]}, + {"dns_names": ["*.example.com"]} + ]` + waybackFixture = "http://example.com/\n" + + "http://example.com/login\n" + + "http://example.com/login\n" + + "\n" + + "http://example.com/admin\n" +) + +// fixtureServer serves each passive source on its own path and repoints the +// package base-url vars at it. the vars are restored on cleanup. +func fixtureServer(t *testing.T, crtsh, certspotter, wayback string) *httptest.Server { + t.Helper() + + mux := http.NewServeMux() + mux.HandleFunc("/crtsh", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(crtsh)) + }) + mux.HandleFunc("/certspotter", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(certspotter)) + }) + mux.HandleFunc("/wayback", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(wayback)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + origCrtsh, origCertspotter, origWayback := crtshBaseURL, certspotterBaseURL, waybackBaseURL + // %s still consumes the domain so the production formatting path is exercised. + crtshBaseURL = srv.URL + "/crtsh?q=%s" + certspotterBaseURL = srv.URL + "/certspotter?domain=%s" + waybackBaseURL = srv.URL + "/wayback?url=%s" + t.Cleanup(func() { + crtshBaseURL, certspotterBaseURL, waybackBaseURL = origCrtsh, origCertspotter, origWayback + }) + + return srv +} + +func TestPassive_ParsesAndDedupes(t *testing.T) { + fixtureServer(t, crtshFixture, certspotterFixture, waybackFixture) + + result, err := Passive("https://example.com", 5*time.Second, "") + if err != nil { + t.Fatalf("Passive: %v", err) + } + + // wildcards stripped, case-folded, and merged across both ct feeds. 
+ wantSubs := map[string]bool{ + "www.example.com": false, + "api.example.com": false, + "mail.example.com": false, + "example.com": false, // from "*.example.com" + } + for _, s := range result.Subdomains { + if _, ok := wantSubs[s]; !ok { + t.Errorf("unexpected subdomain %q", s) + continue + } + wantSubs[s] = true + } + for s, seen := range wantSubs { + if !seen { + t.Errorf("missing subdomain %q in %v", s, result.Subdomains) + } + } + if len(result.Subdomains) != len(wantSubs) { + t.Errorf("expected %d deduped subdomains, got %d: %v", len(wantSubs), len(result.Subdomains), result.Subdomains) + } + + // wayback: blank line dropped, duplicate /login collapsed. + wantURLs := map[string]bool{ + "http://example.com/": false, + "http://example.com/login": false, + "http://example.com/admin": false, + } + for _, u := range result.URLs { + if _, ok := wantURLs[u]; !ok { + t.Errorf("unexpected url %q", u) + continue + } + wantURLs[u] = true + } + if len(result.URLs) != len(wantURLs) { + t.Errorf("expected %d deduped urls, got %d: %v", len(wantURLs), len(result.URLs), result.URLs) + } +} + +func TestPassive_SourceFailureIsIsolated(t *testing.T) { + // crt.sh serves garbage that fails to parse; the other feeds must still + // produce results. 
+ fixtureServer(t, "not json", certspotterFixture, waybackFixture) + + result, err := Passive("https://example.com", 5*time.Second, "") + if err != nil { + t.Fatalf("Passive should not fail when one source is down: %v", err) + } + + if len(result.Subdomains) == 0 { + t.Error("expected certspotter subdomains despite crt.sh failure") + } + if len(result.URLs) == 0 { + t.Error("expected wayback urls despite crt.sh failure") + } + if urlsContain(result.Subdomains, "www.example.com") { + t.Error("crt.sh-only subdomain leaked despite parse failure") + } +} + +func TestPassive_ResultType(t *testing.T) { + r := &PassiveResult{} + if r.ResultType() != "passive" { + t.Errorf("ResultType = %q, want passive", r.ResultType()) + } +} + +func TestNormalizeHost(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"www.example.com", "www.example.com"}, + {"*.example.com", "example.com"}, + {" WWW.Example.COM ", "www.example.com"}, + {"", ""}, + } + for _, tt := range tests { + if got := normalizeHost(tt.in); got != tt.want { + t.Errorf("normalizeHost(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} diff --git a/internal/scan/redirect.go b/internal/scan/redirect.go new file mode 100644 index 0000000..7597c31 --- /dev/null +++ b/internal/scan/redirect.go @@ -0,0 +1,305 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "strings" + "sync" + "time" + + charmlog "github.com/charmbracelet/log" + "github.com/dropalldatabases/sif/internal/httpx" + "github.com/dropalldatabases/sif/internal/logger" + "github.com/dropalldatabases/sif/internal/output" +) + +// RedirectResult collects every 
// RedirectFinding is a single param/payload that sends the user off-site.
type RedirectFinding struct {
	URL       string `json:"url"`
	Parameter string `json:"parameter"`
	Payload   string `json:"payload"`
	Location  string `json:"location"`
	Via       string `json:"via"` // header, meta-refresh, or javascript
	Severity  string `json:"severity"`
}

// RedirectResult collects every open-redirect found on the target, along with
// the number of parameters that were probed.
type RedirectResult struct {
	Findings     []RedirectFinding `json:"findings,omitempty"`
	TestedParams int               `json:"tested_params"`
}

const (
	// redirectMaxBody caps the body we scan for meta/js redirects (100KB).
	redirectMaxBody = 100 << 10

	// redirectSentinel is the controlled host we steer redirects toward; a
	// Location that lands on it proves the param is attacker-controlled.
	redirectSentinel = "sif-redirect-probe.evil.com"
)

// redirectParams lists the params that commonly drive a server-side redirect.
var redirectParams = []string{
	"url", "next", "redirect", "redirect_uri", "redirect_url",
	"return", "return_url", "returnurl", "returnto", "return_to",
	"dest", "destination", "continue", "goto", "go", "target",
	"to", "out", "view", "image_url", "checkout_url", "rurl", "u",
}
// redirectPayloads are the payload variants: a plain sentinel plus filter
// bypasses that browsers still resolve as an absolute off-site target. {host}
// is a placeholder for the sentinel host.
var redirectPayloads = []string{
	"https://{host}",          // plain absolute
	"//{host}",                // scheme-relative
	"https:/{host}",           // missing slash, browsers normalise it
	"https:{host}",            // no slashes
	"/\\{host}",               // backslash trick
	"/%2f%2f{host}",           // encoded scheme-relative
	"https://{host}%00.x.com", // null-byte truncation
	"https://x.com@{host}",    // userinfo confusion - real host is after @
}

// metaRefreshRe matches a meta refresh redirect such as
// <meta http-equiv="refresh" content="0;url=https://target/"> and captures the
// url in group 1. the pattern is anchored on the opening "<meta" so the
// http-equiv and content attributes must belong to the same tag.
var metaRefreshRe = regexp.MustCompile(`(?i)<meta[^>]+http-equiv=["']?refresh["']?[^>]+content=["'][^"']*url=([^"'>\s]+)`)

// jsRedirectRe matches client-side redirects baked into a script body:
// location.href / location.replace / location.assign / window.location given a
// quoted string, which is captured in group 1.
var jsRedirectRe = regexp.MustCompile(`(?i)(?:location\.(?:href|replace|assign)\s*(?:=|\()|window\.location\s*=)\s*["']([^"']+)["']`)
+ client := httpx.Client(timeout) + client.CheckRedirect = func(_ *http.Request, _ []*http.Request) error { + return http.ErrUseLastResponse + } + + result := &RedirectResult{ + Findings: make([]RedirectFinding, 0, 8), + TestedParams: len(paramsToTest), + } + + type workItem struct { + param string + payload string + } + workItems := make([]workItem, 0, len(paramsToTest)*len(redirectPayloads)) + for param := range paramsToTest { + for _, raw := range redirectPayloads { + workItems = append(workItems, workItem{param: param, payload: strings.ReplaceAll(raw, "{host}", redirectSentinel)}) + } + } + + log.Info("testing %d params with %d payloads", len(paramsToTest), len(redirectPayloads)) + + workChan := make(chan workItem, len(workItems)) + for _, item := range workItems { + workChan <- item + } + close(workChan) + + seen := make(map[string]bool) + var mu sync.Mutex + var wg sync.WaitGroup + + wg.Add(threads) + for t := 0; t < threads; t++ { + go func() { + defer wg.Done() + for item := range workChan { + testURL := buildRedirectURL(parsedURL, existingParams, item.param, item.payload) + + location, via, ok := probeRedirect(client, testURL) + if !ok { + continue + } + + key := item.param + "|" + item.payload + mu.Lock() + if seen[key] { + mu.Unlock() + continue + } + seen[key] = true + finding := RedirectFinding{ + URL: testURL, + Parameter: item.param, + Payload: item.payload, + Location: location, + Via: via, + Severity: "medium", + } + result.Findings = append(result.Findings, finding) + mu.Unlock() + + spin.Stop() + log.Warn("open redirect via %s in param %s -> %s", + output.SeverityMedium.Render(via), + output.Highlight.Render(item.param), + output.Status.Render(location)) + spin.Start() + + if logdir != "" { + logger.Write(sanitizedURL, logdir, + fmt.Sprintf("open redirect: param [%s] via %s -> [%s] (payload %s)\n", + item.param, via, location, item.payload)) + } + } + }() + } + wg.Wait() + + spin.Stop() + + if len(result.Findings) == 0 { + log.Info("no open 
redirects detected") + log.Complete(0, "found") + return nil, nil //nolint:nilnil // no finding is not an error, mirrors the other scanners + } + + log.Complete(len(result.Findings), "found") + return result, nil +} + +// buildRedirectURL rebuilds the target with the payload injected into one param, +// preserving the rest of the original query. +func buildRedirectURL(parsedURL *url.URL, existing url.Values, param, payload string) string { + testParams := url.Values{} + for k, v := range existing { + if k != param { + testParams[k] = v + } + } + testParams.Set(param, payload) + return fmt.Sprintf("%s://%s%s?%s", parsedURL.Scheme, parsedURL.Host, parsedURL.Path, testParams.Encode()) +} + +// probeRedirect requests testURL and reports the first off-site redirect it +// finds, whether that's a 30x Location header, a meta-refresh, or a js +// location assignment. via names the channel; ok is false when nothing points +// at the sentinel. +func probeRedirect(client *http.Client, testURL string) (location, via string, ok bool) { + req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, testURL, http.NoBody) + if err != nil { + charmlog.Debugf("redirect: build request for %s: %v", testURL, err) + return "", "", false + } + resp, err := client.Do(req) + if err != nil { + charmlog.Debugf("redirect: request %s: %v", testURL, err) + return "", "", false + } + defer resp.Body.Close() + + // header redirect: a 30x whose Location resolves to the sentinel host + if resp.StatusCode >= http.StatusMultipleChoices && resp.StatusCode < http.StatusBadRequest { + if loc := resp.Header.Get("Location"); pointsAtSentinel(loc) { + return loc, "header", true + } + } + + // body redirects: meta refresh or a client-side location assignment + body, err := io.ReadAll(io.LimitReader(resp.Body, redirectMaxBody)) + if err != nil { + return "", "", false + } + bodyStr := string(body) + + if m := metaRefreshRe.FindStringSubmatch(bodyStr); len(m) > 1 && pointsAtSentinel(m[1]) { + return 
m[1], "meta-refresh", true + } + if m := jsRedirectRe.FindStringSubmatch(bodyStr); len(m) > 1 && pointsAtSentinel(m[1]) { + return m[1], "javascript", true + } + + return "", "", false +} + +// pointsAtSentinel reports whether a redirect target lands on our controlled +// host. We resolve the value the way a browser would so scheme-relative ("//x") +// and backslash tricks are caught, then compare hostnames - a sentinel that only +// shows up in a path or query (still same-origin) is not a redirect off-site. +func pointsAtSentinel(location string) bool { + if location == "" { + return false + } + + // browsers treat backslashes in the authority as forward slashes + normalized := strings.ReplaceAll(location, "\\", "/") + + parsed, err := url.Parse(normalized) + if err != nil { + // unparseable but still naming the sentinel as the leading authority is a hit + return strings.HasPrefix(strings.TrimLeft(normalized, "/:"), redirectSentinel) + } + + // the resolved host is what the navigation actually targets + if strings.EqualFold(parsed.Hostname(), redirectSentinel) { + return true + } + + // scheme-relative "//host" parses with an empty scheme but a populated host + if parsed.Host != "" && strings.EqualFold(stripPort(parsed.Host), redirectSentinel) { + return true + } + + return false +} + +// stripPort drops a trailing :port so host comparisons ignore it. +func stripPort(host string) string { + if h, _, ok := strings.Cut(host, ":"); ok { + return h + } + return host +} + +// ResultType identifies open-redirect findings for the result registry. 
+func (r *RedirectResult) ResultType() string { return "redirect" } + +var _ ScanResult = (*RedirectResult)(nil) diff --git a/internal/scan/redirect_test.go b/internal/scan/redirect_test.go new file mode 100644 index 0000000..ddb83c5 --- /dev/null +++ b/internal/scan/redirect_test.go @@ -0,0 +1,163 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestRedirect_HeaderLocation(t *testing.T) { + // echoes the "next" param straight into Location, the textbook open redirect. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if next := r.URL.Query().Get("next"); next != "" { + w.Header().Set("Location", next) + w.WriteHeader(http.StatusFound) + return + } + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + result, err := Redirect(srv.URL, 5*time.Second, 4, "") + if err != nil { + t.Fatalf("Redirect: %v", err) + } + if result == nil || len(result.Findings) == 0 { + t.Fatalf("expected open redirect findings, got %+v", result) + } + + var sawHeader bool + for _, f := range result.Findings { + if f.Parameter == "next" && f.Via == "header" { + sawHeader = true + } + } + if !sawHeader { + t.Errorf("expected a header redirect via 'next', got %+v", result.Findings) + } +} + +func TestRedirect_MetaRefresh(t *testing.T) { + // body-based redirect: a meta refresh pointing at the injected url. 
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + dest := r.URL.Query().Get("url") + w.Header().Set("Content-Type", "text/html") + w.WriteHeader(http.StatusOK) + if dest != "" { + //nolint:gosec // deliberate open-redirect fixture for the probe under test + w.Write([]byte(``)) + return + } + w.Write([]byte("home")) + })) + defer srv.Close() + + result, err := Redirect(srv.URL, 5*time.Second, 4, "") + if err != nil { + t.Fatalf("Redirect: %v", err) + } + if result == nil { + t.Fatalf("expected meta-refresh findings, got nil") + } + var sawMeta bool + for _, f := range result.Findings { + if f.Via == "meta-refresh" { + sawMeta = true + } + } + if !sawMeta { + t.Errorf("expected a meta-refresh redirect finding, got %+v", result.Findings) + } +} + +func TestRedirect_NoFalsePositive(t *testing.T) { + tests := []struct { + name string + handler http.HandlerFunc + }{ + { + name: "never redirects", + handler: func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("home")) + }, + }, + { + name: "only redirects to a fixed safe path", + handler: func(w http.ResponseWriter, _ *http.Request) { + // ignores the param, always sends users to its own login page. + w.Header().Set("Location", "/login") + w.WriteHeader(http.StatusFound) + }, + }, + { + name: "reflects param into body but not as a redirect", + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + // the value lands in plain text, no meta/js redirect mechanism. + //nolint:gosec // intentional reflection fixture; asserts no false positive + w.Write([]byte("

you searched for " + r.URL.Query().Get("next") + "

")) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + srv := httptest.NewServer(tt.handler) + defer srv.Close() + + result, err := Redirect(srv.URL, 5*time.Second, 4, "") + if err != nil { + t.Fatalf("Redirect: %v", err) + } + if result != nil && len(result.Findings) > 0 { + t.Errorf("expected no findings, got %+v", result.Findings) + } + }) + } +} + +func TestPointsAtSentinel(t *testing.T) { + tests := []struct { + name string + location string + want bool + }{ + {"absolute https", "https://" + redirectSentinel + "/path", true}, + {"scheme-relative", "//" + redirectSentinel, true}, + {"backslash trick", "/\\" + redirectSentinel, true}, + {"with port", "https://" + redirectSentinel + ":443/", true}, + {"empty", "", false}, + {"same-site path", "/dashboard", false}, + {"sentinel only in path", "https://safe.example.com/" + redirectSentinel, false}, + {"sentinel only in query", "https://safe.example.com/?to=" + redirectSentinel, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := pointsAtSentinel(tt.location); got != tt.want { + t.Errorf("pointsAtSentinel(%q) = %v, want %v", tt.location, got, tt.want) + } + }) + } +} + +func TestRedirectResult_ResultType(t *testing.T) { + r := &RedirectResult{} + if r.ResultType() != "redirect" { + t.Errorf("expected result type 'redirect', got %q", r.ResultType()) + } +} diff --git a/internal/scan/xss.go b/internal/scan/xss.go new file mode 100644 index 0000000..8ccb98d --- /dev/null +++ b/internal/scan/xss.go @@ -0,0 +1,342 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "strings" + 
"sync" + "time" + + charmlog "github.com/charmbracelet/log" + "github.com/dropalldatabases/sif/internal/httpx" + "github.com/dropalldatabases/sif/internal/logger" + "github.com/dropalldatabases/sif/internal/output" +) + +// XSSResult collects every likely reflected-xss point on the target. +type XSSResult struct { + Findings []XSSFinding `json:"findings,omitempty"` + TestedParams int `json:"tested_params"` +} + +// XSSFinding is a reflection where one or more breaking chars survived +// unescaped in a context that makes injection plausible. +type XSSFinding struct { + URL string `json:"url"` + Parameter string `json:"parameter"` + Context string `json:"context"` // html, attribute, or script + SurvivedRaw []string `json:"survived_raw"` // breaking chars echoed unescaped + Severity string `json:"severity"` +} + +// xssMaxBody caps the body we scan for the canary (100KB). +const xssMaxBody = 1024 * 100 + +// canaryToken is a unique, alnum-only marker we can grep for unambiguously; it +// survives every output encoder so a missing reflection means no echo at all. +const canaryToken = "sifxss9173canary" //nolint:gosec // not a credential, just a reflection marker + +// the chars that let an attacker break out of a context; we inject the canary +// wrapped in each and check which come back raw. +var xssBreakChars = []string{"<", ">", "\"", "'", "`"} + +// params we test when the target carries none of its own. +var xssParams = []string{ + "q", "s", "search", "query", "id", "name", "page", + "keyword", "lang", "redirect", "url", "return", "ref", + "message", "msg", "error", "title", "text", "comment", +} + +// XSS probes the target's params for reflected cross-site scripting. 
+func XSS(targetURL string, timeout time.Duration, threads int, logdir string) (*XSSResult, error) { + log := output.Module("XSS") + log.Start() + + spin := output.NewSpinner("Scanning for reflected XSS") + spin.Start() + + sanitizedURL := stripScheme(targetURL) + + if logdir != "" { + if err := logger.WriteHeader(sanitizedURL, logdir, "reflected XSS probe"); err != nil { + spin.Stop() + log.Error("error creating log file: %v", err) + return nil, fmt.Errorf("create xss log: %w", err) + } + } + + parsedURL, err := url.Parse(targetURL) + if err != nil { + spin.Stop() + return nil, fmt.Errorf("parse url: %w", err) + } + existingParams := parsedURL.Query() + + paramsToTest := make(map[string]bool, len(existingParams)+len(xssParams)) + for param := range existingParams { + paramsToTest[param] = true + } + for _, param := range xssParams { + paramsToTest[param] = true + } + + client := httpx.Client(timeout) + client.CheckRedirect = func(_ *http.Request, via []*http.Request) error { + if len(via) >= corsMaxRedirects { + return http.ErrUseLastResponse + } + return nil + } + + result := &XSSResult{ + Findings: make([]XSSFinding, 0, 8), + TestedParams: len(paramsToTest), + } + + params := make([]string, 0, len(paramsToTest)) + for param := range paramsToTest { + params = append(params, param) + } + + log.Info("testing %d params with reflection canary", len(paramsToTest)) + + paramChan := make(chan string, len(params)) + for _, param := range params { + paramChan <- param + } + close(paramChan) + + seen := make(map[string]bool) + var mu sync.Mutex + var wg sync.WaitGroup + + wg.Add(threads) + for t := 0; t < threads; t++ { + go func() { + defer wg.Done() + for param := range paramChan { + finding, ok := probeXSS(client, parsedURL, existingParams, param) + if !ok { + continue + } + + mu.Lock() + if seen[param] { + mu.Unlock() + continue + } + seen[param] = true + result.Findings = append(result.Findings, finding) + mu.Unlock() + + spin.Stop() + log.Warn("reflected xss in param 
%s (%s context, raw: %s)", + output.Highlight.Render(param), + output.SeverityHigh.Render(finding.Context), + strings.Join(finding.SurvivedRaw, "")) + spin.Start() + + if logdir != "" { + logger.Write(sanitizedURL, logdir, + fmt.Sprintf("reflected XSS: param [%s] in %s context, unescaped chars [%s]\n", + param, finding.Context, strings.Join(finding.SurvivedRaw, ""))) + } + } + }() + } + wg.Wait() + + spin.Stop() + + if len(result.Findings) == 0 { + log.Info("no reflected xss detected") + log.Complete(0, "found") + return nil, nil //nolint:nilnil // no finding is not an error, mirrors the other scanners + } + + log.Complete(len(result.Findings), "found") + return result, nil +} + +// probeXSS injects a canary wrapped in the breaking chars into one param, then +// inspects the reflection: it classifies where the canary landed and which +// breaking chars came back unescaped there. ok is false unless at least one +// dangerous char survived in an exploitable context. +func probeXSS(client *http.Client, parsedURL *url.URL, existing url.Values, param string) (XSSFinding, bool) { + // wrap the canary so a single request tells us both that it reflected and + // which surrounding chars survived: "canary' `canary` + payload := fmt.Sprintf("<%s>\"%s'`%s`", canaryToken, canaryToken, canaryToken) + + testParams := url.Values{} + for k, v := range existing { + if k != param { + testParams[k] = v + } + } + testParams.Set(param, payload) + testURL := fmt.Sprintf("%s://%s%s?%s", parsedURL.Scheme, parsedURL.Host, parsedURL.Path, testParams.Encode()) + + req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, testURL, http.NoBody) + if err != nil { + charmlog.Debugf("xss: build request for %s: %v", testURL, err) + return XSSFinding{}, false + } + resp, err := client.Do(req) + if err != nil { + charmlog.Debugf("xss: request %s: %v", testURL, err) + return XSSFinding{}, false + } + body, err := io.ReadAll(io.LimitReader(resp.Body, xssMaxBody)) + resp.Body.Close() + if 
err != nil { + return XSSFinding{}, false + } + bodyStr := string(body) + + // no echo of the canary at all means the param isn't reflected; bail early. + if !strings.Contains(bodyStr, canaryToken) { + return XSSFinding{}, false + } + + reflectCtx := classifyXSSContext(bodyStr) + survived := survivingBreakChars(bodyStr) + + // a reflection that escaped every dangerous char can't break out, so it's not + // reported - only raw chars that matter in the detected context count. + survived = relevantForContext(reflectCtx, survived) + if len(survived) == 0 { + return XSSFinding{}, false + } + + return XSSFinding{ + URL: testURL, + Parameter: param, + Context: reflectCtx, + SurvivedRaw: survived, + Severity: "high", + }, true +} + +// classifyXSSContext guesses where the canary was reflected. We look at the +// markup immediately around the token: a live tag means html text, a +// reflection inside a is a script context + for { + open := strings.Index(body, "") + if closeIdx < 0 { + break + } + segment := body[open : open+closeIdx] + if strings.Contains(segment, canaryToken) { + return "script" + } + body = body[open+closeIdx+len(""):] + } + + // default: echoed inside an html attribute value + return "attribute" +} + +// survivingBreakChars reports which dangerous chars came back next to the canary +// unescaped. We only trust occurrences adjacent to the token so unrelated chars +// elsewhere on the page don't create false positives. 
+func survivingBreakChars(body string) []string { + survived := make([]string, 0, len(xssBreakChars)) + markers := []string{ + "<" + canaryToken, // leading < survived + canaryToken + ">", // trailing > survived + "\"" + canaryToken, // leading " survived + canaryToken + "'", // trailing ' survived + "`" + canaryToken, // backtick wrap survived (token + ` and ` + token) + canaryToken + "`", + } + present := make(map[string]bool, len(xssBreakChars)) + for i := 0; i < len(markers); i++ { + if !strings.Contains(body, markers[i]) { + continue + } + switch { + case strings.HasPrefix(markers[i], "<"): + present["<"] = true + case strings.HasSuffix(markers[i], ">"): + present[">"] = true + case strings.HasPrefix(markers[i], "\""): + present["\""] = true + case strings.HasSuffix(markers[i], "'"): + present["'"] = true + default: + present["`"] = true + } + } + + // keep the canonical order for stable output + for i := 0; i < len(xssBreakChars); i++ { + if present[xssBreakChars[i]] { + survived = append(survived, xssBreakChars[i]) + } + } + return survived +} + +// relevantForContext filters surviving chars to the ones that actually enable a +// breakout in the detected context: angle brackets matter in html, quotes and +// backticks matter inside attributes/scripts. +func relevantForContext(reflectCtx string, survived []string) []string { + wanted := make(map[string]bool, len(survived)) + switch reflectCtx { + case "html": + wanted["<"] = true + wanted[">"] = true + case "attribute": + // breaking out of an attribute value needs the quote that delimits it; a + // bare backtick isn't a delimiter in html, so it doesn't count here. 
+ wanted["\""] = true + wanted["'"] = true + case "script": + // a quote, backtick, or angle bracket all let you close/escape the script + wanted["\""] = true + wanted["'"] = true + wanted["`"] = true + wanted["<"] = true + wanted[">"] = true + } + + filtered := make([]string, 0, len(survived)) + for i := 0; i < len(survived); i++ { + if wanted[survived[i]] { + filtered = append(filtered, survived[i]) + } + } + return filtered +} + +// ResultType identifies reflected-xss findings for the result registry. +func (r *XSSResult) ResultType() string { return "xss" } + +var _ ScanResult = (*XSSResult)(nil) diff --git a/internal/scan/xss_test.go b/internal/scan/xss_test.go new file mode 100644 index 0000000..66ade5a --- /dev/null +++ b/internal/scan/xss_test.go @@ -0,0 +1,153 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "html" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +// reflectsRaw echoes the named param straight into html text, so the breaking +// chars survive unescaped - a reflected xss sink. +func reflectsRaw(param string) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + v := r.URL.Query().Get(param) + w.Header().Set("Content-Type", "text/html") + w.WriteHeader(http.StatusOK) + //nolint:gosec // deliberate reflected-xss fixture for the probe under test + w.Write([]byte("
" + v + "
")) + })) +} + +func TestXSS_DetectsRawHTMLReflection(t *testing.T) { + srv := reflectsRaw("q") + defer srv.Close() + + result, err := XSS(srv.URL, 5*time.Second, 4, "") + if err != nil { + t.Fatalf("XSS: %v", err) + } + if result == nil || len(result.Findings) == 0 { + t.Fatalf("expected reflected xss findings, got %+v", result) + } + + var found *XSSFinding + for i := range result.Findings { + if result.Findings[i].Parameter == "q" { + found = &result.Findings[i] + } + } + if found == nil { + t.Fatalf("expected a finding on param 'q', got %+v", result.Findings) + } + if found.Context != "html" { + t.Errorf("expected html context, got %s", found.Context) + } + if len(found.SurvivedRaw) == 0 { + t.Errorf("expected surviving breaking chars, got none") + } +} + +func TestXSS_NoFalsePositiveWhenEscaped(t *testing.T) { + // the server html-escapes the reflection, so no breaking char survives raw. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + v := r.URL.Query().Get("q") + w.Header().Set("Content-Type", "text/html") + w.WriteHeader(http.StatusOK) + w.Write([]byte("
" + html.EscapeString(v) + "
")) + })) + defer srv.Close() + + result, err := XSS(srv.URL, 5*time.Second, 4, "") + if err != nil { + t.Fatalf("XSS: %v", err) + } + if result != nil && len(result.Findings) > 0 { + t.Errorf("expected no findings when reflection is escaped, got %+v", result.Findings) + } +} + +func TestXSS_NoFalsePositiveWhenNotReflected(t *testing.T) { + // never echoes the input back, so nothing is injectable. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.WriteHeader(http.StatusOK) + w.Write([]byte("static page")) + })) + defer srv.Close() + + result, err := XSS(srv.URL, 5*time.Second, 4, "") + if err != nil { + t.Fatalf("XSS: %v", err) + } + if result != nil && len(result.Findings) > 0 { + t.Errorf("expected no findings on static page, got %+v", result.Findings) + } +} + +func TestClassifyXSSContext(t *testing.T) { + tests := []struct { + name string + body string + want string + }{ + { + name: "live html tag", + body: "
<" + canaryToken + ">
", + want: "html", + }, + { + name: "inside script block", + body: "", + want: "script", + }, + { + name: "attribute value", + body: ``, + want: "attribute", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := classifyXSSContext(tt.body); got != tt.want { + t.Errorf("classifyXSSContext() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestSurvivingBreakChars(t *testing.T) { + // the canary is wrapped exactly as the probe injects it; all five chars survive. + body := "<" + canaryToken + ">\"" + canaryToken + "'`" + canaryToken + "`" + got := survivingBreakChars(body) + want := map[string]bool{"<": true, ">": true, "\"": true, "'": true, "`": true} + if len(got) != len(want) { + t.Fatalf("expected %d surviving chars, got %v", len(want), got) + } + for _, c := range got { + if !want[c] { + t.Errorf("unexpected surviving char %q", c) + } + } +} + +func TestXSSResult_ResultType(t *testing.T) { + r := &XSSResult{} + if r.ResultType() != "xss" { + t.Errorf("expected result type 'xss', got %q", r.ResultType()) + } +} diff --git a/man/sif.1 b/man/sif.1 index f70e3cd..bb160a7 100644 --- a/man/sif.1 +++ b/man/sif.1 @@ -51,7 +51,7 @@ vulnerability scanning with nuclei templates. automated google dorking. .TP .B \-js -javascript analysis. +javascript analysis + secret and endpoint extraction. .TP .B \-c3 cloud storage misconfiguration scan. @@ -86,9 +86,27 @@ sql reconnaissance (admin panels, error disclosure). .B \-lfi local file inclusion reconnaissance. .TP +.B \-cors +cors misconfiguration probe (reflected/permissive origins). +.TP +.B \-redirect +open redirect probe. +.TP +.B \-xss +reflected xss probe. +.TP .B \-framework framework detection with cve lookup. .TP +.B \-crawl +web crawler; spiders same\-host links, scripts and forms, respecting robots.txt. +.TP +.BR \-crawl\-depth " \fIn\fR" +max crawl recursion depth (default 2). 
+.TP +.B \-passive +passive subdomain and historical url discovery from third\-party feeds (zero traffic to the target). +.TP .B \-noscan skip the base url scan (robots.txt, etc). .SH OPTIONS diff --git a/sif.go b/sif.go index cee06fd..bb6b705 100644 --- a/sif.go +++ b/sif.go @@ -391,6 +391,56 @@ func (app *App) Run() error { } } + if app.settings.CORS { + result, err := scan.CORS(url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir) + if err != nil { + log.Errorf("Error while running CORS probe: %s", err) + } else if result != nil { + moduleResults = append(moduleResults, NewModuleResult(result)) + scansRun = append(scansRun, "CORS") + } + } + + if app.settings.Redirect { + result, err := scan.Redirect(url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir) + if err != nil { + log.Errorf("Error while running open redirect probe: %s", err) + } else if result != nil { + moduleResults = append(moduleResults, NewModuleResult(result)) + scansRun = append(scansRun, "Open Redirect") + } + } + + if app.settings.XSS { + result, err := scan.XSS(url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir) + if err != nil { + log.Errorf("Error while running reflected XSS probe: %s", err) + } else if result != nil { + moduleResults = append(moduleResults, NewModuleResult(result)) + scansRun = append(scansRun, "Reflected XSS") + } + } + + if app.settings.Crawl { + result, err := scan.Crawl(url, app.settings.CrawlDepth, app.settings.Timeout, app.settings.LogDir) + if err != nil { + log.Errorf("Error while running web crawl: %s", err) + } else if result != nil { + moduleResults = append(moduleResults, NewModuleResult(result)) + scansRun = append(scansRun, "Crawl") + } + } + + if app.settings.Passive { + result, err := scan.Passive(url, app.settings.Timeout, app.settings.LogDir) + if err != nil { + log.Errorf("Error while running passive discovery: %s", err) + } else if result != nil { + moduleResults = append(moduleResults, 
NewModuleResult(result)) + scansRun = append(scansRun, "Passive") + } + } + // Load and run modules if app.settings.AllModules || app.settings.Modules != "" || app.settings.ModuleTags != "" { loader, err := modules.NewLoader()